# GenAIExamples/MultimodalQnA/docker_compose/amd/gpu/rocm/compose_vllm.yaml

# Copyright (C) 2024 Advanced Micro Devices, Inc.
# SPDX-License-Identifier: Apache-2.0

services:
  # Whisper container; container port 7066 is published 1:1 on the host.
  whisper-service:
    container_name: whisper-service
    image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
    restart: unless-stopped
    # Uses the host IPC namespace.
    ipc: host
    ports:
      - "7066:7066"
    environment:
      # Proxy settings are taken from the environment Compose is invoked in.
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      no_proxy: ${no_proxy}
  # Redis Stack instance, pinned to 7.2.0-v9. Both 6379 and 8001 are
  # published on the host under the same port numbers.
  redis-vector-db:
    container_name: redis-vector-db
    image: redis/redis-stack:7.2.0-v9
    ports:
      - "6379:6379"
      - "8001:8001"
  # Data-preparation service backed by Redis. Started after `redis-vector-db`
  # and `lvm`; host port 6007 maps to container port 5000.
  dataprep-multimodal-redis:
    image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
    container_name: dataprep-multimodal-redis
    depends_on:
      - redis-vector-db
      - lvm
    ports:
      - "6007:5000"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      REDIS_URL: ${REDIS_URL}
      REDIS_HOST: ${REDIS_HOST}
      INDEX_NAME: ${INDEX_NAME}
      # Points at the port published by the `lvm` service (9399).
      LVM_ENDPOINT: "http://${LVM_SERVICE_HOST_IP}:9399/v1/lvm"
      HUGGINGFACEHUB_API_TOKEN: ${MULTIMODAL_HUGGINGFACEHUB_API_TOKEN}
      # Quoted so the YAML parser hands Compose a string rather than a boolean;
      # the Compose spec asks for boolean-looking environment values to be quoted.
      MULTIMODAL_DATAPREP: "true"
      DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_MULTIMODALREDIS"
    # The `multimodalqna` service waits for this check to pass
    # (`condition: service_healthy`) before it is started.
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
      interval: 10s
      timeout: 5s
      retries: 10
    restart: unless-stopped
  # BridgeTower embedding server. It listens on ${EMBEDDER_PORT}, which is
  # published on the host under the same number.
  embedding-multimodal-bridgetower:
    container_name: embedding-multimodal-bridgetower
    image: ${REGISTRY:-opea}/embedding-multimodal-bridgetower:${TAG:-latest}
    restart: unless-stopped
    # Launches the server with `--device cpu` and the model named by
    # EMBEDDING_MODEL_ID (substituted by Compose).
    entrypoint:
      - "python"
      - "bridgetower_server.py"
      - "--device"
      - "cpu"
      - "--model_name_or_path"
      - "$EMBEDDING_MODEL_ID"
    ports:
      - "${EMBEDDER_PORT}:${EMBEDDER_PORT}"
    environment:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      no_proxy: ${no_proxy}
      PORT: ${EMBEDDER_PORT}
    # The `embedding` service waits for this check (`service_healthy`).
    # `http_proxy=''` clears the proxy for the probe so the localhost request
    # is not sent through it.
    healthcheck:
      test: ["CMD-SHELL", "http_proxy='' curl -f http://localhost:${EMBEDDER_PORT}/v1/health_check"]
      start_period: 30s
      interval: 10s
      timeout: 6s
      retries: 18
  # Embedding microservice. Started only once `embedding-multimodal-bridgetower`
  # reports healthy; its port is published 1:1 on the host.
  embedding:
    image: ${REGISTRY:-opea}/embedding:${TAG:-latest}
    container_name: embedding
    depends_on:
      embedding-multimodal-bridgetower:
        condition: service_healthy
    ports:
      # Quoted, as Compose recommends for all port mappings.
      - "${MM_EMBEDDING_PORT_MICROSERVICE}:${MM_EMBEDDING_PORT_MICROSERVICE}"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      # NOTE(review): presumably the URL of the BridgeTower server above —
      # confirm against the environment setup script.
      MMEI_EMBEDDING_ENDPOINT: ${MMEI_EMBEDDING_ENDPOINT}
      MM_EMBEDDING_PORT_MICROSERVICE: ${MM_EMBEDDING_PORT_MICROSERVICE}
      # Quoted so the YAML parser hands Compose a string rather than a boolean;
      # the Compose spec asks for boolean-looking environment values to be quoted.
      MULTIMODAL_EMBEDDING: "true"
    restart: unless-stopped
  # Retriever microservice using the Redis component. Started after
  # `redis-vector-db`; port 7000 is published 1:1 on the host.
  retriever-redis:
    container_name: retriever-redis
    image: ${REGISTRY:-opea}/retriever:${TAG:-latest}
    restart: unless-stopped
    ipc: host
    depends_on:
      - redis-vector-db
    ports:
      - "7000:7000"
    environment:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      no_proxy: ${no_proxy}
      RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
      REDIS_URL: ${REDIS_URL}
      INDEX_NAME: ${INDEX_NAME}
      BRIDGE_TOWER_EMBEDDING: ${BRIDGE_TOWER_EMBEDDING}
      LOGFLAG: ${LOGFLAG}
  # vLLM (ROCm image) serving ${MULTIMODAL_LLM_MODEL_ID}. The server listens on
  # container port 8011 (see `--port` below), published on the host as
  # ${MULTIMODAL_VLLM_SERVICE_PORT}, defaulting to 8081.
  multimodalqna-vllm-service:
    container_name: multimodalqna-vllm-service
    image: ${REGISTRY:-opea}/vllm-rocm:${TAG:-latest}
    ipc: host
    shm_size: 20G
    ports:
      - "${MULTIMODAL_VLLM_SERVICE_PORT:-8081}:8011"
    volumes:
      # Host directory (default ./data) mounted at /data in the container.
      - "${MODEL_CACHE:-./data}:/data"
    # Pass the AMD GPU device nodes into the container.
    devices:
      - /dev/kfd:/dev/kfd
      - /dev/dri/:/dev/dri/
    group_add:
      - video
    cap_add:
      - SYS_PTRACE
    security_opt:
      - seccomp:unconfined
      - apparmor=unconfined
    environment:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      no_proxy: ${no_proxy}
      # The same token is exported under both names.
      HUGGINGFACEHUB_API_TOKEN: ${MULTIMODAL_HUGGINGFACEHUB_API_TOKEN}
      HF_TOKEN: ${MULTIMODAL_HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: "1"
      HF_HUB_ENABLE_HF_TRANSFER: "0"
      # NOTE(review): `WILM_...` looks like a misspelling; upstream vLLM
      # documents `VLLM_USE_TRITON_FLASH_ATTN`, so this variable is presumably
      # ignored. Confirm against the vllm-rocm image before renaming — a rename
      # would change which attention backend is selected.
      WILM_USE_TRITON_FLASH_ATTENTION: "0"
      PYTORCH_JIT: "0"
    # Folded scalar: the lines below are joined with single spaces into one
    # argument string.
    command: >-
      --model ${MULTIMODAL_LLM_MODEL_ID}
      --swap-space 16
      --disable-log-requests
      --dtype float16
      --tensor-parallel-size 1
      --host 0.0.0.0
      --port 8011
      --num-scheduler-steps 1
      --distributed-executor-backend "mp"
  # LVM microservice configured with the OPEA_VLLM_LVM component. Started
  # after `multimodalqna-vllm-service`; port 9399 is published 1:1 on the host.
  lvm:
    container_name: lvm
    image: ${REGISTRY:-opea}/lvm:${TAG:-latest}
    restart: unless-stopped
    ipc: host
    depends_on:
      - multimodalqna-vllm-service
    ports:
      - "9399:9399"
    environment:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      no_proxy: ${no_proxy}
      LVM_COMPONENT_NAME: "OPEA_VLLM_LVM"
      # NOTE(review): presumably the URL of the vLLM service above — confirm
      # against the environment setup script.
      LVM_ENDPOINT: ${LVM_ENDPOINT}
      LLM_MODEL_ID: ${MULTIMODAL_LLM_MODEL_ID}
      HF_HUB_DISABLE_PROGRESS_BARS: "1"
      HF_HUB_ENABLE_HF_TRANSFER: "0"
  # MultimodalQnA backend server; port 8888 is published 1:1 on the host.
  multimodalqna:
    container_name: multimodalqna-backend-server
    image: ${REGISTRY:-opea}/multimodalqna:${TAG:-latest}
    restart: always
    ipc: host
    # Only the data-prep service is gated on its healthcheck; the other
    # dependencies merely need to have been started.
    depends_on:
      dataprep-multimodal-redis:
        condition: service_healthy
      redis-vector-db:
        condition: service_started
      embedding:
        condition: service_started
      retriever-redis:
        condition: service_started
      lvm:
        condition: service_started
    ports:
      - "8888:8888"
    environment:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      no_proxy: ${no_proxy}
      # Host addresses and ports of the services this backend is configured with.
      MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP}
      MM_EMBEDDING_SERVICE_HOST_IP: ${MM_EMBEDDING_SERVICE_HOST_IP}
      MM_EMBEDDING_PORT_MICROSERVICE: ${MM_EMBEDDING_PORT_MICROSERVICE}
      MM_RETRIEVER_SERVICE_HOST_IP: ${MM_RETRIEVER_SERVICE_HOST_IP}
      LVM_SERVICE_HOST_IP: ${LVM_SERVICE_HOST_IP}
      # Note the differing names: the container sees WHISPER_SERVER_PORT, filled
      # from the caller's WHISPER_PORT.
      WHISPER_SERVER_PORT: ${WHISPER_PORT}
      WHISPER_SERVER_ENDPOINT: ${WHISPER_SERVER_ENDPOINT}
  # UI container (named multimodalqna-gradio-ui-server). Started after the
  # `multimodalqna` backend; port 5173 is published 1:1 on the host.
  multimodalqna-ui:
    container_name: multimodalqna-gradio-ui-server
    image: ${REGISTRY:-opea}/multimodalqna-ui:${TAG:-latest}
    restart: always
    ipc: host
    depends_on:
      - multimodalqna
    ports:
      - "5173:5173"
    # Mapping form, matching the other services in this file.
    environment:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      no_proxy: ${no_proxy}
      # Endpoints handed to the UI for the backend and data-prep services.
      BACKEND_SERVICE_ENDPOINT: ${BACKEND_SERVICE_ENDPOINT}
      DATAPREP_INGEST_SERVICE_ENDPOINT: ${DATAPREP_INGEST_SERVICE_ENDPOINT}
      DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT: ${DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT}
      DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT: ${DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT}

# No service above declares its own `networks`, so all of them join this
# default network, which is configured to use the bridge driver.
networks:
  default:
    driver: bridge