# GenAIExamples/MultimodalQnA/docker_compose/intel/hpu/gaudi/compose.yaml

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

services:
  # Redis Stack container. Publishes the ports named by REDIS_DB_PORT and
  # REDIS_INSIGHTS_PORT, using the same number on the host and in the
  # container.
  redis-vector-db:
    container_name: redis-vector-db
    image: redis/redis-stack:7.2.0-v9
    ports:
      - "${REDIS_DB_PORT}:${REDIS_DB_PORT}"
      - "${REDIS_INSIGHTS_PORT}:${REDIS_INSIGHTS_PORT}"
  # Whisper container. WHISPER_PORT is used for both sides of the published
  # port and is also passed into the container environment.
  whisper-service:
    container_name: whisper-service
    image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
    restart: unless-stopped
    ipc: host
    ports:
      - "${WHISPER_PORT}:${WHISPER_PORT}"
    environment:
      # Proxy settings taken from the environment Compose is invoked with.
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      no_proxy: ${no_proxy}
      # Service-specific settings.
      WHISPER_PORT: ${WHISPER_PORT}
      WHISPER_SERVER_ENDPOINT: ${WHISPER_SERVER_ENDPOINT}
  # SpeechT5 container. Host port TTS_PORT is mapped to the fixed container
  # port 7055.
  speecht5-service:
    container_name: speecht5-service
    image: ${REGISTRY:-opea}/speecht5:${TAG:-latest}
    restart: unless-stopped
    ipc: host
    ports:
      - "${TTS_PORT}:7055"
    environment:
      # Proxy settings taken from the environment Compose is invoked with.
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      no_proxy: ${no_proxy}
      # Service-specific settings.
      TTS_PORT: ${TTS_PORT}
      TTS_ENDPOINT: ${TTS_ENDPOINT}
  # Multimodal data-preparation container. Started after `redis-vector-db`
  # and `lvm`; host port DATAPREP_MMR_PORT is mapped to container port 5000.
  dataprep-multimodal-redis:
    image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
    container_name: dataprep-multimodal-redis
    depends_on:
      - redis-vector-db
      - lvm
    ports:
      - "${DATAPREP_MMR_PORT}:5000"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      REDIS_URL: ${REDIS_URL}
      REDIS_HOST: ${REDIS_HOST}
      DATAPREP_MMR_PORT: ${DATAPREP_MMR_PORT}
      INDEX_NAME: ${INDEX_NAME}
      LVM_ENDPOINT: "http://${LVM_SERVICE_HOST_IP}:${LVM_PORT}/v1/lvm"
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      # Quoted so the YAML parser keeps the value as the string "true"
      # instead of converting it to a boolean.
      MULTIMODAL_DATAPREP: "true"
      DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_MULTIMODALREDIS"
    # `multimodalqna` waits for this check to pass (condition:
    # service_healthy) before it starts.
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
      interval: 10s
      timeout: 5s
      retries: 10
    restart: unless-stopped
  # BridgeTower embedding server run with the `habana` runtime. The
  # entrypoint starts bridgetower_server.py with `--device hpu` and the model
  # named by EMBEDDING_MODEL_ID.
  embedding-multimodal-bridgetower-gaudi:
    container_name: embedding-multimodal-bridgetower-gaudi
    image: ${REGISTRY:-opea}/embedding-multimodal-bridgetower-gaudi:${TAG:-latest}
    restart: unless-stopped
    runtime: habana
    ipc: host
    cap_add:
      - SYS_NICE
    ports:
      - "${EMM_BRIDGETOWER_PORT}:${EMM_BRIDGETOWER_PORT}"
    environment:
      # Proxy settings taken from the environment Compose is invoked with.
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      no_proxy: ${no_proxy}
      PORT: ${EMM_BRIDGETOWER_PORT}
      HABANA_VISIBLE_DEVICES: all
    entrypoint:
      - "python"
      - "bridgetower_server.py"
      - "--device"
      - "hpu"
      - "--model_name_or_path"
      - "$EMBEDDING_MODEL_ID"
    # The `embedding` service waits for this check to pass. http_proxy is
    # cleared for the curl call so the request to localhost is not proxied.
    healthcheck:
      test:
        - "CMD-SHELL"
        - "http_proxy='' curl -f http://localhost:${EMM_BRIDGETOWER_PORT}/v1/health_check"
      start_period: 30s
      interval: 10s
      timeout: 6s
      retries: 18
  # Embedding microservice. Starts only after
  # `embedding-multimodal-bridgetower-gaudi` reports healthy.
  embedding:
    image: ${REGISTRY:-opea}/embedding:${TAG:-latest}
    container_name: embedding
    depends_on:
      embedding-multimodal-bridgetower-gaudi:
        condition: service_healthy
    ports:
      # Quoted for consistency with the other port mappings in this file.
      - "${MM_EMBEDDING_PORT_MICROSERVICE}:${MM_EMBEDDING_PORT_MICROSERVICE}"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      MMEI_EMBEDDING_ENDPOINT: ${MMEI_EMBEDDING_ENDPOINT}
      MM_EMBEDDING_PORT_MICROSERVICE: ${MM_EMBEDDING_PORT_MICROSERVICE}
      EMM_BRIDGETOWER_PORT: ${EMM_BRIDGETOWER_PORT}
      # Quoted so the YAML parser keeps the value as the string "true"
      # instead of converting it to a boolean.
      MULTIMODAL_EMBEDDING: "true"
    restart: unless-stopped
  # Retriever microservice configured with the OPEA_RETRIEVER_REDIS
  # component. Started after `redis-vector-db`.
  retriever-redis:
    container_name: retriever-redis
    image: ${REGISTRY:-opea}/retriever:${TAG:-latest}
    restart: unless-stopped
    depends_on:
      - redis-vector-db
    ipc: host
    ports:
      - "${REDIS_RETRIEVER_PORT}:${REDIS_RETRIEVER_PORT}"
    environment:
      # Proxy settings taken from the environment Compose is invoked with.
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      no_proxy: ${no_proxy}
      # Redis connection and index settings.
      REDIS_URL: ${REDIS_URL}
      REDIS_DB_PORT: ${REDIS_DB_PORT}
      REDIS_INSIGHTS_PORT: ${REDIS_INSIGHTS_PORT}
      REDIS_RETRIEVER_PORT: ${REDIS_RETRIEVER_PORT}
      INDEX_NAME: ${INDEX_NAME}
      # Retriever behaviour.
      BRIDGE_TOWER_EMBEDDING: ${BRIDGE_TOWER_EMBEDDING}
      RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
      LOGFLAG: ${LOGFLAG}
  # TGI server (tgi-gaudi image) run with the `habana` runtime, serving the
  # model named by LVM_MODEL_ID.
  tgi-gaudi:
    image: ghcr.io/huggingface/tgi-gaudi:2.3.1
    container_name: tgi-llava-gaudi-server
    ports:
      # Quoted for consistency with the other port entries in this file; the
      # value of TGI_GAUDI_PORT is used verbatim as the port specification.
      - "${TGI_GAUDI_PORT}"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TGI_GAUDI_PORT: ${TGI_GAUDI_PORT}
      LLAVA_SERVER_PORT: ${LLAVA_SERVER_PORT}
      LVM_PORT: ${LVM_PORT}
      # Numeric and boolean values are quoted so the YAML parser passes them
      # through as strings rather than typed scalars.
      HF_HUB_DISABLE_PROGRESS_BARS: "1"
      HF_HUB_ENABLE_HF_TRANSFER: "0"
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
      PREFILL_BATCH_BUCKET_SIZE: "1"
      BATCH_BUCKET_SIZE: "1"
      MAX_BATCH_TOTAL_TOKENS: "4096"
      ENABLE_HPU_GRAPH: "true"
      LIMIT_HPU_GRAPH: "true"
      USE_FLASH_ATTENTION: "true"
      FLASH_ATTENTION_RECOMPUTE: "true"
    # NOTE(review): the check targets ${host_ip}:8399; the port is hard-coded
    # here rather than derived from TGI_GAUDI_PORT - confirm the two agree.
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://${host_ip}:8399/health || exit 1"]
      interval: 10s
      timeout: 10s
      retries: 100
    runtime: habana
    cap_add:
      - SYS_NICE
    ipc: host
    command: --model-id ${LVM_MODEL_ID} --max-input-tokens 3048 --max-total-tokens 4096
    restart: unless-stopped
  # LVM microservice configured with the OPEA_TGI_LLAVA_LVM component.
  # Started after `tgi-gaudi`; the short depends_on form only orders startup
  # and does not wait for that service's health check.
  lvm:
    container_name: lvm
    image: ${REGISTRY:-opea}/lvm:${TAG:-latest}
    restart: unless-stopped
    depends_on:
      - tgi-gaudi
    ipc: host
    ports:
      - "${LVM_PORT}:${LVM_PORT}"
    environment:
      # Proxy settings taken from the environment Compose is invoked with.
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      no_proxy: ${no_proxy}
      # Service-specific settings.
      LVM_COMPONENT_NAME: "OPEA_TGI_LLAVA_LVM"
      LVM_ENDPOINT: ${LVM_ENDPOINT}
      LVM_PORT: ${LVM_PORT}
      LLAVA_SERVER_PORT: ${LLAVA_SERVER_PORT}
      # Falls back to 1 when MAX_IMAGES is unset or empty.
      MAX_IMAGES: ${MAX_IMAGES:-1}
      HF_HUB_DISABLE_PROGRESS_BARS: "1"
      HF_HUB_ENABLE_HF_TRANSFER: "0"
  # MultimodalQnA backend container. Waits for `dataprep-multimodal-redis`
  # to be healthy and for the other listed services to have started.
  multimodalqna:
    container_name: multimodalqna-backend-server
    image: ${REGISTRY:-opea}/multimodalqna:${TAG:-latest}
    restart: always
    ipc: host
    depends_on:
      dataprep-multimodal-redis:
        condition: service_healthy
      redis-vector-db:
        condition: service_started
      embedding:
        condition: service_started
      retriever-redis:
        condition: service_started
      lvm:
        condition: service_started
    ports:
      - "${MEGA_SERVICE_PORT}:${MEGA_SERVICE_PORT}"
    environment:
      # Proxy settings taken from the environment Compose is invoked with.
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      no_proxy: ${no_proxy}
      # Address of this service.
      MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP}
      MEGA_SERVICE_PORT: ${MEGA_SERVICE_PORT}
      # Embedding, retriever and LVM settings.
      MM_EMBEDDING_SERVICE_HOST_IP: ${MM_EMBEDDING_SERVICE_HOST_IP}
      MM_EMBEDDING_PORT_MICROSERVICE: ${MM_EMBEDDING_PORT_MICROSERVICE}
      MM_RETRIEVER_SERVICE_HOST_IP: ${MM_RETRIEVER_SERVICE_HOST_IP}
      LVM_SERVICE_HOST_IP: ${LVM_SERVICE_HOST_IP}
      LVM_MODEL_ID: ${LVM_MODEL_ID}
      # Whisper and TTS settings.
      WHISPER_PORT: ${WHISPER_PORT}
      WHISPER_SERVER_ENDPOINT: ${WHISPER_SERVER_ENDPOINT}
      TTS_PORT: ${TTS_PORT}
      TTS_ENDPOINT: ${TTS_ENDPOINT}
  # Gradio UI container for MultimodalQnA. Started after the `multimodalqna`
  # backend service.
  multimodalqna-ui:
    image: ${REGISTRY:-opea}/multimodalqna-ui:${TAG:-latest}
    container_name: multimodalqna-gradio-ui-server
    depends_on:
      - multimodalqna
    ports:
      - "${UI_PORT}:${UI_PORT}"
    # List syntax: each entry is NAME=value, and everything before the first
    # "=" is taken as the variable name.
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
      - BACKEND_SERVICE_ENDPOINT=${BACKEND_SERVICE_ENDPOINT}
      - DATAPREP_INGEST_SERVICE_ENDPOINT=${DATAPREP_INGEST_SERVICE_ENDPOINT}
      - DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT=${DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT}
      - DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT=${DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT}
      - DATAPREP_GET_FILE_ENDPOINT=${DATAPREP_GET_FILE_ENDPOINT}
      - DATAPREP_DELETE_FILE_ENDPOINT=${DATAPREP_DELETE_FILE_ENDPOINT}
      # Previously written "MEGA_SERVICE_PORT:=", which defined a variable
      # whose name ended in ":" and left MEGA_SERVICE_PORT itself unset.
      - MEGA_SERVICE_PORT=${MEGA_SERVICE_PORT}
      - UI_PORT=${UI_PORT}
      - UI_TIMEOUT=${UI_TIMEOUT}
      - DATAPREP_MMR_PORT=${DATAPREP_MMR_PORT}
    ipc: host
    restart: always

# Configures the project's `default` network to use the `bridge` driver.
# No service above declares a `networks:` key of its own.
networks:
  default:
    driver: bridge