# GenAIExamples/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose_vllm_multi-arc.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
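
# This compose file brings up the EdgeCraftRAG stack on Intel Arc GPUs.
# Services (default ports shown; ports, paths, and group IDs are all
# overridable through the environment variables referenced below):
#   server          - the RAG pipeline API, published on port 16010
#   ecrag           - the mega-service gateway in front of the pipeline, port 16011
#   ui              - the web front end, port 8082
#   llm-serving-xpu - vLLM served through IPEX-LLM, host port 8008 -> container port 8000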
services:
  server:
    image: ${REGISTRY:-opea}/edgecraftrag-server:${TAG:-latest}
    container_name: edgecraftrag-server
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HF_ENDPOINT: ${HF_ENDPOINT}
      vLLM_ENDPOINT: ${vLLM_ENDPOINT}
      LLM_MODEL: ${LLM_MODEL}
      ENABLE_BENCHMARK: ${ENABLE_BENCHMARK:-false}
    volumes:
      - ${MODEL_PATH:-${PWD}}:/home/user/models
      - ${DOC_PATH:-${PWD}}:/home/user/docs
      - ${UI_TMPFILE_PATH:-${PWD}}:/home/user/ui_cache
      - ${HF_CACHE:-${HOME}/.cache}:/home/user/.cache
      - ${PROMPT_PATH:-${PWD}}:/templates/custom
    ports:
      - ${PIPELINE_SERVICE_PORT:-16010}:${PIPELINE_SERVICE_PORT:-16010}
    devices:
      - /dev/dri:/dev/dri
    group_add:
      - ${VIDEOGROUPID:-44}
      - ${RENDERGROUPID:-109}
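
  # ecrag: the mega-service gateway; it reaches the pipeline server via
  # PIPELINE_SERVICE_HOST_IP/PIPELINE_SERVICE_PORT and is started after it
  # (depends_on orders startup only, it does not wait for readiness).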
  ecrag:
    image: ${REGISTRY:-opea}/edgecraftrag:${TAG:-latest}
    container_name: edgecraftrag
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      MEGA_SERVICE_PORT: ${MEGA_SERVICE_PORT:-16011}
      MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP:-${HOST_IP}}
      PIPELINE_SERVICE_PORT: ${PIPELINE_SERVICE_PORT:-16010}
      PIPELINE_SERVICE_HOST_IP: ${PIPELINE_SERVICE_HOST_IP:-${HOST_IP}}
    ports:
      - ${MEGA_SERVICE_PORT:-16011}:${MEGA_SERVICE_PORT:-16011}
    depends_on:
      - server
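
  # ui: the web front end; it mounts the same ui_cache volume as the
  # pipeline server, so files uploaded through the UI are visible to both
  # containers.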
  ui:
    image: ${REGISTRY:-opea}/edgecraftrag-ui:${TAG:-latest}
    container_name: edgecraftrag-ui
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      MEGA_SERVICE_PORT: ${MEGA_SERVICE_PORT:-16011}
      MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP:-${HOST_IP}}
      PIPELINE_SERVICE_PORT: ${PIPELINE_SERVICE_PORT:-16010}
      PIPELINE_SERVICE_HOST_IP: ${PIPELINE_SERVICE_HOST_IP:-${HOST_IP}}
      UI_SERVICE_PORT: ${UI_SERVICE_PORT:-8082}
      UI_SERVICE_HOST_IP: ${UI_SERVICE_HOST_IP:-0.0.0.0}
    volumes:
      - ${UI_TMPFILE_PATH:-${PWD}}:/home/user/ui_cache
    ports:
      - ${UI_SERVICE_PORT:-8082}:${UI_SERVICE_PORT:-8082}
    restart: always
    depends_on:
      - server
      - ecrag
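
  # llm-serving-xpu: runs vLLM through IPEX-LLM on the Arc GPU(s). The
  # container needs privileged mode, /dev/dri, and the video/render groups
  # for GPU access. TENSOR_PARALLEL_SIZE > 1 shards the model across
  # multiple cards; the large shm_size is there because vLLM's multi-GPU
  # workers commonly communicate through shared memory.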
  llm-serving-xpu:
    container_name: ipex-llm-serving-xpu-container
    image: intelanalytics/ipex-llm-serving-xpu:latest
    privileged: true
    ports:
      - ${VLLM_SERVICE_PORT:-8008}:8000
    group_add:
      - video
      - ${VIDEOGROUPID:-44}
      - ${RENDERGROUPID:-109}
    volumes:
      - ${LLM_MODEL_PATH:-${PWD}}:/llm/models
    devices:
      - /dev/dri
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HF_ENDPOINT: ${HF_ENDPOINT}
      MODEL_PATH: "/llm/models"
      SERVED_MODEL_NAME: ${LLM_MODEL}
      TENSOR_PARALLEL_SIZE: ${TENSOR_PARALLEL_SIZE:-1}
    shm_size: '16g'
    entrypoint: /bin/bash -c "\
      cd /llm && \
      bash start-vllm-service.sh"
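
# All four services attach to the default bridge network defined below.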
networks:
  default:
    driver: bridge
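
# Typical bring-up (a sketch; which variables you must export depends on
# your environment, but HOST_IP and LLM_MODEL at minimum, plus the host
# paths for the model/doc/cache volumes mounted above):
#   export HOST_IP=<host ip>
#   export LLM_MODEL=<HF model id>
#   docker compose -f compose_vllm_multi-arc.yaml up -d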