# GenAIExamples/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose_vllm_multi-arc.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
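
# This compose file brings up the EdgeCraftRAG stack on Intel Arc GPUs.
# Services (default ports shown; ports, paths, and group IDs are all
# overridable through the environment variables referenced below):
#   server          - the RAG pipeline API, published on port 16010
#   ecrag           - the mega-service gateway in front of the pipeline, port 16011
#   ui              - the web front end, port 8082
#   llm-serving-xpu - vLLM served through IPEX-LLM, host port 8008 -> container port 8000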
services:
  server:
    image: ${REGISTRY:-opea}/edgecraftrag-server:${TAG:-latest}
    container_name: edgecraftrag-server
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HF_ENDPOINT: ${HF_ENDPOINT}
      vLLM_ENDPOINT: ${vLLM_ENDPOINT}
      LLM_MODEL: ${LLM_MODEL}
      ENABLE_BENCHMARK: ${ENABLE_BENCHMARK:-false}
    volumes:
      - ${MODEL_PATH:-${PWD}}:/home/user/models
      - ${DOC_PATH:-${PWD}}:/home/user/docs
      - ${UI_TMPFILE_PATH:-${PWD}}:/home/user/ui_cache
      - ${HF_CACHE:-${HOME}/.cache}:/home/user/.cache
      - ${PROMPT_PATH:-${PWD}}:/templates/custom
    ports:
      - ${PIPELINE_SERVICE_PORT:-16010}:${PIPELINE_SERVICE_PORT:-16010}
    devices:
      - /dev/dri:/dev/dri
    group_add:
      - ${VIDEOGROUPID:-44}
      - ${RENDERGROUPID:-109}
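
  # ecrag: the mega-service gateway; it reaches the pipeline server via
  # PIPELINE_SERVICE_HOST_IP/PIPELINE_SERVICE_PORT and is started after it
  # (depends_on orders startup only, it does not wait for readiness).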
  ecrag:
    image: ${REGISTRY:-opea}/edgecraftrag:${TAG:-latest}
    container_name: edgecraftrag
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      MEGA_SERVICE_PORT: ${MEGA_SERVICE_PORT:-16011}
      MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP:-${HOST_IP}}
      PIPELINE_SERVICE_PORT: ${PIPELINE_SERVICE_PORT:-16010}
      PIPELINE_SERVICE_HOST_IP: ${PIPELINE_SERVICE_HOST_IP:-${HOST_IP}}
    ports:
      - ${MEGA_SERVICE_PORT:-16011}:${MEGA_SERVICE_PORT:-16011}
    depends_on:
      - server
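
  # ui: the web front end; it mounts the same ui_cache volume as the
  # pipeline server, so files uploaded through the UI are visible to both
  # containers.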
  ui:
    image: ${REGISTRY:-opea}/edgecraftrag-ui:${TAG:-latest}
    container_name: edgecraftrag-ui
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      MEGA_SERVICE_PORT: ${MEGA_SERVICE_PORT:-16011}
      MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP:-${HOST_IP}}
      PIPELINE_SERVICE_PORT: ${PIPELINE_SERVICE_PORT:-16010}
      PIPELINE_SERVICE_HOST_IP: ${PIPELINE_SERVICE_HOST_IP:-${HOST_IP}}
      UI_SERVICE_PORT: ${UI_SERVICE_PORT:-8082}
      UI_SERVICE_HOST_IP: ${UI_SERVICE_HOST_IP:-0.0.0.0}
    volumes:
      - ${UI_TMPFILE_PATH:-${PWD}}:/home/user/ui_cache
    ports:
      - ${UI_SERVICE_PORT:-8082}:${UI_SERVICE_PORT:-8082}
    restart: always
    depends_on:
      - server
      - ecrag
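
  # llm-serving-xpu: runs vLLM through IPEX-LLM on the Arc GPU(s). The
  # container needs privileged mode, /dev/dri, and the video/render groups
  # for GPU access. TENSOR_PARALLEL_SIZE > 1 shards the model across
  # multiple cards; the large shm_size is there because vLLM's multi-GPU
  # workers commonly communicate through shared memory.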
  llm-serving-xpu:
    container_name: ipex-llm-serving-xpu-container
    image: intelanalytics/ipex-llm-serving-xpu:latest
    privileged: true
    ports:
      - ${VLLM_SERVICE_PORT:-8008}:8000
    group_add:
      - video
      - ${VIDEOGROUPID:-44}
      - ${RENDERGROUPID:-109}
    volumes:
      - ${LLM_MODEL_PATH:-${PWD}}:/llm/models
    devices:
      - /dev/dri
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HF_ENDPOINT: ${HF_ENDPOINT}
      MODEL_PATH: "/llm/models"
      SERVED_MODEL_NAME: ${LLM_MODEL}
      TENSOR_PARALLEL_SIZE: ${TENSOR_PARALLEL_SIZE:-1}
    shm_size: '16g'
    entrypoint: /bin/bash -c "\
      cd /llm && \
      bash start-vllm-service.sh"
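
# All four services attach to the default bridge network defined below.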
networks:
  default:
    driver: bridge
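
# Typical bring-up (a sketch; which variables you must export depends on
# your environment, but HOST_IP and LLM_MODEL at minimum, plus the host
# paths for the model/doc/cache volumes mounted above):
#   export HOST_IP=<host ip>
#   export LLM_MODEL=<HF model id>
#   docker compose -f compose_vllm_multi-arc.yaml up -d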