# GenAIExamples/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose_vllm.yaml

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
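#
# A minimal sketch of the environment this file expects, inferred from the
# variables referenced below (values are illustrative assumptions, not
# project-mandated defaults):
#   HOST_IP                   address the ecrag/ui services use to reach peers
#   LLM_MODEL                 model id served by vllm-openvino-server
#   HUGGINGFACEHUB_API_TOKEN  Hugging Face token, passed through as HF_TOKEN
#   vLLM_ENDPOINT             e.g. http://${HOST_IP}:8008 (assumption)
# Typical bring-up once those are exported in the shell:
#   docker compose -f compose_vllm.yaml up -d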
services:
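  # Pipeline server: hosts the RAG pipeline REST API (published on
  # PIPELINE_SERVICE_PORT, default 16010) and mounts host paths for models,
  # documents, prompt templates, the UI file cache, and the HF cache.
  # /dev/dri is passed through for Intel Arc GPU access.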
  server:
    image: ${REGISTRY:-opea}/edgecraftrag-server:${TAG:-latest}
    container_name: edgecraftrag-server
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HF_ENDPOINT: ${HF_ENDPOINT}
      vLLM_ENDPOINT: ${vLLM_ENDPOINT}
      ENABLE_BENCHMARK: ${ENABLE_BENCHMARK:-false}
    volumes:
      - ${MODEL_PATH:-${PWD}}:/home/user/models
      - ${DOC_PATH:-${PWD}}:/home/user/docs
      - ${UI_TMPFILE_PATH:-${PWD}}:/home/user/ui_cache
      - ${HF_CACHE:-${HOME}/.cache}:/home/user/.cache
      - ${PROMPT_PATH:-${PWD}}:/templates/custom
    ports:
      - ${PIPELINE_SERVICE_PORT:-16010}:${PIPELINE_SERVICE_PORT:-16010}
    devices:
      - /dev/dri:/dev/dri
    group_add:
      - ${VIDEOGROUPID:-44}
      - ${RENDERGROUPID:-109}
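  # Mega-service gateway: fronts the pipeline server on MEGA_SERVICE_PORT
  # (default 16011) and locates the pipeline via
  # PIPELINE_SERVICE_HOST_IP/PIPELINE_SERVICE_PORT, both defaulting to HOST_IP
  # and 16010.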
  ecrag:
    image: ${REGISTRY:-opea}/edgecraftrag:${TAG:-latest}
    container_name: edgecraftrag
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      MEGA_SERVICE_PORT: ${MEGA_SERVICE_PORT:-16011}
      MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP:-${HOST_IP}}
      PIPELINE_SERVICE_PORT: ${PIPELINE_SERVICE_PORT:-16010}
      PIPELINE_SERVICE_HOST_IP: ${PIPELINE_SERVICE_HOST_IP:-${HOST_IP}}
    ports:
      - ${MEGA_SERVICE_PORT:-16011}:${MEGA_SERVICE_PORT:-16011}
    depends_on:
      - server
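  # Web UI: listens on UI_SERVICE_PORT (default 8082) and talks to both the
  # mega-service and the pipeline server; it shares the ui_cache volume with
  # the pipeline server so uploaded files are visible to both containers.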
  ui:
    image: ${REGISTRY:-opea}/edgecraftrag-ui:${TAG:-latest}
    container_name: edgecraftrag-ui
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      MEGA_SERVICE_PORT: ${MEGA_SERVICE_PORT:-16011}
      MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP:-${HOST_IP}}
      PIPELINE_SERVICE_PORT: ${PIPELINE_SERVICE_PORT:-16010}
      PIPELINE_SERVICE_HOST_IP: ${PIPELINE_SERVICE_HOST_IP:-${HOST_IP}}
      UI_SERVICE_PORT: ${UI_SERVICE_PORT:-8082}
      UI_SERVICE_HOST_IP: ${UI_SERVICE_HOST_IP:-0.0.0.0}
    volumes:
      - ${UI_TMPFILE_PATH:-${PWD}}:/home/user/ui_cache
    ports:
      - ${UI_SERVICE_PORT:-8082}:${UI_SERVICE_PORT:-8082}
    restart: always
    depends_on:
      - server
      - ecrag
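  # OpenAI-compatible vLLM server on the OpenVINO GPU backend. The API listens
  # on port 80 inside the container and is published on VLLM_SERVICE_PORT
  # (default 8008); LLM_MODEL selects the model to serve, and /dev/dri plus
  # the video/render groups grant the container Arc GPU access.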
  vllm-openvino-server:
    container_name: vllm-openvino-server
    image: ${REGISTRY:-opea}/vllm-arc:${TAG:-latest}
    ports:
      - ${VLLM_SERVICE_PORT:-8008}:80
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      VLLM_OPENVINO_DEVICE: GPU
      HF_ENDPOINT: ${HF_ENDPOINT}
      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
    volumes:
      - ${HF_CACHE:-${HOME}/.cache}:/root/.cache
    devices:
      - /dev/dri
    group_add:
      - ${VIDEOGROUPID:-44}
      - ${RENDERGROUPID:-109}
    entrypoint: /bin/bash -c "\
      cd / && \
      export VLLM_CPU_KVCACHE_SPACE=50 && \
      export VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS=ON && \
      python3 -m vllm.entrypoints.openai.api_server \
      --model '${LLM_MODEL}' \
      --max_model_len=4096 \
      --host 0.0.0.0 \
      --port 80"
networks:
  default:
    driver: bridge
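# A quick smoke test once the stack is up; a sketch assuming the default
# VLLM_SERVICE_PORT of 8008 and a reachable HOST_IP (both assumptions):
#   curl http://${HOST_IP}:8008/v1/models
#   curl http://${HOST_IP}:8008/v1/completions \
#     -H "Content-Type: application/json" \
#     -d "{\"model\": \"${LLM_MODEL}\", \"prompt\": \"Hello\", \"max_tokens\": 16}"
# /v1/models and /v1/completions are standard vLLM OpenAI-compatible routes.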