Enhance health check in GenAIExample docker-compose (#1410)
Fix a service launch issue:
1. Update the Gaudi TGI image from 2.0.6 to 2.3.1.
2. Change the hpu-gaudi TGI health check condition.

Signed-off-by: Wang, Xigui <xigui.wang@intel.com>
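All of the new health checks share one pattern: instead of sleeping for a fixed period, Compose polls the service's /health endpoint with curl every 10 seconds until it answers. A minimal manual sketch of the same probe, assuming host_ip is exported on the host and using the 3006 port published by the AudioQnA TGI service below:

    # Manually run the same probe the new healthcheck uses. host_ip and
    # port 3006 come from the AudioQnA compose file in the diff below;
    # substitute the port of the service you want to check.
    export host_ip=192.168.1.1   # assumption: replace with your host's IP
    curl -f "http://${host_ip}:3006/health" || exit 1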
@@ -37,6 +37,11 @@ services:
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
       HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://$host_ip:3006/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
     command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
   audioqna-xeon-backend-server:
     image: ${REGISTRY:-opea}/audioqna:${TAG:-latest}
@@ -35,7 +35,7 @@ services:
       - SYS_NICE
     restart: unless-stopped
   tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
+    image: ghcr.io/huggingface/tgi-gaudi:2.3.1
     container_name: tgi-gaudi-server
     ports:
       - "3006:80"
@@ -54,6 +54,11 @@ services:
       LIMIT_HPU_GRAPH: true
       USE_FLASH_ATTENTION: true
       FLASH_ATTENTION_RECOMPUTE: true
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://$host_ip:3006/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
     runtime: habana
     cap_add:
       - SYS_NICE
@@ -53,6 +53,7 @@ function start_services() {
     export LLM_SERVER_PORT=3006

     export BACKEND_SERVICE_ENDPOINT=http://${ip_address}:3008/v1/audioqna
+    export host_ip=${ip_address}
     # sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env

     # Start Docker Containers
@@ -53,6 +53,7 @@ function start_services() {
     export LLM_SERVER_PORT=3006

     export BACKEND_SERVICE_ENDPOINT=http://${ip_address}:3008/v1/audioqna
+    export host_ip=${ip_address}

     # sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env

@@ -38,6 +38,11 @@ services:
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
       HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://${host_ip}:3006/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
     command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
   wav2lip-service:
     image: ${REGISTRY:-opea}/wav2lip:${TAG:-latest}
@@ -89,6 +89,11 @@ services:
       HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
       LLM_MODEL_ID: ${LLM_MODEL_ID}
       VLLM_TORCH_PROFILER_DIR: "/mnt"
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://$host_ip:9009/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
     command: --model $LLM_MODEL_ID --host 0.0.0.0 --port 80
   chatqna-xeon-backend-server:
     image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
@@ -93,6 +93,11 @@ services:
       OMPI_MCA_btl_vader_single_copy_mechanism: none
       LLM_MODEL_ID: ${LLM_MODEL_ID}
       VLLM_TORCH_PROFILER_DIR: "/mnt"
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://$host_ip:8007/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
     runtime: habana
     cap_add:
       - SYS_NICE
@@ -35,6 +35,7 @@ function start_services() {
     export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
     export INDEX_NAME="rag-redis"
     export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+    export host_ip=${ip_address}

     # Start Docker Containers
     docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
@@ -36,6 +36,7 @@ function start_services() {
     export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
     export INDEX_NAME="rag-redis"
     export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+    export host_ip=${ip_address}

     # Start Docker Containers
     docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
@@ -3,7 +3,7 @@

 services:
   tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
+    image: ghcr.io/huggingface/tgi-gaudi:2.3.1
     container_name: tgi-gaudi-server
     ports:
       - "8028:80"
@@ -21,10 +21,10 @@ services:
       USE_FLASH_ATTENTION: true
       FLASH_ATTENTION_RECOMPUTE: true
     healthcheck:
-      test: ["CMD-SHELL", "sleep 500 && exit 0"]
-      interval: 1s
-      timeout: 505s
-      retries: 1
+      test: ["CMD-SHELL", "curl -f http://$host_ip:8028/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
     runtime: habana
     cap_add:
       - SYS_NICE
@@ -3,7 +3,7 @@

 services:
   tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
+    image: ghcr.io/huggingface/tgi-gaudi:2.3.1
     container_name: tgi-gaudi-server
     ports:
       - ${LLM_ENDPOINT_PORT:-8008}:80
@@ -31,10 +31,10 @@ services:
       - SYS_NICE
     ipc: host
     healthcheck:
-      test: ["CMD-SHELL", "sleep 500 && exit 0"]
-      interval: 1s
-      timeout: 505s
-      retries: 1
+      test: ["CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
     command: --model-id ${LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048 --max-batch-total-tokens 65536 --max-batch-prefill-tokens 4096
   llm_faqgen:
     image: ${REGISTRY:-opea}/llm-faqgen:${TAG:-latest}
@@ -91,7 +91,7 @@ services:
       RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
     restart: unless-stopped
   tgi-gaudi:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
+    image: ghcr.io/huggingface/tgi-gaudi:2.3.1
     container_name: tgi-llava-gaudi-server
     ports:
       - "8399:80"
@@ -110,6 +110,11 @@ services:
       LIMIT_HPU_GRAPH: true
       USE_FLASH_ATTENTION: true
       FLASH_ATTENTION_RECOMPUTE: true
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://${host_ip}:8399/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
     runtime: habana
     cap_add:
       - SYS_NICE
@@ -98,7 +98,7 @@ services:
       LOGFLAG: ${LOGFLAG}
     restart: unless-stopped
   tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
+    image: ghcr.io/huggingface/tgi-gaudi:2.3.1
     container_name: tgi-gaudi-server
     ports:
       - "3006:80"
@@ -118,10 +118,10 @@ services:
       USE_FLASH_ATTENTION: true
       FLASH_ATTENTION_RECOMPUTE: true
     healthcheck:
-      test: ["CMD-SHELL", "sleep 800 && exit 0"]
-      interval: 1s
-      timeout: 805s
-      retries: 1
+      test: ["CMD-SHELL", "curl -f http://$host_ip:3006/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
     runtime: habana
     cap_add:
       - SYS_NICE
@@ -3,7 +3,7 @@

 services:
   tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
+    image: ghcr.io/huggingface/tgi-gaudi:2.3.1
     container_name: tgi-gaudi-server
     ports:
       - "8008:80"
@@ -21,10 +21,10 @@ services:
       USE_FLASH_ATTENTION: true
       FLASH_ATTENTION_RECOMPUTE: true
     healthcheck:
-      test: ["CMD-SHELL", "sleep 500 && exit 0"]
-      interval: 1s
-      timeout: 505s
-      retries: 1
+      test: ["CMD-SHELL", "curl -f http://$host_ip:8008/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
     runtime: habana
     cap_add:
       - SYS_NICE
@@ -22,7 +22,7 @@ function build_docker_images() {
     service_list="translation translation-ui llm-textgen nginx"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

-    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
+    docker pull ghcr.io/huggingface/tgi-gaudi:2.3.1
     docker images && sleep 1s
 }

@@ -41,6 +41,7 @@ function start_services() {
     export BACKEND_SERVICE_NAME=translation
     export BACKEND_SERVICE_IP=${ip_address}
     export BACKEND_SERVICE_PORT=8888
+    export host_ip=${ip_address}

     sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env

@@ -3,7 +3,7 @@

 services:
   llava-tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
+    image: ghcr.io/huggingface/tgi-gaudi:2.3.1
     container_name: tgi-llava-gaudi-server
     ports:
       - "8399:80"
@@ -22,6 +22,11 @@ services:
       LIMIT_HPU_GRAPH: true
       USE_FLASH_ATTENTION: true
       FLASH_ATTENTION_RECOMPUTE: true
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://$host_ip:8399/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 60
     runtime: habana
     cap_add:
       - SYS_NICE
@@ -41,6 +41,7 @@ function start_services() {
     export BACKEND_SERVICE_IP=${ip_address}
     export BACKEND_SERVICE_PORT=8888
     export NGINX_PORT=80
+    export host_ip=${ip_address}

     sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env

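With interval: 10s and retries: 100, each service now gets roughly 1000 seconds to become healthy, replacing the old checks that simply passed after a fixed sleep 500 or sleep 800. A hedged sketch of how a test script could wait on the new health status (the container name tgi-gaudi-server comes from the compose files above; the loop bound mirrors the new retry budget):

    # Poll Docker's health status until the TGI container reports healthy,
    # within the same ~1000 s budget as the healthcheck (100 x 10 s).
    for i in $(seq 1 100); do
        status=$(docker inspect --format '{{.State.Health.Status}}' tgi-gaudi-server)
        [ "$status" = "healthy" ] && break
        sleep 10
    done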