diff --git a/AudioQnA/docker_compose/intel/cpu/xeon/compose.yaml b/AudioQnA/docker_compose/intel/cpu/xeon/compose.yaml
index d23684172..78a17dda0 100644
--- a/AudioQnA/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/AudioQnA/docker_compose/intel/cpu/xeon/compose.yaml
@@ -37,6 +37,11 @@ services:
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
       HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://$host_ip:3006/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
     command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
   audioqna-xeon-backend-server:
     image: ${REGISTRY:-opea}/audioqna:${TAG:-latest}
diff --git a/AudioQnA/docker_compose/intel/hpu/gaudi/compose.yaml b/AudioQnA/docker_compose/intel/hpu/gaudi/compose.yaml
index 18840be34..2624dbf53 100644
--- a/AudioQnA/docker_compose/intel/hpu/gaudi/compose.yaml
+++ b/AudioQnA/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -35,7 +35,7 @@ services:
       - SYS_NICE
     restart: unless-stopped
   tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
+    image: ghcr.io/huggingface/tgi-gaudi:2.3.1
     container_name: tgi-gaudi-server
     ports:
       - "3006:80"
@@ -54,6 +54,11 @@ services:
       LIMIT_HPU_GRAPH: true
       USE_FLASH_ATTENTION: true
       FLASH_ATTENTION_RECOMPUTE: true
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://$host_ip:3006/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
     runtime: habana
     cap_add:
       - SYS_NICE
diff --git a/AudioQnA/tests/test_compose_on_gaudi.sh b/AudioQnA/tests/test_compose_on_gaudi.sh
index 1fe091af8..2eb0bf340 100644
--- a/AudioQnA/tests/test_compose_on_gaudi.sh
+++ b/AudioQnA/tests/test_compose_on_gaudi.sh
@@ -53,6 +53,7 @@ function start_services() {
     export LLM_SERVER_PORT=3006
     export BACKEND_SERVICE_ENDPOINT=http://${ip_address}:3008/v1/audioqna
+    export host_ip=${ip_address}

     # sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env

     # Start Docker Containers
diff --git a/AudioQnA/tests/test_compose_on_xeon.sh b/AudioQnA/tests/test_compose_on_xeon.sh
index 20b3e8505..48047948c 100644
--- a/AudioQnA/tests/test_compose_on_xeon.sh
+++ b/AudioQnA/tests/test_compose_on_xeon.sh
@@ -53,6 +53,7 @@ function start_services() {
     export LLM_SERVER_PORT=3006
     export BACKEND_SERVICE_ENDPOINT=http://${ip_address}:3008/v1/audioqna
+    export host_ip=${ip_address}

     # sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env

diff --git a/AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml b/AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml
index c0af8d71d..12225ec41 100644
--- a/AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml
@@ -38,6 +38,11 @@ services:
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
       HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://${host_ip}:3006/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
     command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
   wav2lip-service:
     image: ${REGISTRY:-opea}/wav2lip:${TAG:-latest}
diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml
index f34868b6d..544d40b57 100644
--- a/ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml
@@ -89,6 +89,11 @@ services:
       HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
       LLM_MODEL_ID: ${LLM_MODEL_ID}
       VLLM_TORCH_PROFILER_DIR: "/mnt"
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://$host_ip:9009/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
     command: --model $LLM_MODEL_ID --host 0.0.0.0 --port 80
   chatqna-xeon-backend-server:
     image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml
index ddd1afade..8c2132577 100644
--- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml
+++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -93,6 +93,11 @@ services:
       OMPI_MCA_btl_vader_single_copy_mechanism: none
       LLM_MODEL_ID: ${LLM_MODEL_ID}
       VLLM_TORCH_PROFILER_DIR: "/mnt"
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://$host_ip:8007/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
     runtime: habana
     cap_add:
       - SYS_NICE
diff --git a/ChatQnA/tests/test_compose_on_gaudi.sh b/ChatQnA/tests/test_compose_on_gaudi.sh
index f9d0e48d0..f352a75d9 100644
--- a/ChatQnA/tests/test_compose_on_gaudi.sh
+++ b/ChatQnA/tests/test_compose_on_gaudi.sh
@@ -35,6 +35,7 @@ function start_services() {
     export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
     export INDEX_NAME="rag-redis"
     export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+    export host_ip=${ip_address}

     # Start Docker Containers
     docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
diff --git a/ChatQnA/tests/test_compose_on_xeon.sh b/ChatQnA/tests/test_compose_on_xeon.sh
index 1808dbd45..a9d437397 100644
--- a/ChatQnA/tests/test_compose_on_xeon.sh
+++ b/ChatQnA/tests/test_compose_on_xeon.sh
@@ -36,6 +36,7 @@ function start_services() {
     export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
     export INDEX_NAME="rag-redis"
     export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+    export host_ip=${ip_address}

     # Start Docker Containers
     docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
diff --git a/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml b/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml
index e2f72ff93..4d5ed9568 100644
--- a/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml
+++ b/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -3,7 +3,7 @@

 services:
   tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
+    image: ghcr.io/huggingface/tgi-gaudi:2.3.1
     container_name: tgi-gaudi-server
     ports:
       - "8028:80"
@@ -21,10 +21,10 @@ services:
       USE_FLASH_ATTENTION: true
       FLASH_ATTENTION_RECOMPUTE: true
     healthcheck:
-      test: ["CMD-SHELL", "sleep 500 && exit 0"]
-      interval: 1s
-      timeout: 505s
-      retries: 1
+      test: ["CMD-SHELL", "curl -f http://$host_ip:8028/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
     runtime: habana
     cap_add:
       - SYS_NICE
diff --git a/FaqGen/docker_compose/intel/hpu/gaudi/compose.yaml b/FaqGen/docker_compose/intel/hpu/gaudi/compose.yaml
index 4d2c767df..6ca5cddf2 100644
--- a/FaqGen/docker_compose/intel/hpu/gaudi/compose.yaml
+++ b/FaqGen/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -3,7 +3,7 @@

 services:
   tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
+    image: ghcr.io/huggingface/tgi-gaudi:2.3.1
     container_name: tgi-gaudi-server
     ports:
       - ${LLM_ENDPOINT_PORT:-8008}:80
@@ -31,10 +31,10 @@ services:
       - SYS_NICE
     ipc: host
     healthcheck:
-      test: ["CMD-SHELL", "sleep 500 && exit 0"]
-      interval: 1s
-      timeout: 505s
-      retries: 1
+      test: ["CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
     command: --model-id ${LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048 --max-batch-total-tokens 65536 --max-batch-prefill-tokens 4096
   llm_faqgen:
     image: ${REGISTRY:-opea}/llm-faqgen:${TAG:-latest}
diff --git a/MultimodalQnA/docker_compose/intel/hpu/gaudi/compose.yaml b/MultimodalQnA/docker_compose/intel/hpu/gaudi/compose.yaml
index 7a2641c9a..2a134a548 100644
--- a/MultimodalQnA/docker_compose/intel/hpu/gaudi/compose.yaml
+++ b/MultimodalQnA/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -91,7 +91,7 @@ services:
       RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
     restart: unless-stopped
   tgi-gaudi:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
+    image: ghcr.io/huggingface/tgi-gaudi:2.3.1
     container_name: tgi-llava-gaudi-server
     ports:
       - "8399:80"
@@ -110,6 +110,11 @@ services:
       LIMIT_HPU_GRAPH: true
       USE_FLASH_ATTENTION: true
       FLASH_ATTENTION_RECOMPUTE: true
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://${host_ip}:8399/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
     runtime: habana
     cap_add:
       - SYS_NICE
diff --git a/SearchQnA/docker_compose/intel/hpu/gaudi/compose.yaml b/SearchQnA/docker_compose/intel/hpu/gaudi/compose.yaml
index c294bac43..f79bb9758 100644
--- a/SearchQnA/docker_compose/intel/hpu/gaudi/compose.yaml
+++ b/SearchQnA/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -98,7 +98,7 @@ services:
       LOGFLAG: ${LOGFLAG}
     restart: unless-stopped
   tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
+    image: ghcr.io/huggingface/tgi-gaudi:2.3.1
     container_name: tgi-gaudi-server
     ports:
       - "3006:80"
@@ -118,10 +118,10 @@ services:
       USE_FLASH_ATTENTION: true
       FLASH_ATTENTION_RECOMPUTE: true
     healthcheck:
-      test: ["CMD-SHELL", "sleep 800 && exit 0"]
-      interval: 1s
-      timeout: 805s
-      retries: 1
+      test: ["CMD-SHELL", "curl -f http://$host_ip:3006/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
     runtime: habana
     cap_add:
       - SYS_NICE
diff --git a/Translation/docker_compose/intel/hpu/gaudi/compose.yaml b/Translation/docker_compose/intel/hpu/gaudi/compose.yaml
index de3714aca..be983b7b1 100644
--- a/Translation/docker_compose/intel/hpu/gaudi/compose.yaml
+++ b/Translation/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -3,7 +3,7 @@

 services:
   tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
+    image: ghcr.io/huggingface/tgi-gaudi:2.3.1
     container_name: tgi-gaudi-server
     ports:
       - "8008:80"
@@ -21,10 +21,10 @@ services:
       USE_FLASH_ATTENTION: true
       FLASH_ATTENTION_RECOMPUTE: true
     healthcheck:
-      test: ["CMD-SHELL", "sleep 500 && exit 0"]
-      interval: 1s
-      timeout: 505s
-      retries: 1
+      test: ["CMD-SHELL", "curl -f http://$host_ip:8008/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
     runtime: habana
     cap_add:
       - SYS_NICE
diff --git a/Translation/tests/test_compose_on_gaudi.sh b/Translation/tests/test_compose_on_gaudi.sh
index 1f2f94a3c..b82127b96 100644
--- a/Translation/tests/test_compose_on_gaudi.sh
+++ b/Translation/tests/test_compose_on_gaudi.sh
@@ -22,7 +22,7 @@ function build_docker_images() {
     service_list="translation translation-ui llm-textgen nginx"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

-    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
+    docker pull ghcr.io/huggingface/tgi-gaudi:2.3.1
     docker images && sleep 1s
 }

@@ -41,6 +41,7 @@ function start_services() {
     export BACKEND_SERVICE_NAME=translation
     export BACKEND_SERVICE_IP=${ip_address}
     export BACKEND_SERVICE_PORT=8888
+    export host_ip=${ip_address}

     sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
diff --git a/VisualQnA/docker_compose/intel/hpu/gaudi/compose.yaml b/VisualQnA/docker_compose/intel/hpu/gaudi/compose.yaml
index 3f3142291..fa17cf36d 100644
--- a/VisualQnA/docker_compose/intel/hpu/gaudi/compose.yaml
+++ b/VisualQnA/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -3,7 +3,7 @@

 services:
   llava-tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
+    image: ghcr.io/huggingface/tgi-gaudi:2.3.1
     container_name: tgi-llava-gaudi-server
     ports:
       - "8399:80"
@@ -22,6 +22,11 @@ services:
       LIMIT_HPU_GRAPH: true
       USE_FLASH_ATTENTION: true
       FLASH_ATTENTION_RECOMPUTE: true
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://$host_ip:8399/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 60
     runtime: habana
     cap_add:
       - SYS_NICE
diff --git a/VisualQnA/tests/test_compose_on_gaudi.sh b/VisualQnA/tests/test_compose_on_gaudi.sh
index 312d0b4db..46e5fa1d0 100644
--- a/VisualQnA/tests/test_compose_on_gaudi.sh
+++ b/VisualQnA/tests/test_compose_on_gaudi.sh
@@ -41,6 +41,7 @@ function start_services() {
     export BACKEND_SERVICE_IP=${ip_address}
     export BACKEND_SERVICE_PORT=8888
     export NGINX_PORT=80
+    export host_ip=${ip_address}

     sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
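Notes on the healthcheck change: the old checks ("sleep 500 && exit 0" with a 505s timeout, or "sleep 800" with 805s) simply burned a fixed wait before declaring the container healthy. The new probe curls TGI's /health endpoint every 10s, so the container turns healthy as soon as the server answers, while 100 retries at a 10s interval keeps roughly the same ~1000s worst-case startup budget. The $host_ip / ${host_ip} reference is substituted by Docker Compose when it parses the file, which is why each test script now exports host_ip; the curl then runs inside the container against the port published on the host. A minimal sketch of how a downstream service can consume such a probe (the example-backend consumer is illustrative, not part of this diff):

    services:
      tgi-service:
        image: ghcr.io/huggingface/tgi-gaudi:2.3.1
        ports:
          - "3006:80"            # container port 80 published on the host
        healthcheck:
          # Runs inside the container; ${host_ip} is expanded by Compose at parse time.
          test: ["CMD-SHELL", "curl -f http://${host_ip}:3006/health || exit 1"]
          interval: 10s
          timeout: 10s
          retries: 100
      example-backend:           # hypothetical consumer, not in this diff
        image: ${REGISTRY:-opea}/example:${TAG:-latest}
        depends_on:
          tgi-service:
            condition: service_healthy   # start only once the probe has passed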
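Scripts can also gate on the reported health state directly instead of sleeping; a sketch using standard Docker commands (the container name matches the container_name set in these compose files):

    # Poll until the TGI container reports healthy, mirroring the 10s probe interval.
    until [ "$(docker inspect -f '{{.State.Health.Status}}' tgi-gaudi-server)" = "healthy" ]; do
        sleep 10
    done
    docker compose ps   # STATUS column shows "(healthy)" once the check passes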