Enhance health check in GenAIExample docker-compose (#1410)

Fix service launch issue:
1. Update the Gaudi TGI image from 2.0.6 to 2.3.1.
2. Change the hpu-gaudi TGI health check condition.

Signed-off-by: Wang, Xigui <xigui.wang@intel.com>
2025-01-20 20:13:13 +08:00
parent 59722d2bc9
commit 2d5898244c
17 changed files with 65 additions and 24 deletions
--- a/AudioQnA/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/AudioQnA/docker_compose/intel/cpu/xeon/compose.yaml
@@ -37,6 +37,11 @@ services:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://$host_ip:3006/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
    command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
  audioqna-xeon-backend-server:
    image: ${REGISTRY:-opea}/audioqna:${TAG:-latest}
--- a/AudioQnA/docker_compose/intel/hpu/gaudi/compose.yaml
+++ b/AudioQnA/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -35,7 +35,7 @@ services:
      - SYS_NICE
    restart: unless-stopped
  tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
+    image: ghcr.io/huggingface/tgi-gaudi:2.3.1
    container_name: tgi-gaudi-server
    ports:
      - "3006:80"
@@ -54,6 +54,11 @@ services:
      LIMIT_HPU_GRAPH: true
      USE_FLASH_ATTENTION: true
      FLASH_ATTENTION_RECOMPUTE: true
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://$host_ip:3006/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
    runtime: habana
    cap_add:
      - SYS_NICE