Update TGI CPU image to latest official release 2.4.0 (#1035)

Signed-off-by: lvliang-intel <liang1.lv@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Author: lvliang-intel
Date: 2024-11-04 11:28:43 +08:00 (committed by GitHub)
Parent: 3372b9d480
Commit: 0306c620b5
40 changed files with 49 additions and 49 deletions

View File

@@ -41,7 +41,7 @@ services:
environment:
TTS_ENDPOINT: ${TTS_ENDPOINT}
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
container_name: tgi-service
ports:
- "3006:80"

View File

@@ -26,7 +26,7 @@ services:
https_proxy: ${https_proxy}
restart: unless-stopped
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
container_name: tgi-service
ports:
- "3006:80"

View File

@@ -247,7 +247,7 @@ spec:
- envFrom:
- configMapRef:
name: audio-qna-config
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
name: llm-dependency-deploy-demo
securityContext:
capabilities:

View File

@@ -42,7 +42,7 @@ services:
environment:
TTS_ENDPOINT: ${TTS_ENDPOINT}
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
container_name: tgi-service
ports:
- "3006:80"

View File

@@ -195,7 +195,7 @@ For users in China who are unable to download models directly from Huggingface,
export HF_TOKEN=${your_hf_token}
export HF_ENDPOINT="https://hf-mirror.com"
model_name="Intel/neural-chat-7b-v3-3"
docker run -p 8008:80 -v ./data:/data --name tgi-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --shm-size 1g ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu --model-id $model_name
docker run -p 8008:80 -v ./data:/data --name tgi-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu --model-id $model_name
```
2. Offline
@@ -209,7 +209,7 @@ For users in China who are unable to download models directly from Huggingface,
```bash
export HF_TOKEN=${your_hf_token}
export model_path="/path/to/model"
docker run -p 8008:80 -v $model_path:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu --model-id /data
docker run -p 8008:80 -v $model_path:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu --model-id /data
```
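Whichever of the two run modes is used, a quick smoke test against TGI's `/generate` route confirms the model is being served. A minimal sketch, assuming the `-p 8008:80` port mapping from the commands above and that the container has finished loading the model:
```bash
curl http://localhost:8008/generate \
  -X POST \
  -H 'Content-Type: application/json' \
  -d '{"inputs": "What is Deep Learning?", "parameters": {"max_new_tokens": 32}}'
```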
### Setup Environment Variables

View File

@@ -73,7 +73,7 @@ services:
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
container_name: tgi-service
ports:
- "9009:80"

View File

@@ -72,7 +72,7 @@ services:
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
container_name: tgi-service
ports:
- "6042:80"

View File

@@ -57,7 +57,7 @@ services:
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
container_name: tgi-service
ports:
- "9009:80"

View File

@@ -18,7 +18,7 @@ The ChatQnA uses the below prebuilt images if you choose a Xeon deployment
- tei_embedding_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
- retriever: opea/retriever-redis:latest
- tei_xeon_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
- tgi-service: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
- tgi-service: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
- chaqna-xeon-backend-server: opea/chatqna:latest
Should you desire to use the Gaudi accelerator, two alternate images are used for the embedding and llm services.
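For reference, a minimal pre-pull loop over the prebuilt Xeon images listed above, so that a later `docker compose up` does not stall on downloads. The tags are taken verbatim from the list; the Gaudi alternates are omitted.
```bash
# Pull the Xeon images ahead of time; duplicate entries in the list resolve to a single pull.
for img in \
  ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 \
  opea/retriever-redis:latest \
  ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu \
  opea/chatqna:latest; do
  docker pull "$img"
done
```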

View File

@@ -1100,7 +1100,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data
@@ -1180,7 +1180,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data

View File

@@ -922,7 +922,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data

View File

@@ -925,7 +925,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data

View File

@@ -22,7 +22,7 @@ function build_docker_images() {
service_list="chatqna chatqna-ui chatqna-conversation-ui dataprep-redis retriever-redis nginx"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
docker images && sleep 1s

View File

@@ -3,7 +3,7 @@
services:
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
container_name: tgi-service
ports:
- "8028:80"

View File

@@ -404,7 +404,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data

View File

@@ -126,7 +126,7 @@ spec:
- name: no_proxy
value:
securityContext: {}
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data

View File

@@ -22,7 +22,7 @@ function build_docker_images() {
service_list="codegen codegen-ui llm-tgi"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
docker images && sleep 1s
}

View File

@@ -3,7 +3,7 @@
services:
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
container_name: codetrans-tgi-service
ports:
- "8008:80"

View File

@@ -404,7 +404,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data

View File

@@ -22,7 +22,7 @@ function build_docker_images() {
service_list="codetrans codetrans-ui llm-tgi nginx"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
docker images && sleep 1s
}

View File

@@ -3,7 +3,7 @@
services:
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
container_name: tgi-service
ports:
- "8008:80"

View File

@@ -8,7 +8,7 @@ Install GMC in your Kubernetes cluster, if you have not already done so, by foll
The DocSum application is defined as a Custom Resource (CR) file that the above GMC operator acts upon. It first checks whether the microservices listed in the CR yaml file are running; if not, it starts them and then connects them. When the DocSum RAG pipeline is ready, the service endpoint details are returned, letting you use the application. If you run "kubectl get pods", you will see all the component microservices, in particular embedding, retriever, rerank, and llm.
The DocSum pipeline uses prebuilt images. The Xeon version uses the prebuilt image `llm-docsum-tgi:latest`, which internally leverages
the image `ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu`. The service is called tgi-svc. Meanwhile, the Gaudi version launches the
the image `ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu`. The service is called tgi-svc. Meanwhile, the Gaudi version launches the
service tgi-gaudi-svc, which uses the image `ghcr.io/huggingface/tgi-gaudi:2.0.5`. Both TGI model services serve the model specified in the LLM_MODEL_ID variable that is exported by you. In the below example we use `Intel/neural-chat-7b-v3-3`.
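As a rough illustration of the check described above: the service name `tgi-svc` comes from the text, while the namespace and the service port are assumptions and may differ in your cluster.
```bash
kubectl get pods                           # all DocSum component microservices should be Running
kubectl port-forward svc/tgi-svc 8080:80 &
curl http://localhost:8080/info            # TGI reports the model id set via LLM_MODEL_ID
```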
[NOTE]

View File

@@ -404,7 +404,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data

View File

@@ -126,7 +126,7 @@ spec:
- name: no_proxy
value:
securityContext: {}
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data

View File

@@ -3,7 +3,7 @@
services:
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
container_name: tgi-xeon-server
ports:
- "8008:80"

View File

@@ -126,7 +126,7 @@ spec:
- name: no_proxy
value:
securityContext: {}
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data

View File

@@ -993,7 +993,7 @@ spec:
name: chatqna-tgi-config
securityContext:
{}
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data

View File

@@ -229,7 +229,7 @@ spec:
name: codegen-tgi-config
securityContext:
{}
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data

View File

@@ -229,7 +229,7 @@ spec:
name: docsum-tgi-config
securityContext:
{}
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data

View File

@@ -138,7 +138,7 @@ spec:
- configMapRef:
name: faqgen-tgi-config
securityContext: {}
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data

View File

@@ -73,7 +73,7 @@ services:
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
container_name: tgi-service
ports:
- "3006:80"

View File

@@ -23,7 +23,7 @@ function build_docker_images() {
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
docker pull ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
docker images && sleep 1s
}

View File

@@ -3,7 +3,7 @@
services:
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
container_name: tgi-service
ports:
- "8008:80"

View File

@@ -361,7 +361,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data

View File

@@ -22,7 +22,7 @@ function build_docker_images() {
service_list="translation translation-ui llm-tgi nginx"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
docker images && sleep 1s
}

View File

@@ -67,12 +67,12 @@ docker build --no-cache -t opea/visualqna-ui:latest --build-arg https_proxy=$htt
### 4. Pull TGI Xeon Image
```bash
docker pull ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
```
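Optionally, confirm the exact tag and digest of the freshly pulled image before moving on; a small sketch using `docker image inspect`:
```bash
docker image inspect ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu \
  --format '{{index .RepoTags 0}} {{index .RepoDigests 0}}'
```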
Then run the command `docker images`; you will have the following 5 Docker images:
1. `ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu`
1. `ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu`
2. `opea/lvm-tgi:latest`
3. `opea/visualqna:latest`
4. `opea/visualqna-ui:latest`

View File

@@ -3,7 +3,7 @@
services:
llava-tgi-service:
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
container_name: tgi-llava-xeon-server
ports:
- "8399:80"

View File

@@ -216,7 +216,7 @@ spec:
name: visualqna-tgi-config
securityContext:
{}
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data

View File

@@ -21,7 +21,7 @@ function build_docker_images() {
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
docker images && sleep 1s
}

View File

@@ -15,8 +15,7 @@
"@fortawesome/free-solid-svg-icons": "6.2.0",
"@playwright/test": "^1.33.0",
"@sveltejs/adapter-auto": "1.0.0-next.75",
"@sveltejs/adapter-static": "^3.0.0",
"@sveltejs/kit": "^2.0.0",
"@sveltejs/kit": "^1.30.4",
"@tailwindcss/typography": "0.5.7",
"@types/debug": "4.1.7",
"@types/node": "^20.12.13",
@@ -29,20 +28,21 @@
"eslint": "^8.16.0",
"eslint-config-prettier": "^8.3.0",
"eslint-plugin-neverthrow": "1.1.4",
"eslint-plugin-svelte3": "^4.0.0",
"postcss": "^8.4.31",
"postcss-load-config": "^4.0.1",
"postcss-preset-env": "^8.3.2",
"prettier": "^2.8.8",
"prettier-plugin-svelte": "^2.7.0",
"prettier-plugin-tailwindcss": "^0.3.0",
"svelte": "^4.0.0",
"svelte-check": "^3.0.0",
"svelte": "^3.59.1",
"svelte-check": "^2.7.1",
"svelte-fa": "3.0.3",
"svelte-preprocess": "^6.0.2",
"svelte-preprocess": "^4.10.7",
"tailwindcss": "^3.1.5",
"tslib": "^2.3.1",
"typescript": "^5.0.0",
"vite": "^5.0.0"
"typescript": "^4.7.4",
"vite": "^4.5.2"
},
"type": "module",
"dependencies": {