Use the official TGI release Docker image for Intel CPU (#581)

Signed-off-by: lvliang-intel <liang1.lv@intel.com>
lvliang-intel
2024-08-18 17:17:44 +08:00
committed by GitHub
parent e81e0e557c
commit b2771ad3f2
25 changed files with 48 additions and 39 deletions


@@ -41,7 +41,7 @@ services:
     environment:
       TTS_ENDPOINT: ${TTS_ENDPOINT}
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:1.4
+    image: ghcr.io/huggingface/text-generation-inference:latest-intel-cpu
     container_name: tgi-service
     ports:
       - "3006:80"
@@ -53,7 +53,7 @@ services:
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
       HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-    command: --model-id ${LLM_MODEL_ID}
+    command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
   llm:
     image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
     container_name: llm-tgi-server
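
On the CPU image, --cuda-graphs 0 disables CUDA graph capture, which does not apply when no GPU is present. A minimal sketch of bringing the updated tgi-service up and smoke-testing it; the compose file name and the model id are placeholders, not specified by this commit:

# Placeholder model id and token; substitute your own values.
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export HUGGINGFACEHUB_API_TOKEN="<your-hf-token>"
# Start only the TGI service from the compose file (assumed to be compose.yaml).
docker compose -f compose.yaml up -d tgi-service
# Port 3006 maps to the container's port 80 per the compose snippet above.
curl http://localhost:3006/generate \
  -X POST \
  -H 'Content-Type: application/json' \
  -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":32}}'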