Using TGI official release docker image for intel cpu (#581)

Signed-off-by: lvliang-intel <liang1.lv@intel.com>
This commit is contained in:
lvliang-intel
2024-08-18 17:17:44 +08:00
committed by GitHub
parent e81e0e557c
commit b2771ad3f2
25 changed files with 48 additions and 39 deletions

View File

@@ -3,7 +3,7 @@
 services:
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:1.4
+    image: ghcr.io/huggingface/text-generation-inference:latest-intel-cpu
     container_name: codetrans-tgi-service
     ports:
       - "8008:80"
@@ -15,7 +15,7 @@ services:
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
       HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-    command: --model-id ${LLM_MODEL_ID}
+    command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
   llm:
     image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
     container_name: llm-tgi-server

View File

@@ -41,6 +41,7 @@ metadata:
 data:
   MODEL_ID: "HuggingFaceH4/mistral-7b-grok"
   PORT: "2080"
+  CUDA_GRAPHS: "0"
   HUGGING_FACE_HUB_TOKEN: "insert-your-huggingface-token-here"
   HF_TOKEN: "insert-your-huggingface-token-here"
   MAX_INPUT_TOKENS: "1024"
@@ -229,7 +230,7 @@ spec:
             name: codetrans-tgi-config
           securityContext:
             {}
-          image: "ghcr.io/huggingface/text-generation-inference:2.1.0"
+          image: "ghcr.io/huggingface/text-generation-inference:latest-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data