Using TGI official release docker image for intel cpu (#581)

Signed-off-by: lvliang-intel <liang1.lv@intel.com>
This commit is contained in:
lvliang-intel
2024-08-18 17:17:44 +08:00
committed by GitHub
parent e81e0e557c
commit b2771ad3f2
25 changed files with 48 additions and 39 deletions

View File

@@ -3,7 +3,7 @@
 services:
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:1.4
+    image: ghcr.io/huggingface/text-generation-inference:latest-intel-cpu
     container_name: codetrans-tgi-service
     ports:
       - "8008:80"
@@ -15,7 +15,7 @@ services:
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
       HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-    command: --model-id ${LLM_MODEL_ID}
+    command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
   llm:
     image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
     container_name: llm-tgi-server

View File

@@ -41,6 +41,7 @@ metadata:
 data:
   MODEL_ID: "HuggingFaceH4/mistral-7b-grok"
   PORT: "2080"
+  CUDA_GRAPHS: "0"
   HUGGING_FACE_HUB_TOKEN: "insert-your-huggingface-token-here"
   HF_TOKEN: "insert-your-huggingface-token-here"
   MAX_INPUT_TOKENS: "1024"
@@ -229,7 +230,7 @@ spec:
             name: codetrans-tgi-config
           securityContext:
             {}
-          image: "ghcr.io/huggingface/text-generation-inference:2.1.0"
+          image: "ghcr.io/huggingface/text-generation-inference:latest-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data