Using TGI official release Docker image for Intel CPU (#581)
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
@@ -3,7 +3,7 @@
 services:
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:1.4
+    image: ghcr.io/huggingface/text-generation-inference:latest-intel-cpu
     container_name: codetrans-tgi-service
     ports:
       - "8008:80"
@@ -15,7 +15,7 @@ services:
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
       HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-    command: --model-id ${LLM_MODEL_ID}
+    command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
   llm:
     image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
     container_name: llm-tgi-server
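With the compose file updated, the service can be brought up and smoke-tested. The `latest-intel-cpu` image has no CUDA runtime, hence `--cuda-graphs 0`, which disables CUDA-graph capture. A minimal sketch, assuming the compose file is named `compose.yaml` and that `LLM_MODEL_ID` and `HUGGINGFACEHUB_API_TOKEN` are already exported; the `/generate` route and payload shape are TGI's standard REST API:

```bash
# Pull the Intel-CPU image and start the TGI service defined above
docker compose -f compose.yaml up -d tgi-service

# Smoke-test TGI through the host port mapped above (8008 -> 80)
curl http://localhost:8008/generate \
    -X POST \
    -H 'Content-Type: application/json' \
    -d '{"inputs": "def hello():", "parameters": {"max_new_tokens": 32}}'
```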
@@ -41,6 +41,7 @@ metadata:
 data:
   MODEL_ID: "HuggingFaceH4/mistral-7b-grok"
   PORT: "2080"
+  CUDA_GRAPHS: "0"
   HUGGING_FACE_HUB_TOKEN: "insert-your-huggingface-token-here"
   HF_TOKEN: "insert-your-huggingface-token-here"
   MAX_INPUT_TOKENS: "1024"
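For context, the Deployment consumes these keys through the ConfigMap named in the next hunk, and TGI's launcher also reads its flags from the environment, so `CUDA_GRAPHS: "0"` has the same effect as `--cuda-graphs 0` on the command line. A minimal sketch of the wiring, assuming the chart's usual `envFrom` pattern (the surrounding fields are illustrative, not copied from the manifest):

```yaml
# Illustrative container-spec excerpt: every key in the
# codetrans-tgi-config ConfigMap (MODEL_ID, PORT, CUDA_GRAPHS, ...)
# is injected into the TGI container as an environment variable.
containers:
  - name: tgi
    image: "ghcr.io/huggingface/text-generation-inference:latest-intel-cpu"
    envFrom:
      - configMapRef:
          name: codetrans-tgi-config
```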
@@ -229,7 +230,7 @@ spec:
                 name: codetrans-tgi-config
           securityContext:
             {}
-          image: "ghcr.io/huggingface/text-generation-inference:2.1.0"
+          image: "ghcr.io/huggingface/text-generation-inference:latest-intel-cpu"
          imagePullPolicy: IfNotPresent
          volumeMounts:
            - mountPath: /data
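Once the manifest is edited, the change can be rolled out and verified with standard kubectl commands. A minimal sketch, assuming the manifest file name `codetrans.yaml` and the default namespace; the ConfigMap name comes from the diff above:

```bash
# Apply the updated ConfigMap and Deployment (file name is an assumption)
kubectl apply -f codetrans.yaml

# Confirm the pod restarted with the Intel-CPU image
kubectl get pods -o wide | grep codetrans

# Inspect the rendered ConfigMap, including the new CUDA_GRAPHS key
kubectl get configmap codetrans-tgi-config -o yaml
```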