Update TGI CPU image to latest official release 2.4.0 (#1035)

Signed-off-by: lvliang-intel <liang1.lv@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Author: lvliang-intel
Date: 2024-11-04 11:28:43 +08:00
Committed by: GitHub
Parent: 3372b9d480
Commit: 0306c620b5
40 changed files with 49 additions and 49 deletions

@@ -195,7 +195,7 @@ For users in China who are unable to download models directly from Huggingface,
export HF_TOKEN=${your_hf_token}
export HF_ENDPOINT="https://hf-mirror.com"
model_name="Intel/neural-chat-7b-v3-3"
-docker run -p 8008:80 -v ./data:/data --name tgi-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --shm-size 1g ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu --model-id $model_name
+docker run -p 8008:80 -v ./data:/data --name tgi-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu --model-id $model_name
```
2. Offline
@@ -209,7 +209,7 @@ For users in China who are unable to download models directly from Huggingface,
```bash
export HF_TOKEN=${your_hf_token}
export model_path="/path/to/model"
-docker run -p 8008:80 -v $model_path:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu --model-id /data
+docker run -p 8008:80 -v $model_path:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu --model-id /data
```
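In either mode, the bumped 2.4.0-intel-cpu image serves the same TGI HTTP API as before. A quick smoke test against the mapped port 8008 (a minimal sketch, assuming the container has finished loading the model) might look like:

```bash
# Readiness probe: TGI returns HTTP 200 on /health once the model is loaded.
curl -s -o /dev/null -w "%{http_code}\n" http://localhost:8008/health

# Minimal generation request to confirm the upgraded image works end to end.
curl http://localhost:8008/generate \
    -X POST \
    -H 'Content-Type: application/json' \
    -d '{"inputs": "What is deep learning?", "parameters": {"max_new_tokens": 32}}'
```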
### Setup Environment Variables

@@ -73,7 +73,7 @@ services:
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
tgi-service:
-image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
container_name: tgi-service
ports:
- "9009:80"

@@ -72,7 +72,7 @@ services:
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
tgi-service:
-image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
container_name: tgi-service
ports:
- "6042:80"

@@ -57,7 +57,7 @@ services:
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
tgi-service:
-image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
container_name: tgi-service
ports:
- "9009:80"

@@ -18,7 +18,7 @@ The ChatQnA uses the below prebuilt images if you choose a Xeon deployment
- tei_embedding_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
- retriever: opea/retriever-redis:latest
- tei_xeon_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
-- tgi-service: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+- tgi-service: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
- chaqna-xeon-backend-server: opea/chatqna:latest
Should you desire to use the Gaudi accelerator, two alternate images are used for the embedding and llm services.
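To pre-fetch the Xeon image set listed above in one pass, something like the following works (a sketch; the list mirrors the doc and the tags may drift as releases move):

```bash
# Pull every prebuilt image referenced by the Xeon deployment,
# including the updated TGI tag.
for img in \
    ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 \
    opea/retriever-redis:latest \
    ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu \
    opea/chatqna:latest; do
  docker pull "$img"
done
```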

@@ -1100,7 +1100,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data
@@ -1180,7 +1180,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data
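For clusters already running these manifests, the same bump can also be rolled out in place rather than by re-applying the full spec. A minimal sketch, assuming a deployment and container both named tgi (substitute your actual names):

```bash
# Point the running container at the 2.4.0 CPU image and watch the rollout.
kubectl set image deployment/tgi \
    tgi=ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
kubectl rollout status deployment/tgi
```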

@@ -922,7 +922,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data

@@ -925,7 +925,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data

@@ -22,7 +22,7 @@ function build_docker_images() {
service_list="chatqna chatqna-ui chatqna-conversation-ui dataprep-redis retriever-redis nginx"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
-docker pull ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
docker images && sleep 1s
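After the script's pull step, one way to confirm the expected tag landed locally (a sketch using standard docker CLI formatting):

```bash
# List local TGI images; the output should include the 2.4.0-intel-cpu tag.
docker images ghcr.io/huggingface/text-generation-inference \
    --format '{{.Repository}}:{{.Tag}}'
```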