Upgrade TGI Gaudi version to v2.0.6 (#1088)

Signed-off-by: lvliang-intel <liang1.lv@intel.com>
Co-authored-by: chen, suyue <suyue.chen@intel.com>
This commit is contained in:
lvliang-intel
2024-11-12 14:38:22 +08:00
committed by GitHub
parent f7a7f8aa3f
commit 1ff85f6a85
74 changed files with 94 additions and 85 deletions

View File

@@ -192,7 +192,7 @@ For users in China who are unable to download models directly from Huggingface,
export HF_TOKEN=${your_hf_token}
export HF_ENDPOINT="https://hf-mirror.com"
model_name="Intel/neural-chat-7b-v3-3"
docker run -p 8008:80 -v ./data:/data --name tgi-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e ENABLE_HPU_GRAPH=true -e LIMIT_HPU_GRAPH=true -e USE_FLASH_ATTENTION=true -e FLASH_ATTENTION_RECOMPUTE=true --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.5 --model-id $model_name --max-input-tokens 1024 --max-total-tokens 2048
docker run -p 8008:80 -v ./data:/data --name tgi-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e ENABLE_HPU_GRAPH=true -e LIMIT_HPU_GRAPH=true -e USE_FLASH_ATTENTION=true -e FLASH_ATTENTION_RECOMPUTE=true --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.6 --model-id $model_name --max-input-tokens 1024 --max-total-tokens 2048
```
2. Offline
@@ -206,7 +206,7 @@ For users in China who are unable to download models directly from Huggingface,
```bash
export HF_TOKEN=${your_hf_token}
export model_path="/path/to/model"
docker run -p 8008:80 -v $model_path:/data --name tgi_service --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e ENABLE_HPU_GRAPH=true -e LIMIT_HPU_GRAPH=true -e USE_FLASH_ATTENTION=true -e FLASH_ATTENTION_RECOMPUTE=true --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.5 --model-id /data --max-input-tokens 1024 --max-total-tokens 2048
docker run -p 8008:80 -v $model_path:/data --name tgi_service --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e ENABLE_HPU_GRAPH=true -e LIMIT_HPU_GRAPH=true -e USE_FLASH_ATTENTION=true -e FLASH_ATTENTION_RECOMPUTE=true --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.6 --model-id /data --max-input-tokens 1024 --max-total-tokens 2048
```
### Setup Environment Variables

View File

@@ -78,7 +78,7 @@ services:
MAX_WARMUP_SEQUENCE_LENGTH: 512
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
tgi-service:
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
image: ghcr.io/huggingface/tgi-gaudi:2.0.6
container_name: tgi-gaudi-server
ports:
- "8005:80"

View File

@@ -26,7 +26,7 @@ services:
TEI_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
tgi-guardrails-service:
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
image: ghcr.io/huggingface/tgi-gaudi:2.0.6
container_name: tgi-guardrails-server
ports:
- "8088:80"
@@ -117,7 +117,7 @@ services:
MAX_WARMUP_SEQUENCE_LENGTH: 512
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
tgi-service:
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
image: ghcr.io/huggingface/tgi-gaudi:2.0.6
container_name: tgi-gaudi-server
ports:
- "8008:80"

View File

@@ -57,7 +57,7 @@ services:
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
tgi-service:
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
image: ghcr.io/huggingface/tgi-gaudi:2.0.6
container_name: tgi-gaudi-server
ports:
- "8005:80"

View File

@@ -48,16 +48,16 @@ f810f3b4d329 opea/embedding-tei:latest "python e
2fa17d84605f opea/dataprep-redis:latest "python prepare_doc_…" 2 minutes ago Up 2 minutes 0.0.0.0:6007->6007/tcp, :::6007->6007/tcp dataprep-redis-server
69e1fb59e92c opea/retriever-redis:latest "/home/user/comps/re…" 2 minutes ago Up 2 minutes 0.0.0.0:7000->7000/tcp, :::7000->7000/tcp retriever-redis-server
313b9d14928a opea/reranking-tei:latest "python reranking_te…" 2 minutes ago Up 2 minutes 0.0.0.0:8000->8000/tcp, :::8000->8000/tcp reranking-tei-gaudi-server
05c40b636239 ghcr.io/huggingface/tgi-gaudi:2.0.5 "text-generation-lau…" 2 minutes ago Exited (1) About a minute ago tgi-gaudi-server
05c40b636239 ghcr.io/huggingface/tgi-gaudi:2.0.6 "text-generation-lau…" 2 minutes ago Exited (1) About a minute ago tgi-gaudi-server
174bd43fa6b5 ghcr.io/huggingface/tei-gaudi:latest "text-embeddings-rou…" 2 minutes ago Up 2 minutes 0.0.0.0:8090->80/tcp, :::8090->80/tcp tei-embedding-gaudi-server
74084469aa33 redis/redis-stack:7.2.0-v9 "/entrypoint.sh" 2 minutes ago Up 2 minutes 0.0.0.0:6379->6379/tcp, :::6379->6379/tcp, 0.0.0.0:8001->8001/tcp, :::8001->8001/tcp redis-vector-db
88399dbc9e43 ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 "text-embeddings-rou…" 2 minutes ago Up 2 minutes 0.0.0.0:8808->80/tcp, :::8808->80/tcp tei-reranking-gaudi-server
```
In this case, `ghcr.io/huggingface/tgi-gaudi:2.0.5` Existed.
In this case, `ghcr.io/huggingface/tgi-gaudi:2.0.6` Existed.
```
05c40b636239 ghcr.io/huggingface/tgi-gaudi:2.0.5 "text-generation-lau…" 2 minutes ago Exited (1) About a minute ago tgi-gaudi-server
05c40b636239 ghcr.io/huggingface/tgi-gaudi:2.0.6 "text-generation-lau…" 2 minutes ago Exited (1) About a minute ago tgi-gaudi-server
```
Next we can check the container logs to get to know what happened during the docker start.
@@ -68,7 +68,7 @@ Check the log of container by:
`docker logs <CONTAINER ID> -t`
View the logs of `ghcr.io/huggingface/tgi-gaudi:2.0.5`
View the logs of `ghcr.io/huggingface/tgi-gaudi:2.0.6`
`docker logs 05c40b636239 -t`
@@ -97,7 +97,7 @@ So just make sure the devices are available.
Here is another failure example:
```
f7a08f9867f9 ghcr.io/huggingface/tgi-gaudi:2.0.5 "text-generation-lau…" 16 seconds ago Exited (2) 14 seconds ago tgi-gaudi-server
f7a08f9867f9 ghcr.io/huggingface/tgi-gaudi:2.0.6 "text-generation-lau…" 16 seconds ago Exited (2) 14 seconds ago tgi-gaudi-server
```
Check the log by `docker logs f7a08f9867f9 -t`.
@@ -114,7 +114,7 @@ View the docker input parameters in `./ChatQnA/docker_compose/intel/hpu/gaudi/co
```
tgi-service:
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
image: ghcr.io/huggingface/tgi-gaudi:2.0.6
container_name: tgi-gaudi-server
ports:
- "8008:80"