Upgrade TGI Gaudi version to v2.0.6 (#1088)
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
Co-authored-by: chen, suyue <suyue.chen@intel.com>
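The change itself is a mechanical tag bump repeated across compose files, Kubernetes manifests, docs, and test scripts. A one-liner along these lines reproduces it (a sketch, assuming GNU sed and a clean checkout of the repo; it is not part of the commit):

```bash
# Bump every pinned TGI Gaudi tag from 2.0.5 to 2.0.6 across the tree
grep -rl 'tgi-gaudi:2.0.5' . \
  | xargs sed -i 's|tgi-gaudi:2\.0\.5|tgi-gaudi:2.0.6|g'
```
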
@@ -3,7 +3,7 @@
 services:
   tgi-server:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
     container_name: tgi-server
     ports:
       - "8085:80"

@@ -51,7 +51,7 @@ services:
     environment:
       TTS_ENDPOINT: ${TTS_ENDPOINT}
   tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
     container_name: tgi-gaudi-server
     ports:
       - "3006:80"

@@ -25,7 +25,7 @@ The AudioQnA uses the below prebuilt images if you choose a Xeon deployment
 Should you desire to use the Gaudi accelerator, two alternate images are used for the embedding and llm services.
 For Gaudi:
 
-- tgi-service: ghcr.io/huggingface/tgi-gaudi:2.0.5
+- tgi-service: ghcr.io/huggingface/tgi-gaudi:2.0.6
 - whisper-gaudi: opea/whisper-gaudi:latest
 - speecht5-gaudi: opea/speecht5-gaudi:latest

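For instance, the Gaudi images named in this README excerpt can be pre-pulled before bringing the stack up (a minimal sketch using only the image names listed above):

```bash
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
docker pull opea/whisper-gaudi:latest
docker pull opea/speecht5-gaudi:latest
```
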
@@ -271,7 +271,7 @@ spec:
       - envFrom:
         - configMapRef:
             name: audio-qna-config
-        image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.6
         name: llm-dependency-deploy-demo
         securityContext:
           capabilities:

@@ -22,7 +22,7 @@ function build_docker_images() {
     service_list="audioqna whisper-gaudi asr llm-tgi speecht5-gaudi tts"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
 
-    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
+    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
     docker images && sleep 1s
 }

@@ -22,7 +22,7 @@ function build_docker_images() {
     service_list="audioqna whisper asr llm-tgi speecht5 tts"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
 
-    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
+    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
     docker images && sleep 1s
 }

@@ -54,7 +54,7 @@ services:
     environment:
       TTS_ENDPOINT: ${TTS_ENDPOINT}
   tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
     container_name: tgi-gaudi-server
     ports:
       - "3006:80"

@@ -29,7 +29,7 @@ function build_docker_images() {
     service_list="avatarchatbot whisper-gaudi asr llm-tgi speecht5-gaudi tts wav2lip-gaudi animation"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
 
-    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
+    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
 
     docker images && sleep 1s
 }

@@ -29,7 +29,7 @@ function build_docker_images() {
     service_list="avatarchatbot whisper asr llm-tgi speecht5 tts wav2lip animation"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
 
-    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
+    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
 
     docker images && sleep 1s
 }

@@ -48,7 +48,7 @@ To setup a LLM model, we can use [tgi-gaudi](https://github.com/huggingface/tgi-
 docker run -p {your_llm_port}:80 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HF_TOKEN={your_hf_token} --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.1 --model-id mistralai/Mixtral-8x7B-Instruct-v0.1 --max-input-tokens 2048 --max-total-tokens 4096 --sharded true --num-shard 2
 
 # for better performance, set `PREFILL_BATCH_BUCKET_SIZE`, `BATCH_BUCKET_SIZE`, `max-batch-total-tokens`, `max-batch-prefill-tokens`
-docker run -p {your_llm_port}:80 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HF_TOKEN={your_hf_token} -e PREFILL_BATCH_BUCKET_SIZE=1 -e BATCH_BUCKET_SIZE=8 --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.5 --model-id mistralai/Mixtral-8x7B-Instruct-v0.1 --max-input-tokens 2048 --max-total-tokens 4096 --sharded true --num-shard 2 --max-batch-total-tokens 65536 --max-batch-prefill-tokens 2048
+docker run -p {your_llm_port}:80 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HF_TOKEN={your_hf_token} -e PREFILL_BATCH_BUCKET_SIZE=1 -e BATCH_BUCKET_SIZE=8 --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.6 --model-id mistralai/Mixtral-8x7B-Instruct-v0.1 --max-input-tokens 2048 --max-total-tokens 4096 --sharded true --num-shard 2 --max-batch-total-tokens 65536 --max-batch-prefill-tokens 2048
 ```
 
 ### Prepare Dataset

@@ -237,7 +237,7 @@ spec:
         envFrom:
         - configMapRef:
             name: qna-config
-        image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.6
         imagePullPolicy: IfNotPresent
         name: llm-dependency-deploy
         ports:

@@ -237,7 +237,7 @@ spec:
         envFrom:
         - configMapRef:
             name: qna-config
-        image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.6
         imagePullPolicy: IfNotPresent
         name: llm-dependency-deploy
         ports:

@@ -237,7 +237,7 @@ spec:
         envFrom:
         - configMapRef:
             name: qna-config
-        image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.6
         imagePullPolicy: IfNotPresent
         name: llm-dependency-deploy
         ports:

@@ -237,7 +237,7 @@ spec:
         envFrom:
         - configMapRef:
             name: qna-config
-        image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.6
         imagePullPolicy: IfNotPresent
         name: llm-dependency-deploy
         ports:

@@ -237,7 +237,7 @@ spec:
         envFrom:
         - configMapRef:
             name: qna-config
-        image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.6
         imagePullPolicy: IfNotPresent
         name: llm-dependency-deploy
         ports:

@@ -237,7 +237,7 @@ spec:
         envFrom:
         - configMapRef:
             name: qna-config
-        image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.6
         imagePullPolicy: IfNotPresent
         name: llm-dependency-deploy
         ports:

@@ -237,7 +237,7 @@ spec:
         envFrom:
         - configMapRef:
             name: qna-config
-        image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.6
         imagePullPolicy: IfNotPresent
         name: llm-dependency-deploy
         ports:

@@ -237,7 +237,7 @@ spec:
         envFrom:
         - configMapRef:
             name: qna-config
-        image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.6
         imagePullPolicy: IfNotPresent
         name: llm-dependency-deploy
         ports:

@@ -255,7 +255,7 @@ spec:
         envFrom:
         - configMapRef:
             name: qna-config
-        image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.6
         imagePullPolicy: IfNotPresent
         name: llm-dependency-deploy
         ports:

@@ -255,7 +255,7 @@ spec:
         envFrom:
         - configMapRef:
             name: qna-config
-        image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.6
         imagePullPolicy: IfNotPresent
         name: llm-dependency-deploy
         ports:

@@ -255,7 +255,7 @@ spec:
         envFrom:
         - configMapRef:
             name: qna-config
-        image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.6
         imagePullPolicy: IfNotPresent
         name: llm-dependency-deploy
         ports:

@@ -255,7 +255,7 @@ spec:
         envFrom:
         - configMapRef:
             name: qna-config
-        image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.6
         imagePullPolicy: IfNotPresent
         name: llm-dependency-deploy
         ports:

@@ -255,7 +255,7 @@ spec:
         envFrom:
         - configMapRef:
             name: qna-config
-        image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.6
         imagePullPolicy: IfNotPresent
         name: llm-dependency-deploy
         ports:

@@ -255,7 +255,7 @@ spec:
         envFrom:
         - configMapRef:
             name: qna-config
-        image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.6
         imagePullPolicy: IfNotPresent
         name: llm-dependency-deploy
         ports:

@@ -255,7 +255,7 @@ spec:
         envFrom:
         - configMapRef:
             name: qna-config
-        image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.6
         imagePullPolicy: IfNotPresent
         name: llm-dependency-deploy
         ports:

@@ -255,7 +255,7 @@ spec:
         envFrom:
         - configMapRef:
             name: qna-config
-        image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.6
         imagePullPolicy: IfNotPresent
         name: llm-dependency-deploy
         ports:

@@ -38,7 +38,7 @@ opea_micro_services:
   tgi-service:
     host: ${TGI_SERVICE_IP}
     ports: ${TGI_SERVICE_PORT}
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
     volumes:
       - "./data:/data"
     runtime: habana

@@ -192,7 +192,7 @@ For users in China who are unable to download models directly from Huggingface,
    export HF_TOKEN=${your_hf_token}
    export HF_ENDPOINT="https://hf-mirror.com"
    model_name="Intel/neural-chat-7b-v3-3"
-   docker run -p 8008:80 -v ./data:/data --name tgi-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e ENABLE_HPU_GRAPH=true -e LIMIT_HPU_GRAPH=true -e USE_FLASH_ATTENTION=true -e FLASH_ATTENTION_RECOMPUTE=true --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.5 --model-id $model_name --max-input-tokens 1024 --max-total-tokens 2048
+   docker run -p 8008:80 -v ./data:/data --name tgi-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e ENABLE_HPU_GRAPH=true -e LIMIT_HPU_GRAPH=true -e USE_FLASH_ATTENTION=true -e FLASH_ATTENTION_RECOMPUTE=true --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.6 --model-id $model_name --max-input-tokens 1024 --max-total-tokens 2048
    ```
 
 2. Offline

@@ -206,7 +206,7 @@ For users in China who are unable to download models directly from Huggingface,
    ```bash
    export HF_TOKEN=${your_hf_token}
    export model_path="/path/to/model"
-   docker run -p 8008:80 -v $model_path:/data --name tgi_service --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e ENABLE_HPU_GRAPH=true -e LIMIT_HPU_GRAPH=true -e USE_FLASH_ATTENTION=true -e FLASH_ATTENTION_RECOMPUTE=true --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.5 --model-id /data --max-input-tokens 1024 --max-total-tokens 2048
+   docker run -p 8008:80 -v $model_path:/data --name tgi_service --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e ENABLE_HPU_GRAPH=true -e LIMIT_HPU_GRAPH=true -e USE_FLASH_ATTENTION=true -e FLASH_ATTENTION_RECOMPUTE=true --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.6 --model-id /data --max-input-tokens 1024 --max-total-tokens 2048
    ```
 
 ### Setup Environment Variables

@@ -78,7 +78,7 @@ services:
       MAX_WARMUP_SEQUENCE_LENGTH: 512
     command: --model-id ${RERANK_MODEL_ID} --auto-truncate
   tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
     container_name: tgi-gaudi-server
     ports:
       - "8005:80"

@@ -26,7 +26,7 @@ services:
       TEI_ENDPOINT: http://tei-embedding-service:80
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
   tgi-guardrails-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
     container_name: tgi-guardrails-server
     ports:
       - "8088:80"

@@ -117,7 +117,7 @@ services:
       MAX_WARMUP_SEQUENCE_LENGTH: 512
     command: --model-id ${RERANK_MODEL_ID} --auto-truncate
   tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
     container_name: tgi-gaudi-server
     ports:
       - "8008:80"

@@ -57,7 +57,7 @@ services:
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
     restart: unless-stopped
   tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
     container_name: tgi-gaudi-server
     ports:
       - "8005:80"

@@ -48,16 +48,16 @@ f810f3b4d329 opea/embedding-tei:latest "python e
 2fa17d84605f   opea/dataprep-redis:latest    "python prepare_doc_…"   2 minutes ago   Up 2 minutes   0.0.0.0:6007->6007/tcp, :::6007->6007/tcp   dataprep-redis-server
 69e1fb59e92c   opea/retriever-redis:latest   "/home/user/comps/re…"   2 minutes ago   Up 2 minutes   0.0.0.0:7000->7000/tcp, :::7000->7000/tcp   retriever-redis-server
 313b9d14928a   opea/reranking-tei:latest     "python reranking_te…"   2 minutes ago   Up 2 minutes   0.0.0.0:8000->8000/tcp, :::8000->8000/tcp   reranking-tei-gaudi-server
-05c40b636239   ghcr.io/huggingface/tgi-gaudi:2.0.5   "text-generation-lau…"   2 minutes ago   Exited (1) About a minute ago   tgi-gaudi-server
+05c40b636239   ghcr.io/huggingface/tgi-gaudi:2.0.6   "text-generation-lau…"   2 minutes ago   Exited (1) About a minute ago   tgi-gaudi-server
 174bd43fa6b5   ghcr.io/huggingface/tei-gaudi:latest   "text-embeddings-rou…"   2 minutes ago   Up 2 minutes   0.0.0.0:8090->80/tcp, :::8090->80/tcp   tei-embedding-gaudi-server
 74084469aa33   redis/redis-stack:7.2.0-v9   "/entrypoint.sh"   2 minutes ago   Up 2 minutes   0.0.0.0:6379->6379/tcp, :::6379->6379/tcp, 0.0.0.0:8001->8001/tcp, :::8001->8001/tcp   redis-vector-db
 88399dbc9e43   ghcr.io/huggingface/text-embeddings-inference:cpu-1.5   "text-embeddings-rou…"   2 minutes ago   Up 2 minutes   0.0.0.0:8808->80/tcp, :::8808->80/tcp   tei-reranking-gaudi-server
 ```
 
-In this case, `ghcr.io/huggingface/tgi-gaudi:2.0.5` exited.
+In this case, `ghcr.io/huggingface/tgi-gaudi:2.0.6` exited.
 
 ```
-05c40b636239   ghcr.io/huggingface/tgi-gaudi:2.0.5   "text-generation-lau…"   2 minutes ago   Exited (1) About a minute ago   tgi-gaudi-server
+05c40b636239   ghcr.io/huggingface/tgi-gaudi:2.0.6   "text-generation-lau…"   2 minutes ago   Exited (1) About a minute ago   tgi-gaudi-server
 ```
 
 Next we can check the container logs to find out what happened during the docker start.

@@ -68,7 +68,7 @@ Check the log of container by:
 
 `docker logs <CONTAINER ID> -t`
 
-View the logs of `ghcr.io/huggingface/tgi-gaudi:2.0.5`
+View the logs of `ghcr.io/huggingface/tgi-gaudi:2.0.6`
 
 `docker logs 05c40b636239 -t`

@@ -97,7 +97,7 @@ So just make sure the devices are available.
 Here is another failure example:
 
 ```
-f7a08f9867f9   ghcr.io/huggingface/tgi-gaudi:2.0.5   "text-generation-lau…"   16 seconds ago   Exited (2) 14 seconds ago   tgi-gaudi-server
+f7a08f9867f9   ghcr.io/huggingface/tgi-gaudi:2.0.6   "text-generation-lau…"   16 seconds ago   Exited (2) 14 seconds ago   tgi-gaudi-server
 ```
 
 Check the log by `docker logs f7a08f9867f9 -t`.

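Since the hunk above concerns device availability, it can also help to confirm the HPUs are actually visible on the host before restarting the container (a sketch; `hl-smi` ships with the Habana driver stack, and the exact device node paths vary by driver version):

```bash
hl-smi                                        # should list the available Gaudi cards
ls /dev/accel* 2>/dev/null || ls /dev/hl*     # device nodes; exact path depends on the driver
```
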
@@ -114,7 +114,7 @@ View the docker input parameters in `./ChatQnA/docker_compose/intel/hpu/gaudi/co
 
 ```
   tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
     container_name: tgi-gaudi-server
     ports:
       - "8008:80"

@@ -25,7 +25,7 @@ Should you desire to use the Gaudi accelerator, two alternate images are used fo
 For Gaudi:
 
 - tei-embedding-service: ghcr.io/huggingface/tei-gaudi:latest
-- tgi-service: ghcr.io/huggingface/tgi-gaudi:2.0.5
+- tgi-service: ghcr.io/huggingface/tgi-gaudi:2.0.6
 
 > [NOTE]
 > Please refer to [Xeon README](https://github.com/opea-project/GenAIExamples/blob/main/ChatQnA/docker_compose/intel/cpu/xeon/README.md) or [Gaudi README](https://github.com/opea-project/GenAIExamples/blob/main/ChatQnA/docker_compose/intel/hpu/gaudi/README.md) to build the OPEA images. These too will be available on Docker Hub soon to simplify use.

@@ -1103,7 +1103,7 @@ spec:
           runAsUser: 1000
           seccompProfile:
             type: RuntimeDefault
-        image: "ghcr.io/huggingface/tgi-gaudi:2.0.5"
+        image: "ghcr.io/huggingface/tgi-gaudi:2.0.6"
         imagePullPolicy: Always
         volumeMounts:
         - mountPath: /data

@@ -1184,8 +1184,8 @@ spec:
           runAsUser: 1000
           seccompProfile:
             type: RuntimeDefault
-        image: "ghcr.io/huggingface/tgi-gaudi:2.0.5"
-        imagePullPolicy: Always
+        image: "ghcr.io/huggingface/tgi-gaudi:2.0.6"
+        imagePullPolicy: IfNotPresent
         volumeMounts:
         - mountPath: /data
           name: model-volume

@@ -924,7 +924,7 @@ spec:
           runAsUser: 1000
           seccompProfile:
             type: RuntimeDefault
-        image: "ghcr.io/huggingface/tgi-gaudi:2.0.5"
+        image: "ghcr.io/huggingface/tgi-gaudi:2.0.6"
         imagePullPolicy: Always
         volumeMounts:
         - mountPath: /data

@@ -22,7 +22,7 @@ function build_docker_images() {
     service_list="chatqna-guardrails chatqna-ui dataprep-redis retriever-redis guardrails-tgi nginx"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
 
-    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
+    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
     docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
     docker pull ghcr.io/huggingface/tei-gaudi:latest

@@ -22,7 +22,7 @@ function build_docker_images() {
     service_list="chatqna chatqna-ui dataprep-redis retriever-redis nginx"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
 
-    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
+    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
     docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
     docker pull ghcr.io/huggingface/tei-gaudi:latest

@@ -23,7 +23,7 @@ function build_docker_images() {
     service_list="chatqna chatqna-ui dataprep-redis retriever-redis vllm nginx"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
 
-    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
+    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
     docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
 
     docker images && sleep 1s

@@ -22,7 +22,7 @@ function build_docker_images() {
     service_list="chatqna-without-rerank chatqna-ui dataprep-redis retriever-redis nginx"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
 
-    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
+    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
     docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
     docker pull ghcr.io/huggingface/tei-gaudi:latest

@@ -22,7 +22,7 @@ function build_docker_images() {
     service_list="chatqna-without-rerank chatqna-ui chatqna-conversation-ui dataprep-redis retriever-redis nginx"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
 
-    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
+    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
     docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
 
     docker images && sleep 1s

@@ -6,7 +6,7 @@ opea_micro_services:
   tgi-service:
     host: ${TGI_SERVICE_IP}
     ports: ${TGI_SERVICE_PORT}
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
     volumes:
       - "./data:/data"
     runtime: habana

@@ -3,7 +3,7 @@
 services:
   tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
     container_name: tgi-gaudi-server
     ports:
       - "8028:80"

@@ -405,7 +405,7 @@ spec:
           runAsUser: 1000
           seccompProfile:
             type: RuntimeDefault
-        image: "ghcr.io/huggingface/tgi-gaudi:2.0.5"
+        image: "ghcr.io/huggingface/tgi-gaudi:2.0.6"
         imagePullPolicy: IfNotPresent
         volumeMounts:
         - mountPath: /data

@@ -22,7 +22,7 @@ function build_docker_images() {
     service_list="codegen codegen-ui llm-tgi"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
 
-    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
+    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
     docker images && sleep 1s
 }

@@ -6,7 +6,7 @@ opea_micro_services:
   tgi-service:
     host: ${TGI_SERVICE_IP}
     ports: ${TGI_SERVICE_PORT}
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
     volumes:
       - "./data:/data"
     runtime: habana

@@ -3,7 +3,7 @@
 services:
   tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
     container_name: codetrans-tgi-service
     ports:
       - "8008:80"

@@ -405,7 +405,7 @@ spec:
           runAsUser: 1000
           seccompProfile:
             type: RuntimeDefault
-        image: "ghcr.io/huggingface/tgi-gaudi:2.0.5"
+        image: "ghcr.io/huggingface/tgi-gaudi:2.0.6"
         imagePullPolicy: IfNotPresent
         volumeMounts:
         - mountPath: /data

@@ -22,7 +22,7 @@ function build_docker_images() {
     service_list="codetrans codetrans-ui llm-tgi nginx"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
 
-    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
+    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
     docker images && sleep 1s
 }

@@ -11,7 +11,7 @@ First of all, you need to build Docker Images locally. This step can be ignored
 As TGI Gaudi has been officially published as a Docker image, we simply need to pull it:
 
 ```bash
-docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
+docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
 ```
 
 ### 2. Build LLM Image

@@ -53,7 +53,7 @@ docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT
 Then run the command `docker images`, you will have the following Docker Images:
 
-1. `ghcr.io/huggingface/tgi-gaudi:2.0.5`
+1. `ghcr.io/huggingface/tgi-gaudi:2.0.6`
 2. `opea/llm-docsum-tgi:latest`
 3. `opea/docsum:latest`
 4. `opea/docsum-ui:latest`

@@ -3,7 +3,7 @@
 services:
   tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
     container_name: tgi-gaudi-server
     ports:
       - "8008:80"

@@ -6,7 +6,7 @@ opea_micro_services:
   tgi-service:
     host: ${TGI_SERVICE_IP}
     ports: ${TGI_SERVICE_PORT}
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
     volumes:
       - "./data:/data"
     runtime: habana

@@ -9,7 +9,7 @@ The DocSum application is defined as a Custom Resource (CR) file that the above
 The DocSum pipeline uses prebuilt images. The Xeon version uses the prebuilt image `llm-docsum-tgi:latest` which internally leverages
 the image `ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu`. The service is called tgi-svc. Meanwhile, the Gaudi version launches the
-service tgi-gaudi-svc, which uses the image `ghcr.io/huggingface/tgi-gaudi:2.0.5`. Both TGI model services serve the model specified in the LLM_MODEL_ID variable that is exported by you. In the below example we use `Intel/neural-chat-7b-v3-3`.
+service tgi-gaudi-svc, which uses the image `ghcr.io/huggingface/tgi-gaudi:2.0.6`. Both TGI model services serve the model specified in the LLM_MODEL_ID variable that is exported by you. In the below example we use `Intel/neural-chat-7b-v3-3`.
 
 [NOTE]
 Refer to [Docker Xeon README](https://github.com/opea-project/GenAIExamples/blob/main/DocSum/docker_compose/intel/cpu/xeon/README.md) or

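Since both TGI services read the model from the same variable, switching models is a single export before deploying (using the example model named in the text above):

```bash
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
```
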
@@ -405,7 +405,7 @@ spec:
           runAsUser: 1000
           seccompProfile:
             type: RuntimeDefault
-        image: "ghcr.io/huggingface/tgi-gaudi:2.0.5"
+        image: "ghcr.io/huggingface/tgi-gaudi:2.0.6"
         imagePullPolicy: IfNotPresent
         volumeMounts:
         - mountPath: /data

@@ -22,7 +22,7 @@ function build_docker_images() {
     service_list="docsum docsum-ui llm-docsum-tgi"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
 
-    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
+    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
     docker images && sleep 1s
 }

@@ -19,7 +19,7 @@ docker run -it --rm \
   --ipc=host \
   -e HTTPS_PROXY=$https_proxy \
   -e HTTP_PROXY=$https_proxy \
-  ghcr.io/huggingface/tgi-gaudi:2.0.5 \
+  ghcr.io/huggingface/tgi-gaudi:2.0.6 \
   --model-id $model_name \
   --max-input-tokens $max_input_tokens \
   --max-total-tokens $max_total_tokens \

@@ -11,7 +11,7 @@ First of all, you need to build Docker Images locally. This step can be ignored
 As TGI Gaudi has been officially published as a Docker image, we simply need to pull it:
 
 ```bash
-docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
+docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
 ```
 
 ### 2. Build LLM Image

@@ -53,7 +53,7 @@ docker build -t opea/faqgen-react-ui:latest --build-arg https_proxy=$https_proxy
 Then run the command `docker images`, you will have the following Docker Images:
 
-1. `ghcr.io/huggingface/tgi-gaudi:2.0.5`
+1. `ghcr.io/huggingface/tgi-gaudi:2.0.6`
 2. `opea/llm-faqgen-tgi:latest`
 3. `opea/faqgen:latest`
 4. `opea/faqgen-ui:latest`

@@ -3,7 +3,7 @@
 services:
   tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
     container_name: tgi-gaudi-server
     ports:
       - "8008:80"

@@ -6,7 +6,7 @@ opea_micro_services:
   tgi-service:
     host: ${TGI_SERVICE_IP}
     ports: ${TGI_SERVICE_PORT}
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
     volumes:
       - "./data:/data"
     runtime: habana

@@ -47,7 +47,7 @@ spec:
           value: 'true'
         - name: FLASH_ATTENTION_RECOMPUTE
           value: 'true'
-        image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.6
         imagePullPolicy: IfNotPresent
         securityContext:
           capabilities:

@@ -22,7 +22,7 @@ function build_docker_images() {
     service_list="faqgen faqgen-ui llm-faqgen-tgi"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
 
-    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
+    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
     docker images && sleep 1s
 }

@@ -40,7 +40,7 @@ services:
     ipc: host
     command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
   tgi-gaudi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
     container_name: tgi-gaudi-server
     ports:
       - "6005:80"

@@ -23,7 +23,7 @@ function build_docker_images() {
     service_list="graphrag dataprep-neo4j-llamaindex retriever-neo4j-llamaindex chatqna-gaudi-ui-server chatqna-gaudi-nginx-server"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
 
-    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
+    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
     docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
     docker pull neo4j:latest
     docker images && sleep 1s

@@ -80,7 +80,7 @@ docker build --no-cache -t opea/retriever-multimodal-redis:latest --build-arg ht
 Build TGI Gaudi image
 
 ```bash
-docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
+docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
 ```
 
 Build lvm-tgi microservice image

@@ -118,7 +118,7 @@ Then run the command `docker images`, you will have the following 8 Docker Image
 1. `opea/dataprep-multimodal-redis:latest`
 2. `opea/lvm-tgi:latest`
-3. `ghcr.io/huggingface/tgi-gaudi:2.0.5`
+3. `ghcr.io/huggingface/tgi-gaudi:2.0.6`
 4. `opea/retriever-multimodal-redis:latest`
 5. `opea/embedding-multimodal:latest`
 6. `opea/embedding-multimodal-bridgetower:latest`

@@ -69,7 +69,7 @@ services:
       INDEX_NAME: ${INDEX_NAME}
     restart: unless-stopped
   tgi-gaudi:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
     container_name: tgi-llava-gaudi-server
     ports:
       - "8399:80"
@@ -84,6 +84,10 @@ services:
       PREFILL_BATCH_BUCKET_SIZE: 1
       BATCH_BUCKET_SIZE: 1
       MAX_BATCH_TOTAL_TOKENS: 4096
+      ENABLE_HPU_GRAPH: true
+      LIMIT_HPU_GRAPH: true
+      USE_FLASH_ATTENTION: true
+      FLASH_ATTENTION_RECOMPUTE: true
     runtime: habana
     cap_add:
       - SYS_NICE

@@ -25,7 +25,7 @@ function build_docker_images() {
     service_list="multimodalqna multimodalqna-ui embedding-multimodal-bridgetower embedding-multimodal retriever-multimodal-redis lvm-tgi dataprep-multimodal-redis"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
 
-    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
+    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
 
     docker images && sleep 1s
 }

@@ -80,7 +80,7 @@ services:
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
     restart: unless-stopped
   tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
     container_name: tgi-gaudi-server
     ports:
       - "3006:80"

@@ -23,7 +23,7 @@ function build_docker_images() {
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
 
     docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
-    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
+    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
     docker pull ghcr.io/huggingface/tei-gaudi:latest
     docker images && sleep 1s
 }

@@ -3,7 +3,7 @@
 services:
   tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
     container_name: tgi-gaudi-server
     ports:
       - "8008:80"

@@ -362,7 +362,7 @@ spec:
           runAsUser: 1000
           seccompProfile:
             type: RuntimeDefault
-        image: "ghcr.io/huggingface/tgi-gaudi:2.0.5"
+        image: "ghcr.io/huggingface/tgi-gaudi:2.0.6"
         imagePullPolicy: IfNotPresent
         volumeMounts:
         - mountPath: /data

@@ -22,7 +22,7 @@ function build_docker_images() {
     service_list="translation translation-ui llm-tgi nginx"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
 
-    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
+    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
     docker images && sleep 1s
 }

@@ -6,7 +6,7 @@ opea_micro_services:
   tgi-service:
     host: ${TGI_SERVICE_IP}
     ports: ${TGI_SERVICE_PORT}
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
     volumes:
       - "./data:/data"
     runtime: habana

@@ -18,7 +18,7 @@ docker build --no-cache -t opea/nginx:latest --build-arg https_proxy=$https_prox
 ### 2. Pull TGI Gaudi Image
 
 ```bash
-docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
+docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
 ```
 
 ### 3. Build MegaService Docker Image

@@ -43,7 +43,7 @@ docker build --no-cache -t opea/visualqna-ui:latest --build-arg https_proxy=$htt
 Then run the command `docker images`, you will have the following 5 Docker Images:
 
-1. `ghcr.io/huggingface/tgi-gaudi:2.0.5`
+1. `ghcr.io/huggingface/tgi-gaudi:2.0.6`
 2. `opea/lvm-tgi:latest`
 3. `opea/visualqna:latest`
 4. `opea/visualqna-ui:latest`

@@ -3,7 +3,7 @@
 services:
   llava-tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
     container_name: tgi-llava-gaudi-server
     ports:
       - "8399:80"

@@ -21,7 +21,7 @@ function build_docker_images() {
     echo "Build all the images with --no-cache, check docker_image_build.log for details..."
     docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log
 
-    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
+    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
     docker images && sleep 1s
 }