diff --git a/AgentQnA/docker_compose/intel/hpu/gaudi/tgi_gaudi.yaml b/AgentQnA/docker_compose/intel/hpu/gaudi/tgi_gaudi.yaml index cd5dd202d..59c5671e1 100644 --- a/AgentQnA/docker_compose/intel/hpu/gaudi/tgi_gaudi.yaml +++ b/AgentQnA/docker_compose/intel/hpu/gaudi/tgi_gaudi.yaml @@ -3,7 +3,7 @@ services: tgi-server: - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 container_name: tgi-server ports: - "8085:80" diff --git a/AudioQnA/docker_compose/intel/hpu/gaudi/compose.yaml b/AudioQnA/docker_compose/intel/hpu/gaudi/compose.yaml index c3f885fce..b536522c4 100644 --- a/AudioQnA/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/AudioQnA/docker_compose/intel/hpu/gaudi/compose.yaml @@ -51,7 +51,7 @@ services: environment: TTS_ENDPOINT: ${TTS_ENDPOINT} tgi-service: - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 container_name: tgi-gaudi-server ports: - "3006:80" diff --git a/AudioQnA/kubernetes/intel/README_gmc.md b/AudioQnA/kubernetes/intel/README_gmc.md index 30d879e19..767fdf366 100644 --- a/AudioQnA/kubernetes/intel/README_gmc.md +++ b/AudioQnA/kubernetes/intel/README_gmc.md @@ -25,7 +25,7 @@ The AudioQnA uses the below prebuilt images if you choose a Xeon deployment Should you desire to use the Gaudi accelerator, two alternate images are used for the embedding and llm services. For Gaudi: -- tgi-service: ghcr.io/huggingface/tgi-gaudi:2.0.5 +- tgi-service: ghcr.io/huggingface/tgi-gaudi:2.0.6 - whisper-gaudi: opea/whisper-gaudi:latest - speecht5-gaudi: opea/speecht5-gaudi:latest diff --git a/AudioQnA/kubernetes/intel/hpu/gaudi/manifest/audioqna.yaml b/AudioQnA/kubernetes/intel/hpu/gaudi/manifest/audioqna.yaml index 2d0c567e3..6659a7811 100644 --- a/AudioQnA/kubernetes/intel/hpu/gaudi/manifest/audioqna.yaml +++ b/AudioQnA/kubernetes/intel/hpu/gaudi/manifest/audioqna.yaml @@ -271,7 +271,7 @@ spec: - envFrom: - configMapRef: name: audio-qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 name: llm-dependency-deploy-demo securityContext: capabilities: diff --git a/AudioQnA/tests/test_compose_on_gaudi.sh b/AudioQnA/tests/test_compose_on_gaudi.sh index 69270736d..e626b2671 100644 --- a/AudioQnA/tests/test_compose_on_gaudi.sh +++ b/AudioQnA/tests/test_compose_on_gaudi.sh @@ -22,7 +22,7 @@ function build_docker_images() { service_list="audioqna whisper-gaudi asr llm-tgi speecht5-gaudi tts" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5 + docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 docker images && sleep 1s } diff --git a/AudioQnA/tests/test_compose_on_xeon.sh b/AudioQnA/tests/test_compose_on_xeon.sh index b36b5c7de..926a51a33 100644 --- a/AudioQnA/tests/test_compose_on_xeon.sh +++ b/AudioQnA/tests/test_compose_on_xeon.sh @@ -22,7 +22,7 @@ function build_docker_images() { service_list="audioqna whisper asr llm-tgi speecht5 tts" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5 + docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 docker images && sleep 1s } diff --git a/AvatarChatbot/docker_compose/intel/hpu/gaudi/compose.yaml b/AvatarChatbot/docker_compose/intel/hpu/gaudi/compose.yaml index 06a7e4e05..298149ef1 100644 --- a/AvatarChatbot/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/AvatarChatbot/docker_compose/intel/hpu/gaudi/compose.yaml @@ -54,7 +54,7 @@ services: 
environment: TTS_ENDPOINT: ${TTS_ENDPOINT} tgi-service: - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 container_name: tgi-gaudi-server ports: - "3006:80" diff --git a/AvatarChatbot/tests/test_compose_on_gaudi.sh b/AvatarChatbot/tests/test_compose_on_gaudi.sh index ebb185120..ae404b236 100755 --- a/AvatarChatbot/tests/test_compose_on_gaudi.sh +++ b/AvatarChatbot/tests/test_compose_on_gaudi.sh @@ -29,7 +29,7 @@ function build_docker_images() { service_list="avatarchatbot whisper-gaudi asr llm-tgi speecht5-gaudi tts wav2lip-gaudi animation" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5 + docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 docker images && sleep 1s } diff --git a/AvatarChatbot/tests/test_compose_on_xeon.sh b/AvatarChatbot/tests/test_compose_on_xeon.sh index 1b1780a1b..2bed682cf 100755 --- a/AvatarChatbot/tests/test_compose_on_xeon.sh +++ b/AvatarChatbot/tests/test_compose_on_xeon.sh @@ -29,7 +29,7 @@ function build_docker_images() { service_list="avatarchatbot whisper asr llm-tgi speecht5 tts wav2lip animation" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5 + docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 docker images && sleep 1s } diff --git a/ChatQnA/benchmark/accuracy/README.md b/ChatQnA/benchmark/accuracy/README.md index 0cfae4564..c07313948 100644 --- a/ChatQnA/benchmark/accuracy/README.md +++ b/ChatQnA/benchmark/accuracy/README.md @@ -48,7 +48,7 @@ To setup a LLM model, we can use [tgi-gaudi](https://github.com/huggingface/tgi- docker run -p {your_llm_port}:80 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HF_TOKEN={your_hf_token} --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.1 --model-id mistralai/Mixtral-8x7B-Instruct-v0.1 --max-input-tokens 2048 --max-total-tokens 4096 --sharded true --num-shard 2 # for better performance, set `PREFILL_BATCH_BUCKET_SIZE`, `BATCH_BUCKET_SIZE`, `max-batch-total-tokens`, `max-batch-prefill-tokens` -docker run -p {your_llm_port}:80 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HF_TOKEN={your_hf_token} -e PREFILL_BATCH_BUCKET_SIZE=1 -e BATCH_BUCKET_SIZE=8 --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.5 --model-id mistralai/Mixtral-8x7B-Instruct-v0.1 --max-input-tokens 2048 --max-total-tokens 4096 --sharded true --num-shard 2 --max-batch-total-tokens 65536 --max-batch-prefill-tokens 2048 +docker run -p {your_llm_port}:80 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HF_TOKEN={your_hf_token} -e PREFILL_BATCH_BUCKET_SIZE=1 -e BATCH_BUCKET_SIZE=8 --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.6 --model-id mistralai/Mixtral-8x7B-Instruct-v0.1 --max-input-tokens 2048 --max-total-tokens 4096 --sharded true --num-shard 2 --max-batch-total-tokens 65536 --max-batch-prefill-tokens 2048 ``` ### Prepare Dataset diff --git a/ChatQnA/benchmark/performance-deprecated/oob/with_rerank/eight_gaudi/oob_eight_gaudi_with_rerank.yaml b/ChatQnA/benchmark/performance-deprecated/oob/with_rerank/eight_gaudi/oob_eight_gaudi_with_rerank.yaml index 8e74fe6ad..6198e2fac 100644 --- 
a/ChatQnA/benchmark/performance-deprecated/oob/with_rerank/eight_gaudi/oob_eight_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/performance-deprecated/oob/with_rerank/eight_gaudi/oob_eight_gaudi_with_rerank.yaml @@ -237,7 +237,7 @@ spec: envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 imagePullPolicy: IfNotPresent name: llm-dependency-deploy ports: diff --git a/ChatQnA/benchmark/performance-deprecated/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/performance-deprecated/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml index 95f00644c..9c04cfc06 100644 --- a/ChatQnA/benchmark/performance-deprecated/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/performance-deprecated/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml @@ -237,7 +237,7 @@ spec: envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 imagePullPolicy: IfNotPresent name: llm-dependency-deploy ports: diff --git a/ChatQnA/benchmark/performance-deprecated/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml b/ChatQnA/benchmark/performance-deprecated/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml index 4fb165707..30df76b97 100644 --- a/ChatQnA/benchmark/performance-deprecated/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/performance-deprecated/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml @@ -237,7 +237,7 @@ spec: envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 imagePullPolicy: IfNotPresent name: llm-dependency-deploy ports: diff --git a/ChatQnA/benchmark/performance-deprecated/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/performance-deprecated/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml index 0d379f5b5..54cefbaf9 100644 --- a/ChatQnA/benchmark/performance-deprecated/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/performance-deprecated/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml @@ -237,7 +237,7 @@ spec: envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 imagePullPolicy: IfNotPresent name: llm-dependency-deploy ports: diff --git a/ChatQnA/benchmark/performance-deprecated/oob/without_rerank/eight_gaudi/oob_eight_gaudi_without_rerank.yaml b/ChatQnA/benchmark/performance-deprecated/oob/without_rerank/eight_gaudi/oob_eight_gaudi_without_rerank.yaml index 42e9ed4d4..91554a812 100644 --- a/ChatQnA/benchmark/performance-deprecated/oob/without_rerank/eight_gaudi/oob_eight_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/performance-deprecated/oob/without_rerank/eight_gaudi/oob_eight_gaudi_without_rerank.yaml @@ -237,7 +237,7 @@ spec: envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 imagePullPolicy: IfNotPresent name: llm-dependency-deploy ports: diff --git a/ChatQnA/benchmark/performance-deprecated/oob/without_rerank/four_gaudi/oob_four_gaudi_without_rerank.yaml b/ChatQnA/benchmark/performance-deprecated/oob/without_rerank/four_gaudi/oob_four_gaudi_without_rerank.yaml index 0338a8768..7b81e252a 100644 --- 
a/ChatQnA/benchmark/performance-deprecated/oob/without_rerank/four_gaudi/oob_four_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/performance-deprecated/oob/without_rerank/four_gaudi/oob_four_gaudi_without_rerank.yaml @@ -237,7 +237,7 @@ spec: envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 imagePullPolicy: IfNotPresent name: llm-dependency-deploy ports: diff --git a/ChatQnA/benchmark/performance-deprecated/oob/without_rerank/single_gaudi/oob_single_gaudi_without_rerank.yaml b/ChatQnA/benchmark/performance-deprecated/oob/without_rerank/single_gaudi/oob_single_gaudi_without_rerank.yaml index 9d423ffaf..61346908f 100644 --- a/ChatQnA/benchmark/performance-deprecated/oob/without_rerank/single_gaudi/oob_single_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/performance-deprecated/oob/without_rerank/single_gaudi/oob_single_gaudi_without_rerank.yaml @@ -237,7 +237,7 @@ spec: envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 imagePullPolicy: IfNotPresent name: llm-dependency-deploy ports: diff --git a/ChatQnA/benchmark/performance-deprecated/oob/without_rerank/two_gaudi/oob_two_gaudi_without_rerank.yaml b/ChatQnA/benchmark/performance-deprecated/oob/without_rerank/two_gaudi/oob_two_gaudi_without_rerank.yaml index f405bcce6..72ada0191 100644 --- a/ChatQnA/benchmark/performance-deprecated/oob/without_rerank/two_gaudi/oob_two_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/performance-deprecated/oob/without_rerank/two_gaudi/oob_two_gaudi_without_rerank.yaml @@ -237,7 +237,7 @@ spec: envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 imagePullPolicy: IfNotPresent name: llm-dependency-deploy ports: diff --git a/ChatQnA/benchmark/performance-deprecated/tuned/with_rerank/eight_gaudi/eight_gaudi_with_rerank.yaml b/ChatQnA/benchmark/performance-deprecated/tuned/with_rerank/eight_gaudi/eight_gaudi_with_rerank.yaml index 9b47fa0be..80122d731 100644 --- a/ChatQnA/benchmark/performance-deprecated/tuned/with_rerank/eight_gaudi/eight_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/performance-deprecated/tuned/with_rerank/eight_gaudi/eight_gaudi_with_rerank.yaml @@ -255,7 +255,7 @@ spec: envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 imagePullPolicy: IfNotPresent name: llm-dependency-deploy ports: diff --git a/ChatQnA/benchmark/performance-deprecated/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/performance-deprecated/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml index 813a8e44c..970d2652a 100644 --- a/ChatQnA/benchmark/performance-deprecated/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/performance-deprecated/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml @@ -255,7 +255,7 @@ spec: envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 imagePullPolicy: IfNotPresent name: llm-dependency-deploy ports: diff --git a/ChatQnA/benchmark/performance-deprecated/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml b/ChatQnA/benchmark/performance-deprecated/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml index 32e5bd884..a38294b3b 100644 --- 
a/ChatQnA/benchmark/performance-deprecated/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/performance-deprecated/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml @@ -255,7 +255,7 @@ spec: envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 imagePullPolicy: IfNotPresent name: llm-dependency-deploy ports: diff --git a/ChatQnA/benchmark/performance-deprecated/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/performance-deprecated/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml index 055f66f0d..e6f94894c 100644 --- a/ChatQnA/benchmark/performance-deprecated/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/performance-deprecated/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml @@ -255,7 +255,7 @@ spec: envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 imagePullPolicy: IfNotPresent name: llm-dependency-deploy ports: diff --git a/ChatQnA/benchmark/performance-deprecated/tuned/without_rerank/eight_gaudi/tuned_eight_gaudi_without_rerank.yaml b/ChatQnA/benchmark/performance-deprecated/tuned/without_rerank/eight_gaudi/tuned_eight_gaudi_without_rerank.yaml index 5d6793cd9..365cd5ab5 100644 --- a/ChatQnA/benchmark/performance-deprecated/tuned/without_rerank/eight_gaudi/tuned_eight_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/performance-deprecated/tuned/without_rerank/eight_gaudi/tuned_eight_gaudi_without_rerank.yaml @@ -255,7 +255,7 @@ spec: envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 imagePullPolicy: IfNotPresent name: llm-dependency-deploy ports: diff --git a/ChatQnA/benchmark/performance-deprecated/tuned/without_rerank/four_gaudi/tuned_four_gaudi_without_rerank.yaml b/ChatQnA/benchmark/performance-deprecated/tuned/without_rerank/four_gaudi/tuned_four_gaudi_without_rerank.yaml index 0cfb09b0f..6af61b1ff 100644 --- a/ChatQnA/benchmark/performance-deprecated/tuned/without_rerank/four_gaudi/tuned_four_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/performance-deprecated/tuned/without_rerank/four_gaudi/tuned_four_gaudi_without_rerank.yaml @@ -255,7 +255,7 @@ spec: envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 imagePullPolicy: IfNotPresent name: llm-dependency-deploy ports: diff --git a/ChatQnA/benchmark/performance-deprecated/tuned/without_rerank/single_gaudi/tuned_single_gaudi_without_rerank.yaml b/ChatQnA/benchmark/performance-deprecated/tuned/without_rerank/single_gaudi/tuned_single_gaudi_without_rerank.yaml index a9f7e4639..dc56cc96f 100644 --- a/ChatQnA/benchmark/performance-deprecated/tuned/without_rerank/single_gaudi/tuned_single_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/performance-deprecated/tuned/without_rerank/single_gaudi/tuned_single_gaudi_without_rerank.yaml @@ -255,7 +255,7 @@ spec: envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 imagePullPolicy: IfNotPresent name: llm-dependency-deploy ports: diff --git a/ChatQnA/benchmark/performance-deprecated/tuned/without_rerank/two_gaudi/tuned_two_gaudi_without_rerank.yaml 
b/ChatQnA/benchmark/performance-deprecated/tuned/without_rerank/two_gaudi/tuned_two_gaudi_without_rerank.yaml index a072d36ff..f14ebc215 100644 --- a/ChatQnA/benchmark/performance-deprecated/tuned/without_rerank/two_gaudi/tuned_two_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/performance-deprecated/tuned/without_rerank/two_gaudi/tuned_two_gaudi_without_rerank.yaml @@ -255,7 +255,7 @@ spec: envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 imagePullPolicy: IfNotPresent name: llm-dependency-deploy ports: diff --git a/ChatQnA/chatqna.yaml b/ChatQnA/chatqna.yaml index e8a2d2735..89aac8aa8 100644 --- a/ChatQnA/chatqna.yaml +++ b/ChatQnA/chatqna.yaml @@ -38,7 +38,7 @@ opea_micro_services: tgi-service: host: ${TGI_SERVICE_IP} ports: ${TGI_SERVICE_PORT} - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 volumes: - "./data:/data" runtime: habana diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/README.md b/ChatQnA/docker_compose/intel/hpu/gaudi/README.md index 02620ea7b..ad56d525a 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/README.md +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/README.md @@ -192,7 +192,7 @@ For users in China who are unable to download models directly from Huggingface, export HF_TOKEN=${your_hf_token} export HF_ENDPOINT="https://hf-mirror.com" model_name="Intel/neural-chat-7b-v3-3" - docker run -p 8008:80 -v ./data:/data --name tgi-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e ENABLE_HPU_GRAPH=true -e LIMIT_HPU_GRAPH=true -e USE_FLASH_ATTENTION=true -e FLASH_ATTENTION_RECOMPUTE=true --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.5 --model-id $model_name --max-input-tokens 1024 --max-total-tokens 2048 + docker run -p 8008:80 -v ./data:/data --name tgi-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e ENABLE_HPU_GRAPH=true -e LIMIT_HPU_GRAPH=true -e USE_FLASH_ATTENTION=true -e FLASH_ATTENTION_RECOMPUTE=true --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.6 --model-id $model_name --max-input-tokens 1024 --max-total-tokens 2048 ``` 2. 
Offline @@ -206,7 +206,7 @@ For users in China who are unable to download models directly from Huggingface, ```bash export HF_TOKEN=${your_hf_token} export model_path="/path/to/model" - docker run -p 8008:80 -v $model_path:/data --name tgi_service --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e ENABLE_HPU_GRAPH=true -e LIMIT_HPU_GRAPH=true -e USE_FLASH_ATTENTION=true -e FLASH_ATTENTION_RECOMPUTE=true --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.5 --model-id /data --max-input-tokens 1024 --max-total-tokens 2048 + docker run -p 8008:80 -v $model_path:/data --name tgi_service --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e ENABLE_HPU_GRAPH=true -e LIMIT_HPU_GRAPH=true -e USE_FLASH_ATTENTION=true -e FLASH_ATTENTION_RECOMPUTE=true --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.6 --model-id /data --max-input-tokens 1024 --max-total-tokens 2048 ``` ### Setup Environment Variables diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml index a47575b31..0658c0c2f 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml @@ -78,7 +78,7 @@ services: MAX_WARMUP_SEQUENCE_LENGTH: 512 command: --model-id ${RERANK_MODEL_ID} --auto-truncate tgi-service: - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 container_name: tgi-gaudi-server ports: - "8005:80" diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml index 570d689c2..79cf3f371 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml @@ -26,7 +26,7 @@ services: TEI_ENDPOINT: http://tei-embedding-service:80 HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} tgi-guardrails-service: - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 container_name: tgi-guardrails-server ports: - "8088:80" @@ -117,7 +117,7 @@ services: MAX_WARMUP_SEQUENCE_LENGTH: 512 command: --model-id ${RERANK_MODEL_ID} --auto-truncate tgi-service: - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 container_name: tgi-gaudi-server ports: - "8008:80" diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_without_rerank.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_without_rerank.yaml index 9117a13a6..524b44c1a 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_without_rerank.yaml +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_without_rerank.yaml @@ -57,7 +57,7 @@ services: HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} restart: unless-stopped tgi-service: - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 container_name: tgi-gaudi-server ports: - "8005:80" diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/how_to_validate_service.md b/ChatQnA/docker_compose/intel/hpu/gaudi/how_to_validate_service.md index 7448ae625..9214960c2 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/how_to_validate_service.md +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/how_to_validate_service.md @@ -48,16 +48,16 @@ f810f3b4d329 opea/embedding-tei:latest "python e 2fa17d84605f opea/dataprep-redis:latest "python 
prepare_doc_…" 2 minutes ago Up 2 minutes 0.0.0.0:6007->6007/tcp, :::6007->6007/tcp dataprep-redis-server 69e1fb59e92c opea/retriever-redis:latest "/home/user/comps/re…" 2 minutes ago Up 2 minutes 0.0.0.0:7000->7000/tcp, :::7000->7000/tcp retriever-redis-server 313b9d14928a opea/reranking-tei:latest "python reranking_te…" 2 minutes ago Up 2 minutes 0.0.0.0:8000->8000/tcp, :::8000->8000/tcp reranking-tei-gaudi-server -05c40b636239 ghcr.io/huggingface/tgi-gaudi:2.0.5 "text-generation-lau…" 2 minutes ago Exited (1) About a minute ago tgi-gaudi-server +05c40b636239 ghcr.io/huggingface/tgi-gaudi:2.0.6 "text-generation-lau…" 2 minutes ago Exited (1) About a minute ago tgi-gaudi-server 174bd43fa6b5 ghcr.io/huggingface/tei-gaudi:latest "text-embeddings-rou…" 2 minutes ago Up 2 minutes 0.0.0.0:8090->80/tcp, :::8090->80/tcp tei-embedding-gaudi-server 74084469aa33 redis/redis-stack:7.2.0-v9 "/entrypoint.sh" 2 minutes ago Up 2 minutes 0.0.0.0:6379->6379/tcp, :::6379->6379/tcp, 0.0.0.0:8001->8001/tcp, :::8001->8001/tcp redis-vector-db 88399dbc9e43 ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 "text-embeddings-rou…" 2 minutes ago Up 2 minutes 0.0.0.0:8808->80/tcp, :::8808->80/tcp tei-reranking-gaudi-server ``` -In this case, `ghcr.io/huggingface/tgi-gaudi:2.0.5` Existed. +In this case, `ghcr.io/huggingface/tgi-gaudi:2.0.6` exited. ``` -05c40b636239 ghcr.io/huggingface/tgi-gaudi:2.0.5 "text-generation-lau…" 2 minutes ago Exited (1) About a minute ago tgi-gaudi-server +05c40b636239 ghcr.io/huggingface/tgi-gaudi:2.0.6 "text-generation-lau…" 2 minutes ago Exited (1) About a minute ago tgi-gaudi-server ``` Next we can check the container logs to get to know what happened during the docker start. @@ -68,7 +68,7 @@ Check the log of container by: `docker logs -t` -View the logs of `ghcr.io/huggingface/tgi-gaudi:2.0.5` +View the logs of `ghcr.io/huggingface/tgi-gaudi:2.0.6` `docker logs 05c40b636239 -t` @@ -97,7 +97,7 @@ So just make sure the devices are available. Here is another failure example: ``` -f7a08f9867f9 ghcr.io/huggingface/tgi-gaudi:2.0.5 "text-generation-lau…" 16 seconds ago Exited (2) 14 seconds ago tgi-gaudi-server +f7a08f9867f9 ghcr.io/huggingface/tgi-gaudi:2.0.6 "text-generation-lau…" 16 seconds ago Exited (2) 14 seconds ago tgi-gaudi-server ``` Check the log by `docker logs f7a08f9867f9 -t`. @@ -114,7 +114,7 @@ View the docker input parameters in `./ChatQnA/docker_compose/intel/hpu/gaudi/co ``` tgi-service: - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 container_name: tgi-gaudi-server ports: - "8008:80" diff --git a/ChatQnA/kubernetes/intel/README_gmc.md b/ChatQnA/kubernetes/intel/README_gmc.md index 860bae720..a2ffed26b 100644 --- a/ChatQnA/kubernetes/intel/README_gmc.md +++ b/ChatQnA/kubernetes/intel/README_gmc.md @@ -25,7 +25,7 @@ Should you desire to use the Gaudi accelerator, two alternate images are used fo For Gaudi: - tei-embedding-service: ghcr.io/huggingface/tei-gaudi:latest -- tgi-service: gghcr.io/huggingface/tgi-gaudi:2.0.5 +- tgi-service: ghcr.io/huggingface/tgi-gaudi:2.0.6 > [NOTE] > Please refer to [Xeon README](https://github.com/opea-project/GenAIExamples/blob/main/ChatQnA/docker_compose/intel/cpu/xeon/README.md) or [Gaudi README](https://github.com/opea-project/GenAIExamples/blob/main/ChatQnA/docker_compose/intel/hpu/gaudi/README.md) to build the OPEA images. These too will be available on Docker Hub soon to simplify use.
diff --git a/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna-guardrails.yaml b/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna-guardrails.yaml index cd40efed1..b2cc29e89 100644 --- a/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna-guardrails.yaml +++ b/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna-guardrails.yaml @@ -1103,7 +1103,7 @@ spec: runAsUser: 1000 seccompProfile: type: RuntimeDefault - image: "ghcr.io/huggingface/tgi-gaudi:2.0.5" + image: "ghcr.io/huggingface/tgi-gaudi:2.0.6" imagePullPolicy: Always volumeMounts: - mountPath: /data @@ -1184,7 +1184,7 @@ spec: runAsUser: 1000 seccompProfile: type: RuntimeDefault - image: "ghcr.io/huggingface/tgi-gaudi:2.0.5" + image: "ghcr.io/huggingface/tgi-gaudi:2.0.6" imagePullPolicy: Always volumeMounts: - mountPath: /data name: model-volume diff --git a/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna.yaml b/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna.yaml index 1c80ca5af..72ba1ded9 100644 --- a/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna.yaml +++ b/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna.yaml @@ -924,7 +924,7 @@ spec: runAsUser: 1000 seccompProfile: type: RuntimeDefault - image: "ghcr.io/huggingface/tgi-gaudi:2.0.5" + image: "ghcr.io/huggingface/tgi-gaudi:2.0.6" imagePullPolicy: Always volumeMounts: - mountPath: /data diff --git a/ChatQnA/tests/test_compose_guardrails_on_gaudi.sh b/ChatQnA/tests/test_compose_guardrails_on_gaudi.sh index fcc3f8041..79871e10a 100644 --- a/ChatQnA/tests/test_compose_guardrails_on_gaudi.sh +++ b/ChatQnA/tests/test_compose_guardrails_on_gaudi.sh @@ -22,7 +22,7 @@ function build_docker_images() { service_list="chatqna-guardrails chatqna-ui dataprep-redis retriever-redis guardrails-tgi nginx" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5 + docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 docker pull ghcr.io/huggingface/tei-gaudi:latest diff --git a/ChatQnA/tests/test_compose_on_gaudi.sh b/ChatQnA/tests/test_compose_on_gaudi.sh index 1d5b8bc8a..204ad3039 100644 --- a/ChatQnA/tests/test_compose_on_gaudi.sh +++ b/ChatQnA/tests/test_compose_on_gaudi.sh @@ -22,7 +22,7 @@ function build_docker_images() { service_list="chatqna chatqna-ui dataprep-redis retriever-redis nginx" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5 + docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 docker pull ghcr.io/huggingface/tei-gaudi:latest diff --git a/ChatQnA/tests/test_compose_vllm_on_xeon.sh b/ChatQnA/tests/test_compose_vllm_on_xeon.sh index b664a6af8..f53fd3aea 100644 --- a/ChatQnA/tests/test_compose_vllm_on_xeon.sh +++ b/ChatQnA/tests/test_compose_vllm_on_xeon.sh @@ -23,7 +23,7 @@ function build_docker_images() { service_list="chatqna chatqna-ui dataprep-redis retriever-redis vllm nginx" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5 + docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 docker images && sleep 1s diff --git a/ChatQnA/tests/test_compose_without_rerank_on_gaudi.sh
b/ChatQnA/tests/test_compose_without_rerank_on_gaudi.sh index 22c5e8c94..0a59eabf7 100644 --- a/ChatQnA/tests/test_compose_without_rerank_on_gaudi.sh +++ b/ChatQnA/tests/test_compose_without_rerank_on_gaudi.sh @@ -22,7 +22,7 @@ function build_docker_images() { service_list="chatqna-without-rerank chatqna-ui dataprep-redis retriever-redis nginx" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5 + docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 docker pull ghcr.io/huggingface/tei-gaudi:latest diff --git a/ChatQnA/tests/test_compose_without_rerank_on_xeon.sh b/ChatQnA/tests/test_compose_without_rerank_on_xeon.sh index b0ffc22bc..89b492261 100644 --- a/ChatQnA/tests/test_compose_without_rerank_on_xeon.sh +++ b/ChatQnA/tests/test_compose_without_rerank_on_xeon.sh @@ -22,7 +22,7 @@ function build_docker_images() { service_list="chatqna-without-rerank chatqna-ui chatqna-conversation-ui dataprep-redis retriever-redis nginx" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5 + docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 docker images && sleep 1s diff --git a/CodeGen/codegen.yaml b/CodeGen/codegen.yaml index 95f2d78e6..8dc864f6f 100644 --- a/CodeGen/codegen.yaml +++ b/CodeGen/codegen.yaml @@ -6,7 +6,7 @@ opea_micro_services: tgi-service: host: ${TGI_SERVICE_IP} ports: ${TGI_SERVICE_PORT} - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 volumes: - "./data:/data" runtime: habana diff --git a/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml b/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml index 153b9f59a..92b70b099 100644 --- a/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml @@ -3,7 +3,7 @@ services: tgi-service: - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 container_name: tgi-gaudi-server ports: - "8028:80" diff --git a/CodeGen/kubernetes/intel/hpu/gaudi/manifest/codegen.yaml b/CodeGen/kubernetes/intel/hpu/gaudi/manifest/codegen.yaml index b506d17d4..dc032cd25 100644 --- a/CodeGen/kubernetes/intel/hpu/gaudi/manifest/codegen.yaml +++ b/CodeGen/kubernetes/intel/hpu/gaudi/manifest/codegen.yaml @@ -405,7 +405,7 @@ spec: runAsUser: 1000 seccompProfile: type: RuntimeDefault - image: "ghcr.io/huggingface/tgi-gaudi:2.0.5" + image: "ghcr.io/huggingface/tgi-gaudi:2.0.6" imagePullPolicy: IfNotPresent volumeMounts: - mountPath: /data diff --git a/CodeGen/tests/test_compose_on_gaudi.sh b/CodeGen/tests/test_compose_on_gaudi.sh index ec1658314..f90e0aaa4 100644 --- a/CodeGen/tests/test_compose_on_gaudi.sh +++ b/CodeGen/tests/test_compose_on_gaudi.sh @@ -22,7 +22,7 @@ function build_docker_images() { service_list="codegen codegen-ui llm-tgi" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5 + docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 docker images && sleep 1s } diff --git a/CodeTrans/codetrans.yaml b/CodeTrans/codetrans.yaml index 9d7f70b4e..c36259978 100644 --- a/CodeTrans/codetrans.yaml +++ b/CodeTrans/codetrans.yaml @@ -6,7 +6,7 @@ opea_micro_services: tgi-service: host: ${TGI_SERVICE_IP} ports: ${TGI_SERVICE_PORT} - image: 
ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 volumes: - "./data:/data" runtime: habana diff --git a/CodeTrans/docker_compose/intel/hpu/gaudi/compose.yaml b/CodeTrans/docker_compose/intel/hpu/gaudi/compose.yaml index 09b82ed3f..2f87d10c2 100644 --- a/CodeTrans/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/CodeTrans/docker_compose/intel/hpu/gaudi/compose.yaml @@ -3,7 +3,7 @@ services: tgi-service: - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 container_name: codetrans-tgi-service ports: - "8008:80" diff --git a/CodeTrans/kubernetes/intel/hpu/gaudi/manifest/codetrans.yaml b/CodeTrans/kubernetes/intel/hpu/gaudi/manifest/codetrans.yaml index 076104e77..a2efecf44 100644 --- a/CodeTrans/kubernetes/intel/hpu/gaudi/manifest/codetrans.yaml +++ b/CodeTrans/kubernetes/intel/hpu/gaudi/manifest/codetrans.yaml @@ -405,7 +405,7 @@ spec: runAsUser: 1000 seccompProfile: type: RuntimeDefault - image: "ghcr.io/huggingface/tgi-gaudi:2.0.5" + image: "ghcr.io/huggingface/tgi-gaudi:2.0.6" imagePullPolicy: IfNotPresent volumeMounts: - mountPath: /data diff --git a/CodeTrans/tests/test_compose_on_gaudi.sh b/CodeTrans/tests/test_compose_on_gaudi.sh index 884f2dffa..c6e8b8c9b 100644 --- a/CodeTrans/tests/test_compose_on_gaudi.sh +++ b/CodeTrans/tests/test_compose_on_gaudi.sh @@ -22,7 +22,7 @@ function build_docker_images() { service_list="codetrans codetrans-ui llm-tgi nginx" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5 + docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 docker images && sleep 1s } diff --git a/DocSum/docker_compose/intel/hpu/gaudi/README.md b/DocSum/docker_compose/intel/hpu/gaudi/README.md index cf655dd08..6882f0eba 100644 --- a/DocSum/docker_compose/intel/hpu/gaudi/README.md +++ b/DocSum/docker_compose/intel/hpu/gaudi/README.md @@ -11,7 +11,7 @@ First of all, you need to build Docker Images locally. This step can be ignored As TGI Gaudi has been officially published as a Docker image, we simply need to pull it: ```bash -docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5 +docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 ``` ### 2. Build LLM Image @@ -53,7 +53,7 @@ docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT Then run the command `docker images`, you will have the following Docker Images: -1. `ghcr.io/huggingface/tgi-gaudi:2.0.5` +1. `ghcr.io/huggingface/tgi-gaudi:2.0.6` 2. `opea/llm-docsum-tgi:latest` 3. `opea/docsum:latest` 4. 
`opea/docsum-ui:latest` diff --git a/DocSum/docker_compose/intel/hpu/gaudi/compose.yaml b/DocSum/docker_compose/intel/hpu/gaudi/compose.yaml index ec9a2b355..71c52b40a 100644 --- a/DocSum/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/DocSum/docker_compose/intel/hpu/gaudi/compose.yaml @@ -3,7 +3,7 @@ services: tgi-service: - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 container_name: tgi-gaudi-server ports: - "8008:80" diff --git a/DocSum/docsum.yaml b/DocSum/docsum.yaml index bc87bc5b4..9e9936ff4 100644 --- a/DocSum/docsum.yaml +++ b/DocSum/docsum.yaml @@ -6,7 +6,7 @@ opea_micro_services: tgi-service: host: ${TGI_SERVICE_IP} ports: ${TGI_SERVICE_PORT} - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 volumes: - "./data:/data" runtime: habana diff --git a/DocSum/kubernetes/intel/README_gmc.md b/DocSum/kubernetes/intel/README_gmc.md index 6046ca4dc..00e9d8e1b 100644 --- a/DocSum/kubernetes/intel/README_gmc.md +++ b/DocSum/kubernetes/intel/README_gmc.md @@ -9,7 +9,7 @@ The DocSum application is defined as a Custom Resource (CR) file that the above The DocSum pipeline uses prebuilt images. The Xeon version uses the prebuilt image `llm-docsum-tgi:latest` which internally leverages the the image `ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu`. The service is called tgi-svc. Meanwhile, the Gaudi version launches the -service tgi-gaudi-svc, which uses the image `ghcr.io/huggingface/tgi-gaudi:2.0.5`. Both TGI model services serve the model specified in the LLM_MODEL_ID variable that is exported by you. In the below example we use `Intel/neural-chat-7b-v3-3`. +service tgi-gaudi-svc, which uses the image `ghcr.io/huggingface/tgi-gaudi:2.0.6`. Both TGI model services serve the model specified in the LLM_MODEL_ID variable that is exported by you. In the below example we use `Intel/neural-chat-7b-v3-3`. 
[NOTE] Refer to [Docker Xeon README](https://github.com/opea-project/GenAIExamples/blob/main/DocSum/docker_compose/intel/cpu/xeon/README.md) or diff --git a/DocSum/kubernetes/intel/hpu/gaudi/manifest/docsum.yaml b/DocSum/kubernetes/intel/hpu/gaudi/manifest/docsum.yaml index 5c10f3c76..7ab1df9b1 100644 --- a/DocSum/kubernetes/intel/hpu/gaudi/manifest/docsum.yaml +++ b/DocSum/kubernetes/intel/hpu/gaudi/manifest/docsum.yaml @@ -405,7 +405,7 @@ spec: runAsUser: 1000 seccompProfile: type: RuntimeDefault - image: "ghcr.io/huggingface/tgi-gaudi:2.0.5" + image: "ghcr.io/huggingface/tgi-gaudi:2.0.6" imagePullPolicy: IfNotPresent volumeMounts: - mountPath: /data diff --git a/DocSum/tests/test_compose_on_gaudi.sh b/DocSum/tests/test_compose_on_gaudi.sh index 305b56d5d..12a6a8861 100644 --- a/DocSum/tests/test_compose_on_gaudi.sh +++ b/DocSum/tests/test_compose_on_gaudi.sh @@ -22,7 +22,7 @@ function build_docker_images() { service_list="docsum docsum-ui llm-docsum-tgi" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5 + docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 docker images && sleep 1s } diff --git a/FaqGen/benchmark/accuracy/launch_tgi.sh b/FaqGen/benchmark/accuracy/launch_tgi.sh index a504f2a41..1a1d23ee8 100644 --- a/FaqGen/benchmark/accuracy/launch_tgi.sh +++ b/FaqGen/benchmark/accuracy/launch_tgi.sh @@ -19,7 +19,7 @@ docker run -it --rm \ --ipc=host \ -e HTTPS_PROXY=$https_proxy \ -e HTTP_PROXY=$https_proxy \ - ghcr.io/huggingface/tgi-gaudi:2.0.5 \ + ghcr.io/huggingface/tgi-gaudi:2.0.6 \ --model-id $model_name \ --max-input-tokens $max_input_tokens \ --max-total-tokens $max_total_tokens \ diff --git a/FaqGen/docker_compose/intel/hpu/gaudi/README.md b/FaqGen/docker_compose/intel/hpu/gaudi/README.md index 548a94e16..b157106bf 100644 --- a/FaqGen/docker_compose/intel/hpu/gaudi/README.md +++ b/FaqGen/docker_compose/intel/hpu/gaudi/README.md @@ -11,7 +11,7 @@ First of all, you need to build Docker Images locally. This step can be ignored As TGI Gaudi has been officially published as a Docker image, we simply need to pull it: ```bash -docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5 +docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 ``` ### 2. Build LLM Image @@ -53,7 +53,7 @@ docker build -t opea/faqgen-react-ui:latest --build-arg https_proxy=$https_proxy Then run the command `docker images`, you will have the following Docker Images: -1. `ghcr.io/huggingface/tgi-gaudi:2.0.5` +1. `ghcr.io/huggingface/tgi-gaudi:2.0.6` 2. `opea/llm-faqgen-tgi:latest` 3. `opea/faqgen:latest` 4. 
`opea/faqgen-ui:latest` diff --git a/FaqGen/docker_compose/intel/hpu/gaudi/compose.yaml b/FaqGen/docker_compose/intel/hpu/gaudi/compose.yaml index 1ee36bd30..1416019b1 100644 --- a/FaqGen/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/FaqGen/docker_compose/intel/hpu/gaudi/compose.yaml @@ -3,7 +3,7 @@ services: tgi-service: - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 container_name: tgi-gaudi-server ports: - "8008:80" diff --git a/FaqGen/faqgen.yaml b/FaqGen/faqgen.yaml index 8d354871e..5b924a38e 100644 --- a/FaqGen/faqgen.yaml +++ b/FaqGen/faqgen.yaml @@ -6,7 +6,7 @@ opea_micro_services: tgi-service: host: ${TGI_SERVICE_IP} ports: ${TGI_SERVICE_PORT} - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 volumes: - "./data:/data" runtime: habana diff --git a/FaqGen/kubernetes/intel/hpu/gaudi/manifest/faqgen.yaml b/FaqGen/kubernetes/intel/hpu/gaudi/manifest/faqgen.yaml index 2703cbc4e..a9b8ef199 100644 --- a/FaqGen/kubernetes/intel/hpu/gaudi/manifest/faqgen.yaml +++ b/FaqGen/kubernetes/intel/hpu/gaudi/manifest/faqgen.yaml @@ -47,7 +47,7 @@ spec: value: 'true' - name: FLASH_ATTENTION_RECOMPUTE value: 'true' - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 imagePullPolicy: IfNotPresent securityContext: capabilities: diff --git a/FaqGen/tests/test_compose_on_gaudi.sh b/FaqGen/tests/test_compose_on_gaudi.sh index 161c1e2a7..6eb229ca7 100644 --- a/FaqGen/tests/test_compose_on_gaudi.sh +++ b/FaqGen/tests/test_compose_on_gaudi.sh @@ -22,7 +22,7 @@ function build_docker_images() { service_list="faqgen faqgen-ui llm-faqgen-tgi" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5 + docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 docker images && sleep 1s } diff --git a/GraphRAG/docker_compose/intel/hpu/gaudi/compose.yaml b/GraphRAG/docker_compose/intel/hpu/gaudi/compose.yaml index b03cba56a..2cedab39e 100644 --- a/GraphRAG/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/GraphRAG/docker_compose/intel/hpu/gaudi/compose.yaml @@ -40,7 +40,7 @@ services: ipc: host command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate tgi-gaudi-service: - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 container_name: tgi-gaudi-server ports: - "6005:80" diff --git a/GraphRAG/tests/test_compose.sh b/GraphRAG/tests/test_compose.sh index 0fc227830..72b77b642 100755 --- a/GraphRAG/tests/test_compose.sh +++ b/GraphRAG/tests/test_compose.sh @@ -23,7 +23,7 @@ function build_docker_images() { service_list="graphrag dataprep-neo4j-llamaindex retriever-neo4j-llamaindex chatqna-gaudi-ui-server chatqna-gaudi-nginx-server" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5 + docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 docker pull neo4j:latest docker images && sleep 1s diff --git a/MultimodalQnA/docker_compose/intel/hpu/gaudi/README.md b/MultimodalQnA/docker_compose/intel/hpu/gaudi/README.md index 6d6ca88ff..9e7db70b7 100644 --- a/MultimodalQnA/docker_compose/intel/hpu/gaudi/README.md +++ b/MultimodalQnA/docker_compose/intel/hpu/gaudi/README.md @@ -80,7 +80,7 @@ docker build --no-cache -t opea/retriever-multimodal-redis:latest --build-arg ht Build TGI Gaudi image ```bash -docker pull 
ghcr.io/huggingface/tgi-gaudi:2.0.5 +docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 ``` Build lvm-tgi microservice image @@ -118,7 +118,7 @@ Then run the command `docker images`, you will have the following 8 Docker Image 1. `opea/dataprep-multimodal-redis:latest` 2. `opea/lvm-tgi:latest` -3. `ghcr.io/huggingface/tgi-gaudi:2.0.5` +3. `ghcr.io/huggingface/tgi-gaudi:2.0.6` 4. `opea/retriever-multimodal-redis:latest` 5. `opea/embedding-multimodal:latest` 6. `opea/embedding-multimodal-bridgetower:latest` diff --git a/MultimodalQnA/docker_compose/intel/hpu/gaudi/compose.yaml b/MultimodalQnA/docker_compose/intel/hpu/gaudi/compose.yaml index e66aea1f0..ddaf2b09d 100644 --- a/MultimodalQnA/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/MultimodalQnA/docker_compose/intel/hpu/gaudi/compose.yaml @@ -69,7 +69,7 @@ services: INDEX_NAME: ${INDEX_NAME} restart: unless-stopped tgi-gaudi: - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 container_name: tgi-llava-gaudi-server ports: - "8399:80" @@ -84,6 +84,10 @@ services: PREFILL_BATCH_BUCKET_SIZE: 1 BATCH_BUCKET_SIZE: 1 MAX_BATCH_TOTAL_TOKENS: 4096 + ENABLE_HPU_GRAPH: true + LIMIT_HPU_GRAPH: true + USE_FLASH_ATTENTION: true + FLASH_ATTENTION_RECOMPUTE: true runtime: habana cap_add: - SYS_NICE diff --git a/MultimodalQnA/tests/test_compose_on_gaudi.sh b/MultimodalQnA/tests/test_compose_on_gaudi.sh index 3b629f52b..5ac1228db 100644 --- a/MultimodalQnA/tests/test_compose_on_gaudi.sh +++ b/MultimodalQnA/tests/test_compose_on_gaudi.sh @@ -25,7 +25,7 @@ function build_docker_images() { service_list="multimodalqna multimodalqna-ui embedding-multimodal-bridgetower embedding-multimodal retriever-multimodal-redis lvm-tgi dataprep-multimodal-redis" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5 + docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 docker images && sleep 1s } diff --git a/SearchQnA/docker_compose/intel/hpu/gaudi/compose.yaml b/SearchQnA/docker_compose/intel/hpu/gaudi/compose.yaml index 5ade94cc1..a2f4cda83 100644 --- a/SearchQnA/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/SearchQnA/docker_compose/intel/hpu/gaudi/compose.yaml @@ -80,7 +80,7 @@ services: HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} restart: unless-stopped tgi-service: - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 container_name: tgi-gaudi-server ports: - "3006:80" diff --git a/SearchQnA/tests/test_compose_on_gaudi.sh b/SearchQnA/tests/test_compose_on_gaudi.sh index cefadaa88..fee28643e 100644 --- a/SearchQnA/tests/test_compose_on_gaudi.sh +++ b/SearchQnA/tests/test_compose_on_gaudi.sh @@ -23,7 +23,7 @@ function build_docker_images() { docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5 + docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 docker pull ghcr.io/huggingface/tei-gaudi:latest docker images && sleep 1s } diff --git a/Translation/docker_compose/intel/hpu/gaudi/compose.yaml b/Translation/docker_compose/intel/hpu/gaudi/compose.yaml index c470c441a..eabae1321 100644 --- a/Translation/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/Translation/docker_compose/intel/hpu/gaudi/compose.yaml @@ -3,7 +3,7 @@ services: tgi-service: - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 
container_name: tgi-gaudi-server ports: - "8008:80" diff --git a/Translation/kubernetes/intel/hpu/gaudi/manifest/translation.yaml b/Translation/kubernetes/intel/hpu/gaudi/manifest/translation.yaml index a1da33b79..25e39a700 100644 --- a/Translation/kubernetes/intel/hpu/gaudi/manifest/translation.yaml +++ b/Translation/kubernetes/intel/hpu/gaudi/manifest/translation.yaml @@ -362,7 +362,7 @@ spec: runAsUser: 1000 seccompProfile: type: RuntimeDefault - image: "ghcr.io/huggingface/tgi-gaudi:2.0.5" + image: "ghcr.io/huggingface/tgi-gaudi:2.0.6" imagePullPolicy: IfNotPresent volumeMounts: - mountPath: /data diff --git a/Translation/tests/test_compose_on_gaudi.sh b/Translation/tests/test_compose_on_gaudi.sh index 9515c95af..fad64f5ba 100644 --- a/Translation/tests/test_compose_on_gaudi.sh +++ b/Translation/tests/test_compose_on_gaudi.sh @@ -22,7 +22,7 @@ function build_docker_images() { service_list="translation translation-ui llm-tgi nginx" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5 + docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 docker images && sleep 1s } diff --git a/Translation/translation.yaml b/Translation/translation.yaml index 882eca8e2..f3a07da96 100644 --- a/Translation/translation.yaml +++ b/Translation/translation.yaml @@ -6,7 +6,7 @@ opea_micro_services: tgi-service: host: ${TGI_SERVICE_IP} ports: ${TGI_SERVICE_PORT} - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 volumes: - "./data:/data" runtime: habana diff --git a/VisualQnA/docker_compose/intel/hpu/gaudi/README.md b/VisualQnA/docker_compose/intel/hpu/gaudi/README.md index 84783353a..abb341f28 100644 --- a/VisualQnA/docker_compose/intel/hpu/gaudi/README.md +++ b/VisualQnA/docker_compose/intel/hpu/gaudi/README.md @@ -18,7 +18,7 @@ docker build --no-cache -t opea/nginx:latest --build-arg https_proxy=$https_prox ### 2. Pull TGI Gaudi Image ```bash -docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5 +docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 ``` ### 3. Build MegaService Docker Image @@ -43,7 +43,7 @@ docker build --no-cache -t opea/visualqna-ui:latest --build-arg https_proxy=$htt Then run the command `docker images`, you will have the following 5 Docker Images: -1. `ghcr.io/huggingface/tgi-gaudi:2.0.5` +1. `ghcr.io/huggingface/tgi-gaudi:2.0.6` 2. `opea/lvm-tgi:latest` 3. `opea/visualqna:latest` 4. `opea/visualqna-ui:latest` diff --git a/VisualQnA/docker_compose/intel/hpu/gaudi/compose.yaml b/VisualQnA/docker_compose/intel/hpu/gaudi/compose.yaml index 45732e832..bd587aa6f 100644 --- a/VisualQnA/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/VisualQnA/docker_compose/intel/hpu/gaudi/compose.yaml @@ -3,7 +3,7 @@ services: llava-tgi-service: - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 container_name: tgi-llava-gaudi-server ports: - "8399:80" diff --git a/VisualQnA/tests/test_compose_on_gaudi.sh b/VisualQnA/tests/test_compose_on_gaudi.sh index a489a2c7a..15f9fe7f2 100644 --- a/VisualQnA/tests/test_compose_on_gaudi.sh +++ b/VisualQnA/tests/test_compose_on_gaudi.sh @@ -21,7 +21,7 @@ function build_docker_images() { echo "Build all the images with --no-cache, check docker_image_build.log for details..." docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log - docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5 + docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 docker images && sleep 1s }
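After applying this bump, a quick sanity check is to confirm the redeployed containers actually run the new tag and that the server answers. A minimal sketch, not part of the patch itself: the `tgi-gaudi` name filter, host port `8008` (the mapping several of the compose files above use), and the `jq` filter are illustrative, so substitute whatever your compose file maps:

```bash
# List running TGI Gaudi containers with their image tags;
# anything still on 2.0.5 was started from a stale compose file.
docker ps --format '{{.Names}}\t{{.Image}}' | grep tgi-gaudi

# TGI exposes a /info endpoint reporting the server version and model.
curl -s http://localhost:8008/info | jq '{version, model_id}'
```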