diff --git a/ChatQnA/benchmark/performance-deprecated/oob/with_rerank/eight_gaudi/oob_eight_gaudi_with_rerank.yaml b/ChatQnA/benchmark/performance-deprecated/oob/with_rerank/eight_gaudi/oob_eight_gaudi_with_rerank.yaml index 6198e2fac..0f7d6176b 100644 --- a/ChatQnA/benchmark/performance-deprecated/oob/with_rerank/eight_gaudi/oob_eight_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/performance-deprecated/oob/with_rerank/eight_gaudi/oob_eight_gaudi_with_rerank.yaml @@ -327,7 +327,7 @@ spec: envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tei-gaudi:latest + image: ghcr.io/huggingface/tei-gaudi:1.5.0 imagePullPolicy: IfNotPresent name: reranking-dependency-deploy ports: diff --git a/ChatQnA/benchmark/performance-deprecated/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/performance-deprecated/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml index 9c04cfc06..4a5b7b601 100644 --- a/ChatQnA/benchmark/performance-deprecated/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/performance-deprecated/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml @@ -327,7 +327,7 @@ spec: envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tei-gaudi:latest + image: ghcr.io/huggingface/tei-gaudi:1.5.0 imagePullPolicy: IfNotPresent name: reranking-dependency-deploy ports: diff --git a/ChatQnA/benchmark/performance-deprecated/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml b/ChatQnA/benchmark/performance-deprecated/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml index 30df76b97..9a8ce4a4b 100644 --- a/ChatQnA/benchmark/performance-deprecated/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/performance-deprecated/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml @@ -327,7 +327,7 @@ spec: envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tei-gaudi:latest + image: 
ghcr.io/huggingface/tei-gaudi:1.5.0 imagePullPolicy: IfNotPresent name: reranking-dependency-deploy ports: diff --git a/ChatQnA/benchmark/performance-deprecated/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/performance-deprecated/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml index 54cefbaf9..c80fc03e3 100644 --- a/ChatQnA/benchmark/performance-deprecated/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/performance-deprecated/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml @@ -327,7 +327,7 @@ spec: envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tei-gaudi:latest + image: ghcr.io/huggingface/tei-gaudi:1.5.0 imagePullPolicy: IfNotPresent name: reranking-dependency-deploy ports: diff --git a/ChatQnA/benchmark/performance-deprecated/tuned/with_rerank/eight_gaudi/eight_gaudi_with_rerank.yaml b/ChatQnA/benchmark/performance-deprecated/tuned/with_rerank/eight_gaudi/eight_gaudi_with_rerank.yaml index 80122d731..a9d63cb81 100644 --- a/ChatQnA/benchmark/performance-deprecated/tuned/with_rerank/eight_gaudi/eight_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/performance-deprecated/tuned/with_rerank/eight_gaudi/eight_gaudi_with_rerank.yaml @@ -345,7 +345,7 @@ spec: envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tei-gaudi:latest + image: ghcr.io/huggingface/tei-gaudi:1.5.0 imagePullPolicy: IfNotPresent name: reranking-dependency-deploy ports: diff --git a/ChatQnA/benchmark/performance-deprecated/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/performance-deprecated/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml index 970d2652a..7ec356d93 100644 --- a/ChatQnA/benchmark/performance-deprecated/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/performance-deprecated/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml @@ -345,7 +345,7 @@ spec: envFrom: - 
configMapRef: name: qna-config - image: ghcr.io/huggingface/tei-gaudi:latest + image: ghcr.io/huggingface/tei-gaudi:1.5.0 imagePullPolicy: IfNotPresent name: reranking-dependency-deploy ports: diff --git a/ChatQnA/benchmark/performance-deprecated/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml b/ChatQnA/benchmark/performance-deprecated/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml index a38294b3b..f64be532a 100644 --- a/ChatQnA/benchmark/performance-deprecated/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/performance-deprecated/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml @@ -345,7 +345,7 @@ spec: envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tei-gaudi:latest + image: ghcr.io/huggingface/tei-gaudi:1.5.0 imagePullPolicy: IfNotPresent name: reranking-dependency-deploy ports: diff --git a/ChatQnA/benchmark/performance-deprecated/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/performance-deprecated/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml index e6f94894c..ecf8de7b5 100644 --- a/ChatQnA/benchmark/performance-deprecated/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/performance-deprecated/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml @@ -345,7 +345,7 @@ spec: envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tei-gaudi:latest + image: ghcr.io/huggingface/tei-gaudi:1.5.0 imagePullPolicy: IfNotPresent name: reranking-dependency-deploy ports: diff --git a/ChatQnA/chatqna.yaml b/ChatQnA/chatqna.yaml index 89aac8aa8..0344b2831 100644 --- a/ChatQnA/chatqna.yaml +++ b/ChatQnA/chatqna.yaml @@ -19,7 +19,7 @@ opea_micro_services: tei-embedding-service: host: ${TEI_EMBEDDING_SERVICE_IP} ports: ${TEI_EMBEDDING_SERVICE_PORT} - image: ghcr.io/huggingface/tei-gaudi:latest + image: ghcr.io/huggingface/tei-gaudi:1.5.0 volumes: - 
"./data:/data" runtime: habana diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml index 0658c0c2f..170ab5435 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml @@ -57,7 +57,7 @@ services: HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} restart: unless-stopped tei-reranking-service: - image: ghcr.io/huggingface/tei-gaudi:latest + image: ghcr.io/huggingface/tei-gaudi:1.5.0 container_name: tei-reranking-gaudi-server ports: - "8808:80" diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml index 79cf3f371..7bebade29 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml @@ -96,7 +96,7 @@ services: HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} restart: unless-stopped tei-reranking-service: - image: ghcr.io/huggingface/tei-gaudi:latest + image: ghcr.io/huggingface/tei-gaudi:1.5.0 container_name: tei-reranking-gaudi-server ports: - "8808:80" diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm.yaml index 0f42a3fc6..bfbbb9570 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm.yaml +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm.yaml @@ -57,7 +57,7 @@ services: HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} restart: unless-stopped tei-reranking-service: - image: ghcr.io/huggingface/tei-gaudi:latest + image: ghcr.io/huggingface/tei-gaudi:1.5.0 container_name: tei-reranking-gaudi-server ports: - "8808:80" diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/how_to_validate_service.md b/ChatQnA/docker_compose/intel/hpu/gaudi/how_to_validate_service.md index 9214960c2..d9684e9db 100644 --- 
a/ChatQnA/docker_compose/intel/hpu/gaudi/how_to_validate_service.md +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/how_to_validate_service.md @@ -48,8 +48,8 @@ f810f3b4d329 opea/embedding-tei:latest "python e 2fa17d84605f opea/dataprep-redis:latest "python prepare_doc_…" 2 minutes ago Up 2 minutes 0.0.0.0:6007->6007/tcp, :::6007->6007/tcp dataprep-redis-server 69e1fb59e92c opea/retriever-redis:latest "/home/user/comps/re…" 2 minutes ago Up 2 minutes 0.0.0.0:7000->7000/tcp, :::7000->7000/tcp retriever-redis-server 313b9d14928a opea/reranking-tei:latest "python reranking_te…" 2 minutes ago Up 2 minutes 0.0.0.0:8000->8000/tcp, :::8000->8000/tcp reranking-tei-gaudi-server +174bd43fa6b5 ghcr.io/huggingface/tei-gaudi:1.5.0 "text-embeddings-rou…" 2 minutes ago Up 2 minutes 0.0.0.0:8090->80/tcp, :::8090->80/tcp tei-embedding-gaudi-server 05c40b636239 ghcr.io/huggingface/tgi-gaudi:2.0.6 "text-generation-lau…" 2 minutes ago Exited (1) About a minute ago tgi-gaudi-server -174bd43fa6b5 ghcr.io/huggingface/tei-gaudi:latest "text-embeddings-rou…" 2 minutes ago Up 2 minutes 0.0.0.0:8090->80/tcp, :::8090->80/tcp tei-embedding-gaudi-server 74084469aa33 redis/redis-stack:7.2.0-v9 "/entrypoint.sh" 2 minutes ago Up 2 minutes 0.0.0.0:6379->6379/tcp, :::6379->6379/tcp, 0.0.0.0:8001->8001/tcp, :::8001->8001/tcp redis-vector-db 88399dbc9e43 ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 "text-embeddings-rou…" 2 minutes ago Up 2 minutes 0.0.0.0:8808->80/tcp, :::8808->80/tcp tei-reranking-gaudi-server ``` diff --git a/ChatQnA/kubernetes/intel/README_gmc.md b/ChatQnA/kubernetes/intel/README_gmc.md index a2ffed26b..2c849c507 100644 --- a/ChatQnA/kubernetes/intel/README_gmc.md +++ b/ChatQnA/kubernetes/intel/README_gmc.md @@ -24,8 +24,9 @@ The ChatQnA uses the below prebuilt images if you choose a Xeon deployment Should you desire to use the Gaudi accelerator, two alternate images are used for the embedding and llm services. 
For Gaudi: -- tei-embedding-service: ghcr.io/huggingface/tei-gaudi:latest -- tgi-service: gghcr.io/huggingface/tgi-gaudi:2.0.6 +- tei-embedding-service: ghcr.io/huggingface/tei-gaudi:1.5.0 +- tgi-service: ghcr.io/huggingface/tgi-gaudi:2.0.6 + > [NOTE] > Please refer to [Xeon README](https://github.com/opea-project/GenAIExamples/blob/main/ChatQnA/docker_compose/intel/cpu/xeon/README.md) or [Gaudi README](https://github.com/opea-project/GenAIExamples/blob/main/ChatQnA/docker_compose/intel/hpu/gaudi/README.md) to build the OPEA images. These too will be available on Docker Hub soon to simplify use. diff --git a/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna-guardrails.yaml b/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna-guardrails.yaml index b2cc29e89..a802889f8 100644 --- a/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna-guardrails.yaml +++ b/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna-guardrails.yaml @@ -920,8 +920,8 @@ spec: runAsUser: 1000 seccompProfile: type: RuntimeDefault - image: "ghcr.io/huggingface/tei-gaudi:latest" - imagePullPolicy: Always + image: "ghcr.io/huggingface/tei-gaudi:1.5.0" + imagePullPolicy: IfNotPresent args: - "--auto-truncate" volumeMounts: diff --git a/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna-vllm.yaml b/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna-vllm.yaml index 2ca62a504..949e7cd8e 100644 --- a/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna-vllm.yaml +++ b/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna-vllm.yaml @@ -1106,8 +1106,8 @@ spec: privileged: true capabilities: add: ["SYS_NICE"] - image: "ghcr.io/huggingface/tei-gaudi:latest" - imagePullPolicy: Always + image: "ghcr.io/huggingface/tei-gaudi:1.5.0" + imagePullPolicy: IfNotPresent args: - "--auto-truncate" volumeMounts: diff --git a/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna.yaml b/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna.yaml index 72ba1ded9..7c31d09d6 100644 --- 
a/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna.yaml +++ b/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna.yaml @@ -741,8 +741,8 @@ spec: runAsUser: 1000 seccompProfile: type: RuntimeDefault - image: "ghcr.io/huggingface/tei-gaudi:latest" - imagePullPolicy: Always + image: "ghcr.io/huggingface/tei-gaudi:1.5.0" + imagePullPolicy: IfNotPresent args: - "--auto-truncate" volumeMounts: diff --git a/ChatQnA/tests/test_compose_guardrails_on_gaudi.sh b/ChatQnA/tests/test_compose_guardrails_on_gaudi.sh index 79871e10a..c186d6434 100644 --- a/ChatQnA/tests/test_compose_guardrails_on_gaudi.sh +++ b/ChatQnA/tests/test_compose_guardrails_on_gaudi.sh @@ -24,7 +24,7 @@ function build_docker_images() { docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - docker pull ghcr.io/huggingface/tei-gaudi:latest + docker pull ghcr.io/huggingface/tei-gaudi:1.5.0 docker images && sleep 1s } diff --git a/ChatQnA/tests/test_compose_on_gaudi.sh b/ChatQnA/tests/test_compose_on_gaudi.sh index 204ad3039..23c302e8c 100644 --- a/ChatQnA/tests/test_compose_on_gaudi.sh +++ b/ChatQnA/tests/test_compose_on_gaudi.sh @@ -24,7 +24,7 @@ function build_docker_images() { docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - docker pull ghcr.io/huggingface/tei-gaudi:latest + docker pull ghcr.io/huggingface/tei-gaudi:1.5.0 docker images && sleep 1s } diff --git a/ChatQnA/tests/test_compose_vllm_on_gaudi.sh b/ChatQnA/tests/test_compose_vllm_on_gaudi.sh index 89ab76888..26bef067d 100644 --- a/ChatQnA/tests/test_compose_vllm_on_gaudi.sh +++ b/ChatQnA/tests/test_compose_vllm_on_gaudi.sh @@ -24,7 +24,7 @@ function build_docker_images() { docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - docker pull ghcr.io/huggingface/tei-gaudi:latest + docker pull 
ghcr.io/huggingface/tei-gaudi:1.5.0 docker images && sleep 1s } diff --git a/ChatQnA/tests/test_compose_without_rerank_on_gaudi.sh b/ChatQnA/tests/test_compose_without_rerank_on_gaudi.sh index 0a59eabf7..f06a189ef 100644 --- a/ChatQnA/tests/test_compose_without_rerank_on_gaudi.sh +++ b/ChatQnA/tests/test_compose_without_rerank_on_gaudi.sh @@ -24,7 +24,7 @@ function build_docker_images() { docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - docker pull ghcr.io/huggingface/tei-gaudi:latest + docker pull ghcr.io/huggingface/tei-gaudi:1.5.0 docker images && sleep 1s } diff --git a/DocIndexRetriever/docker_compose/intel/hpu/gaudi/compose.yaml b/DocIndexRetriever/docker_compose/intel/hpu/gaudi/compose.yaml index 1d0a44505..fc8accadc 100644 --- a/DocIndexRetriever/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/DocIndexRetriever/docker_compose/intel/hpu/gaudi/compose.yaml @@ -28,7 +28,7 @@ services: TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} tei-embedding-service: - image: ghcr.io/huggingface/tei-gaudi:latest + image: ghcr.io/huggingface/tei-gaudi:1.5.0 container_name: tei-embedding-gaudi-server ports: - "8090:80" diff --git a/DocIndexRetriever/tests/test_compose_on_gaudi.sh b/DocIndexRetriever/tests/test_compose_on_gaudi.sh index 8779944be..e652ead26 100644 --- a/DocIndexRetriever/tests/test_compose_on_gaudi.sh +++ b/DocIndexRetriever/tests/test_compose_on_gaudi.sh @@ -24,7 +24,7 @@ function build_docker_images() { docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log docker pull redis/redis-stack:7.2.0-v9 - docker pull ghcr.io/huggingface/tei-gaudi:latest + docker pull ghcr.io/huggingface/tei-gaudi:1.5.0 docker images && sleep 1s } diff --git a/SearchQnA/docker_compose/intel/hpu/gaudi/compose.yaml b/SearchQnA/docker_compose/intel/hpu/gaudi/compose.yaml index a2f4cda83..9e679179c 100644 --- 
a/SearchQnA/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/SearchQnA/docker_compose/intel/hpu/gaudi/compose.yaml @@ -3,7 +3,7 @@ services: tei-embedding-service: - image: ghcr.io/huggingface/tei-gaudi:latest + image: ghcr.io/huggingface/tei-gaudi:1.5.0 container_name: tei-embedding-gaudi-server ports: - "3001:80" diff --git a/SearchQnA/tests/test_compose_on_gaudi.sh b/SearchQnA/tests/test_compose_on_gaudi.sh index fee28643e..94014051e 100644 --- a/SearchQnA/tests/test_compose_on_gaudi.sh +++ b/SearchQnA/tests/test_compose_on_gaudi.sh @@ -23,8 +23,8 @@ function build_docker_images() { docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + docker pull ghcr.io/huggingface/tei-gaudi:1.5.0 docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 - docker pull ghcr.io/huggingface/tei-gaudi:latest docker images && sleep 1s }