From 954a22051b51a92751705c7a028b0ea31c8deebc Mon Sep 17 00:00:00 2001
From: Steve Zhang
Date: Tue, 24 Sep 2024 11:19:37 +0800
Subject: [PATCH] Make all xeon tgi image version consistent (#851)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 AudioQnA/kubernetes/intel/cpu/xeon/manifest/audioqna.yaml | 2 +-
 ChatQnA/docker_compose/intel/cpu/xeon/README.md | 4 ++--
 .../intel/cpu/xeon/manifest/chatqna-guardrails.yaml | 4 ++--
 DocSum/kubernetes/intel/README_gmc.md | 8 ++++----
 .../kubernetes/intel/cpu/xeon/manifest/chatqna.yaml | 2 +-
 .../kubernetes/intel/cpu/xeon/manifest/codegen.yaml | 2 +-
 .../kubernetes/intel/cpu/xeon/manifest/docsum.yaml | 2 +-
 .../kubernetes/intel/cpu/xeon/manifest/faqgen.yaml | 2 +-
 .../kubernetes/intel/cpu/xeon/manifest/visualqna.yaml | 2 +-
 VisualQnA/tests/test_compose_on_xeon.sh | 2 +-
 10 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/AudioQnA/kubernetes/intel/cpu/xeon/manifest/audioqna.yaml b/AudioQnA/kubernetes/intel/cpu/xeon/manifest/audioqna.yaml
index 5502d49fe..bd7677483 100644
--- a/AudioQnA/kubernetes/intel/cpu/xeon/manifest/audioqna.yaml
+++ b/AudioQnA/kubernetes/intel/cpu/xeon/manifest/audioqna.yaml
@@ -247,7 +247,7 @@ spec:
       - envFrom:
         - configMapRef:
             name: audio-qna-config
-        image: ghcr.io/huggingface/text-generation-inference:2.2.0
+        image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
         name: llm-dependency-deploy-demo
         securityContext:
           capabilities:
diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/README.md b/ChatQnA/docker_compose/intel/cpu/xeon/README.md
index 689716f11..ff636ea2c 100644
--- a/ChatQnA/docker_compose/intel/cpu/xeon/README.md
+++ b/ChatQnA/docker_compose/intel/cpu/xeon/README.md
@@ -233,7 +233,7 @@ For users in China who are unable to download models directly from Huggingface,
    export HF_TOKEN=${your_hf_token}
    export HF_ENDPOINT="https://hf-mirror.com"
    model_name="Intel/neural-chat-7b-v3-3"
-   docker run -p 8008:80 -v ./data:/data --name tgi-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.2.0 --model-id $model_name
+   docker run -p 8008:80 -v ./data:/data --name tgi-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --shm-size 1g ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu --model-id $model_name
    ```

 2. Offline
@@ -247,7 +247,7 @@ For users in China who are unable to download models directly from Huggingface,
    ```bash
    export HF_TOKEN=${your_hf_token}
    export model_path="/path/to/model"
-   docker run -p 8008:80 -v $model_path:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.2.0 --model-id /data
+   docker run -p 8008:80 -v $model_path:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu --model-id /data
    ```

 ### Setup Environment Variables
diff --git a/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna-guardrails.yaml b/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna-guardrails.yaml
index b8e95fbce..7e137bbfb 100644
--- a/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna-guardrails.yaml
+++ b/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna-guardrails.yaml
@@ -1474,7 +1474,7 @@ spec:
           runAsUser: 1000
           seccompProfile:
             type: RuntimeDefault
-        image: "ghcr.io/huggingface/text-generation-inference:2.2.0"
+        image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
         imagePullPolicy: IfNotPresent
         volumeMounts:
           - mountPath: /data
@@ -1554,7 +1554,7 @@ spec:
           runAsUser: 1000
           seccompProfile:
             type: RuntimeDefault
-        image: "ghcr.io/huggingface/text-generation-inference:2.2.0"
+        image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
         imagePullPolicy: IfNotPresent
         volumeMounts:
           - mountPath: /data
diff --git a/DocSum/kubernetes/intel/README_gmc.md b/DocSum/kubernetes/intel/README_gmc.md
index b050d7249..bc55df156 100644
--- a/DocSum/kubernetes/intel/README_gmc.md
+++ b/DocSum/kubernetes/intel/README_gmc.md
@@ -7,9 +7,9 @@ Install GMC in your Kubernetes cluster, if you have not already done so, by foll

 The DocSum application is defined as a Custom Resource (CR) file that the above GMC operator acts upon. It first checks if the microservices listed in the CR yaml file are running, if not it starts them and then proceeds to connect them. When the DocSum RAG pipeline is ready, the service endpoint details are returned, letting you use the application. Should you use "kubectl get pods" commands you will see all the component microservices, in particular embedding, retriever, rerank, and llm.

-The DocSum pipeline uses prebuilt images. The Xeon version uses the prebuilt image llm-docsum-tgi:latest which internally leverages the
-the image ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu. The service is called tgi-svc. Meanwhile, the Gaudi version launches the
-service tgi-gaudi-svc, which uses the image `ghcr.io/huggingface/tgi-gaudi:2.0.5`. Both TGI model services serve the model specified in the LLM_MODEL_ID variable that is exported by you. In the below example we use Intel/neural-chat-7b-v3-3.
+The DocSum pipeline uses prebuilt images. The Xeon version uses the prebuilt image `llm-docsum-tgi:latest` which internally leverages the
+the image `ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu`. The service is called tgi-svc. Meanwhile, the Gaudi version launches the
+service tgi-gaudi-svc, which uses the image `ghcr.io/huggingface/tgi-gaudi:2.0.5`. Both TGI model services serve the model specified in the LLM_MODEL_ID variable that is exported by you. In the below example we use `Intel/neural-chat-7b-v3-3`.

 [NOTE]
 Refer to [Docker Xeon README](https://github.com/opea-project/GenAIExamples/blob/main/DocSum/docker_compose/intel/cpu/xeon/README.md) or
@@ -17,7 +17,7 @@
 These will be available on Docker Hub soon, simplifying installation.

 ## Deploy the RAG pipeline
-This involves deploying the application pipeline custom resource. You can use docsum_xeon.yaml if you have just a Xeon cluster or docsum_gaudi.yaml if you have a Gaudi cluster.
+This involves deploying the application pipeline custom resource. You can use `docsum_xeon.yaml` if you have just a Xeon cluster or `docsum_gaudi.yaml` if you have a Gaudi cluster.

 1. Setup Environment variables. These are specific to the user. Skip the proxy settings if you are not operating behind one.

diff --git a/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/chatqna.yaml b/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/chatqna.yaml
index be2dfb5cb..43de640ad 100644
--- a/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/chatqna.yaml
+++ b/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/chatqna.yaml
@@ -993,7 +993,7 @@ spec:
               name: chatqna-tgi-config
           securityContext:
             {}
-          image: "ghcr.io/huggingface/text-generation-inference:2.1.0"
+          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/codegen.yaml b/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/codegen.yaml
index b9f38e9f7..6c52c5d92 100644
--- a/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/codegen.yaml
+++ b/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/codegen.yaml
@@ -229,7 +229,7 @@ spec:
               name: codegen-tgi-config
           securityContext:
             {}
-          image: "ghcr.io/huggingface/text-generation-inference:1.4"
+          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/docsum.yaml b/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/docsum.yaml
index dba69096a..0fda41f5e 100644
--- a/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/docsum.yaml
+++ b/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/docsum.yaml
@@ -229,7 +229,7 @@ spec:
               name: docsum-tgi-config
           securityContext:
             {}
-          image: "ghcr.io/huggingface/text-generation-inference:2.1.0"
+          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/faqgen.yaml b/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/faqgen.yaml
index b6f089f4d..749d98408 100644
--- a/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/faqgen.yaml
+++ b/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/faqgen.yaml
@@ -138,7 +138,7 @@ spec:
            - configMapRef:
                name: faqgen-tgi-config
          securityContext: {}
-          image: "ghcr.io/huggingface/text-generation-inference:2.1.0"
+          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
          imagePullPolicy: IfNotPresent
          volumeMounts:
            - mountPath: /data
diff --git a/VisualQnA/kubernetes/intel/cpu/xeon/manifest/visualqna.yaml b/VisualQnA/kubernetes/intel/cpu/xeon/manifest/visualqna.yaml
index c9a3ef32e..4d3ee3bf2 100644
--- a/VisualQnA/kubernetes/intel/cpu/xeon/manifest/visualqna.yaml
+++ b/VisualQnA/kubernetes/intel/cpu/xeon/manifest/visualqna.yaml
@@ -216,7 +216,7 @@ spec:
               name: visualqna-tgi-config
           securityContext:
             {}
-          image: "ghcr.io/huggingface/text-generation-inference:2.2.0"
+          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/VisualQnA/tests/test_compose_on_xeon.sh b/VisualQnA/tests/test_compose_on_xeon.sh
index 595561336..882989638 100644
--- a/VisualQnA/tests/test_compose_on_xeon.sh
+++ b/VisualQnA/tests/test_compose_on_xeon.sh
@@ -21,7 +21,7 @@ function build_docker_images() {
     echo "Build all the images with --no-cache, check docker_image_build.log for details..."
     docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log

-    docker pull ghcr.io/huggingface/text-generation-inference:2.2.0
+    docker pull ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu

     docker images && sleep 1s
 }