Compare commits

...

1 Commit

Author: NeuralChatBot
SHA1: 6a705ad3d4
Message: Update third party images tag
Signed-off-by: NeuralChatBot <grp_neural_chat_bot@intel.com>
Date: 2024-08-29 02:45:32 +00:00
21 changed files with 22 additions and 22 deletions
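
Every one of the 21 files below receives the same one-line change: the TGI CPU image tag is bumped from sha-e4201f4-intel-cpu to sha-8f99f16-intel-cpu. If you want to audit or reproduce such a tag bump locally, a minimal sketch follows (running from the repository root, and the grep/sed approach itself, are assumptions, not part of this commit):

```bash
# List every file that still references the old TGI image tag.
grep -rl 'sha-e4201f4-intel-cpu' .

# Rewrite the old tag to the new one in place (GNU sed).
grep -rl 'sha-e4201f4-intel-cpu' . | xargs sed -i \
  's/sha-e4201f4-intel-cpu/sha-8f99f16-intel-cpu/g'
```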


@@ -41,7 +41,7 @@ services:
environment:
TTS_ENDPOINT: ${TTS_ENDPOINT}
tgi-service:
-image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+image: ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu
container_name: tgi-service
ports:
- "3006:80"


@@ -103,7 +103,7 @@ services:
HF_HUB_ENABLE_HF_TRANSFER: 0
restart: unless-stopped
tgi-service:
-image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+image: ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu
container_name: tgi-service
ports:
- "9009:80"


@@ -102,7 +102,7 @@ services:
HF_HUB_ENABLE_HF_TRANSFER: 0
restart: unless-stopped
tgi-service:
-image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+image: ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu
container_name: tgi-service
ports:
- "6042:80"


@@ -70,7 +70,7 @@ services:
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
tgi-service:
-image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+image: ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu
container_name: tgi-service
ports:
- "9009:80"


@@ -20,7 +20,7 @@ The ChatQnA uses the below prebuilt images if you choose a Xeon deployment
- retriever: opea/retriever-redis:latest
- tei_xeon_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
- reranking: opea/reranking-tei:latest
-- tgi-service: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+- tgi-service: ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu
- llm: opea/llm-tgi:latest
- chaqna-xeon-backend-server: opea/chatqna:latest
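
The hunk above enumerates the prebuilt images for a Xeon ChatQnA deployment. As a hedged convenience, the snippet below pre-pulls the images visible in this hunk (the loop itself is an assumption; only the image references come from the list above):

```bash
# Pre-pull the prebuilt ChatQnA Xeon images listed above.
for img in \
  opea/retriever-redis:latest \
  ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 \
  opea/reranking-tei:latest \
  ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu \
  opea/llm-tgi:latest \
  opea/chatqna:latest
do
  docker pull "$img"
done
```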


@@ -1122,7 +1122,7 @@ spec:
name: chatqna-tgi-config
securityContext:
{}
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
image: "ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data


@@ -3,7 +3,7 @@
services:
tgi-service:
-image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+image: ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu
container_name: tgi-service
ports:
- "8028:80"


@@ -239,7 +239,7 @@ spec:
name: codegen-tgi-config
securityContext:
{}
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
image: "ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data


@@ -126,7 +126,7 @@ spec:
- name: no_proxy
value:
securityContext: {}
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
image: "ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data


@@ -3,7 +3,7 @@
services:
tgi-service:
-image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+image: ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu
container_name: codetrans-tgi-service
ports:
- "8008:80"


@@ -239,7 +239,7 @@ spec:
name: codetrans-tgi-config
securityContext:
{}
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
image: "ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data


@@ -3,7 +3,7 @@
services:
tgi-service:
-image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+image: ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu
container_name: tgi-service
ports:
- "8008:80"


@@ -8,7 +8,7 @@ Install GMC in your Kubernetes cluster, if you have not already done so, by foll
The DocSum application is defined as a Custom Resource (CR) file that the GMC operator described above acts upon. The operator first checks whether the microservices listed in the CR YAML file are running; if not, it starts them and then connects them. When the DocSum RAG pipeline is ready, the service endpoint details are returned, letting you use the application. If you run `kubectl get pods`, you will see all the component microservices, in particular embedding, retriever, rerank, and llm.
The DocSum pipeline uses prebuilt images. The Xeon version uses the prebuilt image llm-docsum-tgi:latest, which internally leverages
-the image ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu. The service is called tgi-svc. Meanwhile, the Gaudi version launches the
+the image ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu. The service is called tgi-svc. Meanwhile, the Gaudi version launches the
service tgi-gaudi-svc, which uses the image ghcr.io/huggingface/tgi-gaudi:1.2.1. Both TGI model services serve the model specified in the LLM_MODEL_ID environment variable that you export. In the example below we use Intel/neural-chat-7b-v3-3.
[NOTE]
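
As a companion to the DocSum description above, here is a minimal sketch of the model export and pod check it mentions (the model name, variable, and commands come from the text; everything else about your cluster is assumed):

```bash
# Both TGI model services serve the model named in LLM_MODEL_ID.
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"

# After the GMC operator starts the pipeline, list the component
# microservices (embedding, retriever, rerank, llm).
kubectl get pods
```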


@@ -239,7 +239,7 @@ spec:
name: docsum-tgi-config
securityContext:
{}
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
image: "ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data


@@ -126,7 +126,7 @@ spec:
- name: no_proxy
value:
securityContext: {}
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
image: "ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data


@@ -3,7 +3,7 @@
services:
tgi-service:
-image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+image: ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu
container_name: tgi-xeon-server
ports:
- "8008:80"


@@ -126,7 +126,7 @@ spec:
- name: no_proxy
value:
securityContext: {}
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
image: "ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data


@@ -73,7 +73,7 @@ services:
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
tgi-service:
-image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+image: ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu
container_name: tgi-service
ports:
- "3006:80"


@@ -3,7 +3,7 @@
services:
tgi-service:
-image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+image: ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu
container_name: tgi-service
ports:
- "8008:80"


@@ -71,12 +71,12 @@ cd ../../../..
### 4. Pull TGI Xeon Image
```bash
-docker pull ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+docker pull ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu
```
Then run the command `docker images`; you will see the following 4 Docker images:
-1. `ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu`
+1. `ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu`
2. `opea/lvm-tgi:latest`
3. `opea/visualqna:latest`
4. `opea/visualqna-ui:latest`
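
To verify that the four images listed above are present after the pull step, one hedged option is to filter the `docker images` output (only the grep pattern is new; the image names come from the README list):

```bash
# Show just the four images the README expects to be present.
docker images | grep -E 'text-generation-inference|opea/(lvm-tgi|visualqna)'
```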


@@ -3,7 +3,7 @@
services:
llava-tgi-service:
-image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+image: ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu
container_name: tgi-llava-xeon-server
ports:
- "8399:80"