[Translation] Support manifests and nginx (#812)
Signed-off-by: letonghan <letong.han@intel.com> Signed-off-by: root <root@a4bf019305c5.jf.intel.com> Co-authored-by: root <root@a4bf019305c5.jf.intel.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
This commit is contained in:
4
.github/CODEOWNERS
vendored
Normal file → Executable file
4
.github/CODEOWNERS
vendored
Normal file → Executable file
@@ -3,10 +3,10 @@
|
|||||||
/ChatQnA/ liang1.lv@intel.com
|
/ChatQnA/ liang1.lv@intel.com
|
||||||
/CodeGen/ liang1.lv@intel.com
|
/CodeGen/ liang1.lv@intel.com
|
||||||
/CodeTrans/ sihan.chen@intel.com
|
/CodeTrans/ sihan.chen@intel.com
|
||||||
/DocSum/ sihan.chen@intel.com
|
/DocSum/ letong.han@intel.com
|
||||||
/DocIndexRetriever/ xuhui.ren@intel.com chendi.xue@intel.com
|
/DocIndexRetriever/ xuhui.ren@intel.com chendi.xue@intel.com
|
||||||
/FaqGen/ xinyao.wang@intel.com
|
/FaqGen/ xinyao.wang@intel.com
|
||||||
/SearchQnA/ letong.han@intel.com
|
/SearchQnA/ sihan.chen@intel.com
|
||||||
/Translation/ liang1.lv@intel.com
|
/Translation/ liang1.lv@intel.com
|
||||||
/VisualQnA/ liang1.lv@intel.com
|
/VisualQnA/ liang1.lv@intel.com
|
||||||
/ProductivitySuite/ hoong.tee.yeoh@intel.com
|
/ProductivitySuite/ hoong.tee.yeoh@intel.com
|
||||||
|
|||||||
@@ -45,7 +45,7 @@ Deployment are based on released docker images by default, check [docker image l
|
|||||||
| DocSum | [Xeon Instructions](DocSum/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](DocSum/docker_compose/intel/hpu/gaudi/README.md) | [DocSum with Manifests](DocSum/kubernetes/intel/README.md) | [DocSum with Helm Charts](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts/docsum/README.md) | [DocSum with GMC](DocSum/kubernetes/intel/README_gmc.md) |
|
| DocSum | [Xeon Instructions](DocSum/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](DocSum/docker_compose/intel/hpu/gaudi/README.md) | [DocSum with Manifests](DocSum/kubernetes/intel/README.md) | [DocSum with Helm Charts](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts/docsum/README.md) | [DocSum with GMC](DocSum/kubernetes/intel/README_gmc.md) |
|
||||||
| SearchQnA | [Xeon Instructions](SearchQnA/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](SearchQnA/docker_compose/intel/hpu/gaudi/README.md) | Not Supported | Not Supported | [SearchQnA with GMC](SearchQnA/kubernetes/intel/README_gmc.md) |
|
| SearchQnA | [Xeon Instructions](SearchQnA/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](SearchQnA/docker_compose/intel/hpu/gaudi/README.md) | Not Supported | Not Supported | [SearchQnA with GMC](SearchQnA/kubernetes/intel/README_gmc.md) |
|
||||||
| FaqGen | [Xeon Instructions](FaqGen/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](FaqGen/docker_compose/intel/hpu/gaudi/README.md) | [FaqGen with Manifests](FaqGen/kubernetes/intel/README.md) | Not Supported | [FaqGen with GMC](FaqGen/kubernetes/intel/README_gmc.md) |
|
| FaqGen | [Xeon Instructions](FaqGen/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](FaqGen/docker_compose/intel/hpu/gaudi/README.md) | [FaqGen with Manifests](FaqGen/kubernetes/intel/README.md) | Not Supported | [FaqGen with GMC](FaqGen/kubernetes/intel/README_gmc.md) |
|
||||||
| Translation | [Xeon Instructions](Translation/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](Translation/docker_compose/intel/hpu/gaudi/README.md) | Not Supported | Not Supported | [Translation with GMC](Translation/kubernetes/intel/README_gmc.md) |
|
| Translation | [Xeon Instructions](Translation/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](Translation/docker_compose/intel/hpu/gaudi/README.md) | [Translation with Manifests](Translation/kubernetes/intel/README.md) | Not Supported | [Translation with GMC](Translation/kubernetes/intel/README_gmc.md) |
|
||||||
| AudioQnA | [Xeon Instructions](AudioQnA/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](AudioQnA/docker_compose/intel/hpu/gaudi/README.md) | [AudioQnA with Manifests](AudioQnA/kubernetes/intel/README.md) | Not Supported | [AudioQnA with GMC](AudioQnA/kubernetes/intel/README_gmc.md) |
|
| AudioQnA | [Xeon Instructions](AudioQnA/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](AudioQnA/docker_compose/intel/hpu/gaudi/README.md) | [AudioQnA with Manifests](AudioQnA/kubernetes/intel/README.md) | Not Supported | [AudioQnA with GMC](AudioQnA/kubernetes/intel/README_gmc.md) |
|
||||||
| VisualQnA | [Xeon Instructions](VisualQnA/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](VisualQnA/docker_compose/intel/hpu/gaudi/README.md) | [VisualQnA with Manifests](VisualQnA/kubernetes/intel/README.md) | Not Supported | [VisualQnA with GMC](VisualQnA/kubernetes/intel/README_gmc.md) |
|
| VisualQnA | [Xeon Instructions](VisualQnA/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](VisualQnA/docker_compose/intel/hpu/gaudi/README.md) | [VisualQnA with Manifests](VisualQnA/kubernetes/intel/README.md) | Not Supported | [VisualQnA with GMC](VisualQnA/kubernetes/intel/README_gmc.md) |
|
||||||
| ProductivitySuite | [Xeon Instructions](ProductivitySuite/docker_compose/intel/cpu/xeon/README.md) | Not Supported | [ProductivitySuite with Manifests](ProductivitySuite/kubernetes/intel/README.md) | Not Supported | Not Supported |
|
| ProductivitySuite | [Xeon Instructions](ProductivitySuite/docker_compose/intel/cpu/xeon/README.md) | Not Supported | [ProductivitySuite with Manifests](ProductivitySuite/kubernetes/intel/README.md) | Not Supported | Not Supported |
|
||||||
|
|||||||
@@ -41,30 +41,59 @@ cd GenAIExamples/Translation/ui
|
|||||||
docker build -t opea/translation-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f docker/Dockerfile .
|
docker build -t opea/translation-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f docker/Dockerfile .
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### 4. Build Nginx Docker Image
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd GenAIComps
|
||||||
|
docker build -t opea/translation-nginx:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/nginx/Dockerfile .
|
||||||
|
```
|
||||||
|
|
||||||
Then run the command `docker images`, you will have the following Docker Images:
|
Then run the command `docker images`, you will have the following Docker Images:
|
||||||
|
|
||||||
1. `opea/llm-tgi:latest`
|
1. `opea/llm-tgi:latest`
|
||||||
2. `opea/translation:latest`
|
2. `opea/translation:latest`
|
||||||
3. `opea/translation-ui:latest`
|
3. `opea/translation-ui:latest`
|
||||||
|
4. `opea/translation-nginx:latest`
|
||||||
|
|
||||||
## 🚀 Start Microservices
|
## 🚀 Start Microservices
|
||||||
|
|
||||||
|
### Required Models
|
||||||
|
|
||||||
|
By default, the LLM model is set to a default value as listed below:
|
||||||
|
|
||||||
|
| Service | Model |
|
||||||
|
| ------- | ----------------- |
|
||||||
|
| LLM | haoranxu/ALMA-13B |
|
||||||
|
|
||||||
|
Change the `LLM_MODEL_ID` below for your needs.
|
||||||
|
|
||||||
### Setup Environment Variables
|
### Setup Environment Variables
|
||||||
|
|
||||||
Since the `compose.yaml` will consume some environment variables, you need to set up them in advance as below.
|
1. Set the required environment variables:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
export http_proxy=${your_http_proxy}
|
# Example: host_ip="192.168.1.1"
|
||||||
export https_proxy=${your_http_proxy}
|
export host_ip="External_Public_IP"
|
||||||
export LLM_MODEL_ID="haoranxu/ALMA-13B"
|
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
|
||||||
export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
|
export no_proxy="Your_No_Proxy"
|
||||||
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
|
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
|
||||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
# Example: NGINX_PORT=80
|
||||||
export LLM_SERVICE_HOST_IP=${host_ip}
|
export NGINX_PORT=${your_nginx_port}
|
||||||
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/translation"
|
```
|
||||||
```
|
|
||||||
|
|
||||||
Note: Please replace with `host_ip` with you external IP address, do not use localhost.
|
2. If you are in a proxy environment, also set the proxy-related environment variables:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export http_proxy="Your_HTTP_Proxy"
|
||||||
|
export https_proxy="Your_HTTPs_Proxy"
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Set up other environment variables:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ../../../
|
||||||
|
source set_env.sh
|
||||||
|
```
|
||||||
|
|
||||||
### Start Microservice Docker Containers
|
### Start Microservice Docker Containers
|
||||||
|
|
||||||
@@ -99,6 +128,14 @@ docker compose up -d
|
|||||||
"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}'
|
"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
4. Nginx Service
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://${host_ip}:${NGINX_PORT}/v1/translation \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}'
|
||||||
|
```
|
||||||
|
|
||||||
Following the validation of all aforementioned microservices, we are now prepared to construct a mega-service.
|
Following the validation of all aforementioned microservices, we are now prepared to construct a mega-service.
|
||||||
|
|
||||||
## 🚀 Launch the UI
|
## 🚀 Launch the UI
|
||||||
|
|||||||
@@ -8,10 +8,12 @@ services:
|
|||||||
ports:
|
ports:
|
||||||
- "8008:80"
|
- "8008:80"
|
||||||
environment:
|
environment:
|
||||||
|
no_proxy: ${no_proxy}
|
||||||
http_proxy: ${http_proxy}
|
http_proxy: ${http_proxy}
|
||||||
https_proxy: ${https_proxy}
|
https_proxy: ${https_proxy}
|
||||||
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
|
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||||
|
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||||
volumes:
|
volumes:
|
||||||
- "./data:/data"
|
- "./data:/data"
|
||||||
shm_size: 1g
|
shm_size: 1g
|
||||||
@@ -25,10 +27,13 @@ services:
|
|||||||
- "9000:9000"
|
- "9000:9000"
|
||||||
ipc: host
|
ipc: host
|
||||||
environment:
|
environment:
|
||||||
|
no_proxy: ${no_proxy}
|
||||||
http_proxy: ${http_proxy}
|
http_proxy: ${http_proxy}
|
||||||
https_proxy: ${https_proxy}
|
https_proxy: ${https_proxy}
|
||||||
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
|
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
|
||||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||||
|
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||||
|
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
translation-xeon-backend-server:
|
translation-xeon-backend-server:
|
||||||
image: ${REGISTRY:-opea}/translation:${TAG:-latest}
|
image: ${REGISTRY:-opea}/translation:${TAG:-latest}
|
||||||
@@ -39,6 +44,7 @@ services:
|
|||||||
ports:
|
ports:
|
||||||
- "8888:8888"
|
- "8888:8888"
|
||||||
environment:
|
environment:
|
||||||
|
- no_proxy=${no_proxy}
|
||||||
- https_proxy=${https_proxy}
|
- https_proxy=${https_proxy}
|
||||||
- http_proxy=${http_proxy}
|
- http_proxy=${http_proxy}
|
||||||
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
|
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
|
||||||
@@ -53,11 +59,31 @@ services:
|
|||||||
ports:
|
ports:
|
||||||
- "5173:5173"
|
- "5173:5173"
|
||||||
environment:
|
environment:
|
||||||
|
- no_proxy=${no_proxy}
|
||||||
- https_proxy=${https_proxy}
|
- https_proxy=${https_proxy}
|
||||||
- http_proxy=${http_proxy}
|
- http_proxy=${http_proxy}
|
||||||
- BASE_URL=${BACKEND_SERVICE_ENDPOINT}
|
- BASE_URL=${BACKEND_SERVICE_ENDPOINT}
|
||||||
ipc: host
|
ipc: host
|
||||||
restart: always
|
restart: always
|
||||||
|
translation-xeon-nginx-server:
|
||||||
|
image: ${REGISTRY:-opea}/translation-nginx:${TAG:-latest}
|
||||||
|
container_name: translation-xeon-nginx-server
|
||||||
|
depends_on:
|
||||||
|
- translation-xeon-backend-server
|
||||||
|
- translation-xeon-ui-server
|
||||||
|
ports:
|
||||||
|
- "${NGINX_PORT:-80}:80"
|
||||||
|
environment:
|
||||||
|
- no_proxy=${no_proxy}
|
||||||
|
- https_proxy=${https_proxy}
|
||||||
|
- http_proxy=${http_proxy}
|
||||||
|
- FRONTEND_SERVICE_IP=${FRONTEND_SERVICE_IP}
|
||||||
|
- FRONTEND_SERVICE_PORT=${FRONTEND_SERVICE_PORT}
|
||||||
|
- BACKEND_SERVICE_NAME=${BACKEND_SERVICE_NAME}
|
||||||
|
- BACKEND_SERVICE_IP=${BACKEND_SERVICE_IP}
|
||||||
|
- BACKEND_SERVICE_PORT=${BACKEND_SERVICE_PORT}
|
||||||
|
ipc: host
|
||||||
|
restart: always
|
||||||
networks:
|
networks:
|
||||||
default:
|
default:
|
||||||
driver: bridge
|
driver: bridge
|
||||||
|
|||||||
@@ -29,34 +29,63 @@ docker build -t opea/translation:latest --build-arg https_proxy=$https_proxy --b
|
|||||||
Construct the frontend Docker image using the command below:
|
Construct the frontend Docker image using the command below:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
cd GenAIExamples/Translation
|
cd GenAIExamples/Translation/ui/
|
||||||
docker build -t opea/translation-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile .
|
docker build -t opea/translation-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile .
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### 4. Build Nginx Docker Image
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd GenAIComps
|
||||||
|
docker build -t opea/translation-nginx:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/nginx/Dockerfile .
|
||||||
|
```
|
||||||
|
|
||||||
Then run the command `docker images`, you will have the following four Docker Images:
|
Then run the command `docker images`, you will have the following four Docker Images:
|
||||||
|
|
||||||
1. `opea/llm-tgi:latest`
|
1. `opea/llm-tgi:latest`
|
||||||
2. `opea/translation:latest`
|
2. `opea/translation:latest`
|
||||||
3. `opea/translation-ui:latest`
|
3. `opea/translation-ui:latest`
|
||||||
|
4. `opea/translation-nginx:latest`
|
||||||
|
|
||||||
## 🚀 Start Microservices
|
## 🚀 Start Microservices
|
||||||
|
|
||||||
|
### Required Models
|
||||||
|
|
||||||
|
By default, the LLM model is set to a default value as listed below:
|
||||||
|
|
||||||
|
| Service | Model |
|
||||||
|
| ------- | ----------------- |
|
||||||
|
| LLM | haoranxu/ALMA-13B |
|
||||||
|
|
||||||
|
Change the `LLM_MODEL_ID` below for your needs.
|
||||||
|
|
||||||
### Setup Environment Variables
|
### Setup Environment Variables
|
||||||
|
|
||||||
Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below.
|
1. Set the required environment variables:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
export http_proxy=${your_http_proxy}
|
# Example: host_ip="192.168.1.1"
|
||||||
export https_proxy=${your_http_proxy}
|
export host_ip="External_Public_IP"
|
||||||
export LLM_MODEL_ID="haoranxu/ALMA-13B"
|
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
|
||||||
export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
|
export no_proxy="Your_No_Proxy"
|
||||||
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
|
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
|
||||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
# Example: NGINX_PORT=80
|
||||||
export LLM_SERVICE_HOST_IP=${host_ip}
|
export NGINX_PORT=${your_nginx_port}
|
||||||
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/translation"
|
```
|
||||||
```
|
|
||||||
|
|
||||||
Note: Please replace with `host_ip` with you external IP address, do not use localhost.
|
2. If you are in a proxy environment, also set the proxy-related environment variables:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export http_proxy="Your_HTTP_Proxy"
|
||||||
|
export https_proxy="Your_HTTPs_Proxy"
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Set up other environment variables:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ../../../
|
||||||
|
source set_env.sh
|
||||||
|
```
|
||||||
|
|
||||||
### Start Microservice Docker Containers
|
### Start Microservice Docker Containers
|
||||||
|
|
||||||
@@ -91,6 +120,14 @@ docker compose up -d
|
|||||||
"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}'
|
"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
4. Nginx Service
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://${host_ip}:${NGINX_PORT}/v1/translation \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}'
|
||||||
|
```
|
||||||
|
|
||||||
Following the validation of all aforementioned microservices, we are now prepared to construct a mega-service.
|
Following the validation of all aforementioned microservices, we are now prepared to construct a mega-service.
|
||||||
|
|
||||||
## 🚀 Launch the UI
|
## 🚀 Launch the UI
|
||||||
|
|||||||
@@ -10,7 +10,6 @@ services:
|
|||||||
environment:
|
environment:
|
||||||
http_proxy: ${http_proxy}
|
http_proxy: ${http_proxy}
|
||||||
https_proxy: ${https_proxy}
|
https_proxy: ${https_proxy}
|
||||||
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
|
|
||||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||||
@@ -36,6 +35,8 @@ services:
|
|||||||
https_proxy: ${https_proxy}
|
https_proxy: ${https_proxy}
|
||||||
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
|
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
|
||||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||||
|
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||||
|
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
translation-gaudi-backend-server:
|
translation-gaudi-backend-server:
|
||||||
image: ${REGISTRY:-opea}/translation:${TAG:-latest}
|
image: ${REGISTRY:-opea}/translation:${TAG:-latest}
|
||||||
@@ -65,6 +66,25 @@ services:
|
|||||||
- BASE_URL=${BACKEND_SERVICE_ENDPOINT}
|
- BASE_URL=${BACKEND_SERVICE_ENDPOINT}
|
||||||
ipc: host
|
ipc: host
|
||||||
restart: always
|
restart: always
|
||||||
|
translation-gaudi-nginx-server:
|
||||||
|
image: ${REGISTRY:-opea}/translation-nginx:${TAG:-latest}
|
||||||
|
container_name: translation-gaudi-nginx-server
|
||||||
|
depends_on:
|
||||||
|
- translation-gaudi-backend-server
|
||||||
|
- translation-gaudi-ui-server
|
||||||
|
ports:
|
||||||
|
- "${NGINX_PORT:-80}:80"
|
||||||
|
environment:
|
||||||
|
- no_proxy=${no_proxy}
|
||||||
|
- https_proxy=${https_proxy}
|
||||||
|
- http_proxy=${http_proxy}
|
||||||
|
- FRONTEND_SERVICE_IP=${FRONTEND_SERVICE_IP}
|
||||||
|
- FRONTEND_SERVICE_PORT=${FRONTEND_SERVICE_PORT}
|
||||||
|
- BACKEND_SERVICE_NAME=${BACKEND_SERVICE_NAME}
|
||||||
|
- BACKEND_SERVICE_IP=${BACKEND_SERVICE_IP}
|
||||||
|
- BACKEND_SERVICE_PORT=${BACKEND_SERVICE_PORT}
|
||||||
|
ipc: host
|
||||||
|
restart: always
|
||||||
|
|
||||||
networks:
|
networks:
|
||||||
default:
|
default:
|
||||||
|
|||||||
18
Translation/docker_compose/set_env.sh
Normal file
18
Translation/docker_compose/set_env.sh
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
|
||||||
|
export LLM_MODEL_ID="haoranxu/ALMA-13B"
|
||||||
|
export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
|
||||||
|
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
|
||||||
|
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||||
|
export LLM_SERVICE_HOST_IP=${host_ip}
|
||||||
|
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/translation"
|
||||||
|
export NGINX_PORT=80
|
||||||
|
export FRONTEND_SERVICE_IP=${host_ip}
|
||||||
|
export FRONTEND_SERVICE_PORT=5173
|
||||||
|
export BACKEND_SERVICE_NAME=translation
|
||||||
|
export BACKEND_SERVICE_IP=${host_ip}
|
||||||
|
export BACKEND_SERVICE_PORT=8888
|
||||||
@@ -23,3 +23,9 @@ services:
|
|||||||
dockerfile: comps/llms/text-generation/tgi/Dockerfile
|
dockerfile: comps/llms/text-generation/tgi/Dockerfile
|
||||||
extends: translation
|
extends: translation
|
||||||
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
|
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
|
||||||
|
nginx:
|
||||||
|
build:
|
||||||
|
context: GenAIComps
|
||||||
|
dockerfile: comps/nginx/Dockerfile
|
||||||
|
extends: translation
|
||||||
|
image: ${REGISTRY:-opea}/translation-nginx:${TAG:-latest}
|
||||||
|
|||||||
41
Translation/kubernetes/intel/README.md
Normal file
41
Translation/kubernetes/intel/README.md
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
# Deploy Translation in Kubernetes Cluster
|
||||||
|
|
||||||
|
> [NOTE]
|
||||||
|
> The following values must be set before you can deploy:
|
||||||
|
> HUGGINGFACEHUB_API_TOKEN
|
||||||
|
>
|
||||||
|
> You can also customize the "MODEL_ID" if needed.
|
||||||
|
>
|
||||||
|
> You need to make sure you have created the directory `/mnt/opea-models` to save the cached model on the node where the Translation workload is running. Otherwise, you need to modify the `translation.yaml` file to change the `model-volume` to a directory that exists on the node.
|
||||||
|
|
||||||
|
## Deploy On Xeon
|
||||||
|
|
||||||
|
```
|
||||||
|
cd GenAIExamples/Translation/kubernetes/intel/cpu/xeon/manifests
|
||||||
|
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
|
||||||
|
sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" translation.yaml
|
||||||
|
kubectl apply -f translation.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
## Deploy On Gaudi
|
||||||
|
|
||||||
|
```
|
||||||
|
cd GenAIExamples/Translation/kubernetes/intel/hpu/gaudi/manifests
|
||||||
|
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
|
||||||
|
sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" translation.yaml
|
||||||
|
kubectl apply -f translation.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
## Verify Services
|
||||||
|
|
||||||
|
To verify the installation, run the command `kubectl get pod` to make sure all pods are running.
|
||||||
|
|
||||||
|
Then run the command `kubectl port-forward svc/translation 8888:8888` to expose the Translation service for access.
|
||||||
|
|
||||||
|
Open another terminal and run the following command to verify the service if working:
|
||||||
|
|
||||||
|
```console
|
||||||
|
curl http://localhost:8888/v1/translation \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-d '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}'
|
||||||
|
```
|
||||||
495
Translation/kubernetes/intel/cpu/xeon/manifest/translation.yaml
Normal file
495
Translation/kubernetes/intel/cpu/xeon/manifest/translation.yaml
Normal file
@@ -0,0 +1,495 @@
|
|||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: v1
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: translation-tgi-config
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: tgi
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
app.kubernetes.io/version: "2.1.0"
|
||||||
|
data:
|
||||||
|
LLM_MODEL_ID: "haoranxu/ALMA-13B"
|
||||||
|
PORT: "2080"
|
||||||
|
HF_TOKEN: "insert-your-huggingface-token-here"
|
||||||
|
http_proxy: ""
|
||||||
|
https_proxy: ""
|
||||||
|
no_proxy: ""
|
||||||
|
HABANA_LOGS: "/tmp/habana_logs"
|
||||||
|
NUMBA_CACHE_DIR: "/tmp"
|
||||||
|
HF_HOME: "/tmp/.cache/huggingface"
|
||||||
|
CUDA_GRAPHS: "0"
|
||||||
|
---
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: v1
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: translation-llm-uservice-config
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: llm-uservice
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
app.kubernetes.io/version: "v1.0"
|
||||||
|
data:
|
||||||
|
TGI_LLM_ENDPOINT: "http://translation-tgi"
|
||||||
|
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
|
||||||
|
http_proxy: ""
|
||||||
|
https_proxy: ""
|
||||||
|
no_proxy: ""
|
||||||
|
LOGFLAG: ""
|
||||||
|
---
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: v1
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: translation-ui-config
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: translation-ui
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
app.kubernetes.io/version: "v1.0"
|
||||||
|
data:
|
||||||
|
BASE_URL: "/v1/translation"
|
||||||
|
---
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: v1
|
||||||
|
data:
|
||||||
|
default.conf: |+
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
|
||||||
|
server {
|
||||||
|
listen 80;
|
||||||
|
listen [::]:80;
|
||||||
|
|
||||||
|
location /home {
|
||||||
|
alias /usr/share/nginx/html/index.html;
|
||||||
|
}
|
||||||
|
|
||||||
|
location / {
|
||||||
|
proxy_pass http://translation-ui:5173;
|
||||||
|
proxy_set_header Host $host;
|
||||||
|
proxy_set_header X-Real-IP $remote_addr;
|
||||||
|
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||||
|
proxy_set_header X-Forwarded-Proto $scheme;
|
||||||
|
}
|
||||||
|
|
||||||
|
location /v1/translation {
|
||||||
|
proxy_pass http://translation:8888;
|
||||||
|
proxy_set_header Host $host;
|
||||||
|
proxy_set_header X-Real-IP $remote_addr;
|
||||||
|
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||||
|
proxy_set_header X-Forwarded-Proto $scheme;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: translation-nginx-config
|
||||||
|
---
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: translation-ui
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: translation-ui
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
app.kubernetes.io/version: "v1.0"
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
ports:
|
||||||
|
- port: 5173
|
||||||
|
targetPort: ui
|
||||||
|
protocol: TCP
|
||||||
|
name: ui
|
||||||
|
selector:
|
||||||
|
app.kubernetes.io/name: translation-ui
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
---
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: translation-llm-uservice
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: llm-uservice
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
app.kubernetes.io/version: "v1.0"
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
ports:
|
||||||
|
- port: 9000
|
||||||
|
targetPort: 9000
|
||||||
|
protocol: TCP
|
||||||
|
name: llm-uservice
|
||||||
|
selector:
|
||||||
|
app.kubernetes.io/name: llm-uservice
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
---
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: translation-tgi
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: tgi
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
app.kubernetes.io/version: "2.1.0"
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
ports:
|
||||||
|
- port: 80
|
||||||
|
targetPort: 2080
|
||||||
|
protocol: TCP
|
||||||
|
name: tgi
|
||||||
|
selector:
|
||||||
|
app.kubernetes.io/name: tgi
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: translation-nginx
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- port: 80
|
||||||
|
protocol: TCP
|
||||||
|
targetPort: 80
|
||||||
|
selector:
|
||||||
|
app.kubernetes.io/name: translation
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
app: translation-nginx
|
||||||
|
type: NodePort
|
||||||
|
---
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: translation
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: translation
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
app.kubernetes.io/version: "v1.0"
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
ports:
|
||||||
|
- port: 8888
|
||||||
|
targetPort: 8888
|
||||||
|
protocol: TCP
|
||||||
|
name: translation
|
||||||
|
selector:
|
||||||
|
app.kubernetes.io/name: translation
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
app: translation
|
||||||
|
---
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: translation-ui
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: translation-ui
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
app.kubernetes.io/version: "v1.0"
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: translation-ui
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: translation-ui
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
app.kubernetes.io/version: "v1.0"
|
||||||
|
spec:
|
||||||
|
securityContext:
|
||||||
|
{}
|
||||||
|
containers:
|
||||||
|
- name: translation-ui
|
||||||
|
envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: translation-ui-config
|
||||||
|
securityContext:
|
||||||
|
{}
|
||||||
|
image: "opea/translation-ui:latest"
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
ports:
|
||||||
|
- name: ui
|
||||||
|
containerPort: 80
|
||||||
|
protocol: TCP
|
||||||
|
resources:
|
||||||
|
{}
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /tmp
|
||||||
|
name: tmp
|
||||||
|
volumes:
|
||||||
|
- name: tmp
|
||||||
|
emptyDir: {}
|
||||||
|
---
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: translation-llm-uservice
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: llm-uservice
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
app.kubernetes.io/version: "v1.0"
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: llm-uservice
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: llm-uservice
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
spec:
|
||||||
|
securityContext:
|
||||||
|
{}
|
||||||
|
containers:
|
||||||
|
- name: translation
|
||||||
|
envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: translation-llm-uservice-config
|
||||||
|
securityContext:
|
||||||
|
allowPrivilegeEscalation: false
|
||||||
|
capabilities:
|
||||||
|
drop:
|
||||||
|
- ALL
|
||||||
|
readOnlyRootFilesystem: false
|
||||||
|
runAsNonRoot: true
|
||||||
|
runAsUser: 1000
|
||||||
|
seccompProfile:
|
||||||
|
type: RuntimeDefault
|
||||||
|
image: "opea/llm-tgi:latest"
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
ports:
|
||||||
|
- name: llm-uservice
|
||||||
|
containerPort: 9000
|
||||||
|
protocol: TCP
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /tmp
|
||||||
|
name: tmp
|
||||||
|
livenessProbe:
|
||||||
|
failureThreshold: 24
|
||||||
|
httpGet:
|
||||||
|
path: v1/health_check
|
||||||
|
port: llm-uservice
|
||||||
|
initialDelaySeconds: 5
|
||||||
|
periodSeconds: 5
|
||||||
|
readinessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: v1/health_check
|
||||||
|
port: llm-uservice
|
||||||
|
initialDelaySeconds: 5
|
||||||
|
periodSeconds: 5
|
||||||
|
startupProbe:
|
||||||
|
failureThreshold: 120
|
||||||
|
httpGet:
|
||||||
|
path: v1/health_check
|
||||||
|
port: llm-uservice
|
||||||
|
initialDelaySeconds: 5
|
||||||
|
periodSeconds: 5
|
||||||
|
resources:
|
||||||
|
{}
|
||||||
|
volumes:
|
||||||
|
- name: tmp
|
||||||
|
emptyDir: {}
|
||||||
|
---
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: translation-tgi
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: tgi
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
app.kubernetes.io/version: "2.1.0"
|
||||||
|
spec:
|
||||||
|
# use explicit replica counts only of HorizontalPodAutoscaler is disabled
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: tgi
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: tgi
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
spec:
|
||||||
|
securityContext:
|
||||||
|
{}
|
||||||
|
containers:
|
||||||
|
- name: tgi
|
||||||
|
envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: translation-tgi-config
|
||||||
|
securityContext:
|
||||||
|
allowPrivilegeEscalation: false
|
||||||
|
capabilities:
|
||||||
|
drop:
|
||||||
|
- ALL
|
||||||
|
readOnlyRootFilesystem: true
|
||||||
|
runAsNonRoot: true
|
||||||
|
runAsUser: 1000
|
||||||
|
seccompProfile:
|
||||||
|
type: RuntimeDefault
|
||||||
|
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /data
|
||||||
|
name: model-volume
|
||||||
|
- mountPath: /tmp
|
||||||
|
name: tmp
|
||||||
|
ports:
|
||||||
|
- name: http
|
||||||
|
containerPort: 2080
|
||||||
|
protocol: TCP
|
||||||
|
livenessProbe:
|
||||||
|
failureThreshold: 24
|
||||||
|
initialDelaySeconds: 5
|
||||||
|
periodSeconds: 5
|
||||||
|
tcpSocket:
|
||||||
|
port: http
|
||||||
|
readinessProbe:
|
||||||
|
initialDelaySeconds: 5
|
||||||
|
periodSeconds: 5
|
||||||
|
tcpSocket:
|
||||||
|
port: http
|
||||||
|
startupProbe:
|
||||||
|
failureThreshold: 120
|
||||||
|
initialDelaySeconds: 5
|
||||||
|
periodSeconds: 5
|
||||||
|
tcpSocket:
|
||||||
|
port: http
|
||||||
|
resources:
|
||||||
|
{}
|
||||||
|
volumes:
|
||||||
|
- name: model-volume
|
||||||
|
emptyDir: {}
|
||||||
|
- name: tmp
|
||||||
|
emptyDir: {}
|
||||||
|
---
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: translation
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: translation
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
app.kubernetes.io/version: "v1.0"
|
||||||
|
app: translation
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: translation
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
app: translation
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: translation
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
app: translation
|
||||||
|
spec:
|
||||||
|
securityContext:
|
||||||
|
null
|
||||||
|
containers:
|
||||||
|
- name: translation
|
||||||
|
env:
|
||||||
|
- name: LLM_SERVICE_HOST_IP
|
||||||
|
value: translation-llm-uservice
|
||||||
|
securityContext:
|
||||||
|
allowPrivilegeEscalation: false
|
||||||
|
capabilities:
|
||||||
|
drop:
|
||||||
|
- ALL
|
||||||
|
readOnlyRootFilesystem: true
|
||||||
|
runAsNonRoot: true
|
||||||
|
runAsUser: 1000
|
||||||
|
seccompProfile:
|
||||||
|
type: RuntimeDefault
|
||||||
|
image: "opea/translation:latest"
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /tmp
|
||||||
|
name: tmp
|
||||||
|
ports:
|
||||||
|
- name: translation
|
||||||
|
containerPort: 8888
|
||||||
|
protocol: TCP
|
||||||
|
resources:
|
||||||
|
null
|
||||||
|
volumes:
|
||||||
|
- name: tmp
|
||||||
|
emptyDir: {}
|
||||||
|
---
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: translation-nginx
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: translation
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
app.kubernetes.io/version: "v1.0"
|
||||||
|
app: translation-nginx
|
||||||
|
spec:
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: translation
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
app: translation-nginx
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: translation
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
app: translation-nginx
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- image: nginx:1.27.1
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: nginx
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /etc/nginx/conf.d
|
||||||
|
name: nginx-config-volume
|
||||||
|
securityContext: {}
|
||||||
|
volumes:
|
||||||
|
- configMap:
|
||||||
|
defaultMode: 420
|
||||||
|
name: translation-nginx-config
|
||||||
|
name: nginx-config-volume
|
||||||
497
Translation/kubernetes/intel/hpu/gaudi/manifest/translation.yaml
Normal file
497
Translation/kubernetes/intel/hpu/gaudi/manifest/translation.yaml
Normal file
@@ -0,0 +1,497 @@
|
|||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: v1
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: translation-tgi-config
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: tgi
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
app.kubernetes.io/version: "2.1.0"
|
||||||
|
data:
|
||||||
|
LLM_MODEL_ID: "haoranxu/ALMA-13B"
|
||||||
|
PORT: "2080"
|
||||||
|
HF_TOKEN: "insert-your-huggingface-token-here"
|
||||||
|
http_proxy: ""
|
||||||
|
https_proxy: ""
|
||||||
|
no_proxy: ""
|
||||||
|
HABANA_LOGS: "/tmp/habana_logs"
|
||||||
|
NUMBA_CACHE_DIR: "/tmp"
|
||||||
|
HF_HOME: "/tmp/.cache/huggingface"
|
||||||
|
MAX_INPUT_LENGTH: "1024"
|
||||||
|
MAX_TOTAL_TOKENS: "2048"
|
||||||
|
---
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: v1
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: translation-llm-uservice-config
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: llm-uservice
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
app.kubernetes.io/version: "v1.0"
|
||||||
|
data:
|
||||||
|
TGI_LLM_ENDPOINT: "http://translation-tgi"
|
||||||
|
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
|
||||||
|
http_proxy: ""
|
||||||
|
https_proxy: ""
|
||||||
|
no_proxy: ""
|
||||||
|
LOGFLAG: ""
|
||||||
|
---
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: v1
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: translation-ui-config
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: translation-ui
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
app.kubernetes.io/version: "v1.0"
|
||||||
|
data:
|
||||||
|
BASE_URL: "/v1/translation"
|
||||||
|
---
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: v1
|
||||||
|
data:
|
||||||
|
default.conf: |+
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
|
||||||
|
server {
|
||||||
|
listen 80;
|
||||||
|
listen [::]:80;
|
||||||
|
|
||||||
|
location /home {
|
||||||
|
alias /usr/share/nginx/html/index.html;
|
||||||
|
}
|
||||||
|
|
||||||
|
location / {
|
||||||
|
proxy_pass http://translation-ui:5173;
|
||||||
|
proxy_set_header Host $host;
|
||||||
|
proxy_set_header X-Real-IP $remote_addr;
|
||||||
|
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||||
|
proxy_set_header X-Forwarded-Proto $scheme;
|
||||||
|
}
|
||||||
|
|
||||||
|
location /v1/translation {
|
||||||
|
proxy_pass http://translation;
|
||||||
|
proxy_set_header Host $host;
|
||||||
|
proxy_set_header X-Real-IP $remote_addr;
|
||||||
|
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||||
|
proxy_set_header X-Forwarded-Proto $scheme;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: translation-nginx-config
|
||||||
|
---
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: translation-ui
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: translation-ui
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
app.kubernetes.io/version: "v1.0"
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
ports:
|
||||||
|
- port: 5173
|
||||||
|
targetPort: ui
|
||||||
|
protocol: TCP
|
||||||
|
name: ui
|
||||||
|
selector:
|
||||||
|
app.kubernetes.io/name: translation-ui
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
---
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: translation-llm-uservice
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: llm-uservice
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
app.kubernetes.io/version: "v1.0"
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
ports:
|
||||||
|
- port: 9000
|
||||||
|
targetPort: 9000
|
||||||
|
protocol: TCP
|
||||||
|
name: llm-uservice
|
||||||
|
selector:
|
||||||
|
app.kubernetes.io/name: llm-uservice
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
---
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: translation-tgi
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: tgi
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
app.kubernetes.io/version: "2.1.0"
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
ports:
|
||||||
|
- port: 80
|
||||||
|
targetPort: 2080
|
||||||
|
protocol: TCP
|
||||||
|
name: tgi
|
||||||
|
selector:
|
||||||
|
app.kubernetes.io/name: tgi
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: translation-nginx
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- port: 80
|
||||||
|
protocol: TCP
|
||||||
|
targetPort: 80
|
||||||
|
selector:
|
||||||
|
app.kubernetes.io/name: translation
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
app: translation-nginx
|
||||||
|
type: NodePort
|
||||||
|
---
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: translation
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: translation
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
app.kubernetes.io/version: "v1.0"
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
ports:
|
||||||
|
- port: 8888
|
||||||
|
targetPort: 8888
|
||||||
|
protocol: TCP
|
||||||
|
name: translation
|
||||||
|
selector:
|
||||||
|
app.kubernetes.io/name: translation
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
app: translation
|
||||||
|
---
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: translation-ui
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: translation-ui
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
app.kubernetes.io/version: "v1.0"
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: translation-ui
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: translation-ui
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
app.kubernetes.io/version: "v1.0"
|
||||||
|
spec:
|
||||||
|
securityContext:
|
||||||
|
{}
|
||||||
|
containers:
|
||||||
|
- name: translation-ui
|
||||||
|
envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: translation-ui-config
|
||||||
|
securityContext:
|
||||||
|
{}
|
||||||
|
image: "opea/translation-ui:latest"
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
ports:
|
||||||
|
- name: ui
|
||||||
|
containerPort: 80
|
||||||
|
protocol: TCP
|
||||||
|
resources:
|
||||||
|
{}
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /tmp
|
||||||
|
name: tmp
|
||||||
|
volumes:
|
||||||
|
- name: tmp
|
||||||
|
emptyDir: {}
|
||||||
|
---
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: translation-llm-uservice
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: llm-uservice
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
app.kubernetes.io/version: "v1.0"
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: llm-uservice
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: llm-uservice
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
spec:
|
||||||
|
securityContext:
|
||||||
|
{}
|
||||||
|
containers:
|
||||||
|
- name: translation
|
||||||
|
envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: translation-llm-uservice-config
|
||||||
|
securityContext:
|
||||||
|
allowPrivilegeEscalation: false
|
||||||
|
capabilities:
|
||||||
|
drop:
|
||||||
|
- ALL
|
||||||
|
readOnlyRootFilesystem: false
|
||||||
|
runAsNonRoot: true
|
||||||
|
runAsUser: 1000
|
||||||
|
seccompProfile:
|
||||||
|
type: RuntimeDefault
|
||||||
|
image: "opea/llm-tgi:latest"
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
ports:
|
||||||
|
- name: llm-uservice
|
||||||
|
containerPort: 9000
|
||||||
|
protocol: TCP
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /tmp
|
||||||
|
name: tmp
|
||||||
|
livenessProbe:
|
||||||
|
failureThreshold: 24
|
||||||
|
httpGet:
|
||||||
|
path: v1/health_check
|
||||||
|
port: llm-uservice
|
||||||
|
initialDelaySeconds: 5
|
||||||
|
periodSeconds: 5
|
||||||
|
readinessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: v1/health_check
|
||||||
|
port: llm-uservice
|
||||||
|
initialDelaySeconds: 5
|
||||||
|
periodSeconds: 5
|
||||||
|
startupProbe:
|
||||||
|
failureThreshold: 120
|
||||||
|
httpGet:
|
||||||
|
path: v1/health_check
|
||||||
|
port: llm-uservice
|
||||||
|
initialDelaySeconds: 5
|
||||||
|
periodSeconds: 5
|
||||||
|
resources:
|
||||||
|
{}
|
||||||
|
volumes:
|
||||||
|
- name: tmp
|
||||||
|
emptyDir: {}
|
||||||
|
---
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: translation-tgi
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: tgi
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
app.kubernetes.io/version: "2.1.0"
|
||||||
|
spec:
|
||||||
|
# use explicit replica counts only of HorizontalPodAutoscaler is disabled
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: tgi
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: tgi
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
spec:
|
||||||
|
securityContext:
|
||||||
|
{}
|
||||||
|
containers:
|
||||||
|
- name: tgi
|
||||||
|
envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: translation-tgi-config
|
||||||
|
securityContext:
|
||||||
|
allowPrivilegeEscalation: false
|
||||||
|
capabilities:
|
||||||
|
drop:
|
||||||
|
- ALL
|
||||||
|
readOnlyRootFilesystem: true
|
||||||
|
runAsNonRoot: true
|
||||||
|
runAsUser: 1000
|
||||||
|
seccompProfile:
|
||||||
|
type: RuntimeDefault
|
||||||
|
image: "ghcr.io/huggingface/tgi-gaudi:2.0.1"
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /data
|
||||||
|
name: model-volume
|
||||||
|
- mountPath: /tmp
|
||||||
|
name: tmp
|
||||||
|
ports:
|
||||||
|
- name: http
|
||||||
|
containerPort: 2080
|
||||||
|
protocol: TCP
|
||||||
|
livenessProbe:
|
||||||
|
failureThreshold: 24
|
||||||
|
initialDelaySeconds: 5
|
||||||
|
periodSeconds: 5
|
||||||
|
tcpSocket:
|
||||||
|
port: http
|
||||||
|
readinessProbe:
|
||||||
|
initialDelaySeconds: 5
|
||||||
|
periodSeconds: 5
|
||||||
|
tcpSocket:
|
||||||
|
port: http
|
||||||
|
startupProbe:
|
||||||
|
failureThreshold: 120
|
||||||
|
initialDelaySeconds: 20
|
||||||
|
periodSeconds: 5
|
||||||
|
tcpSocket:
|
||||||
|
port: http
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
habana.ai/gaudi: 1
|
||||||
|
volumes:
|
||||||
|
- name: model-volume
|
||||||
|
emptyDir: {}
|
||||||
|
- name: tmp
|
||||||
|
emptyDir: {}
|
||||||
|
---
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: translation
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: translation
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
app.kubernetes.io/version: "v1.0"
|
||||||
|
app: translation
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: translation
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
app: translation
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: translation
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
app: translation
|
||||||
|
spec:
|
||||||
|
securityContext:
|
||||||
|
null
|
||||||
|
containers:
|
||||||
|
- name: translation
|
||||||
|
env:
|
||||||
|
- name: LLM_SERVICE_HOST_IP
|
||||||
|
value: translation-llm-uservice
|
||||||
|
securityContext:
|
||||||
|
allowPrivilegeEscalation: false
|
||||||
|
capabilities:
|
||||||
|
drop:
|
||||||
|
- ALL
|
||||||
|
readOnlyRootFilesystem: true
|
||||||
|
runAsNonRoot: true
|
||||||
|
runAsUser: 1000
|
||||||
|
seccompProfile:
|
||||||
|
type: RuntimeDefault
|
||||||
|
image: "opea/translation:latest"
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /tmp
|
||||||
|
name: tmp
|
||||||
|
ports:
|
||||||
|
- name: translation
|
||||||
|
containerPort: 8888
|
||||||
|
protocol: TCP
|
||||||
|
resources:
|
||||||
|
null
|
||||||
|
volumes:
|
||||||
|
- name: tmp
|
||||||
|
emptyDir: {}
|
||||||
|
---
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: translation-nginx
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: translation
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
app.kubernetes.io/version: "v1.0"
|
||||||
|
app: translation-nginx
|
||||||
|
spec:
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: translation
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
app: translation-nginx
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: translation
|
||||||
|
app.kubernetes.io/instance: translation
|
||||||
|
app: translation-nginx
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- image: nginx:1.27.1
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: nginx
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /etc/nginx/conf.d
|
||||||
|
name: nginx-config-volume
|
||||||
|
securityContext: {}
|
||||||
|
volumes:
|
||||||
|
- configMap:
|
||||||
|
defaultMode: 420
|
||||||
|
name: translation-nginx-config
|
||||||
|
name: nginx-config-volume
|
||||||
@@ -19,7 +19,7 @@ function build_docker_images() {
|
|||||||
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
|
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
|
||||||
|
|
||||||
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
||||||
service_list="translation translation-ui llm-tgi"
|
service_list="translation translation-ui llm-tgi nginx"
|
||||||
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
|
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
|
||||||
|
|
||||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
|
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
|
||||||
@@ -35,6 +35,12 @@ function start_services() {
|
|||||||
export MEGA_SERVICE_HOST_IP=${ip_address}
|
export MEGA_SERVICE_HOST_IP=${ip_address}
|
||||||
export LLM_SERVICE_HOST_IP=${ip_address}
|
export LLM_SERVICE_HOST_IP=${ip_address}
|
||||||
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/translation"
|
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/translation"
|
||||||
|
export NGINX_PORT=80
|
||||||
|
export FRONTEND_SERVICE_IP=${ip_address}
|
||||||
|
export FRONTEND_SERVICE_PORT=5173
|
||||||
|
export BACKEND_SERVICE_NAME=translation
|
||||||
|
export BACKEND_SERVICE_IP=${ip_address}
|
||||||
|
export BACKEND_SERVICE_PORT=8888
|
||||||
|
|
||||||
sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
|
sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
|
||||||
|
|
||||||
@@ -80,8 +86,6 @@ function validate_services() {
|
|||||||
sleep 1s
|
sleep 1s
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
function validate_microservices() {
|
function validate_microservices() {
|
||||||
# Check if the microservices are running correctly.
|
# Check if the microservices are running correctly.
|
||||||
|
|
||||||
@@ -110,6 +114,14 @@ function validate_megaservice() {
|
|||||||
"mega-translation" \
|
"mega-translation" \
|
||||||
"translation-gaudi-backend-server" \
|
"translation-gaudi-backend-server" \
|
||||||
'{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}'
|
'{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}'
|
||||||
|
|
||||||
|
# test the megeservice via nginx
|
||||||
|
validate_services \
|
||||||
|
"${ip_address}:80/v1/translation" \
|
||||||
|
"translation" \
|
||||||
|
"mega-translation-nginx" \
|
||||||
|
"translation-gaudi-nginx-server" \
|
||||||
|
'{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}'
|
||||||
}
|
}
|
||||||
|
|
||||||
function validate_frontend() {
|
function validate_frontend() {
|
||||||
|
|||||||
@@ -19,10 +19,10 @@ function build_docker_images() {
|
|||||||
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
|
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
|
||||||
|
|
||||||
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
||||||
service_list="translation translation-ui llm-tgi"
|
service_list="translation translation-ui llm-tgi nginx"
|
||||||
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
|
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
|
||||||
|
|
||||||
docker pull ghcr.io/huggingface/text-generation-inference:1.4
|
docker pull ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
|
||||||
docker images && sleep 1s
|
docker images && sleep 1s
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -35,6 +35,12 @@ function start_services() {
|
|||||||
export MEGA_SERVICE_HOST_IP=${ip_address}
|
export MEGA_SERVICE_HOST_IP=${ip_address}
|
||||||
export LLM_SERVICE_HOST_IP=${ip_address}
|
export LLM_SERVICE_HOST_IP=${ip_address}
|
||||||
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/translation"
|
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/translation"
|
||||||
|
export NGINX_PORT=80
|
||||||
|
export FRONTEND_SERVICE_IP=${ip_address}
|
||||||
|
export FRONTEND_SERVICE_PORT=5173
|
||||||
|
export BACKEND_SERVICE_NAME=translation
|
||||||
|
export BACKEND_SERVICE_IP=${ip_address}
|
||||||
|
export BACKEND_SERVICE_PORT=8888
|
||||||
|
|
||||||
sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
|
sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
|
||||||
|
|
||||||
@@ -42,7 +48,8 @@ function start_services() {
|
|||||||
docker compose up -d > ${LOG_PATH}/start_services_with_compose.log
|
docker compose up -d > ${LOG_PATH}/start_services_with_compose.log
|
||||||
|
|
||||||
n=0
|
n=0
|
||||||
until [[ "$n" -ge 100 ]]; do
|
# wait long for llm model download
|
||||||
|
until [[ "$n" -ge 500 ]]; do
|
||||||
docker logs tgi-service > ${LOG_PATH}/tgi_service_start.log
|
docker logs tgi-service > ${LOG_PATH}/tgi_service_start.log
|
||||||
if grep -q Connected ${LOG_PATH}/tgi_service_start.log; then
|
if grep -q Connected ${LOG_PATH}/tgi_service_start.log; then
|
||||||
break
|
break
|
||||||
@@ -108,6 +115,14 @@ function validate_megaservice() {
|
|||||||
"mega-translation" \
|
"mega-translation" \
|
||||||
"translation-xeon-backend-server" \
|
"translation-xeon-backend-server" \
|
||||||
'{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}'
|
'{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}'
|
||||||
|
|
||||||
|
# test the megeservice via nginx
|
||||||
|
validate_services \
|
||||||
|
"${ip_address}:80/v1/translation" \
|
||||||
|
"translation" \
|
||||||
|
"mega-translation-nginx" \
|
||||||
|
"translation-xeon-nginx-server" \
|
||||||
|
'{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}'
|
||||||
}
|
}
|
||||||
|
|
||||||
function validate_frontend() {
|
function validate_frontend() {
|
||||||
|
|||||||
91
Translation/tests/test_manifest_on_gaudi.sh
Executable file
91
Translation/tests/test_manifest_on_gaudi.sh
Executable file
@@ -0,0 +1,91 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
set -xe
|
||||||
|
USER_ID=$(whoami)
|
||||||
|
LOG_PATH=/home/$(whoami)/logs
|
||||||
|
MOUNT_DIR=/home/$USER_ID/.cache/huggingface/hub
|
||||||
|
IMAGE_REPO=${IMAGE_REPO:-}
|
||||||
|
IMAGE_TAG=${IMAGE_TAG:-latest}
|
||||||
|
|
||||||
|
function init_translation() {
|
||||||
|
# executed under path manifest/translation/xeon
|
||||||
|
# replace the mount dir "path: /mnt/model" with "path: $CHART_MOUNT"
|
||||||
|
find . -name '*.yaml' -type f -exec sed -i "s#path: /mnt/opea-models#path: $MOUNT_DIR#g" {} \;
|
||||||
|
if [ $CONTEXT == "CI" ]; then
|
||||||
|
# replace megaservice image tag
|
||||||
|
find . -name '*.yaml' -type f -exec sed -i "s#image: \"opea/translation:latest#image: \"opea/translation:${IMAGE_TAG}#g" {} \;
|
||||||
|
else
|
||||||
|
# replace microservice image tag
|
||||||
|
find . -name '*.yaml' -type f -exec sed -i "s#image: \"opea/\(.*\):latest#image: \"opea/\1:${IMAGE_TAG}#g" {} \;
|
||||||
|
fi
|
||||||
|
# replace the repository "image: opea/*" with "image: $IMAGE_REPO/opea/"
|
||||||
|
find . -name '*.yaml' -type f -exec sed -i "s#image: \"opea/*#image: \"${IMAGE_REPO}opea/#g" {} \;
|
||||||
|
# set huggingface token
|
||||||
|
find . -name '*.yaml' -type f -exec sed -i "s#insert-your-huggingface-token-here#$(cat /home/$USER_ID/.cache/huggingface/token)#g" {} \;
|
||||||
|
}
|
||||||
|
|
||||||
|
function install_translation {
|
||||||
|
echo "namespace is $NAMESPACE"
|
||||||
|
kubectl apply -f translation.yaml -n $NAMESPACE
|
||||||
|
sleep 50s
|
||||||
|
}
|
||||||
|
|
||||||
|
function validate_translation() {
|
||||||
|
ip_address=$(kubectl get svc $SERVICE_NAME -n $NAMESPACE -o jsonpath='{.spec.clusterIP}')
|
||||||
|
port=$(kubectl get svc $SERVICE_NAME -n $NAMESPACE -o jsonpath='{.spec.ports[0].port}')
|
||||||
|
echo "try to curl http://${ip_address}:${port}/v1/translation..."
|
||||||
|
|
||||||
|
# generate a random logfile name to avoid conflict among multiple runners
|
||||||
|
LOGFILE=$LOG_PATH/curlmega_$NAMESPACE.log
|
||||||
|
# Curl the Mega Service
|
||||||
|
curl http://${ip_address}:${port}/v1/translation \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-d '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}' > $LOGFILE
|
||||||
|
exit_code=$?
|
||||||
|
if [ $exit_code -ne 0 ]; then
|
||||||
|
echo "Megaservice translation failed, please check the logs in $LOGFILE!"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "Checking response results, make sure the output is reasonable. "
|
||||||
|
local status=false
|
||||||
|
if [[ -f $LOGFILE ]] && \
|
||||||
|
[[ $(grep -c "translation" $LOGFILE) != 0 ]]; then
|
||||||
|
status=true
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ $status == false ]; then
|
||||||
|
echo "Response check failed, please check the logs in artifacts!"
|
||||||
|
else
|
||||||
|
echo "Response check succeed!"
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
if [ $# -eq 0 ]; then
|
||||||
|
echo "Usage: $0 <function_name>"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
case "$1" in
|
||||||
|
init_Translation)
|
||||||
|
pushd Translation/kubernetes/intel/hpu/gaudi/manifest
|
||||||
|
init_translation
|
||||||
|
popd
|
||||||
|
;;
|
||||||
|
install_Translation)
|
||||||
|
pushd Translation/kubernetes/intel/hpu/gaudi/manifest
|
||||||
|
NAMESPACE=$2
|
||||||
|
install_translation
|
||||||
|
popd
|
||||||
|
;;
|
||||||
|
validate_Translation)
|
||||||
|
NAMESPACE=$2
|
||||||
|
SERVICE_NAME=translation
|
||||||
|
validate_translation
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
echo "Unknown function: $1"
|
||||||
|
;;
|
||||||
|
esac
|
||||||
90
Translation/tests/test_manifest_on_xeon.sh
Executable file
90
Translation/tests/test_manifest_on_xeon.sh
Executable file
@@ -0,0 +1,90 @@
|
|||||||
|
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Manifest-based deployment test for the Translation example on Xeon.
# Optional env overrides: IMAGE_REPO (registry prefix, may end with '/'),
# IMAGE_TAG (defaults to "latest"); CONTEXT=CI switches init behavior.

set -xe
USER_ID=$(whoami)
# Reuse $USER_ID instead of invoking whoami a second time.
LOG_PATH=/home/$USER_ID/logs
# Host path of the HuggingFace model cache mounted into the pods.
MOUNT_DIR=/home/$USER_ID/.cache/huggingface/hub
IMAGE_REPO=${IMAGE_REPO:-}
IMAGE_TAG=${IMAGE_TAG:-latest}
||||||
|
function init_translation() {
    # Prepare the manifest YAMLs in the current working directory
    # (expected: Translation/kubernetes/intel/cpu/xeon/manifest):
    # rewrite the model mount path, image tags, image repository and
    # the HuggingFace token placeholder.

    # Replace the model mount "path: /mnt/opea-models" with the runner's cache.
    find . -name '*.yaml' -type f -exec sed -i "s#path: /mnt/opea-models#path: $MOUNT_DIR#g" {} \;

    # Use [[ ]] with a default expansion: the original `[ $CONTEXT == "CI" ]`
    # raised a test error whenever CONTEXT was unset or empty (SC2086).
    if [[ "${CONTEXT:-}" == "CI" ]]; then
        # CI run: retag only the megaservice image.
        find . -name '*.yaml' -type f -exec sed -i "s#image: \"opea/translation:latest#image: \"opea/translation:${IMAGE_TAG}#g" {} \;
    else
        # Otherwise retag every opea/* microservice image.
        find . -name '*.yaml' -type f -exec sed -i "s#image: \"opea/\(.*\):latest#image: \"opea/\1:${IMAGE_TAG}#g" {} \;
    fi

    # Prefix the image repository: "image: opea/..." -> "image: ${IMAGE_REPO}opea/...".
    find . -name '*.yaml' -type f -exec sed -i "s#image: \"opea/*#image: \"${IMAGE_REPO}opea/#g" {} \;

    # Inject the HuggingFace token only when the token file exists, so sed
    # never receives the output of a failing $(cat ...) substitution.
    local token_file="/home/$USER_ID/.cache/huggingface/token"
    if [[ -f "$token_file" ]]; then
        find . -name '*.yaml' -type f -exec sed -i "s#insert-your-huggingface-token-here#$(cat "$token_file")#g" {} \;
    fi
}
|
||||||
|
|
||||||
|
function install_translation {
    # Apply the Translation megaservice manifest into the target namespace.
    # Expects $NAMESPACE to be set by the caller and translation.yaml to
    # exist in the current directory.
    echo "namespace is $NAMESPACE"
    # Quote the expansion (SC2086) so the namespace can never word-split.
    kubectl apply -f translation.yaml -n "$NAMESPACE"
}
|
||||||
|
|
||||||
|
function validate_translation() {
    # Resolve the megaservice endpoint from the Kubernetes service, curl it
    # with a sample request and sanity-check the response.
    # Expects: $SERVICE_NAME, $NAMESPACE, $LOG_PATH set by the caller.
    local ip_address port
    ip_address=$(kubectl get svc "$SERVICE_NAME" -n "$NAMESPACE" -o jsonpath='{.spec.clusterIP}')
    port=$(kubectl get svc "$SERVICE_NAME" -n "$NAMESPACE" -o jsonpath='{.spec.ports[0].port}')
    echo "try to curl http://${ip_address}:${port}/v1/translation..."

    # Per-namespace logfile name to avoid conflict among multiple runners.
    LOGFILE=$LOG_PATH/curlmega_$NAMESPACE.log

    # Curl the Mega Service. Capture the status via `|| exit_code=$?`: the
    # original `curl ...; exit_code=$?` never reached the check under the
    # script-level `set -e`, because a failing curl aborted immediately.
    local exit_code=0
    curl "http://${ip_address}:${port}/v1/translation" \
        -H 'Content-Type: application/json' \
        -d '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}' > "$LOGFILE" || exit_code=$?
    if [ "$exit_code" -ne 0 ]; then
        echo "Megaservice translation failed, please check the logs in $LOGFILE!"
        exit 1
    fi

    echo "Checking response results, make sure the output is reasonable. "
    local status=false
    # The response must exist and mention "translation" at least once.
    if [[ -f "$LOGFILE" ]] && [[ $(grep -c "translation" "$LOGFILE") != 0 ]]; then
        status=true
    fi

    if [[ "$status" == "false" ]]; then
        # NOTE(review): a failed content check is only reported, not fatal
        # (no exit 1) — matches the original behavior; confirm whether the
        # CI job should fail here instead.
        echo "Response check failed, please check the logs in artifacts!"
    else
        echo "Response check succeed!"
    fi
}
|
||||||
|
|
||||||
|
# Dispatch: the first CLI argument selects which test phase to run.
if [ $# -eq 0 ]; then
    echo "Usage: $0 <function_name>"
    exit 1
fi

case "$1" in
    init_Translation)
        # Prepare the Xeon manifests in place before deployment.
        pushd Translation/kubernetes/intel/cpu/xeon/manifest
        init_translation
        popd
        ;;
    install_Translation)
        # $2 = target namespace for kubectl apply.
        pushd Translation/kubernetes/intel/cpu/xeon/manifest
        NAMESPACE=$2
        install_translation
        popd
        ;;
    validate_Translation)
        # $2 = namespace where the translation service was installed.
        NAMESPACE=$2
        SERVICE_NAME=translation
        validate_translation
        ;;
    *)
        echo "Unknown function: $1"
        # Fail loudly: a typo'd phase name must not let the CI job pass.
        exit 1
        ;;
esac
|
||||||
Reference in New Issue
Block a user