diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS old mode 100644 new mode 100755 index 5853274a1..3a6070efd --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -3,10 +3,10 @@ /ChatQnA/ liang1.lv@intel.com /CodeGen/ liang1.lv@intel.com /CodeTrans/ sihan.chen@intel.com -/DocSum/ sihan.chen@intel.com +/DocSum/ letong.han@intel.com /DocIndexRetriever/ xuhui.ren@intel.com chendi.xue@intel.com /FaqGen/ xinyao.wang@intel.com -/SearchQnA/ letong.han@intel.com +/SearchQnA/ sihan.chen@intel.com /Translation/ liang1.lv@intel.com /VisualQnA/ liang1.lv@intel.com /ProductivitySuite/ hoong.tee.yeoh@intel.com diff --git a/README.md b/README.md index 5a168648b..cbcabbe2f 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ Deployment are based on released docker images by default, check [docker image l | DocSum | [Xeon Instructions](DocSum/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](DocSum/docker_compose/intel/hpu/gaudi/README.md) | [DocSum with Manifests](DocSum/kubernetes/intel/README.md) | [DocSum with Helm Charts](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts/docsum/README.md) | [DocSum with GMC](DocSum/kubernetes/intel/README_gmc.md) | | SearchQnA | [Xeon Instructions](SearchQnA/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](SearchQnA/docker_compose/intel/hpu/gaudi/README.md) | Not Supported | Not Supported | [SearchQnA with GMC](SearchQnA/kubernetes/intel/README_gmc.md) | | FaqGen | [Xeon Instructions](FaqGen/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](FaqGen/docker_compose/intel/hpu/gaudi/README.md) | [FaqGen with Manifests](FaqGen/kubernetes/intel/README.md) | Not Supported | [FaqGen with GMC](FaqGen/kubernetes/intel/README_gmc.md) | -| Translation | [Xeon Instructions](Translation/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](Translation/docker_compose/intel/hpu/gaudi/README.md) | Not Supported | Not Supported | [Translation with GMC](Translation/kubernetes/intel/README_gmc.md) | +| Translation | [Xeon Instructions](Translation/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](Translation/docker_compose/intel/hpu/gaudi/README.md) | [Translation with Manifests](Translation/kubernetes/intel/README.md) | Not Supported | [Translation with GMC](Translation/kubernetes/intel/README_gmc.md) | | AudioQnA | [Xeon Instructions](AudioQnA/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](AudioQnA/docker_compose/intel/hpu/gaudi/README.md) | [AudioQnA with Manifests](AudioQnA/kubernetes/intel/README.md) | Not Supported | [AudioQnA with GMC](AudioQnA/kubernetes/intel/README_gmc.md) | | VisualQnA | [Xeon Instructions](VisualQnA/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](VisualQnA/docker_compose/intel/hpu/gaudi/README.md) | [VisualQnA with Manifests](VisualQnA/kubernetes/intel/README.md) | Not Supported | [VisualQnA with GMC](VisualQnA/kubernetes/intel/README_gmc.md) | | ProductivitySuite | [Xeon Instructions](ProductivitySuite/docker_compose/intel/cpu/xeon/README.md) | Not Supported | [ProductivitySuite with Manifests](ProductivitySuite/kubernetes/intel/README.md) | Not Supported | Not Supported | diff --git a/Translation/docker_compose/intel/cpu/xeon/README.md b/Translation/docker_compose/intel/cpu/xeon/README.md index 31e6e9654..306f8e35d 100644 --- a/Translation/docker_compose/intel/cpu/xeon/README.md +++ b/Translation/docker_compose/intel/cpu/xeon/README.md @@ -41,30 +41,59 @@ cd GenAIExamples/Translation/ui docker build -t opea/translation-ui:latest 
--build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f docker/Dockerfile . ``` +### 4. Build Nginx Docker Image + +```bash +cd GenAIComps +docker build -t opea/translation-nginx:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/nginx/Dockerfile . +``` + Then run the command `docker images`, you will have the following Docker Images: 1. `opea/llm-tgi:latest` 2. `opea/translation:latest` 3. `opea/translation-ui:latest` +4. `opea/translation-nginx:latest` ## 🚀 Start Microservices +### Required Models + +By default, the LLM model is set to the value listed below: + +| Service | Model | +| ------- | ----------------- | +| LLM | haoranxu/ALMA-13B | + +Change `LLM_MODEL_ID` below to suit your needs. + ### Setup Environment Variables -Since the `compose.yaml` will consume some environment variables, you need to set up them in advance as below. +1. Set the required environment variables: -```bash -export http_proxy=${your_http_proxy} -export https_proxy=${your_http_proxy} -export LLM_MODEL_ID="haoranxu/ALMA-13B" -export TGI_LLM_ENDPOINT="http://${host_ip}:8008" -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} -export MEGA_SERVICE_HOST_IP=${host_ip} -export LLM_SERVICE_HOST_IP=${host_ip} -export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/translation" -``` + ```bash + # Example: host_ip="192.168.1.1" + export host_ip="External_Public_IP" + # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1" + export no_proxy="Your_No_Proxy" + export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token" + # Example: NGINX_PORT=80 + export NGINX_PORT=${your_nginx_port} + ``` -Note: Please replace with `host_ip` with you external IP address, do not use localhost. +2. If you are in a proxy environment, also set the proxy-related environment variables: + + ```bash + export http_proxy="Your_HTTP_Proxy" + export https_proxy="Your_HTTPs_Proxy" + ``` + +3. Set up other environment variables: + + ```bash + cd ../../../ + source set_env.sh + ``` ### Start Microservice Docker Containers @@ -99,6 +128,14 @@ docker compose up -d "language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}' ``` +4. Nginx Service + + ```bash + curl http://${host_ip}:${NGINX_PORT}/v1/translation \ + -H "Content-Type: application/json" \ + -d '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}' + ``` + Following the validation of all aforementioned microservices, we are now prepared to construct a mega-service.
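If any of the curl checks above fail with a connection error, the most common cause is that TGI is still downloading the model on first start. A polling loop like the sketch below (assuming the default TGI host port 8008 from `compose.yaml`) waits for the endpoint to come up before you retry the validation:

```bash
# Poll the TGI /health endpoint until it responds, giving up after ~10 minutes.
for i in $(seq 1 60); do
  if curl -sf "http://${host_ip}:8008/health" > /dev/null; then
    echo "TGI service is ready."
    break
  fi
  echo "Waiting for TGI service... (${i}/60)"
  sleep 10
done
```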
## 🚀 Launch the UI diff --git a/Translation/docker_compose/intel/cpu/xeon/compose.yaml b/Translation/docker_compose/intel/cpu/xeon/compose.yaml index 4ba224bf3..e8eafca4f 100644 --- a/Translation/docker_compose/intel/cpu/xeon/compose.yaml +++ b/Translation/docker_compose/intel/cpu/xeon/compose.yaml @@ -8,10 +8,12 @@ services: ports: - "8008:80" environment: + no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 volumes: - "./data:/data" shm_size: 1g @@ -25,10 +27,13 @@ services: - "9000:9000" ipc: host environment: + no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 restart: unless-stopped translation-xeon-backend-server: image: ${REGISTRY:-opea}/translation:${TAG:-latest} @@ -39,6 +44,7 @@ services: ports: - "8888:8888" environment: + - no_proxy=${no_proxy} - https_proxy=${https_proxy} - http_proxy=${http_proxy} - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP} @@ -53,11 +59,31 @@ services: ports: - "5173:5173" environment: + - no_proxy=${no_proxy} - https_proxy=${https_proxy} - http_proxy=${http_proxy} - BASE_URL=${BACKEND_SERVICE_ENDPOINT} ipc: host restart: always + translation-xeon-nginx-server: + image: ${REGISTRY:-opea}/translation-nginx:${TAG:-latest} + container_name: translation-xeon-nginx-server + depends_on: + - translation-xeon-backend-server + - translation-xeon-ui-server + ports: + - "${NGINX_PORT:-80}:80" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - FRONTEND_SERVICE_IP=${FRONTEND_SERVICE_IP} + - FRONTEND_SERVICE_PORT=${FRONTEND_SERVICE_PORT} + - BACKEND_SERVICE_NAME=${BACKEND_SERVICE_NAME} + - BACKEND_SERVICE_IP=${BACKEND_SERVICE_IP} + - BACKEND_SERVICE_PORT=${BACKEND_SERVICE_PORT} + ipc: host + restart: always networks: default: driver: bridge diff --git a/Translation/docker_compose/intel/hpu/gaudi/README.md b/Translation/docker_compose/intel/hpu/gaudi/README.md index 1f8f82837..9f234496c 100644 --- a/Translation/docker_compose/intel/hpu/gaudi/README.md +++ b/Translation/docker_compose/intel/hpu/gaudi/README.md @@ -29,34 +29,63 @@ docker build -t opea/translation:latest --build-arg https_proxy=$https_proxy --b Construct the frontend Docker image using the command below: ```bash -cd GenAIExamples/Translation +cd GenAIExamples/Translation/ui/ docker build -t opea/translation-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile . ``` +### 4. Build Nginx Docker Image + +```bash +cd GenAIComps +docker build -t opea/translation-nginx:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/nginx/Dockerfile . +``` + Then run the command `docker images`, you will have the following four Docker Images: 1. `opea/llm-tgi:latest` 2. `opea/translation:latest` 3. `opea/translation-ui:latest` +4. `opea/translation-nginx:latest` ## 🚀 Start Microservices +### Required Models + +By default, the LLM model is set to the value listed below: + +| Service | Model | +| ------- | ----------------- | +| LLM | haoranxu/ALMA-13B | + +Change `LLM_MODEL_ID` below to suit your needs, as in the example that follows.
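For instance, to trade some translation quality for a smaller memory footprint, you could point the stack at the 7B variant of the same model family before starting the services (a sketch; `haoranxu/ALMA-7B` is assumed here, but any TGI-compatible model ID works the same way):

```bash
# Assumed alternative model ID; replace with any TGI-compatible model.
export LLM_MODEL_ID="haoranxu/ALMA-7B"
```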
+ ### Setup Environment Variables -Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below. +1. Set the required environment variables: -```bash -export http_proxy=${your_http_proxy} -export https_proxy=${your_http_proxy} -export LLM_MODEL_ID="haoranxu/ALMA-13B" -export TGI_LLM_ENDPOINT="http://${host_ip}:8008" -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} -export MEGA_SERVICE_HOST_IP=${host_ip} -export LLM_SERVICE_HOST_IP=${host_ip} -export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/translation" -``` + ```bash + # Example: host_ip="192.168.1.1" + export host_ip="External_Public_IP" + # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1" + export no_proxy="Your_No_Proxy" + export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token" + # Example: NGINX_PORT=80 + export NGINX_PORT=${your_nginx_port} + ``` -Note: Please replace with `host_ip` with you external IP address, do not use localhost. +2. If you are in a proxy environment, also set the proxy-related environment variables: + + ```bash + export http_proxy="Your_HTTP_Proxy" + export https_proxy="Your_HTTPs_Proxy" + ``` + +3. Set up other environment variables: + + ```bash + cd ../../../ + source set_env.sh + ``` ### Start Microservice Docker Containers @@ -91,6 +120,14 @@ docker compose up -d "language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}' ``` +4. Nginx Service + + ```bash + curl http://${host_ip}:${NGINX_PORT}/v1/translation \ + -H "Content-Type: application/json" \ + -d '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}' + ``` + Following the validation of all aforementioned microservices, we are now prepared to construct a mega-service. ## 🚀 Launch the UI diff --git a/Translation/docker_compose/intel/hpu/gaudi/compose.yaml b/Translation/docker_compose/intel/hpu/gaudi/compose.yaml index 32dbfdc3e..6eefd6492 100644 --- a/Translation/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/Translation/docker_compose/intel/hpu/gaudi/compose.yaml @@ -10,7 +10,6 @@ services: environment: http_proxy: ${http_proxy} https_proxy: ${https_proxy} - TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 @@ -36,6 +35,8 @@ services: https_proxy: ${https_proxy} TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 restart: unless-stopped translation-gaudi-backend-server: image: ${REGISTRY:-opea}/translation:${TAG:-latest} @@ -65,6 +66,25 @@ services: - BASE_URL=${BACKEND_SERVICE_ENDPOINT} ipc: host restart: always + translation-gaudi-nginx-server: + image: ${REGISTRY:-opea}/translation-nginx:${TAG:-latest} + container_name: translation-gaudi-nginx-server + depends_on: + - translation-gaudi-backend-server + - translation-gaudi-ui-server + ports: + - "${NGINX_PORT:-80}:80" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - FRONTEND_SERVICE_IP=${FRONTEND_SERVICE_IP} + - FRONTEND_SERVICE_PORT=${FRONTEND_SERVICE_PORT} + - BACKEND_SERVICE_NAME=${BACKEND_SERVICE_NAME} + - BACKEND_SERVICE_IP=${BACKEND_SERVICE_IP} + - BACKEND_SERVICE_PORT=${BACKEND_SERVICE_PORT} + ipc: host + restart: always networks: default: diff --git a/Translation/docker_compose/set_env.sh b/Translation/docker_compose/set_env.sh new file mode 100644 index 000000000..c82c8d360 --- /dev/null +++ 
b/Translation/docker_compose/set_env.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + + +export LLM_MODEL_ID="haoranxu/ALMA-13B" +export TGI_LLM_ENDPOINT="http://${host_ip}:8008" +export MEGA_SERVICE_HOST_IP=${host_ip} +export LLM_SERVICE_HOST_IP=${host_ip} +export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/translation" +export NGINX_PORT=80 +export FRONTEND_SERVICE_IP=${host_ip} +export FRONTEND_SERVICE_PORT=5173 +export BACKEND_SERVICE_NAME=translation +export BACKEND_SERVICE_IP=${host_ip} +export BACKEND_SERVICE_PORT=8888 diff --git a/Translation/docker_image_build/build.yaml b/Translation/docker_image_build/build.yaml index b326b125b..a1562060b 100644 --- a/Translation/docker_image_build/build.yaml +++ b/Translation/docker_image_build/build.yaml @@ -23,3 +23,9 @@ services: dockerfile: comps/llms/text-generation/tgi/Dockerfile extends: translation image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest} + nginx: + build: + context: GenAIComps + dockerfile: comps/nginx/Dockerfile + extends: translation + image: ${REGISTRY:-opea}/translation-nginx:${TAG:-latest} diff --git a/Translation/kubernetes/intel/README.md b/Translation/kubernetes/intel/README.md new file mode 100644 index 000000000..7ca89d372 --- /dev/null +++ b/Translation/kubernetes/intel/README.md @@ -0,0 +1,41 @@ +# Deploy Translation in Kubernetes Cluster + +> [!NOTE] +> The following values must be set before you can deploy: +> HUGGINGFACEHUB_API_TOKEN +> +> You can also customize the "MODEL_ID" if needed. +> +> Make sure the directory `/mnt/opea-models` exists on the node where the Translation workload runs, since it is used to cache the model. Otherwise, modify the `translation.yaml` file to point `model-volume` at a directory that does exist on the node. + +## Deploy On Xeon + +``` +cd GenAIExamples/Translation/kubernetes/intel/cpu/xeon/manifest +export HUGGINGFACEHUB_API_TOKEN="YourOwnToken" +sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" translation.yaml +kubectl apply -f translation.yaml +``` + +## Deploy On Gaudi + +``` +cd GenAIExamples/Translation/kubernetes/intel/hpu/gaudi/manifest +export HUGGINGFACEHUB_API_TOKEN="YourOwnToken" +sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" translation.yaml +kubectl apply -f translation.yaml +``` + +## Verify Services + +To verify the installation, run the command `kubectl get pod` to make sure all pods are running. + +Then run the command `kubectl port-forward svc/translation 8888:8888` to expose the Translation service for access.
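On a first deployment, the pods can stay not-ready for several minutes while the model downloads, so the verification below may fail if run too early. A wait along these lines (a sketch, assuming you deployed into the currently active namespace) blocks until all pods pass their readiness probes:

```bash
# Wait up to 20 minutes for every Translation pod to become Ready.
kubectl wait --for=condition=Ready pod --all --timeout=1200s
```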
+ +Open another terminal and run the following command to verify the service is working: + +```console +curl http://localhost:8888/v1/translation \ + -H 'Content-Type: application/json' \ + -d '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}' +``` diff --git a/Translation/kubernetes/intel/cpu/xeon/manifest/translation.yaml b/Translation/kubernetes/intel/cpu/xeon/manifest/translation.yaml new file mode 100644 index 000000000..e30fee338 --- /dev/null +++ b/Translation/kubernetes/intel/cpu/xeon/manifest/translation.yaml @@ -0,0 +1,495 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: translation-tgi-config + labels: + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: translation + app.kubernetes.io/version: "2.1.0" +data: + LLM_MODEL_ID: "haoranxu/ALMA-13B" + PORT: "2080" + HF_TOKEN: "insert-your-huggingface-token-here" + http_proxy: "" + https_proxy: "" + no_proxy: "" + HABANA_LOGS: "/tmp/habana_logs" + NUMBA_CACHE_DIR: "/tmp" + HF_HOME: "/tmp/.cache/huggingface" + CUDA_GRAPHS: "0" +--- +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: translation-llm-uservice-config + labels: + app.kubernetes.io/name: llm-uservice + app.kubernetes.io/instance: translation + app.kubernetes.io/version: "v1.0" +data: + TGI_LLM_ENDPOINT: "http://translation-tgi" + HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + http_proxy: "" + https_proxy: "" + no_proxy: "" + LOGFLAG: "" +--- +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: translation-ui-config + labels: + app.kubernetes.io/name: translation-ui + app.kubernetes.io/instance: translation + app.kubernetes.io/version: "v1.0" +data: + BASE_URL: "/v1/translation" +--- +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +data: + default.conf: |+ + # Copyright (C) 2024 Intel Corporation + # SPDX-License-Identifier: Apache-2.0 + + + server { + listen 80; + listen [::]:80; + + location /home { + alias /usr/share/nginx/html/index.html; + } + + location / { + proxy_pass http://translation-ui:5173; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + + location /v1/translation { + proxy_pass http://translation:8888; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + } + +kind: ConfigMap +metadata: + name: translation-nginx-config +--- +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: translation-ui + labels: + app.kubernetes.io/name: translation-ui + app.kubernetes.io/instance: translation + app.kubernetes.io/version: "v1.0" +spec: + type: ClusterIP + ports: + - port: 5173 + targetPort: ui + protocol: TCP + name: ui + selector: + app.kubernetes.io/name: translation-ui + app.kubernetes.io/instance: translation +--- +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: translation-llm-uservice + labels: + app.kubernetes.io/name: llm-uservice + app.kubernetes.io/instance: translation + 
app.kubernetes.io/version: "v1.0" +spec: + type: ClusterIP + ports: + - port: 9000 + targetPort: 9000 + protocol: TCP + name: llm-uservice + selector: + app.kubernetes.io/name: llm-uservice + app.kubernetes.io/instance: translation +--- +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: translation-tgi + labels: + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: translation + app.kubernetes.io/version: "2.1.0" +spec: + type: ClusterIP + ports: + - port: 80 + targetPort: 2080 + protocol: TCP + name: tgi + selector: + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: translation +--- +apiVersion: v1 +kind: Service +metadata: + name: translation-nginx +spec: + ports: + - port: 80 + protocol: TCP + targetPort: 80 + selector: + app.kubernetes.io/name: translation + app.kubernetes.io/instance: translation + app: translation-nginx + type: NodePort +--- +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: translation + labels: + app.kubernetes.io/name: translation + app.kubernetes.io/instance: translation + app.kubernetes.io/version: "v1.0" +spec: + type: ClusterIP + ports: + - port: 8888 + targetPort: 8888 + protocol: TCP + name: translation + selector: + app.kubernetes.io/name: translation + app.kubernetes.io/instance: translation + app: translation +--- +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: translation-ui + labels: + app.kubernetes.io/name: translation-ui + app.kubernetes.io/instance: translation + app.kubernetes.io/version: "v1.0" +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: translation-ui + app.kubernetes.io/instance: translation + template: + metadata: + labels: + app.kubernetes.io/name: translation-ui + app.kubernetes.io/instance: translation + app.kubernetes.io/version: "v1.0" + spec: + securityContext: + {} + containers: + - name: translation-ui + envFrom: + - configMapRef: + name: translation-ui-config + securityContext: + {} + image: "opea/translation-ui:latest" + imagePullPolicy: IfNotPresent + ports: + - name: ui + containerPort: 80 + protocol: TCP + resources: + {} + volumeMounts: + - mountPath: /tmp + name: tmp + volumes: + - name: tmp + emptyDir: {} +--- +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: translation-llm-uservice + labels: + app.kubernetes.io/name: llm-uservice + app.kubernetes.io/instance: translation + app.kubernetes.io/version: "v1.0" +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: llm-uservice + app.kubernetes.io/instance: translation + template: + metadata: + labels: + app.kubernetes.io/name: llm-uservice + app.kubernetes.io/instance: translation + spec: + securityContext: + {} + containers: + - name: translation + envFrom: + - configMapRef: + name: translation-llm-uservice-config + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: false + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + image: "opea/llm-tgi:latest" + imagePullPolicy: IfNotPresent + ports: + - name: llm-uservice + containerPort: 9000 + protocol: TCP + volumeMounts: + - mountPath: /tmp + name: tmp + livenessProbe: + failureThreshold: 24 + httpGet: + path: v1/health_check + port: llm-uservice + 
initialDelaySeconds: 5 + periodSeconds: 5 + readinessProbe: + httpGet: + path: v1/health_check + port: llm-uservice + initialDelaySeconds: 5 + periodSeconds: 5 + startupProbe: + failureThreshold: 120 + httpGet: + path: v1/health_check + port: llm-uservice + initialDelaySeconds: 5 + periodSeconds: 5 + resources: + {} + volumes: + - name: tmp + emptyDir: {} +--- +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: translation-tgi + labels: + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: translation + app.kubernetes.io/version: "2.1.0" +spec: + # use explicit replica counts only if HorizontalPodAutoscaler is disabled + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: translation + template: + metadata: + labels: + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: translation + spec: + securityContext: + {} + containers: + - name: tgi + envFrom: + - configMapRef: + name: translation-tgi-config + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu" + imagePullPolicy: IfNotPresent + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /tmp + name: tmp + ports: + - name: http + containerPort: 2080 + protocol: TCP + livenessProbe: + failureThreshold: 24 + initialDelaySeconds: 5 + periodSeconds: 5 + tcpSocket: + port: http + readinessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + tcpSocket: + port: http + startupProbe: + failureThreshold: 120 + initialDelaySeconds: 5 + periodSeconds: 5 + tcpSocket: + port: http + resources: + {} + volumes: + - name: model-volume + emptyDir: {} + - name: tmp + emptyDir: {} +--- +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: translation + labels: + app.kubernetes.io/name: translation + app.kubernetes.io/instance: translation + app.kubernetes.io/version: "v1.0" + app: translation +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: translation + app.kubernetes.io/instance: translation + app: translation + template: + metadata: + labels: + app.kubernetes.io/name: translation + app.kubernetes.io/instance: translation + app: translation + spec: + securityContext: + null + containers: + - name: translation + env: + - name: LLM_SERVICE_HOST_IP + value: translation-llm-uservice + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + image: "opea/translation:latest" + imagePullPolicy: IfNotPresent + volumeMounts: + - mountPath: /tmp + name: tmp + ports: + - name: translation + containerPort: 8888 + protocol: TCP + resources: + null + volumes: + - name: tmp + emptyDir: {} +--- +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: translation-nginx + labels: + app.kubernetes.io/name: translation + app.kubernetes.io/instance: translation + app.kubernetes.io/version: "v1.0" + app: translation-nginx +spec: + selector: + matchLabels: + app.kubernetes.io/name: translation + app.kubernetes.io/instance: translation + app: translation-nginx + template: + 
metadata: + labels: + app.kubernetes.io/name: translation + app.kubernetes.io/instance: translation + app: translation-nginx + spec: + containers: + - image: nginx:1.27.1 + imagePullPolicy: IfNotPresent + name: nginx + volumeMounts: + - mountPath: /etc/nginx/conf.d + name: nginx-config-volume + securityContext: {} + volumes: + - configMap: + defaultMode: 420 + name: translation-nginx-config + name: nginx-config-volume diff --git a/Translation/kubernetes/intel/hpu/gaudi/manifest/translation.yaml b/Translation/kubernetes/intel/hpu/gaudi/manifest/translation.yaml new file mode 100644 index 000000000..52d6c9b10 --- /dev/null +++ b/Translation/kubernetes/intel/hpu/gaudi/manifest/translation.yaml @@ -0,0 +1,497 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: translation-tgi-config + labels: + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: translation + app.kubernetes.io/version: "2.1.0" +data: + LLM_MODEL_ID: "haoranxu/ALMA-13B" + PORT: "2080" + HF_TOKEN: "insert-your-huggingface-token-here" + http_proxy: "" + https_proxy: "" + no_proxy: "" + HABANA_LOGS: "/tmp/habana_logs" + NUMBA_CACHE_DIR: "/tmp" + HF_HOME: "/tmp/.cache/huggingface" + MAX_INPUT_LENGTH: "1024" + MAX_TOTAL_TOKENS: "2048" +--- +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: translation-llm-uservice-config + labels: + app.kubernetes.io/name: llm-uservice + app.kubernetes.io/instance: translation + app.kubernetes.io/version: "v1.0" +data: + TGI_LLM_ENDPOINT: "http://translation-tgi" + HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + http_proxy: "" + https_proxy: "" + no_proxy: "" + LOGFLAG: "" +--- +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: translation-ui-config + labels: + app.kubernetes.io/name: translation-ui + app.kubernetes.io/instance: translation + app.kubernetes.io/version: "v1.0" +data: + BASE_URL: "/v1/translation" +--- +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +data: + default.conf: |+ + # Copyright (C) 2024 Intel Corporation + # SPDX-License-Identifier: Apache-2.0 + + + server { + listen 80; + listen [::]:80; + + location /home { + alias /usr/share/nginx/html/index.html; + } + + location / { + proxy_pass http://translation-ui:5173; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + + location /v1/translation { + proxy_pass http://translation:8888; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + } + +kind: ConfigMap +metadata: + name: translation-nginx-config +--- +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: translation-ui + labels: + app.kubernetes.io/name: translation-ui + app.kubernetes.io/instance: translation + app.kubernetes.io/version: "v1.0" +spec: + type: ClusterIP + ports: + - port: 5173 + targetPort: ui + protocol: TCP + name: ui + selector: + app.kubernetes.io/name: translation-ui + app.kubernetes.io/instance: translation +--- +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + 
+apiVersion: v1 +kind: Service +metadata: + name: translation-llm-uservice + labels: + app.kubernetes.io/name: llm-uservice + app.kubernetes.io/instance: translation + app.kubernetes.io/version: "v1.0" +spec: + type: ClusterIP + ports: + - port: 9000 + targetPort: 9000 + protocol: TCP + name: llm-uservice + selector: + app.kubernetes.io/name: llm-uservice + app.kubernetes.io/instance: translation +--- +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: translation-tgi + labels: + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: translation + app.kubernetes.io/version: "2.1.0" +spec: + type: ClusterIP + ports: + - port: 80 + targetPort: 2080 + protocol: TCP + name: tgi + selector: + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: translation +--- +apiVersion: v1 +kind: Service +metadata: + name: translation-nginx +spec: + ports: + - port: 80 + protocol: TCP + targetPort: 80 + selector: + app.kubernetes.io/name: translation + app.kubernetes.io/instance: translation + app: translation-nginx + type: NodePort +--- +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: translation + labels: + app.kubernetes.io/name: translation + app.kubernetes.io/instance: translation + app.kubernetes.io/version: "v1.0" +spec: + type: ClusterIP + ports: + - port: 8888 + targetPort: 8888 + protocol: TCP + name: translation + selector: + app.kubernetes.io/name: translation + app.kubernetes.io/instance: translation + app: translation +--- +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: translation-ui + labels: + app.kubernetes.io/name: translation-ui + app.kubernetes.io/instance: translation + app.kubernetes.io/version: "v1.0" +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: translation-ui + app.kubernetes.io/instance: translation + template: + metadata: + labels: + app.kubernetes.io/name: translation-ui + app.kubernetes.io/instance: translation + app.kubernetes.io/version: "v1.0" + spec: + securityContext: + {} + containers: + - name: translation-ui + envFrom: + - configMapRef: + name: translation-ui-config + securityContext: + {} + image: "opea/translation-ui:latest" + imagePullPolicy: IfNotPresent + ports: + - name: ui + containerPort: 80 + protocol: TCP + resources: + {} + volumeMounts: + - mountPath: /tmp + name: tmp + volumes: + - name: tmp + emptyDir: {} +--- +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: translation-llm-uservice + labels: + app.kubernetes.io/name: llm-uservice + app.kubernetes.io/instance: translation + app.kubernetes.io/version: "v1.0" +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: llm-uservice + app.kubernetes.io/instance: translation + template: + metadata: + labels: + app.kubernetes.io/name: llm-uservice + app.kubernetes.io/instance: translation + spec: + securityContext: + {} + containers: + - name: translation + envFrom: + - configMapRef: + name: translation-llm-uservice-config + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: false + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + image: "opea/llm-tgi:latest" + imagePullPolicy: IfNotPresent + ports: + - name: llm-uservice + containerPort: 
9000 + protocol: TCP + volumeMounts: + - mountPath: /tmp + name: tmp + livenessProbe: + failureThreshold: 24 + httpGet: + path: v1/health_check + port: llm-uservice + initialDelaySeconds: 5 + periodSeconds: 5 + readinessProbe: + httpGet: + path: v1/health_check + port: llm-uservice + initialDelaySeconds: 5 + periodSeconds: 5 + startupProbe: + failureThreshold: 120 + httpGet: + path: v1/health_check + port: llm-uservice + initialDelaySeconds: 5 + periodSeconds: 5 + resources: + {} + volumes: + - name: tmp + emptyDir: {} +--- +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: translation-tgi + labels: + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: translation + app.kubernetes.io/version: "2.1.0" +spec: + # use explicit replica counts only if HorizontalPodAutoscaler is disabled + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: translation + template: + metadata: + labels: + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: translation + spec: + securityContext: + {} + containers: + - name: tgi + envFrom: + - configMapRef: + name: translation-tgi-config + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + image: "ghcr.io/huggingface/tgi-gaudi:2.0.1" + imagePullPolicy: IfNotPresent + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /tmp + name: tmp + ports: + - name: http + containerPort: 2080 + protocol: TCP + livenessProbe: + failureThreshold: 24 + initialDelaySeconds: 5 + periodSeconds: 5 + tcpSocket: + port: http + readinessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + tcpSocket: + port: http + startupProbe: + failureThreshold: 120 + initialDelaySeconds: 20 + periodSeconds: 5 + tcpSocket: + port: http + resources: + limits: + habana.ai/gaudi: 1 + volumes: + - name: model-volume + emptyDir: {} + - name: tmp + emptyDir: {} +--- +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: translation + labels: + app.kubernetes.io/name: translation + app.kubernetes.io/instance: translation + app.kubernetes.io/version: "v1.0" + app: translation +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: translation + app.kubernetes.io/instance: translation + app: translation + template: + metadata: + labels: + app.kubernetes.io/name: translation + app.kubernetes.io/instance: translation + app: translation + spec: + securityContext: + null + containers: + - name: translation + env: + - name: LLM_SERVICE_HOST_IP + value: translation-llm-uservice + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + image: "opea/translation:latest" + imagePullPolicy: IfNotPresent + volumeMounts: + - mountPath: /tmp + name: tmp + ports: + - name: translation + containerPort: 8888 + protocol: TCP + resources: + null + volumes: + - name: tmp + emptyDir: {} +--- +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: translation-nginx + labels: + app.kubernetes.io/name: translation + app.kubernetes.io/instance: translation + app.kubernetes.io/version: "v1.0" + app: 
translation-nginx +spec: + selector: + matchLabels: + app.kubernetes.io/name: translation + app.kubernetes.io/instance: translation + app: translation-nginx + template: + metadata: + labels: + app.kubernetes.io/name: translation + app.kubernetes.io/instance: translation + app: translation-nginx + spec: + containers: + - image: nginx:1.27.1 + imagePullPolicy: IfNotPresent + name: nginx + volumeMounts: + - mountPath: /etc/nginx/conf.d + name: nginx-config-volume + securityContext: {} + volumes: + - configMap: + defaultMode: 420 + name: translation-nginx-config + name: nginx-config-volume diff --git a/Translation/tests/test_compose_on_gaudi.sh b/Translation/tests/test_compose_on_gaudi.sh index f66af96cb..558ec9e28 100644 --- a/Translation/tests/test_compose_on_gaudi.sh +++ b/Translation/tests/test_compose_on_gaudi.sh @@ -19,7 +19,7 @@ function build_docker_images() { git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../ echo "Build all the images with --no-cache, check docker_image_build.log for details..." - service_list="translation translation-ui llm-tgi" + service_list="translation translation-ui llm-tgi nginx" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1 @@ -35,6 +35,12 @@ function start_services() { export MEGA_SERVICE_HOST_IP=${ip_address} export LLM_SERVICE_HOST_IP=${ip_address} export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/translation" + export NGINX_PORT=80 + export FRONTEND_SERVICE_IP=${ip_address} + export FRONTEND_SERVICE_PORT=5173 + export BACKEND_SERVICE_NAME=translation + export BACKEND_SERVICE_IP=${ip_address} + export BACKEND_SERVICE_PORT=8888 sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env @@ -80,8 +86,6 @@ function validate_services() { sleep 1s } - - function validate_microservices() { # Check if the microservices are running correctly. @@ -110,6 +114,14 @@ function validate_megaservice() { "mega-translation" \ "translation-gaudi-backend-server" \ '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}' + + # test the megaservice via nginx + validate_services \ + "${ip_address}:80/v1/translation" \ + "translation" \ + "mega-translation-nginx" \ + "translation-gaudi-nginx-server" \ + '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}' } function validate_frontend() { diff --git a/Translation/tests/test_compose_on_xeon.sh b/Translation/tests/test_compose_on_xeon.sh index a648ba832..2d0c5306d 100644 --- a/Translation/tests/test_compose_on_xeon.sh +++ b/Translation/tests/test_compose_on_xeon.sh @@ -19,10 +19,10 @@ function build_docker_images() { git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../ echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
- service_list="translation translation-ui llm-tgi" + service_list="translation translation-ui llm-tgi nginx" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - docker pull ghcr.io/huggingface/text-generation-inference:1.4 + docker pull ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu docker images && sleep 1s } @@ -35,6 +35,12 @@ function start_services() { export MEGA_SERVICE_HOST_IP=${ip_address} export LLM_SERVICE_HOST_IP=${ip_address} export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/translation" + export NGINX_PORT=80 + export FRONTEND_SERVICE_IP=${ip_address} + export FRONTEND_SERVICE_PORT=5173 + export BACKEND_SERVICE_NAME=translation + export BACKEND_SERVICE_IP=${ip_address} + export BACKEND_SERVICE_PORT=8888 sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env @@ -42,7 +48,8 @@ function start_services() { docker compose up -d > ${LOG_PATH}/start_services_with_compose.log n=0 - until [[ "$n" -ge 100 ]]; do + # wait longer for the LLM model download + until [[ "$n" -ge 500 ]]; do docker logs tgi-service > ${LOG_PATH}/tgi_service_start.log if grep -q Connected ${LOG_PATH}/tgi_service_start.log; then break @@ -108,6 +115,14 @@ function validate_megaservice() { "mega-translation" \ "translation-xeon-backend-server" \ '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}' + + # test the megaservice via nginx + validate_services \ + "${ip_address}:80/v1/translation" \ + "translation" \ + "mega-translation-nginx" \ + "translation-xeon-nginx-server" \ + '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}' } function validate_frontend() { diff --git a/Translation/tests/test_manifest_on_gaudi.sh b/Translation/tests/test_manifest_on_gaudi.sh new file mode 100755 index 000000000..6e4edbeb4 --- /dev/null +++ b/Translation/tests/test_manifest_on_gaudi.sh @@ -0,0 +1,91 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -xe +USER_ID=$(whoami) +LOG_PATH=/home/$(whoami)/logs +MOUNT_DIR=/home/$USER_ID/.cache/huggingface/hub +IMAGE_REPO=${IMAGE_REPO:-} +IMAGE_TAG=${IMAGE_TAG:-latest} + +function init_translation() { + # executed under Translation/kubernetes/intel/hpu/gaudi/manifest + # replace the mount dir "path: /mnt/opea-models" with "path: $MOUNT_DIR" + find . -name '*.yaml' -type f -exec sed -i "s#path: /mnt/opea-models#path: $MOUNT_DIR#g" {} \; + if [ $CONTEXT == "CI" ]; then + # replace megaservice image tag + find . -name '*.yaml' -type f -exec sed -i "s#image: \"opea/translation:latest#image: \"opea/translation:${IMAGE_TAG}#g" {} \; + else + # replace microservice image tag + find . -name '*.yaml' -type f -exec sed -i "s#image: \"opea/\(.*\):latest#image: \"opea/\1:${IMAGE_TAG}#g" {} \; + fi + # replace the repository "image: opea/*" with "image: $IMAGE_REPO/opea/" + find . -name '*.yaml' -type f -exec sed -i "s#image: \"opea/*#image: \"${IMAGE_REPO}opea/#g" {} \; + # set huggingface token + find . 
-name '*.yaml' -type f -exec sed -i "s#insert-your-huggingface-token-here#$(cat /home/$USER_ID/.cache/huggingface/token)#g" {} \; +} + +function install_translation { + echo "namespace is $NAMESPACE" + kubectl apply -f translation.yaml -n $NAMESPACE + sleep 50s +} + +function validate_translation() { + ip_address=$(kubectl get svc $SERVICE_NAME -n $NAMESPACE -o jsonpath='{.spec.clusterIP}') + port=$(kubectl get svc $SERVICE_NAME -n $NAMESPACE -o jsonpath='{.spec.ports[0].port}') + echo "try to curl http://${ip_address}:${port}/v1/translation..." + + # use a namespace-specific logfile name to avoid conflicts among multiple runners + LOGFILE=$LOG_PATH/curlmega_$NAMESPACE.log + # Curl the Mega Service + curl http://${ip_address}:${port}/v1/translation \ + -H 'Content-Type: application/json' \ + -d '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}' > $LOGFILE + exit_code=$? + if [ $exit_code -ne 0 ]; then + echo "Megaservice translation failed, please check the logs in $LOGFILE!" + exit 1 + fi + + echo "Checking response results, make sure the output is reasonable. " + local status=false + if [[ -f $LOGFILE ]] && \ + [[ $(grep -c "translation" $LOGFILE) != 0 ]]; then + status=true + fi + + if [ $status == false ]; then + echo "Response check failed, please check the logs in artifacts!" + else + echo "Response check succeeded!" + fi +} + +if [ $# -eq 0 ]; then + echo "Usage: $0 <function_name>" + exit 1 +fi + +case "$1" in + init_Translation) + pushd Translation/kubernetes/intel/hpu/gaudi/manifest + init_translation + popd + ;; + install_Translation) + pushd Translation/kubernetes/intel/hpu/gaudi/manifest + NAMESPACE=$2 + install_translation + popd + ;; + validate_Translation) + NAMESPACE=$2 + SERVICE_NAME=translation + validate_translation + ;; + *) + echo "Unknown function: $1" + ;; +esac diff --git a/Translation/tests/test_manifest_on_xeon.sh b/Translation/tests/test_manifest_on_xeon.sh new file mode 100755 index 000000000..34f04f5ab --- /dev/null +++ b/Translation/tests/test_manifest_on_xeon.sh @@ -0,0 +1,90 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -xe +USER_ID=$(whoami) +LOG_PATH=/home/$(whoami)/logs +MOUNT_DIR=/home/$USER_ID/.cache/huggingface/hub +IMAGE_REPO=${IMAGE_REPO:-} +IMAGE_TAG=${IMAGE_TAG:-latest} + +function init_translation() { + # executed under Translation/kubernetes/intel/cpu/xeon/manifest + # replace the mount dir "path: /mnt/opea-models" with "path: $MOUNT_DIR" + find . -name '*.yaml' -type f -exec sed -i "s#path: /mnt/opea-models#path: $MOUNT_DIR#g" {} \; + if [ $CONTEXT == "CI" ]; then + # replace megaservice image tag + find . -name '*.yaml' -type f -exec sed -i "s#image: \"opea/translation:latest#image: \"opea/translation:${IMAGE_TAG}#g" {} \; + else + # replace microservice image tag + find . -name '*.yaml' -type f -exec sed -i "s#image: \"opea/\(.*\):latest#image: \"opea/\1:${IMAGE_TAG}#g" {} \; + fi + # replace the repository "image: opea/*" with "image: $IMAGE_REPO/opea/" + find . -name '*.yaml' -type f -exec sed -i "s#image: \"opea/*#image: \"${IMAGE_REPO}opea/#g" {} \; + # set huggingface token + find . 
-name '*.yaml' -type f -exec sed -i "s#insert-your-huggingface-token-here#$(cat /home/$USER_ID/.cache/huggingface/token)#g" {} \; +} + +function install_translation { + echo "namespace is $NAMESPACE" + kubectl apply -f translation.yaml -n $NAMESPACE +} + +function validate_translation() { + ip_address=$(kubectl get svc $SERVICE_NAME -n $NAMESPACE -o jsonpath='{.spec.clusterIP}') + port=$(kubectl get svc $SERVICE_NAME -n $NAMESPACE -o jsonpath='{.spec.ports[0].port}') + echo "try to curl http://${ip_address}:${port}/v1/translation..." + + # use a namespace-specific logfile name to avoid conflicts among multiple runners + LOGFILE=$LOG_PATH/curlmega_$NAMESPACE.log + # Curl the Mega Service + curl http://${ip_address}:${port}/v1/translation \ + -H 'Content-Type: application/json' \ + -d '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}' > $LOGFILE + exit_code=$? + if [ $exit_code -ne 0 ]; then + echo "Megaservice translation failed, please check the logs in $LOGFILE!" + exit 1 + fi + + echo "Checking response results, make sure the output is reasonable. " + local status=false + if [[ -f $LOGFILE ]] && \ + [[ $(grep -c "translation" $LOGFILE) != 0 ]]; then + status=true + fi + + if [ $status == false ]; then + echo "Response check failed, please check the logs in artifacts!" + else + echo "Response check succeeded!" + fi +} + +if [ $# -eq 0 ]; then + echo "Usage: $0 <function_name>" + exit 1 +fi + +case "$1" in + init_Translation) + pushd Translation/kubernetes/intel/cpu/xeon/manifest + init_translation + popd + ;; + install_Translation) + pushd Translation/kubernetes/intel/cpu/xeon/manifest + NAMESPACE=$2 + install_translation + popd + ;; + validate_Translation) + NAMESPACE=$2 + SERVICE_NAME=translation + validate_translation + ;; + *) + echo "Unknown function: $1" + ;; +esac
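As a usage sketch for the script above (the namespace name is only an example, and `init_Translation` assumes a Hugging Face token has been cached at `~/.cache/huggingface/token`):

```bash
# Hypothetical end-to-end invocation from the GenAIExamples repo root.
kubectl create namespace translation-test
bash Translation/tests/test_manifest_on_xeon.sh init_Translation
bash Translation/tests/test_manifest_on_xeon.sh install_Translation translation-test
bash Translation/tests/test_manifest_on_xeon.sh validate_Translation translation-test
```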