[Translation] Support manifests and nginx (#812)

Signed-off-by: letonghan <letong.han@intel.com> Signed-off-by: root <root@a4bf019305c5.jf.intel.com> Co-authored-by: root <root@a4bf019305c5.jf.intel.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-09-19 07:08:13 +08:00
parent b205dc7571
commit 1e130314d9
15 changed files with 1422 additions and 37 deletions
--- a/Translation/kubernetes/intel/README.md
+++ b/Translation/kubernetes/intel/README.md
@@ -0,0 +1,41 @@
+# Deploy Translation in Kubernetes Cluster
+
+> [NOTE]
+> The following values must be set before you can deploy:
+> HUGGINGFACEHUB_API_TOKEN
+>
+> You can also customize the "MODEL_ID" if needed.
+>
+> You need to make sure you have created the directory `/mnt/opea-models` to save the cached model on the node where the Translation workload is running. Otherwise, you need to modify the `translation.yaml` file to change the `model-volume` to a directory that exists on the node.
+
+## Deploy On Xeon
+
+```
+cd GenAIExamples/Translation/kubernetes/intel/cpu/xeon/manifests
+export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
+sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" translation.yaml
+kubectl apply -f translation.yaml
+```
+
+## Deploy On Gaudi
+
+```
+cd GenAIExamples/Translation/kubernetes/intel/hpu/gaudi/manifests
+export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
+sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" translation.yaml
+kubectl apply -f translation.yaml
+```
+
+## Verify Services
+
+To verify the installation, run the command `kubectl get pod` to make sure all pods are running.
+
+Then run the command `kubectl port-forward svc/translation 8888:8888` to expose the Translation service for access.
+
+Open another terminal and run the following command to verify the service if working:
+
+```console
+curl http://localhost:8888/v1/translation \
+    -H 'Content-Type: application/json' \
+    -d '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}'
+```
--- a/Translation/kubernetes/intel/cpu/xeon/manifest/translation.yaml
+++ b/Translation/kubernetes/intel/cpu/xeon/manifest/translation.yaml
@@ -0,0 +1,495 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: translation-tgi-config
+  labels:
+    app.kubernetes.io/name: tgi
+    app.kubernetes.io/instance: translation
+    app.kubernetes.io/version: "2.1.0"
+data:
+  LLM_MODEL_ID: "haoranxu/ALMA-13B"
+  PORT: "2080"
+  HF_TOKEN: "insert-your-huggingface-token-here"
+  http_proxy: ""
+  https_proxy: ""
+  no_proxy: ""
+  HABANA_LOGS: "/tmp/habana_logs"
+  NUMBA_CACHE_DIR: "/tmp"
+  HF_HOME: "/tmp/.cache/huggingface"
+  CUDA_GRAPHS: "0"
+---
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: translation-llm-uservice-config
+  labels:
+    app.kubernetes.io/name: llm-uservice
+    app.kubernetes.io/instance: translation
+    app.kubernetes.io/version: "v1.0"
+data:
+  TGI_LLM_ENDPOINT: "http://translation-tgi"
+  HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
+  http_proxy: ""
+  https_proxy: ""
+  no_proxy: ""
+  LOGFLAG: ""
+---
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: translation-ui-config
+  labels:
+    app.kubernetes.io/name: translation-ui
+    app.kubernetes.io/instance: translation
+    app.kubernetes.io/version: "v1.0"
+data:
+  BASE_URL: "/v1/translation"
+---
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+data:
+  default.conf: |+
+    # Copyright (C) 2024 Intel Corporation
+    # SPDX-License-Identifier: Apache-2.0
+
+
+    server {
+        listen       80;
+        listen  [::]:80;
+
+        location /home {
+            alias  /usr/share/nginx/html/index.html;
+        }
+
+        location / {
+            proxy_pass http://translation-ui:5173;
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+        }
+
+        location /v1/translation {
+            proxy_pass http://translation:8888;
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+        }
+    }
+
+kind: ConfigMap
+metadata:
+  name: translation-nginx-config
+---
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: translation-ui
+  labels:
+    app.kubernetes.io/name: translation-ui
+    app.kubernetes.io/instance: translation
+    app.kubernetes.io/version: "v1.0"
+spec:
+  type: ClusterIP
+  ports:
+    - port: 5173
+      targetPort: ui
+      protocol: TCP
+      name: ui
+  selector:
+    app.kubernetes.io/name: translation-ui
+    app.kubernetes.io/instance: translation
+---
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: translation-llm-uservice
+  labels:
+    app.kubernetes.io/name: llm-uservice
+    app.kubernetes.io/instance: translation
+    app.kubernetes.io/version: "v1.0"
+spec:
+  type: ClusterIP
+  ports:
+    - port: 9000
+      targetPort: 9000
+      protocol: TCP
+      name: llm-uservice
+  selector:
+    app.kubernetes.io/name: llm-uservice
+    app.kubernetes.io/instance: translation
+---
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: translation-tgi
+  labels:
+    app.kubernetes.io/name: tgi
+    app.kubernetes.io/instance: translation
+    app.kubernetes.io/version: "2.1.0"
+spec:
+  type: ClusterIP
+  ports:
+    - port: 80
+      targetPort: 2080
+      protocol: TCP
+      name: tgi
+  selector:
+    app.kubernetes.io/name: tgi
+    app.kubernetes.io/instance: translation
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: translation-nginx
+spec:
+  ports:
+  - port: 80
+    protocol: TCP
+    targetPort: 80
+  selector:
+    app.kubernetes.io/name: translation
+    app.kubernetes.io/instance: translation
+    app: translation-nginx
+  type: NodePort
+---
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: translation
+  labels:
+    app.kubernetes.io/name: translation
+    app.kubernetes.io/instance: translation
+    app.kubernetes.io/version: "v1.0"
+spec:
+  type: ClusterIP
+  ports:
+    - port: 8888
+      targetPort: 8888
+      protocol: TCP
+      name: translation
+  selector:
+    app.kubernetes.io/name: translation
+    app.kubernetes.io/instance: translation
+    app: translation
+---
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: translation-ui
+  labels:
+    app.kubernetes.io/name: translation-ui
+    app.kubernetes.io/instance: translation
+    app.kubernetes.io/version: "v1.0"
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: translation-ui
+      app.kubernetes.io/instance: translation
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: translation-ui
+        app.kubernetes.io/instance: translation
+        app.kubernetes.io/version: "v1.0"
+    spec:
+      securityContext:
+        {}
+      containers:
+        - name: translation-ui
+          envFrom:
+            - configMapRef:
+                name: translation-ui-config
+          securityContext:
+            {}
+          image: "opea/translation-ui:latest"
+          imagePullPolicy: IfNotPresent
+          ports:
+            - name: ui
+              containerPort: 80
+              protocol: TCP
+          resources:
+            {}
+          volumeMounts:
+            - mountPath: /tmp
+              name: tmp
+      volumes:
+        - name: tmp
+          emptyDir: {}
+---
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: translation-llm-uservice
+  labels:
+    app.kubernetes.io/name: llm-uservice
+    app.kubernetes.io/instance: translation
+    app.kubernetes.io/version: "v1.0"
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: llm-uservice
+      app.kubernetes.io/instance: translation
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: llm-uservice
+        app.kubernetes.io/instance: translation
+    spec:
+      securityContext:
+        {}
+      containers:
+        - name: translation
+          envFrom:
+            - configMapRef:
+                name: translation-llm-uservice-config
+          securityContext:
+            allowPrivilegeEscalation: false
+            capabilities:
+              drop:
+              - ALL
+            readOnlyRootFilesystem: false
+            runAsNonRoot: true
+            runAsUser: 1000
+            seccompProfile:
+              type: RuntimeDefault
+          image: "opea/llm-tgi:latest"
+          imagePullPolicy: IfNotPresent
+          ports:
+            - name: llm-uservice
+              containerPort: 9000
+              protocol: TCP
+          volumeMounts:
+            - mountPath: /tmp
+              name: tmp
+          livenessProbe:
+            failureThreshold: 24
+            httpGet:
+              path: v1/health_check
+              port: llm-uservice
+            initialDelaySeconds: 5
+            periodSeconds: 5
+          readinessProbe:
+            httpGet:
+              path: v1/health_check
+              port: llm-uservice
+            initialDelaySeconds: 5
+            periodSeconds: 5
+          startupProbe:
+            failureThreshold: 120
+            httpGet:
+              path: v1/health_check
+              port: llm-uservice
+            initialDelaySeconds: 5
+            periodSeconds: 5
+          resources:
+            {}
+      volumes:
+        - name: tmp
+          emptyDir: {}
+---
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: translation-tgi
+  labels:
+    app.kubernetes.io/name: tgi
+    app.kubernetes.io/instance: translation
+    app.kubernetes.io/version: "2.1.0"
+spec:
+  # use explicit replica counts only of HorizontalPodAutoscaler is disabled
+  replicas: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: tgi
+      app.kubernetes.io/instance: translation
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: tgi
+        app.kubernetes.io/instance: translation
+    spec:
+      securityContext:
+        {}
+      containers:
+        - name: tgi
+          envFrom:
+            - configMapRef:
+                name: translation-tgi-config
+          securityContext:
+            allowPrivilegeEscalation: false
+            capabilities:
+              drop:
+              - ALL
+            readOnlyRootFilesystem: true
+            runAsNonRoot: true
+            runAsUser: 1000
+            seccompProfile:
+              type: RuntimeDefault
+          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          imagePullPolicy: IfNotPresent
+          volumeMounts:
+            - mountPath: /data
+              name: model-volume
+            - mountPath: /tmp
+              name: tmp
+          ports:
+            - name: http
+              containerPort: 2080
+              protocol: TCP
+          livenessProbe:
+            failureThreshold: 24
+            initialDelaySeconds: 5
+            periodSeconds: 5
+            tcpSocket:
+              port: http
+          readinessProbe:
+            initialDelaySeconds: 5
+            periodSeconds: 5
+            tcpSocket:
+              port: http
+          startupProbe:
+            failureThreshold: 120
+            initialDelaySeconds: 5
+            periodSeconds: 5
+            tcpSocket:
+              port: http
+          resources:
+            {}
+      volumes:
+        - name: model-volume
+          emptyDir: {}
+        - name: tmp
+          emptyDir: {}
+---
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: translation
+  labels:
+    app.kubernetes.io/name: translation
+    app.kubernetes.io/instance: translation
+    app.kubernetes.io/version: "v1.0"
+    app: translation
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: translation
+      app.kubernetes.io/instance: translation
+      app: translation
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: translation
+        app.kubernetes.io/instance: translation
+        app: translation
+    spec:
+      securityContext:
+        null
+      containers:
+        - name: translation
+          env:
+            - name: LLM_SERVICE_HOST_IP
+              value: translation-llm-uservice
+          securityContext:
+            allowPrivilegeEscalation: false
+            capabilities:
+              drop:
+              - ALL
+            readOnlyRootFilesystem: true
+            runAsNonRoot: true
+            runAsUser: 1000
+            seccompProfile:
+              type: RuntimeDefault
+          image: "opea/translation:latest"
+          imagePullPolicy: IfNotPresent
+          volumeMounts:
+            - mountPath: /tmp
+              name: tmp
+          ports:
+            - name: translation
+              containerPort: 8888
+              protocol: TCP
+          resources:
+            null
+      volumes:
+        - name: tmp
+          emptyDir: {}
+---
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: translation-nginx
+  labels:
+    app.kubernetes.io/name: translation
+    app.kubernetes.io/instance: translation
+    app.kubernetes.io/version: "v1.0"
+    app: translation-nginx
+spec:
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: translation
+      app.kubernetes.io/instance: translation
+      app: translation-nginx
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: translation
+        app.kubernetes.io/instance: translation
+        app: translation-nginx
+    spec:
+      containers:
+      - image: nginx:1.27.1
+        imagePullPolicy: IfNotPresent
+        name: nginx
+        volumeMounts:
+        - mountPath: /etc/nginx/conf.d
+          name: nginx-config-volume
+      securityContext: {}
+      volumes:
+      - configMap:
+          defaultMode: 420
+          name: translation-nginx-config
+        name: nginx-config-volume
--- a/Translation/kubernetes/intel/hpu/gaudi/manifest/translation.yaml
+++ b/Translation/kubernetes/intel/hpu/gaudi/manifest/translation.yaml
@@ -0,0 +1,497 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: translation-tgi-config
+  labels:
+    app.kubernetes.io/name: tgi
+    app.kubernetes.io/instance: translation
+    app.kubernetes.io/version: "2.1.0"
+data:
+  LLM_MODEL_ID: "haoranxu/ALMA-13B"
+  PORT: "2080"
+  HF_TOKEN: "insert-your-huggingface-token-here"
+  http_proxy: ""
+  https_proxy: ""
+  no_proxy: ""
+  HABANA_LOGS: "/tmp/habana_logs"
+  NUMBA_CACHE_DIR: "/tmp"
+  HF_HOME: "/tmp/.cache/huggingface"
+  MAX_INPUT_LENGTH: "1024"
+  MAX_TOTAL_TOKENS: "2048"
+---
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: translation-llm-uservice-config
+  labels:
+    app.kubernetes.io/name: llm-uservice
+    app.kubernetes.io/instance: translation
+    app.kubernetes.io/version: "v1.0"
+data:
+  TGI_LLM_ENDPOINT: "http://translation-tgi"
+  HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
+  http_proxy: ""
+  https_proxy: ""
+  no_proxy: ""
+  LOGFLAG: ""
+---
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: translation-ui-config
+  labels:
+    app.kubernetes.io/name: translation-ui
+    app.kubernetes.io/instance: translation
+    app.kubernetes.io/version: "v1.0"
+data:
+  BASE_URL: "/v1/translation"
+---
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+data:
+  default.conf: |+
+    # Copyright (C) 2024 Intel Corporation
+    # SPDX-License-Identifier: Apache-2.0
+
+
+    server {
+        listen       80;
+        listen  [::]:80;
+
+        location /home {
+            alias  /usr/share/nginx/html/index.html;
+        }
+
+        location / {
+            proxy_pass http://translation-ui:5173;
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+        }
+
+        location /v1/translation {
+            proxy_pass http://translation;
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+        }
+    }
+
+kind: ConfigMap
+metadata:
+  name: translation-nginx-config
+---
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: translation-ui
+  labels:
+    app.kubernetes.io/name: translation-ui
+    app.kubernetes.io/instance: translation
+    app.kubernetes.io/version: "v1.0"
+spec:
+  type: ClusterIP
+  ports:
+    - port: 5173
+      targetPort: ui
+      protocol: TCP
+      name: ui
+  selector:
+    app.kubernetes.io/name: translation-ui
+    app.kubernetes.io/instance: translation
+---
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: translation-llm-uservice
+  labels:
+    app.kubernetes.io/name: llm-uservice
+    app.kubernetes.io/instance: translation
+    app.kubernetes.io/version: "v1.0"
+spec:
+  type: ClusterIP
+  ports:
+    - port: 9000
+      targetPort: 9000
+      protocol: TCP
+      name: llm-uservice
+  selector:
+    app.kubernetes.io/name: llm-uservice
+    app.kubernetes.io/instance: translation
+---
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: translation-tgi
+  labels:
+    app.kubernetes.io/name: tgi
+    app.kubernetes.io/instance: translation
+    app.kubernetes.io/version: "2.1.0"
+spec:
+  type: ClusterIP
+  ports:
+    - port: 80
+      targetPort: 2080
+      protocol: TCP
+      name: tgi
+  selector:
+    app.kubernetes.io/name: tgi
+    app.kubernetes.io/instance: translation
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: translation-nginx
+spec:
+  ports:
+  - port: 80
+    protocol: TCP
+    targetPort: 80
+  selector:
+    app.kubernetes.io/name: translation
+    app.kubernetes.io/instance: translation
+    app: translation-nginx
+  type: NodePort
+---
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: translation
+  labels:
+    app.kubernetes.io/name: translation
+    app.kubernetes.io/instance: translation
+    app.kubernetes.io/version: "v1.0"
+spec:
+  type: ClusterIP
+  ports:
+    - port: 8888
+      targetPort: 8888
+      protocol: TCP
+      name: translation
+  selector:
+    app.kubernetes.io/name: translation
+    app.kubernetes.io/instance: translation
+    app: translation
+---
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: translation-ui
+  labels:
+    app.kubernetes.io/name: translation-ui
+    app.kubernetes.io/instance: translation
+    app.kubernetes.io/version: "v1.0"
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: translation-ui
+      app.kubernetes.io/instance: translation
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: translation-ui
+        app.kubernetes.io/instance: translation
+        app.kubernetes.io/version: "v1.0"
+    spec:
+      securityContext:
+        {}
+      containers:
+        - name: translation-ui
+          envFrom:
+            - configMapRef:
+                name: translation-ui-config
+          securityContext:
+            {}
+          image: "opea/translation-ui:latest"
+          imagePullPolicy: IfNotPresent
+          ports:
+            - name: ui
+              containerPort: 80
+              protocol: TCP
+          resources:
+            {}
+          volumeMounts:
+            - mountPath: /tmp
+              name: tmp
+      volumes:
+        - name: tmp
+          emptyDir: {}
+---
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: translation-llm-uservice
+  labels:
+    app.kubernetes.io/name: llm-uservice
+    app.kubernetes.io/instance: translation
+    app.kubernetes.io/version: "v1.0"
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: llm-uservice
+      app.kubernetes.io/instance: translation
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: llm-uservice
+        app.kubernetes.io/instance: translation
+    spec:
+      securityContext:
+        {}
+      containers:
+        - name: translation
+          envFrom:
+            - configMapRef:
+                name: translation-llm-uservice-config
+          securityContext:
+            allowPrivilegeEscalation: false
+            capabilities:
+              drop:
+              - ALL
+            readOnlyRootFilesystem: false
+            runAsNonRoot: true
+            runAsUser: 1000
+            seccompProfile:
+              type: RuntimeDefault
+          image: "opea/llm-tgi:latest"
+          imagePullPolicy: IfNotPresent
+          ports:
+            - name: llm-uservice
+              containerPort: 9000
+              protocol: TCP
+          volumeMounts:
+            - mountPath: /tmp
+              name: tmp
+          livenessProbe:
+            failureThreshold: 24
+            httpGet:
+              path: v1/health_check
+              port: llm-uservice
+            initialDelaySeconds: 5
+            periodSeconds: 5
+          readinessProbe:
+            httpGet:
+              path: v1/health_check
+              port: llm-uservice
+            initialDelaySeconds: 5
+            periodSeconds: 5
+          startupProbe:
+            failureThreshold: 120
+            httpGet:
+              path: v1/health_check
+              port: llm-uservice
+            initialDelaySeconds: 5
+            periodSeconds: 5
+          resources:
+            {}
+      volumes:
+        - name: tmp
+          emptyDir: {}
+---
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: translation-tgi
+  labels:
+    app.kubernetes.io/name: tgi
+    app.kubernetes.io/instance: translation
+    app.kubernetes.io/version: "2.1.0"
+spec:
+  # use explicit replica counts only of HorizontalPodAutoscaler is disabled
+  replicas: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: tgi
+      app.kubernetes.io/instance: translation
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: tgi
+        app.kubernetes.io/instance: translation
+    spec:
+      securityContext:
+        {}
+      containers:
+        - name: tgi
+          envFrom:
+            - configMapRef:
+                name: translation-tgi-config
+          securityContext:
+            allowPrivilegeEscalation: false
+            capabilities:
+              drop:
+              - ALL
+            readOnlyRootFilesystem: true
+            runAsNonRoot: true
+            runAsUser: 1000
+            seccompProfile:
+              type: RuntimeDefault
+          image: "ghcr.io/huggingface/tgi-gaudi:2.0.1"
+          imagePullPolicy: IfNotPresent
+          volumeMounts:
+            - mountPath: /data
+              name: model-volume
+            - mountPath: /tmp
+              name: tmp
+          ports:
+            - name: http
+              containerPort: 2080
+              protocol: TCP
+          livenessProbe:
+            failureThreshold: 24
+            initialDelaySeconds: 5
+            periodSeconds: 5
+            tcpSocket:
+              port: http
+          readinessProbe:
+            initialDelaySeconds: 5
+            periodSeconds: 5
+            tcpSocket:
+              port: http
+          startupProbe:
+            failureThreshold: 120
+            initialDelaySeconds: 20
+            periodSeconds: 5
+            tcpSocket:
+              port: http
+          resources:
+            limits:
+              habana.ai/gaudi: 1
+      volumes:
+        - name: model-volume
+          emptyDir: {}
+        - name: tmp
+          emptyDir: {}
+---
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: translation
+  labels:
+    app.kubernetes.io/name: translation
+    app.kubernetes.io/instance: translation
+    app.kubernetes.io/version: "v1.0"
+    app: translation
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: translation
+      app.kubernetes.io/instance: translation
+      app: translation
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: translation
+        app.kubernetes.io/instance: translation
+        app: translation
+    spec:
+      securityContext:
+        null
+      containers:
+        - name: translation
+          env:
+            - name: LLM_SERVICE_HOST_IP
+              value: translation-llm-uservice
+          securityContext:
+            allowPrivilegeEscalation: false
+            capabilities:
+              drop:
+              - ALL
+            readOnlyRootFilesystem: true
+            runAsNonRoot: true
+            runAsUser: 1000
+            seccompProfile:
+              type: RuntimeDefault
+          image: "opea/translation:latest"
+          imagePullPolicy: IfNotPresent
+          volumeMounts:
+            - mountPath: /tmp
+              name: tmp
+          ports:
+            - name: translation
+              containerPort: 8888
+              protocol: TCP
+          resources:
+            null
+      volumes:
+        - name: tmp
+          emptyDir: {}
+---
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: translation-nginx
+  labels:
+    app.kubernetes.io/name: translation
+    app.kubernetes.io/instance: translation
+    app.kubernetes.io/version: "v1.0"
+    app: translation-nginx
+spec:
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: translation
+      app.kubernetes.io/instance: translation
+      app: translation-nginx
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: translation
+        app.kubernetes.io/instance: translation
+        app: translation-nginx
+    spec:
+      containers:
+      - image: nginx:1.27.1
+        imagePullPolicy: IfNotPresent
+        name: nginx
+        volumeMounts:
+        - mountPath: /etc/nginx/conf.d
+          name: nginx-config-volume
+      securityContext: {}
+      volumes:
+      - configMap:
+          defaultMode: 420
+          name: translation-nginx-config
+        name: nginx-config-volume