Update manifest for FaqGen (#582)

* update tgi version

Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>

* add k8s for faq

Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>

* add benchmark for faq

Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>

* refine k8s for faq

Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>

* add tuning for faq

Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>

* add prompts with different length for faq

Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>

* add tgi docker for llama3.1

Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>

* remove useless code

Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>

* remove nodeselector

Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>

* remove HF token

Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>

* refine code structure

Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* fix readme

Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>

---------

Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
XinyaoWa authored on 2024-08-13 16:29:15 +08:00, committed by GitHub
parent 8c384e0314
commit 80e3e2a2d3
6 changed files with 308 additions and 330 deletions

View File

@@ -16,7 +16,7 @@ cd GenAIComps
As TGI Gaudi has been officially published as a Docker image, we simply need to pull it:
```bash
-docker pull ghcr.io/huggingface/tgi-gaudi:1.2.1
+docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
```
### 2. Build LLM Image
@@ -56,7 +56,7 @@ docker build -t opea/faqgen-react-ui:latest --build-arg https_proxy=$https_proxy
Then run the command `docker images`; you should see the following Docker images:
-1. `ghcr.io/huggingface/tgi-gaudi:1.2.1`
+1. `ghcr.io/huggingface/tgi-gaudi:2.0.1`
2. `opea/llm-faqgen-tgi:latest`
3. `opea/faqgen:latest`
4. `opea/faqgen-ui:latest`
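To confirm everything built, a quick filter over the local image list is enough; a convenience sketch (the names match the images listed above):
```bash
# Show only the images this example uses
docker images | grep -E 'tgi-gaudi|faqgen'
```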

View File

@@ -17,12 +17,14 @@ services:
https_proxy: ${https_proxy}
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
-HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+PREFILL_BATCH_BUCKET_SIZE: 1
+BATCH_BUCKET_SIZE: 8
runtime: habana
cap_add:
- SYS_NICE
ipc: host
-command: --model-id ${LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048
+command: --model-id ${LLM_MODEL_ID} --max-input-length 2048 --max-total-tokens 4096 --max-batch-total-tokens 65536 --max-batch-prefill-tokens 4096
llm_faqgen:
image: opea/llm-faqgen-tgi:latest
container_name: llm-faqgen-server
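Once the service is up, the enlarged token budget can be exercised through TGI's `/generate` endpoint. A minimal sketch, assuming the container's port 80 is published on the host as `${TGI_PORT}` (the published port is set elsewhere in the compose file):
```bash
# Prompt tokens count against --max-input-length (2048); the prompt plus
# max_new_tokens must stay within --max-total-tokens (4096).
curl http://${host_ip}:${TGI_PORT}/generate \
  -X POST \
  -H 'Content-Type: application/json' \
  -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":128}}'
```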

View File

@@ -23,13 +23,24 @@ sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" faqg
kubectl apply -f faqgen.yaml
```
+## Deploy UI
+```
+cd GenAIExamples/FaqGen/kubernetes/manifests/
+kubectl get svc # get the service IP address
+ip_address="" # set according to your service address
+sed -i "s/insert_your_ip_here/${ip_address}/g" ui.yaml
+kubectl apply -f ui.yaml
+```
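Rather than copying the address out of `kubectl get svc` by hand, the value can be pulled with a jsonpath query; a minimal sketch, assuming the megaservice is exposed through the `faq-mega-server-svc` service defined in the manifests below (for access from outside the cluster, use a node IP and the NodePort instead):
```bash
# Read the megaservice's ClusterIP straight from the API server
ip_address=$(kubectl get svc faq-mega-server-svc -o jsonpath='{.spec.clusterIP}')
sed -i "s/insert_your_ip_here/${ip_address}/g" ui.yaml
kubectl apply -f ui.yaml
```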
## Verify Services
+Make sure all the pods are running, and restart the faqgen-xxxx pod if necessary.
```
kubectl get pods
-curl http://${host_ip}:8888/v1/faqgen -H "Content-Type: application/json" -d '{
+port=7779 # 7779 for gaudi, 7778 for xeon
+curl http://${host_ip}:${port}/v1/faqgen -H "Content-Type: application/json" -d '{
"messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."
}'
```
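If a pod is stuck, a rolling restart is a cleaner way to recreate it than deleting it by hand; a sketch using the deployment names from the manifests below (substitute the Xeon variants where applicable):
```bash
# Recreate the megaservice pods and wait until they are Ready again
kubectl rollout restart deployment/faq-mega-server-deploy
kubectl rollout status deployment/faq-mega-server-deploy
```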

View File

@@ -1,216 +1,186 @@
---
# Source: codegen/charts/llm-uservice/charts/tgi/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: faqgen-tgi
labels:
helm.sh/chart: tgi-0.1.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: faqgen
app.kubernetes.io/version: "1.4"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 80
targetPort: 80
protocol: TCP
name: tgi
selector:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: faqgen
---
apiVersion: v1
kind: Service
metadata:
name: faqgen-llm-uservice
labels:
helm.sh/chart: llm-uservice-0.1.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: faqgen
app.kubernetes.io/version: "1.0.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 9000
targetPort: 9000
protocol: TCP
name: llm-uservice
selector:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: faqgen
---
apiVersion: v1
kind: Service
metadata:
name: faqgen
labels:
helm.sh/chart: faqgen-0.1.0
app.kubernetes.io/name: faqgen
app.kubernetes.io/instance: faqgen
app.kubernetes.io/version: "1.0.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 8888
targetPort: 8888
protocol: TCP
name: faqgen
selector:
app.kubernetes.io/name: faqgen
app.kubernetes.io/instance: faqgen
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: faqgen-tgi
labels:
helm.sh/chart: tgi-0.1.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: faqgen
app.kubernetes.io/version: "1.4"
app.kubernetes.io/managed-by: Helm
name: faq-tgi-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: faqgen
app: faq-tgi-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: faqgen
app: faq-tgi-deploy
spec:
securityContext: {}
hostIPC: true
containers:
- name: tgi
env:
- name: MODEL_ID
value: Intel/neural-chat-7b-v3-3
- name: PORT
value: "80"
- name: http_proxy
value:
- name: https_proxy
value:
- name: no_proxy
value:
securityContext: {}
image: "ghcr.io/huggingface/text-generation-inference:1.4"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data
name: model-volume
ports:
- name: http
containerPort: 80
protocol: TCP
resources: {}
- name: faq-tgi-deploy-demo
env:
- name: HUGGING_FACE_HUB_TOKEN
value: "insert-your-huggingface-token-here"
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: PREFILL_BATCH_BUCKET_SIZE
value: "1"
- name: BATCH_BUCKET_SIZE
value: "8"
- name: PORT
value: "80"
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
imagePullPolicy: IfNotPresent
securityContext:
capabilities:
add:
- SYS_NICE
args:
- --model-id
- 'meta-llama/Meta-Llama-3-8B-Instruct'
- --max-input-length
- '3096'
- --max-total-tokens
- '4096'
- --max-batch-total-tokens
- '65536'
- --max-batch-prefill-tokens
- '4096'
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
serviceAccountName: default
volumes:
- name: model-volume
hostPath:
path: /mnt
type: Directory
- name: model-volume
hostPath:
path: /home/sdp/cesg
type: Directory
- name: shm
emptyDir:
medium: Memory
sizeLimit: 1Gi
---
kind: Service
apiVersion: v1
metadata:
name: faq-tgi-svc
spec:
type: ClusterIP
selector:
app: faq-tgi-deploy
ports:
- name: service
port: 8010
targetPort: 80
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: faqgen-llm-uservice
labels:
helm.sh/chart: llm-uservice-0.1.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: faqgen
app.kubernetes.io/version: "1.0.0"
app.kubernetes.io/managed-by: Helm
name: faq-micro-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: faqgen
app: faq-micro-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: faqgen
app: faq-micro-deploy
spec:
securityContext: {}
hostIPC: true
containers:
- name: faqgen
- name: faq-micro-deploy
env:
- name: TGI_LLM_ENDPOINT
value: "http://faqgen-tgi:80"
value: "http://faq-tgi-svc.default.svc.cluster.local:8010"
- name: HUGGINGFACEHUB_API_TOKEN
value: "insert-your-huggingface-token-here"
- name: http_proxy
value:
- name: https_proxy
value:
- name: no_proxy
value:
securityContext: {}
image: "opea/llm-faqgen-tgi:latest"
image: opea/llm-faqgen-tgi:latest
imagePullPolicy: IfNotPresent
args: null
ports:
- name: llm-uservice
containerPort: 9000
protocol: TCP
startupProbe:
exec:
command:
- curl
- http://faqgen-tgi:80
initialDelaySeconds: 5
periodSeconds: 5
failureThreshold: 120
resources: {}
- containerPort: 9000
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: faq-micro-svc
spec:
type: ClusterIP
selector:
app: faq-micro-deploy
ports:
- name: service
port: 9003
targetPort: 9000
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: faqgen
labels:
helm.sh/chart: faqgen-0.1.0
app.kubernetes.io/name: faqgen
app.kubernetes.io/instance: faqgen
app.kubernetes.io/version: "1.0.0"
app.kubernetes.io/managed-by: Helm
name: faq-mega-server-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: faqgen
app.kubernetes.io/instance: faqgen
app: faq-mega-server-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app.kubernetes.io/name: faqgen
app.kubernetes.io/instance: faqgen
app: faq-mega-server-deploy
spec:
securityContext: null
hostIPC: true
containers:
- name: faqgen
- name: faq-mega-server-deploy
env:
- name: LLM_SERVICE_HOST_IP
value: faqgen-llm-uservice
- name: http_proxy
value:
- name: https_proxy
value:
- name: no_proxy
value:
securityContext: null
image: "opea/faqgen:latest"
value: faq-micro-svc
- name: LLM_SERVICE_PORT
value: "9003"
- name: MEGA_SERVICE_HOST_IP
value: faq-mega-server-svc
- name: MEGA_SERVICE_PORT
value: "7777"
image: opea/faqgen:latest
imagePullPolicy: IfNotPresent
args: null
ports:
- name: faqgen
containerPort: 8888
protocol: TCP
resources: null
- containerPort: 7777
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: faq-mega-server-svc
spec:
type: NodePort
selector:
app: faq-mega-server-deploy
ports:
- name: service
port: 7779
targetPort: 7777
nodePort: 30779
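Since `faq-mega-server-svc` is a NodePort service, the pipeline is also reachable from outside the cluster on any node's address at port 30779; a smoke-test sketch, where `${node_ip}` is your node's IP (environment-specific):
```bash
# NodePort 30779 maps to the megaservice's containerPort 7777
curl http://${node_ip}:30779/v1/faqgen \
  -H "Content-Type: application/json" \
  -d '{"messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models."}'
```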

View File

@@ -0,0 +1,46 @@
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: faq-mega-ui-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: faq-mega-ui-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: faq-mega-ui-deploy
spec:
hostIPC: true
containers:
- name: faq-mega-ui-deploy
env:
- name: DOC_BASE_URL
value: http://{insert_your_ip_here}:7779/v1/faqgen
image: opea/faqgen-ui:latest
imagePullPolicy: IfNotPresent
args: null
ports:
- containerPort: 5173
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: faq-mega-ui-svc
spec:
type: NodePort
selector:
app: faq-mega-ui-deploy
ports:
- name: service
port: 5175
targetPort: 5173
nodePort: 30175
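The UI is likewise published as a NodePort service, so it can be opened in a browser at `http://<node-ip>:30175`; a quick reachability check from a shell (sketch, `${node_ip}` as above):
```bash
# Expect the UI's HTML shell if the pod is serving
curl -s http://${node_ip}:30175 | head -n 5
```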

View File

@@ -1,216 +1,165 @@
---
# Source: codegen/charts/llm-uservice/charts/tgi/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: faqgen-tgi
labels:
helm.sh/chart: tgi-0.1.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: faqgen
app.kubernetes.io/version: "1.4"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 80
targetPort: 80
protocol: TCP
name: tgi
selector:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: faqgen
---
apiVersion: v1
kind: Service
metadata:
name: faqgen-llm-uservice
labels:
helm.sh/chart: llm-uservice-0.1.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: faqgen
app.kubernetes.io/version: "1.0.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 9000
targetPort: 9000
protocol: TCP
name: llm-uservice
selector:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: faqgen
---
apiVersion: v1
kind: Service
metadata:
name: faqgen
labels:
helm.sh/chart: faqgen-0.1.0
app.kubernetes.io/name: faqgen
app.kubernetes.io/instance: faqgen
app.kubernetes.io/version: "1.0.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 8888
targetPort: 8888
protocol: TCP
name: faqgen
selector:
app.kubernetes.io/name: faqgen
app.kubernetes.io/instance: faqgen
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: faqgen-tgi
labels:
helm.sh/chart: tgi-0.1.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: faqgen
app.kubernetes.io/version: "1.4"
app.kubernetes.io/managed-by: Helm
name: faq-tgi-cpu-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: faqgen
app: faq-tgi-cpu-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: faqgen
app: faq-tgi-cpu-deploy
spec:
hostIPC: true
securityContext: {}
containers:
- name: tgi
env:
- name: MODEL_ID
value: Intel/neural-chat-7b-v3-3
- name: PORT
value: "80"
- name: http_proxy
value:
- name: https_proxy
value:
- name: no_proxy
value:
securityContext: {}
image: "ghcr.io/huggingface/text-generation-inference:1.4"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data
name: model-volume
ports:
- name: http
containerPort: 80
protocol: TCP
resources: {}
- name: faq-tgi-cpu-deploy-demo
env:
- name: HUGGING_FACE_HUB_TOKEN
value: "insert-your-huggingface-token-here"
- name: PORT
value: "80"
image: ghcr.io/huggingface/text-generation-inference:1.4
imagePullPolicy: IfNotPresent
securityContext: {}
args:
- --model-id
- 'meta-llama/Meta-Llama-3-8B-Instruct'
- --max-input-length
- '3096'
- --max-total-tokens
- '4096'
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
ports:
- containerPort: 80
serviceAccountName: default
volumes:
- name: model-volume
hostPath:
path: /mnt
type: Directory
- name: model-volume
hostPath:
path: /home/sdp/cesg
type: Directory
- name: shm
emptyDir:
medium: Memory
sizeLimit: 1Gi
---
kind: Service
apiVersion: v1
metadata:
name: faq-tgi-cpu-svc
spec:
type: ClusterIP
selector:
app: faq-tgi-cpu-deploy
ports:
- name: service
port: 8011
targetPort: 80
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: faqgen-llm-uservice
labels:
helm.sh/chart: llm-uservice-0.1.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: faqgen
app.kubernetes.io/version: "1.0.0"
app.kubernetes.io/managed-by: Helm
name: faq-micro-cpu-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: faqgen
app: faq-micro-cpu-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: faqgen
app: faq-micro-cpu-deploy
spec:
securityContext: {}
hostIPC: true
containers:
- name: faqgen
- name: faq-micro-cpu-deploy
env:
- name: TGI_LLM_ENDPOINT
value: "http://faqgen-tgi:80"
value: "http://faq-tgi-cpu-svc.default.svc.cluster.local:8011"
- name: HUGGINGFACEHUB_API_TOKEN
value: "insert-your-huggingface-token-here"
- name: http_proxy
value:
- name: https_proxy
value:
- name: no_proxy
value:
securityContext: {}
image: "opea/llm-faqgen-tgi:latest"
image: opea/llm-faqgen-tgi:latest
imagePullPolicy: IfNotPresent
args: null
ports:
- name: llm-uservice
containerPort: 9000
protocol: TCP
startupProbe:
exec:
command:
- curl
- http://faqgen-tgi:80
initialDelaySeconds: 5
periodSeconds: 5
failureThreshold: 120
resources: {}
- containerPort: 9000
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: faq-micro-cpu-svc
spec:
type: ClusterIP
selector:
app: faq-micro-cpu-deploy
ports:
- name: service
port: 9004
targetPort: 9000
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: faqgen
labels:
helm.sh/chart: faqgen-0.1.0
app.kubernetes.io/name: faqgen
app.kubernetes.io/instance: faqgen
app.kubernetes.io/version: "1.0.0"
app.kubernetes.io/managed-by: Helm
name: faq-mega-server-cpu-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: faqgen
app.kubernetes.io/instance: faqgen
app: faq-mega-server-cpu-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app.kubernetes.io/name: faqgen
app.kubernetes.io/instance: faqgen
app: faq-mega-server-cpu-deploy
spec:
securityContext: null
hostIPC: true
containers:
- name: faqgen
- name: faq-mega-server-cpu-deploy
env:
- name: LLM_SERVICE_HOST_IP
value: faqgen-llm-uservice
- name: http_proxy
value:
- name: https_proxy
value:
- name: no_proxy
value:
securityContext: null
image: "opea/faqgen:latest"
value: faq-micro-cpu-svc
- name: LLM_SERVICE_PORT
value: "9004"
- name: MEGA_SERVICE_HOST_IP
value: faq-mega-server-cpu-svc
- name: MEGA_SERVICE_PORT
value: "7777"
image: opea/faqgen:latest
imagePullPolicy: IfNotPresent
args: null
ports:
- name: faqgen
containerPort: 8888
protocol: TCP
resources: null
- containerPort: 7777
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: faq-mega-server-cpu-svc
spec:
type: NodePort
selector:
app: faq-mega-server-cpu-deploy
ports:
- name: service
port: 7778
targetPort: 7777
nodePort: 30778
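This Xeon manifest mirrors the Gaudi one: the same megaservice listens on containerPort 7777 but is fronted on service port 7778 (nodePort 30778), matching the `port=7779 # 7779 for gaudi, 7778 for xeon` hint in the README. The same verification applies (sketch; whether `${host_ip}:7778` is reachable depends on where you run it, so use nodePort 30778 from outside the cluster):
```bash
# Same smoke test as on Gaudi; only the service port differs on Xeon
port=7778
curl http://${host_ip}:${port}/v1/faqgen \
  -H "Content-Type: application/json" \
  -d '{"messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models."}'
```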