refactored FaqGen

Zhenzhong Xu
2024-10-21 10:46:12 +03:00
parent 58ff7d9518
commit 4e1237d410
2 changed files with 68 additions and 75 deletions

View File

@@ -1,34 +1,23 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
podSpecs:
- name: faq-mega-server-deploy
spec:
image_name: opea/chatqna
image_tag: latest
replicas: 2
resources:
limits:
cpu: "8"
memory: "8000Mi"
requests:
cpu: "8"
memory: "8000Mi"
- name: faq-tgi-deploy
spec:
image_name: ghcr.io/huggingface/tgi-gaudi
image_tag: 2.0.5
replicas: 7
resources:
limits:
habana.ai/gaudi: 1
- name: faq-micro-deploy
spec:
image_name: opea/llm-faqgen-tgi
image_tag: latest
replicas: 1
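Note on the resource figures above: the cpu/memory values set requests equal to limits, which in standard Kubernetes yields the Guaranteed QoS class, and habana.ai/gaudi: 1 is an extended resource exposed by the Habana device plugin, so replicas: 7 on faq-tgi-deploy would consume seven Gaudi cards in total. A minimal sketch of the container resources stanza such a podSpecs entry presumably renders to (the containers wrapper and container name are illustrative assumptions, not part of this config):

  # Hypothetical rendering of the faq-mega-server-deploy entry into a
  # plain Kubernetes container spec; names here are assumptions.
  containers:
  - name: faq-mega-server        # assumed container name
    image: opea/chatqna:latest   # image_name:image_tag from the entry
    resources:
      requests:
        cpu: "8"                 # requests == limits -> Guaranteed QoS
        memory: "8000Mi"
      limits:
        cpu: "8"
        memory: "8000Mi"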

View File

@@ -7,63 +7,67 @@ config:
LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct
CONFIG_MAP_NAME: faq-config
NODE_SELECTOR: faq-opea
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
deployments:
microservices:
- name: faq-mega-server-deploy
spec:
ports:
- containerPort: 7777
image: opea/chatqna:latest
replicas: 1
- name: faq-micro-deploy
spec:
ports:
- containerPort: 9000
image: opea/llm-faqgen-tgi:latest
replicas: 1
- name: faq-tgi-deploy
spec:
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
args:
- name: "--model-id"
value: $(LLM_MODEL_ID)
- name: "--max-input-length"
value: "2048"
- name: "--max-total-tokens"
value: "4096"
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: "true"
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: ENABLE_HPU_GRAPH
value: 'true'
- name: LIMIT_HPU_GRAPH
value: 'true'
- name: USE_FLASH_ATTENTION
value: 'true'
- name: FLASH_ATTENTION_RECOMPUTE
value: 'true'
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
replicas: 1
services:
- name: faq-micro-svc
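The services list is truncated at this point. For orientation only, a plain Kubernetes Service of the kind an entry like faq-micro-svc presumably renders to might look as follows; the selector label and port mapping are assumptions inferred from the faq-micro-deploy deployment above (containerPort 9000), not part of this commit:

  # Hypothetical Service for faq-micro-deploy; label and ports assumed.
  apiVersion: v1
  kind: Service
  metadata:
    name: faq-micro-svc
  spec:
    selector:
      app: faq-micro-deploy      # assumed pod label
    ports:
    - port: 9000                 # assumed to match containerPort 9000
      targetPort: 9000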