refactored FaqGen

2024-10-21 10:46:12 +03:00
parent 58ff7d9518
commit 4e1237d410
2 changed files with 68 additions and 75 deletions
--- a/FaqGen/benchmark/performance/helm_charts/customize.yaml
+++ b/FaqGen/benchmark/performance/helm_charts/customize.yaml
@@ -1,34 +1,23 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 podSpecs:
  - name: faq-mega-server-deploy
-    spec:
+    replicas: 2
-      image_name: opea/chatqna
+    resources:
-      image_tag: latest
+      limits:
-      replicas: 2
+        cpu: "8"
-      resources:
+        memory: "8000Mi"
-        limits:
+      requests:
-          cpu: "8"
+        cpu: "8"
-          memory: "8000Mi"
+        memory: "8000Mi"
        requests:
          cpu: "8"
          memory: "8000Mi"
  - name: faq-tgi-deploy
-    spec:
+    replicas: 7
-      image_name: ghcr.io/huggingface/tgi-gaudi
+    resources:
-      image_tag: 2.0.5
+      limits:
-      replicas: 7
+        habana.ai/gaudi: 1
      resources:
        limits:
          habana.ai/gaudi: 1
  - name: faq-micro-deploy
-    spec:
+    replicas: 1
      image_name: opea/llm-faqgen-tgi
      image_tag: latest
      replicas: 1
--- a/FaqGen/benchmark/performance/helm_charts/values.yaml
+++ b/FaqGen/benchmark/performance/helm_charts/values.yaml
@@ -7,63 +7,67 @@ config:
  LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct
  CONFIG_MAP_NAME: faq-config
  NODE_SELECTOR: faq-opea
  HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-deployments:
+microservices:
  - name: faq-mega-server-deploy
-    spec:
+    image: opea/chatqna:latest
-      ports:
+    replicas: 1
-        - containerPort: 7777
+    ports:
      - containerPort: 7777
  - name: faq-micro-deploy
-    spec:
+    image: opea/llm-faqgen-tgi:latest
-      ports:
+    replicas: 1
-        - containerPort: 9000
+    ports:
      - containerPort: 9000
  - name: faq-tgi-deploy
-    spec:
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
-      ports:
+    replicas: 1
-        - containerPort: 80
+    ports:
-      resources:
+      - containerPort: 80
-        limits:
+    resources:
-          habana.ai/gaudi: 1
+      limits:
-      args:
+        habana.ai/gaudi: 1
-        - name: "--model-id"
+    args:
-          value: $(LLM_MODEL_ID)
+      - name: "--model-id"
-        - name: "--max-input-length"
+        value: $(LLM_MODEL_ID)
-          value: "2048"
+      - name: "--max-input-length"
-        - name: "--max-total-tokens"
+        value: "2048"
-          value: "4096"
+      - name: "--max-total-tokens"
-      env:
+        value: "4096"
-        - name: OMPI_MCA_btl_vader_single_copy_mechanism
+    env:
-          value: none
+      - name: OMPI_MCA_btl_vader_single_copy_mechanism
-        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+        value: none
-          value: "true"
+      - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
-        - name: runtime
+        value: "true"
-          value: habana
+      - name: runtime
-        - name: HABANA_VISIBLE_DEVICES
+        value: habana
-          value: all
+      - name: HABANA_VISIBLE_DEVICES
-        - name: ENABLE_HPU_GRAPH
+        value: all
-          value: 'true'
+      - name: ENABLE_HPU_GRAPH
-        - name: LIMIT_HPU_GRAPH
+        value: 'true'
-          value: 'true'
+      - name: LIMIT_HPU_GRAPH
-        - name: USE_FLASH_ATTENTION
+        value: 'true'
-          value: 'true'
+      - name: USE_FLASH_ATTENTION
-        - name: FLASH_ATTENTION_RECOMPUTE
+        value: 'true'
-          value: 'true'
+      - name: FLASH_ATTENTION_RECOMPUTE
-      volumeMounts:
+        value: 'true'
-        - mountPath: /data
+    volumeMounts:
-          name: model-volume
+      - mountPath: /data
-        - mountPath: /dev/shm
+        name: model-volume
-          name: shm
+      - mountPath: /dev/shm
-      volumes:
+        name: shm
-        - hostPath:
+    volumes:
-            path: /mnt/models
+      - hostPath:
-            type: Directory
+          path: /mnt/models
-          name: model-volume
+          type: Directory
-        - emptyDir:
+        name: model-volume
-            medium: Memory
+      - emptyDir:
-            sizeLimit: 1Gi
+          medium: Memory
-          name: shm
+          sizeLimit: 1Gi
        name: shm
 services:
  - name: faq-micro-svc