From 048b4e1df9adb002aa789aff525a00f66b88f8ae Mon Sep 17 00:00:00 2001
From: Zhenzhong Xu <zhenzhongxu@zhenzhongxu-vm-u22.habana-labs.com>
Date: Mon, 21 Oct 2024 11:06:37 +0300
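Subject: [PATCH] Refactor AudioQnA benchmark Helm chart values

Merge the former `deployments` list and the image/resource settings from
the customize.yaml `podSpecs` into a single `microservices` list in
values.yaml. customize.yaml now only carries per-deployment overrides,
which the deployment template looks up by name.

Move HUGGINGFACEHUB_API_TOKEN under `config`, and set NODE_SELECTOR to
`opea` in the AudioQnA, ChatQnA and FaqGen benchmark charts.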

---
 AudioQnA/benchmark/helm_charts/customize.yaml |  43 +----
 .../helm_charts/templates/configmap.yaml      |   2 +-
 .../helm_charts/templates/deployment.yaml     |  59 +++---
 AudioQnA/benchmark/helm_charts/values.yaml    | 182 ++++++++++--------
 .../performance/helm_charts/values.yaml       |   2 +-
 .../performance/helm_charts/values.yaml       |   2 +-
 6 files changed, 144 insertions(+), 146 deletions(-)

diff --git a/AudioQnA/benchmark/helm_charts/customize.yaml b/AudioQnA/benchmark/helm_charts/customize.yaml
index 31e1b6ca6..87fe3b280 100644
--- a/AudioQnA/benchmark/helm_charts/customize.yaml
+++ b/AudioQnA/benchmark/helm_charts/customize.yaml
@@ -1,50 +1,23 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
-HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-
+# Per-deployment overrides. The deployment template matches each entry by
+# name against `microservices` in values.yaml and reads replicas/resources.
 podSpecs:
   - name: audioqna-backend-server-deploy
-    spec:
-      image_name: opea/audioqna
-      image_tag: latest
-      replicas: 1
+    replicas: 1
 
   - name: asr-deploy
-    spec:
-      image_name: opea/asr
-      image_tag: latest
-      replicas: 1
+    replicas: 1
 
   - name: whisper-deploy
-    spec:
-      image_name: opea/whisper-gaudi
-      image_tag: latest
-      replicas: 1
-      resources:
-        limits:
-          habana.ai/gaudi: 1
+    replicas: 1
 
   - name: tts-deploy
-    spec:
-      image_name: opea/tts
-      image_tag: latest
-      replicas: 1
+    replicas: 1
 
   - name: speecht5-deploy
-    spec:
-      image_name: opea/speecht5-gaudi
-      image_tag: latest
-      replicas: 1
-      resources:
-        limits:
-          habana.ai/gaudi: 1
+    replicas: 1
 
   - name: llm-dependency-deploy
-    spec:
-      image_name: ghcr.io/huggingface/tgi-gaudi
-      image_tag: 2.0.5
-      replicas: 1
-      resources:
-        limits:
-          habana.ai/gaudi: 1
+    replicas: 1
diff --git a/AudioQnA/benchmark/helm_charts/templates/configmap.yaml b/AudioQnA/benchmark/helm_charts/templates/configmap.yaml
index 79246763f..7aa21eb20 100644
--- a/AudioQnA/benchmark/helm_charts/templates/configmap.yaml
+++ b/AudioQnA/benchmark/helm_charts/templates/configmap.yaml
@@ -7,7 +7,7 @@ metadata:
   name: {{ .Values.config.CONFIG_MAP_NAME }}
   namespace: default
 data:
-  HUGGINGFACEHUB_API_TOKEN: {{ .Values.HUGGINGFACEHUB_API_TOKEN }}
+  HUGGINGFACEHUB_API_TOKEN: {{ .Values.config.HUGGINGFACEHUB_API_TOKEN | quote }}  # quoted so a set value always renders as a YAML string
   LLM_MODEL_ID: {{ .Values.config.LLM_MODEL_ID }}
   NODE_SELECTOR: {{ .Values.config.NODE_SELECTOR }}
   TGI_LLM_ENDPOINT: http://faq-tgi-svc.default.svc.cluster.local:8010
diff --git a/AudioQnA/benchmark/helm_charts/templates/deployment.yaml b/AudioQnA/benchmark/helm_charts/templates/deployment.yaml
index 7a9fe0c54..920e54685 100644
--- a/AudioQnA/benchmark/helm_charts/templates/deployment.yaml
+++ b/AudioQnA/benchmark/helm_charts/templates/deployment.yaml
@@ -2,33 +2,38 @@
 # SPDX-License-Identifier: Apache-2.0
 
 {{- $global := .Values }}
-{{- range $deployment := .Values.deployments }}
-{{- range $podSpec := $global.podSpecs }}
-{{- if eq $podSpec.name $deployment.name }}
+{{- range $microservice := .Values.microservices }}
 apiVersion: apps/v1
 kind: Deployment
 metadata:
-  name: {{ $deployment.name }}
+  name: {{ $microservice.name }}
   namespace: default
 spec:
-  replicas: {{ $podSpec.spec.replicas }}
+  {{- /* Chart default; a same-named podSpecs entry with replicas set (0 included) overrides it. */}}
+  {{- $replicas := $microservice.replicas }}
+  {{- range $podSpec := $global.podSpecs }}
+    {{- if and (eq $podSpec.name $microservice.name) (not (kindIs "invalid" $podSpec.replicas)) }}
+      {{- $replicas = $podSpec.replicas }}
+    {{- end }}
+  {{- end }}
+  replicas: {{ $replicas }}
   selector:
     matchLabels:
-      app: {{ $deployment.name }}
+      app: {{ $microservice.name }}
   template:
     metadata:
       annotations:
         sidecar.istio.io/rewriteAppHTTPProbers: 'true'
       labels:
-        app: {{ $deployment.name }}
+        app: {{ $microservice.name }}
     spec:
       containers:
       - envFrom:
         - configMapRef:
             name: {{ $global.config.CONFIG_MAP_NAME }}
-        {{- if $deployment.spec.args }}
+        {{- if $microservice.args }}
         args:
-        {{- range $arg := $deployment.spec.args }}
+        {{- range $arg := $microservice.args }}
           {{- if $arg.name }}
           - {{ $arg.name }}
           {{- end }}
@@ -38,31 +43,39 @@ spec:
         {{- end }}
         {{- end }}
 
-        {{- if $deployment.spec.env }}
+        {{- if $microservice.env }}
         env:
-        {{- range $env := $deployment.spec.env }}
+        {{- range $env := $microservice.env }}
           - name: {{ $env.name }}
             value: "{{ $env.value }}"
         {{- end }}
         {{- end }}
 
-        image: {{ $podSpec.spec.image_name }}:{{ $podSpec.spec.image_tag }}
+        image: {{ $microservice.image }}
         imagePullPolicy: IfNotPresent
-        name: {{ $podSpec.name }}
+        name: {{ $microservice.name }}
 
-        {{- if $deployment.spec.ports }}
+        {{- if $microservice.ports }}
         ports:
-        {{- range $port := $deployment.spec.ports }}
+        {{- range $port := $microservice.ports }}
           {{- range $port_name, $port_id := $port }}
           - {{ $port_name }}: {{ $port_id }}
           {{- end }}
         {{- end }}
         {{- end }}
 
+        {{- /* Chart-default resources; a same-named podSpecs entry that sets
+               resources replaces them wholesale. */}}
+        {{- $resources := $microservice.resources }}
+        {{- range $podSpec := $global.podSpecs }}
+          {{- if and (eq $podSpec.name $microservice.name) $podSpec.resources }}
+            {{- $resources = $podSpec.resources }}
+          {{- end }}
+        {{- end }}
 
-        {{- if $podSpec.spec.resources }}
+        {{- if $resources }}
         resources:
-        {{- range $resourceType, $resource := $podSpec.spec.resources }}
+        {{- range $resourceType, $resource := $resources }}
           {{ $resourceType }}:
           {{- range $limitType, $limit := $resource }}
             {{ $limitType }}: {{ $limit }}
@@ -70,9 +83,9 @@ spec:
         {{- end }}
         {{- end }}
 
-        {{- if $deployment.spec.volumeMounts }}
+        {{- if $microservice.volumeMounts }}
         volumeMounts:
-        {{- range $volumeMount := $deployment.spec.volumeMounts }}
+        {{- range $volumeMount := $microservice.volumeMounts }}
           - mountPath: {{ $volumeMount.mountPath }}
             name: {{ $volumeMount.name }}
         {{- end }}
@@ -85,15 +98,15 @@ spec:
       topologySpreadConstraints:
       - labelSelector:
           matchLabels:
-            app: {{ $deployment.name }}
+            app: {{ $microservice.name }}
         maxSkew: 1
         topologyKey: kubernetes.io/hostname
         whenUnsatisfiable: ScheduleAnyway
 
 
-      {{- if $deployment.spec.volumes }}
+      {{- if $microservice.volumes }}
       volumes:
-      {{- range $index, $volume := $deployment.spec.volumes }}
+      {{- range $index, $volume := $microservice.volumes }}
         - name: {{ $volume.name }}
           {{- if $volume.hostPath }}
           hostPath:
@@ -109,5 +122,3 @@ spec:
 
 ---
 {{- end }}
-{{- end }}
-{{- end }}
diff --git a/AudioQnA/benchmark/helm_charts/values.yaml b/AudioQnA/benchmark/helm_charts/values.yaml
index e2f03da95..28d414c96 100644
--- a/AudioQnA/benchmark/helm_charts/values.yaml
+++ b/AudioQnA/benchmark/helm_charts/values.yaml
@@ -6,7 +6,7 @@ namespace: default
 config:
   LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
   CONFIG_MAP_NAME: audio-qna-config
-  NODE_SELECTOR: audioqna-opea
+  NODE_SELECTOR: opea
   ASR_ENDPOINT: http://whisper-svc.default.svc.cluster.local:7066
   TTS_ENDPOINT: http://speecht5-svc.default.svc.cluster.local:7055
   TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:3006
@@ -17,101 +17,115 @@ config:
   LLM_SERVICE_PORT: "3007"
   TTS_SERVICE_HOST_IP: tts-svc
   TTS_SERVICE_PORT: "3002"
+  HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 
-deployments:
+microservices:  # templates/deployment.yaml renders one Deployment per entry
   - name: audioqna-backend-server-deploy
-    spec:
-      ports:
-        - containerPort: 8888
+    image: opea/audioqna:latest
+    replicas: 1
+    ports:
+      - containerPort: 8888
 
   - name: asr-deploy
-    spec:
-      ports:
-        - containerPort: 9099
+    image: opea/asr:latest
+    replicas: 1
+    ports:
+      - containerPort: 9099
 
   - name: whisper-deploy
-    spec:
-      ports:
-        - containerPort: 7066
-      env:
-        - name: OMPI_MCA_btl_vader_single_copy_mechanism
-          value: none
-        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
-          value: 'true'
-        - name: runtime
-          value: habana
-        - name: HABANA_VISIBLE_DEVICES
-          value: all
+    image: opea/whisper-gaudi:latest
+    replicas: 1
+    ports:
+      - containerPort: 7066
+    env:
+      - name: OMPI_MCA_btl_vader_single_copy_mechanism
+        value: none
+      - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+        value: 'true'
+      - name: runtime
+        value: habana
+      - name: HABANA_VISIBLE_DEVICES
+        value: all
+    resources:
+      limits:
+        habana.ai/gaudi: 1
 
   - name: tts-deploy
-    spec:
-      ports:
-        - containerPort: 9088
-
-  - name: llm-deploy
-    spec:
-      ports:
-        - containerPort: 9000
+    image: opea/tts:latest
+    replicas: 1
+    ports:
+      - containerPort: 9088
 
   - name: speecht5-deploy
-    spec:
-      ports:
-        - containerPort: 7055
-      env:
-        - name: OMPI_MCA_btl_vader_single_copy_mechanism
-          value: none
-        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
-          value: 'true'
-        - name: runtime
-          value: habana
-        - name: HABANA_VISIBLE_DEVICES
-          value: all
+    image: opea/speecht5-gaudi:latest
+    replicas: 1
+    ports:
+      - containerPort: 7055
+    env:
+      - name: OMPI_MCA_btl_vader_single_copy_mechanism
+        value: none
+      - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+        value: 'true'
+      - name: runtime
+        value: habana
+      - name: HABANA_VISIBLE_DEVICES
+        value: all
+    resources:
+      limits:
+        habana.ai/gaudi: 1
 
+  - name: llm-deploy
+    image: opea/llm-tgi:latest
+    replicas: 1
+    ports:
+      - containerPort: 9000
+
   - name: llm-dependency-deploy
-    spec:
-      ports:
-        - containerPort: 80
-      resources:
-        limits:
-          habana.ai/gaudi: 1
-      args:
-        - name: "--model-id"
-          value: $(LLM_MODEL_ID)
-        - name: "--max-input-length"
-          value: "2048"
-        - name: "--max-total-tokens"
-          value: "4096"
-      env:
-        - name: OMPI_MCA_btl_vader_single_copy_mechanism
-          value: none
-        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
-          value: "true"
-        - name: runtime
-          value: habana
-        - name: HABANA_VISIBLE_DEVICES
-          value: all
-        - name: ENABLE_HPU_GRAPH
-          value: 'true'
-        - name: LIMIT_HPU_GRAPH
-          value: 'true'
-        - name: USE_FLASH_ATTENTION
-          value: 'true'
-        - name: FLASH_ATTENTION_RECOMPUTE
-          value: 'true'
-      volumeMounts:
-        - mountPath: /data
-          name: model-volume
-        - mountPath: /dev/shm
-          name: shm
-      volumes:
-        - hostPath:
-            path: /mnt/models
-            type: Directory
-          name: model-volume
-        - emptyDir:
-            medium: Memory
-            sizeLimit: 1Gi
-          name: shm
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+    replicas: 1
+    ports:
+      - containerPort: 80
+    resources:
+      limits:
+        habana.ai/gaudi: 1
+    args:
+      - name: "--model-id"
+        value: $(LLM_MODEL_ID)
+      - name: "--max-input-length"
+        value: "2048"
+      - name: "--max-total-tokens"
+        value: "4096"
+    env:
+      - name: OMPI_MCA_btl_vader_single_copy_mechanism
+        value: none
+      - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+        value: "true"
+      - name: runtime
+        value: habana
+      - name: HABANA_VISIBLE_DEVICES
+        value: all
+      - name: ENABLE_HPU_GRAPH
+        value: 'true'
+      - name: LIMIT_HPU_GRAPH
+        value: 'true'
+      - name: USE_FLASH_ATTENTION
+        value: 'true'
+      - name: FLASH_ATTENTION_RECOMPUTE
+        value: 'true'
+    volumeMounts:
+      - mountPath: /data
+        name: model-volume
+      - mountPath: /dev/shm
+        name: shm
+    volumes:
+      - hostPath:
+          path: /mnt/models
+          type: Directory
+        name: model-volume
+      - emptyDir:
+          medium: Memory
+          sizeLimit: 1Gi
+        name: shm
 
 services:
   - name: asr-svc
diff --git a/ChatQnA/benchmark/performance/helm_charts/values.yaml b/ChatQnA/benchmark/performance/helm_charts/values.yaml
index ddb76b0bb..5cd34f578 100644
--- a/ChatQnA/benchmark/performance/helm_charts/values.yaml
+++ b/ChatQnA/benchmark/performance/helm_charts/values.yaml
@@ -5,7 +5,7 @@ namespace: default
 
 config:
   CONFIG_MAP_NAME: chatqna-config
-  NODE_SELECTOR: chatqna-opea
+  NODE_SELECTOR: opea  # same value as the AudioQnA and FaqGen benchmark charts
   EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
   LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
   RERANK_MODEL_ID: BAAI/bge-reranker-base
diff --git a/FaqGen/benchmark/performance/helm_charts/values.yaml b/FaqGen/benchmark/performance/helm_charts/values.yaml
index 6f12074b8..1548a7eab 100644
--- a/FaqGen/benchmark/performance/helm_charts/values.yaml
+++ b/FaqGen/benchmark/performance/helm_charts/values.yaml
@@ -6,7 +6,7 @@ namespace: default
 config:
   LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct
   CONFIG_MAP_NAME: faq-config
-  NODE_SELECTOR: faq-opea
+  NODE_SELECTOR: opea  # same value as the AudioQnA and ChatQnA benchmark charts
   HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 
 microservices: