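Refactor AudioQnA benchmark Helm chart (merge deployments/podSpecs into microservices); unify NODE_SELECTOR to "opea" across AudioQnA, ChatQnA, and FaqGen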

2024-10-21 11:06:37 +03:00
parent fdb8a33a6e
commit 048b4e1df9
6 changed files with 144 additions and 146 deletions
--- a/AudioQnA/benchmark/helm_charts/customize.yaml
+++ b/AudioQnA/benchmark/helm_charts/customize.yaml
@@ -1,50 +1,23 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0

-HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-
 podSpecs:
  - name: audioqna-backend-server-deploy
-    spec:
-      image_name: opea/audioqna
-      image_tag: latest
-      replicas: 1
+    replicas: 1

  - name: asr-deploy
-    spec:
-      image_name: opea/asr
-      image_tag: latest
-      replicas: 1
+    replicas: 1

  - name: whisper-deploy
-    spec:
-      image_name: opea/whisper-gaudi
-      image_tag: latest
-      replicas: 1
-      resources:
-        limits:
-          habana.ai/gaudi: 1
+    replicas: 1
+

  - name: tts-deploy
-    spec:
-      image_name: opea/tts
-      image_tag: latest
-      replicas: 1
+    replicas: 1

  - name: speecht5-deploy
-    spec:
-      image_name: opea/speecht5-gaudi
-      image_tag: latest
-      replicas: 1
-      resources:
-        limits:
-          habana.ai/gaudi: 1
+    replicas: 1
+

  - name: llm-dependency-deploy
-    spec:
-      image_name: ghcr.io/huggingface/tgi-gaudi
-      image_tag: 2.0.5
-      replicas: 1
-      resources:
-        limits:
-          habana.ai/gaudi: 1
+    replicas: 1
--- a/AudioQnA/benchmark/helm_charts/templates/configmap.yaml
+++ b/AudioQnA/benchmark/helm_charts/templates/configmap.yaml
@@ -7,7 +7,7 @@ metadata:
  name: {{ .Values.config.CONFIG_MAP_NAME }}
  namespace: default
 data:
-  HUGGINGFACEHUB_API_TOKEN: {{ .Values.HUGGINGFACEHUB_API_TOKEN }}
+  HUGGINGFACEHUB_API_TOKEN: {{ .Values.config.HUGGINGFACEHUB_API_TOKEN }}
  LLM_MODEL_ID: {{ .Values.config.LLM_MODEL_ID }}
  NODE_SELECTOR: {{ .Values.config.NODE_SELECTOR }}
  TGI_LLM_ENDPOINT: http://faq-tgi-svc.default.svc.cluster.local:8010
--- a/AudioQnA/benchmark/helm_charts/templates/deployment.yaml
+++ b/AudioQnA/benchmark/helm_charts/templates/deployment.yaml
@@ -2,33 +2,38 @@
 # SPDX-License-Identifier: Apache-2.0

 {{- $global := .Values }}
-{{- range $deployment := .Values.deployments }}
-{{- range $podSpec := $global.podSpecs }}
-{{- if eq $podSpec.name $deployment.name }}
+{{- range $microservice := .Values.microservices }}
 apiVersion: apps/v1
 kind: Deployment
 metadata:
-  name: {{ $deployment.name }}
+  name: {{ $microservice.name }}
  namespace: default
 spec:
-  replicas: {{ $podSpec.spec.replicas }}
+  {{- $replicas := $microservice.replicas }}
+  {{- range $podSpec := $global.podSpecs }}
+    {{- if eq $podSpec.name $microservice.name }}
+      {{- $replicas = $podSpec.replicas | default $microservice.replicas }}
+    {{- end }}
+  {{- end }}
+  replicas: {{ $replicas }}
+
  selector:
    matchLabels:
-      app: {{ $deployment.name }}
+      app: {{ $microservice.name }}
  template:
    metadata:
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
      labels:
-        app: {{ $deployment.name }}
+        app: {{ $microservice.name }}
    spec:
      containers:
      - envFrom:
        - configMapRef:
            name: {{ $global.config.CONFIG_MAP_NAME }}
-        {{- if $deployment.spec.args }}
+        {{- if $microservice.args }}
        args:
-        {{- range $arg := $deployment.spec.args }}
+        {{- range $arg := $microservice.args }}
          {{- if $arg.name }}
          - {{ $arg.name }}
          {{- end }}
@@ -38,31 +43,39 @@ spec:
        {{- end }}
        {{- end }}

-        {{- if $deployment.spec.env }}
+        {{- if $microservice.env }}
        env:
-        {{- range $env := $deployment.spec.env }}
+        {{- range $env := $microservice.env }}
          - name: {{ $env.name }}
            value: "{{ $env.value }}"
        {{- end }}
        {{- end }}

-        image: {{ $podSpec.spec.image_name }}:{{ $podSpec.spec.image_tag }}
+        image: {{ $microservice.image }}
        imagePullPolicy: IfNotPresent
-        name: {{ $podSpec.name }}
+        name: {{ $microservice.name }}

-        {{- if $deployment.spec.ports }}
+        {{- if $microservice.ports }}
        ports:
-        {{- range $port := $deployment.spec.ports }}
+        {{- range $port := $microservice.ports }}
          {{- range $port_name, $port_id := $port }}
          - {{ $port_name }}: {{ $port_id }}
          {{- end }}
        {{- end }}
        {{- end }}

+        {{- $resources := $microservice.resources }}
+        {{- range $podSpec := $global.podSpecs }}
+          {{- if eq $podSpec.name $microservice.name }}
+            {{- if $podSpec.resources }}
+              {{- $resources = $podSpec.resources }}
+            {{- end }}
+          {{- end }}
+        {{- end }}

-        {{- if $podSpec.spec.resources }}
+        {{- if $resources }}
        resources:
-        {{- range $resourceType, $resource := $podSpec.spec.resources }}
+        {{- range $resourceType, $resource := $resources }}
          {{ $resourceType }}:
          {{- range $limitType, $limit := $resource }}
            {{ $limitType }}: {{ $limit }}
@@ -70,9 +83,9 @@ spec:
        {{- end }}
        {{- end }}

-        {{- if $deployment.spec.volumeMounts }}
+        {{- if $microservice.volumeMounts }}
        volumeMounts:
-        {{- range $volumeMount := $deployment.spec.volumeMounts }}
+        {{- range $volumeMount := $microservice.volumeMounts }}
          - mountPath: {{ $volumeMount.mountPath }}
            name: {{ $volumeMount.name }}
        {{- end }}
@@ -85,15 +98,15 @@ spec:
      topologySpreadConstraints:
      - labelSelector:
          matchLabels:
-            app: {{ $deployment.name }}
+            app: {{ $microservice.name }}
        maxSkew: 1
        topologyKey: kubernetes.io/hostname
        whenUnsatisfiable: ScheduleAnyway


-      {{- if $deployment.spec.volumes }}
+      {{- if $microservice.volumes }}
      volumes:
-      {{- range $index, $volume := $deployment.spec.volumes }}
+      {{- range $index, $volume := $microservice.volumes }}
        - name: {{ $volume.name }}
          {{- if $volume.hostPath }}
          hostPath:
@@ -109,5 +122,3 @@ spec:

 ---
 {{- end }}
-{{- end }}
-{{- end }}
--- a/AudioQnA/benchmark/helm_charts/values.yaml
+++ b/AudioQnA/benchmark/helm_charts/values.yaml
@@ -6,7 +6,7 @@ namespace: default
 config:
  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
  CONFIG_MAP_NAME: audio-qna-config
-  NODE_SELECTOR: audioqna-opea
+  NODE_SELECTOR: opea
  ASR_ENDPOINT: http://whisper-svc.default.svc.cluster.local:7066
  TTS_ENDPOINT: http://speecht5-svc.default.svc.cluster.local:7055
  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:3006
@@ -17,101 +17,115 @@ config:
  LLM_SERVICE_PORT: "3007"
  TTS_SERVICE_HOST_IP: tts-svc
  TTS_SERVICE_PORT: "3002"
+  HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}

-deployments:
+microservices:
  - name: audioqna-backend-server-deploy
-    spec:
-      ports:
-        - containerPort: 8888
+    image: opea/audioqna:latest
+    replicas: 1
+    ports:
+      - containerPort: 8888

  - name: asr-deploy
-    spec:
-      ports:
-        - containerPort: 9099
+    image: opea/asr:latest
+    replicas: 1
+    ports:
+      - containerPort: 9099

  - name: whisper-deploy
-    spec:
-      ports:
-        - containerPort: 7066
-      env:
-        - name: OMPI_MCA_btl_vader_single_copy_mechanism
-          value: none
-        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
-          value: 'true'
-        - name: runtime
-          value: habana
-        - name: HABANA_VISIBLE_DEVICES
-          value: all
+    image: opea/whisper-gaudi:latest
+    replicas: 1
+    ports:
+      - containerPort: 7066
+    env:
+      - name: OMPI_MCA_btl_vader_single_copy_mechanism
+        value: none
+      - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+        value: 'true'
+      - name: runtime
+        value: habana
+      - name: HABANA_VISIBLE_DEVICES
+        value: all
+    resources:
+      limits:
+        habana.ai/gaudi: 1

  - name: tts-deploy
-    spec:
-      ports:
-        - containerPort: 9088
-
-  - name: llm-deploy
-    spec:
-      ports:
-        - containerPort: 9000
+    image: opea/tts:latest
+    replicas: 1
+    ports:
+      - containerPort: 9088

  - name: speecht5-deploy
-    spec:
-      ports:
-        - containerPort: 7055
-      env:
-        - name: OMPI_MCA_btl_vader_single_copy_mechanism
-          value: none
-        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
-          value: 'true'
-        - name: runtime
-          value: habana
-        - name: HABANA_VISIBLE_DEVICES
-          value: all
+    image: opea/speecht5-gaudi:latest
+    replicas: 1
+    ports:
+      - containerPort: 7055
+    env:
+      - name: OMPI_MCA_btl_vader_single_copy_mechanism
+        value: none
+      - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+        value: 'true'
+      - name: runtime
+        value: habana
+      - name: HABANA_VISIBLE_DEVICES
+        value: all
+    resources:
+      limits:
+        habana.ai/gaudi: 1

+  - name: llm-deploy
+    image: opea/llm-tgi:latest
+    replicas: 1
+    ports:
+      - containerPort: 9000
+      
  - name: llm-dependency-deploy
-    spec:
-      ports:
-        - containerPort: 80
-      resources:
-        limits:
-          habana.ai/gaudi: 1
-      args:
-        - name: "--model-id"
-          value: $(LLM_MODEL_ID)
-        - name: "--max-input-length"
-          value: "2048"
-        - name: "--max-total-tokens"
-          value: "4096"
-      env:
-        - name: OMPI_MCA_btl_vader_single_copy_mechanism
-          value: none
-        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
-          value: "true"
-        - name: runtime
-          value: habana
-        - name: HABANA_VISIBLE_DEVICES
-          value: all
-        - name: ENABLE_HPU_GRAPH
-          value: 'true'
-        - name: LIMIT_HPU_GRAPH
-          value: 'true'
-        - name: USE_FLASH_ATTENTION
-          value: 'true'
-        - name: FLASH_ATTENTION_RECOMPUTE
-          value: 'true'
-      volumeMounts:
-        - mountPath: /data
-          name: model-volume
-        - mountPath: /dev/shm
-          name: shm
-      volumes:
-        - hostPath:
-            path: /mnt/models
-            type: Directory
-          name: model-volume
-        - emptyDir:
-            medium: Memory
-            sizeLimit: 1Gi
-          name: shm
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+    replicas: 1
+    ports:
+      - containerPort: 80
+    resources:
+      limits:
+        habana.ai/gaudi: 1
+    args:
+      - name: "--model-id"
+        value: $(LLM_MODEL_ID)
+      - name: "--max-input-length"
+        value: "2048"
+      - name: "--max-total-tokens"
+        value: "4096"
+    env:
+      - name: OMPI_MCA_btl_vader_single_copy_mechanism
+        value: none
+      - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+        value: "true"
+      - name: runtime
+        value: habana
+      - name: HABANA_VISIBLE_DEVICES
+        value: all
+      - name: ENABLE_HPU_GRAPH
+        value: 'true'
+      - name: LIMIT_HPU_GRAPH
+        value: 'true'
+      - name: USE_FLASH_ATTENTION
+        value: 'true'
+      - name: FLASH_ATTENTION_RECOMPUTE
+        value: 'true'
+    volumeMounts:
+      - mountPath: /data
+        name: model-volume
+      - mountPath: /dev/shm
+        name: shm
+    volumes:
+      - hostPath:
+          path: /mnt/models
+          type: Directory
+        name: model-volume
+      - emptyDir:
+          medium: Memory
+          sizeLimit: 1Gi
+        name: shm

 services:
  - name: asr-svc
--- a/ChatQnA/benchmark/performance/helm_charts/values.yaml
+++ b/ChatQnA/benchmark/performance/helm_charts/values.yaml
@@ -5,7 +5,7 @@ namespace: default

 config:
  CONFIG_MAP_NAME: chatqna-config
-  NODE_SELECTOR: chatqna-opea
+  NODE_SELECTOR: opea
  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
  RERANK_MODEL_ID: BAAI/bge-reranker-base
--- a/FaqGen/benchmark/performance/helm_charts/values.yaml
+++ b/FaqGen/benchmark/performance/helm_charts/values.yaml
@@ -6,7 +6,7 @@ namespace: default
 config:
  LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct
  CONFIG_MAP_NAME: faq-config
-  NODE_SELECTOR: faq-opea
+  NODE_SELECTOR: opea
  HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}

 microservices: