benchmark helmcharts (#995)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-10-21 11:13:24 +08:00
parent 184e9a43b8
commit d6b04b3405
17 changed files with 856 additions and 1 deletions
--- a/AudioQnA/benchmark/helm_charts/.helmignore
+++ b/AudioQnA/benchmark/helm_charts/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
--- a/AudioQnA/benchmark/helm_charts/Chart.yaml
+++ b/AudioQnA/benchmark/helm_charts/Chart.yaml
@@ -0,0 +1,27 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v2
+# Chart name; this chart packages the AudioQnA benchmark pipeline.
+name: audioqna-charts
+description: A Helm chart for deploying the AudioQnA benchmark pipeline on Kubernetes
+
+# A chart can be either an 'application' or a 'library' chart.
+#
+# Application charts are a collection of templates that can be packaged into versioned archives
+# to be deployed.
+#
+# Library charts provide useful utilities or functions for the chart developer. They're included as
+# a dependency of application charts to inject those utilities and functions into the rendering
+# pipeline. Library charts do not define any templates and therefore cannot be deployed.
+type: application
+
+# This is the chart version. This version number should be incremented each time you make changes
+# to the chart and its templates, including the app version.
+# Versions are expected to follow Semantic Versioning (https://semver.org/),
+# i.e. MAJOR.MINOR.PATCH. A two-part value such as 1.0 is parsed by YAML as a float.
+version: 1.0.0
+
+# This is the version number of the application being deployed. This version number should be
+# incremented each time you make changes to the application. Versions are not expected to
+# follow Semantic Versioning. They should reflect the version the application is using.
+# It is recommended to use it with quotes.
+appVersion: "1.16.0"
--- a/AudioQnA/benchmark/helm_charts/README.md
+++ b/AudioQnA/benchmark/helm_charts/README.md
@@ -0,0 +1,36 @@
+# AudioQnA Deployment
+
+This document guides you through deploying AudioQnA pipelines using Helm charts. Helm charts simplify managing Kubernetes applications by packaging configuration and resources.
+
+## Getting Started
+
+### Preparation
+
+```bash
+# on k8s-master node
+cd GenAIExamples/AudioQnA/benchmark/helm_charts
+
+# Replace the value of HUGGINGFACEHUB_API_TOKEN with your actual Hugging Face token:
+# vim customize.yaml
+HUGGINGFACEHUB_API_TOKEN: hf_xxxxx
+```
+
+### Deploy your AudioQnA
+
+```bash
+# Deploy an AudioQnA pipeline using the specified YAML configuration.
+# To deploy with different configurations, simply provide a different YAML file.
+helm install audioqna ../helm_charts/ -f customize.yaml
+```
+
+Notes: The provided [BKC manifests](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/benchmark) for single, two, and four node Kubernetes clusters are generated using this tool.
+
+## Customize your own AudioQnA pipelines (Optional)
+
+There are two YAML configuration files you can modify.
+
+- customize.yaml
+  This file specifies the image name, image tag, number of replicas, and resource limits (such as Gaudi devices) for each pod.
+
+- values.yaml
+  This file contains the default microservice configurations for AudioQnA. Please review and understand each parameter before making any changes.
--- a/AudioQnA/benchmark/helm_charts/customize.yaml
+++ b/AudioQnA/benchmark/helm_charts/customize.yaml
@@ -0,0 +1,50 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Hugging Face token published to the pods through the chart's ConfigMap.
+# Helm does not expand shell variables, so replace this placeholder with the
+# real token before installing.
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
+
+# Per-deployment image, replica count, and resource settings.
+# templates/deployment.yaml renders a Deployment only for names that appear
+# both here and under `deployments` in values.yaml.
+podSpecs:
+  - name: audioqna-backend-server-deploy
+    spec:
+      image_name: opea/audioqna
+      image_tag: latest
+      replicas: 1
+
+  - name: asr-deploy
+    spec:
+      image_name: opea/asr
+      image_tag: latest
+      replicas: 1
+
+  - name: whisper-deploy
+    spec:
+      image_name: opea/whisper-gaudi
+      image_tag: latest
+      replicas: 1
+      resources:
+        limits:
+          habana.ai/gaudi: 1
+
+  - name: tts-deploy
+    spec:
+      image_name: opea/tts
+      image_tag: latest
+      replicas: 1
+
+  - name: speecht5-deploy
+    spec:
+      image_name: opea/speecht5-gaudi
+      image_tag: latest
+      replicas: 1
+      resources:
+        limits:
+          habana.ai/gaudi: 1
+
+  # values.yaml declares an `llm-deploy` Deployment and an `llm-svc` Service
+  # selecting it; without a podSpec of the same name no pod would back that
+  # Service.
+  # NOTE(review): opea/llm-tgi is assumed to be the LLM microservice image;
+  # confirm before merging.
+  - name: llm-deploy
+    spec:
+      image_name: opea/llm-tgi
+      image_tag: latest
+      replicas: 1
+
+  - name: llm-dependency-deploy
+    spec:
+      image_name: ghcr.io/huggingface/tgi-gaudi
+      # Quoted so the version-like tag is always read as a string.
+      image_tag: "2.0.5"
+      replicas: 1
+      resources:
+        limits:
+          habana.ai/gaudi: 1
--- a/AudioQnA/benchmark/helm_charts/templates/configmap.yaml
+++ b/AudioQnA/benchmark/helm_charts/templates/configmap.yaml
@@ -0,0 +1,25 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# ConfigMap consumed via envFrom by every container rendered from
+# templates/deployment.yaml.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ .Values.config.CONFIG_MAP_NAME }}
+  namespace: default
+data:
+  # ConfigMap values must be strings: quote the templated scalars so that an
+  # empty or boolean/number-looking value is not re-typed by the YAML parser.
+  HUGGINGFACEHUB_API_TOKEN: {{ .Values.HUGGINGFACEHUB_API_TOKEN | default "" | quote }}
+  LLM_MODEL_ID: {{ .Values.config.LLM_MODEL_ID | quote }}
+  NODE_SELECTOR: {{ .Values.config.NODE_SELECTOR | quote }}
+
+  # Model-serving endpoints (whisper, speecht5, TGI), addressed via cluster DNS.
+  ASR_ENDPOINT: http://whisper-svc.default.svc.cluster.local:7066
+  TTS_ENDPOINT: http://speecht5-svc.default.svc.cluster.local:7055
+  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:3006
+  # Service names and ports of the megaservice and its microservices.
+  MEGA_SERVICE_HOST_IP: audioqna-backend-server-svc
+  ASR_SERVICE_HOST_IP: asr-svc
+  ASR_SERVICE_PORT: "3001"
+  LLM_SERVICE_HOST_IP: llm-svc
+  LLM_SERVICE_PORT: "3007"
+  TTS_SERVICE_HOST_IP: tts-svc
+  TTS_SERVICE_PORT: "3002"
+---
--- a/AudioQnA/benchmark/helm_charts/templates/deployment.yaml
+++ b/AudioQnA/benchmark/helm_charts/templates/deployment.yaml
@@ -0,0 +1,113 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+{{- /* Render one Deployment per .Values.deployments entry that has a same-named .Values.podSpecs entry; unmatched entries are skipped. */}}
+{{- $global := .Values }}
+{{- range $deployment := .Values.deployments }}
+{{- range $podSpec := $global.podSpecs }}
+{{- if eq $podSpec.name $deployment.name }}
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ $deployment.name }}
+  namespace: default
+spec:
+  replicas: {{ $podSpec.spec.replicas }}
+  selector:
+    matchLabels:
+      app: {{ $deployment.name }}
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: {{ $deployment.name }}
+    spec:
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: {{ $global.config.CONFIG_MAP_NAME }}
+        {{- /* Each args entry contributes its flag name and, when set, its value as separate argv items. */}}
+        {{- if $deployment.spec.args }}
+        args:
+        {{- range $arg := $deployment.spec.args }}
+          {{- if $arg.name }}
+          - {{ $arg.name | quote }}
+          {{- end }}
+          {{- if $arg.value }}
+          - {{ $arg.value | quote }}
+          {{- end }}
+        {{- end }}
+        {{- end }}
+
+        {{- if $deployment.spec.env }}
+        env:
+        {{- range $env := $deployment.spec.env }}
+          - name: {{ $env.name }}
+            value: {{ $env.value | quote }}
+        {{- end }}
+        {{- end }}
+
+        image: {{ $podSpec.spec.image_name }}:{{ $podSpec.spec.image_tag }}
+        imagePullPolicy: IfNotPresent
+        name: {{ $podSpec.name }}
+
+        {{- /* toYaml keeps every field of a port entry inside one list item. */}}
+        {{- with $deployment.spec.ports }}
+        ports:
+          {{- toYaml . | nindent 10 }}
+        {{- end }}
+
+        {{- /* Resources come from the podSpecs entry, not from the deployments entry. */}}
+        {{- with $podSpec.spec.resources }}
+        resources:
+          {{- toYaml . | nindent 10 }}
+        {{- end }}
+
+        {{- with $deployment.spec.volumeMounts }}
+        volumeMounts:
+          {{- toYaml . | nindent 10 }}
+        {{- end }}
+
+      hostIPC: true
+      nodeSelector:
+        node-type: {{ $global.config.NODE_SELECTOR }}
+      serviceAccountName: default
+      topologySpreadConstraints:
+      - labelSelector:
+          matchLabels:
+            app: {{ $deployment.name }}
+        maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+
+      {{- /* Volume definitions are copied verbatim, so any volume type and only the fields actually set are emitted. */}}
+      {{- with $deployment.spec.volumes }}
+      volumes:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+
+---
+{{- end }}
+{{- end }}
+{{- end }}
--- a/AudioQnA/benchmark/helm_charts/templates/service.yaml
+++ b/AudioQnA/benchmark/helm_charts/templates/service.yaml
@@ -0,0 +1,24 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+{{- /* Render one Service per entry in .Values.services. */}}
+{{- range $svc := .Values.services }}
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ $svc.name }}
+  namespace: default
+spec:
+  ports:
+  {{- range $portSpec := $svc.spec.ports }}
+    - name: {{ $portSpec.name }}
+    {{- /* The name heads the list item; every other field of the port entry is copied below it. */}}
+    {{- range $field, $fieldValue := $portSpec }}
+      {{- if not (eq $field "name") }}
+      {{ $field }}: {{ $fieldValue }}
+      {{- end }}
+    {{- end }}
+  {{- end }}
+  selector:
+    app: {{ $svc.spec.selector.app }}
+  type: {{ $svc.spec.type }}
+---
+{{- end }}
--- a/AudioQnA/benchmark/helm_charts/values.yaml
+++ b/AudioQnA/benchmark/helm_charts/values.yaml
@@ -0,0 +1,186 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# NOTE(review): the templates in this chart hard-code `namespace: default`
+# and do not read this value.
+namespace: default
+
+# Shared settings. templates/configmap.yaml and templates/deployment.yaml read
+# CONFIG_MAP_NAME, LLM_MODEL_ID and NODE_SELECTOR from this mapping.
+config:
+  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
+  # Name of the ConfigMap that every container loads through envFrom.
+  CONFIG_MAP_NAME: audio-qna-config
+  # Rendered as the `node-type` nodeSelector value of every Deployment.
+  NODE_SELECTOR: audioqna-opea
+  # NOTE(review): the keys below mirror literals written directly in
+  # templates/configmap.yaml; that template does not reference them, so
+  # overriding them here has no effect on the rendered ConfigMap.
+  ASR_ENDPOINT: http://whisper-svc.default.svc.cluster.local:7066
+  TTS_ENDPOINT: http://speecht5-svc.default.svc.cluster.local:7055
+  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:3006
+  MEGA_SERVICE_HOST_IP: audioqna-backend-server-svc
+  ASR_SERVICE_HOST_IP: asr-svc
+  ASR_SERVICE_PORT: "3001"
+  LLM_SERVICE_HOST_IP: llm-svc
+  LLM_SERVICE_PORT: "3007"
+  TTS_SERVICE_HOST_IP: tts-svc
+  TTS_SERVICE_PORT: "3002"
+
+# Container-level settings (ports, args, env, volumes) for each Deployment.
+# Image, replica count and resources come from the same-named `podSpecs`
+# entry (see customize.yaml).
+deployments:
+  # AudioQnA megaservice.
+  - name: audioqna-backend-server-deploy
+    spec:
+      ports:
+        - containerPort: 8888
+
+  # ASR microservice.
+  - name: asr-deploy
+    spec:
+      ports:
+        - containerPort: 9099
+
+  # Whisper model server.
+  - name: whisper-deploy
+    spec:
+      ports:
+        - containerPort: 7066
+      env:
+        - name: OMPI_MCA_btl_vader_single_copy_mechanism
+          value: "none"
+        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+          value: "true"
+        - name: runtime
+          value: "habana"
+        - name: HABANA_VISIBLE_DEVICES
+          value: "all"
+
+  # TTS microservice.
+  - name: tts-deploy
+    spec:
+      ports:
+        - containerPort: 9088
+
+  # LLM microservice.
+  - name: llm-deploy
+    spec:
+      ports:
+        - containerPort: 9000
+
+  # SpeechT5 model server.
+  - name: speecht5-deploy
+    spec:
+      ports:
+        - containerPort: 7055
+      env:
+        - name: OMPI_MCA_btl_vader_single_copy_mechanism
+          value: "none"
+        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+          value: "true"
+        - name: runtime
+          value: "habana"
+        - name: HABANA_VISIBLE_DEVICES
+          value: "all"
+
+  # TGI model server backing the LLM microservice.
+  - name: llm-dependency-deploy
+    spec:
+      ports:
+        - containerPort: 80
+      # NOTE(review): templates/deployment.yaml takes container resources from
+      # the matching podSpecs entry, not from this key.
+      resources:
+        limits:
+          habana.ai/gaudi: 1
+      # $(LLM_MODEL_ID) is left for Kubernetes to expand from the container
+      # environment, which is populated from the chart's ConfigMap.
+      args:
+        - name: "--model-id"
+          value: "$(LLM_MODEL_ID)"
+        - name: "--max-input-length"
+          value: "2048"
+        - name: "--max-total-tokens"
+          value: "4096"
+      env:
+        - name: OMPI_MCA_btl_vader_single_copy_mechanism
+          value: "none"
+        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+          value: "true"
+        - name: runtime
+          value: "habana"
+        - name: HABANA_VISIBLE_DEVICES
+          value: "all"
+        - name: ENABLE_HPU_GRAPH
+          value: "true"
+        - name: LIMIT_HPU_GRAPH
+          value: "true"
+        - name: USE_FLASH_ATTENTION
+          value: "true"
+        - name: FLASH_ATTENTION_RECOMPUTE
+          value: "true"
+      volumeMounts:
+        - name: model-volume
+          mountPath: /data
+        - name: shm
+          mountPath: /dev/shm
+      volumes:
+        - name: model-volume
+          hostPath:
+            path: /mnt/models
+            type: Directory
+        - name: shm
+          emptyDir:
+            medium: Memory
+            sizeLimit: 1Gi
+
+# One Service per entry; `selector.app` must equal the `name` of the
+# Deployment it fronts, and `targetPort` that Deployment's containerPort.
+services:
+  - name: asr-svc
+    spec:
+      ports:
+        - name: service
+          port: 3001
+          targetPort: 9099
+      selector:
+        app: asr-deploy
+      type: ClusterIP
+
+  - name: whisper-svc
+    spec:
+      ports:
+        - name: service
+          port: 7066
+          targetPort: 7066
+      selector:
+        app: whisper-deploy
+      type: ClusterIP
+
+  - name: tts-svc
+    spec:
+      ports:
+        - name: service
+          port: 3002
+          targetPort: 9088
+      selector:
+        app: tts-deploy
+      type: ClusterIP
+
+  - name: speecht5-svc
+    spec:
+      ports:
+        - name: service
+          port: 7055
+          targetPort: 7055
+      selector:
+        app: speecht5-deploy
+      type: ClusterIP
+
+  - name: llm-dependency-svc
+    spec:
+      ports:
+        - name: service
+          port: 3006
+          targetPort: 80
+      selector:
+        app: llm-dependency-deploy
+      type: ClusterIP
+
+  # This list item must sit at the same indentation as its siblings;
+  # a deeper `- ` here makes the whole file unparseable.
+  - name: llm-svc
+    spec:
+      ports:
+        - name: service
+          port: 3007
+          targetPort: 9000
+      selector:
+        app: llm-deploy
+      type: ClusterIP
+
+  # Megaservice entry point, exposed outside the cluster on a fixed node port.
+  - name: audioqna-backend-server-svc
+    spec:
+      ports:
+        - name: service
+          port: 3088
+          targetPort: 8888
+          nodePort: 30666
+      selector:
+        app: audioqna-backend-server-deploy
+      type: NodePort
--- a/ChatQnA/benchmark/performance/helm_charts/README.md
+++ b/ChatQnA/benchmark/performance/helm_charts/README.md
@@ -20,7 +20,7 @@ HUGGINGFACEHUB_API_TOKEN: hf_xxxxx
 ```bash
 # Deploy a ChatQnA pipeline using the specified YAML configuration.
 # To deploy with different configurations, simply provide a different YAML file.
-helm install chatqna helm_charts/ -f customize.yaml
+helm install chatqna ../helm_charts/ -f customize.yaml
 ```

 Notes: The provided [BKC manifests](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/benchmark) for single, two, and four node Kubernetes clusters are generated using this tool.
--- a/FaqGen/benchmark/performance/helm_charts/.helmignore
+++ b/FaqGen/benchmark/performance/helm_charts/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
--- a/FaqGen/benchmark/performance/helm_charts/Chart.yaml
+++ b/FaqGen/benchmark/performance/helm_charts/Chart.yaml
@@ -0,0 +1,27 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v2
+# Chart name; this chart packages the FaqGen benchmark pipeline.
+name: faqgen-charts
+description: A Helm chart for deploying the FaqGen benchmark pipeline on Kubernetes
+
+# A chart can be either an 'application' or a 'library' chart.
+#
+# Application charts are a collection of templates that can be packaged into versioned archives
+# to be deployed.
+#
+# Library charts provide useful utilities or functions for the chart developer. They're included as
+# a dependency of application charts to inject those utilities and functions into the rendering
+# pipeline. Library charts do not define any templates and therefore cannot be deployed.
+type: application
+
+# This is the chart version. This version number should be incremented each time you make changes
+# to the chart and its templates, including the app version.
+# Versions are expected to follow Semantic Versioning (https://semver.org/),
+# i.e. MAJOR.MINOR.PATCH. A two-part value such as 1.0 is parsed by YAML as a float.
+version: 1.0.0
+
+# This is the version number of the application being deployed. This version number should be
+# incremented each time you make changes to the application. Versions are not expected to
+# follow Semantic Versioning. They should reflect the version the application is using.
+# It is recommended to use it with quotes.
+appVersion: "1.16.0"
--- a/FaqGen/benchmark/performance/helm_charts/README.md
+++ b/FaqGen/benchmark/performance/helm_charts/README.md
@@ -0,0 +1,36 @@
+# FaqGen Deployment
+
+This document guides you through deploying FaqGen pipelines using Helm charts. Helm charts simplify managing Kubernetes applications by packaging configuration and resources.
+
+## Getting Started
+
+### Preparation
+
+```bash
+# on k8s-master node
+cd GenAIExamples/FaqGen/benchmark/performance/helm_charts
+
+# Replace the value of HUGGINGFACEHUB_API_TOKEN with your actual Hugging Face token:
+# vim customize.yaml
+HUGGINGFACEHUB_API_TOKEN: hf_xxxxx
+```
+
+### Deploy your FaqGen
+
+```bash
+# Deploy a FaqGen pipeline using the specified YAML configuration.
+# To deploy with different configurations, simply provide a different YAML file.
+helm install faqgen ../helm_charts/ -f customize.yaml
+```
+
+Notes: The provided [BKC manifests](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/benchmark) for single, two, and four node Kubernetes clusters are generated using this tool.
+
+## Customize your own FaqGen pipelines (Optional)
+
+There are two YAML configuration files you can modify.
+
+- customize.yaml
+  This file specifies the image name, image tag, number of replicas, and resource requests and limits (such as CPU, memory, or Gaudi devices) for each pod.
+
+- values.yaml
+  This file contains the default microservice configurations for FaqGen. Please review and understand each parameter before making any changes.
--- a/FaqGen/benchmark/performance/helm_charts/customize.yaml
+++ b/FaqGen/benchmark/performance/helm_charts/customize.yaml
@@ -0,0 +1,34 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Hugging Face token published to the pods through the chart's ConfigMap.
+# Helm does not expand shell variables, so replace this placeholder with the
+# real token before installing.
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
+
+# Per-deployment image, replica count, and resource settings.
+# templates/deployment.yaml renders a Deployment only for names that appear
+# both here and under `deployments` in values.yaml.
+podSpecs:
+  # FaqGen megaservice.
+  # NOTE(review): this entry previously used the ChatQnA image
+  # (opea/chatqna); opea/faqgen is assumed to be the FaqGen megaservice
+  # image. Confirm before merging.
+  - name: faq-mega-server-deploy
+    spec:
+      image_name: opea/faqgen
+      image_tag: latest
+      replicas: 2
+      resources:
+        limits:
+          cpu: "8"
+          memory: "8000Mi"
+        requests:
+          cpu: "8"
+          memory: "8000Mi"
+
+  - name: faq-tgi-deploy
+    spec:
+      image_name: ghcr.io/huggingface/tgi-gaudi
+      # Quoted so the version-like tag is always read as a string.
+      image_tag: "2.0.5"
+      replicas: 7
+      resources:
+        limits:
+          habana.ai/gaudi: 1
+
+  - name: faq-micro-deploy
+    spec:
+      image_name: opea/llm-faqgen-tgi
+      image_tag: latest
+      replicas: 1
--- a/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml
+++ b/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml
@@ -0,0 +1,16 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# ConfigMap consumed via envFrom by every container rendered from
+# templates/deployment.yaml.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ .Values.config.CONFIG_MAP_NAME }}
+  namespace: default
+data:
+  # ConfigMap values must be strings: quote the templated scalars so that an
+  # empty or boolean/number-looking value is not re-typed by the YAML parser.
+  HUGGINGFACEHUB_API_TOKEN: {{ .Values.HUGGINGFACEHUB_API_TOKEN | default "" | quote }}
+  LLM_MODEL_ID: {{ .Values.config.LLM_MODEL_ID | quote }}
+  NODE_SELECTOR: {{ .Values.config.NODE_SELECTOR | quote }}
+  # TGI endpoint, addressed via cluster DNS, and the service names of the
+  # FaqGen microservice and megaservice.
+  TGI_LLM_ENDPOINT: http://faq-tgi-svc.default.svc.cluster.local:8010
+  LLM_SERVICE_HOST_IP: faq-micro-svc
+  MEGA_SERVICE_HOST_IP: faq-mega-server-svc
+---
--- a/FaqGen/benchmark/performance/helm_charts/templates/deployment.yaml
+++ b/FaqGen/benchmark/performance/helm_charts/templates/deployment.yaml
@@ -0,0 +1,113 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+{{- /* Render one Deployment per .Values.deployments entry that has a same-named .Values.podSpecs entry; unmatched entries are skipped. */}}
+{{- $global := .Values }}
+{{- range $deployment := .Values.deployments }}
+{{- range $podSpec := $global.podSpecs }}
+{{- if eq $podSpec.name $deployment.name }}
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ $deployment.name }}
+  namespace: default
+spec:
+  replicas: {{ $podSpec.spec.replicas }}
+  selector:
+    matchLabels:
+      app: {{ $deployment.name }}
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: {{ $deployment.name }}
+    spec:
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: {{ $global.config.CONFIG_MAP_NAME }}
+        {{- /* Each args entry contributes its flag name and, when set, its value as separate argv items. */}}
+        {{- if $deployment.spec.args }}
+        args:
+        {{- range $arg := $deployment.spec.args }}
+          {{- if $arg.name }}
+          - {{ $arg.name | quote }}
+          {{- end }}
+          {{- if $arg.value }}
+          - {{ $arg.value | quote }}
+          {{- end }}
+        {{- end }}
+        {{- end }}
+
+        {{- if $deployment.spec.env }}
+        env:
+        {{- range $env := $deployment.spec.env }}
+          - name: {{ $env.name }}
+            value: {{ $env.value | quote }}
+        {{- end }}
+        {{- end }}
+
+        image: {{ $podSpec.spec.image_name }}:{{ $podSpec.spec.image_tag }}
+        imagePullPolicy: IfNotPresent
+        name: {{ $podSpec.name }}
+
+        {{- /* toYaml keeps every field of a port entry inside one list item. */}}
+        {{- with $deployment.spec.ports }}
+        ports:
+          {{- toYaml . | nindent 10 }}
+        {{- end }}
+
+        {{- /* Resources come from the podSpecs entry, not from the deployments entry. */}}
+        {{- with $podSpec.spec.resources }}
+        resources:
+          {{- toYaml . | nindent 10 }}
+        {{- end }}
+
+        {{- with $deployment.spec.volumeMounts }}
+        volumeMounts:
+          {{- toYaml . | nindent 10 }}
+        {{- end }}
+
+      hostIPC: true
+      nodeSelector:
+        node-type: {{ $global.config.NODE_SELECTOR }}
+      serviceAccountName: default
+      topologySpreadConstraints:
+      - labelSelector:
+          matchLabels:
+            app: {{ $deployment.name }}
+        maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+
+      {{- /* Volume definitions are copied verbatim, so any volume type and only the fields actually set are emitted. */}}
+      {{- with $deployment.spec.volumes }}
+      volumes:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+
+---
+{{- end }}
+{{- end }}
+{{- end }}
--- a/FaqGen/benchmark/performance/helm_charts/templates/service.yaml
+++ b/FaqGen/benchmark/performance/helm_charts/templates/service.yaml
@@ -0,0 +1,24 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+{{- /* Render one Service per entry in .Values.services. */}}
+{{- range $svc := .Values.services }}
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ $svc.name }}
+  namespace: default
+spec:
+  ports:
+  {{- range $portSpec := $svc.spec.ports }}
+    - name: {{ $portSpec.name }}
+    {{- /* The name heads the list item; every other field of the port entry is copied below it. */}}
+    {{- range $field, $fieldValue := $portSpec }}
+      {{- if not (eq $field "name") }}
+      {{ $field }}: {{ $fieldValue }}
+      {{- end }}
+    {{- end }}
+  {{- end }}
+  selector:
+    app: {{ $svc.spec.selector.app }}
+  type: {{ $svc.spec.type }}
+---
+{{- end }}
--- a/FaqGen/benchmark/performance/helm_charts/values.yaml
+++ b/FaqGen/benchmark/performance/helm_charts/values.yaml
@@ -0,0 +1,98 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# NOTE(review): the templates in this chart hard-code `namespace: default`
+# and do not read this value.
+namespace: default
+
+# Shared settings read by templates/configmap.yaml and templates/deployment.yaml.
+config:
+  LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct
+  # Name of the ConfigMap that every container loads through envFrom.
+  CONFIG_MAP_NAME: faq-config
+  # Rendered as the `node-type` nodeSelector value of every Deployment.
+  NODE_SELECTOR: faq-opea
+
+# Container-level settings (ports, args, env, volumes) for each Deployment.
+# Image, replica count and resources come from the same-named `podSpecs`
+# entry (see customize.yaml).
+deployments:
+  # FaqGen megaservice.
+  - name: faq-mega-server-deploy
+    spec:
+      ports:
+        - containerPort: 7777
+
+  # FaqGen LLM microservice.
+  - name: faq-micro-deploy
+    spec:
+      ports:
+        - containerPort: 9000
+
+  # TGI model server.
+  - name: faq-tgi-deploy
+    spec:
+      ports:
+        - containerPort: 80
+      # NOTE(review): templates/deployment.yaml takes container resources from
+      # the matching podSpecs entry, not from this key.
+      resources:
+        limits:
+          habana.ai/gaudi: 1
+      # $(LLM_MODEL_ID) is left for Kubernetes to expand from the container
+      # environment, which is populated from the chart's ConfigMap.
+      args:
+        - name: "--model-id"
+          value: "$(LLM_MODEL_ID)"
+        - name: "--max-input-length"
+          value: "2048"
+        - name: "--max-total-tokens"
+          value: "4096"
+      env:
+        - name: OMPI_MCA_btl_vader_single_copy_mechanism
+          value: "none"
+        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+          value: "true"
+        - name: runtime
+          value: "habana"
+        - name: HABANA_VISIBLE_DEVICES
+          value: "all"
+        - name: ENABLE_HPU_GRAPH
+          value: "true"
+        - name: LIMIT_HPU_GRAPH
+          value: "true"
+        - name: USE_FLASH_ATTENTION
+          value: "true"
+        - name: FLASH_ATTENTION_RECOMPUTE
+          value: "true"
+      volumeMounts:
+        - name: model-volume
+          mountPath: /data
+        - name: shm
+          mountPath: /dev/shm
+      volumes:
+        - name: model-volume
+          hostPath:
+            path: /mnt/models
+            type: Directory
+        - name: shm
+          emptyDir:
+            medium: Memory
+            sizeLimit: 1Gi
+
+# One Service per entry; `selector.app` must equal the `name` of the
+# Deployment it fronts, and `targetPort` that Deployment's containerPort.
+services:
+  # FaqGen LLM microservice.
+  - name: faq-micro-svc
+    spec:
+      type: ClusterIP
+      selector:
+        app: faq-micro-deploy
+      ports:
+        - name: service
+          port: 9003
+          targetPort: 9000
+
+  # TGI model server; templates/configmap.yaml points TGI_LLM_ENDPOINT at port 8010.
+  - name: faq-tgi-svc
+    spec:
+      type: ClusterIP
+      selector:
+        app: faq-tgi-deploy
+      ports:
+        - name: service
+          port: 8010
+          targetPort: 80
+
+  # Megaservice entry point, exposed outside the cluster on a fixed node port.
+  - name: faq-mega-server-svc
+    spec:
+      type: NodePort
+      selector:
+        app: faq-mega-server-deploy
+      ports:
+        - name: service
+          port: 7779
+          targetPort: 7777
+          nodePort: 30779