removed benchmark template

2024-10-23 09:30:03 +03:00
parent 2876677214
commit 1046aad26f
25 changed files with 2 additions and 1212 deletions
--- a/AudioQnA/benchmark/performance/helm_charts/.helmignore
+++ b/AudioQnA/benchmark/performance/helm_charts/.helmignore
@@ -1,23 +0,0 @@
-# Patterns to ignore when building packages.
-# This supports shell glob matching, relative path matching, and
-# negation (prefixed with !). Only one pattern per line.
-.DS_Store
-# Common VCS dirs
-.git/
-.gitignore
-.bzr/
-.bzrignore
-.hg/
-.hgignore
-.svn/
-# Common backup files
-*.swp
-*.bak
-*.tmp
-*.orig
-*~
-# Various IDEs
-.project
-.idea/
-*.tmproj
-.vscode/
--- a/AudioQnA/benchmark/performance/helm_charts/Chart.yaml
+++ b/AudioQnA/benchmark/performance/helm_charts/Chart.yaml
@@ -1,27 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: v2
-name: audioqna-charts
-description: A Helm chart for Kubernetes
-
-# A chart can be either an 'application' or a 'library' chart.
-#
-# Application charts are a collection of templates that can be packaged into versioned archives
-# to be deployed.
-#
-# Library charts provide useful utilities or functions for the chart developer. They're included as
-# a dependency of application charts to inject those utilities and functions into the rendering
-# pipeline. Library charts do not define any templates and therefore cannot be deployed.
-type: application
-
-# This is the chart version. This version number should be incremented each time you make changes
-# to the chart and its templates, including the app version.
-# Versions are expected to follow Semantic Versioning (https://semver.org/)
-version: 1.0
-
-# This is the version number of the application being deployed. This version number should be
-# incremented each time you make changes to the application. Versions are not expected to
-# follow Semantic Versioning. They should reflect the version the application is using.
-# It is recommended to use it with quotes.
-appVersion: "1.16.0"
--- a/AudioQnA/benchmark/performance/helm_charts/README.md
+++ b/AudioQnA/benchmark/performance/helm_charts/README.md
@@ -1,25 +0,0 @@
-# Benchmarking Deployment
-
-This document guides you through deploying this example pipeline using Helm charts. Helm charts simplify managing Kubernetes applications by packaging configuration and resources.
-
-## Getting Started
-
-### Preparation
-
-```bash
-# on k8s-master node
-cd GenAIExamples/{example_name}/benchmark/performance/helm_charts
-
-# Replace the key of HUGGINGFACEHUB_API_TOKEN with your actual Hugging Face token:
-# vim values.yaml
-HUGGINGFACEHUB_API_TOKEN: hf_xxxxx
-```
-
-### Deployment
-
-```bash
-# Deploy the pipeline
-helm install {example_name} .
-```
-
-Note: Currently we only support the HPU version, because only HPU values.yaml is provided here.
--- a/AudioQnA/benchmark/performance/helm_charts/customize.yaml
+++ b/AudioQnA/benchmark/performance/helm_charts/customize.yaml
@@ -1,23 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-podSpecs:
-  - name: audioqna-backend-server-deploy
-    replicas: 1
-
-  - name: asr-deploy
-    replicas: 1
-
-  - name: whisper-deploy
-    replicas: 1
-
-
-  - name: tts-deploy
-    replicas: 1
-
-  - name: speecht5-deploy
-    replicas: 1
-
-
-  - name: llm-dependency-deploy
-    replicas: 1
--- a/AudioQnA/benchmark/performance/helm_charts/templates/configmap.yaml
+++ b/AudioQnA/benchmark/performance/helm_charts/templates/configmap.yaml
@@ -1,25 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: {{ .Values.config.CONFIG_MAP_NAME }}
-  namespace: default
-data:
-  HUGGINGFACEHUB_API_TOKEN: {{ .Values.config.HUGGINGFACEHUB_API_TOKEN }}
-  LLM_MODEL_ID: {{ .Values.config.LLM_MODEL_ID }}
-  NODE_SELECTOR: {{ .Values.config.NODE_SELECTOR }}
-  TGI_LLM_ENDPOINT: http://faq-tgi-svc.default.svc.cluster.local:8010
-
-  ASR_ENDPOINT: http://whisper-svc.default.svc.cluster.local:7066
-  TTS_ENDPOINT: http://speecht5-svc.default.svc.cluster.local:7055
-  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:3006
-  MEGA_SERVICE_HOST_IP: audioqna-backend-server-svc
-  ASR_SERVICE_HOST_IP: asr-svc
-  ASR_SERVICE_PORT: "3001"
-  LLM_SERVICE_HOST_IP: llm-svc
-  LLM_SERVICE_PORT: "3007"
-  TTS_SERVICE_HOST_IP: tts-svc
-  TTS_SERVICE_PORT: "3002"
---
--- a/AudioQnA/benchmark/performance/helm_charts/templates/deployment.yaml
+++ b/AudioQnA/benchmark/performance/helm_charts/templates/deployment.yaml
@@ -1,131 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-{{- $global := .Values }}
-{{- range $microservice := .Values.microservices }}
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: {{ $microservice.name }}
-  namespace: default
-spec:
-  {{- $replicas := $microservice.replicas }}
-  {{- range $podSpec := $global.podSpecs }}
-    {{- if eq $podSpec.name $microservice.name }}
-      {{- $replicas = $podSpec.replicas | default $microservice.replicas }}
-    {{- end }}
-  {{- end }}
-  replicas: {{ $replicas }}
-
-  selector:
-    matchLabels:
-      app: {{ $microservice.name }}
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: {{ $microservice.name }}
-    spec:
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: {{ $global.config.CONFIG_MAP_NAME }}
-        {{- if $microservice.args }}
-        args:
-        {{- range $arg := $microservice.args }}
-          {{- if $arg.name }}
-          - {{ $arg.name }}
-          {{- end }}
-          {{- if $arg.value }}
-          - "{{ $arg.value }}"
-          {{- end }}
-        {{- end }}
-        {{- end }}
-
-        {{- if $microservice.env }}
-        env:
-        {{- range $env := $microservice.env }}
-          - name: {{ $env.name }}
-            value: "{{ $env.value }}"
-        {{- end }}
-        {{- end }}
-
-        {{- $image := $microservice.image }}
-        {{- range $podSpec := $global.podSpecs }}
-          {{- if eq $podSpec.name $microservice.name }}
-            {{- $image = $podSpec.image | default $microservice.image }}
-          {{- end }}
-        {{- end }}
-        image: {{ $image }}
-
-        imagePullPolicy: IfNotPresent
-        name: {{ $microservice.name }}
-
-        {{- if $microservice.ports }}
-        ports:
-        {{- range $port := $microservice.ports }}
-          {{- range $port_name, $port_id := $port }}
-          - {{ $port_name }}: {{ $port_id }}
-          {{- end }}
-        {{- end }}
-        {{- end }}
-
-        {{- $resources := $microservice.resources }}
-        {{- range $podSpec := $global.podSpecs }}
-          {{- if eq $podSpec.name $microservice.name }}
-            {{- if $podSpec.resources }}
-              {{- $resources = $podSpec.resources }}
-            {{- end }}
-          {{- end }}
-        {{- end }}
-
-        {{- if $resources }}
-        resources:
-        {{- range $resourceType, $resource := $resources }}
-          {{ $resourceType }}:
-          {{- range $limitType, $limit := $resource }}
-            {{ $limitType }}: {{ $limit }}
-          {{- end }}
-        {{- end }}
-        {{- end }}
-
-        {{- if $microservice.volumeMounts }}
-        volumeMounts:
-        {{- range $volumeMount := $microservice.volumeMounts }}
-          - mountPath: {{ $volumeMount.mountPath }}
-            name: {{ $volumeMount.name }}
-        {{- end }}
-        {{- end }}
-
-      hostIPC: true
-      nodeSelector:
-        node-type: {{ $global.config.NODE_SELECTOR }}
-      serviceAccountName: default
-      topologySpreadConstraints:
-      - labelSelector:
-          matchLabels:
-            app: {{ $microservice.name }}
-        maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-
-
-      {{- if $microservice.volumes }}
-      volumes:
-      {{- range $index, $volume := $microservice.volumes }}
-        - name: {{ $volume.name }}
-          {{- if $volume.hostPath }}
-          hostPath:
-            path: {{ $volume.hostPath.path }}
-            type: {{ $volume.hostPath.type }}
-          {{- else if $volume.emptyDir }}
-          emptyDir:
-            medium: {{ $volume.emptyDir.medium }}
-            sizeLimit: {{ $volume.emptyDir.sizeLimit }}
-          {{- end }}
-      {{- end }}
-      {{- end }}
-
---
-{{- end }}
--- a/AudioQnA/benchmark/performance/helm_charts/templates/service.yaml
+++ b/AudioQnA/benchmark/performance/helm_charts/templates/service.yaml
@@ -1,24 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-{{- range $service := .Values.services }}
-apiVersion: v1
-kind: Service
-metadata:
-  name: {{ $service.name }}
-  namespace: default
-spec:
-  ports:
-  {{- range $port := $service.spec.ports }}
-    - name: {{ $port.name }}
-    {{- range $port_name, $port_id := $port }}
-      {{- if ne $port_name "name"}}
-      {{ $port_name }}: {{ $port_id }}
-      {{- end }}
-    {{- end }}
-  {{- end }}
-  selector:
-    app: {{ $service.spec.selector.app }}
-  type: {{ $service.spec.type }}
---
-{{- end }}
--- a/AudioQnA/benchmark/performance/helm_charts/values.yaml
+++ b/AudioQnA/benchmark/performance/helm_charts/values.yaml
@@ -1,200 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-namespace: default
-
-config:
-  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
-  CONFIG_MAP_NAME: audio-qna-config
-  NODE_SELECTOR: opea
-  ASR_ENDPOINT: http://whisper-svc.default.svc.cluster.local:7066
-  TTS_ENDPOINT: http://speecht5-svc.default.svc.cluster.local:7055
-  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:3006
-  MEGA_SERVICE_HOST_IP: audioqna-backend-server-svc
-  ASR_SERVICE_HOST_IP: asr-svc
-  ASR_SERVICE_PORT: "3001"
-  LLM_SERVICE_HOST_IP: llm-svc
-  LLM_SERVICE_PORT: "3007"
-  TTS_SERVICE_HOST_IP: tts-svc
-  TTS_SERVICE_PORT: "3002"
-  HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-
-microservices:
-  - name: audioqna-backend-server-deploy
-    image: opea/audioqna:latest
-    replicas: 1
-    ports:
-      - containerPort: 8888
-
-  - name: asr-deploy
-    image: opea/asr:latest
-    replicas: 1
-    ports:
-      - containerPort: 9099
-
-  - name: whisper-deploy
-    image: opea/whisper-gaudi:latest
-    replicas: 1
-    ports:
-      - containerPort: 7066
-    env:
-      - name: OMPI_MCA_btl_vader_single_copy_mechanism
-        value: none
-      - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
-        value: 'true'
-      - name: runtime
-        value: habana
-      - name: HABANA_VISIBLE_DEVICES
-        value: all
-    resources:
-      limits:
-        habana.ai/gaudi: 1
-
-  - name: tts-deploy
-    image: opea/tts:latest
-    replicas: 1
-    ports:
-      - containerPort: 9088
-
-  - name: speecht5-deploy
-    image: opea/speecht5-gaudi:latest
-    replicas: 1
-    ports:
-      - containerPort: 7055
-    env:
-      - name: OMPI_MCA_btl_vader_single_copy_mechanism
-        value: none
-      - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
-        value: 'true'
-      - name: runtime
-        value: habana
-      - name: HABANA_VISIBLE_DEVICES
-        value: all
-    resources:
-      limits:
-        habana.ai/gaudi: 1
-
-  - name: llm-deploy
-    image: opea/llm-tgi:latest
-    replicas: 1
-    ports:
-      - containerPort: 9000
-
-  - name: llm-dependency-deploy
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
-    replicas: 1
-    ports:
-      - containerPort: 80
-    resources:
-      limits:
-        habana.ai/gaudi: 1
-    args:
-      - name: "--model-id"
-        value: $(LLM_MODEL_ID)
-      - name: "--max-input-length"
-        value: "2048"
-      - name: "--max-total-tokens"
-        value: "4096"
-    env:
-      - name: OMPI_MCA_btl_vader_single_copy_mechanism
-        value: none
-      - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
-        value: "true"
-      - name: runtime
-        value: habana
-      - name: HABANA_VISIBLE_DEVICES
-        value: all
-      - name: ENABLE_HPU_GRAPH
-        value: 'true'
-      - name: LIMIT_HPU_GRAPH
-        value: 'true'
-      - name: USE_FLASH_ATTENTION
-        value: 'true'
-      - name: FLASH_ATTENTION_RECOMPUTE
-        value: 'true'
-    volumeMounts:
-      - mountPath: /data
-        name: model-volume
-      - mountPath: /dev/shm
-        name: shm
-    volumes:
-      - hostPath:
-          path: /mnt/models
-          type: Directory
-        name: model-volume
-      - emptyDir:
-          medium: Memory
-          sizeLimit: 1Gi
-        name: shm
-
-services:
-  - name: asr-svc
-    spec:
-      ports:
-        - name: service
-          port: 3001
-          targetPort: 9099
-      selector:
-        app: asr-deploy
-      type: ClusterIP
-
-  - name: whisper-svc
-    spec:
-      ports:
-        - name: service
-          port: 7066
-          targetPort: 7066
-      selector:
-        app: whisper-deploy
-      type: ClusterIP
-
-  - name: tts-svc
-    spec:
-      ports:
-        - name: service
-          port: 3002
-          targetPort: 9088
-      selector:
-        app: tts-deploy
-      type: ClusterIP
-
-  - name: speecht5-svc
-    spec:
-      ports:
-        - name: service
-          port: 7055
-          targetPort: 7055
-      selector:
-        app: speecht5-deploy
-      type: ClusterIP
-
-  - name: llm-dependency-svc
-    spec:
-      ports:
-        - name: service
-          port: 3006
-          targetPort: 80
-      selector:
-        app: llm-dependency-deploy
-      type: ClusterIP
-
-  - name: llm-svc
-    spec:
-      ports:
-        - name: service
-          port: 3007
-          targetPort: 9000
-      selector:
-        app: llm-deploy
-      type: ClusterIP
-
-  - name: audioqna-backend-server-svc
-    spec:
-      ports:
-        - name: service
-          port: 3088
-          targetPort: 8888
-          nodePort: 30666
-      selector:
-        app: audioqna-backend-server-deploy
-      type: NodePort
--- a/ChatQnA/benchmark/performance/helm_charts/deployment.py
+++ b/ChatQnA/benchmark/performance/helm_charts/deployment.py
@@ -59,7 +59,7 @@ def generate_yaml(num_nodes, mode="oob", with_rerank="True"):
            else None
        ),
        {"name": "llm-dependency-deploy", "resources": {"limits": {"habana.ai/gaudi": 1}}},
-        {"name": "retriever-deploy", "resources": {"requests": {"cpu": "16", "memory": "8000Mi"}}},
+        {"name": "retriever-deploy", "resources": {"requests": {"cpu": "8", "memory": "8000Mi"}}},
    ]

    replicas = [replica for replica in replicas if replica]
@@ -72,7 +72,7 @@ def generate_yaml(num_nodes, mode="oob", with_rerank="True"):
                {"name": "--model-id", "value": "$(LLM_MODEL_ID)"},
                {"name": "--max-input-length", "value": 1280},
                {"name": "--max-total-tokens", "value": 2048},
-                {"name": "--max-batch-total-tokens", "value": 35536},
+                {"name": "--max-batch-total-tokens", "value": 65536},
                {"name": "--max-batch-prefill-tokens", "value": 4096},
            ],
        },
--- a/FaqGen/benchmark/performance/helm_charts/.helmignore
+++ b/FaqGen/benchmark/performance/helm_charts/.helmignore
@@ -1,23 +0,0 @@
-# Patterns to ignore when building packages.
-# This supports shell glob matching, relative path matching, and
-# negation (prefixed with !). Only one pattern per line.
-.DS_Store
-# Common VCS dirs
-.git/
-.gitignore
-.bzr/
-.bzrignore
-.hg/
-.hgignore
-.svn/
-# Common backup files
-*.swp
-*.bak
-*.tmp
-*.orig
-*~
-# Various IDEs
-.project
-.idea/
-*.tmproj
-.vscode/
--- a/FaqGen/benchmark/performance/helm_charts/Chart.yaml
+++ b/FaqGen/benchmark/performance/helm_charts/Chart.yaml
@@ -1,27 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: v2
-name: faqgen-charts
-description: A Helm chart for Kubernetes
-
-# A chart can be either an 'application' or a 'library' chart.
-#
-# Application charts are a collection of templates that can be packaged into versioned archives
-# to be deployed.
-#
-# Library charts provide useful utilities or functions for the chart developer. They're included as
-# a dependency of application charts to inject those utilities and functions into the rendering
-# pipeline. Library charts do not define any templates and therefore cannot be deployed.
-type: application
-
-# This is the chart version. This version number should be incremented each time you make changes
-# to the chart and its templates, including the app version.
-# Versions are expected to follow Semantic Versioning (https://semver.org/)
-version: 1.0
-
-# This is the version number of the application being deployed. This version number should be
-# incremented each time you make changes to the application. Versions are not expected to
-# follow Semantic Versioning. They should reflect the version the application is using.
-# It is recommended to use it with quotes.
-appVersion: "1.16.0"
--- a/FaqGen/benchmark/performance/helm_charts/README.md
+++ b/FaqGen/benchmark/performance/helm_charts/README.md
@@ -1,25 +0,0 @@
-# Benchmarking Deployment
-
-This document guides you through deploying this example pipeline using Helm charts. Helm charts simplify managing Kubernetes applications by packaging configuration and resources.
-
-## Getting Started
-
-### Preparation
-
-```bash
-# on k8s-master node
-cd GenAIExamples/{example_name}/benchmark/performance/helm_charts
-
-# Replace the key of HUGGINGFACEHUB_API_TOKEN with your actual Hugging Face token:
-# vim values.yaml
-HUGGINGFACEHUB_API_TOKEN: hf_xxxxx
-```
-
-### Deployment
-
-```bash
-# Deploy the pipeline
-helm install {example_name} .
-```
-
-Note: Currently we only support the HPU version, because only HPU values.yaml is provided here.
--- a/FaqGen/benchmark/performance/helm_charts/customize.yaml
+++ b/FaqGen/benchmark/performance/helm_charts/customize.yaml
@@ -1,23 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-podSpecs:
-  - name: faq-mega-server-deploy
-    replicas: 2
-    resources:
-      limits:
-        cpu: "8"
-        memory: "8000Mi"
-      requests:
-        cpu: "8"
-        memory: "8000Mi"
-
-
-  - name: faq-tgi-deploy
-    replicas: 7
-    resources:
-      limits:
-        habana.ai/gaudi: 1
-
-  - name: faq-micro-deploy
-    replicas: 1
--- a/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml
+++ b/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml
@@ -1,16 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: {{ .Values.config.CONFIG_MAP_NAME }}
-  namespace: default
-data:
-  HUGGINGFACEHUB_API_TOKEN: {{ .Values.HUGGINGFACEHUB_API_TOKEN }}
-  LLM_MODEL_ID: {{ .Values.config.LLM_MODEL_ID }}
-  NODE_SELECTOR: {{ .Values.config.NODE_SELECTOR }}
-  TGI_LLM_ENDPOINT: http://faq-tgi-svc.default.svc.cluster.local:8010
-  LLM_SERVICE_HOST_IP: faq-micro-svc
-  MEGA_SERVICE_HOST_IP: faq-mega-server-svc
---
--- a/FaqGen/benchmark/performance/helm_charts/templates/deployment.yaml
+++ b/FaqGen/benchmark/performance/helm_charts/templates/deployment.yaml
@@ -1,131 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-{{- $global := .Values }}
-{{- range $microservice := .Values.microservices }}
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: {{ $microservice.name }}
-  namespace: default
-spec:
-  {{- $replicas := $microservice.replicas }}
-  {{- range $podSpec := $global.podSpecs }}
-    {{- if eq $podSpec.name $microservice.name }}
-      {{- $replicas = $podSpec.replicas | default $microservice.replicas }}
-    {{- end }}
-  {{- end }}
-  replicas: {{ $replicas }}
-
-  selector:
-    matchLabels:
-      app: {{ $microservice.name }}
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: {{ $microservice.name }}
-    spec:
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: {{ $global.config.CONFIG_MAP_NAME }}
-        {{- if $microservice.args }}
-        args:
-        {{- range $arg := $microservice.args }}
-          {{- if $arg.name }}
-          - {{ $arg.name }}
-          {{- end }}
-          {{- if $arg.value }}
-          - "{{ $arg.value }}"
-          {{- end }}
-        {{- end }}
-        {{- end }}
-
-        {{- if $microservice.env }}
-        env:
-        {{- range $env := $microservice.env }}
-          - name: {{ $env.name }}
-            value: "{{ $env.value }}"
-        {{- end }}
-        {{- end }}
-
-        {{- $image := $microservice.image }}
-        {{- range $podSpec := $global.podSpecs }}
-          {{- if eq $podSpec.name $microservice.name }}
-            {{- $image = $podSpec.image | default $microservice.image }}
-          {{- end }}
-        {{- end }}
-        image: {{ $image }}
-
-        imagePullPolicy: IfNotPresent
-        name: {{ $microservice.name }}
-
-        {{- if $microservice.ports }}
-        ports:
-        {{- range $port := $microservice.ports }}
-          {{- range $port_name, $port_id := $port }}
-          - {{ $port_name }}: {{ $port_id }}
-          {{- end }}
-        {{- end }}
-        {{- end }}
-
-        {{- $resources := $microservice.resources }}
-        {{- range $podSpec := $global.podSpecs }}
-          {{- if eq $podSpec.name $microservice.name }}
-            {{- if $podSpec.resources }}
-              {{- $resources = $podSpec.resources }}
-            {{- end }}
-          {{- end }}
-        {{- end }}
-
-        {{- if $resources }}
-        resources:
-        {{- range $resourceType, $resource := $resources }}
-          {{ $resourceType }}:
-          {{- range $limitType, $limit := $resource }}
-            {{ $limitType }}: {{ $limit }}
-          {{- end }}
-        {{- end }}
-        {{- end }}
-
-        {{- if $microservice.volumeMounts }}
-        volumeMounts:
-        {{- range $volumeMount := $microservice.volumeMounts }}
-          - mountPath: {{ $volumeMount.mountPath }}
-            name: {{ $volumeMount.name }}
-        {{- end }}
-        {{- end }}
-
-      hostIPC: true
-      nodeSelector:
-        node-type: {{ $global.config.NODE_SELECTOR }}
-      serviceAccountName: default
-      topologySpreadConstraints:
-      - labelSelector:
-          matchLabels:
-            app: {{ $microservice.name }}
-        maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-
-
-      {{- if $microservice.volumes }}
-      volumes:
-      {{- range $index, $volume := $microservice.volumes }}
-        - name: {{ $volume.name }}
-          {{- if $volume.hostPath }}
-          hostPath:
-            path: {{ $volume.hostPath.path }}
-            type: {{ $volume.hostPath.type }}
-          {{- else if $volume.emptyDir }}
-          emptyDir:
-            medium: {{ $volume.emptyDir.medium }}
-            sizeLimit: {{ $volume.emptyDir.sizeLimit }}
-          {{- end }}
-      {{- end }}
-      {{- end }}
-
---
-{{- end }}
--- a/FaqGen/benchmark/performance/helm_charts/templates/service.yaml
+++ b/FaqGen/benchmark/performance/helm_charts/templates/service.yaml
@@ -1,24 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-{{- range $service := .Values.services }}
-apiVersion: v1
-kind: Service
-metadata:
-  name: {{ $service.name }}
-  namespace: default
-spec:
-  ports:
-  {{- range $port := $service.spec.ports }}
-    - name: {{ $port.name }}
-    {{- range $port_name, $port_id := $port }}
-      {{- if ne $port_name "name"}}
-      {{ $port_name }}: {{ $port_id }}
-      {{- end }}
-    {{- end }}
-  {{- end }}
-  selector:
-    app: {{ $service.spec.selector.app }}
-  type: {{ $service.spec.type }}
---
-{{- end }}
--- a/FaqGen/benchmark/performance/helm_charts/values.yaml
+++ b/FaqGen/benchmark/performance/helm_charts/values.yaml
@@ -1,102 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-namespace: default
-
-config:
-  LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct
-  CONFIG_MAP_NAME: faq-config
-  NODE_SELECTOR: opea
-  HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-
-microservices:
-  - name: faq-mega-server-deploy
-    image: opea/chatqna:latest
-    replicas: 1
-    ports:
-      - containerPort: 7777
-
-  - name: faq-micro-deploy
-    image: opea/llm-faqgen-tgi:latest
-    replicas: 1
-    ports:
-      - containerPort: 9000
-
-  - name: faq-tgi-deploy
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
-    replicas: 1
-    ports:
-      - containerPort: 80
-    resources:
-      limits:
-        habana.ai/gaudi: 1
-    args:
-      - name: "--model-id"
-        value: $(LLM_MODEL_ID)
-      - name: "--max-input-length"
-        value: "2048"
-      - name: "--max-total-tokens"
-        value: "4096"
-    env:
-      - name: OMPI_MCA_btl_vader_single_copy_mechanism
-        value: none
-      - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
-        value: "true"
-      - name: runtime
-        value: habana
-      - name: HABANA_VISIBLE_DEVICES
-        value: all
-      - name: ENABLE_HPU_GRAPH
-        value: 'true'
-      - name: LIMIT_HPU_GRAPH
-        value: 'true'
-      - name: USE_FLASH_ATTENTION
-        value: 'true'
-      - name: FLASH_ATTENTION_RECOMPUTE
-        value: 'true'
-    volumeMounts:
-      - mountPath: /data
-        name: model-volume
-      - mountPath: /dev/shm
-        name: shm
-    volumes:
-      - hostPath:
-          path: /mnt/models
-          type: Directory
-        name: model-volume
-      - emptyDir:
-          medium: Memory
-          sizeLimit: 1Gi
-        name: shm
-
-services:
-  - name: faq-micro-svc
-    spec:
-      ports:
-        - name: service
-          port: 9003
-          targetPort: 9000
-      selector:
-        app: faq-micro-deploy
-      type: ClusterIP
-
-  - name: faq-tgi-svc
-    spec:
-      ports:
-        - name: service
-          port: 8010
-          targetPort: 80
-      selector:
-        app: faq-tgi-deploy
-      type: ClusterIP
-
-  - name: faq-mega-server-svc
-    spec:
-      ports:
-        - name: service
-          port: 7779
-          targetPort: 7777
-          nodePort: 30779
-      selector:
-        app: faq-mega-server-deploy
-      type: NodePort
--- a/VisualQnA/benchmark/performance/helm_charts/.helmignore
+++ b/VisualQnA/benchmark/performance/helm_charts/.helmignore
@@ -1,23 +0,0 @@
-# Patterns to ignore when building packages.
-# This supports shell glob matching, relative path matching, and
-# negation (prefixed with !). Only one pattern per line.
-.DS_Store
-# Common VCS dirs
-.git/
-.gitignore
-.bzr/
-.bzrignore
-.hg/
-.hgignore
-.svn/
-# Common backup files
-*.swp
-*.bak
-*.tmp
-*.orig
-*~
-# Various IDEs
-.project
-.idea/
-*.tmproj
-.vscode/
--- a/VisualQnA/benchmark/performance/helm_charts/Chart.yaml
+++ b/VisualQnA/benchmark/performance/helm_charts/Chart.yaml
@@ -1,27 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: v2
-name: visualqna-charts
-description: A Helm chart for Kubernetes
-
-# A chart can be either an 'application' or a 'library' chart.
-#
-# Application charts are a collection of templates that can be packaged into versioned archives
-# to be deployed.
-#
-# Library charts provide useful utilities or functions for the chart developer. They're included as
-# a dependency of application charts to inject those utilities and functions into the rendering
-# pipeline. Library charts do not define any templates and therefore cannot be deployed.
-type: application
-
-# This is the chart version. This version number should be incremented each time you make changes
-# to the chart and its templates, including the app version.
-# Versions are expected to follow Semantic Versioning (https://semver.org/)
-version: 1.0
-
-# This is the version number of the application being deployed. This version number should be
-# incremented each time you make changes to the application. Versions are not expected to
-# follow Semantic Versioning. They should reflect the version the application is using.
-# It is recommended to use it with quotes.
-appVersion: "1.16.0"
--- a/VisualQnA/benchmark/performance/helm_charts/README.md
+++ b/VisualQnA/benchmark/performance/helm_charts/README.md
@@ -1,25 +0,0 @@
-# Benchmarking Deployment
-
-This document guides you through deploying this example pipeline using Helm charts. Helm charts simplify managing Kubernetes applications by packaging configuration and resources.
-
-## Getting Started
-
-### Preparation
-
-```bash
-# on k8s-master node
-cd GenAIExamples/{example_name}/benchmark/performance/helm_charts
-
-# Replace the key of HUGGINGFACEHUB_API_TOKEN with your actual Hugging Face token:
-# vim values.yaml
-HUGGINGFACEHUB_API_TOKEN: hf_xxxxx
-```
-
-### Deployment
-
-```bash
-# Deploy the pipeline
-helm install {example_name} .
-```
-
-Note: Currently we only support the HPU version, because only HPU values.yaml is provided here.
--- a/VisualQnA/benchmark/performance/helm_charts/customize.yaml
+++ b/VisualQnA/benchmark/performance/helm_charts/customize.yaml
@@ -1,23 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-podSpecs:
-  - name: faq-mega-server-deploy
-    replicas: 2
-    resources:
-      limits:
-        cpu: "8"
-        memory: "8000Mi"
-      requests:
-        cpu: "8"
-        memory: "8000Mi"
-
-
-  - name: faq-tgi-deploy
-    replicas: 7
-    resources:
-      limits:
-        habana.ai/gaudi: 1
-
-  - name: faq-micro-deploy
-    replicas: 1
--- a/VisualQnA/benchmark/performance/helm_charts/templates/configmap.yaml
+++ b/VisualQnA/benchmark/performance/helm_charts/templates/configmap.yaml
@@ -1,24 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: {{ .Values.config.CONFIG_MAP_NAME }}
-  namespace: default
-data:
-  HUGGINGFACEHUB_API_TOKEN: {{ .Values.HUGGINGFACEHUB_API_TOKEN }}
-  LLM_MODEL_ID: {{ .Values.config.LLM_MODEL_ID }}
-  NODE_SELECTOR: {{ .Values.config.NODE_SELECTOR }}
-
-  CONFIG_MAP_NAME: visualqna-config
-  NODE_SELECTOR: opea
-  HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-
-  LVM_ENDPOINT: "http://visualqna-tgi"
-  MODEL_ID: "llava-hf/llava-v1.6-mistral-7b-hf"
-  LVM_SERVICE_HOST_IP: visualqna-lvm-uservice
-  PORT: "8399"
-  MAX_INPUT_TOKENS: "4096"
-  MAX_TOTAL_TOKENS: "8192"
---
--- a/VisualQnA/benchmark/performance/helm_charts/templates/deployment.yaml
+++ b/VisualQnA/benchmark/performance/helm_charts/templates/deployment.yaml
@@ -1,131 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-{{- $global := .Values }}
-{{- range $microservice := .Values.microservices }}
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: {{ $microservice.name }}
-  namespace: default
-spec:
-  {{- $replicas := $microservice.replicas }}
-  {{- range $podSpec := $global.podSpecs }}
-    {{- if eq $podSpec.name $microservice.name }}
-      {{- $replicas = $podSpec.replicas | default $microservice.replicas }}
-    {{- end }}
-  {{- end }}
-  replicas: {{ $replicas }}
-
-  selector:
-    matchLabels:
-      app: {{ $microservice.name }}
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: {{ $microservice.name }}
-    spec:
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: {{ $global.config.CONFIG_MAP_NAME }}
-        {{- if $microservice.args }}
-        args:
-        {{- range $arg := $microservice.args }}
-          {{- if $arg.name }}
-          - {{ $arg.name }}
-          {{- end }}
-          {{- if $arg.value }}
-          - "{{ $arg.value }}"
-          {{- end }}
-        {{- end }}
-        {{- end }}
-
-        {{- if $microservice.env }}
-        env:
-        {{- range $env := $microservice.env }}
-          - name: {{ $env.name }}
-            value: "{{ $env.value }}"
-        {{- end }}
-        {{- end }}
-
-        {{- $image := $microservice.image }}
-        {{- range $podSpec := $global.podSpecs }}
-          {{- if eq $podSpec.name $microservice.name }}
-            {{- $image = $podSpec.image | default $microservice.image }}
-          {{- end }}
-        {{- end }}
-        image: {{ $image }}
-
-        imagePullPolicy: IfNotPresent
-        name: {{ $microservice.name }}
-
-        {{- if $microservice.ports }}
-        ports:
-        {{- range $port := $microservice.ports }}
-          {{- range $port_name, $port_id := $port }}
-          - {{ $port_name }}: {{ $port_id }}
-          {{- end }}
-        {{- end }}
-        {{- end }}
-
-        {{- $resources := $microservice.resources }}
-        {{- range $podSpec := $global.podSpecs }}
-          {{- if eq $podSpec.name $microservice.name }}
-            {{- if $podSpec.resources }}
-              {{- $resources = $podSpec.resources }}
-            {{- end }}
-          {{- end }}
-        {{- end }}
-
-        {{- if $resources }}
-        resources:
-        {{- range $resourceType, $resource := $resources }}
-          {{ $resourceType }}:
-          {{- range $limitType, $limit := $resource }}
-            {{ $limitType }}: {{ $limit }}
-          {{- end }}
-        {{- end }}
-        {{- end }}
-
-        {{- if $microservice.volumeMounts }}
-        volumeMounts:
-        {{- range $volumeMount := $microservice.volumeMounts }}
-          - mountPath: {{ $volumeMount.mountPath }}
-            name: {{ $volumeMount.name }}
-        {{- end }}
-        {{- end }}
-
-      hostIPC: true
-      nodeSelector:
-        node-type: {{ $global.config.NODE_SELECTOR }}
-      serviceAccountName: default
-      topologySpreadConstraints:
-      - labelSelector:
-          matchLabels:
-            app: {{ $microservice.name }}
-        maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-
-
-      {{- if $microservice.volumes }}
-      volumes:
-      {{- range $index, $volume := $microservice.volumes }}
-        - name: {{ $volume.name }}
-          {{- if $volume.hostPath }}
-          hostPath:
-            path: {{ $volume.hostPath.path }}
-            type: {{ $volume.hostPath.type }}
-          {{- else if $volume.emptyDir }}
-          emptyDir:
-            medium: {{ $volume.emptyDir.medium }}
-            sizeLimit: {{ $volume.emptyDir.sizeLimit }}
-          {{- end }}
-      {{- end }}
-      {{- end }}
-
---
-{{- end }}
--- a/VisualQnA/benchmark/performance/helm_charts/templates/service.yaml
+++ b/VisualQnA/benchmark/performance/helm_charts/templates/service.yaml
@@ -1,24 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-{{- range $service := .Values.services }}
-apiVersion: v1
-kind: Service
-metadata:
-  name: {{ $service.name }}
-  namespace: default
-spec:
-  ports:
-  {{- range $port := $service.spec.ports }}
-    - name: {{ $port.name }}
-    {{- range $port_name, $port_id := $port }}
-      {{- if ne $port_name "name"}}
-      {{ $port_name }}: {{ $port_id }}
-      {{- end }}
-    {{- end }}
-  {{- end }}
-  selector:
-    app: {{ $service.spec.selector.app }}
-  type: {{ $service.spec.type }}
---
-{{- end }}
--- a/VisualQnA/benchmark/performance/helm_charts/values.yaml
+++ b/VisualQnA/benchmark/performance/helm_charts/values.yaml
@@ -1,84 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-namespace: default
-
-config:
-  LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct
-  CONFIG_MAP_NAME: visualqna-config
-  NODE_SELECTOR: opea
-  HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-
-  LVM_ENDPOINT: "http://visualqna-tgi"
-  MODEL_ID: "llava-hf/llava-v1.6-mistral-7b-hf"
-  LVM_SERVICE_HOST_IP: visualqna-lvm-uservice
-  PORT: "8399"
-  MAX_INPUT_TOKENS: "4096"
-  MAX_TOTAL_TOKENS: "8192"
-
-microservices:
-  - name: visualqna-lvm-uservice
-    image: opea/lvm-tgi:latest
-    replicas: 1
-    ports:
-      - containerPort: 9399
-
-  - name: visualqna
-    image: opea/visualqna:latest
-    replicas: 1
-    ports:
-      - containerPort: 8399
-
-  - name: visualqna-tgi
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
-    replicas: 1
-    ports:
-      - containerPort: 8399
-    resources:
-      limits:
-        habana.ai/gaudi: 1
-    volumeMounts:
-      - mountPath: /data
-        name: model-volume
-      - mountPath: /dev/shm
-        name: shm
-    volumes:
-      - hostPath:
-          path: /mnt/models
-          type: Directory
-        name: model-volume
-      - emptyDir:
-          medium: Memory
-          sizeLimit: 1Gi
-        name: shm
-
-services:
-  - name: visualqna-lvm-uservice
-    spec:
-      ports:
-        - name: service
-          port: 9399
-          targetPort: 9399
-      selector:
-        app: visualqna-lvm-uservice
-      type: ClusterIP
-
-  - name: visualqna-tgi-service
-    spec:
-      ports:
-        - name: service
-          port: 80
-          targetPort: 8399
-      selector:
-        app: visualqna-tgi
-      type: ClusterIP
-
-  - name: visualqna-service
-    spec:
-      ports:
-        - name: service
-          port: 8888
-          targetPort: 8888
-      selector:
-        app: visualqna
-      type: ClusterIP