removed benchmark template

2024-10-23 09:30:03 +03:00
parent 2876677214
commit 1046aad26f
25 changed files with 2 additions and 1212 deletions
--- a/AudioQnA/benchmark/performance/helm_charts/.helmignore
+++ b/AudioQnA/benchmark/performance/helm_charts/.helmignore
@@ -1,23 +0,0 @@
 # Patterns to ignore when building packages.
 # This supports shell glob matching, relative path matching, and
 # negation (prefixed with !). Only one pattern per line.
 .DS_Store
 # Version-control directories and their ignore files
 .git/
 .gitignore
 .bzr/
 .bzrignore
 .hg/
 .hgignore
 .svn/
 # Editor swap files and backup/temporary leftovers
 *.swp
 *.bak
 *.tmp
 *.orig
 *~
 # IDE and editor project settings
 .project
 .idea/
 *.tmproj
 .vscode/
--- a/AudioQnA/benchmark/performance/helm_charts/Chart.yaml
+++ b/AudioQnA/benchmark/performance/helm_charts/Chart.yaml
@@ -1,27 +0,0 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 apiVersion: v2
 name: audioqna-charts
 description: A Helm chart for Kubernetes
 # A chart can be either an 'application' or a 'library' chart.
 #
 # Application charts are a collection of templates that can be packaged into versioned archives
 # to be deployed.
 #
 # Library charts provide useful utilities or functions for the chart developer. They're included as
 # a dependency of application charts to inject those utilities and functions into the rendering
 # pipeline. Library charts do not define any templates and therefore cannot be deployed.
 type: application
 # This is the chart version. This version number should be incremented each time you make changes
 # to the chart and its templates, including the app version.
 # Versions are expected to follow Semantic Versioning (https://semver.org/), i.e. MAJOR.MINOR.PATCH.
 # A bare 1.0 is read by YAML as a float and lacks the patch component, so all three parts are spelled out.
 version: 1.0.0
 # This is the version number of the application being deployed. This version number should be
 # incremented each time you make changes to the application. Versions are not expected to
 # follow Semantic Versioning. They should reflect the version the application is using.
 # It is recommended to use it with quotes.
 appVersion: "1.16.0"
--- a/AudioQnA/benchmark/performance/helm_charts/README.md
+++ b/AudioQnA/benchmark/performance/helm_charts/README.md
@@ -1,25 +0,0 @@
 # Benchmarking Deployment
 This document guides you through deploying this example pipeline using Helm charts. Helm charts simplify managing Kubernetes applications by packaging configuration and resources.
 ## Getting Started
 ### Preparation
 ```bash
 # on k8s-master node
 cd GenAIExamples/{example_name}/benchmark/performance/helm_charts
 # Replace the key of HUGGINGFACEHUB_API_TOKEN with your actual Hugging Face token:
 # vim values.yaml
 HUGGINGFACEHUB_API_TOKEN: hf_xxxxx
 ```
 ### Deployment
 ```bash
 # Deploy the pipeline
 helm install {example_name} .
 ```
 Note: Currently we only support the HPU version, because only HPU values.yaml is provided here.
--- a/AudioQnA/benchmark/performance/helm_charts/customize.yaml
+++ b/AudioQnA/benchmark/performance/helm_charts/customize.yaml
@@ -1,23 +0,0 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 # Per-deployment overrides. templates/deployment.yaml looks each microservice up
 # in this list by name and, on a match, takes replicas (and image/resources when
 # given) from here instead of from the microservice's own entry.
 # llm-deploy has no entry here, so its own settings are used unchanged.
 # Presumably supplied to Helm as an additional values file; confirm with the caller.
 podSpecs:
  - name: audioqna-backend-server-deploy
    replicas: 1
  - name: asr-deploy
    replicas: 1
  - name: whisper-deploy
    replicas: 1
  - name: tts-deploy
    replicas: 1
  - name: speecht5-deploy
    replicas: 1
  - name: llm-dependency-deploy
    replicas: 1
--- a/AudioQnA/benchmark/performance/helm_charts/templates/configmap.yaml
+++ b/AudioQnA/benchmark/performance/helm_charts/templates/configmap.yaml
@@ -1,25 +0,0 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 # Shared ConfigMap; templates/deployment.yaml loads it into every container via envFrom.
 apiVersion: v1
 kind: ConfigMap
 metadata:
  name: {{ .Values.config.CONFIG_MAP_NAME }}
  namespace: default
 data:
  # Taken from the chart's config values. Quoted so that number- or
  # boolean-looking values are still rendered as strings.
  HUGGINGFACEHUB_API_TOKEN: {{ .Values.config.HUGGINGFACEHUB_API_TOKEN | quote }}
  LLM_MODEL_ID: {{ .Values.config.LLM_MODEL_ID | quote }}
  NODE_SELECTOR: {{ .Values.config.NODE_SELECTOR | quote }}
  # In-cluster endpoints of the model-serving backends. TGI_LLM_ENDPOINT is
  # defined exactly once; a second, FaqGen-specific definition was a duplicate key.
  ASR_ENDPOINT: http://whisper-svc.default.svc.cluster.local:7066
  TTS_ENDPOINT: http://speecht5-svc.default.svc.cluster.local:7055
  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:3006
  # Service names and ports of the microservices.
  MEGA_SERVICE_HOST_IP: audioqna-backend-server-svc
  ASR_SERVICE_HOST_IP: asr-svc
  ASR_SERVICE_PORT: "3001"
  LLM_SERVICE_HOST_IP: llm-svc
  LLM_SERVICE_PORT: "3007"
  TTS_SERVICE_HOST_IP: tts-svc
  TTS_SERVICE_PORT: "3002"
 ---
--- a/AudioQnA/benchmark/performance/helm_charts/templates/deployment.yaml
+++ b/AudioQnA/benchmark/performance/helm_charts/templates/deployment.yaml
@@ -1,131 +0,0 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 # Renders one Deployment per entry in .Values.microservices. An entry in
 # .Values.podSpecs with the same name overrides that microservice's replicas,
 # image and resources; all other fields come from the microservice entry.
 {{- $global := .Values }}
 {{- range $microservice := .Values.microservices }}
 apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: {{ $microservice.name }}
  namespace: default
 spec:
  # Replica count: a matching podSpecs entry wins, otherwise the microservice's own value.
  {{- $replicas := $microservice.replicas }}
  {{- range $podSpec := $global.podSpecs }}
    {{- if eq $podSpec.name $microservice.name }}
      {{- $replicas = $podSpec.replicas | default $microservice.replicas }}
    {{- end }}
  {{- end }}
  replicas: {{ $replicas }}
  selector:
    matchLabels:
      app: {{ $microservice.name }}
  template:
    metadata:
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
      labels:
        app: {{ $microservice.name }}
    spec:
      containers:
      # Every container loads the shared ConfigMap as environment variables.
      - envFrom:
        - configMapRef:
            name: {{ $global.config.CONFIG_MAP_NAME }}
        # Each args entry contributes its name and then its value as separate list items.
        {{- if $microservice.args }}
        args:
        {{- range $arg := $microservice.args }}
          {{- if $arg.name }}
          - {{ $arg.name }}
          {{- end }}
          {{- if $arg.value }}
          - "{{ $arg.value }}"
          {{- end }}
        {{- end }}
        {{- end }}
        {{- if $microservice.env }}
        env:
        {{- range $env := $microservice.env }}
          - name: {{ $env.name }}
            value: "{{ $env.value }}"
        {{- end }}
        {{- end }}
        {{- $image := $microservice.image }}
        {{- range $podSpec := $global.podSpecs }}
          {{- if eq $podSpec.name $microservice.name }}
            {{- $image = $podSpec.image | default $microservice.image }}
          {{- end }}
        {{- end }}
        image: {{ $image }}
        imagePullPolicy: IfNotPresent
        name: {{ $microservice.name }}
        # Every key/value pair of a port entry is emitted as its own list item.
        {{- if $microservice.ports }}
        ports:
        {{- range $port := $microservice.ports }}
          {{- range $port_name, $port_id := $port }}
          - {{ $port_name }}: {{ $port_id }}
          {{- end }}
        {{- end }}
        {{- end }}
        # podSpecs resources, when set, replace the microservice resources as a whole (no merging).
        {{- $resources := $microservice.resources }}
        {{- range $podSpec := $global.podSpecs }}
          {{- if eq $podSpec.name $microservice.name }}
            {{- if $podSpec.resources }}
              {{- $resources = $podSpec.resources }}
            {{- end }}
          {{- end }}
        {{- end }}
        {{- if $resources }}
        resources:
        {{- range $resourceType, $resource := $resources }}
          {{ $resourceType }}:
          {{- range $limitType, $limit := $resource }}
            {{ $limitType }}: {{ $limit }}
          {{- end }}
        {{- end }}
        {{- end }}
        {{- if $microservice.volumeMounts }}
        volumeMounts:
        {{- range $volumeMount := $microservice.volumeMounts }}
          - mountPath: {{ $volumeMount.mountPath }}
            name: {{ $volumeMount.name }}
        {{- end }}
        {{- end }}
      hostIPC: true
      # Pods are restricted to nodes whose node-type label equals config.NODE_SELECTOR.
      nodeSelector:
        node-type: {{ $global.config.NODE_SELECTOR }}
      serviceAccountName: default
      topologySpreadConstraints:
      - labelSelector:
          matchLabels:
            app: {{ $microservice.name }}
        maxSkew: 1
        topologyKey: kubernetes.io/hostname
        whenUnsatisfiable: ScheduleAnyway
      # Only hostPath and emptyDir volume sources are rendered; any other type yields just the name.
      {{- if $microservice.volumes }}
      volumes:
      {{- range $index, $volume := $microservice.volumes }}
        - name: {{ $volume.name }}
          {{- if $volume.hostPath }}
          hostPath:
            path: {{ $volume.hostPath.path }}
            type: {{ $volume.hostPath.type }}
          {{- else if $volume.emptyDir }}
          emptyDir:
            medium: {{ $volume.emptyDir.medium }}
            sizeLimit: {{ $volume.emptyDir.sizeLimit }}
          {{- end }}
      {{- end }}
      {{- end }}
 ---
 {{- end }}
--- a/AudioQnA/benchmark/performance/helm_charts/templates/service.yaml
+++ b/AudioQnA/benchmark/performance/helm_charts/templates/service.yaml
@@ -1,24 +0,0 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 # Renders one Service per entry in .Values.services.
 {{- range $service := .Values.services }}
 apiVersion: v1
 kind: Service
 metadata:
  name: {{ $service.name }}
  namespace: default
 spec:
  ports:
  # "name" is written first to open the list item; the remaining keys of the
  # port entry follow inside the same item.
  {{- range $port := $service.spec.ports }}
    - name: {{ $port.name }}
    {{- range $port_name, $port_id := $port }}
      {{- if ne $port_name "name"}}
      {{ $port_name }}: {{ $port_id }}
      {{- end }}
    {{- end }}
  {{- end }}
  selector:
    app: {{ $service.spec.selector.app }}
  type: {{ $service.spec.type }}
 ---
 {{- end }}
--- a/AudioQnA/benchmark/performance/helm_charts/values.yaml
+++ b/AudioQnA/benchmark/performance/helm_charts/values.yaml
@@ -1,200 +0,0 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 # Default values for the AudioQnA benchmark chart.
 # NOTE(review): the chart's templates hard-code "namespace: default" and do not read this key.
 namespace: default
 # Of the keys below, the templates read CONFIG_MAP_NAME, HUGGINGFACEHUB_API_TOKEN,
 # LLM_MODEL_ID and NODE_SELECTOR. The endpoint, host and port entries are repeated
 # as literals in templates/configmap.yaml, so editing them here does not change
 # the rendered ConfigMap.
 config:
  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
  CONFIG_MAP_NAME: audio-qna-config
  NODE_SELECTOR: opea
  ASR_ENDPOINT: http://whisper-svc.default.svc.cluster.local:7066
  TTS_ENDPOINT: http://speecht5-svc.default.svc.cluster.local:7055
  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:3006
  MEGA_SERVICE_HOST_IP: audioqna-backend-server-svc
  ASR_SERVICE_HOST_IP: asr-svc
  ASR_SERVICE_PORT: "3001"
  LLM_SERVICE_HOST_IP: llm-svc
  LLM_SERVICE_PORT: "3007"
  TTS_SERVICE_HOST_IP: tts-svc
  TTS_SERVICE_PORT: "3002"
  # Placeholder text; the README asks for it to be replaced with a real Hugging Face token.
  HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 # One Deployment is rendered per entry (templates/deployment.yaml).
 microservices:
  - name: audioqna-backend-server-deploy
    image: opea/audioqna:latest
    replicas: 1
    ports:
      - containerPort: 8888
  - name: asr-deploy
    image: opea/asr:latest
    replicas: 1
    ports:
      - containerPort: 9099
  - name: whisper-deploy
    image: opea/whisper-gaudi:latest
    replicas: 1
    ports:
      - containerPort: 7066
    env:
      - name: OMPI_MCA_btl_vader_single_copy_mechanism
        value: none
      - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
        value: 'true'
      - name: runtime
        value: habana
      - name: HABANA_VISIBLE_DEVICES
        value: all
    resources:
      limits:
        habana.ai/gaudi: 1
  - name: tts-deploy
    image: opea/tts:latest
    replicas: 1
    ports:
      - containerPort: 9088
  - name: speecht5-deploy
    image: opea/speecht5-gaudi:latest
    replicas: 1
    ports:
      - containerPort: 7055
    env:
      - name: OMPI_MCA_btl_vader_single_copy_mechanism
        value: none
      - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
        value: 'true'
      - name: runtime
        value: habana
      - name: HABANA_VISIBLE_DEVICES
        value: all
    resources:
      limits:
        habana.ai/gaudi: 1
  - name: llm-deploy
    image: opea/llm-tgi:latest
    replicas: 1
    ports:
      - containerPort: 9000
  - name: llm-dependency-deploy
    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
    replicas: 1
    ports:
      - containerPort: 80
    resources:
      limits:
        habana.ai/gaudi: 1
    # Each entry becomes two container args: the flag name, then its value.
    args:
      - name: "--model-id"
        value: $(LLM_MODEL_ID)
      - name: "--max-input-length"
        value: "2048"
      - name: "--max-total-tokens"
        value: "4096"
    env:
      - name: OMPI_MCA_btl_vader_single_copy_mechanism
        value: none
      - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
        value: "true"
      - name: runtime
        value: habana
      - name: HABANA_VISIBLE_DEVICES
        value: all
      - name: ENABLE_HPU_GRAPH
        value: 'true'
      - name: LIMIT_HPU_GRAPH
        value: 'true'
      - name: USE_FLASH_ATTENTION
        value: 'true'
      - name: FLASH_ATTENTION_RECOMPUTE
        value: 'true'
    volumeMounts:
      - mountPath: /data
        name: model-volume
      - mountPath: /dev/shm
        name: shm
    volumes:
      - hostPath:
          path: /mnt/models
          type: Directory
        name: model-volume
      - emptyDir:
          medium: Memory
          sizeLimit: 1Gi
        name: shm
 # One Service is rendered per entry (templates/service.yaml); selector.app
 # must equal the name of the microservice it fronts.
 services:
  - name: asr-svc
    spec:
      ports:
        - name: service
          port: 3001
          targetPort: 9099
      selector:
        app: asr-deploy
      type: ClusterIP
  - name: whisper-svc
    spec:
      ports:
        - name: service
          port: 7066
          targetPort: 7066
      selector:
        app: whisper-deploy
      type: ClusterIP
  - name: tts-svc
    spec:
      ports:
        - name: service
          port: 3002
          targetPort: 9088
      selector:
        app: tts-deploy
      type: ClusterIP
  - name: speecht5-svc
    spec:
      ports:
        - name: service
          port: 7055
          targetPort: 7055
      selector:
        app: speecht5-deploy
      type: ClusterIP
  - name: llm-dependency-svc
    spec:
      ports:
        - name: service
          port: 3006
          targetPort: 80
      selector:
        app: llm-dependency-deploy
      type: ClusterIP
  - name: llm-svc
    spec:
      ports:
        - name: service
          port: 3007
          targetPort: 9000
      selector:
        app: llm-deploy
      type: ClusterIP
  - name: audioqna-backend-server-svc
    spec:
      ports:
        - name: service
          port: 3088
          targetPort: 8888
          nodePort: 30666
      selector:
        app: audioqna-backend-server-deploy
      type: NodePort
--- a/ChatQnA/benchmark/performance/helm_charts/deployment.py
+++ b/ChatQnA/benchmark/performance/helm_charts/deployment.py
@@ -59,7 +59,7 @@ def generate_yaml(num_nodes, mode="oob", with_rerank="True"):
            else None
        ),
        {"name": "llm-dependency-deploy", "resources": {"limits": {"habana.ai/gaudi": 1}}},
-        {"name": "retriever-deploy", "resources": {"requests": {"cpu": "16", "memory": "8000Mi"}}},
+        {"name": "retriever-deploy", "resources": {"requests": {"cpu": "8", "memory": "8000Mi"}}},
    ]
    replicas = [replica for replica in replicas if replica]
@@ -72,7 +72,7 @@ def generate_yaml(num_nodes, mode="oob", with_rerank="True"):
                {"name": "--model-id", "value": "$(LLM_MODEL_ID)"},
                {"name": "--max-input-length", "value": 1280},
                {"name": "--max-total-tokens", "value": 2048},
-                {"name": "--max-batch-total-tokens", "value": 35536},
+                {"name": "--max-batch-total-tokens", "value": 65536},
                {"name": "--max-batch-prefill-tokens", "value": 4096},
            ],
        },
--- a/FaqGen/benchmark/performance/helm_charts/.helmignore
+++ b/FaqGen/benchmark/performance/helm_charts/.helmignore
@@ -1,23 +0,0 @@
 # Patterns to ignore when building packages.
 # This supports shell glob matching, relative path matching, and
 # negation (prefixed with !). Only one pattern per line.
 .DS_Store
 # Version-control directories and their ignore files
 .git/
 .gitignore
 .bzr/
 .bzrignore
 .hg/
 .hgignore
 .svn/
 # Editor swap files and backup/temporary leftovers
 *.swp
 *.bak
 *.tmp
 *.orig
 *~
 # IDE and editor project settings
 .project
 .idea/
 *.tmproj
 .vscode/
--- a/FaqGen/benchmark/performance/helm_charts/Chart.yaml
+++ b/FaqGen/benchmark/performance/helm_charts/Chart.yaml
@@ -1,27 +0,0 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 apiVersion: v2
 name: faqgen-charts
 description: A Helm chart for Kubernetes
 # A chart can be either an 'application' or a 'library' chart.
 #
 # Application charts are a collection of templates that can be packaged into versioned archives
 # to be deployed.
 #
 # Library charts provide useful utilities or functions for the chart developer. They're included as
 # a dependency of application charts to inject those utilities and functions into the rendering
 # pipeline. Library charts do not define any templates and therefore cannot be deployed.
 type: application
 # This is the chart version. This version number should be incremented each time you make changes
 # to the chart and its templates, including the app version.
 # Versions are expected to follow Semantic Versioning (https://semver.org/), i.e. MAJOR.MINOR.PATCH.
 # A bare 1.0 is read by YAML as a float and lacks the patch component, so all three parts are spelled out.
 version: 1.0.0
 # This is the version number of the application being deployed. This version number should be
 # incremented each time you make changes to the application. Versions are not expected to
 # follow Semantic Versioning. They should reflect the version the application is using.
 # It is recommended to use it with quotes.
 appVersion: "1.16.0"
--- a/FaqGen/benchmark/performance/helm_charts/README.md
+++ b/FaqGen/benchmark/performance/helm_charts/README.md
@@ -1,25 +0,0 @@
 # Benchmarking Deployment
 This document guides you through deploying this example pipeline using Helm charts. Helm charts simplify managing Kubernetes applications by packaging configuration and resources.
 ## Getting Started
 ### Preparation
 ```bash
 # on k8s-master node
 cd GenAIExamples/{example_name}/benchmark/performance/helm_charts
 # Replace the key of HUGGINGFACEHUB_API_TOKEN with your actual Hugging Face token:
 # vim values.yaml
 HUGGINGFACEHUB_API_TOKEN: hf_xxxxx
 ```
 ### Deployment
 ```bash
 # Deploy the pipeline
 helm install {example_name} .
 ```
 Note: Currently we only support the HPU version, because only HPU values.yaml is provided here.
--- a/FaqGen/benchmark/performance/helm_charts/customize.yaml
+++ b/FaqGen/benchmark/performance/helm_charts/customize.yaml
@@ -1,23 +0,0 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 # Per-deployment overrides. templates/deployment.yaml looks each microservice up
 # in this list by name and, on a match, takes replicas (and image/resources when
 # given) from here instead of from the microservice's own entry.
 # Presumably supplied to Helm as an additional values file; confirm with the caller.
 podSpecs:
  - name: faq-mega-server-deploy
    replicas: 2
    resources:
      limits:
        cpu: "8"
        memory: "8000Mi"
      requests:
        cpu: "8"
        memory: "8000Mi"
  - name: faq-tgi-deploy
    replicas: 7
    resources:
      limits:
        habana.ai/gaudi: 1
  # No resources given here, so only the replica count is overridden for this one.
  - name: faq-micro-deploy
    replicas: 1
--- a/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml
+++ b/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml
@@ -1,16 +0,0 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 # Shared ConfigMap; templates/deployment.yaml loads it into every container via envFrom.
 apiVersion: v1
 kind: ConfigMap
 metadata:
  name: {{ .Values.config.CONFIG_MAP_NAME }}
  namespace: default
 data:
  # values.yaml keeps the token under "config"; a top-level HUGGINGFACEHUB_API_TOKEN
  # (the only key read previously) still takes precedence when it is set.
  HUGGINGFACEHUB_API_TOKEN: {{ .Values.HUGGINGFACEHUB_API_TOKEN | default .Values.config.HUGGINGFACEHUB_API_TOKEN | quote }}
  # Quoted so that number- or boolean-looking values are still rendered as strings.
  LLM_MODEL_ID: {{ .Values.config.LLM_MODEL_ID | quote }}
  NODE_SELECTOR: {{ .Values.config.NODE_SELECTOR | quote }}
  TGI_LLM_ENDPOINT: http://faq-tgi-svc.default.svc.cluster.local:8010
  LLM_SERVICE_HOST_IP: faq-micro-svc
  MEGA_SERVICE_HOST_IP: faq-mega-server-svc
 ---
--- a/FaqGen/benchmark/performance/helm_charts/templates/deployment.yaml
+++ b/FaqGen/benchmark/performance/helm_charts/templates/deployment.yaml
@@ -1,131 +0,0 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 # Renders one Deployment per entry in .Values.microservices. An entry in
 # .Values.podSpecs with the same name overrides that microservice's replicas,
 # image and resources; all other fields come from the microservice entry.
 {{- $global := .Values }}
 {{- range $microservice := .Values.microservices }}
 apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: {{ $microservice.name }}
  namespace: default
 spec:
  # Replica count: a matching podSpecs entry wins, otherwise the microservice's own value.
  {{- $replicas := $microservice.replicas }}
  {{- range $podSpec := $global.podSpecs }}
    {{- if eq $podSpec.name $microservice.name }}
      {{- $replicas = $podSpec.replicas | default $microservice.replicas }}
    {{- end }}
  {{- end }}
  replicas: {{ $replicas }}
  selector:
    matchLabels:
      app: {{ $microservice.name }}
  template:
    metadata:
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
      labels:
        app: {{ $microservice.name }}
    spec:
      containers:
      # Every container loads the shared ConfigMap as environment variables.
      - envFrom:
        - configMapRef:
            name: {{ $global.config.CONFIG_MAP_NAME }}
        # Each args entry contributes its name and then its value as separate list items.
        {{- if $microservice.args }}
        args:
        {{- range $arg := $microservice.args }}
          {{- if $arg.name }}
          - {{ $arg.name }}
          {{- end }}
          {{- if $arg.value }}
          - "{{ $arg.value }}"
          {{- end }}
        {{- end }}
        {{- end }}
        {{- if $microservice.env }}
        env:
        {{- range $env := $microservice.env }}
          - name: {{ $env.name }}
            value: "{{ $env.value }}"
        {{- end }}
        {{- end }}
        {{- $image := $microservice.image }}
        {{- range $podSpec := $global.podSpecs }}
          {{- if eq $podSpec.name $microservice.name }}
            {{- $image = $podSpec.image | default $microservice.image }}
          {{- end }}
        {{- end }}
        image: {{ $image }}
        imagePullPolicy: IfNotPresent
        name: {{ $microservice.name }}
        # Every key/value pair of a port entry is emitted as its own list item.
        {{- if $microservice.ports }}
        ports:
        {{- range $port := $microservice.ports }}
          {{- range $port_name, $port_id := $port }}
          - {{ $port_name }}: {{ $port_id }}
          {{- end }}
        {{- end }}
        {{- end }}
        # podSpecs resources, when set, replace the microservice resources as a whole (no merging).
        {{- $resources := $microservice.resources }}
        {{- range $podSpec := $global.podSpecs }}
          {{- if eq $podSpec.name $microservice.name }}
            {{- if $podSpec.resources }}
              {{- $resources = $podSpec.resources }}
            {{- end }}
          {{- end }}
        {{- end }}
        {{- if $resources }}
        resources:
        {{- range $resourceType, $resource := $resources }}
          {{ $resourceType }}:
          {{- range $limitType, $limit := $resource }}
            {{ $limitType }}: {{ $limit }}
          {{- end }}
        {{- end }}
        {{- end }}
        {{- if $microservice.volumeMounts }}
        volumeMounts:
        {{- range $volumeMount := $microservice.volumeMounts }}
          - mountPath: {{ $volumeMount.mountPath }}
            name: {{ $volumeMount.name }}
        {{- end }}
        {{- end }}
      hostIPC: true
      # Pods are restricted to nodes whose node-type label equals config.NODE_SELECTOR.
      nodeSelector:
        node-type: {{ $global.config.NODE_SELECTOR }}
      serviceAccountName: default
      topologySpreadConstraints:
      - labelSelector:
          matchLabels:
            app: {{ $microservice.name }}
        maxSkew: 1
        topologyKey: kubernetes.io/hostname
        whenUnsatisfiable: ScheduleAnyway
      # Only hostPath and emptyDir volume sources are rendered; any other type yields just the name.
      {{- if $microservice.volumes }}
      volumes:
      {{- range $index, $volume := $microservice.volumes }}
        - name: {{ $volume.name }}
          {{- if $volume.hostPath }}
          hostPath:
            path: {{ $volume.hostPath.path }}
            type: {{ $volume.hostPath.type }}
          {{- else if $volume.emptyDir }}
          emptyDir:
            medium: {{ $volume.emptyDir.medium }}
            sizeLimit: {{ $volume.emptyDir.sizeLimit }}
          {{- end }}
      {{- end }}
      {{- end }}
 ---
 {{- end }}
--- a/FaqGen/benchmark/performance/helm_charts/templates/service.yaml
+++ b/FaqGen/benchmark/performance/helm_charts/templates/service.yaml
@@ -1,24 +0,0 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 # Renders one Service per entry in .Values.services.
 {{- range $service := .Values.services }}
 apiVersion: v1
 kind: Service
 metadata:
  name: {{ $service.name }}
  namespace: default
 spec:
  ports:
  # "name" is written first to open the list item; the remaining keys of the
  # port entry follow inside the same item.
  {{- range $port := $service.spec.ports }}
    - name: {{ $port.name }}
    {{- range $port_name, $port_id := $port }}
      {{- if ne $port_name "name"}}
      {{ $port_name }}: {{ $port_id }}
      {{- end }}
    {{- end }}
  {{- end }}
  selector:
    app: {{ $service.spec.selector.app }}
  type: {{ $service.spec.type }}
 ---
 {{- end }}
--- a/FaqGen/benchmark/performance/helm_charts/values.yaml
+++ b/FaqGen/benchmark/performance/helm_charts/values.yaml
@@ -1,102 +0,0 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 # Default values for the FaqGen benchmark chart.
 # NOTE(review): the chart's templates hard-code "namespace: default" and do not read this key.
 namespace: default
 # Settings read by the templates: LLM_MODEL_ID and NODE_SELECTOR go into the
 # ConfigMap, CONFIG_MAP_NAME names it, NODE_SELECTOR also drives the pod nodeSelector.
 config:
  LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct
  CONFIG_MAP_NAME: faq-config
  NODE_SELECTOR: opea
  # Placeholder text; the README asks for it to be replaced with a real Hugging Face token.
  # NOTE(review): make sure templates/configmap.yaml reads the token from this
  # "config" section; a top-level HUGGINGFACEHUB_API_TOKEN is a different key.
  HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 # One Deployment is rendered per entry (templates/deployment.yaml).
 microservices:
  # NOTE(review): this entry uses the opea/chatqna image although the chart is FaqGen; confirm it is intended.
  - name: faq-mega-server-deploy
    image: opea/chatqna:latest
    replicas: 1
    ports:
      - containerPort: 7777
  - name: faq-micro-deploy
    image: opea/llm-faqgen-tgi:latest
    replicas: 1
    ports:
      - containerPort: 9000
  - name: faq-tgi-deploy
    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
    replicas: 1
    ports:
      - containerPort: 80
    resources:
      limits:
        habana.ai/gaudi: 1
    # Each entry becomes two container args: the flag name, then its value.
    args:
      - name: "--model-id"
        value: $(LLM_MODEL_ID)
      - name: "--max-input-length"
        value: "2048"
      - name: "--max-total-tokens"
        value: "4096"
    env:
      - name: OMPI_MCA_btl_vader_single_copy_mechanism
        value: none
      - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
        value: "true"
      - name: runtime
        value: habana
      - name: HABANA_VISIBLE_DEVICES
        value: all
      - name: ENABLE_HPU_GRAPH
        value: 'true'
      - name: LIMIT_HPU_GRAPH
        value: 'true'
      - name: USE_FLASH_ATTENTION
        value: 'true'
      - name: FLASH_ATTENTION_RECOMPUTE
        value: 'true'
    volumeMounts:
      - mountPath: /data
        name: model-volume
      - mountPath: /dev/shm
        name: shm
    volumes:
      - hostPath:
          path: /mnt/models
          type: Directory
        name: model-volume
      - emptyDir:
          medium: Memory
          sizeLimit: 1Gi
        name: shm
 # One Service is rendered per entry (templates/service.yaml); selector.app
 # must equal the name of the microservice it fronts.
 services:
  - name: faq-micro-svc
    spec:
      ports:
        - name: service
          port: 9003
          targetPort: 9000
      selector:
        app: faq-micro-deploy
      type: ClusterIP
  - name: faq-tgi-svc
    spec:
      ports:
        - name: service
          port: 8010
          targetPort: 80
      selector:
        app: faq-tgi-deploy
      type: ClusterIP
  - name: faq-mega-server-svc
    spec:
      ports:
        - name: service
          port: 7779
          targetPort: 7777
          nodePort: 30779
      selector:
        app: faq-mega-server-deploy
      type: NodePort
--- a/VisualQnA/benchmark/performance/helm_charts/.helmignore
+++ b/VisualQnA/benchmark/performance/helm_charts/.helmignore
@@ -1,23 +0,0 @@
 # Patterns to ignore when building packages.
 # This supports shell glob matching, relative path matching, and
 # negation (prefixed with !). Only one pattern per line.
 .DS_Store
 # Version-control directories and their ignore files
 .git/
 .gitignore
 .bzr/
 .bzrignore
 .hg/
 .hgignore
 .svn/
 # Editor swap files and backup/temporary leftovers
 *.swp
 *.bak
 *.tmp
 *.orig
 *~
 # IDE and editor project settings
 .project
 .idea/
 *.tmproj
 .vscode/
--- a/VisualQnA/benchmark/performance/helm_charts/Chart.yaml
+++ b/VisualQnA/benchmark/performance/helm_charts/Chart.yaml
@@ -1,27 +0,0 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 apiVersion: v2
 name: visualqna-charts
 description: A Helm chart for Kubernetes
 # A chart can be either an 'application' or a 'library' chart.
 #
 # Application charts are a collection of templates that can be packaged into versioned archives
 # to be deployed.
 #
 # Library charts provide useful utilities or functions for the chart developer. They're included as
 # a dependency of application charts to inject those utilities and functions into the rendering
 # pipeline. Library charts do not define any templates and therefore cannot be deployed.
 type: application
 # This is the chart version. This version number should be incremented each time you make changes
 # to the chart and its templates, including the app version.
 # Versions are expected to follow Semantic Versioning (https://semver.org/), i.e. MAJOR.MINOR.PATCH.
 # A bare 1.0 is read by YAML as a float and lacks the patch component, so all three parts are spelled out.
 version: 1.0.0
 # This is the version number of the application being deployed. This version number should be
 # incremented each time you make changes to the application. Versions are not expected to
 # follow Semantic Versioning. They should reflect the version the application is using.
 # It is recommended to use it with quotes.
 appVersion: "1.16.0"
--- a/VisualQnA/benchmark/performance/helm_charts/README.md
+++ b/VisualQnA/benchmark/performance/helm_charts/README.md
@@ -1,25 +0,0 @@
 # Benchmarking Deployment
 This document guides you through deploying this example pipeline using Helm charts. Helm charts simplify managing Kubernetes applications by packaging configuration and resources.
 ## Getting Started
 ### Preparation
 ```bash
 # on k8s-master node
 cd GenAIExamples/{example_name}/benchmark/performance/helm_charts
 # Replace the key of HUGGINGFACEHUB_API_TOKEN with your actual Hugging Face token:
 # vim values.yaml
 HUGGINGFACEHUB_API_TOKEN: hf_xxxxx
 ```
 ### Deployment
 ```bash
 # Deploy the pipeline
 helm install {example_name} .
 ```
 Note: Currently we only support the HPU version, because only HPU values.yaml is provided here.
--- a/VisualQnA/benchmark/performance/helm_charts/customize.yaml
+++ b/VisualQnA/benchmark/performance/helm_charts/customize.yaml
@@ -1,23 +0,0 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 # Per-deployment overrides. templates/deployment.yaml looks each microservice up
 # in this list by name and, on a match, takes replicas (and image/resources when
 # given) from here instead of from the microservice's own entry.
 # NOTE(review): the faq-* names below match none of the microservices declared in
 # this chart's values.yaml (visualqna-lvm-uservice, visualqna, visualqna-tgi), so
 # none of these overrides would be applied. The list looks copied from the FaqGen
 # chart; confirm the intended names.
 podSpecs:
  - name: faq-mega-server-deploy
    replicas: 2
    resources:
      limits:
        cpu: "8"
        memory: "8000Mi"
      requests:
        cpu: "8"
        memory: "8000Mi"
  - name: faq-tgi-deploy
    replicas: 7
    resources:
      limits:
        habana.ai/gaudi: 1
  - name: faq-micro-deploy
    replicas: 1
--- a/VisualQnA/benchmark/performance/helm_charts/templates/configmap.yaml
+++ b/VisualQnA/benchmark/performance/helm_charts/templates/configmap.yaml
@@ -1,24 +0,0 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 # Shared ConfigMap; templates/deployment.yaml loads it into every container via envFrom.
 apiVersion: v1
 kind: ConfigMap
 metadata:
  name: {{ .Values.config.CONFIG_MAP_NAME }}
  namespace: default
 data:
  # values.yaml keeps the token under "config"; a top-level HUGGINGFACEHUB_API_TOKEN
  # (the only key read previously) still takes precedence when it is set.
  HUGGINGFACEHUB_API_TOKEN: {{ .Values.HUGGINGFACEHUB_API_TOKEN | default .Values.config.HUGGINGFACEHUB_API_TOKEN | quote }}
  # Each key appears exactly once: literal repeats of NODE_SELECTOR and
  # HUGGINGFACEHUB_API_TOKEN used to follow and, under last-wins parsing,
  # overrode the values-driven entries. Values are quoted so they stay strings.
  LLM_MODEL_ID: {{ .Values.config.LLM_MODEL_ID | quote }}
  NODE_SELECTOR: {{ .Values.config.NODE_SELECTOR | quote }}
  # Kept in step with metadata.name above instead of repeating a literal.
  CONFIG_MAP_NAME: {{ .Values.config.CONFIG_MAP_NAME | quote }}
  LVM_ENDPOINT: "http://visualqna-tgi"
  MODEL_ID: "llava-hf/llava-v1.6-mistral-7b-hf"
  LVM_SERVICE_HOST_IP: visualqna-lvm-uservice
  PORT: "8399"
  MAX_INPUT_TOKENS: "4096"
  MAX_TOTAL_TOKENS: "8192"
 ---
--- a/VisualQnA/benchmark/performance/helm_charts/templates/deployment.yaml
+++ b/VisualQnA/benchmark/performance/helm_charts/templates/deployment.yaml
@@ -1,131 +0,0 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 # Renders one Deployment per entry in .Values.microservices. An entry in
 # .Values.podSpecs with the same name overrides that microservice's replicas,
 # image and resources; all other fields come from the microservice entry.
 {{- $global := .Values }}
 {{- range $microservice := .Values.microservices }}
 apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: {{ $microservice.name }}
  namespace: default
 spec:
  # Replica count: a matching podSpecs entry wins, otherwise the microservice's own value.
  {{- $replicas := $microservice.replicas }}
  {{- range $podSpec := $global.podSpecs }}
    {{- if eq $podSpec.name $microservice.name }}
      {{- $replicas = $podSpec.replicas | default $microservice.replicas }}
    {{- end }}
  {{- end }}
  replicas: {{ $replicas }}
  selector:
    matchLabels:
      app: {{ $microservice.name }}
  template:
    metadata:
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
      labels:
        app: {{ $microservice.name }}
    spec:
      containers:
      # Every container loads the shared ConfigMap as environment variables.
      - envFrom:
        - configMapRef:
            name: {{ $global.config.CONFIG_MAP_NAME }}
        # Each args entry contributes its name and then its value as separate list items.
        {{- if $microservice.args }}
        args:
        {{- range $arg := $microservice.args }}
          {{- if $arg.name }}
          - {{ $arg.name }}
          {{- end }}
          {{- if $arg.value }}
          - "{{ $arg.value }}"
          {{- end }}
        {{- end }}
        {{- end }}
        {{- if $microservice.env }}
        env:
        {{- range $env := $microservice.env }}
          - name: {{ $env.name }}
            value: "{{ $env.value }}"
        {{- end }}
        {{- end }}
        {{- $image := $microservice.image }}
        {{- range $podSpec := $global.podSpecs }}
          {{- if eq $podSpec.name $microservice.name }}
            {{- $image = $podSpec.image | default $microservice.image }}
          {{- end }}
        {{- end }}
        image: {{ $image }}
        imagePullPolicy: IfNotPresent
        name: {{ $microservice.name }}
        # Every key/value pair of a port entry is emitted as its own list item.
        {{- if $microservice.ports }}
        ports:
        {{- range $port := $microservice.ports }}
          {{- range $port_name, $port_id := $port }}
          - {{ $port_name }}: {{ $port_id }}
          {{- end }}
        {{- end }}
        {{- end }}
        # podSpecs resources, when set, replace the microservice resources as a whole (no merging).
        {{- $resources := $microservice.resources }}
        {{- range $podSpec := $global.podSpecs }}
          {{- if eq $podSpec.name $microservice.name }}
            {{- if $podSpec.resources }}
              {{- $resources = $podSpec.resources }}
            {{- end }}
          {{- end }}
        {{- end }}
        {{- if $resources }}
        resources:
        {{- range $resourceType, $resource := $resources }}
          {{ $resourceType }}:
          {{- range $limitType, $limit := $resource }}
            {{ $limitType }}: {{ $limit }}
          {{- end }}
        {{- end }}
        {{- end }}
        {{- if $microservice.volumeMounts }}
        volumeMounts:
        {{- range $volumeMount := $microservice.volumeMounts }}
          - mountPath: {{ $volumeMount.mountPath }}
            name: {{ $volumeMount.name }}
        {{- end }}
        {{- end }}
      hostIPC: true
      # Pods are restricted to nodes whose node-type label equals config.NODE_SELECTOR.
      nodeSelector:
        node-type: {{ $global.config.NODE_SELECTOR }}
      serviceAccountName: default
      topologySpreadConstraints:
      - labelSelector:
          matchLabels:
            app: {{ $microservice.name }}
        maxSkew: 1
        topologyKey: kubernetes.io/hostname
        whenUnsatisfiable: ScheduleAnyway
      # Only hostPath and emptyDir volume sources are rendered; any other type yields just the name.
      {{- if $microservice.volumes }}
      volumes:
      {{- range $index, $volume := $microservice.volumes }}
        - name: {{ $volume.name }}
          {{- if $volume.hostPath }}
          hostPath:
            path: {{ $volume.hostPath.path }}
            type: {{ $volume.hostPath.type }}
          {{- else if $volume.emptyDir }}
          emptyDir:
            medium: {{ $volume.emptyDir.medium }}
            sizeLimit: {{ $volume.emptyDir.sizeLimit }}
          {{- end }}
      {{- end }}
      {{- end }}
 ---
 {{- end }}
--- a/VisualQnA/benchmark/performance/helm_charts/templates/service.yaml
+++ b/VisualQnA/benchmark/performance/helm_charts/templates/service.yaml
@@ -1,24 +0,0 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 {{- /* Emits one v1 Service document per entry in .Values.services. */}}
 {{- /* Each entry supplies: name, spec.ports (list), spec.selector.app and spec.type. */}}
 {{- range $service := .Values.services }}
 apiVersion: v1
 kind: Service
 metadata:
  name: {{ $service.name | quote }}
  {{- /* Namespace follows the chart-level `namespace` value; "default" when it is unset. */}}
  namespace: {{ $.Values.namespace | default "default" | quote }}
 spec:
  ports:
    {{- /* Port entries are serialized whole so every key given in values is kept. */}}
    {{- toYaml $service.spec.ports | nindent 4 }}
  selector:
    app: {{ $service.spec.selector.app | quote }}
  type: {{ $service.spec.type | default "ClusterIP" }}
 ---
 {{- end }}
--- a/VisualQnA/benchmark/performance/helm_charts/values.yaml
+++ b/VisualQnA/benchmark/performance/helm_charts/values.yaml
@@ -1,84 +0,0 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 # Values for the VisualQnA benchmark chart: shared settings (config), the
 # workloads to deploy (microservices) and the Services exposing them (services).
 namespace: default
 config:
  LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct
  CONFIG_MAP_NAME: visualqna-config
  NODE_SELECTOR: opea
  # Placeholder; presumably substituted with a real token before install - confirm.
  HUGGINGFACEHUB_API_TOKEN: "${HF_TOKEN}"
  # Host must match a Service name under `services:`; no port given, so port 80
  # of visualqna-tgi-service is used.
  LVM_ENDPOINT: "http://visualqna-tgi-service"
  MODEL_ID: "llava-hf/llava-v1.6-mistral-7b-hf"
  LVM_SERVICE_HOST_IP: visualqna-lvm-uservice
  PORT: "8399"
  MAX_INPUT_TOKENS: "4096"
  MAX_TOTAL_TOKENS: "8192"
 microservices:
  - name: visualqna-lvm-uservice
    image: opea/lvm-tgi:latest
    replicas: 1
    ports:
      - containerPort: 9399
  - name: visualqna
    image: opea/visualqna:latest
    replicas: 1
    ports:
      # Kept equal to the targetPort of visualqna-service below.
      - containerPort: 8888
  - name: visualqna-tgi
    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
    replicas: 1
    ports:
      - containerPort: 8399
    resources:
      limits:
        habana.ai/gaudi: 1
    volumeMounts:
      - mountPath: /data
        name: model-volume
      - mountPath: /dev/shm
        name: shm
    volumes:
      - hostPath:
          path: /mnt/models
          type: Directory
        name: model-volume
      - emptyDir:
          medium: Memory
          sizeLimit: 1Gi
        name: shm
 # Each selector.app below must equal the `name` of a microservice above.
 services:
  - name: visualqna-lvm-uservice
    spec:
      ports:
        - name: service
          port: 9399
          targetPort: 9399
      selector:
        app: visualqna-lvm-uservice
      type: ClusterIP
  - name: visualqna-tgi-service
    spec:
      ports:
        - name: service
          port: 80
          targetPort: 8399
      selector:
        app: visualqna-tgi
      type: ClusterIP
  - name: visualqna-service
    spec:
      ports:
        - name: service
          port: 8888
          targetPort: 8888
      selector:
        app: visualqna
      type: ClusterIP