From bb46f5b3554580be8da0ce2e232a6f7d646feeb2 Mon Sep 17 00:00:00 2001
From: Zhenzhong1 <zhenzhongxu@zhenzhongxu-vm-u22.habana-labs.com>
Date: Tue, 22 Oct 2024 05:45:00 +0300
Subject: [PATCH] added visual qna & update deployment template

---
 .../helm_charts/templates/deployment.yaml     |   9 +-
 .../helm_charts/hpu_with_rerank.yaml          |   3 +
 .../helm_charts/templates/deployment.yaml     |   9 +-
 .../performance/helm_charts/.helmignore       |  23 +++
 .../performance/helm_charts/Chart.yaml        |  27 ++++
 .../performance/helm_charts/README.md         |  36 +++++
 .../performance/helm_charts/customize.yaml    |  23 +++
 .../helm_charts/templates/configmap.yaml      |  25 ++++
 .../helm_charts/templates/deployment.yaml     | 131 ++++++++++++++++++
 .../helm_charts/templates/service.yaml        |  24 ++++
 .../performance/helm_charts/values.yaml       |  84 +++++++++++
 11 files changed, 392 insertions(+), 2 deletions(-)
 create mode 100644 VisualQnA/benchmark/performance/helm_charts/.helmignore
 create mode 100644 VisualQnA/benchmark/performance/helm_charts/Chart.yaml
 create mode 100644 VisualQnA/benchmark/performance/helm_charts/README.md
 create mode 100644 VisualQnA/benchmark/performance/helm_charts/customize.yaml
 create mode 100644 VisualQnA/benchmark/performance/helm_charts/templates/configmap.yaml
 create mode 100644 VisualQnA/benchmark/performance/helm_charts/templates/deployment.yaml
 create mode 100644 VisualQnA/benchmark/performance/helm_charts/templates/service.yaml
 create mode 100644 VisualQnA/benchmark/performance/helm_charts/values.yaml

diff --git a/AudioQnA/benchmark/helm_charts/templates/deployment.yaml b/AudioQnA/benchmark/helm_charts/templates/deployment.yaml
index 920e54685..4fe4fc581 100644
--- a/AudioQnA/benchmark/helm_charts/templates/deployment.yaml
+++ b/AudioQnA/benchmark/helm_charts/templates/deployment.yaml
@@ -51,7 +51,14 @@ spec:
         {{- end }}
         {{- end }}
 
-        image: {{ $microservice.image }}
+        {{- $image := $microservice.image }}
+        {{- range $podSpec := $global.podSpecs }}
+          {{- if eq $podSpec.name $microservice.name }}
+            {{- $image = $podSpec.image | default $microservice.image }}
+          {{- end }}
+        {{- end }}
+        image: {{ $image }}
+
         imagePullPolicy: IfNotPresent
         name: {{ $microservice.name }}
 
diff --git a/ChatQnA/benchmark/performance/helm_charts/hpu_with_rerank.yaml b/ChatQnA/benchmark/performance/helm_charts/hpu_with_rerank.yaml
index 5cd34f578..916e1dff0 100644
--- a/ChatQnA/benchmark/performance/helm_charts/hpu_with_rerank.yaml
+++ b/ChatQnA/benchmark/performance/helm_charts/hpu_with_rerank.yaml
@@ -64,6 +64,9 @@ microservices:
   - name: reranking-dependency-deploy
     image: opea/tei-gaudi:latest
     replicas: 1
+    resources:
+      limits:
+        habana.ai/gaudi: 1
     args:
       - name: "--model-id"
       - value: $(RERANK_MODEL_ID)
diff --git a/FaqGen/benchmark/performance/helm_charts/templates/deployment.yaml b/FaqGen/benchmark/performance/helm_charts/templates/deployment.yaml
index 920e54685..4fe4fc581 100644
--- a/FaqGen/benchmark/performance/helm_charts/templates/deployment.yaml
+++ b/FaqGen/benchmark/performance/helm_charts/templates/deployment.yaml
@@ -51,7 +51,14 @@ spec:
         {{- end }}
         {{- end }}
 
-        image: {{ $microservice.image }}
+        {{- $image := $microservice.image }}
+        {{- range $podSpec := $global.podSpecs }}
+          {{- if eq $podSpec.name $microservice.name }}
+            {{- $image = $podSpec.image | default $microservice.image }}
+          {{- end }}
+        {{- end }}
+        image: {{ $image }}
+
         imagePullPolicy: IfNotPresent
         name: {{ $microservice.name }}
 
diff --git a/VisualQnA/benchmark/performance/helm_charts/.helmignore b/VisualQnA/benchmark/performance/helm_charts/.helmignore
new file mode 100644
index 000000000..0e8a0eb36
--- /dev/null
+++ b/VisualQnA/benchmark/performance/helm_charts/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/VisualQnA/benchmark/performance/helm_charts/Chart.yaml b/VisualQnA/benchmark/performance/helm_charts/Chart.yaml
new file mode 100644
index 000000000..51f94d087
--- /dev/null
+++ b/VisualQnA/benchmark/performance/helm_charts/Chart.yaml
@@ -0,0 +1,27 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v2
+name: visualqna-charts
+description: A Helm chart for deploying the VisualQnA benchmark pipeline on Kubernetes
+
+# A chart can be either an 'application' or a 'library' chart.
+#
+# Application charts are a collection of templates that can be packaged into versioned archives
+# to be deployed.
+#
+# Library charts provide useful utilities or functions for the chart developer. They're included as
+# a dependency of application charts to inject those utilities and functions into the rendering
+# pipeline. Library charts do not define any templates and therefore cannot be deployed.
+type: application
+
+# This is the chart version. This version number should be incremented each time you make changes
+# to the chart and its templates, including the app version.
+# Versions are expected to follow Semantic Versioning (https://semver.org/)
+version: "1.0.0"  # quoted: an unquoted 1.0 is read as a float, not a SemVer string
+
+# This is the version number of the application being deployed. This version number should be
+# incremented each time you make changes to the application. Versions are not expected to
+# follow Semantic Versioning. They should reflect the version the application is using.
+# It is recommended to use it with quotes.
+appVersion: "1.16.0"
diff --git a/VisualQnA/benchmark/performance/helm_charts/README.md b/VisualQnA/benchmark/performance/helm_charts/README.md
new file mode 100644
index 000000000..f6df9ce4f
--- /dev/null
+++ b/VisualQnA/benchmark/performance/helm_charts/README.md
@@ -0,0 +1,36 @@
+# VisualQnA Deployment
+
+This document guides you through deploying VisualQnA pipelines using Helm charts. Helm charts simplify managing Kubernetes applications by packaging configuration and resources.
+
+## Getting Started
+
+### Preparation
+
+```bash
+# on k8s-master node
+cd GenAIExamples/VisualQnA/benchmark/performance/helm_charts
+
+# Set HUGGINGFACEHUB_API_TOKEN to your actual Hugging Face token:
+# vim customize.yaml
+HUGGINGFACEHUB_API_TOKEN: hf_xxxxx
+```
+
+### Deploy your VisualQnA
+
+```bash
+# Deploy a VisualQnA pipeline from the chart in the current directory (helm_charts).
+# To deploy with different configurations, simply provide a different YAML file.
+helm install visualqna . -f customize.yaml
+```
+
+Notes: The provided [BKC manifests](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/benchmark) for single, two, and four node Kubernetes clusters are generated using this tool.
+
+## Customize your own VisualQnA pipelines (optional)
+
+There are two YAML configs you can specify.
+
+- customize.yaml
+  This file can specify image names, the number of replicas and CPU cores to manage your pods.
+
+- values.yaml
+  This file contains the default microservice configurations for VisualQnA. Please review and understand each parameter before making any changes.
diff --git a/VisualQnA/benchmark/performance/helm_charts/customize.yaml b/VisualQnA/benchmark/performance/helm_charts/customize.yaml
new file mode 100644
index 000000000..01388e66a
--- /dev/null
+++ b/VisualQnA/benchmark/performance/helm_charts/customize.yaml
@@ -0,0 +1,23 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Per-deployment overrides; each `name` must equal a microservices[].name in values.yaml.
+podSpecs:
+  - name: visualqna
+    replicas: 2
+    resources:
+      limits:
+        cpu: "8"
+        memory: "8000Mi"
+      requests:
+        cpu: "8"
+        memory: "8000Mi"
+
+  - name: visualqna-tgi
+    replicas: 7  # each replica requests one Gaudi card; size this to the cards available
+    resources:
+      limits:
+        habana.ai/gaudi: 1
+
+  - name: visualqna-lvm-uservice
+    replicas: 1
diff --git a/VisualQnA/benchmark/performance/helm_charts/templates/configmap.yaml b/VisualQnA/benchmark/performance/helm_charts/templates/configmap.yaml
new file mode 100644
index 000000000..9cc879ee3
--- /dev/null
+++ b/VisualQnA/benchmark/performance/helm_charts/templates/configmap.yaml
@@ -0,0 +1,25 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Environment shared by every pod of this chart (loaded through envFrom).
+# Each key is rendered once, quoted, from the `config` map in values.yaml;
+# duplicate keys are invalid YAML and ConfigMap data values must be strings.
+# A top-level HUGGINGFACEHUB_API_TOKEN value, when set, overrides config's.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ .Values.config.CONFIG_MAP_NAME }}
+  namespace: default
+data:
+  CONFIG_MAP_NAME: {{ .Values.config.CONFIG_MAP_NAME | quote }}
+  HUGGINGFACEHUB_API_TOKEN: {{ .Values.HUGGINGFACEHUB_API_TOKEN | default .Values.config.HUGGINGFACEHUB_API_TOKEN | quote }}
+  LLM_MODEL_ID: {{ .Values.config.LLM_MODEL_ID | quote }}
+  NODE_SELECTOR: {{ .Values.config.NODE_SELECTOR | quote }}
+
+  LVM_ENDPOINT: {{ .Values.config.LVM_ENDPOINT | quote }}
+  MODEL_ID: {{ .Values.config.MODEL_ID | quote }}
+  LVM_SERVICE_HOST_IP: {{ .Values.config.LVM_SERVICE_HOST_IP | quote }}
+  PORT: {{ .Values.config.PORT | quote }}
+  MAX_INPUT_TOKENS: {{ .Values.config.MAX_INPUT_TOKENS | quote }}
+  MAX_TOTAL_TOKENS: {{ .Values.config.MAX_TOTAL_TOKENS | quote }}
+---
diff --git a/VisualQnA/benchmark/performance/helm_charts/templates/deployment.yaml b/VisualQnA/benchmark/performance/helm_charts/templates/deployment.yaml
new file mode 100644
index 000000000..4fe4fc581
--- /dev/null
+++ b/VisualQnA/benchmark/performance/helm_charts/templates/deployment.yaml
@@ -0,0 +1,131 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Renders one Deployment per entry of .Values.microservices. An entry of
+# .Values.podSpecs with the same name overrides replicas, image and resources.
+{{- $global := .Values }}
+{{- range $microservice := .Values.microservices }}
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ $microservice.name }}
+  namespace: default
+spec:
+  # replicas: the matching podSpecs entry wins, else the microservice's own value.
+  {{- $replicas := $microservice.replicas }}
+  {{- range $podSpec := $global.podSpecs }}
+    {{- if eq $podSpec.name $microservice.name }}
+      {{- $replicas = $podSpec.replicas | default $microservice.replicas }}
+    {{- end }}
+  {{- end }}
+  replicas: {{ $replicas }}
+
+  selector:
+    matchLabels:
+      app: {{ $microservice.name }}
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: {{ $microservice.name }}
+    spec:
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: {{ $global.config.CONFIG_MAP_NAME }}
+        # args: an item may set `name`, `value`, or both; each one set becomes its own element.
+        {{- if $microservice.args }}
+        args:
+        {{- range $arg := $microservice.args }}
+          {{- if $arg.name }}
+          - {{ $arg.name }}
+          {{- end }}
+          {{- if $arg.value }}
+          - "{{ $arg.value }}"
+          {{- end }}
+        {{- end }}
+        {{- end }}
+        # env: variables set on the container itself, next to the envFrom ConfigMap above.
+        {{- if $microservice.env }}
+        env:
+        {{- range $env := $microservice.env }}
+          - name: {{ $env.name }}
+            value: "{{ $env.value }}"
+        {{- end }}
+        {{- end }}
+        # image: same podSpecs override rule as replicas.
+        {{- $image := $microservice.image }}
+        {{- range $podSpec := $global.podSpecs }}
+          {{- if eq $podSpec.name $microservice.name }}
+            {{- $image = $podSpec.image | default $microservice.image }}
+          {{- end }}
+        {{- end }}
+        image: {{ $image }}
+        imagePullPolicy: IfNotPresent
+        name: {{ $microservice.name }}
+        # ports: every key/value pair of an item is emitted as a separate list element.
+        {{- if $microservice.ports }}
+        ports:
+        {{- range $port := $microservice.ports }}
+          {{- range $port_name, $port_id := $port }}
+          - {{ $port_name }}: {{ $port_id }}
+          {{- end }}
+        {{- end }}
+        {{- end }}
+        # resources: a podSpecs `resources` map replaces the microservice's map as a whole.
+        {{- $resources := $microservice.resources }}
+        {{- range $podSpec := $global.podSpecs }}
+          {{- if eq $podSpec.name $microservice.name }}
+            {{- if $podSpec.resources }}
+              {{- $resources = $podSpec.resources }}
+            {{- end }}
+          {{- end }}
+        {{- end }}
+        {{- if $resources }}
+        resources:
+        {{- range $resourceType, $resource := $resources }}
+          {{ $resourceType }}:
+          {{- range $limitType, $limit := $resource }}
+            {{ $limitType }}: {{ $limit }}
+          {{- end }}
+        {{- end }}
+        {{- end }}
+        {{- if $microservice.volumeMounts }}
+        volumeMounts:
+        {{- range $volumeMount := $microservice.volumeMounts }}
+          - mountPath: {{ $volumeMount.mountPath }}
+            name: {{ $volumeMount.name }}
+        {{- end }}
+        {{- end }}
+
+      hostIPC: true
+      nodeSelector:
+        node-type: {{ $global.config.NODE_SELECTOR }}
+      serviceAccountName: default
+      topologySpreadConstraints:
+      - labelSelector:
+          matchLabels:
+            app: {{ $microservice.name }}
+        maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+
+      # volumes: only hostPath and emptyDir sources are rendered.
+      {{- if $microservice.volumes }}
+      volumes:
+      {{- range $index, $volume := $microservice.volumes }}
+        - name: {{ $volume.name }}
+          {{- if $volume.hostPath }}
+          hostPath:
+            path: {{ $volume.hostPath.path }}
+            type: {{ $volume.hostPath.type }}
+          {{- else if $volume.emptyDir }}
+          emptyDir:
+            medium: {{ $volume.emptyDir.medium }}
+            sizeLimit: {{ $volume.emptyDir.sizeLimit }}
+          {{- end }}
+      {{- end }}
+      {{- end }}
+---
+{{- end }}
diff --git a/VisualQnA/benchmark/performance/helm_charts/templates/service.yaml b/VisualQnA/benchmark/performance/helm_charts/templates/service.yaml
new file mode 100644
index 000000000..5a5896921
--- /dev/null
+++ b/VisualQnA/benchmark/performance/helm_charts/templates/service.yaml
@@ -0,0 +1,24 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Emits one Service document per entry of .Values.services.
+{{- range $svc := .Values.services }}
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ $svc.name }}
+  namespace: default
+spec:
+  ports:
+  {{- range $portSpec := $svc.spec.ports }}
+    # `name` leads the list item; the remaining keys follow in sorted key order.
+    - name: {{ $portSpec.name }}
+    {{- range $field, $setting := omit $portSpec "name" }}
+      {{ $field }}: {{ $setting }}
+    {{- end }}
+  {{- end }}
+  selector:
+    app: {{ $svc.spec.selector.app }}
+  type: {{ $svc.spec.type }}
+---
+{{- end }}
diff --git a/VisualQnA/benchmark/performance/helm_charts/values.yaml b/VisualQnA/benchmark/performance/helm_charts/values.yaml
new file mode 100644
index 000000000..503c57fb5
--- /dev/null
+++ b/VisualQnA/benchmark/performance/helm_charts/values.yaml
@@ -0,0 +1,84 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+namespace: default  # NOTE(review): this chart's templates hard-code `namespace: default`; confirm this key is read
+
+config:
+  LLM_MODEL_ID: 'meta-llama/Meta-Llama-3-8B-Instruct'
+  CONFIG_MAP_NAME: 'visualqna-config'  # ConfigMap name, also the envFrom target of every Deployment
+  NODE_SELECTOR: 'opea'  # pods are scheduled only onto nodes labelled node-type=<this value>
+  HUGGINGFACEHUB_API_TOKEN: '${HF_TOKEN}'  # literal placeholder text; Helm does not expand it
+
+  LVM_ENDPOINT: 'http://visualqna-tgi'
+  MODEL_ID: 'llava-hf/llava-v1.6-mistral-7b-hf'
+  LVM_SERVICE_HOST_IP: 'visualqna-lvm-uservice'
+  PORT: '8399'
+  MAX_INPUT_TOKENS: '4096'
+  MAX_TOTAL_TOKENS: '8192'
+
+microservices:
+  - name: visualqna-lvm-uservice
+    replicas: 1
+    image: 'opea/lvm-tgi:latest'
+    ports:
+      - containerPort: 9399
+
+  - name: visualqna
+    replicas: 1
+    image: 'opea/visualqna:latest'
+    ports:
+      - containerPort: 8399  # NOTE(review): the visualqna Service targets 8888; confirm the listening port
+
+  - name: visualqna-tgi
+    replicas: 1
+    image: 'opea/llava-tgi:latest'
+    resources:
+      limits:
+        habana.ai/gaudi: 1
+    ports:
+      - containerPort: 8399
+    volumeMounts:
+      - name: model-volume
+        mountPath: '/data'
+      - name: shm
+        mountPath: '/dev/shm'
+    volumes:
+      - name: model-volume
+        hostPath:
+          path: '/mnt/models'
+          type: 'Directory'
+      - name: shm
+        emptyDir:
+          medium: 'Memory'
+          sizeLimit: '1Gi'
+
+services:
+  - name: visualqna-lvm-uservice
+    spec:
+      ports:
+        - name: service
+          port: 9399
+          targetPort: 9399
+      selector:
+        app: visualqna-lvm-uservice  # selectors must equal the Deployment's `app` pod label
+      type: ClusterIP
+
+  - name: visualqna-tgi
+    spec:
+      ports:
+        - name: service
+          port: 80  # LVM_ENDPOINT (http://visualqna-tgi) carries no port, so it connects on 80
+          targetPort: 8399
+      selector:
+        app: visualqna-tgi  # was `lvm-uservice`, a label no Deployment in this chart sets
+      type: ClusterIP
+
+  - name: visualqna
+    spec:
+      ports:
+        - name: service
+          port: 8888
+          targetPort: 8888
+      selector:
+        app: visualqna
+      type: ClusterIP