added visual qna & update deployment template

2024-10-22 05:45:00 +03:00
parent bcaffd7db4
commit bb46f5b355
11 changed files with 392 additions and 2 deletions
--- a/AudioQnA/benchmark/helm_charts/templates/deployment.yaml
+++ b/AudioQnA/benchmark/helm_charts/templates/deployment.yaml
@@ -51,7 +51,14 @@ spec:
        {{- end }}
        {{- end }}
-        image: {{ $microservice.image }}
+        {{- $image := $microservice.image }}
        {{- range $podSpec := $global.podSpecs }}
          {{- if eq $podSpec.name $microservice.name }}
            {{- $image = $podSpec.image | default $microservice.image }}
          {{- end }}
        {{- end }}
        image: {{ $image }}
        imagePullPolicy: IfNotPresent
        name: {{ $microservice.name }}
--- a/ChatQnA/benchmark/performance/helm_charts/hpu_with_rerank.yaml
+++ b/ChatQnA/benchmark/performance/helm_charts/hpu_with_rerank.yaml
@@ -64,6 +64,9 @@ microservices:
  - name: reranking-dependency-deploy
    image: opea/tei-gaudi:latest
    replicas: 1
    resources:
      limits:
        habana.ai/gaudi: 1
    args:
      - name: "--model-id"
      - value: $(RERANK_MODEL_ID)
--- a/FaqGen/benchmark/performance/helm_charts/templates/deployment.yaml
+++ b/FaqGen/benchmark/performance/helm_charts/templates/deployment.yaml
@@ -51,7 +51,14 @@ spec:
        {{- end }}
        {{- end }}
-        image: {{ $microservice.image }}
+        {{- $image := $microservice.image }}
        {{- range $podSpec := $global.podSpecs }}
          {{- if eq $podSpec.name $microservice.name }}
            {{- $image = $podSpec.image | default $microservice.image }}
          {{- end }}
        {{- end }}
        image: {{ $image }}
        imagePullPolicy: IfNotPresent
        name: {{ $microservice.name }}
--- a/VisualQnA/benchmark/performance/helm_charts/.helmignore
+++ b/VisualQnA/benchmark/performance/helm_charts/.helmignore
@@ -0,0 +1,23 @@
 # Patterns to ignore when building packages.
 # This supports shell glob matching, relative path matching, and
 # negation (prefixed with !). Only one pattern per line.
 .DS_Store
 # Common VCS dirs
 .git/
 .gitignore
 .bzr/
 .bzrignore
 .hg/
 .hgignore
 .svn/
 # Common backup files
 *.swp
 *.bak
 *.tmp
 *.orig
 *~
 # Various IDEs
 .project
 .idea/
 *.tmproj
 .vscode/
--- a/VisualQnA/benchmark/performance/helm_charts/Chart.yaml
+++ b/VisualQnA/benchmark/performance/helm_charts/Chart.yaml
@@ -0,0 +1,27 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 apiVersion: v2
 # Chart name. Named after the VisualQnA example this chart is part of
 # (the previous value was carried over from the ChatQnA chart).
 name: visualqna-charts
 description: A Helm chart for Kubernetes
 # A chart can be either an 'application' or a 'library' chart.
 #
 # Application charts are a collection of templates that can be packaged into versioned archives
 # to be deployed.
 #
 # Library charts provide useful utilities or functions for the chart developer. They're included as
 # a dependency of application charts to inject those utilities and functions into the rendering
 # pipeline. Library charts do not define any templates and therefore cannot be deployed.
 type: application
 # This is the chart version. This version number should be incremented each time you make changes
 # to the chart and its templates, including the app version.
 # Versions are expected to follow Semantic Versioning (https://semver.org/)
 # Quoted so YAML keeps it a string, and written as a full MAJOR.MINOR.PATCH version.
 version: "1.0.0"
 # This is the version number of the application being deployed. This version number should be
 # incremented each time you make changes to the application. Versions are not expected to
 # follow Semantic Versioning. They should reflect the version the application is using.
 # It is recommended to use it with quotes.
 appVersion: "1.16.0"
--- a/VisualQnA/benchmark/performance/helm_charts/README.md
+++ b/VisualQnA/benchmark/performance/helm_charts/README.md
@@ -0,0 +1,36 @@
 # VisualQnA Deployment
 This document guides you through deploying VisualQnA pipelines using Helm charts. Helm charts simplify managing Kubernetes applications by packaging configuration and resources.
 ## Getting Started
 ### Preparation
 ```bash
 # on k8s-master node
 cd GenAIExamples/VisualQnA/benchmark/performance/helm_charts
 # Replace the value of HUGGINGFACEHUB_API_TOKEN with your actual Hugging Face token:
 # vim customize.yaml
 HUGGINGFACEHUB_API_TOKEN: hf_xxxxx
 ```
 ### Deploy your VisualQnA
 ```bash
 # Deploy a VisualQnA pipeline using the specified YAML configuration.
 # To deploy with different configurations, simply provide a different YAML file.
 # Run from inside the helm_charts directory entered in the Preparation step.
 helm install visualqna . -f customize.yaml
 ```
 Notes: The provided [BKC manifests](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/benchmark) for single, two, and four node Kubernetes clusters are generated using this tool.
 ## Customize your own VisualQnA pipelines (Optional)
 There are two YAML configs you can specify.
 - customize.yaml
  This file can specify image names, the number of replicas, and CPU cores to manage your pods.
 - values.yaml
  This file contains the default microservice configurations for VisualQnA. Please review and understand each parameter before making any changes.
--- a/VisualQnA/benchmark/performance/helm_charts/customize.yaml
+++ b/VisualQnA/benchmark/performance/helm_charts/customize.yaml
@@ -0,0 +1,23 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 # Per-deployment overrides for the VisualQnA chart.
 # Each `name` must equal a `microservices[].name` from values.yaml: the
 # deployment template compares the two names and only applies `replicas`,
 # `image` and `resources` from an entry whose name matches exactly.
 podSpecs:
  # Megaservice gateway.
  - name: visualqna
    replicas: 2
    resources:
      limits:
        cpu: "8"
        memory: "8000Mi"
      requests:
        cpu: "8"
        memory: "8000Mi"
  # Model-serving backend; one Gaudi card per replica.
  - name: visualqna-tgi
    replicas: 7
    resources:
      limits:
        habana.ai/gaudi: 1
  # LVM microservice.
  - name: visualqna-lvm-uservice
    replicas: 1
--- a/VisualQnA/benchmark/performance/helm_charts/templates/configmap.yaml
+++ b/VisualQnA/benchmark/performance/helm_charts/templates/configmap.yaml
@@ -0,0 +1,25 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 apiVersion: v1
 kind: ConfigMap
 metadata:
  name: {{ .Values.config.CONFIG_MAP_NAME }}
  namespace: default
 data:
  {{- /*
  Emit every key under .Values.config exactly once, so that overrides supplied
  through values files take effect and no key is duplicated. ConfigMap data
  values must be strings, hence `quote` (e.g. PORT renders as "8399").
  The token is skipped here and rendered separately below.
  */}}
  {{- range $key, $value := .Values.config }}
  {{- if ne $key "HUGGINGFACEHUB_API_TOKEN" }}
  {{ $key }}: {{ $value | quote }}
  {{- end }}
  {{- end }}
  {{- /*
  A top-level HUGGINGFACEHUB_API_TOKEN (the form shown in the README for
  customize.yaml) takes precedence; otherwise fall back to the value under
  .Values.config.
  */}}
  HUGGINGFACEHUB_API_TOKEN: {{ .Values.HUGGINGFACEHUB_API_TOKEN | default .Values.config.HUGGINGFACEHUB_API_TOKEN | quote }}
 ---
--- a/VisualQnA/benchmark/performance/helm_charts/templates/deployment.yaml
+++ b/VisualQnA/benchmark/performance/helm_charts/templates/deployment.yaml
@@ -0,0 +1,131 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 {{- /*
 Renders one Deployment per entry in .Values.microservices.

 Each microservice entry supplies defaults (image, replicas, resources). An
 entry in .Values.podSpecs whose `name` equals the microservice name may
 override them. String values are passed through `quote` so that values which
 contain YAML specials, or which look like numbers or booleans, are still
 emitted as strings.
 */}}
 {{- $global := .Values }}
 {{- range $microservice := .Values.microservices }}
 {{- /* Start from the microservice defaults, then apply every podSpecs entry with the same name (the last match wins). */}}
 {{- $replicas := $microservice.replicas }}
 {{- $image := $microservice.image }}
 {{- $resources := $microservice.resources }}
 {{- range $podSpec := $global.podSpecs }}
   {{- if eq $podSpec.name $microservice.name }}
     {{- $replicas = $podSpec.replicas | default $microservice.replicas }}
     {{- $image = $podSpec.image | default $microservice.image }}
     {{- /* resources are replaced as a whole, not merged key by key */}}
     {{- if $podSpec.resources }}
       {{- $resources = $podSpec.resources }}
     {{- end }}
   {{- end }}
 {{- end }}
 apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: {{ $microservice.name | quote }}
  namespace: default
 spec:
  replicas: {{ $replicas }}
  selector:
    matchLabels:
      app: {{ $microservice.name | quote }}
  template:
    metadata:
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
      labels:
        app: {{ $microservice.name | quote }}
    spec:
      containers:
      - envFrom:
        - configMapRef:
            name: {{ $global.config.CONFIG_MAP_NAME | quote }}
        {{- if $microservice.args }}
        args:
        {{- /* each args entry may carry a `name`, a `value`, or both; each becomes its own argv element */}}
        {{- range $arg := $microservice.args }}
          {{- if $arg.name }}
          - {{ $arg.name | quote }}
          {{- end }}
          {{- if $arg.value }}
          - {{ $arg.value | quote }}
          {{- end }}
        {{- end }}
        {{- end }}
        {{- if $microservice.env }}
        env:
        {{- range $env := $microservice.env }}
          - name: {{ $env.name | quote }}
            value: {{ $env.value | quote }}
        {{- end }}
        {{- end }}
        image: {{ $image | quote }}
        imagePullPolicy: IfNotPresent
        name: {{ $microservice.name | quote }}
        {{- if $microservice.ports }}
        ports:
        {{- /* every key/value pair of a ports entry is emitted as a separate list item */}}
        {{- range $port := $microservice.ports }}
          {{- range $port_name, $port_id := $port }}
          - {{ $port_name }}: {{ $port_id }}
          {{- end }}
        {{- end }}
        {{- end }}
        {{- if $resources }}
        resources:
        {{- range $resourceType, $resource := $resources }}
          {{ $resourceType }}:
          {{- range $limitType, $limit := $resource }}
            {{ $limitType }}: {{ $limit | quote }}
          {{- end }}
        {{- end }}
        {{- end }}
        {{- if $microservice.volumeMounts }}
        volumeMounts:
        {{- range $volumeMount := $microservice.volumeMounts }}
          - mountPath: {{ $volumeMount.mountPath | quote }}
            name: {{ $volumeMount.name | quote }}
        {{- end }}
        {{- end }}
      hostIPC: true
      nodeSelector:
        node-type: {{ $global.config.NODE_SELECTOR | quote }}
      serviceAccountName: default
      topologySpreadConstraints:
      - labelSelector:
          matchLabels:
            app: {{ $microservice.name | quote }}
        maxSkew: 1
        topologyKey: kubernetes.io/hostname
        whenUnsatisfiable: ScheduleAnyway
      {{- if $microservice.volumes }}
      volumes:
      {{- /* only hostPath and emptyDir volume sources are rendered */}}
      {{- range $volume := $microservice.volumes }}
        - name: {{ $volume.name | quote }}
          {{- if $volume.hostPath }}
          hostPath:
            path: {{ $volume.hostPath.path | quote }}
            type: {{ $volume.hostPath.type | quote }}
          {{- else if $volume.emptyDir }}
          emptyDir:
            medium: {{ $volume.emptyDir.medium | quote }}
            sizeLimit: {{ $volume.emptyDir.sizeLimit | quote }}
          {{- end }}
      {{- end }}
      {{- end }}
 ---
 {{- end }}
--- a/VisualQnA/benchmark/performance/helm_charts/templates/service.yaml
+++ b/VisualQnA/benchmark/performance/helm_charts/templates/service.yaml
@@ -0,0 +1,24 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 {{- /* Emit one Service manifest for every entry under .Values.services. */}}
 {{- range $svc := .Values.services }}
 apiVersion: v1
 kind: Service
 metadata:
  name: {{ $svc.name }}
  namespace: default
 spec:
  ports:
  {{- range $portSpec := $svc.spec.ports }}
    {{- /* `name` opens the list item; every other field of the port entry follows it. */}}
    - name: {{ $portSpec.name }}
    {{- range $field, $fieldValue := $portSpec }}
      {{- if not (eq $field "name") }}
      {{ $field }}: {{ $fieldValue }}
      {{- end }}
    {{- end }}
  {{- end }}
  selector:
    app: {{ $svc.spec.selector.app }}
  type: {{ $svc.spec.type }}
 ---
 {{- end }}
--- a/VisualQnA/benchmark/performance/helm_charts/values.yaml
+++ b/VisualQnA/benchmark/performance/helm_charts/values.yaml
@@ -0,0 +1,84 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 # NOTE(review): the templates in this chart hard-code `namespace: default`
 # and do not read this value.
 namespace: default
 # Shared settings. The ConfigMap named by CONFIG_MAP_NAME is attached to every
 # container through `envFrom` in the deployment template.
 config:
  LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct
  CONFIG_MAP_NAME: visualqna-config
  # Value of the `node-type` nodeSelector applied to every pod.
  NODE_SELECTOR: opea
  # Placeholder: Helm does not expand shell-style variables, so this literal
  # string is what gets rendered unless it is overridden at install time.
  HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
  LVM_ENDPOINT: "http://visualqna-tgi"
  MODEL_ID: "llava-hf/llava-v1.6-mistral-7b-hf"
  LVM_SERVICE_HOST_IP: visualqna-lvm-uservice
  PORT: "8399"
  MAX_INPUT_TOKENS: "4096"
  MAX_TOTAL_TOKENS: "8192"
 # One Deployment is rendered per entry; `name` is also used as the pod's
 # `app` label, which the Services below select on.
 microservices:
  - name: visualqna-lvm-uservice
    image: opea/lvm-tgi:latest
    replicas: 1
    ports:
      - containerPort: 9399
  - name: visualqna
    image: opea/visualqna:latest
    replicas: 1
    ports:
      # NOTE(review): the `visualqna` Service below targets port 8888 while
      # this declares 8399 - confirm which port the container listens on.
      - containerPort: 8399
  - name: visualqna-tgi
    image: opea/llava-tgi:latest
    replicas: 1
    ports:
      - containerPort: 8399
    resources:
      limits:
        habana.ai/gaudi: 1
    volumeMounts:
      - mountPath: /data
        name: model-volume
      - mountPath: /dev/shm
        name: shm
    volumes:
      - hostPath:
          path: /mnt/models
          type: Directory
        name: model-volume
      - emptyDir:
          medium: Memory
          sizeLimit: 1Gi
        name: shm
 # One Service is rendered per entry. `selector.app` must equal the `name` of
 # the microservice it fronts, because that name is the pod label.
 services:
  - name: visualqna-lvm-uservice
    spec:
      ports:
        - name: service
          port: 9399
          targetPort: 9399
      selector:
        app: visualqna-lvm-uservice
      type: ClusterIP
  - name: visualqna-tgi
    spec:
      ports:
        - name: service
          port: 80
          targetPort: 8399
      selector:
        # Must match the visualqna-tgi Deployment's pod label; the previous
        # value `lvm-uservice` matched no Deployment in this chart.
        app: visualqna-tgi
      type: ClusterIP
  - name: visualqna
    spec:
      ports:
        - name: service
          port: 8888
          targetPort: 8888
      selector:
        app: visualqna
      type: ClusterIP