diff --git a/AudioQnA/benchmark/helm_charts/.helmignore b/AudioQnA/benchmark/helm_charts/.helmignore new file mode 100644 index 000000000..0e8a0eb36 --- /dev/null +++ b/AudioQnA/benchmark/helm_charts/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/AudioQnA/benchmark/helm_charts/Chart.yaml b/AudioQnA/benchmark/helm_charts/Chart.yaml new file mode 100644 index 000000000..51f94d087 --- /dev/null +++ b/AudioQnA/benchmark/helm_charts/Chart.yaml @@ -0,0 +1,27 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v2 +name: audioqna-charts +description: A Helm chart for Kubernetes + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 1.0.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. 
They should reflect the version the application is using. +# It is recommended to use it with quotes. +appVersion: "1.16.0" diff --git a/AudioQnA/benchmark/helm_charts/README.md b/AudioQnA/benchmark/helm_charts/README.md new file mode 100644 index 000000000..f6df9ce4f --- /dev/null +++ b/AudioQnA/benchmark/helm_charts/README.md @@ -0,0 +1,36 @@ +# AudioQnA Deployment + +This document guides you through deploying AudioQnA pipelines using Helm charts. Helm charts simplify managing Kubernetes applications by packaging configuration and resources. + +## Getting Started + +### Preparation + +```bash +# on k8s-master node +cd GenAIExamples/AudioQnA/benchmark/helm_charts + +# Replace the key of HUGGINGFACEHUB_API_TOKEN with your actual Hugging Face token: +# vim customize.yaml +HUGGINGFACEHUB_API_TOKEN: hf_xxxxx +``` + +### Deploy your AudioQnA + +```bash +# Deploy an AudioQnA pipeline using the specified YAML configuration. +# To deploy with different configurations, simply provide a different YAML file. +helm install audioqna helm_charts/ -f customize.yaml +``` + +Notes: The provided [BKC manifests](https://github.com/opea-project/GenAIExamples/tree/main/AudioQnA/benchmark) for single, two, and four node Kubernetes clusters are generated using this tool. + +## Customize your own AudioQnA pipelines. (Optional) + +There are two yaml configs you can specify. + +- customize.yaml + This file can specify image names, the number of replicas and CPU cores to manage your pods. + +- values.yaml + This file contains the default microservice configurations for AudioQnA. Please review and understand each parameter before making any changes. 
diff --git a/AudioQnA/benchmark/helm_charts/customize.yaml b/AudioQnA/benchmark/helm_charts/customize.yaml new file mode 100644 index 000000000..31e1b6ca6 --- /dev/null +++ b/AudioQnA/benchmark/helm_charts/customize.yaml @@ -0,0 +1,50 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + +podSpecs: + - name: audioqna-backend-server-deploy + spec: + image_name: opea/audioqna + image_tag: latest + replicas: 1 + + - name: asr-deploy + spec: + image_name: opea/asr + image_tag: latest + replicas: 1 + + - name: whisper-deploy + spec: + image_name: opea/whisper-gaudi + image_tag: latest + replicas: 1 + resources: + limits: + habana.ai/gaudi: 1 + + - name: tts-deploy + spec: + image_name: opea/tts + image_tag: latest + replicas: 1 + + - name: speecht5-deploy + spec: + image_name: opea/speecht5-gaudi + image_tag: latest + replicas: 1 + resources: + limits: + habana.ai/gaudi: 1 + + - name: llm-dependency-deploy + spec: + image_name: ghcr.io/huggingface/tgi-gaudi + image_tag: 2.0.5 + replicas: 1 + resources: + limits: + habana.ai/gaudi: 1 diff --git a/AudioQnA/benchmark/helm_charts/templates/configmap.yaml b/AudioQnA/benchmark/helm_charts/templates/configmap.yaml new file mode 100644 index 000000000..79246763f --- /dev/null +++ b/AudioQnA/benchmark/helm_charts/templates/configmap.yaml @@ -0,0 +1,24 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ .Values.config.CONFIG_MAP_NAME }} + namespace: default +data: + HUGGINGFACEHUB_API_TOKEN: {{ .Values.HUGGINGFACEHUB_API_TOKEN }} + LLM_MODEL_ID: {{ .Values.config.LLM_MODEL_ID }} + NODE_SELECTOR: {{ .Values.config.NODE_SELECTOR }} + + ASR_ENDPOINT: http://whisper-svc.default.svc.cluster.local:7066 + TTS_ENDPOINT: http://speecht5-svc.default.svc.cluster.local:7055 + TGI_LLM_ENDPOINT: 
http://llm-dependency-svc.default.svc.cluster.local:3006 + MEGA_SERVICE_HOST_IP: audioqna-backend-server-svc + ASR_SERVICE_HOST_IP: asr-svc + ASR_SERVICE_PORT: "3001" + LLM_SERVICE_HOST_IP: llm-svc + LLM_SERVICE_PORT: "3007" + TTS_SERVICE_HOST_IP: tts-svc + TTS_SERVICE_PORT: "3002" +--- diff --git a/AudioQnA/benchmark/helm_charts/templates/deployment.yaml b/AudioQnA/benchmark/helm_charts/templates/deployment.yaml new file mode 100644 index 000000000..7a9fe0c54 --- /dev/null +++ b/AudioQnA/benchmark/helm_charts/templates/deployment.yaml @@ -0,0 +1,113 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +{{- $global := .Values }} +{{- range $deployment := .Values.deployments }} +{{- range $podSpec := $global.podSpecs }} +{{- if eq $podSpec.name $deployment.name }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ $deployment.name }} + namespace: default +spec: + replicas: {{ $podSpec.spec.replicas }} + selector: + matchLabels: + app: {{ $deployment.name }} + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: {{ $deployment.name }} + spec: + containers: + - envFrom: + - configMapRef: + name: {{ $global.config.CONFIG_MAP_NAME }} + {{- if $deployment.spec.args }} + args: + {{- range $arg := $deployment.spec.args }} + {{- if $arg.name }} + - {{ $arg.name }} + {{- end }} + {{- if $arg.value }} + - "{{ $arg.value }}" + {{- end }} + {{- end }} + {{- end }} + + {{- if $deployment.spec.env }} + env: + {{- range $env := $deployment.spec.env }} + - name: {{ $env.name }} + value: "{{ $env.value }}" + {{- end }} + {{- end }} + + image: {{ $podSpec.spec.image_name }}:{{ $podSpec.spec.image_tag }} + imagePullPolicy: IfNotPresent + name: {{ $podSpec.name }} + + {{- if $deployment.spec.ports }} + ports: + {{- range $port := $deployment.spec.ports }} + {{- range $port_name, $port_id := $port }} + - {{ $port_name }}: {{ $port_id }} + {{- end }} + {{- end }} + {{- end }} + + + {{- if 
$podSpec.spec.resources }} + resources: + {{- range $resourceType, $resource := $podSpec.spec.resources }} + {{ $resourceType }}: + {{- range $limitType, $limit := $resource }} + {{ $limitType }}: {{ $limit }} + {{- end }} + {{- end }} + {{- end }} + + {{- if $deployment.spec.volumeMounts }} + volumeMounts: + {{- range $volumeMount := $deployment.spec.volumeMounts }} + - mountPath: {{ $volumeMount.mountPath }} + name: {{ $volumeMount.name }} + {{- end }} + {{- end }} + + hostIPC: true + nodeSelector: + node-type: {{ $global.config.NODE_SELECTOR }} + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: {{ $deployment.name }} + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + + + {{- if $deployment.spec.volumes }} + volumes: + {{- range $index, $volume := $deployment.spec.volumes }} + - name: {{ $volume.name }} + {{- if $volume.hostPath }} + hostPath: + path: {{ $volume.hostPath.path }} + type: {{ $volume.hostPath.type }} + {{- else if $volume.emptyDir }} + emptyDir: + medium: {{ $volume.emptyDir.medium }} + sizeLimit: {{ $volume.emptyDir.sizeLimit }} + {{- end }} + {{- end }} + {{- end }} + +--- +{{- end }} +{{- end }} +{{- end }} diff --git a/AudioQnA/benchmark/helm_charts/templates/service.yaml b/AudioQnA/benchmark/helm_charts/templates/service.yaml new file mode 100644 index 000000000..5a5896921 --- /dev/null +++ b/AudioQnA/benchmark/helm_charts/templates/service.yaml @@ -0,0 +1,24 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +{{- range $service := .Values.services }} +apiVersion: v1 +kind: Service +metadata: + name: {{ $service.name }} + namespace: default +spec: + ports: + {{- range $port := $service.spec.ports }} + - name: {{ $port.name }} + {{- range $port_name, $port_id := $port }} + {{- if ne $port_name "name"}} + {{ $port_name }}: {{ $port_id }} + {{- end }} + {{- end }} + {{- end }} + selector: + app: {{ $service.spec.selector.app }} + 
type: {{ $service.spec.type }} +--- +{{- end }} diff --git a/AudioQnA/benchmark/helm_charts/values.yaml b/AudioQnA/benchmark/helm_charts/values.yaml new file mode 100644 index 000000000..e2f03da95 --- /dev/null +++ b/AudioQnA/benchmark/helm_charts/values.yaml @@ -0,0 +1,186 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +namespace: default + +config: + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + CONFIG_MAP_NAME: audio-qna-config + NODE_SELECTOR: audioqna-opea + ASR_ENDPOINT: http://whisper-svc.default.svc.cluster.local:7066 + TTS_ENDPOINT: http://speecht5-svc.default.svc.cluster.local:7055 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:3006 + MEGA_SERVICE_HOST_IP: audioqna-backend-server-svc + ASR_SERVICE_HOST_IP: asr-svc + ASR_SERVICE_PORT: "3001" + LLM_SERVICE_HOST_IP: llm-svc + LLM_SERVICE_PORT: "3007" + TTS_SERVICE_HOST_IP: tts-svc + TTS_SERVICE_PORT: "3002" + +deployments: + - name: audioqna-backend-server-deploy + spec: + ports: + - containerPort: 8888 + + - name: asr-deploy + spec: + ports: + - containerPort: 9099 + + - name: whisper-deploy + spec: + ports: + - containerPort: 7066 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + + - name: tts-deploy + spec: + ports: + - containerPort: 9088 + + - name: llm-deploy + spec: + ports: + - containerPort: 9000 + + - name: speecht5-deploy + spec: + ports: + - containerPort: 7055 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + + - name: llm-dependency-deploy + spec: + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + args: + - name: "--model-id" + value: $(LLM_MODEL_ID) + - name: "--max-input-length" + value: "2048" 
+ - name: "--max-total-tokens" + value: "4096" + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: "true" + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: ENABLE_HPU_GRAPH + value: 'true' + - name: LIMIT_HPU_GRAPH + value: 'true' + - name: USE_FLASH_ATTENTION + value: 'true' + - name: FLASH_ATTENTION_RECOMPUTE + value: 'true' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm + +services: + - name: asr-svc + spec: + ports: + - name: service + port: 3001 + targetPort: 9099 + selector: + app: asr-deploy + type: ClusterIP + + - name: whisper-svc + spec: + ports: + - name: service + port: 7066 + targetPort: 7066 + selector: + app: whisper-deploy + type: ClusterIP + + - name: tts-svc + spec: + ports: + - name: service + port: 3002 + targetPort: 9088 + selector: + app: tts-deploy + type: ClusterIP + + - name: speecht5-svc + spec: + ports: + - name: service + port: 7055 + targetPort: 7055 + selector: + app: speecht5-deploy + type: ClusterIP + + - name: llm-dependency-svc + spec: + ports: + - name: service + port: 3006 + targetPort: 80 + selector: + app: llm-dependency-deploy + type: ClusterIP + + - name: llm-svc + spec: + ports: + - name: service + port: 3007 + targetPort: 9000 + selector: + app: llm-deploy + type: ClusterIP + + - name: audioqna-backend-server-svc + spec: + ports: + - name: service + port: 3088 + targetPort: 8888 + nodePort: 30666 + selector: + app: audioqna-backend-server-deploy + type: NodePort diff --git a/ChatQnA/benchmark/performance/helm_charts/README.md b/ChatQnA/benchmark/performance/helm_charts/README.md index f6df9ce4f..8da6d836e 100644 --- a/ChatQnA/benchmark/performance/helm_charts/README.md +++ b/ChatQnA/benchmark/performance/helm_charts/README.md @@ 
-20,7 +20,7 @@ HUGGINGFACEHUB_API_TOKEN: hf_xxxxx ```bash # Deploy a ChatQnA pipeline using the specified YAML configuration. # To deploy with different configurations, simply provide a different YAML file. -helm install chatqna helm_charts/ -f customize.yaml +helm install chatqna ../helm_charts/ -f customize.yaml ``` Notes: The provided [BKC manifests](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/benchmark) for single, two, and four node Kubernetes clusters are generated using this tool. diff --git a/FaqGen/benchmark/performance/helm_charts/.helmignore b/FaqGen/benchmark/performance/helm_charts/.helmignore new file mode 100644 index 000000000..0e8a0eb36 --- /dev/null +++ b/FaqGen/benchmark/performance/helm_charts/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/FaqGen/benchmark/performance/helm_charts/Chart.yaml b/FaqGen/benchmark/performance/helm_charts/Chart.yaml new file mode 100644 index 000000000..51f94d087 --- /dev/null +++ b/FaqGen/benchmark/performance/helm_charts/Chart.yaml @@ -0,0 +1,27 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v2 +name: faqgen-charts +description: A Helm chart for Kubernetes + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. 
Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 1.0.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. +# It is recommended to use it with quotes. +appVersion: "1.16.0" diff --git a/FaqGen/benchmark/performance/helm_charts/README.md b/FaqGen/benchmark/performance/helm_charts/README.md new file mode 100644 index 000000000..f6df9ce4f --- /dev/null +++ b/FaqGen/benchmark/performance/helm_charts/README.md @@ -0,0 +1,36 @@ +# FaqGen Deployment + +This document guides you through deploying FaqGen pipelines using Helm charts. Helm charts simplify managing Kubernetes applications by packaging configuration and resources. + +## Getting Started + +### Preparation + +```bash +# on k8s-master node +cd GenAIExamples/FaqGen/benchmark/performance/helm_charts + +# Replace the key of HUGGINGFACEHUB_API_TOKEN with your actual Hugging Face token: +# vim customize.yaml +HUGGINGFACEHUB_API_TOKEN: hf_xxxxx +``` + +### Deploy your FaqGen + +```bash +# Deploy a FaqGen pipeline using the specified YAML configuration. +# To deploy with different configurations, simply provide a different YAML file. +helm install faqgen helm_charts/ -f customize.yaml +``` + +Notes: The provided [BKC manifests](https://github.com/opea-project/GenAIExamples/tree/main/FaqGen/benchmark) for single, two, and four node Kubernetes clusters are generated using this tool. + +## Customize your own FaqGen pipelines. (Optional) + +There are two yaml configs you can specify. 
+ +- customize.yaml + This file can specify image names, the number of replicas and CPU cores to manage your pods. + +- values.yaml + This file contains the default microservice configurations for ChatQnA. Please review and understand each parameter before making any changes. diff --git a/FaqGen/benchmark/performance/helm_charts/customize.yaml b/FaqGen/benchmark/performance/helm_charts/customize.yaml new file mode 100644 index 000000000..5e156db45 --- /dev/null +++ b/FaqGen/benchmark/performance/helm_charts/customize.yaml @@ -0,0 +1,34 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + +podSpecs: + - name: faq-mega-server-deploy + spec: + image_name: opea/chatqna + image_tag: latest + replicas: 2 + resources: + limits: + cpu: "8" + memory: "8000Mi" + requests: + cpu: "8" + memory: "8000Mi" + + + - name: faq-tgi-deploy + spec: + image_name: ghcr.io/huggingface/tgi-gaudi + image_tag: 2.0.5 + replicas: 7 + resources: + limits: + habana.ai/gaudi: 1 + + - name: faq-micro-deploy + spec: + image_name: opea/llm-faqgen-tgi + image_tag: latest + replicas: 1 diff --git a/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml b/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml new file mode 100644 index 000000000..df3e61d20 --- /dev/null +++ b/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml @@ -0,0 +1,16 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ .Values.config.CONFIG_MAP_NAME }} + namespace: default +data: + HUGGINGFACEHUB_API_TOKEN: {{ .Values.HUGGINGFACEHUB_API_TOKEN }} + LLM_MODEL_ID: {{ .Values.config.LLM_MODEL_ID }} + NODE_SELECTOR: {{ .Values.config.NODE_SELECTOR }} + TGI_LLM_ENDPOINT: http://faq-tgi-svc.default.svc.cluster.local:8010 + LLM_SERVICE_HOST_IP: faq-micro-svc + MEGA_SERVICE_HOST_IP: faq-mega-server-svc +--- diff --git 
a/FaqGen/benchmark/performance/helm_charts/templates/deployment.yaml b/FaqGen/benchmark/performance/helm_charts/templates/deployment.yaml new file mode 100644 index 000000000..7a9fe0c54 --- /dev/null +++ b/FaqGen/benchmark/performance/helm_charts/templates/deployment.yaml @@ -0,0 +1,113 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +{{- $global := .Values }} +{{- range $deployment := .Values.deployments }} +{{- range $podSpec := $global.podSpecs }} +{{- if eq $podSpec.name $deployment.name }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ $deployment.name }} + namespace: default +spec: + replicas: {{ $podSpec.spec.replicas }} + selector: + matchLabels: + app: {{ $deployment.name }} + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: {{ $deployment.name }} + spec: + containers: + - envFrom: + - configMapRef: + name: {{ $global.config.CONFIG_MAP_NAME }} + {{- if $deployment.spec.args }} + args: + {{- range $arg := $deployment.spec.args }} + {{- if $arg.name }} + - {{ $arg.name }} + {{- end }} + {{- if $arg.value }} + - "{{ $arg.value }}" + {{- end }} + {{- end }} + {{- end }} + + {{- if $deployment.spec.env }} + env: + {{- range $env := $deployment.spec.env }} + - name: {{ $env.name }} + value: "{{ $env.value }}" + {{- end }} + {{- end }} + + image: {{ $podSpec.spec.image_name }}:{{ $podSpec.spec.image_tag }} + imagePullPolicy: IfNotPresent + name: {{ $podSpec.name }} + + {{- if $deployment.spec.ports }} + ports: + {{- range $port := $deployment.spec.ports }} + {{- range $port_name, $port_id := $port }} + - {{ $port_name }}: {{ $port_id }} + {{- end }} + {{- end }} + {{- end }} + + + {{- if $podSpec.spec.resources }} + resources: + {{- range $resourceType, $resource := $podSpec.spec.resources }} + {{ $resourceType }}: + {{- range $limitType, $limit := $resource }} + {{ $limitType }}: {{ $limit }} + {{- end }} + {{- end }} + {{- end }} + + {{- if 
$deployment.spec.volumeMounts }} + volumeMounts: + {{- range $volumeMount := $deployment.spec.volumeMounts }} + - mountPath: {{ $volumeMount.mountPath }} + name: {{ $volumeMount.name }} + {{- end }} + {{- end }} + + hostIPC: true + nodeSelector: + node-type: {{ $global.config.NODE_SELECTOR }} + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: {{ $deployment.name }} + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + + + {{- if $deployment.spec.volumes }} + volumes: + {{- range $index, $volume := $deployment.spec.volumes }} + - name: {{ $volume.name }} + {{- if $volume.hostPath }} + hostPath: + path: {{ $volume.hostPath.path }} + type: {{ $volume.hostPath.type }} + {{- else if $volume.emptyDir }} + emptyDir: + medium: {{ $volume.emptyDir.medium }} + sizeLimit: {{ $volume.emptyDir.sizeLimit }} + {{- end }} + {{- end }} + {{- end }} + +--- +{{- end }} +{{- end }} +{{- end }} diff --git a/FaqGen/benchmark/performance/helm_charts/templates/service.yaml b/FaqGen/benchmark/performance/helm_charts/templates/service.yaml new file mode 100644 index 000000000..5a5896921 --- /dev/null +++ b/FaqGen/benchmark/performance/helm_charts/templates/service.yaml @@ -0,0 +1,24 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +{{- range $service := .Values.services }} +apiVersion: v1 +kind: Service +metadata: + name: {{ $service.name }} + namespace: default +spec: + ports: + {{- range $port := $service.spec.ports }} + - name: {{ $port.name }} + {{- range $port_name, $port_id := $port }} + {{- if ne $port_name "name"}} + {{ $port_name }}: {{ $port_id }} + {{- end }} + {{- end }} + {{- end }} + selector: + app: {{ $service.spec.selector.app }} + type: {{ $service.spec.type }} +--- +{{- end }} diff --git a/FaqGen/benchmark/performance/helm_charts/values.yaml b/FaqGen/benchmark/performance/helm_charts/values.yaml new file mode 100644 index 000000000..eeb206761 --- 
/dev/null +++ b/FaqGen/benchmark/performance/helm_charts/values.yaml @@ -0,0 +1,98 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +namespace: default + +config: + LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct + CONFIG_MAP_NAME: faq-config + NODE_SELECTOR: faq-opea + +deployments: + - name: faq-mega-server-deploy + spec: + ports: + - containerPort: 7777 + + - name: faq-micro-deploy + spec: + ports: + - containerPort: 9000 + + - name: faq-tgi-deploy + spec: + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + args: + - name: "--model-id" + value: $(LLM_MODEL_ID) + - name: "--max-input-length" + value: "2048" + - name: "--max-total-tokens" + value: "4096" + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: "true" + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: ENABLE_HPU_GRAPH + value: 'true' + - name: LIMIT_HPU_GRAPH + value: 'true' + - name: USE_FLASH_ATTENTION + value: 'true' + - name: FLASH_ATTENTION_RECOMPUTE + value: 'true' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm + +services: + - name: faq-micro-svc + spec: + ports: + - name: service + port: 9003 + targetPort: 9000 + selector: + app: faq-micro-deploy + type: ClusterIP + + - name: faq-tgi-svc + spec: + ports: + - name: service + port: 8010 + targetPort: 80 + selector: + app: faq-tgi-deploy + type: ClusterIP + + - name: faq-mega-server-svc + spec: + ports: + - name: service + port: 7779 + targetPort: 7777 + nodePort: 30779 + selector: + app: faq-mega-server-deploy + type: NodePort