benchmark helmcharts (#995)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
This commit is contained in:
Zhenzhong1
2024-10-21 11:13:24 +08:00
committed by GitHub
parent 184e9a43b8
commit d6b04b3405
17 changed files with 856 additions and 1 deletions

View File

@@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
# Lines beginning with "#" (like this one) are comments, not patterns.

# macOS Finder metadata file
.DS_Store
# Common VCS dirs
# (Git, Bazaar, Mercurial and Subversion metadata and their ignore files)
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
# (editor swap files, backups, temporaries, merge leftovers, "~" backups)
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
# (project/workspace settings directories and files written by editors)
.project
.idea/
*.tmproj
.vscode/

View File

@@ -0,0 +1,27 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# Chart metadata read by Helm.
apiVersion: v2
name: chatqna-charts
description: A Helm chart for Kubernetes

# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
type: application

# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/), i.e. a full
# MAJOR.MINOR.PATCH triple. The value is quoted so YAML keeps it a string instead of
# parsing it as a floating-point number.
version: "1.0.0"

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "1.16.0"

View File

@@ -0,0 +1,36 @@
# ChatQnA Deployment
This document guides you through deploying ChatQnA pipelines using Helm charts. Helm charts simplify managing Kubernetes applications by packaging configuration and resources.
## Getting Started
### Preparation
```bash
# on k8s-master node
cd GenAIExamples/ChatQnA/benchmark/performance/helm_charts
# Replace the value of HUGGINGFACEHUB_API_TOKEN with your actual Hugging Face token:
# vim customize.yaml
HUGGINGFACEHUB_API_TOKEN: hf_xxxxx
```
### Deploy your ChatQnA
```bash
# Deploy a ChatQnA pipeline using the specified YAML configuration.
# To deploy with different configurations, simply provide a different YAML file.
helm install chatqna ../helm_charts/ -f customize.yaml
```
Notes: The provided [BKC manifests](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/benchmark) for single, two, and four node Kubernetes clusters are generated using this tool.
## Customize your own ChatQnA pipelines. (Optional)
There are two yaml configs you can specify.
- customize.yaml
This file can specify image names, the number of replicas and CPU cores to manage your pods.
- values.yaml
This file contains the default microservice configurations for ChatQnA. Please review and understand each parameter before making any changes.

View File

@@ -0,0 +1,50 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# Hugging Face access token. The ConfigMap template copies this value into
# HUGGINGFACEHUB_API_TOKEN; replace it with your real token before installing.
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}

# Per-pod overrides. The Deployment template renders a Deployment only for
# names that appear BOTH here and under `deployments` in values.yaml, so every
# deployment that must run needs an entry in this list.
podSpecs:
  - name: audioqna-backend-server-deploy
    spec:
      image_name: opea/audioqna
      image_tag: latest
      replicas: 1

  - name: asr-deploy
    spec:
      image_name: opea/asr
      image_tag: latest
      replicas: 1

  - name: whisper-deploy
    spec:
      image_name: opea/whisper-gaudi
      image_tag: latest
      replicas: 1
      resources:
        limits:
          habana.ai/gaudi: 1

  - name: tts-deploy
    spec:
      image_name: opea/tts
      image_tag: latest
      replicas: 1

  # Added: values.yaml declares the `llm-deploy` deployment and `llm-svc`
  # selects `app: llm-deploy`, but without a podSpec of the same name the
  # Deployment is never rendered and the service has no backing pods.
  # NOTE(review): image name assumed to be the OPEA TGI LLM microservice --
  # confirm against the images used elsewhere for this pipeline.
  - name: llm-deploy
    spec:
      image_name: opea/llm-tgi
      image_tag: latest
      replicas: 1

  - name: speecht5-deploy
    spec:
      image_name: opea/speecht5-gaudi
      image_tag: latest
      replicas: 1
      resources:
        limits:
          habana.ai/gaudi: 1

  - name: llm-dependency-deploy
    spec:
      image_name: ghcr.io/huggingface/tgi-gaudi
      image_tag: 2.0.5
      replicas: 1
      resources:
        limits:
          habana.ai/gaudi: 1

View File

@@ -0,0 +1,25 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# ConfigMap with the environment shared by the pipeline's containers (the
# Deployment template loads it through envFrom/configMapRef).
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ .Values.config.CONFIG_MAP_NAME }}
  namespace: default
data:
  # Values coming from the chart values are quoted so YAML cannot re-type
  # them (ConfigMap data values must be strings).
  HUGGINGFACEHUB_API_TOKEN: {{ .Values.HUGGINGFACEHUB_API_TOKEN | quote }}
  LLM_MODEL_ID: {{ .Values.config.LLM_MODEL_ID | quote }}
  NODE_SELECTOR: {{ .Values.config.NODE_SELECTOR | quote }}
  # Backend endpoints. TGI_LLM_ENDPOINT used to be listed twice (a stale
  # faq-tgi-svc URL followed by this one); duplicate mapping keys are invalid
  # YAML, so only the llm-dependency-svc endpoint is kept.
  ASR_ENDPOINT: http://whisper-svc.default.svc.cluster.local:7066
  TTS_ENDPOINT: http://speecht5-svc.default.svc.cluster.local:7055
  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:3006
  # Service names and ports of the microservices.
  MEGA_SERVICE_HOST_IP: audioqna-backend-server-svc
  ASR_SERVICE_HOST_IP: asr-svc
  ASR_SERVICE_PORT: "3001"
  LLM_SERVICE_HOST_IP: llm-svc
  LLM_SERVICE_PORT: "3007"
  TTS_SERVICE_HOST_IP: tts-svc
  TTS_SERVICE_PORT: "3002"
---

View File

@@ -0,0 +1,113 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
{{- /*
Renders one apps/v1 Deployment for every entry of .Values.deployments whose
name also appears in .Values.podSpecs.

  - .Values.podSpecs[].spec supplies image_name, image_tag, replicas and the
    optional resources block.
  - .Values.deployments[].spec supplies the optional args, env, ports,
    volumeMounts and volumes.
  - .Values.config supplies the ConfigMap name (loaded via envFrom) and the
    node-type node selector value.

ports, resources, volumeMounts and volumes are emitted with toYaml so that
entries with several fields stay one list item, string quantities keep their
quoting and any volume type is rendered completely. args and env values go
through `quote` so special characters are escaped.
*/}}
{{- $global := .Values }}
{{- range $deployment := .Values.deployments }}
{{- range $podSpec := $global.podSpecs }}
{{- if eq $podSpec.name $deployment.name }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ $deployment.name }}
  namespace: default
spec:
  replicas: {{ $podSpec.spec.replicas }}
  selector:
    matchLabels:
      app: {{ $deployment.name }}
  template:
    metadata:
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
      labels:
        app: {{ $deployment.name }}
    spec:
      containers:
        - name: {{ $podSpec.name }}
          image: "{{ $podSpec.spec.image_name }}:{{ $podSpec.spec.image_tag }}"
          imagePullPolicy: IfNotPresent
          envFrom:
            - configMapRef:
                name: {{ $global.config.CONFIG_MAP_NAME }}
          {{- if $deployment.spec.args }}
          args:
            {{- /* each entry contributes its flag name and, if set, its value */}}
            {{- range $arg := $deployment.spec.args }}
            {{- if $arg.name }}
            - {{ $arg.name | quote }}
            {{- end }}
            {{- if $arg.value }}
            - {{ $arg.value | quote }}
            {{- end }}
            {{- end }}
          {{- end }}
          {{- if $deployment.spec.env }}
          env:
            {{- range $env := $deployment.spec.env }}
            - name: {{ $env.name }}
              value: {{ $env.value | quote }}
            {{- end }}
          {{- end }}
          {{- with $deployment.spec.ports }}
          ports:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- with $podSpec.spec.resources }}
          resources:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- with $deployment.spec.volumeMounts }}
          volumeMounts:
            {{- toYaml . | nindent 12 }}
          {{- end }}
      hostIPC: true
      nodeSelector:
        node-type: {{ $global.config.NODE_SELECTOR }}
      serviceAccountName: default
      topologySpreadConstraints:
        - labelSelector:
            matchLabels:
              app: {{ $deployment.name }}
          maxSkew: 1
          topologyKey: kubernetes.io/hostname
          whenUnsatisfiable: ScheduleAnyway
      {{- with $deployment.spec.volumes }}
      volumes:
        {{- toYaml . | nindent 8 }}
      {{- end }}
---
{{- end }}
{{- end }}
{{- end }}

View File

@@ -0,0 +1,24 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
{{- /*
Emits one v1 Service per entry in .Values.services. For every port the name
is written first, followed by all remaining fields of that port entry
(for example port, targetPort, nodePort) exactly as given in the values.
*/}}
{{- range $svc := .Values.services }}
apiVersion: v1
kind: Service
metadata:
  name: {{ $svc.name }}
  namespace: default
spec:
  type: {{ $svc.spec.type }}
  selector:
    app: {{ $svc.spec.selector.app }}
  ports:
    {{- range $portSpec := $svc.spec.ports }}
    - name: {{ $portSpec.name }}
      {{- range $field, $fieldValue := omit $portSpec "name" }}
      {{ $field }}: {{ $fieldValue }}
      {{- end }}
    {{- end }}
---
{{- end }}

View File

@@ -0,0 +1,186 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
namespace: default

# Shared settings. The templates read CONFIG_MAP_NAME (ConfigMap name and
# envFrom reference), LLM_MODEL_ID and NODE_SELECTOR from this section.
config:
  LLM_MODEL_ID: "Intel/neural-chat-7b-v3-3"
  CONFIG_MAP_NAME: "audio-qna-config"
  NODE_SELECTOR: "audioqna-opea"
  ASR_ENDPOINT: "http://whisper-svc.default.svc.cluster.local:7066"
  TTS_ENDPOINT: "http://speecht5-svc.default.svc.cluster.local:7055"
  TGI_LLM_ENDPOINT: "http://llm-dependency-svc.default.svc.cluster.local:3006"
  MEGA_SERVICE_HOST_IP: "audioqna-backend-server-svc"
  ASR_SERVICE_HOST_IP: "asr-svc"
  ASR_SERVICE_PORT: "3001"
  LLM_SERVICE_HOST_IP: "llm-svc"
  LLM_SERVICE_PORT: "3007"
  TTS_SERVICE_HOST_IP: "tts-svc"
  TTS_SERVICE_PORT: "3002"

# Container-level settings per deployment. An entry is rendered only when a
# podSpec with the same name exists (the Deployment template matches on name).
deployments:
  - name: "audioqna-backend-server-deploy"
    spec:
      ports:
        - containerPort: 8888

  - name: "asr-deploy"
    spec:
      ports:
        - containerPort: 9099

  - name: "whisper-deploy"
    spec:
      ports:
        - containerPort: 7066
      env:
        - name: "OMPI_MCA_btl_vader_single_copy_mechanism"
          value: "none"
        - name: "PT_HPU_ENABLE_LAZY_COLLECTIVES"
          value: "true"
        - name: "runtime"
          value: "habana"
        - name: "HABANA_VISIBLE_DEVICES"
          value: "all"

  - name: "tts-deploy"
    spec:
      ports:
        - containerPort: 9088

  - name: "llm-deploy"
    spec:
      ports:
        - containerPort: 9000

  - name: "speecht5-deploy"
    spec:
      ports:
        - containerPort: 7055
      env:
        - name: "OMPI_MCA_btl_vader_single_copy_mechanism"
          value: "none"
        - name: "PT_HPU_ENABLE_LAZY_COLLECTIVES"
          value: "true"
        - name: "runtime"
          value: "habana"
        - name: "HABANA_VISIBLE_DEVICES"
          value: "all"

  - name: "llm-dependency-deploy"
    spec:
      ports:
        - containerPort: 80
      # The Deployment template takes container resources from the matching
      # podSpec entry, not from this key.
      resources:
        limits:
          habana.ai/gaudi: 1
      # Each entry becomes the flag name followed by its value in the
      # container args.
      args:
        - name: "--model-id"
          value: "$(LLM_MODEL_ID)"
        - name: "--max-input-length"
          value: "2048"
        - name: "--max-total-tokens"
          value: "4096"
      env:
        - name: "OMPI_MCA_btl_vader_single_copy_mechanism"
          value: "none"
        - name: "PT_HPU_ENABLE_LAZY_COLLECTIVES"
          value: "true"
        - name: "runtime"
          value: "habana"
        - name: "HABANA_VISIBLE_DEVICES"
          value: "all"
        - name: "ENABLE_HPU_GRAPH"
          value: "true"
        - name: "LIMIT_HPU_GRAPH"
          value: "true"
        - name: "USE_FLASH_ATTENTION"
          value: "true"
        - name: "FLASH_ATTENTION_RECOMPUTE"
          value: "true"
      volumeMounts:
        - name: "model-volume"
          mountPath: "/data"
        - name: "shm"
          mountPath: "/dev/shm"
      volumes:
        - name: "model-volume"
          hostPath:
            path: "/mnt/models"
            type: "Directory"
        - name: "shm"
          emptyDir:
            medium: "Memory"
            sizeLimit: "1Gi"

# One Service per entry; selector.app must equal the name of the deployment
# it fronts (the Deployment template labels pods with app=<deployment name>).
services:
  - name: "asr-svc"
    spec:
      type: "ClusterIP"
      selector:
        app: "asr-deploy"
      ports:
        - name: "service"
          port: 3001
          targetPort: 9099

  - name: "whisper-svc"
    spec:
      type: "ClusterIP"
      selector:
        app: "whisper-deploy"
      ports:
        - name: "service"
          port: 7066
          targetPort: 7066

  - name: "tts-svc"
    spec:
      type: "ClusterIP"
      selector:
        app: "tts-deploy"
      ports:
        - name: "service"
          port: 3002
          targetPort: 9088

  - name: "speecht5-svc"
    spec:
      type: "ClusterIP"
      selector:
        app: "speecht5-deploy"
      ports:
        - name: "service"
          port: 7055
          targetPort: 7055

  - name: "llm-dependency-svc"
    spec:
      type: "ClusterIP"
      selector:
        app: "llm-dependency-deploy"
      ports:
        - name: "service"
          port: 3006
          targetPort: 80

  - name: "llm-svc"
    spec:
      type: "ClusterIP"
      selector:
        app: "llm-deploy"
      ports:
        - name: "service"
          port: 3007
          targetPort: 9000

  - name: "audioqna-backend-server-svc"
    spec:
      type: "NodePort"
      selector:
        app: "audioqna-backend-server-deploy"
      ports:
        - name: "service"
          port: 3088
          targetPort: 8888
          nodePort: 30666

View File

@@ -20,7 +20,7 @@ HUGGINGFACEHUB_API_TOKEN: hf_xxxxx
```bash
# Deploy a ChatQnA pipeline using the specified YAML configuration.
# To deploy with different configurations, simply provide a different YAML file.
helm install chatqna helm_charts/ -f customize.yaml
helm install chatqna ../helm_charts/ -f customize.yaml
```
Notes: The provided [BKC manifests](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/benchmark) for single, two, and four node Kubernetes clusters are generated using this tool.

View File

@@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
# Lines beginning with "#" (like this one) are comments, not patterns.

# macOS Finder metadata file
.DS_Store
# Common VCS dirs
# (Git, Bazaar, Mercurial and Subversion metadata and their ignore files)
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
# (editor swap files, backups, temporaries, merge leftovers, "~" backups)
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
# (project/workspace settings directories and files written by editors)
.project
.idea/
*.tmproj
.vscode/

View File

@@ -0,0 +1,27 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# Chart metadata read by Helm.
apiVersion: v2
name: chatqna-charts
description: A Helm chart for Kubernetes

# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
type: application

# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/), i.e. a full
# MAJOR.MINOR.PATCH triple. The value is quoted so YAML keeps it a string instead of
# parsing it as a floating-point number.
version: "1.0.0"

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "1.16.0"

View File

@@ -0,0 +1,36 @@
# ChatQnA Deployment
This document guides you through deploying ChatQnA pipelines using Helm charts. Helm charts simplify managing Kubernetes applications by packaging configuration and resources.
## Getting Started
### Preparation
```bash
# on k8s-master node
cd GenAIExamples/ChatQnA/benchmark/performance/helm_charts
# Replace the value of HUGGINGFACEHUB_API_TOKEN with your actual Hugging Face token:
# vim customize.yaml
HUGGINGFACEHUB_API_TOKEN: hf_xxxxx
```
### Deploy your ChatQnA
```bash
# Deploy a ChatQnA pipeline using the specified YAML configuration.
# To deploy with different configurations, simply provide a different YAML file.
helm install chatqna ../helm_charts/ -f customize.yaml
```
Notes: The provided [BKC manifests](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/benchmark) for single, two, and four node Kubernetes clusters are generated using this tool.
## Customize your own ChatQnA pipelines. (Optional)
There are two yaml configs you can specify.
- customize.yaml
This file can specify image names, the number of replicas and CPU cores to manage your pods.
- values.yaml
This file contains the default microservice configurations for ChatQnA. Please review and understand each parameter before making any changes.

View File

@@ -0,0 +1,34 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# Hugging Face access token. The ConfigMap template copies this value into
# HUGGINGFACEHUB_API_TOKEN; replace it with your real token before installing.
HUGGINGFACEHUB_API_TOKEN: "${HF_TOKEN}"

# Image, replica count and optional resources for each pod. A Deployment is
# rendered only for names that also appear under `deployments` in values.yaml.
podSpecs:
  # NOTE(review): the image is opea/chatqna although the deployment and the
  # rest of this chart are named faq-* -- looks copied from another chart;
  # confirm this is the intended megaservice image.
  - name: "faq-mega-server-deploy"
    spec:
      image_name: "opea/chatqna"
      image_tag: "latest"
      replicas: 2
      resources:
        limits:
          cpu: "8"
          memory: "8000Mi"
        requests:
          cpu: "8"
          memory: "8000Mi"

  - name: "faq-tgi-deploy"
    spec:
      image_name: "ghcr.io/huggingface/tgi-gaudi"
      image_tag: "2.0.5"
      replicas: 7
      resources:
        limits:
          habana.ai/gaudi: 1

  - name: "faq-micro-deploy"
    spec:
      image_name: "opea/llm-faqgen-tgi"
      image_tag: "latest"
      replicas: 1

View File

@@ -0,0 +1,16 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# ConfigMap with the environment shared by the pipeline's containers (the
# Deployment template loads it through envFrom/configMapRef).
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ .Values.config.CONFIG_MAP_NAME }}
  namespace: default
data:
  # Values coming from the chart values are quoted so YAML cannot re-type
  # them (ConfigMap data values must be strings).
  HUGGINGFACEHUB_API_TOKEN: {{ .Values.HUGGINGFACEHUB_API_TOKEN | quote }}
  LLM_MODEL_ID: {{ .Values.config.LLM_MODEL_ID | quote }}
  NODE_SELECTOR: {{ .Values.config.NODE_SELECTOR | quote }}
  TGI_LLM_ENDPOINT: http://faq-tgi-svc.default.svc.cluster.local:8010
  LLM_SERVICE_HOST_IP: faq-micro-svc
  # faq-micro-svc exposes port 9003 (targetPort 9000), so the service port is
  # published next to the host name.
  # NOTE(review): assumes the megaservice reads LLM_SERVICE_PORT -- confirm.
  LLM_SERVICE_PORT: "9003"
  MEGA_SERVICE_HOST_IP: faq-mega-server-svc
---

View File

@@ -0,0 +1,113 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
{{- /*
Renders one apps/v1 Deployment for every entry of .Values.deployments whose
name also appears in .Values.podSpecs.

  - .Values.podSpecs[].spec supplies image_name, image_tag, replicas and the
    optional resources block.
  - .Values.deployments[].spec supplies the optional args, env, ports,
    volumeMounts and volumes.
  - .Values.config supplies the ConfigMap name (loaded via envFrom) and the
    node-type node selector value.

ports, resources, volumeMounts and volumes are emitted with toYaml so that
entries with several fields stay one list item, string quantities keep their
quoting and any volume type is rendered completely. args and env values go
through `quote` so special characters are escaped.
*/}}
{{- $global := .Values }}
{{- range $deployment := .Values.deployments }}
{{- range $podSpec := $global.podSpecs }}
{{- if eq $podSpec.name $deployment.name }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ $deployment.name }}
  namespace: default
spec:
  replicas: {{ $podSpec.spec.replicas }}
  selector:
    matchLabels:
      app: {{ $deployment.name }}
  template:
    metadata:
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
      labels:
        app: {{ $deployment.name }}
    spec:
      containers:
        - name: {{ $podSpec.name }}
          image: "{{ $podSpec.spec.image_name }}:{{ $podSpec.spec.image_tag }}"
          imagePullPolicy: IfNotPresent
          envFrom:
            - configMapRef:
                name: {{ $global.config.CONFIG_MAP_NAME }}
          {{- if $deployment.spec.args }}
          args:
            {{- /* each entry contributes its flag name and, if set, its value */}}
            {{- range $arg := $deployment.spec.args }}
            {{- if $arg.name }}
            - {{ $arg.name | quote }}
            {{- end }}
            {{- if $arg.value }}
            - {{ $arg.value | quote }}
            {{- end }}
            {{- end }}
          {{- end }}
          {{- if $deployment.spec.env }}
          env:
            {{- range $env := $deployment.spec.env }}
            - name: {{ $env.name }}
              value: {{ $env.value | quote }}
            {{- end }}
          {{- end }}
          {{- with $deployment.spec.ports }}
          ports:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- with $podSpec.spec.resources }}
          resources:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- with $deployment.spec.volumeMounts }}
          volumeMounts:
            {{- toYaml . | nindent 12 }}
          {{- end }}
      hostIPC: true
      nodeSelector:
        node-type: {{ $global.config.NODE_SELECTOR }}
      serviceAccountName: default
      topologySpreadConstraints:
        - labelSelector:
            matchLabels:
              app: {{ $deployment.name }}
          maxSkew: 1
          topologyKey: kubernetes.io/hostname
          whenUnsatisfiable: ScheduleAnyway
      {{- with $deployment.spec.volumes }}
      volumes:
        {{- toYaml . | nindent 8 }}
      {{- end }}
---
{{- end }}
{{- end }}
{{- end }}

View File

@@ -0,0 +1,24 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
{{- /*
Emits one v1 Service per entry in .Values.services. For every port the name
is written first, followed by all remaining fields of that port entry
(for example port, targetPort, nodePort) exactly as given in the values.
*/}}
{{- range $svc := .Values.services }}
apiVersion: v1
kind: Service
metadata:
  name: {{ $svc.name }}
  namespace: default
spec:
  type: {{ $svc.spec.type }}
  selector:
    app: {{ $svc.spec.selector.app }}
  ports:
    {{- range $portSpec := $svc.spec.ports }}
    - name: {{ $portSpec.name }}
      {{- range $field, $fieldValue := omit $portSpec "name" }}
      {{ $field }}: {{ $fieldValue }}
      {{- end }}
    {{- end }}
---
{{- end }}

View File

@@ -0,0 +1,98 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
namespace: default

# Shared settings. The templates read CONFIG_MAP_NAME (ConfigMap name and
# envFrom reference), LLM_MODEL_ID and NODE_SELECTOR from this section.
config:
  LLM_MODEL_ID: "meta-llama/Meta-Llama-3-8B-Instruct"
  CONFIG_MAP_NAME: "faq-config"
  NODE_SELECTOR: "faq-opea"

# Container-level settings per deployment. An entry is rendered only when a
# podSpec with the same name exists (the Deployment template matches on name).
deployments:
  - name: "faq-mega-server-deploy"
    spec:
      ports:
        - containerPort: 7777

  - name: "faq-micro-deploy"
    spec:
      ports:
        - containerPort: 9000

  - name: "faq-tgi-deploy"
    spec:
      ports:
        - containerPort: 80
      # The Deployment template takes container resources from the matching
      # podSpec entry, not from this key.
      resources:
        limits:
          habana.ai/gaudi: 1
      # Each entry becomes the flag name followed by its value in the
      # container args.
      args:
        - name: "--model-id"
          value: "$(LLM_MODEL_ID)"
        - name: "--max-input-length"
          value: "2048"
        - name: "--max-total-tokens"
          value: "4096"
      env:
        - name: "OMPI_MCA_btl_vader_single_copy_mechanism"
          value: "none"
        - name: "PT_HPU_ENABLE_LAZY_COLLECTIVES"
          value: "true"
        - name: "runtime"
          value: "habana"
        - name: "HABANA_VISIBLE_DEVICES"
          value: "all"
        - name: "ENABLE_HPU_GRAPH"
          value: "true"
        - name: "LIMIT_HPU_GRAPH"
          value: "true"
        - name: "USE_FLASH_ATTENTION"
          value: "true"
        - name: "FLASH_ATTENTION_RECOMPUTE"
          value: "true"
      volumeMounts:
        - name: "model-volume"
          mountPath: "/data"
        - name: "shm"
          mountPath: "/dev/shm"
      volumes:
        - name: "model-volume"
          hostPath:
            path: "/mnt/models"
            type: "Directory"
        - name: "shm"
          emptyDir:
            medium: "Memory"
            sizeLimit: "1Gi"

# One Service per entry; selector.app must equal the name of the deployment
# it fronts (the Deployment template labels pods with app=<deployment name>).
services:
  - name: "faq-micro-svc"
    spec:
      type: "ClusterIP"
      selector:
        app: "faq-micro-deploy"
      ports:
        - name: "service"
          port: 9003
          targetPort: 9000

  - name: "faq-tgi-svc"
    spec:
      type: "ClusterIP"
      selector:
        app: "faq-tgi-deploy"
      ports:
        - name: "service"
          port: 8010
          targetPort: 80

  - name: "faq-mega-server-svc"
    spec:
      type: "NodePort"
      selector:
        app: "faq-mega-server-deploy"
      ports:
        - name: "service"
          port: 7779
          targetPort: 7777
          nodePort: 30779