added visual qna & update deployment template
This commit is contained in:
@@ -51,7 +51,14 @@ spec:
|
|||||||
{{- end }}
|
{{- end }}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
|
|
||||||
image: {{ $microservice.image }}
|
{{- $image := $microservice.image }}
|
||||||
|
{{- range $podSpec := $global.podSpecs }}
|
||||||
|
{{- if eq $podSpec.name $microservice.name }}
|
||||||
|
{{- $image = $podSpec.image | default $microservice.image }}
|
||||||
|
{{- end }}
|
||||||
|
{{- end }}
|
||||||
|
image: {{ $image }}
|
||||||
|
|
||||||
imagePullPolicy: IfNotPresent
|
imagePullPolicy: IfNotPresent
|
||||||
name: {{ $microservice.name }}
|
name: {{ $microservice.name }}
|
||||||
|
|
||||||
|
|||||||
@@ -64,6 +64,9 @@ microservices:
|
|||||||
- name: reranking-dependency-deploy
|
- name: reranking-dependency-deploy
|
||||||
image: opea/tei-gaudi:latest
|
image: opea/tei-gaudi:latest
|
||||||
replicas: 1
|
replicas: 1
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
habana.ai/gaudi: 1
|
||||||
args:
|
args:
|
||||||
- name: "--model-id"
|
- name: "--model-id"
|
||||||
- value: $(RERANK_MODEL_ID)
|
- value: $(RERANK_MODEL_ID)
|
||||||
|
|||||||
@@ -51,7 +51,14 @@ spec:
|
|||||||
{{- end }}
|
{{- end }}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
|
|
||||||
image: {{ $microservice.image }}
|
{{- $image := $microservice.image }}
|
||||||
|
{{- range $podSpec := $global.podSpecs }}
|
||||||
|
{{- if eq $podSpec.name $microservice.name }}
|
||||||
|
{{- $image = $podSpec.image | default $microservice.image }}
|
||||||
|
{{- end }}
|
||||||
|
{{- end }}
|
||||||
|
image: {{ $image }}
|
||||||
|
|
||||||
imagePullPolicy: IfNotPresent
|
imagePullPolicy: IfNotPresent
|
||||||
name: {{ $microservice.name }}
|
name: {{ $microservice.name }}
|
||||||
|
|
||||||
|
|||||||
23
VisualQnA/benchmark/performance/helm_charts/.helmignore
Normal file
23
VisualQnA/benchmark/performance/helm_charts/.helmignore
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
# Patterns to ignore when building packages.
|
||||||
|
# This supports shell glob matching, relative path matching, and
|
||||||
|
# negation (prefixed with !). Only one pattern per line.
|
||||||
|
.DS_Store
|
||||||
|
# Common VCS dirs
|
||||||
|
.git/
|
||||||
|
.gitignore
|
||||||
|
.bzr/
|
||||||
|
.bzrignore
|
||||||
|
.hg/
|
||||||
|
.hgignore
|
||||||
|
.svn/
|
||||||
|
# Common backup files
|
||||||
|
*.swp
|
||||||
|
*.bak
|
||||||
|
*.tmp
|
||||||
|
*.orig
|
||||||
|
*~
|
||||||
|
# Various IDEs
|
||||||
|
.project
|
||||||
|
.idea/
|
||||||
|
*.tmproj
|
||||||
|
.vscode/
|
||||||
27
VisualQnA/benchmark/performance/helm_charts/Chart.yaml
Normal file
27
VisualQnA/benchmark/performance/helm_charts/Chart.yaml
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: v2
# NOTE(review): the name still says "chatqna" although this chart lives under
# VisualQnA/. It is left unchanged here because a chart's name is externally
# visible; confirm and rename deliberately if this is a copy-paste leftover.
name: chatqna-charts
description: A Helm chart for Kubernetes

# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into
# versioned archives to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer.
# They're included as a dependency of application charts to inject those
# utilities and functions into the rendering pipeline. Library charts do not
# define any templates and therefore cannot be deployed.
type: application

# This is the chart version. This version number should be incremented each
# time you make changes to the chart and its templates, including the app
# version.
# Versions are expected to follow Semantic Versioning (https://semver.org/).
# The value is quoted so YAML keeps it a string (a bare 1.0 is read as a
# float) and is written as a full MAJOR.MINOR.PATCH version.
version: "1.0.0"

# This is the version number of the application being deployed. This version
# number should be incremented each time you make changes to the application.
# Versions are not expected to follow Semantic Versioning. They should reflect
# the version the application is using.
# It is recommended to use it with quotes.
appVersion: "1.16.0"
|
||||||
36
VisualQnA/benchmark/performance/helm_charts/README.md
Normal file
36
VisualQnA/benchmark/performance/helm_charts/README.md
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
# VisualQnA Deployment
|
||||||
|
|
||||||
|
This document guides you through deploying VisualQnA pipelines using Helm charts. Helm charts simplify the management of Kubernetes applications by packaging their configuration and resources.
|
||||||
|
|
||||||
|
## Getting Started
|
||||||
|
|
||||||
|
### Preparation
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# on k8s-master node
|
||||||
|
cd GenAIExamples/VisualQnA/benchmark/performance/helm_charts
|
||||||
|
|
||||||
|
# Replace the value of HUGGINGFACEHUB_API_TOKEN with your actual Hugging Face token:
|
||||||
|
# vim customize.yaml
|
||||||
|
HUGGINGFACEHUB_API_TOKEN: hf_xxxxx
|
||||||
|
```
|
||||||
|
|
||||||
|
### Deploy your VisualQnA
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Deploy a VisualQnA pipeline using the specified YAML configuration.
|
||||||
|
# To deploy with different configurations, simply provide a different YAML file.
|
||||||
|
helm install chatqna helm_charts/ -f customize.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
Notes: The provided [BKC manifests](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/benchmark) for single, two, and four node Kubernetes clusters are generated using this tool.
|
||||||
|
|
||||||
|
## Customize your own VisualQnA pipelines (Optional)
|
||||||
|
|
||||||
|
There are two yaml configs you can specify.
|
||||||
|
|
||||||
|
- customize.yaml
|
||||||
|
This file can specify image names, the number of replicas and CPU cores to manage your pods.
|
||||||
|
|
||||||
|
- values.yaml
|
||||||
|
This file contains the default microservice configurations for VisualQnA. Please review and understand each parameter before making any changes.
|
||||||
23
VisualQnA/benchmark/performance/helm_charts/customize.yaml
Normal file
23
VisualQnA/benchmark/performance/helm_charts/customize.yaml
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
# Per-deployment overrides. The Deployment template applies an entry only when
# its `name` equals the `name` of a microservice in values.yaml, so the names
# below must match those microservices exactly. Overridable fields: `replicas`,
# `image` and `resources`.
podSpecs:
  # VisualQnA megaservice.
  - name: visualqna
    replicas: 2
    resources:
      limits:
        cpu: "8"
        memory: "8000Mi"
      requests:
        cpu: "8"
        memory: "8000Mi"

  # TGI serving backend; each replica requests one Gaudi device.
  - name: visualqna-tgi
    replicas: 7
    resources:
      limits:
        habana.ai/gaudi: 1

  # LVM microservice.
  - name: visualqna-lvm-uservice
    replicas: 1
|
||||||
@@ -0,0 +1,25 @@
|
|||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
{{- /*
  ConfigMap shared by every Deployment of this chart (each container loads it
  through envFrom). Every key is rendered exactly once from .Values so the
  manifest has no duplicate mapping keys and stays in sync with values.yaml.
  All values are passed through `quote` because ConfigMap data values must be
  strings (for example PORT and the token limits look like numbers).
*/}}
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ .Values.config.CONFIG_MAP_NAME }}
  namespace: default
data:
  # The token is read from the top-level HUGGINGFACEHUB_API_TOKEN value when it
  # is set and otherwise falls back to config.HUGGINGFACEHUB_API_TOKEN.
  # NOTE(review): a ConfigMap is not a secret store; consider a Secret.
  HUGGINGFACEHUB_API_TOKEN: {{ .Values.HUGGINGFACEHUB_API_TOKEN | default .Values.config.HUGGINGFACEHUB_API_TOKEN | quote }}
  LLM_MODEL_ID: {{ .Values.config.LLM_MODEL_ID | quote }}
  NODE_SELECTOR: {{ .Values.config.NODE_SELECTOR | quote }}
  CONFIG_MAP_NAME: {{ .Values.config.CONFIG_MAP_NAME | quote }}

  LVM_ENDPOINT: {{ .Values.config.LVM_ENDPOINT | quote }}
  MODEL_ID: {{ .Values.config.MODEL_ID | quote }}
  LVM_SERVICE_HOST_IP: {{ .Values.config.LVM_SERVICE_HOST_IP | quote }}
  PORT: {{ .Values.config.PORT | quote }}
  MAX_INPUT_TOKENS: {{ .Values.config.MAX_INPUT_TOKENS | quote }}
  MAX_TOTAL_TOKENS: {{ .Values.config.MAX_TOTAL_TOKENS | quote }}
---
|
||||||
@@ -0,0 +1,131 @@
|
|||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
{{- /*
  Renders one apps/v1 Deployment per entry of .Values.microservices.

  For each microservice, `replicas`, `image` and `resources` start from the
  microservice's own values and are replaced by the matching fields of any
  .Values.podSpecs entry whose `name` equals the microservice name.
  Every container loads its environment from the ConfigMap named by
  .Values.config.CONFIG_MAP_NAME. Optional sections (args, env, ports,
  resources, volumeMounts, volumes) are emitted only when the value is set.
*/}}
{{- $global := .Values }}
{{- range $microservice := .Values.microservices }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ $microservice.name }}
  namespace: default
spec:
  {{- /* Replica count: podSpecs override, else the microservice default. */}}
  {{- $replicas := $microservice.replicas }}
  {{- range $podSpec := $global.podSpecs }}
    {{- if eq $podSpec.name $microservice.name }}
      {{- $replicas = $podSpec.replicas | default $microservice.replicas }}
    {{- end }}
  {{- end }}
  replicas: {{ $replicas }}

  selector:
    matchLabels:
      app: {{ $microservice.name }}
  template:
    metadata:
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
      labels:
        app: {{ $microservice.name }}
    spec:
      containers:
      - envFrom:
        - configMapRef:
            name: {{ $global.config.CONFIG_MAP_NAME }}
        {{- if $microservice.args }}
        args:
        {{- /*
          Each args item may carry a `name` (the flag) and/or a `value`; each
          one that is set becomes its own list element. `quote` escapes
          embedded double quotes and backslashes, which hand-written
          surrounding quotes do not.
        */}}
        {{- range $arg := $microservice.args }}
        {{- if $arg.name }}
        - {{ $arg.name | quote }}
        {{- end }}
        {{- if $arg.value }}
        - {{ $arg.value | quote }}
        {{- end }}
        {{- end }}
        {{- end }}

        {{- if $microservice.env }}
        env:
        {{- range $env := $microservice.env }}
        - name: {{ $env.name }}
          value: {{ $env.value | quote }}
        {{- end }}
        {{- end }}

        {{- /* Image: podSpecs override, else the microservice default. */}}
        {{- $image := $microservice.image }}
        {{- range $podSpec := $global.podSpecs }}
          {{- if eq $podSpec.name $microservice.name }}
            {{- $image = $podSpec.image | default $microservice.image }}
          {{- end }}
        {{- end }}
        image: {{ $image }}

        imagePullPolicy: IfNotPresent
        name: {{ $microservice.name }}

        {{- if $microservice.ports }}
        ports:
        {{- /* Every key/value pair of a ports item becomes one list entry. */}}
        {{- range $port := $microservice.ports }}
        {{- range $port_name, $port_id := $port }}
        - {{ $port_name }}: {{ $port_id }}
        {{- end }}
        {{- end }}
        {{- end }}

        {{- /* Resources: a podSpecs `resources` map replaces the default as a whole. */}}
        {{- $resources := $microservice.resources }}
        {{- range $podSpec := $global.podSpecs }}
          {{- if eq $podSpec.name $microservice.name }}
            {{- if $podSpec.resources }}
              {{- $resources = $podSpec.resources }}
            {{- end }}
          {{- end }}
        {{- end }}

        {{- if $resources }}
        resources:
        {{- range $resourceType, $resource := $resources }}
          {{ $resourceType }}:
          {{- range $limitType, $limit := $resource }}
            {{ $limitType }}: {{ $limit }}
          {{- end }}
        {{- end }}
        {{- end }}

        {{- if $microservice.volumeMounts }}
        volumeMounts:
        {{- range $volumeMount := $microservice.volumeMounts }}
        - mountPath: {{ $volumeMount.mountPath }}
          name: {{ $volumeMount.name }}
        {{- end }}
        {{- end }}

      hostIPC: true
      nodeSelector:
        node-type: {{ $global.config.NODE_SELECTOR }}
      serviceAccountName: default
      topologySpreadConstraints:
      - labelSelector:
          matchLabels:
            app: {{ $microservice.name }}
        maxSkew: 1
        topologyKey: kubernetes.io/hostname
        whenUnsatisfiable: ScheduleAnyway

      {{- if $microservice.volumes }}
      volumes:
      {{- /* Only hostPath and emptyDir volume sources are rendered. */}}
      {{- range $index, $volume := $microservice.volumes }}
      - name: {{ $volume.name }}
      {{- if $volume.hostPath }}
        hostPath:
          path: {{ $volume.hostPath.path }}
          type: {{ $volume.hostPath.type }}
      {{- else if $volume.emptyDir }}
        emptyDir:
          medium: {{ $volume.emptyDir.medium }}
          sizeLimit: {{ $volume.emptyDir.sizeLimit }}
      {{- end }}
      {{- end }}
      {{- end }}

---
{{- end }}
|
||||||
@@ -0,0 +1,24 @@
|
|||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
{{- /*
  Emits one v1 Service for each item of .Values.services. The port's `name`
  is written first; every other key of the port item (for example port,
  targetPort) is copied through as-is.
*/}}
{{- range $svc := .Values.services }}
apiVersion: v1
kind: Service
metadata:
  name: {{ $svc.name }}
  namespace: default
spec:
  ports:
  {{- range $portSpec := $svc.spec.ports }}
  - name: {{ $portSpec.name }}
    {{- range $field, $fieldValue := $portSpec }}
    {{- /* `name` was already written as the first key of this list item. */}}
    {{- if not (eq $field "name") }}
    {{ $field }}: {{ $fieldValue }}
    {{- end }}
    {{- end }}
  {{- end }}
  selector:
    app: {{ $svc.spec.selector.app }}
  type: {{ $svc.spec.type }}
---
{{- end }}
|
||||||
84
VisualQnA/benchmark/performance/helm_charts/values.yaml
Normal file
84
VisualQnA/benchmark/performance/helm_charts/values.yaml
Normal file
@@ -0,0 +1,84 @@
|
|||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
namespace: default

# Settings rendered into the shared ConfigMap and referenced by the templates.
config:
  LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct
  # Name of the ConfigMap every container loads through envFrom.
  CONFIG_MAP_NAME: visualqna-config
  # Value of the `node-type` node label the pods are scheduled onto.
  NODE_SELECTOR: opea
  # Placeholder only: Helm does not expand shell variables in values files.
  # Provide the real token as described in the README.
  HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}

  LVM_ENDPOINT: "http://visualqna-tgi"
  MODEL_ID: "llava-hf/llava-v1.6-mistral-7b-hf"
  LVM_SERVICE_HOST_IP: visualqna-lvm-uservice
  PORT: "8399"
  MAX_INPUT_TOKENS: "4096"
  MAX_TOTAL_TOKENS: "8192"

# One Deployment is rendered per entry; `name` is also used as the pod's
# `app` label, which the Service selectors below must match.
microservices:
  - name: visualqna-lvm-uservice
    image: opea/lvm-tgi:latest
    replicas: 1
    ports:
      - containerPort: 9399

  - name: visualqna
    image: opea/visualqna:latest
    replicas: 1
    ports:
      - containerPort: 8399

  - name: visualqna-tgi
    image: opea/llava-tgi:latest
    replicas: 1
    ports:
      - containerPort: 8399
    resources:
      limits:
        habana.ai/gaudi: 1
    volumeMounts:
      - mountPath: /data
        name: model-volume
      - mountPath: /dev/shm
        name: shm
    volumes:
      - hostPath:
          path: /mnt/models
          type: Directory
        name: model-volume
      - emptyDir:
          medium: Memory
          sizeLimit: 1Gi
        name: shm

# One Service is rendered per entry.
services:
  - name: visualqna-lvm-uservice
    spec:
      ports:
        - name: service
          port: 9399
          targetPort: 9399
      selector:
        app: visualqna-lvm-uservice
      type: ClusterIP

  - name: visualqna-tgi
    spec:
      ports:
        - name: service
          port: 80
          targetPort: 8399
      selector:
        # Must equal the `app` label of the visualqna-tgi Deployment; the
        # previous value `lvm-uservice` matched no pod in this chart.
        app: visualqna-tgi
      type: ClusterIP

  - name: visualqna
    spec:
      ports:
        - name: service
          # NOTE(review): 8888 differs from the containerPort (8399) declared
          # for the visualqna microservice above; confirm which port the
          # container actually listens on.
          port: 8888
          targetPort: 8888
      selector:
        app: visualqna
      type: ClusterIP
|
||||||
Reference in New Issue
Block a user