[ChatQnA] Deploy ChatQnA for benchmarking with different configurations. (#870)
This commit is contained in:
@@ -18,8 +18,6 @@ repos:
|
||||
SearchQnA/ui/svelte/tsconfig.json|
|
||||
DocSum/ui/svelte/tsconfig.json
|
||||
)$
|
||||
- id: check-yaml
|
||||
args: [--allow-multiple-documents]
|
||||
- id: debug-statements
|
||||
- id: requirements-txt-fixer
|
||||
- id: trailing-whitespace
|
||||
|
||||
23
ChatQnA/benchmark/performance/helm_charts/.helmignore
Normal file
23
ChatQnA/benchmark/performance/helm_charts/.helmignore
Normal file
@@ -0,0 +1,23 @@
|
||||
# Patterns to ignore when building packages.
|
||||
# This supports shell glob matching, relative path matching, and
|
||||
# negation (prefixed with !). Only one pattern per line.
|
||||
.DS_Store
|
||||
# Common VCS dirs
|
||||
.git/
|
||||
.gitignore
|
||||
.bzr/
|
||||
.bzrignore
|
||||
.hg/
|
||||
.hgignore
|
||||
.svn/
|
||||
# Common backup files
|
||||
*.swp
|
||||
*.bak
|
||||
*.tmp
|
||||
*.orig
|
||||
*~
|
||||
# Various IDEs
|
||||
.project
|
||||
.idea/
|
||||
*.tmproj
|
||||
.vscode/
|
||||
27
ChatQnA/benchmark/performance/helm_charts/Chart.yaml
Normal file
27
ChatQnA/benchmark/performance/helm_charts/Chart.yaml
Normal file
@@ -0,0 +1,27 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v2
|
||||
name: chatqna-charts
|
||||
description: A Helm chart for Kubernetes
|
||||
|
||||
# A chart can be either an 'application' or a 'library' chart.
|
||||
#
|
||||
# Application charts are a collection of templates that can be packaged into versioned archives
|
||||
# to be deployed.
|
||||
#
|
||||
# Library charts provide useful utilities or functions for the chart developer. They're included as
|
||||
# a dependency of application charts to inject those utilities and functions into the rendering
|
||||
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
|
||||
type: application
|
||||
|
||||
# This is the chart version. This version number should be incremented each time you make changes
|
||||
# to the chart and its templates, including the app version.
|
||||
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||
version: 1.0
|
||||
|
||||
# This is the version number of the application being deployed. This version number should be
|
||||
# incremented each time you make changes to the application. Versions are not expected to
|
||||
# follow Semantic Versioning. They should reflect the version the application is using.
|
||||
# It is recommended to use it with quotes.
|
||||
appVersion: "1.16.0"
|
||||
38
ChatQnA/benchmark/performance/helm_charts/README.md
Normal file
38
ChatQnA/benchmark/performance/helm_charts/README.md
Normal file
@@ -0,0 +1,38 @@
|
||||
## ChatQnA Deployment
|
||||
|
||||
This document guides you through deploying ChatQnA pipelines using Helm charts. Helm charts simplify managing Kubernetes applications by packaging configuration and resources.
|
||||
|
||||
## Getting Started
|
||||
|
||||
### Preparation
|
||||
|
||||
```bash
|
||||
# on k8s-master node
|
||||
cd GenAIExamples/ChatQnA/benchmark/performance/helm_charts
|
||||
|
||||
# Replace <your token> with your actual Hugging Face token and run the following command:
|
||||
HUGGINGFACE_TOKEN=<your token>
|
||||
find . -name '*.yaml' -type f -exec sed -i "s#\${HF_TOKEN}#${HUGGINGFACE_TOKEN}#g" {} \;
|
||||
|
||||
# Replace the following placeholders with the desired model IDs:
|
||||
LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
|
||||
EMBEDDING_MODEL_ID=BAAI/bge-base-en-v1.5
|
||||
RERANK_MODEL_ID=BAAI/bge-reranker-base
|
||||
find . -name '*.yaml' -type f -exec sed -i "s#\$(LLM_MODEL_ID)#${LLM_MODEL_ID}#g" {} \;
|
||||
find . -name '*.yaml' -type f -exec sed -i "s#\$(EMBEDDING_MODEL_ID)#${EMBEDDING_MODEL_ID}#g" {} \;
|
||||
find . -name '*.yaml' -type f -exec sed -i "s#\$(RERANK_MODEL_ID)#${RERANK_MODEL_ID}#g" {} \;
|
||||
|
||||
```
|
||||
|
||||
### ChatQnA Installation
|
||||
|
||||
```bash
|
||||
# Deploy a ChatQnA pipeline using the specified YAML configuration.
|
||||
# To deploy with different configurations, simply provide a different YAML file.
|
||||
helm install chatqna helm_charts/ -f helm_charts/oob_single_node.yaml
|
||||
|
||||
# Tips: To display rendered manifests according to the given yaml.
|
||||
helm template chatqna helm_charts/ -f helm_charts/oob_single_node.yaml
|
||||
```
|
||||
|
||||
Notes: The provided [BKC manifests](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/benchmark) for single, two, and four node Kubernetes clusters are generated using this tool.
|
||||
237
ChatQnA/benchmark/performance/helm_charts/oob_single_node.yaml
Normal file
237
ChatQnA/benchmark/performance/helm_charts/oob_single_node.yaml
Normal file
@@ -0,0 +1,237 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
config:
|
||||
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
INDEX_NAME: rag-redis
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
LLM_SERVER_HOST_IP: llm-dependency-svc
|
||||
NODE_SELECTOR: chatqna-opea
|
||||
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||
RERANK_SERVER_HOST_IP: reranking-dependency-svc
|
||||
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||
|
||||
deployments:
|
||||
- name: chatqna-backend-server-deploy
|
||||
spec:
|
||||
image_name: opea/chatqna-no-wrapper
|
||||
image_tag: latest
|
||||
replicas: 1
|
||||
ports:
|
||||
- containerPort: 8888
|
||||
|
||||
- name: dataprep-deploy
|
||||
spec:
|
||||
image_name: opea/dataprep-redis
|
||||
image_tag: latest
|
||||
replicas: 1
|
||||
ports:
|
||||
- containerPort: 6007
|
||||
|
||||
- name: vector-db
|
||||
spec:
|
||||
image_name: redis/redis-stack
|
||||
image_tag: 7.2.0-v9
|
||||
replicas: 1
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
- containerPort: 8001
|
||||
|
||||
- name: retriever-deploy
|
||||
spec:
|
||||
image_name: opea/retriever-redis
|
||||
image_tag: latest
|
||||
replicas: 1
|
||||
ports:
|
||||
- containerPort: 7000
|
||||
|
||||
- name: embedding-dependency-deploy
|
||||
spec:
|
||||
image_name: ghcr.io/huggingface/text-embeddings-inference
|
||||
image_tag: cpu-1.5
|
||||
replicas: 1
|
||||
ports:
|
||||
- containerPort: 80
|
||||
args:
|
||||
- name: "--model-id"
|
||||
value: $(EMBEDDING_MODEL_ID)
|
||||
- name: "--auto-truncate"
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
|
||||
- name: reranking-dependency-deploy
|
||||
spec:
|
||||
image_name: opea/tei-gaudi
|
||||
image_tag: latest
|
||||
replicas: 1
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
args:
|
||||
- name: "--model-id"
|
||||
- value: $(RERANK_MODEL_ID)
|
||||
- name: "--auto-truncate"
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: "true"
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HF_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
- name: MAX_WARMUP_SEQUENCE_LENGTH
|
||||
value: "512"
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
|
||||
- name: llm-dependency-deploy
|
||||
spec:
|
||||
image_name: ghcr.io/huggingface/tgi-gaudi
|
||||
image_tag: 2.0.4
|
||||
replicas: 7
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
args:
|
||||
- name: "--model-id"
|
||||
value: $(LLM_MODEL_ID)
|
||||
- name: "--max-input-length"
|
||||
value: "2048"
|
||||
- name: "--max-total-tokens"
|
||||
value: "4096"
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: "true"
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HF_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
|
||||
services:
|
||||
- name: chatqna-backend-server-svc
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
nodePort: 30888
|
||||
port: 8888
|
||||
targetPort: 8888
|
||||
selector:
|
||||
app: chatqna-backend-server-deploy
|
||||
type: NodePort
|
||||
|
||||
- name: dataprep-svc
|
||||
spec:
|
||||
ports:
|
||||
- name: port1
|
||||
port: 6007
|
||||
targetPort: 6007
|
||||
selector:
|
||||
app: dataprep-deploy
|
||||
type: ClusterIP
|
||||
|
||||
- name: embedding-dependency-svc
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 6006
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: embedding-dependency-deploy
|
||||
type: ClusterIP
|
||||
|
||||
- name: llm-dependency-svc
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 9009
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
type: ClusterIP
|
||||
|
||||
- name: reranking-dependency-svc
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 8808
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: reranking-dependency-deploy
|
||||
type: ClusterIP
|
||||
|
||||
- name: retriever-svc
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 7000
|
||||
targetPort: 7000
|
||||
selector:
|
||||
app: retriever-deploy
|
||||
type: ClusterIP
|
||||
|
||||
- name: vector-db
|
||||
spec:
|
||||
ports:
|
||||
- name: vector-db-service
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
- name: vector-db-insight
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
selector:
|
||||
app: vector-db
|
||||
type: ClusterIP
|
||||
@@ -0,0 +1,25 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: qna-config
|
||||
namespace: default
|
||||
data:
|
||||
EMBEDDING_MODEL_ID: {{ .Values.config.EMBEDDING_MODEL_ID }}
|
||||
EMBEDDING_SERVER_HOST_IP: {{ .Values.config.EMBEDDING_SERVER_HOST_IP }}
|
||||
HUGGINGFACEHUB_API_TOKEN: {{ .Values.config.HUGGINGFACEHUB_API_TOKEN }}
|
||||
INDEX_NAME: {{ .Values.config.INDEX_NAME }}
|
||||
LLM_MODEL_ID: {{ .Values.config.LLM_MODEL_ID }}
|
||||
LLM_SERVER_HOST_IP: {{ .Values.config.LLM_SERVER_HOST_IP }}
|
||||
NODE_SELECTOR: {{ .Values.config.NODE_SELECTOR }}
|
||||
REDIS_URL: {{ .Values.config.REDIS_URL }}
|
||||
RERANK_MODEL_ID: {{ .Values.config.RERANK_MODEL_ID }}
|
||||
RERANK_SERVER_HOST_IP: {{ .Values.config.RERANK_SERVER_HOST_IP }}
|
||||
RETRIEVER_SERVICE_HOST_IP: {{ .Values.config.RETRIEVER_SERVICE_HOST_IP }}
|
||||
TEI_EMBEDDING_ENDPOINT: {{ .Values.config.TEI_EMBEDDING_ENDPOINT }}
|
||||
TEI_ENDPOINT: {{ .Values.config.TEI_ENDPOINT }}
|
||||
TEI_RERANKING_ENDPOINT: {{ .Values.config.TEI_RERANKING_ENDPOINT }}
|
||||
TGI_LLM_ENDPOINT: {{ .Values.config.TGI_LLM_ENDPOINT }}
|
||||
---
|
||||
@@ -0,0 +1,108 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
{{- range $deployment := .Values.deployments }}
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: {{ $deployment.name }}
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: {{ $deployment.spec.replicas }}
|
||||
selector:
|
||||
matchLabels:
|
||||
app: {{ $deployment.name }}
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: {{ $deployment.name }}
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
{{- if $deployment.spec.args }}
|
||||
args:
|
||||
{{- range $arg := $deployment.spec.args }}
|
||||
{{- if $arg.name }}
|
||||
- {{ $arg.name }}
|
||||
{{- end }}
|
||||
{{- if $arg.value }}
|
||||
- "{{ $arg.value }}"
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{- if $deployment.spec.env }}
|
||||
env:
|
||||
{{- range $env := $deployment.spec.env }}
|
||||
- name: {{ $env.name }}
|
||||
value: "{{ $env.value }}"
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
image: {{ $deployment.spec.image_name }}:{{ $deployment.spec.image_tag }}
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: {{ $deployment.name }}
|
||||
|
||||
{{- if $deployment.spec.ports }}
|
||||
ports:
|
||||
{{- range $port := $deployment.spec.ports }}
|
||||
{{- range $port_name, $port_id := $port }}
|
||||
- {{ $port_name }}: {{ $port_id }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{- if $deployment.spec.resources }}
|
||||
resources:
|
||||
{{- range $resourceType, $resource := $deployment.spec.resources }}
|
||||
{{ $resourceType }}:
|
||||
{{- range $limitType, $limit := $resource }}
|
||||
{{ $limitType }}: {{ $limit }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{- if $deployment.spec.volumeMounts }}
|
||||
volumeMounts:
|
||||
{{- range $volumeMount := $deployment.spec.volumeMounts }}
|
||||
- mountPath: {{ $volumeMount.mountPath }}
|
||||
name: {{ $volumeMount.name }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: {{ $deployment.name }}
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
|
||||
|
||||
{{- if $deployment.spec.volumes }}
|
||||
volumes:
|
||||
{{- range $index, $volume := $deployment.spec.volumes }}
|
||||
- name: {{ $volume.name }}
|
||||
{{- if $volume.hostPath }}
|
||||
hostPath:
|
||||
path: {{ $volume.hostPath.path }}
|
||||
type: {{ $volume.hostPath.type }}
|
||||
{{- else if $volume.emptyDir }}
|
||||
emptyDir:
|
||||
medium: {{ $volume.emptyDir.medium }}
|
||||
sizeLimit: {{ $volume.emptyDir.sizeLimit }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
|
||||
---
|
||||
{{- end }}
|
||||
@@ -0,0 +1,24 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
{{- range $service := .Values.services }}
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: {{ $service.name }}
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
{{- range $port := $service.spec.ports }}
|
||||
- name: {{ $port.name }}
|
||||
{{- range $port_name, $port_id := $port }}
|
||||
{{- if ne $port_name "name"}}
|
||||
{{ $port_name }}: {{ $port_id }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
selector:
|
||||
app: {{ $service.spec.selector.app }}
|
||||
type: {{ $service.spec.type }}
|
||||
---
|
||||
{{- end }}
|
||||
259
ChatQnA/benchmark/performance/helm_charts/tuned_single_node.yaml
Normal file
259
ChatQnA/benchmark/performance/helm_charts/tuned_single_node.yaml
Normal file
@@ -0,0 +1,259 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
config:
|
||||
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
INDEX_NAME: rag-redis
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
LLM_SERVER_HOST_IP: llm-dependency-svc
|
||||
NODE_SELECTOR: chatqna-opea
|
||||
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||
RERANK_SERVER_HOST_IP: reranking-dependency-svc
|
||||
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||
|
||||
deployments:
|
||||
- name: chatqna-backend-server-deploy
|
||||
spec:
|
||||
image_name: opea/chatqna-no-wrapper
|
||||
image_tag: latest
|
||||
replicas: 2
|
||||
ports:
|
||||
- containerPort: 8888
|
||||
resources:
|
||||
limits:
|
||||
cpu: "8"
|
||||
memory: "8000Mi"
|
||||
requests:
|
||||
cpu: "8"
|
||||
memory: "8000Mi"
|
||||
|
||||
- name: dataprep-deploy
|
||||
spec:
|
||||
image_name: opea/dataprep-redis
|
||||
image_tag: latest
|
||||
replicas: 1
|
||||
ports:
|
||||
- containerPort: 6007
|
||||
|
||||
- name: vector-db
|
||||
spec:
|
||||
image_name: redis/redis-stack
|
||||
image_tag: 7.2.0-v9
|
||||
replicas: 1
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
- containerPort: 8001
|
||||
|
||||
- name: retriever-deploy
|
||||
spec:
|
||||
image_name: opea/retriever-redis
|
||||
image_tag: latest
|
||||
replicas: 2
|
||||
ports:
|
||||
- containerPort: 7000
|
||||
resources:
|
||||
requests:
|
||||
cpu: "4"
|
||||
memory: "4000Mi"
|
||||
|
||||
- name: embedding-dependency-deploy
|
||||
spec:
|
||||
image_name: ghcr.io/huggingface/text-embeddings-inference
|
||||
image_tag: cpu-1.5
|
||||
replicas: 1
|
||||
ports:
|
||||
- containerPort: 80
|
||||
args:
|
||||
- name: "--model-id"
|
||||
value: $(EMBEDDING_MODEL_ID)
|
||||
- name: "--auto-truncate"
|
||||
resources:
|
||||
limits:
|
||||
cpu: "80"
|
||||
memory: "20000Mi"
|
||||
requests:
|
||||
cpu: "80"
|
||||
memory: "20000Mi"
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
|
||||
- name: reranking-dependency-deploy
|
||||
spec:
|
||||
image_name: opea/tei-gaudi
|
||||
image_tag: latest
|
||||
replicas: 1
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
args:
|
||||
- name: "--model-id"
|
||||
- value: $(RERANK_MODEL_ID)
|
||||
- name: "--auto-truncate"
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: "true"
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HF_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
- name: MAX_WARMUP_SEQUENCE_LENGTH
|
||||
value: "512"
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
|
||||
- name: llm-dependency-deploy
|
||||
spec:
|
||||
image_name: ghcr.io/huggingface/tgi-gaudi
|
||||
image_tag: 2.0.4
|
||||
replicas: 7
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
args:
|
||||
- name: "--model-id"
|
||||
value: $(LLM_MODEL_ID)
|
||||
- name: "--max-input-length"
|
||||
value: "1280"
|
||||
- name: "--max-total-tokens"
|
||||
value: "2048"
|
||||
- name: "--max-batch-total-tokens"
|
||||
value: "65536"
|
||||
- name: "--max-batch-prefill-tokens"
|
||||
value: "4096"
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: "true"
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HF_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
|
||||
services:
|
||||
- name: chatqna-backend-server-svc
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
nodePort: 30888
|
||||
port: 8888
|
||||
targetPort: 8888
|
||||
selector:
|
||||
app: chatqna-backend-server-deploy
|
||||
type: NodePort
|
||||
|
||||
- name: dataprep-svc
|
||||
spec:
|
||||
ports:
|
||||
- name: port1
|
||||
port: 6007
|
||||
targetPort: 6007
|
||||
selector:
|
||||
app: dataprep-deploy
|
||||
type: ClusterIP
|
||||
|
||||
- name: embedding-dependency-svc
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 6006
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: embedding-dependency-deploy
|
||||
type: ClusterIP
|
||||
|
||||
- name: llm-dependency-svc
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 9009
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
type: ClusterIP
|
||||
|
||||
- name: reranking-dependency-svc
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 8808
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: reranking-dependency-deploy
|
||||
type: ClusterIP
|
||||
|
||||
- name: retriever-svc
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 7000
|
||||
targetPort: 7000
|
||||
selector:
|
||||
app: retriever-deploy
|
||||
type: ClusterIP
|
||||
|
||||
- name: vector-db
|
||||
spec:
|
||||
ports:
|
||||
- name: vector-db-service
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
- name: vector-db-insight
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
selector:
|
||||
app: vector-db
|
||||
type: ClusterIP
|
||||
237
ChatQnA/benchmark/performance/helm_charts/values.yaml
Normal file
237
ChatQnA/benchmark/performance/helm_charts/values.yaml
Normal file
@@ -0,0 +1,237 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
config:
|
||||
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
INDEX_NAME: rag-redis
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
LLM_SERVER_HOST_IP: llm-dependency-svc
|
||||
NODE_SELECTOR: chatqna-opea
|
||||
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||
RERANK_SERVER_HOST_IP: reranking-dependency-svc
|
||||
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||
|
||||
deployments:
|
||||
- name: chatqna-backend-server-deploy
|
||||
spec:
|
||||
image_name: opea/chatqna-no-wrapper
|
||||
image_tag: latest
|
||||
replicas: 1
|
||||
ports:
|
||||
- containerPort: 8888
|
||||
|
||||
- name: dataprep-deploy
|
||||
spec:
|
||||
image_name: opea/dataprep-redis
|
||||
image_tag: latest
|
||||
replicas: 1
|
||||
ports:
|
||||
- containerPort: 6007
|
||||
|
||||
- name: vector-db
|
||||
spec:
|
||||
image_name: redis/redis-stack
|
||||
image_tag: 7.2.0-v9
|
||||
replicas: 1
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
- containerPort: 8001
|
||||
|
||||
- name: retriever-deploy
|
||||
spec:
|
||||
image_name: opea/retriever-redis
|
||||
image_tag: latest
|
||||
replicas: 1
|
||||
ports:
|
||||
- containerPort: 7000
|
||||
|
||||
- name: embedding-dependency-deploy
|
||||
spec:
|
||||
image_name: ghcr.io/huggingface/text-embeddings-inference
|
||||
image_tag: cpu-1.5
|
||||
replicas: 1
|
||||
ports:
|
||||
- containerPort: 80
|
||||
args:
|
||||
- name: "--model-id"
|
||||
value: $(EMBEDDING_MODEL_ID)
|
||||
- name: "--auto-truncate"
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
|
||||
- name: reranking-dependency-deploy
|
||||
spec:
|
||||
image_name: opea/tei-gaudi
|
||||
image_tag: latest
|
||||
replicas: 1
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
args:
|
||||
- name: "--model-id"
|
||||
- value: $(RERANK_MODEL_ID)
|
||||
- name: "--auto-truncate"
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: "true"
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HF_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
- name: MAX_WARMUP_SEQUENCE_LENGTH
|
||||
value: "512"
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
|
||||
- name: llm-dependency-deploy
|
||||
spec:
|
||||
image_name: ghcr.io/huggingface/tgi-gaudi
|
||||
image_tag: 2.0.4
|
||||
replicas: 7
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
args:
|
||||
- name: "--model-id"
|
||||
value: $(LLM_MODEL_ID)
|
||||
- name: "--max-input-length"
|
||||
value: "2048"
|
||||
- name: "--max-total-tokens"
|
||||
value: "4096"
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: "true"
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HF_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
|
||||
services:
|
||||
- name: chatqna-backend-server-svc
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
nodePort: 30888
|
||||
port: 8888
|
||||
targetPort: 8888
|
||||
selector:
|
||||
app: chatqna-backend-server-deploy
|
||||
type: NodePort
|
||||
|
||||
- name: dataprep-svc
|
||||
spec:
|
||||
ports:
|
||||
- name: port1
|
||||
port: 6007
|
||||
targetPort: 6007
|
||||
selector:
|
||||
app: dataprep-deploy
|
||||
type: ClusterIP
|
||||
|
||||
- name: embedding-dependency-svc
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 6006
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: embedding-dependency-deploy
|
||||
type: ClusterIP
|
||||
|
||||
- name: llm-dependency-svc
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 9009
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
type: ClusterIP
|
||||
|
||||
- name: reranking-dependency-svc
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 8808
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: reranking-dependency-deploy
|
||||
type: ClusterIP
|
||||
|
||||
- name: retriever-svc
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 7000
|
||||
targetPort: 7000
|
||||
selector:
|
||||
app: retriever-deploy
|
||||
type: ClusterIP
|
||||
|
||||
- name: vector-db
|
||||
spec:
|
||||
ports:
|
||||
- name: vector-db-service
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
- name: vector-db-insight
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
selector:
|
||||
app: vector-db
|
||||
type: ClusterIP
|
||||
Reference in New Issue
Block a user