Compare commits

...

4 Commits

Author SHA1 Message Date
bjzhjing
c8c6fa2e3e Provide unified scalable deployment and benchmarking support for exam… (#1315)
Signed-off-by: Cathy Zhang <cathy.zhang@intel.com>
Signed-off-by: letonghan <letong.han@intel.com>
Co-authored-by: letonghan <letong.han@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
(cherry picked from commit ed163087ba)
2025-01-24 22:55:38 +08:00
NeuralChatBot
905a5100f9 Freeze OPEA images tag
Signed-off-by: NeuralChatBot <grp_neural_chat_bot@intel.com>
2025-01-24 08:31:22 +00:00
chen, suyue
259099d19f Remove kubernetes manifest related code and tests (#1466)
Remove deprecated kubernetes manifest related code and tests.
The k8s implementation for those examples, based on Helm charts, will target the next release.

Signed-off-by: chensuyue <suyue.chen@intel.com>
2025-01-24 15:23:12 +08:00
chen, suyue
9a1118730b Freeze the triton version in vllm-gaudi image to 3.1.0 (#1463)
The new triton version 3.2.0 can't work with vllm-gaudi. Freeze the triton version in vllm-gaudi image to 3.1.0.

Issue create for vllm-fork: HabanaAI/vllm-fork#732
Signed-off-by: chensuyue <suyue.chen@intel.com>
2025-01-24 09:50:59 +08:00
30 changed files with 1480 additions and 3741 deletions

View File

@@ -78,7 +78,8 @@ jobs:
cd vllm && git rev-parse HEAD && cd ../
fi
if [[ $(grep -c "vllm-gaudi:" ${docker_compose_path}) != 0 ]]; then
git clone --depth 1 --branch v0.6.4.post2+Gaudi-1.19.0 https://github.com/HabanaAI/vllm-fork.git
sed -i 's/triton/triton==3.1.0/g' vllm-fork/requirements-hpu.txt
fi
git clone --depth 1 --branch ${{ inputs.opea_branch }} https://github.com/opea-project/GenAIComps.git
cd GenAIComps && git rev-parse HEAD && cd ../

View File

@@ -54,6 +54,6 @@ jobs:
${{ env.changed_files }}
Please verify if the helm charts and manifests need to be changed accordingly.
Please verify if the helm charts need to be changed accordingly.
> This issue was created automatically by CI.

View File

@@ -43,6 +43,7 @@ function build_vllm_docker_image() {
fi
cd ./vllm-fork
git checkout v0.6.4.post2+Gaudi-1.19.0
sed -i 's/triton/triton==3.1.0/g' requirements-hpu.txt
docker build --no-cache -f Dockerfile.hpu -t opea/vllm-gaudi:ci --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy
if [ $? -ne 0 ]; then
echo "opea/vllm-gaudi:ci failed"

View File

@@ -1,7 +1,6 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -xe
WORKPATH=$(dirname "$PWD")

View File

@@ -0,0 +1,83 @@
# Copyright (C) 2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
deploy:
device: gaudi
version: 1.1.0
modelUseHostPath: /mnt/models
HUGGINGFACEHUB_API_TOKEN: ""
node: [1, 2, 4, 8]
namespace: ""
services:
backend:
instance_num: [2, 2, 4, 8]
cores_per_instance: ""
memory_capacity: ""
teirerank:
enabled: True
model_id: ""
replicaCount: [1, 1, 1, 1]
cards_per_instance: 1
tei:
model_id: ""
replicaCount: [1, 2, 4, 8]
cores_per_instance: ""
memory_capacity: ""
llm:
engine: tgi
model_id: ""
replicaCount: [7, 15, 31, 63]
max_batch_size: [1, 2, 4, 8]
max_input_length: ""
max_total_tokens: ""
max_batch_total_tokens: ""
max_batch_prefill_tokens: ""
cards_per_instance: 1
data-prep:
replicaCount: [1, 1, 1, 1]
cores_per_instance: ""
memory_capacity: ""
retriever-usvc:
replicaCount: [2, 2, 4, 8]
cores_per_instance: ""
memory_capacity: ""
redis-vector-db:
replicaCount: [1, 1, 1, 1]
cores_per_instance: ""
memory_capacity: ""
chatqna-ui:
replicaCount: [1, 1, 1, 1]
nginx:
replicaCount: [1, 1, 1, 1]
benchmark:
# http request behavior related fields
concurrency: [1, 2, 4]
totoal_query_num: [2048, 4096]
duration: [5, 10] # unit minutes
query_num_per_concurrency: [4, 8, 16]
possion: True
possion_arrival_rate: 1.0
warmup_iterations: 10
seed: 1024
# workload, all of the test cases will run for benchmark
test_cases:
- chatqnafixed
- chatqna_qlist_pubmed:
dataset: pub_med10 # pub_med10, pub_med100, pub_med1000
user_queries: [1, 2, 4]
query_token_size: 128 # if specified, means fixed query token size will be sent out
llm:
# specify the llm output token size
max_token_size: [128, 256]

View File

@@ -1,64 +0,0 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -xe
USER_ID=$(whoami)
MOUNT_DIR=/home/$USER_ID/.cache/huggingface/hub
IMAGE_REPO=${IMAGE_REPO:-opea}
IMAGE_TAG=${IMAGE_TAG:-latest}
ROLLOUT_TIMEOUT_SECONDS="1800s"
KUBECTL_TIMEOUT_SECONDS="60s"
function init_chatqna() {
# replace the mount dir "path: /mnt/opea-models" with "path: $MOUNT_DIR"
find ../../kubernetes/intel/*/*/manifest -name '*.yaml' -type f -exec sed -i "s#path: /mnt/opea-models#path: $MOUNT_DIR#g" {} \;
# replace microservice image tag
find ../../kubernetes/intel/*/*/manifest -name '*.yaml' -type f -exec sed -i "s#image: \"opea/\(.*\):latest#image: \"opea/\1:${IMAGE_TAG}#g" {} \;
# replace the repository "image: opea/*" with "image: $IMAGE_REPO/"
find ../../kubernetes/intel/*/*/manifest -name '*.yaml' -type f -exec sed -i "s#image: \"opea/*#image: \"${IMAGE_REPO}/#g" {} \;
# set huggingface token
find ../../kubernetes/intel/*/*/manifest -name '*.yaml' -type f -exec sed -i "s#insert-your-huggingface-token-here#$(cat /home/$USER_ID/.cache/huggingface/token)#g" {} \;
}
function get_end_point() {
# $1 is service name, $2 is namespace
ip_address=$(kubectl get svc $1 -n $2 -o jsonpath='{.spec.clusterIP}')
port=$(kubectl get svc $1 -n $2 -o jsonpath='{.spec.ports[0].port}')
echo "$ip_address:$port"
}
function _cleanup_ns() {
local ns=$1
if kubectl get ns $ns; then
if ! kubectl delete ns $ns --timeout=$KUBECTL_TIMEOUT_SECONDS; then
kubectl delete pods --namespace $ns --force --grace-period=0 --all
kubectl delete ns $ns --force --grace-period=0 --timeout=$KUBECTL_TIMEOUT_SECONDS
fi
fi
}
if [ $# -eq 0 ]; then
echo "Usage: $0 <function_name>"
exit 1
fi
case "$1" in
init_ChatQnA)
init_chatqna
;;
get_end_point)
service=$2
NAMESPACE=$3
get_end_point $service $NAMESPACE
;;
_cleanup_ns)
NAMESPACE=$2
_cleanup_ns $NAMESPACE
;;
*)
echo "Unknown function: $1"
;;
esac

View File

@@ -29,6 +29,7 @@ function build_docker_images() {
cd $WORKPATH/docker_image_build
git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
git clone --depth 1 --branch v0.6.4.post2+Gaudi-1.19.0 https://github.com/HabanaAI/vllm-fork.git
sed -i 's/triton/triton==3.1.0/g' vllm-fork/requirements-hpu.txt
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="chatqna-guardrails chatqna-ui dataprep retriever vllm-gaudi guardrails nginx"

View File

@@ -29,6 +29,7 @@ function build_docker_images() {
cd $WORKPATH/docker_image_build
git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
git clone --depth 1 --branch v0.6.4.post2+Gaudi-1.19.0 https://github.com/HabanaAI/vllm-fork.git
sed -i 's/triton/triton==3.1.0/g' vllm-fork/requirements-hpu.txt
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="chatqna chatqna-ui dataprep retriever vllm-gaudi nginx"

View File

@@ -29,6 +29,7 @@ function build_docker_images() {
cd $WORKPATH/docker_image_build
git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
git clone --depth 1 --branch v0.6.4.post2+Gaudi-1.19.0 https://github.com/HabanaAI/vllm-fork.git
sed -i 's/triton/triton==3.1.0/g' vllm-fork/requirements-hpu.txt
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="chatqna-without-rerank chatqna-ui dataprep retriever vllm-gaudi nginx"

View File

@@ -180,4 +180,3 @@ Utilizes the open-source platform **Keycloak** for single sign-on identity and a
- **[Keycloak Configuration Guide](./docker_compose/intel/cpu/xeon/keycloak_setup_guide.md)**: Instructions to set up Keycloak for identity and access management.
- **[Xeon Guide](./docker_compose/intel/cpu/xeon/README.md)**: Instructions to build Docker images from source and run the application via Docker Compose.
- **[Xeon Kubernetes Guide](./kubernetes/intel/README.md)**: Instructions to deploy the application via Kubernetes.

View File

@@ -1,111 +0,0 @@
# 🚀 Deploy ProductivitySuite with ReactUI
This document outlines the steps to deploy ProductivitySuite on a Kubernetes cluster using the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline components and ReactUI, a React-based user interface.
ProductivitySuite consists of the following pipelines/examples and components:
```
- productivity-suite-react-ui
- chatqna
- codegen
- docsum
- faqgen
- dataprep via redis
- chat-history
- prompt-registry
- mongo
- keycloak
```
---
## ⚠️ Prerequisites for Deploying ProductivitySuite with ReactUI
To begin with, ensure that you have the following prerequisites in place:
1. ☸ Kubernetes installation: Make sure that you have Kubernetes installed.
2. 🐳 Images: Make sure you have all the images ready for the examples and components stated above. You may refer to [README](../../docker_compose/intel/cpu/xeon/README.md) for steps to build the images.
3. 🔧 Configuration Values: Set the following values in all the yaml files before proceeding with the deployment:
Download and set up yq for YAML processing:
```
sudo wget -qO /usr/local/bin/yq https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64
sudo chmod a+x /usr/local/bin/yq
cd GenAIExamples/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/
. ../utils
```
a. HUGGINGFACEHUB_API_TOKEN (Your HuggingFace token to download your desired model from HuggingFace):
```
# Set the HUGGINGFACEHUB_API_TOKEN as follows:
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
set_hf_token $HUGGINGFACEHUB_API_TOKEN
```
b. Set the proxies based on your network configuration
```
# Look for the http_proxy, https_proxy and no_proxy keys and fill in the values in all the yaml files with your system proxy configuration.
set_http_proxy $http_proxy
set_https_proxy $https_proxy
set_no_proxy $no_proxy
```
c. Set all the backend service endpoints for the React UI service
```
# Set up all the backend service endpoints in productivity_suite_reactui.yaml for the UI to consume.
# Look for ENDPOINT in the yaml and insert the URL endpoints for all the required backend services.
set_services_endpoint
```
4. MODEL_ID and model-volume **(OPTIONAL)**: You may also customize "MODEL_ID" to use a different model, and model-volume to change the volume to be mounted.
```
sudo mkdir -p /mnt/opea-models
sudo chmod -R a+xwr /mnt/opea-models
set_model_id
```
5. MODEL_MIRROR **(OPTIONAL)**: Set a Hugging Face mirror if you cannot access the Hugging Face website directly from your region. For example, in the PRC you can set it to https://hf-mirror.com.
```
set_model_mirror
```
6. After finishing the steps above, review the changes before proceeding with the deployment of the yaml files.
```
git diff
```
---
## 🌐 Deploying ProductivitySuite
You can use the yaml files in the xeon folder to deploy ProductivitySuite with ReactUI.
```
cd GenAIExamples/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/
kubectl apply -f .
```
---
## 🔐 User Management via Keycloak Configuration
Please refer to the **[keycloak_setup_guide](../../docker_compose/intel/cpu/xeon/keycloak_setup_guide.md)** for more details on the Keycloak configuration setup.
---
## ✅ Verify Services
To verify the installation, run the command `kubectl get pod` to make sure all pods are running.
To view all the available services, run the command `kubectl get svc` to obtain the ports that need to be used as backend service endpoints in productivity_suite_reactui.yaml.
You may use `kubectl port-forward service/<service_name> <forwarded_port>:<service_port>` to forward the port of any service if necessary.
```
# For example, 'kubectl get svc | grep productivity'
productivity-suite-react-ui ClusterIP 10.96.3.236 <none> 80/TCP
# By default, the productivity-suite-react-ui service exposes port 80; forward it to 5174 via:
kubectl port-forward service/productivity-suite-react-ui 5174:80
```
Or, a simpler way to forward the productivity suite service port:
```
label='app.kubernetes.io/name=react-ui'
port=$(kubectl -n ${ns:-default} get svc -l ${label} -o jsonpath='{.items[0].spec.ports[0].port}')
kubectl port-forward service/productivity-suite-react-ui 5174:$port
```
You may then open the productivity suite React UI at http://localhost:5174 in your browser.

View File

@@ -1,75 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
---
apiVersion: v1
kind: ConfigMap
metadata:
name: chat-history-config
data:
http_proxy: ""
https_proxy: ""
no_proxy: ""
MONGO_HOST: "mongo"
MONGO_PORT: "27017"
DB_NAME: "OPEA"
COLLECTION_NAME: "ChatHistory"
---
apiVersion: v1
kind: Service
metadata:
name: chat-history
labels:
helm.sh/chart: chat-history-0.1.0
app.kubernetes.io/name: chat-history
app.kubernetes.io/instance: chat-history
app.kubernetes.io/version: "1.0.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 6012
targetPort: 6012
protocol: TCP
name: chat-history
selector:
app.kubernetes.io/name: chat-history
app.kubernetes.io/instance: chat-history
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: chat-history
labels:
helm.sh/chart: chat-history-0.1.0
app.kubernetes.io/name: chat-history
app.kubernetes.io/instance: chat-history
app.kubernetes.io/version: "1.0.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: chat-history
app.kubernetes.io/instance: chat-history
template:
metadata:
labels:
app.kubernetes.io/name: chat-history
app.kubernetes.io/instance: chat-history
spec:
securityContext: null
containers:
- name: chat-history
envFrom:
- configMapRef:
name: chat-history-config
securityContext: null
image: "opea/chathistory-mongo-server:latest"
imagePullPolicy: IfNotPresent
ports:
- name: chat-history
containerPort: 6012
protocol: TCP
resources: null
---

View File

@@ -1,333 +0,0 @@
---
# Source: codegen/charts/llm-uservice/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: codegen-llm-uservice-config
labels:
helm.sh/chart: llm-uservice-0.8.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "1.0.0"
app.kubernetes.io/managed-by: Helm
data:
TGI_LLM_ENDPOINT: "http://codegen-tgi"
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
HF_HOME: "/tmp/.cache/huggingface"
http_proxy: ""
https_proxy: ""
no_proxy: ""
LANGCHAIN_TRACING_V2: "false"
LANGCHAIN_API_KEY: insert-your-langchain-key-here
LANGCHAIN_PROJECT: "opea-llm-uservice"
---
# Source: codegen/charts/tgi/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: codegen-tgi-config
labels:
helm.sh/chart: tgi-0.8.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "1.4"
app.kubernetes.io/managed-by: Helm
data:
MODEL_ID: "meta-llama/CodeLlama-7b-hf"
PORT: "2080"
HUGGING_FACE_HUB_TOKEN: "insert-your-huggingface-token-here"
HF_TOKEN: "insert-your-huggingface-token-here"
MAX_INPUT_TOKENS: "1024"
MAX_TOTAL_TOKENS: "4096"
http_proxy: ""
https_proxy: ""
no_proxy: ""
HABANA_LOGS: "/tmp/habana_logs"
NUMBA_CACHE_DIR: "/tmp"
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
HF_HOME: "/tmp/.cache/huggingface"
---
# Source: codegen/charts/llm-uservice/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: codegen-llm-uservice
labels:
helm.sh/chart: llm-uservice-0.8.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "1.0.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 9000
targetPort: 9000
protocol: TCP
name: llm-uservice
selector:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: codegen
---
# Source: codegen/charts/tgi/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: codegen-tgi
labels:
helm.sh/chart: tgi-0.8.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "1.4"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 80
targetPort: 2080
protocol: TCP
name: tgi
selector:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codegen
---
# Source: codegen/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: codegen
labels:
helm.sh/chart: codegen-0.8.0
app.kubernetes.io/name: codegen
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "1.0.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 7778
targetPort: 7778
protocol: TCP
name: codegen
selector:
app.kubernetes.io/name: codegen
app.kubernetes.io/instance: codegen
---
# Source: codegen/charts/llm-uservice/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: codegen-llm-uservice
labels:
helm.sh/chart: llm-uservice-0.8.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "1.0.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: codegen
template:
metadata:
labels:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: codegen
spec:
securityContext:
{}
containers:
- name: codegen
envFrom:
- configMapRef:
name: codegen-llm-uservice-config
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: false
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/llm-textgen:latest"
imagePullPolicy: IfNotPresent
ports:
- name: llm-uservice
containerPort: 9000
protocol: TCP
volumeMounts:
- mountPath: /tmp
name: tmp
startupProbe:
exec:
command:
- curl
- http://codegen-tgi
initialDelaySeconds: 5
periodSeconds: 5
failureThreshold: 120
resources:
{}
volumes:
- name: tmp
emptyDir: {}
---
# Source: codegen/charts/tgi/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: codegen-tgi
labels:
helm.sh/chart: tgi-0.8.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "1.4"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codegen
template:
metadata:
labels:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codegen
spec:
securityContext:
{}
containers:
- name: tgi
envFrom:
- configMapRef:
name: codegen-tgi-config
securityContext:
{}
image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /tmp
name: tmp
ports:
- name: http
containerPort: 2080
protocol: TCP
resources:
{}
volumes:
- name: model-volume
hostPath:
path: /mnt/opea-models
type: Directory
- name: tmp
emptyDir: {}
---
# Source: codegen/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: codegen
labels:
helm.sh/chart: codegen-0.8.0
app.kubernetes.io/name: codegen
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "1.0.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: codegen
app.kubernetes.io/instance: codegen
template:
metadata:
labels:
app.kubernetes.io/name: codegen
app.kubernetes.io/instance: codegen
spec:
securityContext:
null
containers:
- name: codegen
env:
- name: LLM_SERVICE_HOST_IP
value: codegen-llm-uservice
- name: http_proxy
value: ""
- name: https_proxy
value: ""
- name: no_proxy
value: ""
#- name: MEGA_SERVICE_PORT
# value: 7778
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/codegen:latest"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /tmp
name: tmp
ports:
- name: codegen
containerPort: 7778
protocol: TCP
# startupProbe:
# httpGet:
# host: codegen-llm-uservice
# port: 9000
# path: /
# initialDelaySeconds: 5
# periodSeconds: 5
# failureThreshold: 120
# livenessProbe:
# httpGet:
# path: /
# port: 7778
# readinessProbe:
# httpGet:
# path: /
# port: 7778
resources:
null
volumes:
- name: tmp
emptyDir: {}

View File

@@ -1,317 +0,0 @@
---
# Source: docsum/charts/llm-uservice/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: docsum-llm-uservice-config
labels:
helm.sh/chart: llm-uservice-0.8.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: docsum
app.kubernetes.io/version: "1.0.0"
app.kubernetes.io/managed-by: Helm
data:
TGI_LLM_ENDPOINT: "http://docsum-tgi"
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
HF_HOME: "/tmp/.cache/huggingface"
http_proxy: ""
https_proxy: ""
no_proxy: ""
LANGCHAIN_TRACING_V2: "false"
LANGCHAIN_API_KEY: insert-your-langchain-key-here
LANGCHAIN_PROJECT: "opea-llm-uservice"
---
# Source: docsum/charts/tgi/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: docsum-tgi-config
labels:
helm.sh/chart: tgi-0.8.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: docsum
app.kubernetes.io/version: "2.1.0"
app.kubernetes.io/managed-by: Helm
data:
MODEL_ID: "Intel/neural-chat-7b-v3-3"
PORT: "2080"
HUGGING_FACE_HUB_TOKEN: "insert-your-huggingface-token-here"
HF_TOKEN: "insert-your-huggingface-token-here"
MAX_INPUT_TOKENS: "1024"
MAX_TOTAL_TOKENS: "4096"
http_proxy: ""
https_proxy: ""
no_proxy: ""
HABANA_LOGS: "/tmp/habana_logs"
NUMBA_CACHE_DIR: "/tmp"
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
HF_HOME: "/tmp/.cache/huggingface"
---
# Source: docsum/charts/llm-uservice/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: docsum-llm-uservice
labels:
helm.sh/chart: llm-uservice-0.8.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: docsum
app.kubernetes.io/version: "1.0.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 9000
targetPort: 9000
protocol: TCP
name: llm-uservice
selector:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: docsum
---
# Source: docsum/charts/tgi/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: docsum-tgi
labels:
helm.sh/chart: tgi-0.8.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: docsum
app.kubernetes.io/version: "2.1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 80
targetPort: 2080
protocol: TCP
name: tgi
selector:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: docsum
---
# Source: docsum/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: docsum
labels:
helm.sh/chart: docsum-0.8.0
app.kubernetes.io/name: docsum
app.kubernetes.io/instance: docsum
app.kubernetes.io/version: "1.0.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 8888
targetPort: 8888
protocol: TCP
name: docsum
selector:
app.kubernetes.io/name: docsum
app.kubernetes.io/instance: docsum
---
# Source: docsum/charts/llm-uservice/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: docsum-llm-uservice
labels:
helm.sh/chart: llm-uservice-0.8.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: docsum
app.kubernetes.io/version: "1.0.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: docsum
template:
metadata:
labels:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: docsum
spec:
securityContext:
{}
containers:
- name: docsum
envFrom:
- configMapRef:
name: docsum-llm-uservice-config
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: false
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/llm-docsum-tgi:latest"
imagePullPolicy: IfNotPresent
ports:
- name: llm-uservice
containerPort: 9000
protocol: TCP
volumeMounts:
- mountPath: /tmp
name: tmp
startupProbe:
exec:
command:
- curl
- http://docsum-tgi
initialDelaySeconds: 5
periodSeconds: 5
failureThreshold: 120
resources:
{}
volumes:
- name: tmp
emptyDir: {}
---
# Source: docsum/charts/tgi/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: docsum-tgi
labels:
helm.sh/chart: tgi-0.8.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: docsum
app.kubernetes.io/version: "2.1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: docsum
template:
metadata:
labels:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: docsum
spec:
securityContext:
{}
containers:
- name: tgi
envFrom:
- configMapRef:
name: docsum-tgi-config
securityContext:
{}
image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /tmp
name: tmp
ports:
- name: http
containerPort: 2080
protocol: TCP
resources:
{}
volumes:
- name: model-volume
hostPath:
path: /mnt/opea-models
type: Directory
- name: tmp
emptyDir: {}
---
# Source: docsum/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: docsum
labels:
helm.sh/chart: docsum-0.8.0
app.kubernetes.io/name: docsum
app.kubernetes.io/instance: docsum
app.kubernetes.io/version: "1.0.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: docsum
app.kubernetes.io/instance: docsum
template:
metadata:
labels:
app.kubernetes.io/name: docsum
app.kubernetes.io/instance: docsum
spec:
securityContext:
null
containers:
- name: docsum
env:
- name: LLM_SERVICE_HOST_IP
value: docsum-llm-uservice
- name: http_proxy
value: ""
- name: https_proxy
value: ""
- name: no_proxy
value: ""
#- name: MEGA_SERVICE_PORT
# value: 8888
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/docsum:latest"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /tmp
name: tmp
ports:
- name: docsum
containerPort: 8888
protocol: TCP
resources:
null
volumes:
- name: tmp
emptyDir: {}

View File

@@ -1,243 +0,0 @@
---
# Source: faqgen/charts/llm-uservice/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: faqgen-llm-uservice-config
labels:
helm.sh/chart: llm-uservice-0.8.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: faqgen
app.kubernetes.io/version: "1.0.0"
app.kubernetes.io/managed-by: Helm
data:
TGI_LLM_ENDPOINT: "http://faqgen-tgi:80"
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
http_proxy: ""
https_proxy: ""
no_proxy: ""
---
# Source: faqgen/charts/tgi/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: faqgen-tgi-config
labels:
helm.sh/chart: tgi-0.8.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: faqgen
app.kubernetes.io/version: "2.1.0"
app.kubernetes.io/managed-by: Helm
data:
MODEL_ID: "Intel/neural-chat-7b-v3-3"
PORT: "80"
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
http_proxy: ""
https_proxy: ""
no_proxy: ""
---
# Source: faqgen/charts/llm-uservice/charts/tgi/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: faqgen-tgi
labels:
helm.sh/chart: tgi-0.8.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: faqgen
app.kubernetes.io/version: "2.1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 80
targetPort: 80
protocol: TCP
name: tgi
selector:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: faqgen
---
apiVersion: v1
kind: Service
metadata:
name: faqgen-llm-uservice
labels:
helm.sh/chart: llm-uservice-0.8.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: faqgen
app.kubernetes.io/version: "1.0.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 9000
targetPort: 9000
protocol: TCP
name: llm-uservice
selector:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: faqgen
---
apiVersion: v1
kind: Service
metadata:
name: faqgen
labels:
helm.sh/chart: faqgen-0.8.0
app.kubernetes.io/name: faqgen
app.kubernetes.io/instance: faqgen
app.kubernetes.io/version: "1.0.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 8888
targetPort: 8888
protocol: TCP
name: faqgen
selector:
app.kubernetes.io/name: faqgen
app.kubernetes.io/instance: faqgen
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: faqgen-tgi
labels:
helm.sh/chart: tgi-0.8.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: faqgen
app.kubernetes.io/version: "2.1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: faqgen
template:
metadata:
labels:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: faqgen
spec:
securityContext: {}
containers:
- name: tgi
envFrom:
- configMapRef:
name: faqgen-tgi-config
securityContext: {}
image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data
name: model-volume
ports:
- name: http
containerPort: 80
protocol: TCP
resources: {}
volumes:
- name: model-volume
hostPath:
path: /mnt/opea-models
type: Directory
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: faqgen-llm-uservice
labels:
helm.sh/chart: llm-uservice-0.8.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: faqgen
app.kubernetes.io/version: "1.0.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: faqgen
template:
metadata:
labels:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: faqgen
spec:
securityContext: {}
containers:
- name: faqgen
envFrom:
- configMapRef:
name: faqgen-llm-uservice-config
securityContext: {}
image: "opea/llm-faqgen:latest"
imagePullPolicy: IfNotPresent
ports:
- name: llm-uservice
containerPort: 9000
protocol: TCP
startupProbe:
exec:
command:
- curl
- http://faqgen-tgi:80
initialDelaySeconds: 5
periodSeconds: 5
failureThreshold: 120
resources: {}
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: faqgen
labels:
helm.sh/chart: faqgen-0.8.0
app.kubernetes.io/name: faqgen
app.kubernetes.io/instance: faqgen
app.kubernetes.io/version: "1.0.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: faqgen
app.kubernetes.io/instance: faqgen
template:
metadata:
labels:
app.kubernetes.io/name: faqgen
app.kubernetes.io/instance: faqgen
spec:
securityContext: null
containers:
- name: faqgen
env:
- name: LLM_SERVICE_HOST_IP
value: faqgen-llm-uservice
- name: http_proxy
value: ""
- name: https_proxy
value: ""
- name: no_proxy
value: ""
securityContext: null
image: "opea/faqgen:latest"
imagePullPolicy: IfNotPresent
ports:
- name: faqgen
containerPort: 8888
protocol: TCP
resources: null

View File

@@ -1,66 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: keycloak
spec:
progressDeadlineSeconds: 600
replicas: 1
revisionHistoryLimit: 10
selector:
matchLabels:
app: keycloak
template:
metadata:
labels:
app: keycloak
spec:
containers:
- args:
- start-dev
env:
- name: KEYCLOAK_ADMIN
value: admin
- name: KEYCLOAK_ADMIN_PASSWORD
value: admin
- name: KC_PROXY
value: edge
image: quay.io/keycloak/keycloak:25.0.2
imagePullPolicy: IfNotPresent
name: keycloak
ports:
- containerPort: 8080
name: http
protocol: TCP
readinessProbe:
failureThreshold: 3
httpGet:
path: /realms/master
port: 8080
scheme: HTTP
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 1
resources: {}
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: File
dnsPolicy: ClusterFirst
restartPolicy: Always
---
apiVersion: v1
kind: Service
metadata:
name: keycloak
spec:
allocateLoadBalancerNodePorts: true
ports:
- name: http
nodePort: 31503
port: 8080
protocol: TCP
targetPort: 8080
selector:
app: keycloak
type: LoadBalancer

View File

@@ -1,71 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
---
apiVersion: v1
kind: ConfigMap
metadata:
name: mongo-config
data:
http_proxy: ""
https_proxy: ""
no_proxy: ""
---
apiVersion: v1
kind: Service
metadata:
name: mongo
labels:
helm.sh/chart: mongo-0.1.0
app.kubernetes.io/name: mongo
app.kubernetes.io/instance: mongo
app.kubernetes.io/version: "1.0.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 27017
targetPort: 27017
protocol: TCP
name: mongo
selector:
app.kubernetes.io/name: mongo
app.kubernetes.io/instance: mongo
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: mongo
labels:
helm.sh/chart: mongo-0.1.0
app.kubernetes.io/name: mongo
app.kubernetes.io/instance: mongo
app.kubernetes.io/version: "1.0.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: mongo
app.kubernetes.io/instance: mongo
template:
metadata:
labels:
app.kubernetes.io/name: mongo
app.kubernetes.io/instance: mongo
spec:
securityContext: null
containers:
- name: mongo
envFrom:
- configMapRef:
name: mongo-config
securityContext: null
image: "mongo:7.0.11"
imagePullPolicy: IfNotPresent
ports:
- name: mongo
containerPort: 27017
protocol: TCP
resources: null
command: ["mongod", "--bind_ip", "0.0.0.0", "--quiet", "--logpath", "/dev/null"]

View File

@@ -1,91 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
---
apiVersion: v1
kind: Service
metadata:
name: productivity-suite-react-ui
labels:
helm.sh/chart: productivity-suite-react-ui-0.1.0
app.kubernetes.io/name: react-ui
app.kubernetes.io/instance: productivity-suite
app.kubernetes.io/version: "1.0.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 80
targetPort: 80
protocol: TCP
name: react-ui
selector:
app.kubernetes.io/name: react-ui
app.kubernetes.io/instance: productivity-suite
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: productivity-suite-react-ui
labels:
helm.sh/chart: productivity-suite-react-ui-0.1.0
app.kubernetes.io/name: react-ui
app.kubernetes.io/instance: productivity-suite
app.kubernetes.io/version: "1.0.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: react-ui
app.kubernetes.io/instance: productivity-suite
template:
metadata:
labels:
app.kubernetes.io/name: react-ui
app.kubernetes.io/instance: productivity-suite
spec:
securityContext: null
containers:
- name: productivity-suite-react-ui
env:
- name: http_proxy
value: ""
- name: https_proxy
value: ""
- name: no_proxy
value: ""
- name: APP_BACKEND_SERVICE_ENDPOINT_CHATQNA
value: ""
- name: APP_BACKEND_SERVICE_ENDPOINT_CODEGEN
value: ""
- name: APP_BACKEND_SERVICE_ENDPOINT_DOCSUM
value: ""
- name: APP_BACKEND_SERVICE_ENDPOINT_FAQGEN
value: ""
- name: APP_DATAPREP_SERVICE_ENDPOINT
value: ""
- name: APP_DATAPREP_GET_FILE_ENDPOINT
value: ""
- name: APP_DATAPREP_DELETE_FILE_ENDPOINT
value: ""
- name: APP_CHAT_HISTORY_CREATE_ENDPOINT
value: ""
- name: APP_CHAT_HISTORY_DELETE_ENDPOINT
value: ""
- name: APP_CHAT_HISTORY_GET_ENDPOINT
value: ""
- name: APP_PROMPT_SERVICE_GET_ENDPOINT
value: ""
- name: APP_PROMPT_SERVICE_CREATE_ENDPOINT
value: ""
- name: APP_KEYCLOAK_SERVICE_ENDPOINT
value: ""
securityContext: null
image: "opea/productivity-suite-react-ui-server:latest"
imagePullPolicy: IfNotPresent
ports:
- name: react-ui
containerPort: 80
protocol: TCP
resources: null

View File

@@ -1,75 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
---
apiVersion: v1
kind: ConfigMap
metadata:
name: prompt-registry-config
data:
http_proxy: ""
https_proxy: ""
no_proxy: ""
MONGO_HOST: "mongo"
MONGO_PORT: "27017"
DB_NAME: "OPEA"
COLLECTION_NAME: "Prompt"
---
apiVersion: v1
kind: Service
metadata:
name: prompt-registry
labels:
helm.sh/chart: prompt-registry-0.1.0
app.kubernetes.io/name: prompt-registry
app.kubernetes.io/instance: prompt-registry
app.kubernetes.io/version: "1.0.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 6018
targetPort: 6018
protocol: TCP
name: prompt-registry
selector:
app.kubernetes.io/name: prompt-registry
app.kubernetes.io/instance: prompt-registry
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: prompt-registry
labels:
helm.sh/chart: prompt-registry-0.1.0
app.kubernetes.io/name: prompt-registry
app.kubernetes.io/instance: prompt-registry
app.kubernetes.io/version: "1.0.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: prompt-registry
app.kubernetes.io/instance: prompt-registry
template:
metadata:
labels:
app.kubernetes.io/name: prompt-registry
app.kubernetes.io/instance: prompt-registry
spec:
securityContext: null
containers:
- name: prompt-registry
envFrom:
- configMapRef:
name: prompt-registry-config
securityContext: null
image: "opea/promptregistry-mongo-server:latest"
imagePullPolicy: IfNotPresent
ports:
- name: prompt-registry
containerPort: 6018
protocol: TCP
resources: null
---

View File

@@ -1,157 +0,0 @@
set_model_id() {
if [ -z "$1" ] && [ -z "$2" ]; then
yq -o json '.| select(.data | has("MODEL_ID"))| {"ConfigMap": .metadata.name, "MODEL_ID": .data.MODEL_ID}' *.yaml
echo "usage:"
echo " set_model_id \${ConfigMap} \${MODEL_ID}"
return
fi
conf=$1
file=${1%%-*}
sed -i '/name: '"${conf}"'/,/---/s|\(MODEL_ID:\).*|\1 "'"${2}"'"|' ${file}.yaml
}
set_model_mirror() {
if [ -z "$1" ] ; then
yq -o json '.| select(.data | has("MODEL_ID"))| {"ConfigMap": .metadata.name, "MODEL_MIRROR": .data.HF_ENDPOINT}' *.yaml
echo "usage:"
echo " set_model_mirror \${MODEL_MIRROR}"
return
fi
cm=$(yq -r -o json '.| select(.data | has("MODEL_ID"))| .metadata.name' *.yaml)
mirror=$1
for i in $cm; do
conf=$i
file=${i%%-*}
echo "ConfigMap: $conf set mirror as $mirror"
has_mirror=$(yq -r -o json '.| select(.metadata.name == "'"${conf}"'")| .data.HF_ENDPOINT' ${file}.yaml)
if [ "$has_mirror" == "null" ]; then
sed -i '/name: '"${conf}"'/,/---/s|\(data:\)|\1\n HF_ENDPOINT: "'"${mirror}"'"|' ${file}.yaml
else
sed -i '/name: '"${conf}"'/,/---/s|\(HF_ENDPOINT:\).*|\1 "'"${1}"'"|' ${file}.yaml
fi
done
}
set_hf_token() {
if [ -z "$1" ] ; then
echo "usage:"
echo " set_hf_token \${HF_TOKEN}"
return
fi
sed -i "s/\(HF_TOKEN:\).*/\1 \"${1}\"/g" *.yaml
sed -i "s/\(HUGGINGFACEHUB_API_TOKEN:\).*/\1 \"${1}\"/g" *.yaml
sed -i "s/\(HUGGING_FACE_HUB_TOKEN:\).*/\1 \"${1}\"/g" *.yaml
}
set_https_proxy() {
if [ -z "$1" ] ; then
echo "usage:"
echo " set_https_proxy \${https_proxy}"
return
fi
https_proxy=$1
sed -i -e "s|\(https_proxy:\)\s*\"\"|\1 \"$https_proxy\"|g" *.yaml
sed -i '/https_proxy/{n;s|\(value:\)\s.*""|\1 "'"$https_proxy"'"|g}' *.yaml
}
set_http_proxy() {
if [ -z "$1" ] ; then
echo "usage:"
echo " set_http_proxy \${http_proxy}"
return
fi
http_proxy=$1
sed -i -e "s|\(http_proxy:\)\s*\"\"|\1 \"$http_proxy\"|g" *.yaml
sed -i '/http_proxy/{n;s|\(value:\)\s.*""|\1 "'"$http_proxy"'"|g}' *.yaml
}
set_no_proxy() {
if [ -z "$1" ] ; then
echo "usage:"
echo " set_no_proxy \${no_proxy}"
return
fi
no_proxy=$1
sed -i -e "s|\(no_proxy:\)\s*\"\"|\1 \"$no_proxy\"|g" *.yaml
sed -i '/no_proxy/{n;s|\(value:\)\s.*""|\1 "'"$no_proxy"'"|g}' *.yaml
}
set_backend_service_endpoint() {
for i in $(grep -oP "(?<=APP_BACKEND_SERVICE_ENDPOINT_).*" *.yaml); do
echo $i
name=${i##*:}
file=${name,,}.yaml
svc=$(yq -o json '. | select(.metadata.name == "'"${name,,}"'" and .kind=="Service")' $file)
port=$(jq .spec.ports[0].port <<< $svc)
url=http://${name,,}.${ns:-default}.svc.cluster.local:${port}
echo $url
sed -i -e '/APP_BACKEND_SERVICE_ENDPOINT_'"$name"'/{n;s|\(value:\)\s.*|\1 "'"$url"'"|}' productivity_suite_reactui.yaml
done
}
set_dataprep_service_endpoint() {
name=chatqna-data-prep
file=chatqna.yaml
svc=$(yq -o json '. | select(.metadata.name == "'"$name"'" and .kind=="Service")' $file)
port=$(jq .spec.ports[0].port <<< $svc)
url=http://${name}.${ns:-default}.svc.cluster.local:${port}
echo $url
for i in $(grep -oP "(?<=APP_)DATAPREP.*(?=_ENDPOINT)" *.yaml); do
echo $i
curd=${i##*:};
sed -i -e '/'"$curd"'/{n;s|\(value:\)\s.*|\1 "'"$url"'"|}' productivity_suite_reactui.yaml;
done
}
set_chat_history_endpoint() {
for i in $(grep -oP "(?<=APP_)CHAT_HISTORY.*(?=_ENDPOINT)" *.yaml); do
echo $i;
curd=${i##*:};
name=${curd%_*};
file=${name,,}.yaml;
name=${name/_/-};
svc=$(yq -o json '. | select(.metadata.name == "'"${name,,}"'" and .kind=="Service")' $file)
port=$(jq .spec.ports[0].port <<< $svc)
url=http://${name,,}.${ns:-default}.svc.cluster.local:${port};
echo $url;
sed -i -e '/'"$curd"'/{n;s|\(value:\)\s.*|\1 "'"$url"'"|}' productivity_suite_reactui.yaml;
done
}
set_prompt_service_endpoint() {
for i in $(grep -oP "(?<=APP_)PROMPT_SERVICE.*(?=_ENDPOINT)" *.yaml); do
echo $i;
curd=${i##*:};
curdr=${curd/SERVICE/REGISTRY};
name=${curdr%_*};
file=${name,,}.yaml;
name=${name/_/-};
svc=$(yq -o json '. | select(.metadata.name == "'"${name,,}"'" and .kind=="Service")' $file)
port=$(jq .spec.ports[0].port <<< $svc)
url=http://${name,,}.${ns:-default}.svc.cluster.local:${port};
echo $url;
sed -i -e '/'"$curd"'/{n;s|\(value:\)\s.*|\1 "'"$url"'"|}' productivity_suite_reactui.yaml ;
done
}
set_keycloak_service_endpoint() {
name=keycloak
file=keycloak_install.yaml
svc=$(yq -o json '. | select(.metadata.name == "'"$name"'" and .kind=="Service")' $file)
port=$(jq .spec.ports[0].port <<< $svc)
url=http://${name}.${ns:-default}.svc.cluster.local:${port}
echo $url
sed -i -e '/APP_KEYCLOAK_SERVICE_ENDPOINT/{n;s|\(value:\)\s.*|\1 "'"$url"'"|}' productivity_suite_reactui.yaml
}
set_services_endpoint() {
set_backend_service_endpoint
set_keycloak_service_endpoint
set_chat_history_endpoint
set_prompt_service_endpoint
set_dataprep_service_endpoint
}

View File

@@ -0,0 +1,69 @@
# ChatQnA Benchmarking
## Purpose
We aim to run these benchmarks and share them with the OPEA community for three primary reasons:
- To offer insights on inference throughput in real-world scenarios, helping you choose the best service or deployment for your needs.
- To establish a baseline for validating optimization solutions across different implementations, providing clear guidance on which methods are most effective for your use case.
- To inspire the community to build upon our benchmarks, allowing us to better quantify new solutions in conjunction with current leading LLMs, serving frameworks, etc.
## Table of Contents
- [Prerequisites](#prerequisites)
- [Overview](#overview)
- [Using deploy_and_benchmark.py](#using-deploy_and_benchmark.py-recommended)
- [Data Preparation](#data-preparation)
- [Configuration](#configuration)
## Prerequisites
Before running the benchmarks, ensure you have:
1. **Kubernetes Environment**
- Kubernetes installation: Use [kubespray](https://github.com/opea-project/docs/blob/main/guide/installation/k8s_install/k8s_install_kubespray.md) or other official Kubernetes installation guides
- (Optional) [Kubernetes set up guide on Intel Gaudi product](https://github.com/opea-project/GenAIInfra/blob/main/README.md#setup-kubernetes-cluster)
2. **Configuration YAML**
The configuration file (e.g., `./ChatQnA/benchmark_chatqna.yaml`) consists of two main sections: deployment and benchmarking. Required fields must be filled with valid values (like the Hugging Face token). For all other fields, you can either customize them according to your needs or leave them empty ("") to use the default values from the [helm charts](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts).
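For the required Hugging Face token, here is a minimal sketch of filling it into the sample configuration, assuming `yq` (v4) is installed and the key sits under `deploy.HUGGINGFACEHUB_API_TOKEN` as in the `benchmark_chatqna.yaml` shown in this change:
```bash
# Sketch: inject the Hugging Face token into the benchmark configuration.
# Assumes mikefarah yq v4; adjust the path if your config lives elsewhere.
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
yq -i '.deploy.HUGGINGFACEHUB_API_TOKEN = strenv(HUGGINGFACEHUB_API_TOKEN)' ./ChatQnA/benchmark_chatqna.yaml
```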
## Data Preparation
Before running benchmarks, you need to:
1. **Prepare Test Data**
- Download the retrieval file:
```bash
wget https://raw.githubusercontent.com/opea-project/GenAIEval/main/evals/benchmark/data/upload_file.txt
```
- For the `chatqna_qlist_pubmed` test case, prepare `pubmed_${max_lines}.txt` by following this [README](https://github.com/opea-project/GenAIEval/blob/main/evals/benchmark/stresscli/README_Pubmed_qlist.md)
2. **Prepare Model Files (Recommended)**
```bash
pip install -U "huggingface_hub[cli]"
sudo mkdir -p /mnt/models
sudo chmod 777 /mnt/models
huggingface-cli download --cache-dir /mnt/models Intel/neural-chat-7b-v3-3
```
## Overview
The benchmarking workflow consists of two main steps: deployment and benchmarking. We provide `deploy_and_benchmark.py` as a unified entry point that combines both steps.
### Using deploy_and_benchmark.py (Recommended)
The script `deploy_and_benchmark.py` serves as the main entry point. Here's an example using the ChatQnA configuration (you can replace it with any other example's configuration YAML file):
1. For a specific number of nodes:
```bash
python deploy_and_benchmark.py ./ChatQnA/benchmark_chatqna.yaml --target-node 1
```
2. For all node configurations:
```bash
python deploy_and_benchmark.py ./ChatQnA/benchmark_chatqna.yaml
```
This will iterate through the node list in your configuration YAML file, performing deployment and benchmarking for each node count.
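For reference, a rough shell equivalent of that iteration, assuming `yq` (v4) is available and the node list lives under `deploy.node` as in the sample configuration above:
```bash
# Sketch: run deployment and benchmarking once per node count listed in the config.
# deploy_and_benchmark.py does this internally when --target-node is omitted.
for n in $(yq '.deploy.node[]' ./ChatQnA/benchmark_chatqna.yaml); do
  python deploy_and_benchmark.py ./ChatQnA/benchmark_chatqna.yaml --target-node "$n"
done
```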

View File

@@ -1,41 +0,0 @@
# Deploy Translation in Kubernetes Cluster
> [!NOTE]
> The following values must be set before you can deploy:
> HUGGINGFACEHUB_API_TOKEN
>
> You can also customize the "MODEL_ID" if needed.
>
> Make sure the directory `/mnt/opea-models` exists on the node where the Translation workload will run, so the cached model can be saved there (see the sketch below); otherwise, modify the `translation.yaml` file to point `model-volume` at a directory that does exist on the node.
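A minimal sketch of preparing that directory on the target node (the path follows the default `model-volume` host path noted above; adjust it if you change the volume in `translation.yaml`):
```
# Run on the node that will host the Translation workload.
sudo mkdir -p /mnt/opea-models
sudo chmod -R a+rwx /mnt/opea-models
```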
## Deploy On Xeon
```
cd GenAIExamples/Translation/kubernetes/intel/cpu/xeon/manifest
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" translation.yaml
kubectl apply -f translation.yaml
```
## Deploy On Gaudi
```
cd GenAIExamples/Translation/kubernetes/intel/hpu/gaudi/manifest
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" translation.yaml
kubectl apply -f translation.yaml
```
## Verify Services
To verify the installation, run the command `kubectl get pod` to make sure all pods are running.
Then run the command `kubectl port-forward svc/translation 8888:8888` to expose the Translation service for access.
Open another terminal and run the following command to verify that the service is working:
```console
curl http://localhost:8888/v1/translation \
-H 'Content-Type: application/json' \
-d '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}'
```

View File

@@ -1,495 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: translation-tgi-config
labels:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: translation
app.kubernetes.io/version: "2.1.0"
data:
MODEL_ID: "haoranxu/ALMA-13B"
PORT: "2080"
HF_TOKEN: "insert-your-huggingface-token-here"
http_proxy: ""
https_proxy: ""
no_proxy: ""
HABANA_LOGS: "/tmp/habana_logs"
NUMBA_CACHE_DIR: "/tmp"
HF_HOME: "/tmp/.cache/huggingface"
CUDA_GRAPHS: "0"
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: translation-llm-uservice-config
labels:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: translation
app.kubernetes.io/version: "v1.0"
data:
TGI_LLM_ENDPOINT: "http://translation-tgi"
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
http_proxy: ""
https_proxy: ""
no_proxy: ""
LOGFLAG: ""
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: translation-ui-config
labels:
app.kubernetes.io/name: translation-ui
app.kubernetes.io/instance: translation
app.kubernetes.io/version: "v1.0"
data:
BASE_URL: "/v1/translation"
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
data:
default.conf: |+
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
server {
listen 80;
listen [::]:80;
location /home {
alias /usr/share/nginx/html/index.html;
}
location / {
proxy_pass http://translation-ui:5173;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
}
location /v1/translation {
proxy_pass http://translation:8888;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
}
}
kind: ConfigMap
metadata:
name: translation-nginx-config
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: translation-ui
labels:
app.kubernetes.io/name: translation-ui
app.kubernetes.io/instance: translation
app.kubernetes.io/version: "v1.0"
spec:
type: ClusterIP
ports:
- port: 5173
targetPort: ui
protocol: TCP
name: ui
selector:
app.kubernetes.io/name: translation-ui
app.kubernetes.io/instance: translation
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: translation-llm-uservice
labels:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: translation
app.kubernetes.io/version: "v1.0"
spec:
type: ClusterIP
ports:
- port: 9000
targetPort: 9000
protocol: TCP
name: llm-uservice
selector:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: translation
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: translation-tgi
labels:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: translation
app.kubernetes.io/version: "2.1.0"
spec:
type: ClusterIP
ports:
- port: 80
targetPort: 2080
protocol: TCP
name: tgi
selector:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: translation
---
apiVersion: v1
kind: Service
metadata:
name: translation-nginx
spec:
ports:
- port: 80
protocol: TCP
targetPort: 80
selector:
app.kubernetes.io/name: translation
app.kubernetes.io/instance: translation
app: translation-nginx
type: NodePort
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: translation
labels:
app.kubernetes.io/name: translation
app.kubernetes.io/instance: translation
app.kubernetes.io/version: "v1.0"
spec:
type: ClusterIP
ports:
- port: 8888
targetPort: 8888
protocol: TCP
name: translation
selector:
app.kubernetes.io/name: translation
app.kubernetes.io/instance: translation
app: translation
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: translation-ui
labels:
app.kubernetes.io/name: translation-ui
app.kubernetes.io/instance: translation
app.kubernetes.io/version: "v1.0"
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: translation-ui
app.kubernetes.io/instance: translation
template:
metadata:
labels:
app.kubernetes.io/name: translation-ui
app.kubernetes.io/instance: translation
app.kubernetes.io/version: "v1.0"
spec:
securityContext:
{}
containers:
- name: translation-ui
envFrom:
- configMapRef:
name: translation-ui-config
securityContext:
{}
image: "opea/translation-ui:latest"
imagePullPolicy: IfNotPresent
ports:
- name: ui
containerPort: 80
protocol: TCP
resources:
{}
volumeMounts:
- mountPath: /tmp
name: tmp
volumes:
- name: tmp
emptyDir: {}
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: translation-llm-uservice
labels:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: translation
app.kubernetes.io/version: "v1.0"
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: translation
template:
metadata:
labels:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: translation
spec:
securityContext:
{}
containers:
- name: translation
envFrom:
- configMapRef:
name: translation-llm-uservice-config
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: false
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/llm-textgen:latest"
imagePullPolicy: IfNotPresent
ports:
- name: llm-uservice
containerPort: 9000
protocol: TCP
volumeMounts:
- mountPath: /tmp
name: tmp
livenessProbe:
failureThreshold: 24
httpGet:
path: v1/health_check
port: llm-uservice
initialDelaySeconds: 5
periodSeconds: 5
readinessProbe:
httpGet:
path: v1/health_check
port: llm-uservice
initialDelaySeconds: 5
periodSeconds: 5
startupProbe:
failureThreshold: 120
httpGet:
path: v1/health_check
port: llm-uservice
initialDelaySeconds: 5
periodSeconds: 5
resources:
{}
volumes:
- name: tmp
emptyDir: {}
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: translation-tgi
labels:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: translation
app.kubernetes.io/version: "2.1.0"
spec:
# use explicit replica counts only if HorizontalPodAutoscaler is disabled
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: translation
template:
metadata:
labels:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: translation
spec:
securityContext:
{}
containers:
- name: tgi
envFrom:
- configMapRef:
name: translation-tgi-config
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /tmp
name: tmp
ports:
- name: http
containerPort: 2080
protocol: TCP
livenessProbe:
failureThreshold: 24
initialDelaySeconds: 5
periodSeconds: 5
tcpSocket:
port: http
readinessProbe:
initialDelaySeconds: 5
periodSeconds: 5
tcpSocket:
port: http
startupProbe:
failureThreshold: 120
initialDelaySeconds: 5
periodSeconds: 5
tcpSocket:
port: http
resources:
{}
volumes:
- name: model-volume
emptyDir: {}
- name: tmp
emptyDir: {}
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: translation
labels:
app.kubernetes.io/name: translation
app.kubernetes.io/instance: translation
app.kubernetes.io/version: "v1.0"
app: translation
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: translation
app.kubernetes.io/instance: translation
app: translation
template:
metadata:
labels:
app.kubernetes.io/name: translation
app.kubernetes.io/instance: translation
app: translation
spec:
securityContext:
null
containers:
- name: translation
env:
- name: LLM_SERVICE_HOST_IP
value: translation-llm-uservice
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/translation:latest"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /tmp
name: tmp
ports:
- name: translation
containerPort: 8888
protocol: TCP
resources:
null
volumes:
- name: tmp
emptyDir: {}
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: translation-nginx
labels:
app.kubernetes.io/name: translation
app.kubernetes.io/instance: translation
app.kubernetes.io/version: "v1.0"
app: translation-nginx
spec:
selector:
matchLabels:
app.kubernetes.io/name: translation
app.kubernetes.io/instance: translation
app: translation-nginx
template:
metadata:
labels:
app.kubernetes.io/name: translation
app.kubernetes.io/instance: translation
app: translation-nginx
spec:
containers:
- image: nginx:1.27.1
imagePullPolicy: IfNotPresent
name: nginx
volumeMounts:
- mountPath: /etc/nginx/conf.d
name: nginx-config-volume
securityContext: {}
volumes:
- configMap:
defaultMode: 420
name: translation-nginx-config
name: nginx-config-volume

View File

@@ -1,497 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: translation-tgi-config
labels:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: translation
app.kubernetes.io/version: "2.1.0"
data:
MODEL_ID: "haoranxu/ALMA-13B"
PORT: "2080"
HF_TOKEN: "insert-your-huggingface-token-here"
http_proxy: ""
https_proxy: ""
no_proxy: ""
HABANA_LOGS: "/tmp/habana_logs"
NUMBA_CACHE_DIR: "/tmp"
HF_HOME: "/tmp/.cache/huggingface"
MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: translation-llm-uservice-config
labels:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: translation
app.kubernetes.io/version: "v1.0"
data:
TGI_LLM_ENDPOINT: "http://translation-tgi"
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
http_proxy: ""
https_proxy: ""
no_proxy: ""
LOGFLAG: ""
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: translation-ui-config
labels:
app.kubernetes.io/name: translation-ui
app.kubernetes.io/instance: translation
app.kubernetes.io/version: "v1.0"
data:
BASE_URL: "/v1/translation"
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
data:
default.conf: |+
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
server {
listen 80;
listen [::]:80;
location /home {
alias /usr/share/nginx/html/index.html;
}
location / {
proxy_pass http://translation-ui:5173;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
}
location /v1/translation {
proxy_pass http://translation;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
}
}
kind: ConfigMap
metadata:
name: translation-nginx-config
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: translation-ui
labels:
app.kubernetes.io/name: translation-ui
app.kubernetes.io/instance: translation
app.kubernetes.io/version: "v1.0"
spec:
type: ClusterIP
ports:
- port: 5173
targetPort: ui
protocol: TCP
name: ui
selector:
app.kubernetes.io/name: translation-ui
app.kubernetes.io/instance: translation
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: translation-llm-uservice
labels:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: translation
app.kubernetes.io/version: "v1.0"
spec:
type: ClusterIP
ports:
- port: 9000
targetPort: 9000
protocol: TCP
name: llm-uservice
selector:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: translation
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: translation-tgi
labels:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: translation
app.kubernetes.io/version: "2.1.0"
spec:
type: ClusterIP
ports:
- port: 80
targetPort: 2080
protocol: TCP
name: tgi
selector:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: translation
---
apiVersion: v1
kind: Service
metadata:
name: translation-nginx
spec:
ports:
- port: 80
protocol: TCP
targetPort: 80
selector:
app.kubernetes.io/name: translation
app.kubernetes.io/instance: translation
app: translation-nginx
type: NodePort
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: translation
labels:
app.kubernetes.io/name: translation
app.kubernetes.io/instance: translation
app.kubernetes.io/version: "v1.0"
spec:
type: ClusterIP
ports:
- port: 8888
targetPort: 8888
protocol: TCP
name: translation
selector:
app.kubernetes.io/name: translation
app.kubernetes.io/instance: translation
app: translation
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: translation-ui
labels:
app.kubernetes.io/name: translation-ui
app.kubernetes.io/instance: translation
app.kubernetes.io/version: "v1.0"
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: translation-ui
app.kubernetes.io/instance: translation
template:
metadata:
labels:
app.kubernetes.io/name: translation-ui
app.kubernetes.io/instance: translation
app.kubernetes.io/version: "v1.0"
spec:
securityContext:
{}
containers:
- name: translation-ui
envFrom:
- configMapRef:
name: translation-ui-config
securityContext:
{}
image: "opea/translation-ui:latest"
imagePullPolicy: IfNotPresent
ports:
- name: ui
containerPort: 80
protocol: TCP
resources:
{}
volumeMounts:
- mountPath: /tmp
name: tmp
volumes:
- name: tmp
emptyDir: {}
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: translation-llm-uservice
labels:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: translation
app.kubernetes.io/version: "v1.0"
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: translation
template:
metadata:
labels:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: translation
spec:
securityContext:
{}
containers:
- name: translation
envFrom:
- configMapRef:
name: translation-llm-uservice-config
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: false
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/llm-textgen:latest"
imagePullPolicy: IfNotPresent
ports:
- name: llm-uservice
containerPort: 9000
protocol: TCP
volumeMounts:
- mountPath: /tmp
name: tmp
livenessProbe:
failureThreshold: 24
httpGet:
path: v1/health_check
port: llm-uservice
initialDelaySeconds: 5
periodSeconds: 5
readinessProbe:
httpGet:
path: v1/health_check
port: llm-uservice
initialDelaySeconds: 5
periodSeconds: 5
startupProbe:
failureThreshold: 120
httpGet:
path: v1/health_check
port: llm-uservice
initialDelaySeconds: 5
periodSeconds: 5
resources:
{}
volumes:
- name: tmp
emptyDir: {}
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: translation-tgi
labels:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: translation
app.kubernetes.io/version: "2.1.0"
spec:
  # use explicit replica counts only if HorizontalPodAutoscaler is disabled
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: translation
template:
metadata:
labels:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: translation
spec:
securityContext:
{}
containers:
- name: tgi
envFrom:
- configMapRef:
name: translation-tgi-config
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "ghcr.io/huggingface/tgi-gaudi:2.0.6"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /tmp
name: tmp
ports:
- name: http
containerPort: 2080
protocol: TCP
livenessProbe:
failureThreshold: 24
initialDelaySeconds: 5
periodSeconds: 5
tcpSocket:
port: http
readinessProbe:
initialDelaySeconds: 5
periodSeconds: 5
tcpSocket:
port: http
startupProbe:
failureThreshold: 120
initialDelaySeconds: 20
periodSeconds: 5
tcpSocket:
port: http
resources:
limits:
habana.ai/gaudi: 1
volumes:
- name: model-volume
emptyDir: {}
- name: tmp
emptyDir: {}
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: translation
labels:
app.kubernetes.io/name: translation
app.kubernetes.io/instance: translation
app.kubernetes.io/version: "v1.0"
app: translation
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: translation
app.kubernetes.io/instance: translation
app: translation
template:
metadata:
labels:
app.kubernetes.io/name: translation
app.kubernetes.io/instance: translation
app: translation
spec:
securityContext:
null
containers:
- name: translation
env:
- name: LLM_SERVICE_HOST_IP
value: translation-llm-uservice
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/translation:latest"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /tmp
name: tmp
ports:
- name: translation
containerPort: 8888
protocol: TCP
resources:
null
volumes:
- name: tmp
emptyDir: {}
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: translation-nginx
labels:
app.kubernetes.io/name: translation
app.kubernetes.io/instance: translation
app.kubernetes.io/version: "v1.0"
app: translation-nginx
spec:
selector:
matchLabels:
app.kubernetes.io/name: translation
app.kubernetes.io/instance: translation
app: translation-nginx
template:
metadata:
labels:
app.kubernetes.io/name: translation
app.kubernetes.io/instance: translation
app: translation-nginx
spec:
containers:
- image: nginx:1.27.1
imagePullPolicy: IfNotPresent
name: nginx
volumeMounts:
- mountPath: /etc/nginx/conf.d
name: nginx-config-volume
securityContext: {}
volumes:
- configMap:
defaultMode: 420
name: translation-nginx-config
name: nginx-config-volume

343
benchmark.py Normal file
View File

@@ -0,0 +1,343 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import os
import sys
from datetime import datetime
import yaml
from evals.benchmark.stresscli.commands.load_test import locust_runtests
from kubernetes import client, config
# only support chatqna for now
service_endpoints = {
"chatqna": "/v1/chatqna",
}
def load_yaml(file_path):
with open(file_path, "r") as f:
data = yaml.safe_load(f)
return data
def construct_benchmark_config(test_suite_config):
"""Extract relevant data from the YAML based on the specified test cases."""
return {
"concurrency": test_suite_config.get("concurrency", []),
"totoal_query_num": test_suite_config.get("user_queries", []),
"duration:": test_suite_config.get("duration:", []),
"query_num_per_concurrency": test_suite_config.get("query_num_per_concurrency", []),
"possion": test_suite_config.get("possion", False),
"possion_arrival_rate": test_suite_config.get("possion_arrival_rate", 1.0),
"warmup_iterations": test_suite_config.get("warmup_iterations", 10),
"seed": test_suite_config.get("seed", None),
"test_cases": test_suite_config.get("test_cases", ["chatqnafixed"]),
"user_queries": test_suite_config.get("user_queries", [1]),
"query_token_size": test_suite_config.get("query_token_size", 128),
"llm_max_token_size": test_suite_config.get("llm", {}).get("max_token_size", [128]),
}
def _get_cluster_ip(service_name, namespace="default"):
"""Get the Cluster IP of a service in a Kubernetes cluster."""
# Load the Kubernetes configuration
config.load_kube_config() # or use config.load_incluster_config() if running inside a Kubernetes pod
# Create an API client for the core API (which handles services)
v1 = client.CoreV1Api()
try:
# Get the service object
service = v1.read_namespaced_service(name=service_name, namespace=namespace)
# Extract the Cluster IP
cluster_ip = service.spec.cluster_ip
# Extract the port number (assuming the first port, modify if necessary)
if service.spec.ports:
port_number = service.spec.ports[0].port # Get the first port number
else:
port_number = None
return cluster_ip, port_number
except client.exceptions.ApiException as e:
print(f"Error fetching service: {e}")
        return None, None  # keep the (ip, port) tuple shape so callers can unpack safely
def _get_service_ip(service_name, deployment_type="k8s", service_ip=None, service_port=None, namespace="default"):
"""Get the service IP and port based on the deployment type.
Args:
service_name (str): The name of the service.
deployment_type (str): The type of deployment ("k8s" or "docker").
service_ip (str): The IP address of the service (required for Docker deployment).
service_port (int): The port of the service (required for Docker deployment).
namespace (str): The namespace of the service (default is "default").
Returns:
(str, int): The service IP and port.
"""
if deployment_type == "k8s":
# Kubernetes IP and port retrieval logic
svc_ip, port = _get_cluster_ip(service_name, namespace)
elif deployment_type == "docker":
# For Docker deployment, service_ip and service_port must be specified
if not service_ip or not service_port:
raise ValueError(
"For Docker deployment, service_ip and service_port must be provided in the configuration."
)
svc_ip = service_ip
port = service_port
else:
raise ValueError("Unsupported deployment type. Use 'k8s' or 'docker'.")
return svc_ip, port
def _create_yaml_content(service, base_url, bench_target, test_phase, num_queries, test_params):
"""Create content for the run.yaml file."""
# If a load shape includes the parameter concurrent_level,
    # the parameter will be passed to Locust to launch a fixed
# number of simulated users.
concurrency = 1
if num_queries >= 0:
concurrency = max(1, num_queries // test_params["concurrent_level"])
else:
concurrency = test_params["concurrent_level"]
import importlib.util
package_name = "opea-eval"
spec = importlib.util.find_spec(package_name)
print(spec)
# get folder path of opea-eval
eval_path = None
import pkg_resources
for dist in pkg_resources.working_set:
if "opea-eval" in dist.project_name:
eval_path = dist.location
if not eval_path:
print("Fail to load opea-eval package. Please install it first.")
exit(1)
yaml_content = {
"profile": {
"storage": {"hostpath": test_params["test_output_dir"]},
"global-settings": {
"tool": "locust",
"locustfile": os.path.join(eval_path, "evals/benchmark/stresscli/locust/aistress.py"),
"host": base_url,
"stop-timeout": test_params["query_timeout"],
"processes": 2,
"namespace": test_params["namespace"],
"bench-target": bench_target,
"service-metric-collect": test_params["collect_service_metric"],
"service-list": service.get("service_list", []),
"dataset": service.get("dataset", "default"),
"prompts": service.get("prompts", None),
"max-output": service.get("max_output", 128),
"seed": test_params.get("seed", None),
"llm-model": test_params["llm_model"],
"deployment-type": test_params["deployment_type"],
"load-shape": test_params["load_shape"],
},
"runs": [{"name": test_phase, "users": concurrency, "max-request": num_queries}],
}
}
# For the following scenarios, test will stop after the specified run-time
if test_params["run_time"] is not None and test_phase != "warmup":
yaml_content["profile"]["global-settings"]["run-time"] = test_params["run_time"]
return yaml_content
def _create_stresscli_confs(case_params, test_params, test_phase, num_queries, base_url, ts) -> str:
"""Create a stresscli configuration file and persist it on disk."""
stresscli_confs = []
# Get the workload
test_cases = test_params["test_cases"]
for test_case in test_cases:
stresscli_conf = {}
print(test_case)
if isinstance(test_case, str):
bench_target = test_case
elif isinstance(test_case, dict):
bench_target = list(test_case.keys())[0]
dataset_conf = test_case[bench_target]
if bench_target == "chatqna_qlist_pubmed":
max_lines = dataset_conf["dataset"].split("pub_med")[-1]
stresscli_conf["envs"] = {"DATASET": f"pubmed_{max_lines}.txt", "MAX_LINES": max_lines}
# Generate the content of stresscli configuration file
stresscli_yaml = _create_yaml_content(case_params, base_url, bench_target, test_phase, num_queries, test_params)
# Dump the stresscli configuration file
service_name = case_params.get("service_name")
run_yaml_path = os.path.join(
test_params["test_output_dir"], f"run_{service_name}_{ts}_{test_phase}_{num_queries}_{bench_target}.yaml"
)
with open(run_yaml_path, "w") as yaml_file:
yaml.dump(stresscli_yaml, yaml_file)
stresscli_conf["run_yaml_path"] = run_yaml_path
stresscli_confs.append(stresscli_conf)
return stresscli_confs
def create_stresscli_confs(service, base_url, test_suite_config, index):
"""Create and save the run.yaml file for the service being tested."""
os.makedirs(test_suite_config["test_output_dir"], exist_ok=True)
stresscli_confs = []
# Add YAML configuration of stresscli for warm-ups
warm_ups = test_suite_config["warm_ups"]
if warm_ups is not None and warm_ups > 0:
stresscli_confs.extend(_create_stresscli_confs(service, test_suite_config, "warmup", warm_ups, base_url, index))
# Add YAML configuration of stresscli for benchmark
user_queries_lst = test_suite_config["user_queries"]
if user_queries_lst is None or len(user_queries_lst) == 0:
# Test stop is controlled by run time
stresscli_confs.extend(_create_stresscli_confs(service, test_suite_config, "benchmark", -1, base_url, index))
else:
# Test stop is controlled by request count
for user_queries in user_queries_lst:
stresscli_confs.extend(
_create_stresscli_confs(service, test_suite_config, "benchmark", user_queries, base_url, index)
)
return stresscli_confs
def _run_service_test(example, service, test_suite_config):
"""Run the test for a specific service and example."""
print(f"[OPEA BENCHMARK] 🚀 Example: [ {example} ] Service: [ {service.get('service_name')} ], Running test...")
# Get the service name
service_name = service.get("service_name")
# Get the deployment type from the test suite configuration
deployment_type = test_suite_config.get("deployment_type", "k8s")
# Get the service IP and port based on deployment type
svc_ip, port = _get_service_ip(
service_name,
deployment_type,
test_suite_config.get("service_ip"),
test_suite_config.get("service_port"),
test_suite_config.get("namespace"),
)
base_url = f"http://{svc_ip}:{port}"
endpoint = service_endpoints[example]
url = f"{base_url}{endpoint}"
print(f"[OPEA BENCHMARK] 🚀 Running test for {service_name} at {url}")
# Generate a unique index based on the current time
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
# Create the run.yaml for the service
stresscli_confs = create_stresscli_confs(service, base_url, test_suite_config, timestamp)
# Do benchmark in for-loop for different user queries
output_folders = []
for index, stresscli_conf in enumerate(stresscli_confs, start=1):
run_yaml_path = stresscli_conf["run_yaml_path"]
print(f"[OPEA BENCHMARK] 🚀 The {index} time test is running, run yaml: {run_yaml_path}...")
os.environ["MAX_TOKENS"] = str(service.get("max_output"))
if stresscli_conf.get("envs") is not None:
for key, value in stresscli_conf.get("envs").items():
os.environ[key] = value
output_folders.append(locust_runtests(None, run_yaml_path))
print(f"[OPEA BENCHMARK] 🚀 Test completed for {service_name} at {url}")
return output_folders
def run_benchmark(benchmark_config, chart_name, namespace, llm_model=None, report=False):
# If llm_model is None or an empty string, set to default value
if not llm_model:
llm_model = "Qwen/Qwen2.5-Coder-7B-Instruct"
# Extract data
parsed_data = construct_benchmark_config(benchmark_config)
test_suite_config = {
"user_queries": parsed_data["user_queries"], # num of user queries
"random_prompt": False, # whether to use random prompt, set to False by default
"run_time": "60m", # The max total run time for the test suite, set to 60m by default
"collect_service_metric": False, # whether to collect service metrics, set to False by default
"llm_model": llm_model, # The LLM model used for the test
"deployment_type": "k8s", # Default is "k8s", can also be "docker"
"service_ip": None, # Leave as None for k8s, specify for Docker
"service_port": None, # Leave as None for k8s, specify for Docker
"test_output_dir": os.getcwd() + "/benchmark_output", # The directory to store the test output
"load_shape": {
"name": "constant",
"params": {"constant": {"concurrent_level": 4}, "poisson": {"arrival_rate": 1.0}},
},
"concurrent_level": 4,
"arrival_rate": 1.0,
"query_timeout": 120,
"warm_ups": parsed_data["warmup_iterations"],
"seed": parsed_data["seed"],
"namespace": namespace,
"test_cases": parsed_data["test_cases"],
"llm_max_token_size": parsed_data["llm_max_token_size"],
}
dataset = None
query_data = None
# Do benchmark in for-loop for different llm_max_token_size
for llm_max_token in parsed_data["llm_max_token_size"]:
print(f"[OPEA BENCHMARK] 🚀 Run benchmark on {dataset} with llm max-output-token {llm_max_token}.")
case_data = {}
# Support chatqna only for now
if chart_name == "chatqna":
case_data = {
"run_test": True,
"service_name": "chatqna",
"service_list": [
"chatqna",
"chatqna-chatqna-ui",
"chatqna-data-prep",
"chatqna-nginx",
"chatqna-redis-vector-db",
"chatqna-retriever-usvc",
"chatqna-tei",
"chatqna-teirerank",
"chatqna-tgi",
],
"test_cases": parsed_data["test_cases"],
                # Used only when random_prompt=True: leave blank for the default dataset (WebQuestions) or set to sharegpt
"prompts": query_data,
"max_output": llm_max_token, # max number of output tokens
"k": 1, # number of retrieved documents
}
output_folder = _run_service_test(chart_name, case_data, test_suite_config)
print(f"[OPEA BENCHMARK] 🚀 Test Finished. Output saved in {output_folder}.")
if report:
print(output_folder)
all_results = dict()
for folder in output_folder:
from evals.benchmark.stresscli.commands.report import get_report_results
results = get_report_results(folder)
all_results[folder] = results
print(f"results = {results}\n")
return all_results
if __name__ == "__main__":
benchmark_config = load_yaml("./benchmark.yaml")
run_benchmark(benchmark_config=benchmark_config, chart_name="chatqna", namespace="deploy-benchmark")
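For reference, a minimal usage sketch of this module driven programmatically rather than through the __main__ block above. It assumes a chatqna release is already running in the deploy-benchmark namespace and that opea-eval is installed; all names and values shown are illustrative, not requirements.

# Hypothetical sketch: invoke the benchmark from another script.
from benchmark import load_yaml, run_benchmark

cfg = load_yaml("./benchmark.yaml")  # same loader the __main__ block uses
results = run_benchmark(
    benchmark_config=cfg,
    chart_name="chatqna",                      # only chatqna is supported for now
    namespace="deploy-benchmark",
    llm_model="Qwen/Qwen2.5-Coder-7B-Instruct",
    report=True,                               # also gather stresscli report results per run
)
print(results)                                 # {output_folder: report_results, ...}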

674
deploy.py Normal file
View File

@@ -0,0 +1,674 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import argparse
import glob
import json
import os
import subprocess
import sys
import time
from enum import Enum, auto
import yaml
################################################################################
#                                                                              #
#                       HELM VALUES GENERATION SECTION                         #
#                                                                              #
################################################################################
def configure_node_selectors(values, node_selector, deploy_config):
"""Configure node selectors for all services."""
for service_name, config in deploy_config["services"].items():
if service_name == "backend":
values["nodeSelector"] = {key: value for key, value in node_selector.items()}
elif service_name == "llm":
engine = config.get("engine", "tgi")
values[engine] = {"nodeSelector": {key: value for key, value in node_selector.items()}}
else:
values[service_name] = {"nodeSelector": {key: value for key, value in node_selector.items()}}
return values
def configure_replica(values, deploy_config):
"""Get replica configuration based on example type and node count."""
for service_name, config in deploy_config["services"].items():
if not config.get("replicaCount"):
continue
if service_name == "llm":
engine = config.get("engine", "tgi")
values[engine]["replicaCount"] = config["replicaCount"]
elif service_name == "backend":
values["replicaCount"] = config["replicaCount"]
else:
values[service_name]["replicaCount"] = config["replicaCount"]
return values
def get_output_filename(num_nodes, with_rerank, example_type, device, action_type):
"""Generate output filename based on configuration."""
rerank_suffix = "with-rerank-" if with_rerank else ""
action_suffix = "deploy-" if action_type == 0 else "update-" if action_type == 1 else ""
return f"{example_type}-{num_nodes}-{device}-{action_suffix}{rerank_suffix}values.yaml"
def configure_resources(values, deploy_config):
"""Configure resources when tuning is enabled."""
resource_configs = []
for service_name, config in deploy_config["services"].items():
resources = {}
if deploy_config["device"] == "gaudi" and config.get("cards_per_instance", 0) > 1:
resources = {
"limits": {"habana.ai/gaudi": config["cards_per_instance"]},
"requests": {"habana.ai/gaudi": config["cards_per_instance"]},
}
else:
limits = {}
requests = {}
# Only add CPU if cores_per_instance has a value
if config.get("cores_per_instance"):
limits["cpu"] = config["cores_per_instance"]
requests["cpu"] = config["cores_per_instance"]
# Only add memory if memory_capacity has a value
if config.get("memory_capacity"):
limits["memory"] = config["memory_capacity"]
requests["memory"] = config["memory_capacity"]
# Only create resources if we have any limits/requests
if limits and requests:
resources["limits"] = limits
resources["requests"] = requests
if resources:
if service_name == "llm":
engine = config.get("engine", "tgi")
resource_configs.append(
{
"name": engine,
"resources": resources,
}
)
else:
resource_configs.append(
{
"name": service_name,
"resources": resources,
}
)
for config in [r for r in resource_configs if r]:
service_name = config["name"]
if service_name == "backend":
values["resources"] = config["resources"]
elif service_name in values:
values[service_name]["resources"] = config["resources"]
return values
def configure_extra_cmd_args(values, deploy_config):
"""Configure extra command line arguments for services."""
for service_name, config in deploy_config["services"].items():
extra_cmd_args = []
for param in [
"max_batch_size",
"max_input_length",
"max_total_tokens",
"max_batch_total_tokens",
"max_batch_prefill_tokens",
]:
if config.get(param):
extra_cmd_args.extend([f"--{param.replace('_', '-')}", str(config[param])])
if extra_cmd_args:
if service_name == "llm":
engine = config.get("engine", "tgi")
if engine not in values:
values[engine] = {}
values[engine]["extraCmdArgs"] = extra_cmd_args
else:
if service_name not in values:
values[service_name] = {}
values[service_name]["extraCmdArgs"] = extra_cmd_args
return values
def configure_models(values, deploy_config):
"""Configure model settings for services."""
for service_name, config in deploy_config["services"].items():
# Skip if no model_id defined or service is disabled
if not config.get("model_id") or config.get("enabled") is False:
continue
if service_name == "llm":
# For LLM service, use its engine as the key
engine = config.get("engine", "tgi")
values[engine]["LLM_MODEL_ID"] = config.get("model_id")
elif service_name == "tei":
values[service_name]["EMBEDDING_MODEL_ID"] = config.get("model_id")
elif service_name == "teirerank":
values[service_name]["RERANK_MODEL_ID"] = config.get("model_id")
return values
def configure_rerank(values, with_rerank, deploy_config, example_type, node_selector):
"""Configure rerank service."""
if with_rerank:
if "teirerank" not in values:
values["teirerank"] = {"nodeSelector": {key: value for key, value in node_selector.items()}}
elif "nodeSelector" not in values["teirerank"]:
values["teirerank"]["nodeSelector"] = {key: value for key, value in node_selector.items()}
else:
if example_type == "chatqna":
values["image"] = {"repository": "opea/chatqna-without-rerank"}
if "teirerank" not in values:
values["teirerank"] = {"enabled": False}
elif "enabled" not in values["teirerank"]:
values["teirerank"]["enabled"] = False
return values
def generate_helm_values(example_type, deploy_config, chart_dir, action_type, node_selector=None):
"""Create a values.yaml file based on the provided configuration."""
if deploy_config is None:
raise ValueError("deploy_config is required")
# Ensure the chart_dir exists
if not os.path.exists(chart_dir):
return {"status": "false", "message": f"Chart directory {chart_dir} does not exist"}
num_nodes = deploy_config.get("node", 1)
with_rerank = deploy_config["services"].get("teirerank", {}).get("enabled", False)
print(f"Generating values for {example_type} example")
print(f"with_rerank: {with_rerank}")
print(f"num_nodes: {num_nodes}")
print(f"node_selector: {node_selector}")
# Initialize base values
values = {
"global": {
"HUGGINGFACEHUB_API_TOKEN": deploy_config.get("HUGGINGFACEHUB_API_TOKEN", ""),
"modelUseHostPath": deploy_config.get("modelUseHostPath", ""),
}
}
# Configure components
values = configure_node_selectors(values, node_selector or {}, deploy_config)
values = configure_rerank(values, with_rerank, deploy_config, example_type, node_selector or {})
values = configure_replica(values, deploy_config)
values = configure_resources(values, deploy_config)
values = configure_extra_cmd_args(values, deploy_config)
values = configure_models(values, deploy_config)
device = deploy_config.get("device", "unknown")
# Generate and write YAML file
filename = get_output_filename(num_nodes, with_rerank, example_type, device, action_type)
yaml_string = yaml.dump(values, default_flow_style=False)
filepath = os.path.join(chart_dir, filename)
# Write the YAML data to the file
with open(filepath, "w") as file:
file.write(yaml_string)
print(f"YAML file {filepath} has been generated.")
return {"status": "success", "filepath": filepath}
################################################################################
#                                                                              #
#                              DEPLOYMENT SECTION                              #
#                                                                              #
################################################################################
def run_kubectl_command(command):
"""Run a kubectl command and return the output."""
try:
result = subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
return result.stdout
except subprocess.CalledProcessError as e:
print(f"Error running command: {command}\n{e.stderr}")
exit(1)
def get_all_nodes():
"""Get the list of all nodes in the Kubernetes cluster."""
command = ["kubectl", "get", "nodes", "-o", "json"]
output = run_kubectl_command(command)
nodes = json.loads(output)
return [node["metadata"]["name"] for node in nodes["items"]]
def add_label_to_node(node_name, label):
"""Add a label to the specified node."""
command = ["kubectl", "label", "node", node_name, label, "--overwrite"]
print(f"Labeling node {node_name} with {label}...")
run_kubectl_command(command)
print(f"Label {label} added to node {node_name} successfully.")
def add_labels_to_nodes(node_count=None, label=None, node_names=None):
"""Add a label to the specified number of nodes or to specified nodes."""
if node_names:
# Add label to the specified nodes
for node_name in node_names:
add_label_to_node(node_name, label)
else:
# Fetch the node list and label the specified number of nodes
all_nodes = get_all_nodes()
if node_count is None or node_count > len(all_nodes):
print(f"Error: Node count exceeds the number of available nodes ({len(all_nodes)} available).")
sys.exit(1)
selected_nodes = all_nodes[:node_count]
for node_name in selected_nodes:
add_label_to_node(node_name, label)
def clear_labels_from_nodes(label, node_names=None):
"""Clear the specified label from specific nodes if provided, otherwise from all nodes."""
label_key = label.split("=")[0] # Extract key from 'key=value' format
# If specific nodes are provided, use them; otherwise, get all nodes
nodes_to_clear = node_names if node_names else get_all_nodes()
for node_name in nodes_to_clear:
# Check if the node has the label by inspecting its metadata
command = ["kubectl", "get", "node", node_name, "-o", "json"]
node_info = run_kubectl_command(command)
node_metadata = json.loads(node_info)
# Check if the label exists on this node
labels = node_metadata["metadata"].get("labels", {})
if label_key in labels:
# Remove the label from the node
command = ["kubectl", "label", "node", node_name, f"{label_key}-"]
print(f"Removing label {label_key} from node {node_name}...")
run_kubectl_command(command)
print(f"Label {label_key} removed from node {node_name} successfully.")
else:
print(f"Label {label_key} not found on node {node_name}, skipping.")
def get_hw_values_file(deploy_config, chart_dir):
"""Get the hardware-specific values file based on the deploy configuration."""
device_type = deploy_config.get("device", "cpu")
print(f"Device type is {device_type}. Using existing Helm chart values files...")
if device_type == "cpu":
print(f"Device type is {device_type}. Using existing Helm chart values files.")
return None
llm_engine = deploy_config.get("services", {}).get("llm", {}).get("engine", "tgi")
version = deploy_config.get("version", "1.1.0")
if os.path.isdir(chart_dir):
# Determine which values file to use based on version
if version in ["1.0.0", "1.1.0"]:
hw_values_file = os.path.join(chart_dir, f"{device_type}-values.yaml")
else:
hw_values_file = os.path.join(chart_dir, f"{device_type}-{llm_engine}-values.yaml")
if not os.path.exists(hw_values_file):
print(f"Warning: {hw_values_file} not found")
hw_values_file = None
else:
print(f"Device-specific values file found: {hw_values_file}")
else:
print(f"Error: Could not find directory for {chart_dir}")
hw_values_file = None
return hw_values_file
def install_helm_release(release_name, chart_name, namespace, hw_values_file, deploy_values_file):
"""Deploy a Helm release with a specified name and chart.
Parameters:
- release_name: The name of the Helm release.
- chart_name: The Helm chart name or path.
- namespace: The Kubernetes namespace for deployment.
    - hw_values_file: The hardware-specific values file.
- deploy_values_file: The values file for deployment.
"""
# Check if the namespace exists; if not, create it
try:
command = ["kubectl", "get", "namespace", namespace]
subprocess.run(command, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
except subprocess.CalledProcessError:
print(f"Namespace '{namespace}' does not exist. Creating it...")
command = ["kubectl", "create", "namespace", namespace]
subprocess.run(command, check=True)
print(f"Namespace '{namespace}' created successfully.")
try:
# Prepare the Helm install command
command = ["helm", "install", release_name, chart_name, "--namespace", namespace]
# Append values files in order
if hw_values_file:
command.extend(["-f", hw_values_file])
if deploy_values_file:
command.extend(["-f", deploy_values_file])
# Execute the Helm install command
print(f"Running command: {' '.join(command)}")
subprocess.run(command, check=True)
print("Deployment initiated successfully.")
except subprocess.CalledProcessError as e:
print(f"Error occurred while deploying Helm release: {e}")
def uninstall_helm_release(release_name, namespace=None):
"""Uninstall a Helm release and clean up resources, optionally delete the namespace if not 'default'."""
# Default to 'default' namespace if none is specified
if not namespace:
namespace = "default"
try:
# Uninstall the Helm release
command = ["helm", "uninstall", release_name, "--namespace", namespace]
print(f"Uninstalling Helm release {release_name} in namespace {namespace}...")
run_kubectl_command(command)
print(f"Helm release {release_name} uninstalled successfully.")
# If the namespace is specified and not 'default', delete it
if namespace != "default":
print(f"Deleting namespace {namespace}...")
delete_namespace_command = ["kubectl", "delete", "namespace", namespace]
run_kubectl_command(delete_namespace_command)
print(f"Namespace {namespace} deleted successfully.")
else:
print("Namespace is 'default', skipping deletion.")
except subprocess.CalledProcessError as e:
print(f"Error occurred while uninstalling Helm release or deleting namespace: {e}")
def update_service(release_name, chart_name, namespace, hw_values_file, deploy_values_file, update_values_file):
"""Update the deployment using helm upgrade with new values.
Args:
        release_name: The helm release name
        chart_name: The helm chart name or path
        namespace: The kubernetes namespace
        hw_values_file: The hardware-specific values file
        deploy_values_file: The values file from the original deployment
        update_values_file: The values file with the updated configuration
"""
# Construct helm upgrade command
command = [
"helm",
"upgrade",
release_name,
chart_name,
"--namespace",
namespace,
"-f",
hw_values_file,
"-f",
deploy_values_file,
"-f",
update_values_file,
]
# Execute helm upgrade
print(f"Running command: {' '.join(command)}")
run_kubectl_command(command)
print("Deployment updated successfully")
def read_deploy_config(config_path):
"""Read and parse the deploy config file.
Args:
config_path: Path to the deploy config file
Returns:
The parsed deploy config dictionary or None if failed
"""
try:
with open(config_path, "r") as f:
return yaml.safe_load(f)
except Exception as e:
print(f"Failed to load deploy config: {str(e)}")
return None
def check_deployment_ready(release_name, namespace, timeout=300, interval=5, logfile="deployment.log"):
"""Wait until all pods in the deployment are running and ready.
Args:
        release_name: The Helm release name whose deployments are checked
        namespace: The Kubernetes namespace
        timeout: The maximum time to wait in seconds (default 300 seconds)
interval: The interval between checks in seconds (default 5 seconds)
logfile: The file to log output to (default 'deployment.log')
Returns:
0 if success, 1 if failure (timeout reached)
"""
try:
# Get the list of deployments in the namespace
cmd = ["kubectl", "-n", namespace, "get", "deployments", "-o", "jsonpath='{.items[*].metadata.name}'"]
deployments_output = subprocess.check_output(cmd, text=True)
deployments = deployments_output.strip().split()
# Strip the first and last elements of single quotes if present
deployments[0] = deployments[0].strip("'")
deployments[-1] = deployments[-1].strip("'")
with open(logfile, "a") as log:
log.write(f"Found deployments: {', '.join(deployments)}\n")
timer = 0
# Loop through each deployment to check its readiness
for deployment_name in deployments:
if "-" not in deployment_name or "ui" in deployment_name or "nginx" in deployment_name:
continue
instance_name = deployment_name.split("-", 1)[0]
app_name = deployment_name.split("-", 1)[1]
if instance_name != release_name:
continue
cmd = ["kubectl", "-n", namespace, "get", "deployment", deployment_name, "-o", "jsonpath={.spec.replicas}"]
desired_replicas = int(subprocess.check_output(cmd, text=True).strip())
with open(logfile, "a") as log:
log.write(f"Checking deployment '{deployment_name}' with desired replicas: {desired_replicas}\n")
while True:
cmd = [
"kubectl",
"-n",
namespace,
"get",
"pods",
"-l",
f"app.kubernetes.io/instance={instance_name}",
"-l",
f"app.kubernetes.io/name={app_name}",
"--field-selector=status.phase=Running",
"-o",
"json",
]
pods_output = subprocess.check_output(cmd, text=True)
pods = json.loads(pods_output)
ready_pods = sum(
1
for pod in pods["items"]
if all(container.get("ready") for container in pod.get("status", {}).get("containerStatuses", []))
)
terminating_pods = sum(
1 for pod in pods["items"] if pod.get("metadata", {}).get("deletionTimestamp") is not None
)
with open(logfile, "a") as log:
log.write(
f"Ready pods: {ready_pods}, Desired replicas: {desired_replicas}, Terminating pods: {terminating_pods}\n"
)
if ready_pods == desired_replicas and terminating_pods == 0:
with open(logfile, "a") as log:
log.write(f"All pods for deployment '{deployment_name}' are running and ready.\n")
break
if timer >= timeout:
with open(logfile, "a") as log:
log.write(
f"Timeout reached for deployment '{deployment_name}'. Not all pods are running and ready.\n"
)
return 1 # Failure
time.sleep(interval)
timer += interval
return 0 # Success for all deployments
except subprocess.CalledProcessError as e:
with open(logfile, "a") as log:
log.write(f"Error executing kubectl command: {e}\n")
return 1 # Failure
except json.JSONDecodeError as e:
with open(logfile, "a") as log:
log.write(f"Error parsing kubectl output: {e}\n")
return 1 # Failure
except Exception as e:
with open(logfile, "a") as log:
log.write(f"Unexpected error: {e}\n")
return 1 # Failure
def main():
parser = argparse.ArgumentParser(description="Manage Helm Deployment.")
parser.add_argument(
"--chart-name",
type=str,
default="chatqna",
help="The chart name to deploy (default: chatqna).",
)
parser.add_argument("--namespace", default="default", help="Kubernetes namespace (default: default).")
parser.add_argument("--user-values", help="Path to a user-specified values.yaml file.")
parser.add_argument("--deploy-config", help="Path to a deploy config yaml file.")
parser.add_argument(
"--create-values-only", action="store_true", help="Only create the values.yaml file without deploying."
)
parser.add_argument("--uninstall", action="store_true", help="Uninstall the Helm release.")
parser.add_argument("--num-nodes", type=int, default=1, help="Number of nodes to use (default: 1).")
parser.add_argument("--node-names", nargs="*", help="Optional specific node names to label.")
parser.add_argument("--add-label", action="store_true", help="Add label to specified nodes if this flag is set.")
parser.add_argument(
"--delete-label", action="store_true", help="Delete label from specified nodes if this flag is set."
)
parser.add_argument(
"--label", default="node-type=opea-benchmark", help="Label to add/delete (default: node-type=opea-benchmark)."
)
parser.add_argument("--update-service", action="store_true", help="Update the deployment with new configuration.")
parser.add_argument("--check-ready", action="store_true", help="Check if all services in the deployment are ready.")
parser.add_argument("--chart-dir", default=".", help="Path to the untarred Helm chart directory.")
args = parser.parse_args()
# Node labeling management
if args.add_label:
add_labels_to_nodes(args.num_nodes, args.label, args.node_names)
return
elif args.delete_label:
clear_labels_from_nodes(args.label, args.node_names)
return
elif args.check_ready:
is_ready = check_deployment_ready(args.chart_name, args.namespace)
return is_ready
elif args.uninstall:
uninstall_helm_release(args.chart_name, args.namespace)
return
# Load deploy_config if provided
deploy_config = None
if args.deploy_config:
deploy_config = read_deploy_config(args.deploy_config)
if deploy_config is None:
parser.error("Failed to load deploy config")
return
hw_values_file = get_hw_values_file(deploy_config, args.chart_dir)
action_type = 0
if args.update_service:
action_type = 1
# The user file is provided for deploy when --update-service is not specified
if args.user_values and not args.update_service:
values_file_path = args.user_values
else:
if not args.deploy_config:
parser.error("--deploy-config is required")
node_selector = {args.label.split("=")[0]: args.label.split("=")[1]}
print("go to generate deploy values" if action_type == 0 else "go to generate update values")
# Generate values file for deploy or update service
result = generate_helm_values(
example_type=args.chart_name,
deploy_config=deploy_config,
chart_dir=args.chart_dir,
action_type=action_type, # 0 - deploy, 1 - update
node_selector=node_selector,
)
# Check result status
if result["status"] == "success":
values_file_path = result["filepath"]
else:
parser.error(f"Failed to generate values.yaml: {result['message']}")
return
print("start to read the generated values file")
# Read back the generated YAML file for verification
with open(values_file_path, "r") as file:
print("Generated YAML contents:")
print(file.read())
# Handle service update if specified
if args.update_service:
if not args.user_values:
parser.error("--user-values is required for update reference")
try:
update_service(
args.chart_name, args.chart_name, args.namespace, hw_values_file, args.user_values, values_file_path
)
return
except Exception as e:
parser.error(f"Failed to update deployment: {str(e)}")
return
# Deploy unless --create-values-only is specified
if not args.create_values_only:
install_helm_release(args.chart_name, args.chart_name, args.namespace, hw_values_file, values_file_path)
print(f"values_file_path: {values_file_path}")
if __name__ == "__main__":
main()
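As a quick illustration of the values-generation path in isolation, here is a sketch that calls generate_helm_values directly with a hand-written two-node Gaudi config. The token, host path, replica counts, and chart_dir are placeholder assumptions; chart_dir is expected to point at an untarred Helm chart.

# Hypothetical sketch: produce a values.yaml without installing anything.
from deploy import generate_helm_values

deploy_config = {
    "device": "gaudi",
    "node": 2,
    "HUGGINGFACEHUB_API_TOKEN": "insert-your-huggingface-token-here",
    "modelUseHostPath": "/mnt/models",
    "services": {
        "backend": {"replicaCount": 2},
        "teirerank": {"enabled": True, "replicaCount": 1},
        "tei": {"replicaCount": 2},
        "llm": {"engine": "tgi", "replicaCount": 15, "max_batch_size": 2},
    },
}

result = generate_helm_values(
    example_type="chatqna",
    deploy_config=deploy_config,
    chart_dir=".",                                  # assumed: an untarred chart lives here
    action_type=0,                                  # 0 = deploy, 1 = update
    node_selector={"node-type": "opea-benchmark"},
)
print(result)  # e.g. {"status": "success", "filepath": "./chatqna-2-gaudi-deploy-with-rerank-values.yaml"}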

292
deploy_and_benchmark.py Normal file
View File

@@ -0,0 +1,292 @@
# Copyright (C) 2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import argparse
import copy
import os
import re
import shutil
import subprocess
import sys
import yaml
from benchmark import run_benchmark
def read_yaml(file_path):
try:
with open(file_path, "r") as file:
return yaml.safe_load(file)
except Exception as e:
print(f"Error reading YAML file: {e}")
return None
def construct_deploy_config(deploy_config, target_node, max_batch_size=None):
"""Construct a new deploy config based on the target node number and optional max_batch_size.
Args:
deploy_config: Original deploy config dictionary
target_node: Target node number to match in the node array
max_batch_size: Optional specific max_batch_size value to use
Returns:
A new deploy config with single values for node and instance_num
"""
# Deep copy the original config to avoid modifying it
new_config = copy.deepcopy(deploy_config)
# Get the node array and validate
nodes = deploy_config.get("node")
if not isinstance(nodes, list):
raise ValueError("deploy_config['node'] must be an array")
# Find the index of the target node
try:
node_index = nodes.index(target_node)
except ValueError:
raise ValueError(f"Target node {target_node} not found in node array {nodes}")
# Set the single node value
new_config["node"] = target_node
# Update instance_num for each service based on the same index
for service_name, service_config in new_config.get("services", {}).items():
if "replicaCount" in service_config:
instance_nums = service_config["replicaCount"]
if isinstance(instance_nums, list):
if len(instance_nums) != len(nodes):
raise ValueError(
f"instance_num array length ({len(instance_nums)}) for service {service_name} "
f"doesn't match node array length ({len(nodes)})"
)
service_config["replicaCount"] = instance_nums[node_index]
# Update max_batch_size if specified
if max_batch_size is not None and "llm" in new_config["services"]:
new_config["services"]["llm"]["max_batch_size"] = max_batch_size
return new_config
def pull_helm_chart(chart_pull_url, version, chart_name):
# Pull and untar the chart
subprocess.run(["helm", "pull", chart_pull_url, "--version", version, "--untar"], check=True)
current_dir = os.getcwd()
untar_dir = os.path.join(current_dir, chart_name)
if not os.path.isdir(untar_dir):
print(f"Error: Could not find untarred directory for {chart_name}")
return None
return untar_dir
def main(yaml_file, target_node=None):
"""Main function to process deployment configuration.
Args:
yaml_file: Path to the YAML configuration file
target_node: Optional target number of nodes to deploy. If not specified, will process all nodes.
"""
config = read_yaml(yaml_file)
if config is None:
print("Failed to read YAML file.")
return None
deploy_config = config["deploy"]
benchmark_config = config["benchmark"]
# Extract chart name from the YAML file name
chart_name = os.path.splitext(os.path.basename(yaml_file))[0].split("_")[-1]
print(f"chart_name: {chart_name}")
python_cmd = sys.executable
# Process nodes
nodes = deploy_config.get("node", [])
if not isinstance(nodes, list):
print("Error: deploy_config['node'] must be an array")
return None
nodes_to_process = [target_node] if target_node is not None else nodes
node_names = deploy_config.get("node_name", [])
namespace = deploy_config.get("namespace", "default")
# Pull the Helm chart
chart_pull_url = f"oci://ghcr.io/opea-project/charts/{chart_name}"
version = deploy_config.get("version", "1.1.0")
chart_dir = pull_helm_chart(chart_pull_url, version, chart_name)
if not chart_dir:
return
for node in nodes_to_process:
try:
print(f"\nProcessing configuration for {node} nodes...")
# Get corresponding node names for this node count
current_node_names = node_names[:node] if node_names else []
# Add labels for current node configuration
print(f"Adding labels for {node} nodes...")
cmd = [python_cmd, "deploy.py", "--chart-name", chart_name, "--num-nodes", str(node), "--add-label"]
if current_node_names:
cmd.extend(["--node-names"] + current_node_names)
result = subprocess.run(cmd, check=True)
if result.returncode != 0:
print(f"Failed to add labels for {node} nodes")
continue
try:
# Process max_batch_sizes
max_batch_sizes = deploy_config.get("services", {}).get("llm", {}).get("max_batch_size", [])
if not isinstance(max_batch_sizes, list):
max_batch_sizes = [max_batch_sizes]
values_file_path = None
for i, max_batch_size in enumerate(max_batch_sizes):
print(f"\nProcessing max_batch_size: {max_batch_size}")
# Construct new deploy config
new_deploy_config = construct_deploy_config(deploy_config, node, max_batch_size)
# Write the new deploy config to a temporary file
temp_config_file = f"temp_deploy_config_{node}_{max_batch_size}.yaml"
try:
with open(temp_config_file, "w") as f:
yaml.dump(new_deploy_config, f)
if i == 0:
# First iteration: full deployment
cmd = [
python_cmd,
"deploy.py",
"--deploy-config",
temp_config_file,
"--chart-name",
chart_name,
"--namespace",
namespace,
"--chart-dir",
chart_dir,
]
result = subprocess.run(cmd, check=True, capture_output=True, text=True)
match = re.search(r"values_file_path: (\S+)", result.stdout)
if match:
values_file_path = match.group(1)
print(f"Captured values_file_path: {values_file_path}")
else:
print("values_file_path not found in the output")
else:
# Subsequent iterations: update services with config change
cmd = [
python_cmd,
"deploy.py",
"--deploy-config",
temp_config_file,
"--chart-name",
chart_name,
"--namespace",
namespace,
"--chart-dir",
chart_dir,
"--user-values",
values_file_path,
"--update-service",
]
result = subprocess.run(cmd, check=True)
if result.returncode != 0:
print(
f"Update failed for {node} nodes configuration with max_batch_size {max_batch_size}"
)
break # Skip remaining max_batch_sizes for this node
# Wait for deployment to be ready
print("\nWaiting for deployment to be ready...")
cmd = [
python_cmd,
"deploy.py",
"--chart-name",
chart_name,
"--namespace",
namespace,
"--check-ready",
]
try:
result = subprocess.run(cmd, check=True)
print("Deployments are ready!")
except subprocess.CalledProcessError as e:
print(f"Deployments status failed with returncode: {e.returncode}")
# Run benchmark
run_benchmark(
benchmark_config=benchmark_config,
chart_name=chart_name,
namespace=namespace,
llm_model=deploy_config.get("services", {}).get("llm", {}).get("model_id", ""),
)
except Exception as e:
print(
f"Error during {'deployment' if i == 0 else 'update'} for {node} nodes with max_batch_size {max_batch_size}: {str(e)}"
)
break # Skip remaining max_batch_sizes for this node
finally:
# Clean up the temporary file
if os.path.exists(temp_config_file):
os.remove(temp_config_file)
finally:
# Uninstall the deployment
print(f"\nUninstalling deployment for {node} nodes...")
cmd = [
python_cmd,
"deploy.py",
"--chart-name",
chart_name,
"--namespace",
namespace,
"--uninstall",
]
try:
result = subprocess.run(cmd, check=True)
if result.returncode != 0:
print(f"Failed to uninstall deployment for {node} nodes")
except Exception as e:
print(f"Error while uninstalling deployment for {node} nodes: {str(e)}")
# Delete labels for current node configuration
print(f"Deleting labels for {node} nodes...")
cmd = [python_cmd, "deploy.py", "--chart-name", chart_name, "--num-nodes", str(node), "--delete-label"]
if current_node_names:
cmd.extend(["--node-names"] + current_node_names)
try:
result = subprocess.run(cmd, check=True)
if result.returncode != 0:
print(f"Failed to delete labels for {node} nodes")
except Exception as e:
print(f"Error while deleting labels for {node} nodes: {str(e)}")
except Exception as e:
print(f"Error processing configuration for {node} nodes: {str(e)}")
continue
# Cleanup: Remove the untarred directory
if chart_dir and os.path.isdir(chart_dir):
print(f"Removing temporary directory: {chart_dir}")
shutil.rmtree(chart_dir)
print("Temporary directory removed successfully.")
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Deploy and benchmark with specific node configuration.")
parser.add_argument("yaml_file", help="Path to the YAML configuration file")
parser.add_argument("--target-node", type=int, help="Optional: Target number of nodes to deploy.", default=None)
args = parser.parse_args()
main(args.yaml_file, args.target_node)
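And a short sketch of the intended entry point: the chart name is derived from the last underscore-separated token of the YAML file name, so the file name benchmark_chatqna.yaml below is an assumption chosen to resolve to the chatqna chart.

# Hypothetical sketch: run the full deploy-and-benchmark loop for the 4-node case only.
from deploy_and_benchmark import main

main("benchmark_chatqna.yaml", target_node=4)
# Equivalent to: python deploy_and_benchmark.py benchmark_chatqna.yaml --target-node 4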

9
requirements.txt Normal file
View File

@@ -0,0 +1,9 @@
kubernetes
locust
numpy
opea-eval>=1.2
pytest
pyyaml
requests
sseclient-py
transformers

3
version.txt Normal file
View File

@@ -0,0 +1,3 @@
VERSION_MAJOR 1
VERSION_MINOR 2
VERSION_PATCH 0