K8S manifest: Update ChatQnA/CodeGen/CodeTrans/DocSum

- Update the ChatQnA/CodeGen/CodeTrans/DocSum k8s manifests
  so they no longer require pre-creating a model cache directory on the node
  (see the short sketch below).

- Add chatqna-guardrails manifest files.

- Fix bug #752 introduced by PR #669
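
The first bullet in practice (a minimal sketch; the mkdir command and the
$NAMESPACE value are assumptions for illustration, only the path and the
kubectl apply line come from the diffs below):

    # before: hostPath model volumes required preparing every worker node, e.g.
    #   sudo mkdir -p /mnt/opea-models
    # after: emptyDir model volumes work on an unprepared cluster
    kubectl apply -f chatqna.yaml -n $NAMESPACE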

Signed-off-by: Lianhao Lu <lianhao.lu@intel.com>
Author: Lianhao Lu
Date: 2024-09-05 18:05:11 +08:00
Committed by: dolpher
Commit: 0629696333
Parent: 4bd7841f17
14 changed files with 3574 additions and 297 deletions


@@ -80,6 +80,7 @@ jobs:
echo "skip_validate=false" >> $GITHUB_ENV
else
echo "Timeout waiting for pods in namespace $NAMESPACE to be ready!"
.github/workflows/scripts/k8s-utils.sh dump_pods_status $NAMESPACE
exit 1
fi
sleep 60
@@ -91,7 +92,12 @@ jobs:
if $skip_validate; then
echo "Skip validate"
else
${{ github.workspace }}/${{ inputs.example }}/tests/test_manifest_on_${{ inputs.hardware }}.sh validate_${{ inputs.example }} $NAMESPACE
if ${{ github.workspace }}/${{ inputs.example }}/tests/test_manifest_on_${{ inputs.hardware }}.sh validate_${{ inputs.example }} $NAMESPACE ; then
echo "Validate ${{ inputs.example }} successful!"
else
echo "Validate ${{ inputs.example }} failure!!!"
.github/workflows/scripts/k8s-utils.sh dump_all_pod_logs $NAMESPACE
fi
fi
- name: Kubectl uninstall

.github/workflows/scripts/k8s-utils.sh (new executable file, 71 lines)

@@ -0,0 +1,71 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#set -xe
function dump_pod_log() {
pod_name=$1
namespace=$2
echo "-----------Pod: $pod_name---------"
echo "#kubectl describe pod $pod_name -n $namespace"
kubectl describe pod $pod_name -n $namespace
echo "-----------------------------------"
echo "#kubectl logs $pod_name -n $namespace"
kubectl logs $pod_name -n $namespace
echo "-----------------------------------"
}
function dump_pods_status() {
namespace=$1
echo "-----DUMP POD STATUS in NS $namespace------"
kubectl get pods -n $namespace -o wide
echo "-----------------------------------"
# Get all pods in the namespace and their statuses
pods=$(kubectl get pods -n $namespace --no-headers)
# Loop through each pod
echo "$pods" | while read -r line; do
pod_name=$(echo $line | awk '{print $1}')
ready=$(echo $line | awk '{print $2}')
status=$(echo $line | awk '{print $3}')
# Extract the READY count
ready_count=$(echo $ready | cut -d'/' -f1)
required_count=$(echo $ready | cut -d'/' -f2)
# Check if the pod is not in "Running" status or READY count is less than required
if [[ "$status" != "Running" || "$ready_count" -lt "$required_count" ]]; then
dump_pod_log $pod_name $namespace
fi
done
}
function dump_all_pod_logs() {
namespace=$1
echo "-----DUMP POD STATUS AND LOG in NS $namespace------"
pods=$(kubectl get pods -n $namespace -o jsonpath='{.items[*].metadata.name}')
for pod_name in $pods
do
dump_pod_log $pod_name $namespace
done
}
if [ $# -eq 0 ]; then
echo "Usage: $0 <function_name>"
exit 1
fi
case "$1" in
dump_pods_status)
dump_pods_status $2
;;
dump_all_pod_logs)
dump_all_pod_logs $2
;;
*)
echo "Unknown function: $1"
;;
esac
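
Example invocation of the helper (this mirrors the calls added to the workflow
above; the namespace name is only illustrative):

    NAMESPACE=opea-testing
    .github/workflows/scripts/k8s-utils.sh dump_pods_status $NAMESPACE
    .github/workflows/scripts/k8s-utils.sh dump_all_pod_logs $NAMESPACE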

(File diff suppressed because it is too large.)


@@ -8,24 +8,24 @@ kind: ConfigMap
metadata:
name: chatqna-data-prep-config
labels:
helm.sh/chart: data-prep-0.8.0
helm.sh/chart: data-prep-1.0.0
app.kubernetes.io/name: data-prep
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
data:
TEI_ENDPOINT: "http://chatqna-tei"
EMBED_MODEL: ""
REDIS_URL: "redis://chatqna-redis-vector-db:6379"
INDEX_NAME: "rag-redis"
KEY_INDEX_NAME: "file-keys"
SEARCH_BATCH_SIZE: "10"
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
HF_HOME: "/tmp/.cache/huggingface"
http_proxy: ""
https_proxy: ""
no_proxy: ""
LANGCHAIN_TRACING_V2: "false"
LANGCHAIN_API_KEY: "insert-your-langchain-key-here"
LANGCHAIN_PROJECT: "opea-dataprep-service"
LOGFLAG: ""
---
# Source: chatqna/charts/embedding-usvc/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
@@ -36,19 +36,17 @@ kind: ConfigMap
metadata:
name: chatqna-embedding-usvc-config
labels:
helm.sh/chart: embedding-usvc-0.8.0
helm.sh/chart: embedding-usvc-1.0.0
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
data:
TEI_EMBEDDING_ENDPOINT: "http://chatqna-tei"
http_proxy: ""
https_proxy: ""
no_proxy: ""
LANGCHAIN_TRACING_V2: "false"
LANGCHAIN_API_KEY: insert-your-langchain-key-here
LANGCHAIN_PROJECT: "opea-embedding-service"
LOGFLAG: ""
---
# Source: chatqna/charts/llm-uservice/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
@@ -59,10 +57,10 @@ kind: ConfigMap
metadata:
name: chatqna-llm-uservice-config
labels:
helm.sh/chart: llm-uservice-0.8.0
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
data:
TGI_LLM_ENDPOINT: "http://chatqna-tgi"
@@ -71,9 +69,7 @@ data:
http_proxy: ""
https_proxy: ""
no_proxy: ""
LANGCHAIN_TRACING_V2: "false"
LANGCHAIN_API_KEY: insert-your-langchain-key-here
LANGCHAIN_PROJECT: "opea-llm-uservice"
LOGFLAG: ""
---
# Source: chatqna/charts/reranking-usvc/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
@@ -84,19 +80,17 @@ kind: ConfigMap
metadata:
name: chatqna-reranking-usvc-config
labels:
helm.sh/chart: reranking-usvc-0.8.0
helm.sh/chart: reranking-usvc-1.0.0
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
data:
TEI_RERANKING_ENDPOINT: "http://chatqna-teirerank"
http_proxy: ""
https_proxy: ""
no_proxy: ""
LANGCHAIN_TRACING_V2: "false"
LANGCHAIN_API_KEY: "insert-your-langchain-key-here"
LANGCHAIN_PROJECT: "opea-reranking-service"
LOGFLAG: ""
---
# Source: chatqna/charts/retriever-usvc/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
@@ -107,10 +101,10 @@ kind: ConfigMap
metadata:
name: chatqna-retriever-usvc-config
labels:
helm.sh/chart: retriever-usvc-0.8.0
helm.sh/chart: retriever-usvc-1.0.0
app.kubernetes.io/name: retriever-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
data:
TEI_EMBEDDING_ENDPOINT: "http://chatqna-tei"
@@ -121,11 +115,9 @@ data:
http_proxy: ""
https_proxy: ""
no_proxy: ""
LANGCHAIN_TRACING_V2: "false"
LANGCHAIN_API_KEY: "insert-your-langchain-key-here"
LANGCHAIN_PROJECT: "opea-retriever-service"
HF_HOME: "/tmp/.cache/huggingface"
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
LOGFLAG: ""
---
# Source: chatqna/charts/tei/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
@@ -136,7 +128,7 @@ kind: ConfigMap
metadata:
name: chatqna-tei-config
labels:
helm.sh/chart: tei-0.8.0
helm.sh/chart: tei-1.0.0
app.kubernetes.io/name: tei
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "cpu-1.5"
@@ -161,7 +153,7 @@ kind: ConfigMap
metadata:
name: chatqna-teirerank-config
labels:
helm.sh/chart: teirerank-0.8.0
helm.sh/chart: teirerank-1.0.0
app.kubernetes.io/name: teirerank
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "cpu-1.5"
@@ -185,7 +177,7 @@ kind: ConfigMap
metadata:
name: chatqna-tgi-config
labels:
helm.sh/chart: tgi-0.8.0
helm.sh/chart: tgi-1.0.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "2.1.0"
@@ -199,7 +191,6 @@ data:
no_proxy: ""
HABANA_LOGS: "/tmp/habana_logs"
NUMBA_CACHE_DIR: "/tmp"
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
HF_HOME: "/tmp/.cache/huggingface"
MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"
@@ -213,10 +204,10 @@ kind: Service
metadata:
name: chatqna-data-prep
labels:
helm.sh/chart: data-prep-0.8.0
helm.sh/chart: data-prep-1.0.0
app.kubernetes.io/name: data-prep
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
@@ -238,10 +229,10 @@ kind: Service
metadata:
name: chatqna-embedding-usvc
labels:
helm.sh/chart: embedding-usvc-0.8.0
helm.sh/chart: embedding-usvc-1.0.0
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
@@ -263,10 +254,10 @@ kind: Service
metadata:
name: chatqna-llm-uservice
labels:
helm.sh/chart: llm-uservice-0.8.0
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
@@ -288,7 +279,7 @@ kind: Service
metadata:
name: chatqna-redis-vector-db
labels:
helm.sh/chart: redis-vector-db-0.8.0
helm.sh/chart: redis-vector-db-1.0.0
app.kubernetes.io/name: redis-vector-db
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "7.2.0-v9"
@@ -317,10 +308,10 @@ kind: Service
metadata:
name: chatqna-reranking-usvc
labels:
helm.sh/chart: reranking-usvc-0.8.0
helm.sh/chart: reranking-usvc-1.0.0
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
@@ -342,10 +333,10 @@ kind: Service
metadata:
name: chatqna-retriever-usvc
labels:
helm.sh/chart: retriever-usvc-0.8.0
helm.sh/chart: retriever-usvc-1.0.0
app.kubernetes.io/name: retriever-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
@@ -367,7 +358,7 @@ kind: Service
metadata:
name: chatqna-tei
labels:
helm.sh/chart: tei-0.8.0
helm.sh/chart: tei-1.0.0
app.kubernetes.io/name: tei
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "cpu-1.5"
@@ -392,7 +383,7 @@ kind: Service
metadata:
name: chatqna-teirerank
labels:
helm.sh/chart: teirerank-0.8.0
helm.sh/chart: teirerank-1.0.0
app.kubernetes.io/name: teirerank
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "cpu-1.5"
@@ -417,7 +408,7 @@ kind: Service
metadata:
name: chatqna-tgi
labels:
helm.sh/chart: tgi-0.8.0
helm.sh/chart: tgi-1.0.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "2.1.0"
@@ -442,10 +433,10 @@ kind: Service
metadata:
name: chatqna
labels:
helm.sh/chart: chatqna-0.8.0
helm.sh/chart: chatqna-1.0.0
app.kubernetes.io/name: chatqna
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
@@ -467,10 +458,10 @@ kind: Deployment
metadata:
name: chatqna-data-prep
labels:
helm.sh/chart: data-prep-0.8.0
helm.sh/chart: data-prep-1.0.0
app.kubernetes.io/name: data-prep
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
@@ -545,10 +536,10 @@ kind: Deployment
metadata:
name: chatqna-embedding-usvc
labels:
helm.sh/chart: embedding-usvc-0.8.0
helm.sh/chart: embedding-usvc-1.0.0
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
@@ -623,10 +614,10 @@ kind: Deployment
metadata:
name: chatqna-llm-uservice
labels:
helm.sh/chart: llm-uservice-0.8.0
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
@@ -701,7 +692,7 @@ kind: Deployment
metadata:
name: chatqna-redis-vector-db
labels:
helm.sh/chart: redis-vector-db-0.8.0
helm.sh/chart: redis-vector-db-1.0.0
app.kubernetes.io/name: redis-vector-db
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "7.2.0-v9"
@@ -773,10 +764,10 @@ kind: Deployment
metadata:
name: chatqna-reranking-usvc
labels:
helm.sh/chart: reranking-usvc-0.8.0
helm.sh/chart: reranking-usvc-1.0.0
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
@@ -851,10 +842,10 @@ kind: Deployment
metadata:
name: chatqna-retriever-usvc
labels:
helm.sh/chart: retriever-usvc-0.8.0
helm.sh/chart: retriever-usvc-1.0.0
app.kubernetes.io/name: retriever-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
@@ -929,12 +920,13 @@ kind: Deployment
metadata:
name: chatqna-tei
labels:
helm.sh/chart: tei-0.8.0
helm.sh/chart: tei-1.0.0
app.kubernetes.io/name: tei
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "cpu-1.5"
app.kubernetes.io/managed-by: Helm
spec:
# use explicit replica counts only if HorizontalPodAutoscaler is disabled
replicas: 1
selector:
matchLabels:
@@ -954,7 +946,15 @@ spec:
- configMapRef:
name: chatqna-tei-config
securityContext:
{}
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: false
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "ghcr.io/huggingface/tei-gaudi:synapse_1.16"
imagePullPolicy: IfNotPresent
args:
@@ -995,9 +995,7 @@ spec:
habana.ai/gaudi: 1
volumes:
- name: model-volume
hostPath:
path: /mnt/opea-models
type: Directory
emptyDir: {}
- name: shm
emptyDir:
medium: Memory
@@ -1014,12 +1012,13 @@ kind: Deployment
metadata:
name: chatqna-teirerank
labels:
helm.sh/chart: teirerank-0.8.0
helm.sh/chart: teirerank-1.0.0
app.kubernetes.io/name: teirerank
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "cpu-1.5"
app.kubernetes.io/managed-by: Helm
spec:
# use explicit replica counts only if HorizontalPodAutoscaler is disabled
replicas: 1
selector:
matchLabels:
@@ -1039,7 +1038,15 @@ spec:
- configMapRef:
name: chatqna-teirerank-config
securityContext:
{}
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.5"
imagePullPolicy: IfNotPresent
args:
@@ -1079,9 +1086,7 @@ spec:
{}
volumes:
- name: model-volume
hostPath:
path: /mnt/opea-models
type: Directory
emptyDir: {}
- name: shm
emptyDir:
medium: Memory
@@ -1098,12 +1103,13 @@ kind: Deployment
metadata:
name: chatqna-tgi
labels:
helm.sh/chart: tgi-0.8.0
helm.sh/chart: tgi-1.0.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "2.1.0"
app.kubernetes.io/managed-by: Helm
spec:
# use explicit replica counts only if HorizontalPodAutoscaler is disabled
replicas: 1
selector:
matchLabels:
@@ -1123,7 +1129,15 @@ spec:
- configMapRef:
name: chatqna-tgi-config
securityContext:
{}
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "ghcr.io/huggingface/tgi-gaudi:2.0.1"
imagePullPolicy: IfNotPresent
volumeMounts:
@@ -1157,9 +1171,7 @@ spec:
habana.ai/gaudi: 1
volumes:
- name: model-volume
hostPath:
path: /mnt/opea-models
type: Directory
emptyDir: {}
- name: tmp
emptyDir: {}
---
@@ -1172,10 +1184,10 @@ kind: Deployment
metadata:
name: chatqna
labels:
helm.sh/chart: chatqna-0.8.0
helm.sh/chart: chatqna-1.0.0
app.kubernetes.io/name: chatqna
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
@@ -1202,6 +1214,8 @@ spec:
value: chatqna-retriever-usvc
- name: EMBEDDING_SERVICE_HOST_IP
value: chatqna-embedding-usvc
- name: GUARDRAIL_SERVICE_HOST_IP
value: chatqna-guardrails-usvc
securityContext:
allowPrivilegeEscalation: false
capabilities:

(File diff suppressed because it is too large.)


@@ -25,7 +25,7 @@ data:
}
location / {
proxy_pass http://chatqna-ui.default.svc.cluster.local:5173;
proxy_pass http://chatqna-ui:5173;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
@@ -33,7 +33,7 @@ data:
}
location /v1/chatqna {
proxy_pass http://chatqna.default.svc.cluster.local:8888;
proxy_pass http://chatqna:8888;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
@@ -41,7 +41,7 @@ data:
}
location /v1/dataprep {
proxy_pass http://chatqna-data-prep.default.svc.cluster.local:6007;
proxy_pass http://chatqna-data-prep:6007;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
@@ -49,7 +49,7 @@ data:
}
location /v1/dataprep/get_file {
proxy_pass http://chatqna-data-prep.default.svc.cluster.local:6007;
proxy_pass http://chatqna-data-prep:6007;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
@@ -57,7 +57,7 @@ data:
}
location /v1/dataprep/delete_file {
proxy_pass http://chatqna-data-prep.default.svc.cluster.local:6007;
proxy_pass http://chatqna-data-prep:6007;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
@@ -74,24 +74,24 @@ kind: ConfigMap
metadata:
name: chatqna-data-prep-config
labels:
helm.sh/chart: data-prep-0.8.0
helm.sh/chart: data-prep-1.0.0
app.kubernetes.io/name: data-prep
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
data:
TEI_ENDPOINT: "http://chatqna-tei"
EMBED_MODEL: ""
REDIS_URL: "redis://chatqna-redis-vector-db:6379"
INDEX_NAME: "rag-redis"
KEY_INDEX_NAME: "file-keys"
SEARCH_BATCH_SIZE: "10"
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
HF_HOME: "/tmp/.cache/huggingface"
http_proxy: ""
https_proxy: ""
no_proxy: ""
LANGCHAIN_TRACING_V2: "false"
LANGCHAIN_API_KEY: "insert-your-langchain-key-here"
LANGCHAIN_PROJECT: "opea-dataprep-service"
LOGFLAG: ""
---
# Source: chatqna/charts/embedding-usvc/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
@@ -102,19 +102,17 @@ kind: ConfigMap
metadata:
name: chatqna-embedding-usvc-config
labels:
helm.sh/chart: embedding-usvc-0.8.0
helm.sh/chart: embedding-usvc-1.0.0
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
data:
TEI_EMBEDDING_ENDPOINT: "http://chatqna-tei"
http_proxy: ""
https_proxy: ""
no_proxy: ""
LANGCHAIN_TRACING_V2: "false"
LANGCHAIN_API_KEY: insert-your-langchain-key-here
LANGCHAIN_PROJECT: "opea-embedding-service"
LOGFLAG: ""
---
# Source: chatqna/charts/llm-uservice/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
@@ -125,10 +123,10 @@ kind: ConfigMap
metadata:
name: chatqna-llm-uservice-config
labels:
helm.sh/chart: llm-uservice-0.8.0
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
data:
TGI_LLM_ENDPOINT: "http://chatqna-tgi"
@@ -137,9 +135,7 @@ data:
http_proxy: ""
https_proxy: ""
no_proxy: ""
LANGCHAIN_TRACING_V2: "false"
LANGCHAIN_API_KEY: insert-your-langchain-key-here
LANGCHAIN_PROJECT: "opea-llm-uservice"
LOGFLAG: ""
---
# Source: chatqna/charts/reranking-usvc/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
@@ -150,19 +146,17 @@ kind: ConfigMap
metadata:
name: chatqna-reranking-usvc-config
labels:
helm.sh/chart: reranking-usvc-0.8.0
helm.sh/chart: reranking-usvc-1.0.0
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
data:
TEI_RERANKING_ENDPOINT: "http://chatqna-teirerank"
http_proxy: ""
https_proxy: ""
no_proxy: ""
LANGCHAIN_TRACING_V2: "false"
LANGCHAIN_API_KEY: "insert-your-langchain-key-here"
LANGCHAIN_PROJECT: "opea-reranking-service"
LOGFLAG: ""
---
# Source: chatqna/charts/retriever-usvc/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
@@ -173,10 +167,10 @@ kind: ConfigMap
metadata:
name: chatqna-retriever-usvc-config
labels:
helm.sh/chart: retriever-usvc-0.8.0
helm.sh/chart: retriever-usvc-1.0.0
app.kubernetes.io/name: retriever-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
data:
TEI_EMBEDDING_ENDPOINT: "http://chatqna-tei"
@@ -187,11 +181,9 @@ data:
http_proxy: ""
https_proxy: ""
no_proxy: ""
LANGCHAIN_TRACING_V2: "false"
LANGCHAIN_API_KEY: "insert-your-langchain-key-here"
LANGCHAIN_PROJECT: "opea-retriever-service"
HF_HOME: "/tmp/.cache/huggingface"
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
LOGFLAG: ""
---
# Source: chatqna/charts/tei/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
@@ -202,7 +194,7 @@ kind: ConfigMap
metadata:
name: chatqna-tei-config
labels:
helm.sh/chart: tei-0.8.0
helm.sh/chart: tei-1.0.0
app.kubernetes.io/name: tei
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "cpu-1.5"
@@ -227,7 +219,7 @@ kind: ConfigMap
metadata:
name: chatqna-teirerank-config
labels:
helm.sh/chart: teirerank-0.8.0
helm.sh/chart: teirerank-1.0.0
app.kubernetes.io/name: teirerank
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "cpu-1.5"
@@ -251,7 +243,7 @@ kind: ConfigMap
metadata:
name: chatqna-tgi-config
labels:
helm.sh/chart: tgi-0.8.0
helm.sh/chart: tgi-1.0.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "2.1.0"
@@ -265,7 +257,6 @@ data:
no_proxy: ""
HABANA_LOGS: "/tmp/habana_logs"
NUMBA_CACHE_DIR: "/tmp"
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
HF_HOME: "/tmp/.cache/huggingface"
CUDA_GRAPHS: "0"
---
@@ -278,10 +269,10 @@ kind: Service
metadata:
name: chatqna-data-prep
labels:
helm.sh/chart: data-prep-0.8.0
helm.sh/chart: data-prep-1.0.0
app.kubernetes.io/name: data-prep
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
@@ -303,10 +294,10 @@ kind: Service
metadata:
name: chatqna-embedding-usvc
labels:
helm.sh/chart: embedding-usvc-0.8.0
helm.sh/chart: embedding-usvc-1.0.0
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
@@ -328,10 +319,10 @@ kind: Service
metadata:
name: chatqna-llm-uservice
labels:
helm.sh/chart: llm-uservice-0.8.0
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
@@ -353,7 +344,7 @@ kind: Service
metadata:
name: chatqna-redis-vector-db
labels:
helm.sh/chart: redis-vector-db-0.8.0
helm.sh/chart: redis-vector-db-1.0.0
app.kubernetes.io/name: redis-vector-db
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "7.2.0-v9"
@@ -382,10 +373,10 @@ kind: Service
metadata:
name: chatqna-reranking-usvc
labels:
helm.sh/chart: reranking-usvc-0.8.0
helm.sh/chart: reranking-usvc-1.0.0
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
@@ -407,10 +398,10 @@ kind: Service
metadata:
name: chatqna-retriever-usvc
labels:
helm.sh/chart: retriever-usvc-0.8.0
helm.sh/chart: retriever-usvc-1.0.0
app.kubernetes.io/name: retriever-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
@@ -432,7 +423,7 @@ kind: Service
metadata:
name: chatqna-tei
labels:
helm.sh/chart: tei-0.8.0
helm.sh/chart: tei-1.0.0
app.kubernetes.io/name: tei
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "cpu-1.5"
@@ -457,7 +448,7 @@ kind: Service
metadata:
name: chatqna-teirerank
labels:
helm.sh/chart: teirerank-0.8.0
helm.sh/chart: teirerank-1.0.0
app.kubernetes.io/name: teirerank
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "cpu-1.5"
@@ -482,7 +473,7 @@ kind: Service
metadata:
name: chatqna-tgi
labels:
helm.sh/chart: tgi-0.8.0
helm.sh/chart: tgi-1.0.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "2.1.0"
@@ -507,10 +498,10 @@ kind: Service
metadata:
name: chatqna
labels:
helm.sh/chart: chatqna-0.8.0
helm.sh/chart: chatqna-1.0.0
app.kubernetes.io/name: chatqna
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
@@ -557,10 +548,10 @@ kind: Deployment
metadata:
name: chatqna-data-prep
labels:
helm.sh/chart: data-prep-0.8.0
helm.sh/chart: data-prep-1.0.0
app.kubernetes.io/name: data-prep
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
@@ -635,10 +626,10 @@ kind: Deployment
metadata:
name: chatqna-embedding-usvc
labels:
helm.sh/chart: embedding-usvc-0.8.0
helm.sh/chart: embedding-usvc-1.0.0
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
@@ -713,10 +704,10 @@ kind: Deployment
metadata:
name: chatqna-llm-uservice
labels:
helm.sh/chart: llm-uservice-0.8.0
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
@@ -791,7 +782,7 @@ kind: Deployment
metadata:
name: chatqna-redis-vector-db
labels:
helm.sh/chart: redis-vector-db-0.8.0
helm.sh/chart: redis-vector-db-1.0.0
app.kubernetes.io/name: redis-vector-db
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "7.2.0-v9"
@@ -863,10 +854,10 @@ kind: Deployment
metadata:
name: chatqna-reranking-usvc
labels:
helm.sh/chart: reranking-usvc-0.8.0
helm.sh/chart: reranking-usvc-1.0.0
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
@@ -941,10 +932,10 @@ kind: Deployment
metadata:
name: chatqna-retriever-usvc
labels:
helm.sh/chart: retriever-usvc-0.8.0
helm.sh/chart: retriever-usvc-1.0.0
app.kubernetes.io/name: retriever-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
@@ -1062,12 +1053,13 @@ kind: Deployment
metadata:
name: chatqna-tei
labels:
helm.sh/chart: tei-0.8.0
helm.sh/chart: tei-1.0.0
app.kubernetes.io/name: tei
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "cpu-1.5"
app.kubernetes.io/managed-by: Helm
spec:
# use explicit replica counts only if HorizontalPodAutoscaler is disabled
replicas: 1
selector:
matchLabels:
@@ -1087,7 +1079,15 @@ spec:
- configMapRef:
name: chatqna-tei-config
securityContext:
{}
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.5"
imagePullPolicy: IfNotPresent
args:
@@ -1127,9 +1127,7 @@ spec:
{}
volumes:
- name: model-volume
hostPath:
path: /mnt/opea-models
type: Directory
emptyDir: {}
- name: shm
emptyDir:
medium: Memory
@@ -1146,12 +1144,13 @@ kind: Deployment
metadata:
name: chatqna-teirerank
labels:
helm.sh/chart: teirerank-0.8.0
helm.sh/chart: teirerank-1.0.0
app.kubernetes.io/name: teirerank
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "cpu-1.5"
app.kubernetes.io/managed-by: Helm
spec:
# use explicit replica counts only if HorizontalPodAutoscaler is disabled
replicas: 1
selector:
matchLabels:
@@ -1171,7 +1170,15 @@ spec:
- configMapRef:
name: chatqna-teirerank-config
securityContext:
{}
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.5"
imagePullPolicy: IfNotPresent
args:
@@ -1211,9 +1218,7 @@ spec:
{}
volumes:
- name: model-volume
hostPath:
path: /mnt/opea-models
type: Directory
emptyDir: {}
- name: shm
emptyDir:
medium: Memory
@@ -1230,12 +1235,13 @@ kind: Deployment
metadata:
name: chatqna-tgi
labels:
helm.sh/chart: tgi-0.8.0
helm.sh/chart: tgi-1.0.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "2.1.0"
app.kubernetes.io/managed-by: Helm
spec:
# use explicit replica counts only if HorizontalPodAutoscaler is disabled
replicas: 1
selector:
matchLabels:
@@ -1255,7 +1261,15 @@ spec:
- configMapRef:
name: chatqna-tgi-config
securityContext:
{}
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
@@ -1288,9 +1302,7 @@ spec:
{}
volumes:
- name: model-volume
hostPath:
path: /mnt/opea-models
type: Directory
emptyDir: {}
- name: tmp
emptyDir: {}
---
@@ -1303,10 +1315,10 @@ kind: Deployment
metadata:
name: chatqna
labels:
helm.sh/chart: chatqna-0.8.0
helm.sh/chart: chatqna-1.0.0
app.kubernetes.io/name: chatqna
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
@@ -1333,6 +1345,8 @@ spec:
value: chatqna-retriever-usvc
- name: EMBEDDING_SERVICE_HOST_IP
value: chatqna-embedding-usvc
- name: GUARDRAIL_SERVICE_HOST_IP
value: chatqna-guardrails-usvc
securityContext:
allowPrivilegeEscalation: false
capabilities:


@@ -9,6 +9,9 @@ MOUNT_DIR=/home/$USER_ID/.cache/huggingface/hub
IMAGE_REPO=${IMAGE_REPO:-}
IMAGE_TAG=${IMAGE_TAG:-latest}
ROLLOUT_TIMEOUT_SECONDS="1800s"
KUBECTL_TIMEOUT_SECONDS="60s"
function init_chatqna() {
# replace the mount dir "path: /mnt/opea-models" with "path: $MOUNT_DIR"
find . -name '*.yaml' -type f -exec sed -i "s#path: /mnt/opea-models#path: $MOUNT_DIR#g" {} \;
@@ -27,7 +30,7 @@ function init_chatqna() {
function install_chatqna {
echo "namespace is $NAMESPACE"
kubectl apply -f . -n $NAMESPACE
kubectl apply -f chatqna.yaml -n $NAMESPACE
# Sleep enough time for retriever-usvc to be ready
sleep 60
}
@@ -40,13 +43,15 @@ function get_end_point() {
}
function validate_chatqna() {
local ns=$1
local log=$2
max_retry=20
# make sure microservice retriever-usvc is ready
# try to curl retriever-svc for max_retry times
test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
for ((i=1; i<=max_retry; i++))
do
endpoint_url=$(get_end_point "chatqna-retriever-usvc" $NAMESPACE)
endpoint_url=$(get_end_point "chatqna-retriever-usvc" $ns)
curl http://$endpoint_url/v1/retrieval -X POST \
-d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${test_embedding}}" \
-H 'Content-Type: application/json' && break
@@ -55,32 +60,32 @@ function validate_chatqna() {
# if i is bigger than max_retry, then exit with error
if [ $i -gt $max_retry ]; then
echo "Microservice retriever failed, exit with error."
exit 1
return 1
fi
# make sure microservice tgi-svc is ready
for ((i=1; i<=max_retry; i++))
do
endpoint_url=$(get_end_point "chatqna-tgi" $NAMESPACE)
endpoint_url=$(get_end_point "chatqna-tgi" $ns)
curl http://$endpoint_url/generate -X POST \
-d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
-H 'Content-Type: application/json' && break
sleep 10
sleep 30
done
# if i is bigger than max_retry, then exit with error
if [ $i -gt $max_retry ]; then
echo "Microservice tgi failed, exit with error."
exit 1
return 1
fi
# check megaservice works
# generate a random logfile name to avoid conflict among multiple runners
LOGFILE=$LOG_PATH/curlmega_$NAMESPACE.log
endpoint_url=$(get_end_point "chatqna" $NAMESPACE)
LOGFILE=$LOG_PATH/curlmega_$log.log
endpoint_url=$(get_end_point "chatqna" $ns)
curl http://$endpoint_url/v1/chatqna -H "Content-Type: application/json" -d '{"messages": "What is the revenue of Nike in 2023?"}' > $LOGFILE
exit_code=$?
if [ $exit_code -ne 0 ]; then
echo "Megaservice failed, please check the logs in $LOGFILE!"
exit 1
return ${exit_code}
fi
echo "Checking response results, make sure the output is reasonable. "
@@ -91,10 +96,49 @@ function validate_chatqna() {
fi
if [ $status == false ]; then
echo "Response check failed, please check the logs in artifacts!"
exit 1
return 1
else
echo "Response check succeed!"
fi
return 0
}
function _cleanup_ns() {
local ns=$1
if kubectl get ns $ns; then
if ! kubectl delete ns $ns --timeout=$KUBECTL_TIMEOUT_SECONDS; then
kubectl delete pods --namespace $ns --force --grace-period=0 --all
kubectl delete ns $ns --force --grace-period=0 --timeout=$KUBECTL_TIMEOUT_SECONDS
fi
fi
}
function install_and_validate_chatqna_guardrail() {
echo "Testing manifests chatqna_guardrils"
local ns=${NAMESPACE}-gaurdrails
_cleanup_ns $ns
kubectl create namespace $ns
# install guardrail
kubectl apply -f chatqna-guardrails.yaml -n $ns
# Sleep enough time for chatqna_guardrail to be ready
sleep 60
if kubectl rollout status deployment -n "$ns" --timeout "$ROLLOUT_TIMEOUT_SECONDS"; then
echo "Waiting for cahtqna_guardrail pod ready done!"
else
echo "Timeout waiting for chatqna_guardrail pod ready!"
_cleanup_ns $ns
exit 1
fi
# validate guardrail
validate_chatqna $ns chatqna-guardrails
local ret=$?
if [ $ret -ne 0 ]; then
_cleanup_ns $ns
exit 1
fi
_cleanup_ns $ns
}
if [ $# -eq 0 ]; then
@@ -117,7 +161,15 @@ case "$1" in
validate_ChatQnA)
NAMESPACE=$2
SERVICE_NAME=chatqna
validate_chatqna
validate_chatqna $NAMESPACE chatqna
ret=$?
if [ $ret -ne 0 ]; then
exit $ret
fi
pushd ChatQnA/kubernetes/manifests/gaudi
set +e
install_and_validate_chatqna_guardrail
popd
;;
*)
echo "Unknown function: $1"


@@ -9,6 +9,9 @@ MOUNT_DIR=/home/$USER_ID/.cache/huggingface/hub
IMAGE_REPO=${IMAGE_REPO:-}
IMAGE_TAG=${IMAGE_TAG:-latest}
ROLLOUT_TIMEOUT_SECONDS="1800s"
KUBECTL_TIMEOUT_SECONDS="60s"
function init_chatqna() {
# replace the mount dir "path: /mnt/opea-models" with "path: $MOUNT_DIR"
find . -name '*.yaml' -type f -exec sed -i "s#path: /mnt/opea-models#path: $MOUNT_DIR#g" {} \;
@@ -27,19 +30,29 @@ function init_chatqna() {
function install_chatqna {
echo "namespace is $NAMESPACE"
kubectl apply -f . -n $NAMESPACE
kubectl apply -f chatqna.yaml -n $NAMESPACE
# Sleep enough time for retriever-usvc to be ready
sleep 60
}
function get_end_point() {
# $1 is service name, $2 is namespace
ip_address=$(kubectl get svc $1 -n $2 -o jsonpath='{.spec.clusterIP}')
port=$(kubectl get svc $1 -n $2 -o jsonpath='{.spec.ports[0].port}')
echo "$ip_address:$port"
}
function validate_chatqna() {
local ns=$1
local log=$2
max_retry=20
# make sure microservice retriever-usvc is ready
# try to curl retriever-svc for max_retry times
test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
for ((i=1; i<=max_retry; i++))
do
curl http://chatqna-retriever-usvc.$NAMESPACE:7000/v1/retrieval -X POST \
endpoint_url=$(get_end_point "chatqna-retriever-usvc" $ns)
curl http://$endpoint_url/v1/retrieval -X POST \
-d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${test_embedding}}" \
-H 'Content-Type: application/json' && break
sleep 30
@@ -47,30 +60,32 @@ function validate_chatqna() {
# if i is bigger than max_retry, then exit with error
if [ $i -gt $max_retry ]; then
echo "Microservice retriever failed, exit with error."
exit 1
return 1
fi
# make sure microservice tgi-svc is ready
for ((i=1; i<=max_retry; i++))
do
curl http://chatqna-tgi.$NAMESPACE:80/generate -X POST \
endpoint_url=$(get_end_point "chatqna-tgi" $ns)
curl http://$endpoint_url/generate -X POST \
-d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
-H 'Content-Type: application/json' && break
sleep 10
sleep 30
done
# if i is bigger than max_retry, then exit with error
if [ $i -gt $max_retry ]; then
echo "Microservice tgi failed, exit with error."
exit 1
return 1
fi
# check megaservice works
# generate a random logfile name to avoid conflict among multiple runners
LOGFILE=$LOG_PATH/curlmega_$NAMESPACE.log
curl http://chatqna.$NAMESPACE:8888/v1/chatqna -H "Content-Type: application/json" -d '{"messages": "What is the revenue of Nike in 2023?"}' > $LOGFILE
LOGFILE=$LOG_PATH/curlmega_$log.log
endpoint_url=$(get_end_point "chatqna" $ns)
curl http://$endpoint_url/v1/chatqna -H "Content-Type: application/json" -d '{"messages": "What is the revenue of Nike in 2023?"}' > $LOGFILE
exit_code=$?
if [ $exit_code -ne 0 ]; then
echo "Megaservice failed, please check the logs in $LOGFILE!"
exit 1
return ${exit_code}
fi
echo "Checking response results, make sure the output is reasonable. "
@@ -81,10 +96,49 @@ function validate_chatqna() {
fi
if [ $status == false ]; then
echo "Response check failed, please check the logs in artifacts!"
exit 1
return 1
else
echo "Response check succeed!"
fi
return 0
}
function _cleanup_ns() {
local ns=$1
if kubectl get ns $ns; then
if ! kubectl delete ns $ns --timeout=$KUBECTL_TIMEOUT_SECONDS; then
kubectl delete pods --namespace $ns --force --grace-period=0 --all
kubectl delete ns $ns --force --grace-period=0 --timeout=$KUBECTL_TIMEOUT_SECONDS
fi
fi
}
function install_and_validate_chatqna_guardrail() {
echo "Testing manifests chatqna_guardrils"
local ns=${NAMESPACE}-gaurdrails
_cleanup_ns $ns
kubectl create namespace $ns
# install guardrail
kubectl apply -f chatqna-guardrails.yaml -n $ns
# Sleep enough time for chatqna_guardrail to be ready
sleep 60
if kubectl rollout status deployment -n "$ns" --timeout "$ROLLOUT_TIMEOUT_SECONDS"; then
echo "Waiting for cahtqna_guardrail pod ready done!"
else
echo "Timeout waiting for chatqna_guardrail pod ready!"
_cleanup_ns $ns
exit 1
fi
# validate guardrail
validate_chatqna $ns chatqna-guardrails
local ret=$?
if [ $ret -ne 0 ]; then
_cleanup_ns $ns
exit 1
fi
_cleanup_ns $ns
}
if [ $# -eq 0 ]; then
@@ -107,7 +161,15 @@ case "$1" in
validate_ChatQnA)
NAMESPACE=$2
SERVICE_NAME=chatqna
validate_chatqna
validate_chatqna $NAMESPACE chatqna
ret=$?
if [ $ret -ne 0 ]; then
exit $ret
fi
pushd ChatQnA/kubernetes/manifests/xeon
set +e
install_and_validate_chatqna_guardrail
popd
;;
*)
echo "Unknown function: $1"


@@ -8,10 +8,10 @@ kind: ConfigMap
metadata:
name: codegen-llm-uservice-config
labels:
helm.sh/chart: llm-uservice-0.8.0
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
data:
TGI_LLM_ENDPOINT: "http://codegen-tgi"
@@ -20,9 +20,7 @@ data:
http_proxy: ""
https_proxy: ""
no_proxy: ""
LANGCHAIN_TRACING_V2: "false"
LANGCHAIN_API_KEY: insert-your-langchain-key-here
LANGCHAIN_PROJECT: "opea-llm-uservice"
LOGFLAG: ""
---
# Source: codegen/charts/tgi/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
@@ -33,7 +31,7 @@ kind: ConfigMap
metadata:
name: codegen-tgi-config
labels:
helm.sh/chart: tgi-0.8.0
helm.sh/chart: tgi-1.0.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "2.1.0"
@@ -47,7 +45,6 @@ data:
no_proxy: ""
HABANA_LOGS: "/tmp/habana_logs"
NUMBA_CACHE_DIR: "/tmp"
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
HF_HOME: "/tmp/.cache/huggingface"
MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"
@@ -61,10 +58,10 @@ kind: Service
metadata:
name: codegen-llm-uservice
labels:
helm.sh/chart: llm-uservice-0.8.0
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
@@ -86,7 +83,7 @@ kind: Service
metadata:
name: codegen-tgi
labels:
helm.sh/chart: tgi-0.8.0
helm.sh/chart: tgi-1.0.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "2.1.0"
@@ -111,10 +108,10 @@ kind: Service
metadata:
name: codegen
labels:
helm.sh/chart: codegen-0.8.0
helm.sh/chart: codegen-1.0.0
app.kubernetes.io/name: codegen
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
@@ -136,10 +133,10 @@ kind: Deployment
metadata:
name: codegen-llm-uservice
labels:
helm.sh/chart: llm-uservice-0.8.0
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
@@ -214,12 +211,13 @@ kind: Deployment
metadata:
name: codegen-tgi
labels:
helm.sh/chart: tgi-0.8.0
helm.sh/chart: tgi-1.0.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "2.1.0"
app.kubernetes.io/managed-by: Helm
spec:
# use explicit replica counts only if HorizontalPodAutoscaler is disabled
replicas: 1
selector:
matchLabels:
@@ -239,7 +237,15 @@ spec:
- configMapRef:
name: codegen-tgi-config
securityContext:
{}
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "ghcr.io/huggingface/tgi-gaudi:2.0.1"
imagePullPolicy: IfNotPresent
volumeMounts:
@@ -275,9 +281,7 @@ spec:
hugepages-2Mi: 500Mi
volumes:
- name: model-volume
hostPath:
path: /mnt/opea-models
type: Directory
emptyDir: {}
- name: tmp
emptyDir: {}
---
@@ -290,10 +294,10 @@ kind: Deployment
metadata:
name: codegen
labels:
helm.sh/chart: codegen-0.8.0
helm.sh/chart: codegen-1.0.0
app.kubernetes.io/name: codegen
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1


@@ -8,10 +8,10 @@ kind: ConfigMap
metadata:
name: codegen-llm-uservice-config
labels:
helm.sh/chart: llm-uservice-0.8.0
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
data:
TGI_LLM_ENDPOINT: "http://codegen-tgi"
@@ -20,9 +20,7 @@ data:
http_proxy: ""
https_proxy: ""
no_proxy: ""
LANGCHAIN_TRACING_V2: "false"
LANGCHAIN_API_KEY: insert-your-langchain-key-here
LANGCHAIN_PROJECT: "opea-llm-uservice"
LOGFLAG: ""
---
# Source: codegen/charts/tgi/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
@@ -33,7 +31,7 @@ kind: ConfigMap
metadata:
name: codegen-tgi-config
labels:
helm.sh/chart: tgi-0.8.0
helm.sh/chart: tgi-1.0.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "2.1.0"
@@ -47,7 +45,6 @@ data:
no_proxy: ""
HABANA_LOGS: "/tmp/habana_logs"
NUMBA_CACHE_DIR: "/tmp"
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
HF_HOME: "/tmp/.cache/huggingface"
CUDA_GRAPHS: "0"
---
@@ -60,10 +57,10 @@ kind: Service
metadata:
name: codegen-llm-uservice
labels:
helm.sh/chart: llm-uservice-0.8.0
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
@@ -85,7 +82,7 @@ kind: Service
metadata:
name: codegen-tgi
labels:
helm.sh/chart: tgi-0.8.0
helm.sh/chart: tgi-1.0.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "2.1.0"
@@ -110,10 +107,10 @@ kind: Service
metadata:
name: codegen
labels:
helm.sh/chart: codegen-0.8.0
helm.sh/chart: codegen-1.0.0
app.kubernetes.io/name: codegen
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
@@ -135,10 +132,10 @@ kind: Deployment
metadata:
name: codegen-llm-uservice
labels:
helm.sh/chart: llm-uservice-0.8.0
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
@@ -213,12 +210,13 @@ kind: Deployment
metadata:
name: codegen-tgi
labels:
helm.sh/chart: tgi-0.8.0
helm.sh/chart: tgi-1.0.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "2.1.0"
app.kubernetes.io/managed-by: Helm
spec:
# use explicit replica counts only if HorizontalPodAutoscaler is disabled
replicas: 1
selector:
matchLabels:
@@ -238,7 +236,15 @@ spec:
- configMapRef:
name: codegen-tgi-config
securityContext:
{}
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
@@ -271,9 +277,7 @@ spec:
{}
volumes:
- name: model-volume
hostPath:
path: /mnt/opea-models
type: Directory
emptyDir: {}
- name: tmp
emptyDir: {}
---
@@ -286,10 +290,10 @@ kind: Deployment
metadata:
name: codegen
labels:
helm.sh/chart: codegen-0.8.0
helm.sh/chart: codegen-1.0.0
app.kubernetes.io/name: codegen
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1


@@ -8,10 +8,10 @@ kind: ConfigMap
metadata:
name: codetrans-llm-uservice-config
labels:
helm.sh/chart: llm-uservice-0.8.0
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: codetrans
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
data:
TGI_LLM_ENDPOINT: "http://codetrans-tgi"
@@ -20,6 +20,7 @@ data:
http_proxy: ""
https_proxy: ""
no_proxy: ""
LOGFLAG: ""
---
# Source: codetrans/charts/tgi/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
@@ -30,7 +31,7 @@ kind: ConfigMap
metadata:
name: codetrans-tgi-config
labels:
helm.sh/chart: tgi-0.8.0
helm.sh/chart: tgi-1.0.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codetrans
app.kubernetes.io/version: "2.1.0"
@@ -44,7 +45,6 @@ data:
no_proxy: ""
HABANA_LOGS: "/tmp/habana_logs"
NUMBA_CACHE_DIR: "/tmp"
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
HF_HOME: "/tmp/.cache/huggingface"
MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"
@@ -58,10 +58,10 @@ kind: Service
metadata:
name: codetrans-llm-uservice
labels:
helm.sh/chart: llm-uservice-0.8.0
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: codetrans
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
@@ -83,7 +83,7 @@ kind: Service
metadata:
name: codetrans-tgi
labels:
helm.sh/chart: tgi-0.8.0
helm.sh/chart: tgi-1.0.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codetrans
app.kubernetes.io/version: "2.1.0"
@@ -108,10 +108,10 @@ kind: Service
metadata:
name: codetrans
labels:
helm.sh/chart: codetrans-0.8.0
helm.sh/chart: codetrans-1.0.0
app.kubernetes.io/name: codetrans
app.kubernetes.io/instance: codetrans
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
@@ -133,10 +133,10 @@ kind: Deployment
metadata:
name: codetrans-llm-uservice
labels:
helm.sh/chart: llm-uservice-0.8.0
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: codetrans
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
@@ -211,12 +211,13 @@ kind: Deployment
metadata:
name: codetrans-tgi
labels:
helm.sh/chart: tgi-0.8.0
helm.sh/chart: tgi-1.0.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codetrans
app.kubernetes.io/version: "2.1.0"
app.kubernetes.io/managed-by: Helm
spec:
# use explicit replica counts only if HorizontalPodAutoscaler is disabled
replicas: 1
selector:
matchLabels:
@@ -236,7 +237,15 @@ spec:
- configMapRef:
name: codetrans-tgi-config
securityContext:
{}
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "ghcr.io/huggingface/tgi-gaudi:2.0.1"
imagePullPolicy: IfNotPresent
volumeMounts:
@@ -270,9 +279,7 @@ spec:
habana.ai/gaudi: 1
volumes:
- name: model-volume
hostPath:
path: /mnt/opea-models
type: Directory
emptyDir: {}
- name: tmp
emptyDir: {}
---
@@ -285,10 +292,10 @@ kind: Deployment
metadata:
name: codetrans
labels:
helm.sh/chart: codetrans-0.8.0
helm.sh/chart: codetrans-1.0.0
app.kubernetes.io/name: codetrans
app.kubernetes.io/instance: codetrans
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1


@@ -8,10 +8,10 @@ kind: ConfigMap
metadata:
name: codetrans-llm-uservice-config
labels:
helm.sh/chart: llm-uservice-0.8.0
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: codetrans
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
data:
TGI_LLM_ENDPOINT: "http://codetrans-tgi"
@@ -20,9 +20,7 @@ data:
http_proxy: ""
https_proxy: ""
no_proxy: ""
LANGCHAIN_TRACING_V2: "false"
LANGCHAIN_API_KEY: insert-your-langchain-key-here
LANGCHAIN_PROJECT: "opea-llm-uservice"
LOGFLAG: ""
---
# Source: codetrans/charts/tgi/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
@@ -33,7 +31,7 @@ kind: ConfigMap
metadata:
name: codetrans-tgi-config
labels:
helm.sh/chart: tgi-0.8.0
helm.sh/chart: tgi-1.0.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codetrans
app.kubernetes.io/version: "2.1.0"
@@ -47,7 +45,6 @@ data:
no_proxy: ""
HABANA_LOGS: "/tmp/habana_logs"
NUMBA_CACHE_DIR: "/tmp"
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
HF_HOME: "/tmp/.cache/huggingface"
CUDA_GRAPHS: "0"
---
@@ -60,10 +57,10 @@ kind: Service
metadata:
name: codetrans-llm-uservice
labels:
helm.sh/chart: llm-uservice-0.8.0
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: codetrans
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
@@ -85,7 +82,7 @@ kind: Service
metadata:
name: codetrans-tgi
labels:
helm.sh/chart: tgi-0.8.0
helm.sh/chart: tgi-1.0.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codetrans
app.kubernetes.io/version: "2.1.0"
@@ -110,10 +107,10 @@ kind: Service
metadata:
name: codetrans
labels:
helm.sh/chart: codetrans-0.8.0
helm.sh/chart: codetrans-1.0.0
app.kubernetes.io/name: codetrans
app.kubernetes.io/instance: codetrans
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
@@ -135,10 +132,10 @@ kind: Deployment
metadata:
name: codetrans-llm-uservice
labels:
helm.sh/chart: llm-uservice-0.8.0
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: codetrans
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
@@ -213,12 +210,13 @@ kind: Deployment
metadata:
name: codetrans-tgi
labels:
helm.sh/chart: tgi-0.8.0
helm.sh/chart: tgi-1.0.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codetrans
app.kubernetes.io/version: "2.1.0"
app.kubernetes.io/managed-by: Helm
spec:
# use explicit replica counts only if HorizontalPodAutoscaler is disabled
replicas: 1
selector:
matchLabels:
@@ -238,7 +236,15 @@ spec:
- configMapRef:
name: codetrans-tgi-config
securityContext:
{}
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
@@ -271,9 +277,7 @@ spec:
{}
volumes:
- name: model-volume
hostPath:
path: /mnt/opea-models
type: Directory
emptyDir: {}
- name: tmp
emptyDir: {}
---
@@ -286,10 +290,10 @@ kind: Deployment
metadata:
name: codetrans
labels:
helm.sh/chart: codetrans-0.8.0
helm.sh/chart: codetrans-1.0.0
app.kubernetes.io/name: codetrans
app.kubernetes.io/instance: codetrans
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1


@@ -8,10 +8,10 @@ kind: ConfigMap
metadata:
name: docsum-llm-uservice-config
labels:
helm.sh/chart: llm-uservice-0.8.0
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: docsum
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
data:
TGI_LLM_ENDPOINT: "http://docsum-tgi"
@@ -20,9 +20,7 @@ data:
http_proxy: ""
https_proxy: ""
no_proxy: ""
LANGCHAIN_TRACING_V2: "false"
LANGCHAIN_API_KEY: insert-your-langchain-key-here
LANGCHAIN_PROJECT: "opea-llm-uservice"
LOGFLAG: ""
---
# Source: docsum/charts/tgi/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
@@ -33,7 +31,7 @@ kind: ConfigMap
metadata:
name: docsum-tgi-config
labels:
helm.sh/chart: tgi-0.8.0
helm.sh/chart: tgi-1.0.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: docsum
app.kubernetes.io/version: "2.1.0"
@@ -47,7 +45,6 @@ data:
no_proxy: ""
HABANA_LOGS: "/tmp/habana_logs"
NUMBA_CACHE_DIR: "/tmp"
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
HF_HOME: "/tmp/.cache/huggingface"
MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"
@@ -61,10 +58,10 @@ kind: Service
metadata:
name: docsum-llm-uservice
labels:
helm.sh/chart: llm-uservice-0.8.0
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: docsum
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
@@ -86,7 +83,7 @@ kind: Service
metadata:
name: docsum-tgi
labels:
helm.sh/chart: tgi-0.8.0
helm.sh/chart: tgi-1.0.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: docsum
app.kubernetes.io/version: "2.1.0"
@@ -111,10 +108,10 @@ kind: Service
metadata:
name: docsum
labels:
helm.sh/chart: docsum-0.8.0
helm.sh/chart: docsum-1.0.0
app.kubernetes.io/name: docsum
app.kubernetes.io/instance: docsum
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
@@ -136,10 +133,10 @@ kind: Deployment
metadata:
name: docsum-llm-uservice
labels:
helm.sh/chart: llm-uservice-0.8.0
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: docsum
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
@@ -214,12 +211,13 @@ kind: Deployment
metadata:
name: docsum-tgi
labels:
helm.sh/chart: tgi-0.8.0
helm.sh/chart: tgi-1.0.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: docsum
app.kubernetes.io/version: "2.1.0"
app.kubernetes.io/managed-by: Helm
spec:
# use explicit replica counts only if HorizontalPodAutoscaler is disabled
replicas: 1
selector:
matchLabels:
@@ -239,7 +237,15 @@ spec:
- configMapRef:
name: docsum-tgi-config
securityContext:
{}
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "ghcr.io/huggingface/tgi-gaudi:2.0.1"
imagePullPolicy: IfNotPresent
volumeMounts:
@@ -273,9 +279,7 @@ spec:
habana.ai/gaudi: 1
volumes:
- name: model-volume
hostPath:
path: /mnt/opea-models
type: Directory
emptyDir: {}
- name: tmp
emptyDir: {}
---
@@ -288,10 +292,10 @@ kind: Deployment
metadata:
name: docsum
labels:
helm.sh/chart: docsum-0.8.0
helm.sh/chart: docsum-1.0.0
app.kubernetes.io/name: docsum
app.kubernetes.io/instance: docsum
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1


@@ -8,10 +8,10 @@ kind: ConfigMap
metadata:
name: docsum-llm-uservice-config
labels:
helm.sh/chart: llm-uservice-0.8.0
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: docsum
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
data:
TGI_LLM_ENDPOINT: "http://docsum-tgi"
@@ -20,9 +20,7 @@ data:
http_proxy: ""
https_proxy: ""
no_proxy: ""
LANGCHAIN_TRACING_V2: "false"
LANGCHAIN_API_KEY: insert-your-langchain-key-here
LANGCHAIN_PROJECT: "opea-llm-uservice"
LOGFLAG: ""
---
# Source: docsum/charts/tgi/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
@@ -33,7 +31,7 @@ kind: ConfigMap
metadata:
name: docsum-tgi-config
labels:
helm.sh/chart: tgi-0.8.0
helm.sh/chart: tgi-1.0.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: docsum
app.kubernetes.io/version: "2.1.0"
@@ -47,7 +45,6 @@ data:
no_proxy: ""
HABANA_LOGS: "/tmp/habana_logs"
NUMBA_CACHE_DIR: "/tmp"
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
HF_HOME: "/tmp/.cache/huggingface"
CUDA_GRAPHS: "0"
---
@@ -60,10 +57,10 @@ kind: Service
metadata:
name: docsum-llm-uservice
labels:
helm.sh/chart: llm-uservice-0.8.0
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: docsum
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
@@ -85,7 +82,7 @@ kind: Service
metadata:
name: docsum-tgi
labels:
helm.sh/chart: tgi-0.8.0
helm.sh/chart: tgi-1.0.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: docsum
app.kubernetes.io/version: "2.1.0"
@@ -110,10 +107,10 @@ kind: Service
metadata:
name: docsum
labels:
helm.sh/chart: docsum-0.8.0
helm.sh/chart: docsum-1.0.0
app.kubernetes.io/name: docsum
app.kubernetes.io/instance: docsum
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
@@ -135,10 +132,10 @@ kind: Deployment
metadata:
name: docsum-llm-uservice
labels:
helm.sh/chart: llm-uservice-0.8.0
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: docsum
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
@@ -213,12 +210,13 @@ kind: Deployment
metadata:
name: docsum-tgi
labels:
helm.sh/chart: tgi-0.8.0
helm.sh/chart: tgi-1.0.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: docsum
app.kubernetes.io/version: "2.1.0"
app.kubernetes.io/managed-by: Helm
spec:
# use explicit replica counts only if HorizontalPodAutoscaler is disabled
replicas: 1
selector:
matchLabels:
@@ -238,7 +236,15 @@ spec:
- configMapRef:
name: docsum-tgi-config
securityContext:
{}
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
@@ -271,9 +277,7 @@ spec:
{}
volumes:
- name: model-volume
hostPath:
path: /mnt/opea-models
type: Directory
emptyDir: {}
- name: tmp
emptyDir: {}
---
@@ -286,10 +290,10 @@ kind: Deployment
metadata:
name: docsum
labels:
helm.sh/chart: docsum-0.8.0
helm.sh/chart: docsum-1.0.0
app.kubernetes.io/name: docsum
app.kubernetes.io/instance: docsum
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1