K8S manifest: Update ChatQnA/CodeGen/CodeTrans/DocSum
- Update the ChatQnA/CodeGen/CodeTrans/DocSum k8s manifests so that a directory for the model cache no longer has to be created in advance. - Add chatqna-guardrails manifest files. - Fix bug #752 introduced by PR #669. Signed-off-by: Lianhao Lu <lianhao.lu@intel.com>
This commit is contained in:
8
.github/workflows/_manifest-e2e.yml
vendored
8
.github/workflows/_manifest-e2e.yml
vendored
@@ -80,6 +80,7 @@ jobs:
|
||||
echo "skip_validate=false" >> $GITHUB_ENV
|
||||
else
|
||||
echo "Timeout waiting for pods in namespace $NAMESPACE to be ready!"
|
||||
.github/workflows/scripts/k8s-utils.sh dump_pods_status $NAMESPACE
|
||||
exit 1
|
||||
fi
|
||||
sleep 60
|
||||
@@ -91,7 +92,12 @@ jobs:
|
||||
if $skip_validate; then
|
||||
echo "Skip validate"
|
||||
else
|
||||
${{ github.workspace }}/${{ inputs.example }}/tests/test_manifest_on_${{ inputs.hardware }}.sh validate_${{ inputs.example }} $NAMESPACE
|
||||
if ${{ github.workspace }}/${{ inputs.example }}/tests/test_manifest_on_${{ inputs.hardware }}.sh validate_${{ inputs.example }} $NAMESPACE ; then
|
||||
echo "Validate ${{ inputs.example }} successful!"
|
||||
else
|
||||
echo "Validate ${{ inputs.example }} failure!!!"
|
||||
.github/workflows/scripts/k8s-utils.sh dump_all_pod_logs $NAMESPACE
|
||||
fi
|
||||
fi
|
||||
|
||||
- name: Kubectl uninstall
|
||||
|
||||
71
.github/workflows/scripts/k8s-utils.sh
vendored
Executable file
71
.github/workflows/scripts/k8s-utils.sh
vendored
Executable file
@@ -0,0 +1,71 @@
|
||||
#!/bin/bash
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#set -xe
|
||||
|
||||
# Print the `kubectl describe` output and the logs of a single pod.
# Arguments:
#   $1 - name of the pod
#   $2 - namespace the pod lives in
# Outputs:
#   Writes the describe output and the logs to stdout, framed by separator
#   lines and preceded by the command that produced them.
function dump_pod_log() {
  # Keep these local so same-named variables in the caller are not clobbered.
  local pod_name=$1
  local namespace=$2

  echo "-----------Pod: $pod_name---------"
  echo "#kubectl describe pod $pod_name -n $namespace"
  # Quote the expansions so an empty or unusual value is still passed as
  # exactly one argument instead of shifting the following options.
  kubectl describe pod "$pod_name" -n "$namespace"
  echo "-----------------------------------"
  echo "#kubectl logs $pod_name -n $namespace"
  kubectl logs "$pod_name" -n "$namespace"
  echo "-----------------------------------"
}
|
||||
|
||||
# Print an overview of all pods in a namespace, then dump details for every
# pod that looks unhealthy.
# Arguments:
#   $1 - namespace to inspect
# Outputs:
#   Writes the `kubectl get pods -o wide` table to stdout, followed by the
#   dump_pod_log output of each pod that is not "Running" or whose READY
#   count is below the required count.
function dump_pods_status() {
  local namespace=$1
  local pods pod_name ready status ready_count required_count

  echo "-----DUMP POD STATUS in NS $namespace------"
  kubectl get pods -n "$namespace" -o wide
  echo "-----------------------------------"

  # Get all pods in the namespace and their statuses
  pods=$(kubectl get pods -n "$namespace" --no-headers)

  # Loop through each pod. `read` splits the NAME, READY and STATUS columns
  # directly, so no awk/cut processes are needed per pod.
  while read -r pod_name ready status _; do
    # An empty namespace yields a single blank line here; skip it instead of
    # calling dump_pod_log with an empty pod name.
    [[ -n "$pod_name" ]] || continue

    # Extract the READY count ("ready/required")
    ready_count=${ready%%/*}
    required_count=${ready#*/}

    # Check if the pod is not in "Running" status or READY count is less than required
    if [[ "$status" != "Running" || "$ready_count" -lt "$required_count" ]]; then
      dump_pod_log "$pod_name" "$namespace"
    fi
  done <<< "$pods"
}
|
||||
|
||||
# Dump the description and logs of every pod in a namespace.
# Arguments:
#   $1 - namespace to inspect
# Outputs:
#   Writes a header line followed by the dump_pod_log output of each pod
#   to stdout.
function dump_all_pod_logs() {
  local namespace=$1
  local pods pod_name

  echo "-----DUMP POD STATUS AND LOG in NS $namespace------"

  pods=$(kubectl get pods -n "$namespace" -o jsonpath='{.items[*].metadata.name}')
  # The jsonpath query is split on whitespace into one pod name per
  # iteration, so $pods is deliberately left unquoted.
  # shellcheck disable=SC2086
  for pod_name in $pods; do
    dump_pod_log "$pod_name" "$namespace"
  done
}
|
||||
|
||||
# Command-line entry point: the first argument selects which helper to run,
# the second argument is the namespace handed to that helper.
if [ $# -eq 0 ]; then
  echo "Usage: $0 <function_name> <namespace>"
  exit 1
fi

case "$1" in
  dump_pods_status)
    dump_pods_status "$2"
    ;;
  dump_all_pod_logs)
    dump_all_pod_logs "$2"
    ;;
  *)
    # Fail loudly so a mistyped function name in a CI step is not reported
    # as success.
    echo "Unknown function: $1" >&2
    exit 1
    ;;
esac
|
||||
1518
ChatQnA/kubernetes/manifests/gaudi/chatqna-guardrails.yaml
Normal file
1518
ChatQnA/kubernetes/manifests/gaudi/chatqna-guardrails.yaml
Normal file
File diff suppressed because it is too large
Load Diff
@@ -8,24 +8,24 @@ kind: ConfigMap
|
||||
metadata:
|
||||
name: chatqna-data-prep-config
|
||||
labels:
|
||||
helm.sh/chart: data-prep-0.8.0
|
||||
helm.sh/chart: data-prep-1.0.0
|
||||
app.kubernetes.io/name: data-prep
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TEI_ENDPOINT: "http://chatqna-tei"
|
||||
EMBED_MODEL: ""
|
||||
REDIS_URL: "redis://chatqna-redis-vector-db:6379"
|
||||
INDEX_NAME: "rag-redis"
|
||||
KEY_INDEX_NAME: "file-keys"
|
||||
SEARCH_BATCH_SIZE: "10"
|
||||
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
LANGCHAIN_TRACING_V2: "false"
|
||||
LANGCHAIN_API_KEY: "insert-your-langchain-key-here"
|
||||
LANGCHAIN_PROJECT: "opea-dataprep-service"
|
||||
LOGFLAG: ""
|
||||
---
|
||||
# Source: chatqna/charts/embedding-usvc/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
@@ -36,19 +36,17 @@ kind: ConfigMap
|
||||
metadata:
|
||||
name: chatqna-embedding-usvc-config
|
||||
labels:
|
||||
helm.sh/chart: embedding-usvc-0.8.0
|
||||
helm.sh/chart: embedding-usvc-1.0.0
|
||||
app.kubernetes.io/name: embedding-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TEI_EMBEDDING_ENDPOINT: "http://chatqna-tei"
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
LANGCHAIN_TRACING_V2: "false"
|
||||
LANGCHAIN_API_KEY: insert-your-langchain-key-here
|
||||
LANGCHAIN_PROJECT: "opea-embedding-service"
|
||||
LOGFLAG: ""
|
||||
---
|
||||
# Source: chatqna/charts/llm-uservice/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
@@ -59,10 +57,10 @@ kind: ConfigMap
|
||||
metadata:
|
||||
name: chatqna-llm-uservice-config
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
helm.sh/chart: llm-uservice-1.0.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TGI_LLM_ENDPOINT: "http://chatqna-tgi"
|
||||
@@ -71,9 +69,7 @@ data:
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
LANGCHAIN_TRACING_V2: "false"
|
||||
LANGCHAIN_API_KEY: insert-your-langchain-key-here
|
||||
LANGCHAIN_PROJECT: "opea-llm-uservice"
|
||||
LOGFLAG: ""
|
||||
---
|
||||
# Source: chatqna/charts/reranking-usvc/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
@@ -84,19 +80,17 @@ kind: ConfigMap
|
||||
metadata:
|
||||
name: chatqna-reranking-usvc-config
|
||||
labels:
|
||||
helm.sh/chart: reranking-usvc-0.8.0
|
||||
helm.sh/chart: reranking-usvc-1.0.0
|
||||
app.kubernetes.io/name: reranking-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TEI_RERANKING_ENDPOINT: "http://chatqna-teirerank"
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
LANGCHAIN_TRACING_V2: "false"
|
||||
LANGCHAIN_API_KEY: "insert-your-langchain-key-here"
|
||||
LANGCHAIN_PROJECT: "opea-reranking-service"
|
||||
LOGFLAG: ""
|
||||
---
|
||||
# Source: chatqna/charts/retriever-usvc/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
@@ -107,10 +101,10 @@ kind: ConfigMap
|
||||
metadata:
|
||||
name: chatqna-retriever-usvc-config
|
||||
labels:
|
||||
helm.sh/chart: retriever-usvc-0.8.0
|
||||
helm.sh/chart: retriever-usvc-1.0.0
|
||||
app.kubernetes.io/name: retriever-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TEI_EMBEDDING_ENDPOINT: "http://chatqna-tei"
|
||||
@@ -121,11 +115,9 @@ data:
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
LANGCHAIN_TRACING_V2: "false"
|
||||
LANGCHAIN_API_KEY: "insert-your-langchain-key-here"
|
||||
LANGCHAIN_PROJECT: "opea-retriever-service"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
|
||||
LOGFLAG: ""
|
||||
---
|
||||
# Source: chatqna/charts/tei/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
@@ -136,7 +128,7 @@ kind: ConfigMap
|
||||
metadata:
|
||||
name: chatqna-tei-config
|
||||
labels:
|
||||
helm.sh/chart: tei-0.8.0
|
||||
helm.sh/chart: tei-1.0.0
|
||||
app.kubernetes.io/name: tei
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "cpu-1.5"
|
||||
@@ -161,7 +153,7 @@ kind: ConfigMap
|
||||
metadata:
|
||||
name: chatqna-teirerank-config
|
||||
labels:
|
||||
helm.sh/chart: teirerank-0.8.0
|
||||
helm.sh/chart: teirerank-1.0.0
|
||||
app.kubernetes.io/name: teirerank
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "cpu-1.5"
|
||||
@@ -185,7 +177,7 @@ kind: ConfigMap
|
||||
metadata:
|
||||
name: chatqna-tgi-config
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.8.0
|
||||
helm.sh/chart: tgi-1.0.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
@@ -199,7 +191,6 @@ data:
|
||||
no_proxy: ""
|
||||
HABANA_LOGS: "/tmp/habana_logs"
|
||||
NUMBA_CACHE_DIR: "/tmp"
|
||||
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
MAX_INPUT_LENGTH: "1024"
|
||||
MAX_TOTAL_TOKENS: "2048"
|
||||
@@ -213,10 +204,10 @@ kind: Service
|
||||
metadata:
|
||||
name: chatqna-data-prep
|
||||
labels:
|
||||
helm.sh/chart: data-prep-0.8.0
|
||||
helm.sh/chart: data-prep-1.0.0
|
||||
app.kubernetes.io/name: data-prep
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -238,10 +229,10 @@ kind: Service
|
||||
metadata:
|
||||
name: chatqna-embedding-usvc
|
||||
labels:
|
||||
helm.sh/chart: embedding-usvc-0.8.0
|
||||
helm.sh/chart: embedding-usvc-1.0.0
|
||||
app.kubernetes.io/name: embedding-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -263,10 +254,10 @@ kind: Service
|
||||
metadata:
|
||||
name: chatqna-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
helm.sh/chart: llm-uservice-1.0.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -288,7 +279,7 @@ kind: Service
|
||||
metadata:
|
||||
name: chatqna-redis-vector-db
|
||||
labels:
|
||||
helm.sh/chart: redis-vector-db-0.8.0
|
||||
helm.sh/chart: redis-vector-db-1.0.0
|
||||
app.kubernetes.io/name: redis-vector-db
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "7.2.0-v9"
|
||||
@@ -317,10 +308,10 @@ kind: Service
|
||||
metadata:
|
||||
name: chatqna-reranking-usvc
|
||||
labels:
|
||||
helm.sh/chart: reranking-usvc-0.8.0
|
||||
helm.sh/chart: reranking-usvc-1.0.0
|
||||
app.kubernetes.io/name: reranking-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -342,10 +333,10 @@ kind: Service
|
||||
metadata:
|
||||
name: chatqna-retriever-usvc
|
||||
labels:
|
||||
helm.sh/chart: retriever-usvc-0.8.0
|
||||
helm.sh/chart: retriever-usvc-1.0.0
|
||||
app.kubernetes.io/name: retriever-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -367,7 +358,7 @@ kind: Service
|
||||
metadata:
|
||||
name: chatqna-tei
|
||||
labels:
|
||||
helm.sh/chart: tei-0.8.0
|
||||
helm.sh/chart: tei-1.0.0
|
||||
app.kubernetes.io/name: tei
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "cpu-1.5"
|
||||
@@ -392,7 +383,7 @@ kind: Service
|
||||
metadata:
|
||||
name: chatqna-teirerank
|
||||
labels:
|
||||
helm.sh/chart: teirerank-0.8.0
|
||||
helm.sh/chart: teirerank-1.0.0
|
||||
app.kubernetes.io/name: teirerank
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "cpu-1.5"
|
||||
@@ -417,7 +408,7 @@ kind: Service
|
||||
metadata:
|
||||
name: chatqna-tgi
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.8.0
|
||||
helm.sh/chart: tgi-1.0.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
@@ -442,10 +433,10 @@ kind: Service
|
||||
metadata:
|
||||
name: chatqna
|
||||
labels:
|
||||
helm.sh/chart: chatqna-0.8.0
|
||||
helm.sh/chart: chatqna-1.0.0
|
||||
app.kubernetes.io/name: chatqna
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -467,10 +458,10 @@ kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-data-prep
|
||||
labels:
|
||||
helm.sh/chart: data-prep-0.8.0
|
||||
helm.sh/chart: data-prep-1.0.0
|
||||
app.kubernetes.io/name: data-prep
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
@@ -545,10 +536,10 @@ kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-embedding-usvc
|
||||
labels:
|
||||
helm.sh/chart: embedding-usvc-0.8.0
|
||||
helm.sh/chart: embedding-usvc-1.0.0
|
||||
app.kubernetes.io/name: embedding-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
@@ -623,10 +614,10 @@ kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
helm.sh/chart: llm-uservice-1.0.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
@@ -701,7 +692,7 @@ kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-redis-vector-db
|
||||
labels:
|
||||
helm.sh/chart: redis-vector-db-0.8.0
|
||||
helm.sh/chart: redis-vector-db-1.0.0
|
||||
app.kubernetes.io/name: redis-vector-db
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "7.2.0-v9"
|
||||
@@ -773,10 +764,10 @@ kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-reranking-usvc
|
||||
labels:
|
||||
helm.sh/chart: reranking-usvc-0.8.0
|
||||
helm.sh/chart: reranking-usvc-1.0.0
|
||||
app.kubernetes.io/name: reranking-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
@@ -851,10 +842,10 @@ kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-retriever-usvc
|
||||
labels:
|
||||
helm.sh/chart: retriever-usvc-0.8.0
|
||||
helm.sh/chart: retriever-usvc-1.0.0
|
||||
app.kubernetes.io/name: retriever-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
@@ -929,12 +920,13 @@ kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-tei
|
||||
labels:
|
||||
helm.sh/chart: tei-0.8.0
|
||||
helm.sh/chart: tei-1.0.0
|
||||
app.kubernetes.io/name: tei
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "cpu-1.5"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
# use explicit replica counts only if HorizontalPodAutoscaler is disabled
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
@@ -954,7 +946,15 @@ spec:
|
||||
- configMapRef:
|
||||
name: chatqna-tei-config
|
||||
securityContext:
|
||||
{}
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: false
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "ghcr.io/huggingface/tei-gaudi:synapse_1.16"
|
||||
imagePullPolicy: IfNotPresent
|
||||
args:
|
||||
@@ -995,9 +995,7 @@ spec:
|
||||
habana.ai/gaudi: 1
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/opea-models
|
||||
type: Directory
|
||||
emptyDir: {}
|
||||
- name: shm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
@@ -1014,12 +1012,13 @@ kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-teirerank
|
||||
labels:
|
||||
helm.sh/chart: teirerank-0.8.0
|
||||
helm.sh/chart: teirerank-1.0.0
|
||||
app.kubernetes.io/name: teirerank
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "cpu-1.5"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
# use explicit replica counts only if HorizontalPodAutoscaler is disabled
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
@@ -1039,7 +1038,15 @@ spec:
|
||||
- configMapRef:
|
||||
name: chatqna-teirerank-config
|
||||
securityContext:
|
||||
{}
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.5"
|
||||
imagePullPolicy: IfNotPresent
|
||||
args:
|
||||
@@ -1079,9 +1086,7 @@ spec:
|
||||
{}
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/opea-models
|
||||
type: Directory
|
||||
emptyDir: {}
|
||||
- name: shm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
@@ -1098,12 +1103,13 @@ kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-tgi
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.8.0
|
||||
helm.sh/chart: tgi-1.0.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
# use explicit replica counts only if HorizontalPodAutoscaler is disabled
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
@@ -1123,7 +1129,15 @@ spec:
|
||||
- configMapRef:
|
||||
name: chatqna-tgi-config
|
||||
securityContext:
|
||||
{}
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "ghcr.io/huggingface/tgi-gaudi:2.0.1"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
@@ -1157,9 +1171,7 @@ spec:
|
||||
habana.ai/gaudi: 1
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/opea-models
|
||||
type: Directory
|
||||
emptyDir: {}
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
@@ -1172,10 +1184,10 @@ kind: Deployment
|
||||
metadata:
|
||||
name: chatqna
|
||||
labels:
|
||||
helm.sh/chart: chatqna-0.8.0
|
||||
helm.sh/chart: chatqna-1.0.0
|
||||
app.kubernetes.io/name: chatqna
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
@@ -1202,6 +1214,8 @@ spec:
|
||||
value: chatqna-retriever-usvc
|
||||
- name: EMBEDDING_SERVICE_HOST_IP
|
||||
value: chatqna-embedding-usvc
|
||||
- name: GUARDRAIL_SERVICE_HOST_IP
|
||||
value: chatqna-guardrails-usvc
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
|
||||
1513
ChatQnA/kubernetes/manifests/xeon/chatqna-guardrails.yaml
Normal file
1513
ChatQnA/kubernetes/manifests/xeon/chatqna-guardrails.yaml
Normal file
File diff suppressed because it is too large
Load Diff
@@ -25,7 +25,7 @@ data:
|
||||
}
|
||||
|
||||
location / {
|
||||
proxy_pass http://chatqna-ui.default.svc.cluster.local:5173;
|
||||
proxy_pass http://chatqna-ui:5173;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
@@ -33,7 +33,7 @@ data:
|
||||
}
|
||||
|
||||
location /v1/chatqna {
|
||||
proxy_pass http://chatqna.default.svc.cluster.local:8888;
|
||||
proxy_pass http://chatqna:8888;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
@@ -41,7 +41,7 @@ data:
|
||||
}
|
||||
|
||||
location /v1/dataprep {
|
||||
proxy_pass http://chatqna-data-prep.default.svc.cluster.local:6007;
|
||||
proxy_pass http://chatqna-data-prep:6007;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
@@ -49,7 +49,7 @@ data:
|
||||
}
|
||||
|
||||
location /v1/dataprep/get_file {
|
||||
proxy_pass http://chatqna-data-prep.default.svc.cluster.local:6007;
|
||||
proxy_pass http://chatqna-data-prep:6007;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
@@ -57,7 +57,7 @@ data:
|
||||
}
|
||||
|
||||
location /v1/dataprep/delete_file {
|
||||
proxy_pass http://chatqna-data-prep.default.svc.cluster.local:6007;
|
||||
proxy_pass http://chatqna-data-prep:6007;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
@@ -74,24 +74,24 @@ kind: ConfigMap
|
||||
metadata:
|
||||
name: chatqna-data-prep-config
|
||||
labels:
|
||||
helm.sh/chart: data-prep-0.8.0
|
||||
helm.sh/chart: data-prep-1.0.0
|
||||
app.kubernetes.io/name: data-prep
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TEI_ENDPOINT: "http://chatqna-tei"
|
||||
EMBED_MODEL: ""
|
||||
REDIS_URL: "redis://chatqna-redis-vector-db:6379"
|
||||
INDEX_NAME: "rag-redis"
|
||||
KEY_INDEX_NAME: "file-keys"
|
||||
SEARCH_BATCH_SIZE: "10"
|
||||
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
LANGCHAIN_TRACING_V2: "false"
|
||||
LANGCHAIN_API_KEY: "insert-your-langchain-key-here"
|
||||
LANGCHAIN_PROJECT: "opea-dataprep-service"
|
||||
LOGFLAG: ""
|
||||
---
|
||||
# Source: chatqna/charts/embedding-usvc/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
@@ -102,19 +102,17 @@ kind: ConfigMap
|
||||
metadata:
|
||||
name: chatqna-embedding-usvc-config
|
||||
labels:
|
||||
helm.sh/chart: embedding-usvc-0.8.0
|
||||
helm.sh/chart: embedding-usvc-1.0.0
|
||||
app.kubernetes.io/name: embedding-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TEI_EMBEDDING_ENDPOINT: "http://chatqna-tei"
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
LANGCHAIN_TRACING_V2: "false"
|
||||
LANGCHAIN_API_KEY: insert-your-langchain-key-here
|
||||
LANGCHAIN_PROJECT: "opea-embedding-service"
|
||||
LOGFLAG: ""
|
||||
---
|
||||
# Source: chatqna/charts/llm-uservice/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
@@ -125,10 +123,10 @@ kind: ConfigMap
|
||||
metadata:
|
||||
name: chatqna-llm-uservice-config
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
helm.sh/chart: llm-uservice-1.0.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TGI_LLM_ENDPOINT: "http://chatqna-tgi"
|
||||
@@ -137,9 +135,7 @@ data:
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
LANGCHAIN_TRACING_V2: "false"
|
||||
LANGCHAIN_API_KEY: insert-your-langchain-key-here
|
||||
LANGCHAIN_PROJECT: "opea-llm-uservice"
|
||||
LOGFLAG: ""
|
||||
---
|
||||
# Source: chatqna/charts/reranking-usvc/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
@@ -150,19 +146,17 @@ kind: ConfigMap
|
||||
metadata:
|
||||
name: chatqna-reranking-usvc-config
|
||||
labels:
|
||||
helm.sh/chart: reranking-usvc-0.8.0
|
||||
helm.sh/chart: reranking-usvc-1.0.0
|
||||
app.kubernetes.io/name: reranking-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TEI_RERANKING_ENDPOINT: "http://chatqna-teirerank"
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
LANGCHAIN_TRACING_V2: "false"
|
||||
LANGCHAIN_API_KEY: "insert-your-langchain-key-here"
|
||||
LANGCHAIN_PROJECT: "opea-reranking-service"
|
||||
LOGFLAG: ""
|
||||
---
|
||||
# Source: chatqna/charts/retriever-usvc/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
@@ -173,10 +167,10 @@ kind: ConfigMap
|
||||
metadata:
|
||||
name: chatqna-retriever-usvc-config
|
||||
labels:
|
||||
helm.sh/chart: retriever-usvc-0.8.0
|
||||
helm.sh/chart: retriever-usvc-1.0.0
|
||||
app.kubernetes.io/name: retriever-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TEI_EMBEDDING_ENDPOINT: "http://chatqna-tei"
|
||||
@@ -187,11 +181,9 @@ data:
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
LANGCHAIN_TRACING_V2: "false"
|
||||
LANGCHAIN_API_KEY: "insert-your-langchain-key-here"
|
||||
LANGCHAIN_PROJECT: "opea-retriever-service"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
|
||||
LOGFLAG: ""
|
||||
---
|
||||
# Source: chatqna/charts/tei/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
@@ -202,7 +194,7 @@ kind: ConfigMap
|
||||
metadata:
|
||||
name: chatqna-tei-config
|
||||
labels:
|
||||
helm.sh/chart: tei-0.8.0
|
||||
helm.sh/chart: tei-1.0.0
|
||||
app.kubernetes.io/name: tei
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "cpu-1.5"
|
||||
@@ -227,7 +219,7 @@ kind: ConfigMap
|
||||
metadata:
|
||||
name: chatqna-teirerank-config
|
||||
labels:
|
||||
helm.sh/chart: teirerank-0.8.0
|
||||
helm.sh/chart: teirerank-1.0.0
|
||||
app.kubernetes.io/name: teirerank
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "cpu-1.5"
|
||||
@@ -251,7 +243,7 @@ kind: ConfigMap
|
||||
metadata:
|
||||
name: chatqna-tgi-config
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.8.0
|
||||
helm.sh/chart: tgi-1.0.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
@@ -265,7 +257,6 @@ data:
|
||||
no_proxy: ""
|
||||
HABANA_LOGS: "/tmp/habana_logs"
|
||||
NUMBA_CACHE_DIR: "/tmp"
|
||||
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
CUDA_GRAPHS: "0"
|
||||
---
|
||||
@@ -278,10 +269,10 @@ kind: Service
|
||||
metadata:
|
||||
name: chatqna-data-prep
|
||||
labels:
|
||||
helm.sh/chart: data-prep-0.8.0
|
||||
helm.sh/chart: data-prep-1.0.0
|
||||
app.kubernetes.io/name: data-prep
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -303,10 +294,10 @@ kind: Service
|
||||
metadata:
|
||||
name: chatqna-embedding-usvc
|
||||
labels:
|
||||
helm.sh/chart: embedding-usvc-0.8.0
|
||||
helm.sh/chart: embedding-usvc-1.0.0
|
||||
app.kubernetes.io/name: embedding-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -328,10 +319,10 @@ kind: Service
|
||||
metadata:
|
||||
name: chatqna-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
helm.sh/chart: llm-uservice-1.0.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -353,7 +344,7 @@ kind: Service
|
||||
metadata:
|
||||
name: chatqna-redis-vector-db
|
||||
labels:
|
||||
helm.sh/chart: redis-vector-db-0.8.0
|
||||
helm.sh/chart: redis-vector-db-1.0.0
|
||||
app.kubernetes.io/name: redis-vector-db
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "7.2.0-v9"
|
||||
@@ -382,10 +373,10 @@ kind: Service
|
||||
metadata:
|
||||
name: chatqna-reranking-usvc
|
||||
labels:
|
||||
helm.sh/chart: reranking-usvc-0.8.0
|
||||
helm.sh/chart: reranking-usvc-1.0.0
|
||||
app.kubernetes.io/name: reranking-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -407,10 +398,10 @@ kind: Service
|
||||
metadata:
|
||||
name: chatqna-retriever-usvc
|
||||
labels:
|
||||
helm.sh/chart: retriever-usvc-0.8.0
|
||||
helm.sh/chart: retriever-usvc-1.0.0
|
||||
app.kubernetes.io/name: retriever-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -432,7 +423,7 @@ kind: Service
|
||||
metadata:
|
||||
name: chatqna-tei
|
||||
labels:
|
||||
helm.sh/chart: tei-0.8.0
|
||||
helm.sh/chart: tei-1.0.0
|
||||
app.kubernetes.io/name: tei
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "cpu-1.5"
|
||||
@@ -457,7 +448,7 @@ kind: Service
|
||||
metadata:
|
||||
name: chatqna-teirerank
|
||||
labels:
|
||||
helm.sh/chart: teirerank-0.8.0
|
||||
helm.sh/chart: teirerank-1.0.0
|
||||
app.kubernetes.io/name: teirerank
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "cpu-1.5"
|
||||
@@ -482,7 +473,7 @@ kind: Service
|
||||
metadata:
|
||||
name: chatqna-tgi
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.8.0
|
||||
helm.sh/chart: tgi-1.0.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
@@ -507,10 +498,10 @@ kind: Service
|
||||
metadata:
|
||||
name: chatqna
|
||||
labels:
|
||||
helm.sh/chart: chatqna-0.8.0
|
||||
helm.sh/chart: chatqna-1.0.0
|
||||
app.kubernetes.io/name: chatqna
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -557,10 +548,10 @@ kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-data-prep
|
||||
labels:
|
||||
helm.sh/chart: data-prep-0.8.0
|
||||
helm.sh/chart: data-prep-1.0.0
|
||||
app.kubernetes.io/name: data-prep
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
@@ -635,10 +626,10 @@ kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-embedding-usvc
|
||||
labels:
|
||||
helm.sh/chart: embedding-usvc-0.8.0
|
||||
helm.sh/chart: embedding-usvc-1.0.0
|
||||
app.kubernetes.io/name: embedding-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
@@ -713,10 +704,10 @@ kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
helm.sh/chart: llm-uservice-1.0.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
@@ -791,7 +782,7 @@ kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-redis-vector-db
|
||||
labels:
|
||||
helm.sh/chart: redis-vector-db-0.8.0
|
||||
helm.sh/chart: redis-vector-db-1.0.0
|
||||
app.kubernetes.io/name: redis-vector-db
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "7.2.0-v9"
|
||||
@@ -863,10 +854,10 @@ kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-reranking-usvc
|
||||
labels:
|
||||
helm.sh/chart: reranking-usvc-0.8.0
|
||||
helm.sh/chart: reranking-usvc-1.0.0
|
||||
app.kubernetes.io/name: reranking-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
@@ -941,10 +932,10 @@ kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-retriever-usvc
|
||||
labels:
|
||||
helm.sh/chart: retriever-usvc-0.8.0
|
||||
helm.sh/chart: retriever-usvc-1.0.0
|
||||
app.kubernetes.io/name: retriever-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
@@ -1062,12 +1053,13 @@ kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-tei
|
||||
labels:
|
||||
helm.sh/chart: tei-0.8.0
|
||||
helm.sh/chart: tei-1.0.0
|
||||
app.kubernetes.io/name: tei
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "cpu-1.5"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
# use explicit replica counts only if HorizontalPodAutoscaler is disabled
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
@@ -1087,7 +1079,15 @@ spec:
|
||||
- configMapRef:
|
||||
name: chatqna-tei-config
|
||||
securityContext:
|
||||
{}
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.5"
|
||||
imagePullPolicy: IfNotPresent
|
||||
args:
|
||||
@@ -1127,9 +1127,7 @@ spec:
|
||||
{}
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/opea-models
|
||||
type: Directory
|
||||
emptyDir: {}
|
||||
- name: shm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
@@ -1146,12 +1144,13 @@ kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-teirerank
|
||||
labels:
|
||||
helm.sh/chart: teirerank-0.8.0
|
||||
helm.sh/chart: teirerank-1.0.0
|
||||
app.kubernetes.io/name: teirerank
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "cpu-1.5"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
# use explicit replica counts only if HorizontalPodAutoscaler is disabled
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
@@ -1171,7 +1170,15 @@ spec:
|
||||
- configMapRef:
|
||||
name: chatqna-teirerank-config
|
||||
securityContext:
|
||||
{}
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.5"
|
||||
imagePullPolicy: IfNotPresent
|
||||
args:
|
||||
@@ -1211,9 +1218,7 @@ spec:
|
||||
{}
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/opea-models
|
||||
type: Directory
|
||||
emptyDir: {}
|
||||
- name: shm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
@@ -1230,12 +1235,13 @@ kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-tgi
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.8.0
|
||||
helm.sh/chart: tgi-1.0.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
# use explicit replica counts only if HorizontalPodAutoscaler is disabled
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
@@ -1255,7 +1261,15 @@ spec:
|
||||
- configMapRef:
|
||||
name: chatqna-tgi-config
|
||||
securityContext:
|
||||
{}
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
@@ -1288,9 +1302,7 @@ spec:
|
||||
{}
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/opea-models
|
||||
type: Directory
|
||||
emptyDir: {}
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
@@ -1303,10 +1315,10 @@ kind: Deployment
|
||||
metadata:
|
||||
name: chatqna
|
||||
labels:
|
||||
helm.sh/chart: chatqna-0.8.0
|
||||
helm.sh/chart: chatqna-1.0.0
|
||||
app.kubernetes.io/name: chatqna
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
@@ -1333,6 +1345,8 @@ spec:
|
||||
value: chatqna-retriever-usvc
|
||||
- name: EMBEDDING_SERVICE_HOST_IP
|
||||
value: chatqna-embedding-usvc
|
||||
- name: GUARDRAIL_SERVICE_HOST_IP
|
||||
value: chatqna-guardrails-usvc
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
|
||||
@@ -9,6 +9,9 @@ MOUNT_DIR=/home/$USER_ID/.cache/huggingface/hub
|
||||
IMAGE_REPO=${IMAGE_REPO:-}
|
||||
IMAGE_TAG=${IMAGE_TAG:-latest}
|
||||
|
||||
ROLLOUT_TIMEOUT_SECONDS="1800s"
|
||||
KUBECTL_TIMEOUT_SECONDS="60s"
|
||||
|
||||
function init_chatqna() {
|
||||
# replace the mount dir "path: /mnt/opea-models" with "path: $MOUNT_DIR"
|
||||
find . -name '*.yaml' -type f -exec sed -i "s#path: /mnt/opea-models#path: $MOUNT_DIR#g" {} \;
|
||||
@@ -27,7 +30,7 @@ function init_chatqna() {
|
||||
|
||||
function install_chatqna {
|
||||
echo "namespace is $NAMESPACE"
|
||||
kubectl apply -f . -n $NAMESPACE
|
||||
kubectl apply -f chatqna.yaml -n $NAMESPACE
|
||||
# Sleep enough time for retriever-usvc to be ready
|
||||
sleep 60
|
||||
}
|
||||
@@ -40,13 +43,15 @@ function get_end_point() {
|
||||
}
|
||||
|
||||
function validate_chatqna() {
|
||||
local ns=$1
|
||||
local log=$2
|
||||
max_retry=20
|
||||
# make sure microservice retriever-usvc is ready
|
||||
# try to curl retriever-svc for max_retry times
|
||||
test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
|
||||
for ((i=1; i<=max_retry; i++))
|
||||
do
|
||||
endpoint_url=$(get_end_point "chatqna-retriever-usvc" $NAMESPACE)
|
||||
endpoint_url=$(get_end_point "chatqna-retriever-usvc" $ns)
|
||||
curl http://$endpoint_url/v1/retrieval -X POST \
|
||||
-d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${test_embedding}}" \
|
||||
-H 'Content-Type: application/json' && break
|
||||
@@ -55,32 +60,32 @@ function validate_chatqna() {
|
||||
# if i is bigger than max_retry, then exit with error
|
||||
if [ $i -gt $max_retry ]; then
|
||||
echo "Microservice retriever failed, exit with error."
|
||||
exit 1
|
||||
return 1
|
||||
fi
|
||||
# make sure microservice tgi-svc is ready
|
||||
for ((i=1; i<=max_retry; i++))
|
||||
do
|
||||
endpoint_url=$(get_end_point "chatqna-tgi" $NAMESPACE)
|
||||
endpoint_url=$(get_end_point "chatqna-tgi" $ns)
|
||||
curl http://$endpoint_url/generate -X POST \
|
||||
-d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
|
||||
-H 'Content-Type: application/json' && break
|
||||
sleep 10
|
||||
sleep 30
|
||||
done
|
||||
# if i is bigger than max_retry, then exit with error
|
||||
if [ $i -gt $max_retry ]; then
|
||||
echo "Microservice tgi failed, exit with error."
|
||||
exit 1
|
||||
return 1
|
||||
fi
|
||||
|
||||
# check megaservice works
|
||||
# generate a random logfile name to avoid conflict among multiple runners
|
||||
LOGFILE=$LOG_PATH/curlmega_$NAMESPACE.log
|
||||
endpoint_url=$(get_end_point "chatqna" $NAMESPACE)
|
||||
LOGFILE=$LOG_PATH/curlmega_$log.log
|
||||
endpoint_url=$(get_end_point "chatqna" $ns)
|
||||
curl http://$endpoint_url/v1/chatqna -H "Content-Type: application/json" -d '{"messages": "What is the revenue of Nike in 2023?"}' > $LOGFILE
|
||||
exit_code=$?
|
||||
if [ $exit_code -ne 0 ]; then
|
||||
echo "Megaservice failed, please check the logs in $LOGFILE!"
|
||||
exit 1
|
||||
return ${exit_code}
|
||||
fi
|
||||
|
||||
echo "Checking response results, make sure the output is reasonable. "
|
||||
@@ -91,10 +96,49 @@ function validate_chatqna() {
|
||||
fi
|
||||
if [ $status == false ]; then
|
||||
echo "Response check failed, please check the logs in artifacts!"
|
||||
exit 1
|
||||
return 1
|
||||
else
|
||||
echo "Response check succeed!"
|
||||
fi
|
||||
return 0
|
||||
}
|
||||
|
||||
|
||||
# Delete a Kubernetes namespace if it exists.
# A normal delete bounded by KUBECTL_TIMEOUT_SECONDS is tried first; if that
# fails, all pods in the namespace are force-deleted and the namespace delete
# is retried with --force.
# Globals:   KUBECTL_TIMEOUT_SECONDS (read) - value passed to kubectl --timeout
# Arguments: $1 - namespace name (must be non-empty)
# Returns:   1 if no namespace was given; otherwise the status of the last
#            command that ran
function _cleanup_ns() {
  local ns=$1
  # Refuse an empty name: "kubectl get ns" without a name lists every
  # namespace and succeeds, which would send the delete commands below off
  # with a missing argument.
  if [ -z "$ns" ]; then
    echo "_cleanup_ns: namespace name is required" >&2
    return 1
  fi
  if kubectl get ns "$ns"; then
    if ! kubectl delete ns "$ns" --timeout="$KUBECTL_TIMEOUT_SECONDS"; then
      # Graceful delete failed or timed out: force-remove the pods, then the
      # namespace itself.
      kubectl delete pods --namespace "$ns" --force --grace-period=0 --all
      kubectl delete ns "$ns" --force --grace-period=0 --timeout="$KUBECTL_TIMEOUT_SECONDS"
    fi
  fi
}
|
||||
|
||||
# Install the chatqna-guardrails manifest into a dedicated namespace, wait for
# its deployments to roll out, validate it with validate_chatqna, and remove
# the namespace again.
# Must be run from the directory containing chatqna-guardrails.yaml.
# Globals:   NAMESPACE (read) - prefix of the temporary namespace name
#            ROLLOUT_TIMEOUT_SECONDS (read) - kubectl rollout status timeout
# Calls:     _cleanup_ns, validate_chatqna (defined elsewhere in this script)
# Exits:     terminates the shell with status 1 (after cleaning up the
#            namespace) when install, rollout, or validation fails
function install_and_validate_chatqna_guardrail() {
  echo "Testing manifests chatqna_guardrils"
  local ns=${NAMESPACE}-gaurdrails
  # Start from a clean slate in case a previous run left the namespace behind.
  _cleanup_ns "$ns"
  # install guardrail
  # Fail fast when the namespace or manifest cannot be created instead of
  # sleeping and then waiting on a rollout that was never installed.
  if ! kubectl create namespace "$ns" ||
    ! kubectl apply -f chatqna-guardrails.yaml -n "$ns"; then
    echo "Failed to install chatqna_guardrail manifests in namespace $ns!"
    _cleanup_ns "$ns"
    exit 1
  fi
  # Sleep enough time for chatqna_guardrail to be ready
  sleep 60
  if kubectl rollout status deployment -n "$ns" --timeout "$ROLLOUT_TIMEOUT_SECONDS"; then
    echo "Waiting for cahtqna_guardrail pod ready done!"
  else
    echo "Timeout waiting for chatqna_guardrail pod ready!"
    _cleanup_ns "$ns"
    exit 1
  fi

  # validate guardrail
  if ! validate_chatqna "$ns" chatqna-guardrails; then
    _cleanup_ns "$ns"
    exit 1
  fi
  _cleanup_ns "$ns"
}
|
||||
|
||||
if [ $# -eq 0 ]; then
|
||||
@@ -117,7 +161,15 @@ case "$1" in
|
||||
validate_ChatQnA)
|
||||
NAMESPACE=$2
|
||||
SERVICE_NAME=chatqna
|
||||
validate_chatqna
|
||||
validate_chatqna $NAMESPACE chatqna
|
||||
ret=$?
|
||||
if [ $ret -ne 0 ]; then
|
||||
exit $ret
|
||||
fi
|
||||
pushd ChatQnA/kubernetes/manifests/gaudi
|
||||
set +e
|
||||
install_and_validate_chatqna_guardrail
|
||||
popd
|
||||
;;
|
||||
*)
|
||||
echo "Unknown function: $1"
|
||||
|
||||
@@ -9,6 +9,9 @@ MOUNT_DIR=/home/$USER_ID/.cache/huggingface/hub
|
||||
IMAGE_REPO=${IMAGE_REPO:-}
|
||||
IMAGE_TAG=${IMAGE_TAG:-latest}
|
||||
|
||||
ROLLOUT_TIMEOUT_SECONDS="1800s"
|
||||
KUBECTL_TIMEOUT_SECONDS="60s"
|
||||
|
||||
function init_chatqna() {
|
||||
# replace the mount dir "path: /mnt/opea-models" with "path: $MOUNT_DIR"
|
||||
find . -name '*.yaml' -type f -exec sed -i "s#path: /mnt/opea-models#path: $MOUNT_DIR#g" {} \;
|
||||
@@ -27,19 +30,29 @@ function init_chatqna() {
|
||||
|
||||
function install_chatqna {
|
||||
echo "namespace is $NAMESPACE"
|
||||
kubectl apply -f . -n $NAMESPACE
|
||||
kubectl apply -f chatqna.yaml -n $NAMESPACE
|
||||
# Sleep enough time for retriever-usvc to be ready
|
||||
sleep 60
|
||||
}
|
||||
|
||||
# Print the "<clusterIP>:<port>" endpoint of a Kubernetes Service.
# Arguments: $1 - service name
#            $2 - namespace
# Outputs:   the Service's .spec.clusterIP and its first declared port,
#            joined by ":", on stdout
function get_end_point() {
  local svc=$1 namespace=$2
  # Keep the intermediates local so they do not leak into the caller's scope.
  local ip_address port
  ip_address=$(kubectl get svc "$svc" -n "$namespace" -o jsonpath='{.spec.clusterIP}')
  port=$(kubectl get svc "$svc" -n "$namespace" -o jsonpath='{.spec.ports[0].port}')
  echo "$ip_address:$port"
}
|
||||
|
||||
function validate_chatqna() {
|
||||
local ns=$1
|
||||
local log=$2
|
||||
max_retry=20
|
||||
# make sure microservice retriever-usvc is ready
|
||||
# try to curl retriever-svc for max_retry times
|
||||
test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
|
||||
for ((i=1; i<=max_retry; i++))
|
||||
do
|
||||
curl http://chatqna-retriever-usvc.$NAMESPACE:7000/v1/retrieval -X POST \
|
||||
endpoint_url=$(get_end_point "chatqna-retriever-usvc" $ns)
|
||||
curl http://$endpoint_url/v1/retrieval -X POST \
|
||||
-d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${test_embedding}}" \
|
||||
-H 'Content-Type: application/json' && break
|
||||
sleep 30
|
||||
@@ -47,30 +60,32 @@ function validate_chatqna() {
|
||||
# if i is bigger than max_retry, then exit with error
|
||||
if [ $i -gt $max_retry ]; then
|
||||
echo "Microservice retriever failed, exit with error."
|
||||
exit 1
|
||||
return 1
|
||||
fi
|
||||
# make sure microservice tgi-svc is ready
|
||||
for ((i=1; i<=max_retry; i++))
|
||||
do
|
||||
curl http://chatqna-tgi.$NAMESPACE:80/generate -X POST \
|
||||
endpoint_url=$(get_end_point "chatqna-tgi" $ns)
|
||||
curl http://$endpoint_url/generate -X POST \
|
||||
-d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
|
||||
-H 'Content-Type: application/json' && break
|
||||
sleep 10
|
||||
sleep 30
|
||||
done
|
||||
# if i is bigger than max_retry, then exit with error
|
||||
if [ $i -gt $max_retry ]; then
|
||||
echo "Microservice tgi failed, exit with error."
|
||||
exit 1
|
||||
return 1
|
||||
fi
|
||||
|
||||
# check megaservice works
|
||||
# generate a random logfile name to avoid conflict among multiple runners
|
||||
LOGFILE=$LOG_PATH/curlmega_$NAMESPACE.log
|
||||
curl http://chatqna.$NAMESPACE:8888/v1/chatqna -H "Content-Type: application/json" -d '{"messages": "What is the revenue of Nike in 2023?"}' > $LOGFILE
|
||||
LOGFILE=$LOG_PATH/curlmega_$log.log
|
||||
endpoint_url=$(get_end_point "chatqna" $ns)
|
||||
curl http://$endpoint_url/v1/chatqna -H "Content-Type: application/json" -d '{"messages": "What is the revenue of Nike in 2023?"}' > $LOGFILE
|
||||
exit_code=$?
|
||||
if [ $exit_code -ne 0 ]; then
|
||||
echo "Megaservice failed, please check the logs in $LOGFILE!"
|
||||
exit 1
|
||||
return ${exit_code}
|
||||
fi
|
||||
|
||||
echo "Checking response results, make sure the output is reasonable. "
|
||||
@@ -81,10 +96,49 @@ function validate_chatqna() {
|
||||
fi
|
||||
if [ $status == false ]; then
|
||||
echo "Response check failed, please check the logs in artifacts!"
|
||||
exit 1
|
||||
return 1
|
||||
else
|
||||
echo "Response check succeed!"
|
||||
fi
|
||||
return 0
|
||||
}
|
||||
|
||||
|
||||
# Delete a Kubernetes namespace if it exists.
# A normal delete bounded by KUBECTL_TIMEOUT_SECONDS is tried first; if that
# fails, all pods in the namespace are force-deleted and the namespace delete
# is retried with --force.
# Globals:   KUBECTL_TIMEOUT_SECONDS (read) - value passed to kubectl --timeout
# Arguments: $1 - namespace name (must be non-empty)
# Returns:   1 if no namespace was given; otherwise the status of the last
#            command that ran
function _cleanup_ns() {
  local ns=$1
  # Refuse an empty name: "kubectl get ns" without a name lists every
  # namespace and succeeds, which would send the delete commands below off
  # with a missing argument.
  if [ -z "$ns" ]; then
    echo "_cleanup_ns: namespace name is required" >&2
    return 1
  fi
  if kubectl get ns "$ns"; then
    if ! kubectl delete ns "$ns" --timeout="$KUBECTL_TIMEOUT_SECONDS"; then
      # Graceful delete failed or timed out: force-remove the pods, then the
      # namespace itself.
      kubectl delete pods --namespace "$ns" --force --grace-period=0 --all
      kubectl delete ns "$ns" --force --grace-period=0 --timeout="$KUBECTL_TIMEOUT_SECONDS"
    fi
  fi
}
|
||||
|
||||
# Install the chatqna-guardrails manifest into a dedicated namespace, wait for
# its deployments to roll out, validate it with validate_chatqna, and remove
# the namespace again.
# Must be run from the directory containing chatqna-guardrails.yaml.
# Globals:   NAMESPACE (read) - prefix of the temporary namespace name
#            ROLLOUT_TIMEOUT_SECONDS (read) - kubectl rollout status timeout
# Calls:     _cleanup_ns, validate_chatqna (defined elsewhere in this script)
# Exits:     terminates the shell with status 1 (after cleaning up the
#            namespace) when install, rollout, or validation fails
function install_and_validate_chatqna_guardrail() {
  echo "Testing manifests chatqna_guardrils"
  local ns=${NAMESPACE}-gaurdrails
  # Start from a clean slate in case a previous run left the namespace behind.
  _cleanup_ns "$ns"
  # install guardrail
  # Fail fast when the namespace or manifest cannot be created instead of
  # sleeping and then waiting on a rollout that was never installed.
  if ! kubectl create namespace "$ns" ||
    ! kubectl apply -f chatqna-guardrails.yaml -n "$ns"; then
    echo "Failed to install chatqna_guardrail manifests in namespace $ns!"
    _cleanup_ns "$ns"
    exit 1
  fi
  # Sleep enough time for chatqna_guardrail to be ready
  sleep 60
  if kubectl rollout status deployment -n "$ns" --timeout "$ROLLOUT_TIMEOUT_SECONDS"; then
    echo "Waiting for cahtqna_guardrail pod ready done!"
  else
    echo "Timeout waiting for chatqna_guardrail pod ready!"
    _cleanup_ns "$ns"
    exit 1
  fi

  # validate guardrail
  if ! validate_chatqna "$ns" chatqna-guardrails; then
    _cleanup_ns "$ns"
    exit 1
  fi
  _cleanup_ns "$ns"
}
|
||||
|
||||
if [ $# -eq 0 ]; then
|
||||
@@ -107,7 +161,15 @@ case "$1" in
|
||||
validate_ChatQnA)
|
||||
NAMESPACE=$2
|
||||
SERVICE_NAME=chatqna
|
||||
validate_chatqna
|
||||
validate_chatqna $NAMESPACE chatqna
|
||||
ret=$?
|
||||
if [ $ret -ne 0 ]; then
|
||||
exit $ret
|
||||
fi
|
||||
pushd ChatQnA/kubernetes/manifests/xeon
|
||||
set +e
|
||||
install_and_validate_chatqna_guardrail
|
||||
popd
|
||||
;;
|
||||
*)
|
||||
echo "Unknown function: $1"
|
||||
|
||||
@@ -8,10 +8,10 @@ kind: ConfigMap
|
||||
metadata:
|
||||
name: codegen-llm-uservice-config
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
helm.sh/chart: llm-uservice-1.0.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TGI_LLM_ENDPOINT: "http://codegen-tgi"
|
||||
@@ -20,9 +20,7 @@ data:
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
LANGCHAIN_TRACING_V2: "false"
|
||||
LANGCHAIN_API_KEY: insert-your-langchain-key-here
|
||||
LANGCHAIN_PROJECT: "opea-llm-uservice"
|
||||
LOGFLAG: ""
|
||||
---
|
||||
# Source: codegen/charts/tgi/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
@@ -33,7 +31,7 @@ kind: ConfigMap
|
||||
metadata:
|
||||
name: codegen-tgi-config
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.8.0
|
||||
helm.sh/chart: tgi-1.0.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
@@ -47,7 +45,6 @@ data:
|
||||
no_proxy: ""
|
||||
HABANA_LOGS: "/tmp/habana_logs"
|
||||
NUMBA_CACHE_DIR: "/tmp"
|
||||
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
MAX_INPUT_LENGTH: "1024"
|
||||
MAX_TOTAL_TOKENS: "2048"
|
||||
@@ -61,10 +58,10 @@ kind: Service
|
||||
metadata:
|
||||
name: codegen-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
helm.sh/chart: llm-uservice-1.0.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -86,7 +83,7 @@ kind: Service
|
||||
metadata:
|
||||
name: codegen-tgi
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.8.0
|
||||
helm.sh/chart: tgi-1.0.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
@@ -111,10 +108,10 @@ kind: Service
|
||||
metadata:
|
||||
name: codegen
|
||||
labels:
|
||||
helm.sh/chart: codegen-0.8.0
|
||||
helm.sh/chart: codegen-1.0.0
|
||||
app.kubernetes.io/name: codegen
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -136,10 +133,10 @@ kind: Deployment
|
||||
metadata:
|
||||
name: codegen-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
helm.sh/chart: llm-uservice-1.0.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
@@ -214,12 +211,13 @@ kind: Deployment
|
||||
metadata:
|
||||
name: codegen-tgi
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.8.0
|
||||
helm.sh/chart: tgi-1.0.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
# use explicit replica counts only if HorizontalPodAutoscaler is disabled
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
@@ -239,7 +237,15 @@ spec:
|
||||
- configMapRef:
|
||||
name: codegen-tgi-config
|
||||
securityContext:
|
||||
{}
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "ghcr.io/huggingface/tgi-gaudi:2.0.1"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
@@ -275,9 +281,7 @@ spec:
|
||||
hugepages-2Mi: 500Mi
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/opea-models
|
||||
type: Directory
|
||||
emptyDir: {}
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
@@ -290,10 +294,10 @@ kind: Deployment
|
||||
metadata:
|
||||
name: codegen
|
||||
labels:
|
||||
helm.sh/chart: codegen-0.8.0
|
||||
helm.sh/chart: codegen-1.0.0
|
||||
app.kubernetes.io/name: codegen
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
|
||||
@@ -8,10 +8,10 @@ kind: ConfigMap
|
||||
metadata:
|
||||
name: codegen-llm-uservice-config
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
helm.sh/chart: llm-uservice-1.0.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TGI_LLM_ENDPOINT: "http://codegen-tgi"
|
||||
@@ -20,9 +20,7 @@ data:
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
LANGCHAIN_TRACING_V2: "false"
|
||||
LANGCHAIN_API_KEY: insert-your-langchain-key-here
|
||||
LANGCHAIN_PROJECT: "opea-llm-uservice"
|
||||
LOGFLAG: ""
|
||||
---
|
||||
# Source: codegen/charts/tgi/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
@@ -33,7 +31,7 @@ kind: ConfigMap
|
||||
metadata:
|
||||
name: codegen-tgi-config
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.8.0
|
||||
helm.sh/chart: tgi-1.0.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
@@ -47,7 +45,6 @@ data:
|
||||
no_proxy: ""
|
||||
HABANA_LOGS: "/tmp/habana_logs"
|
||||
NUMBA_CACHE_DIR: "/tmp"
|
||||
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
CUDA_GRAPHS: "0"
|
||||
---
|
||||
@@ -60,10 +57,10 @@ kind: Service
|
||||
metadata:
|
||||
name: codegen-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
helm.sh/chart: llm-uservice-1.0.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -85,7 +82,7 @@ kind: Service
|
||||
metadata:
|
||||
name: codegen-tgi
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.8.0
|
||||
helm.sh/chart: tgi-1.0.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
@@ -110,10 +107,10 @@ kind: Service
|
||||
metadata:
|
||||
name: codegen
|
||||
labels:
|
||||
helm.sh/chart: codegen-0.8.0
|
||||
helm.sh/chart: codegen-1.0.0
|
||||
app.kubernetes.io/name: codegen
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -135,10 +132,10 @@ kind: Deployment
|
||||
metadata:
|
||||
name: codegen-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
helm.sh/chart: llm-uservice-1.0.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
@@ -213,12 +210,13 @@ kind: Deployment
|
||||
metadata:
|
||||
name: codegen-tgi
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.8.0
|
||||
helm.sh/chart: tgi-1.0.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
# use explicit replica counts only if HorizontalPodAutoscaler is disabled
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
@@ -238,7 +236,15 @@ spec:
|
||||
- configMapRef:
|
||||
name: codegen-tgi-config
|
||||
securityContext:
|
||||
{}
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
@@ -271,9 +277,7 @@ spec:
|
||||
{}
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/opea-models
|
||||
type: Directory
|
||||
emptyDir: {}
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
@@ -286,10 +290,10 @@ kind: Deployment
|
||||
metadata:
|
||||
name: codegen
|
||||
labels:
|
||||
helm.sh/chart: codegen-0.8.0
|
||||
helm.sh/chart: codegen-1.0.0
|
||||
app.kubernetes.io/name: codegen
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
|
||||
@@ -8,10 +8,10 @@ kind: ConfigMap
|
||||
metadata:
|
||||
name: codetrans-llm-uservice-config
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
helm.sh/chart: llm-uservice-1.0.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TGI_LLM_ENDPOINT: "http://codetrans-tgi"
|
||||
@@ -20,6 +20,7 @@ data:
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
LOGFLAG: ""
|
||||
---
|
||||
# Source: codetrans/charts/tgi/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
@@ -30,7 +31,7 @@ kind: ConfigMap
|
||||
metadata:
|
||||
name: codetrans-tgi-config
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.8.0
|
||||
helm.sh/chart: tgi-1.0.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
@@ -44,7 +45,6 @@ data:
|
||||
no_proxy: ""
|
||||
HABANA_LOGS: "/tmp/habana_logs"
|
||||
NUMBA_CACHE_DIR: "/tmp"
|
||||
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
MAX_INPUT_LENGTH: "1024"
|
||||
MAX_TOTAL_TOKENS: "2048"
|
||||
@@ -58,10 +58,10 @@ kind: Service
|
||||
metadata:
|
||||
name: codetrans-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
helm.sh/chart: llm-uservice-1.0.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -83,7 +83,7 @@ kind: Service
|
||||
metadata:
|
||||
name: codetrans-tgi
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.8.0
|
||||
helm.sh/chart: tgi-1.0.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
@@ -108,10 +108,10 @@ kind: Service
|
||||
metadata:
|
||||
name: codetrans
|
||||
labels:
|
||||
helm.sh/chart: codetrans-0.8.0
|
||||
helm.sh/chart: codetrans-1.0.0
|
||||
app.kubernetes.io/name: codetrans
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -133,10 +133,10 @@ kind: Deployment
|
||||
metadata:
|
||||
name: codetrans-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
helm.sh/chart: llm-uservice-1.0.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
@@ -211,12 +211,13 @@ kind: Deployment
|
||||
metadata:
|
||||
name: codetrans-tgi
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.8.0
|
||||
helm.sh/chart: tgi-1.0.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
# use explicit replica counts only if HorizontalPodAutoscaler is disabled
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
@@ -236,7 +237,15 @@ spec:
|
||||
- configMapRef:
|
||||
name: codetrans-tgi-config
|
||||
securityContext:
|
||||
{}
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "ghcr.io/huggingface/tgi-gaudi:2.0.1"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
@@ -270,9 +279,7 @@ spec:
|
||||
habana.ai/gaudi: 1
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/opea-models
|
||||
type: Directory
|
||||
emptyDir: {}
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
@@ -285,10 +292,10 @@ kind: Deployment
|
||||
metadata:
|
||||
name: codetrans
|
||||
labels:
|
||||
helm.sh/chart: codetrans-0.8.0
|
||||
helm.sh/chart: codetrans-1.0.0
|
||||
app.kubernetes.io/name: codetrans
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
|
||||
@@ -8,10 +8,10 @@ kind: ConfigMap
|
||||
metadata:
|
||||
name: codetrans-llm-uservice-config
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
helm.sh/chart: llm-uservice-1.0.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TGI_LLM_ENDPOINT: "http://codetrans-tgi"
|
||||
@@ -20,9 +20,7 @@ data:
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
LANGCHAIN_TRACING_V2: "false"
|
||||
LANGCHAIN_API_KEY: insert-your-langchain-key-here
|
||||
LANGCHAIN_PROJECT: "opea-llm-uservice"
|
||||
LOGFLAG: ""
|
||||
---
|
||||
# Source: codetrans/charts/tgi/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
@@ -33,7 +31,7 @@ kind: ConfigMap
|
||||
metadata:
|
||||
name: codetrans-tgi-config
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.8.0
|
||||
helm.sh/chart: tgi-1.0.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
@@ -47,7 +45,6 @@ data:
|
||||
no_proxy: ""
|
||||
HABANA_LOGS: "/tmp/habana_logs"
|
||||
NUMBA_CACHE_DIR: "/tmp"
|
||||
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
CUDA_GRAPHS: "0"
|
||||
---
|
||||
@@ -60,10 +57,10 @@ kind: Service
|
||||
metadata:
|
||||
name: codetrans-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
helm.sh/chart: llm-uservice-1.0.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -85,7 +82,7 @@ kind: Service
|
||||
metadata:
|
||||
name: codetrans-tgi
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.8.0
|
||||
helm.sh/chart: tgi-1.0.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
@@ -110,10 +107,10 @@ kind: Service
|
||||
metadata:
|
||||
name: codetrans
|
||||
labels:
|
||||
helm.sh/chart: codetrans-0.8.0
|
||||
helm.sh/chart: codetrans-1.0.0
|
||||
app.kubernetes.io/name: codetrans
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -135,10 +132,10 @@ kind: Deployment
|
||||
metadata:
|
||||
name: codetrans-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
helm.sh/chart: llm-uservice-1.0.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
@@ -213,12 +210,13 @@ kind: Deployment
|
||||
metadata:
|
||||
name: codetrans-tgi
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.8.0
|
||||
helm.sh/chart: tgi-1.0.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
# use explicit replica counts only if HorizontalPodAutoscaler is disabled
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
@@ -238,7 +236,15 @@ spec:
|
||||
- configMapRef:
|
||||
name: codetrans-tgi-config
|
||||
securityContext:
|
||||
{}
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
@@ -271,9 +277,7 @@ spec:
|
||||
{}
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/opea-models
|
||||
type: Directory
|
||||
emptyDir: {}
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
@@ -286,10 +290,10 @@ kind: Deployment
|
||||
metadata:
|
||||
name: codetrans
|
||||
labels:
|
||||
helm.sh/chart: codetrans-0.8.0
|
||||
helm.sh/chart: codetrans-1.0.0
|
||||
app.kubernetes.io/name: codetrans
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
|
||||
@@ -8,10 +8,10 @@ kind: ConfigMap
|
||||
metadata:
|
||||
name: docsum-llm-uservice-config
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
helm.sh/chart: llm-uservice-1.0.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TGI_LLM_ENDPOINT: "http://docsum-tgi"
|
||||
@@ -20,9 +20,7 @@ data:
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
LANGCHAIN_TRACING_V2: "false"
|
||||
LANGCHAIN_API_KEY: insert-your-langchain-key-here
|
||||
LANGCHAIN_PROJECT: "opea-llm-uservice"
|
||||
LOGFLAG: ""
|
||||
---
|
||||
# Source: docsum/charts/tgi/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
@@ -33,7 +31,7 @@ kind: ConfigMap
|
||||
metadata:
|
||||
name: docsum-tgi-config
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.8.0
|
||||
helm.sh/chart: tgi-1.0.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
@@ -47,7 +45,6 @@ data:
|
||||
no_proxy: ""
|
||||
HABANA_LOGS: "/tmp/habana_logs"
|
||||
NUMBA_CACHE_DIR: "/tmp"
|
||||
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
MAX_INPUT_LENGTH: "1024"
|
||||
MAX_TOTAL_TOKENS: "2048"
|
||||
@@ -61,10 +58,10 @@ kind: Service
|
||||
metadata:
|
||||
name: docsum-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
helm.sh/chart: llm-uservice-1.0.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -86,7 +83,7 @@ kind: Service
|
||||
metadata:
|
||||
name: docsum-tgi
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.8.0
|
||||
helm.sh/chart: tgi-1.0.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
@@ -111,10 +108,10 @@ kind: Service
|
||||
metadata:
|
||||
name: docsum
|
||||
labels:
|
||||
helm.sh/chart: docsum-0.8.0
|
||||
helm.sh/chart: docsum-1.0.0
|
||||
app.kubernetes.io/name: docsum
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -136,10 +133,10 @@ kind: Deployment
|
||||
metadata:
|
||||
name: docsum-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
helm.sh/chart: llm-uservice-1.0.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
@@ -214,12 +211,13 @@ kind: Deployment
|
||||
metadata:
|
||||
name: docsum-tgi
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.8.0
|
||||
helm.sh/chart: tgi-1.0.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
# use explicit replica counts only if HorizontalPodAutoscaler is disabled
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
@@ -239,7 +237,15 @@ spec:
|
||||
- configMapRef:
|
||||
name: docsum-tgi-config
|
||||
securityContext:
|
||||
{}
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "ghcr.io/huggingface/tgi-gaudi:2.0.1"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
@@ -273,9 +279,7 @@ spec:
|
||||
habana.ai/gaudi: 1
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/opea-models
|
||||
type: Directory
|
||||
emptyDir: {}
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
@@ -288,10 +292,10 @@ kind: Deployment
|
||||
metadata:
|
||||
name: docsum
|
||||
labels:
|
||||
helm.sh/chart: docsum-0.8.0
|
||||
helm.sh/chart: docsum-1.0.0
|
||||
app.kubernetes.io/name: docsum
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
|
||||
@@ -8,10 +8,10 @@ kind: ConfigMap
|
||||
metadata:
|
||||
name: docsum-llm-uservice-config
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
helm.sh/chart: llm-uservice-1.0.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TGI_LLM_ENDPOINT: "http://docsum-tgi"
|
||||
@@ -20,9 +20,7 @@ data:
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
LANGCHAIN_TRACING_V2: "false"
|
||||
LANGCHAIN_API_KEY: insert-your-langchain-key-here
|
||||
LANGCHAIN_PROJECT: "opea-llm-uservice"
|
||||
LOGFLAG: ""
|
||||
---
|
||||
# Source: docsum/charts/tgi/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
@@ -33,7 +31,7 @@ kind: ConfigMap
|
||||
metadata:
|
||||
name: docsum-tgi-config
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.8.0
|
||||
helm.sh/chart: tgi-1.0.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
@@ -47,7 +45,6 @@ data:
|
||||
no_proxy: ""
|
||||
HABANA_LOGS: "/tmp/habana_logs"
|
||||
NUMBA_CACHE_DIR: "/tmp"
|
||||
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
CUDA_GRAPHS: "0"
|
||||
---
|
||||
@@ -60,10 +57,10 @@ kind: Service
|
||||
metadata:
|
||||
name: docsum-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
helm.sh/chart: llm-uservice-1.0.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -85,7 +82,7 @@ kind: Service
|
||||
metadata:
|
||||
name: docsum-tgi
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.8.0
|
||||
helm.sh/chart: tgi-1.0.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
@@ -110,10 +107,10 @@ kind: Service
|
||||
metadata:
|
||||
name: docsum
|
||||
labels:
|
||||
helm.sh/chart: docsum-0.8.0
|
||||
helm.sh/chart: docsum-1.0.0
|
||||
app.kubernetes.io/name: docsum
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -135,10 +132,10 @@ kind: Deployment
|
||||
metadata:
|
||||
name: docsum-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
helm.sh/chart: llm-uservice-1.0.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
@@ -213,12 +210,13 @@ kind: Deployment
|
||||
metadata:
|
||||
name: docsum-tgi
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.8.0
|
||||
helm.sh/chart: tgi-1.0.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
# use explicit replica counts only if HorizontalPodAutoscaler is disabled
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
@@ -238,7 +236,15 @@ spec:
|
||||
- configMapRef:
|
||||
name: docsum-tgi-config
|
||||
securityContext:
|
||||
{}
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
@@ -271,9 +277,7 @@ spec:
|
||||
{}
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/opea-models
|
||||
type: Directory
|
||||
emptyDir: {}
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
@@ -286,10 +290,10 @@ kind: Deployment
|
||||
metadata:
|
||||
name: docsum
|
||||
labels:
|
||||
helm.sh/chart: docsum-0.8.0
|
||||
helm.sh/chart: docsum-1.0.0
|
||||
app.kubernetes.io/name: docsum
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
|
||||
Reference in New Issue
Block a user