K8S manifest: Update ChatQnA/CodeGen/CodeTrans/DocSum

- Update the ChatQnA/CodeGen/CodeTrans/DocSum k8s manifests so they no
  longer require a pre-created model cache directory on the host
  (see the volume sketch below the sign-off).

- Add chatqna-guardrails manifest files.

- Fix bug #752 introduced by PR #669

Signed-off-by: Lianhao Lu <lianhao.lu@intel.com>
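
A minimal sketch of the model cache volume change from the first bullet,
as it lands in the rendered TGI deployments in the diff below; with the
emptyDir variant the model is simply re-downloaded into the pod whenever
the pod is recreated:

# Before: every node had to have /mnt/opea-models created up front
volumes:
  - name: model-volume
    hostPath:
      path: /mnt/opea-models
      type: Directory

# After: no host directory is required
volumes:
  - name: model-volume
    emptyDir: {}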
Lianhao Lu authored on 2024-09-05 18:05:11 +08:00, committed by dolpher
parent 4bd7841f17
commit 0629696333
14 changed files with 3574 additions and 297 deletions

Changed file: CodeTrans manifest (Gaudi variant)

@@ -8,10 +8,10 @@ kind: ConfigMap
metadata:
name: codetrans-llm-uservice-config
labels:
helm.sh/chart: llm-uservice-0.8.0
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: codetrans
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
data:
TGI_LLM_ENDPOINT: "http://codetrans-tgi"
@@ -20,6 +20,7 @@ data:
http_proxy: ""
https_proxy: ""
no_proxy: ""
LOGFLAG: ""
---
# Source: codetrans/charts/tgi/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
@@ -30,7 +31,7 @@ kind: ConfigMap
metadata:
name: codetrans-tgi-config
labels:
helm.sh/chart: tgi-0.8.0
helm.sh/chart: tgi-1.0.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codetrans
app.kubernetes.io/version: "2.1.0"
@@ -44,7 +45,6 @@ data:
no_proxy: ""
HABANA_LOGS: "/tmp/habana_logs"
NUMBA_CACHE_DIR: "/tmp"
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
HF_HOME: "/tmp/.cache/huggingface"
MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"
@@ -58,10 +58,10 @@ kind: Service
metadata:
name: codetrans-llm-uservice
labels:
helm.sh/chart: llm-uservice-0.8.0
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: codetrans
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
@@ -83,7 +83,7 @@ kind: Service
metadata:
name: codetrans-tgi
labels:
helm.sh/chart: tgi-0.8.0
helm.sh/chart: tgi-1.0.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codetrans
app.kubernetes.io/version: "2.1.0"
@@ -108,10 +108,10 @@ kind: Service
metadata:
name: codetrans
labels:
helm.sh/chart: codetrans-0.8.0
helm.sh/chart: codetrans-1.0.0
app.kubernetes.io/name: codetrans
app.kubernetes.io/instance: codetrans
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
@@ -133,10 +133,10 @@ kind: Deployment
metadata:
name: codetrans-llm-uservice
labels:
helm.sh/chart: llm-uservice-0.8.0
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: codetrans
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
@@ -211,12 +211,13 @@ kind: Deployment
metadata:
name: codetrans-tgi
labels:
helm.sh/chart: tgi-0.8.0
helm.sh/chart: tgi-1.0.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codetrans
app.kubernetes.io/version: "2.1.0"
app.kubernetes.io/managed-by: Helm
spec:
# use explicit replica counts only if HorizontalPodAutoscaler is disabled
replicas: 1
selector:
matchLabels:
@@ -236,7 +237,15 @@ spec:
- configMapRef:
name: codetrans-tgi-config
securityContext:
{}
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "ghcr.io/huggingface/tgi-gaudi:2.0.1"
imagePullPolicy: IfNotPresent
volumeMounts:
@@ -270,9 +279,7 @@ spec:
habana.ai/gaudi: 1
volumes:
- name: model-volume
hostPath:
path: /mnt/opea-models
type: Directory
emptyDir: {}
- name: tmp
emptyDir: {}
---
@@ -285,10 +292,10 @@ kind: Deployment
metadata:
name: codetrans
labels:
helm.sh/chart: codetrans-0.8.0
helm.sh/chart: codetrans-1.0.0
app.kubernetes.io/name: codetrans
app.kubernetes.io/instance: codetrans
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1

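Beyond the volume change, the hunks above add a restricted container
securityContext to the TGI deployment (non-root, all capabilities dropped,
read-only root filesystem, RuntimeDefault seccomp). These settings are
compatible with the Kubernetes "restricted" Pod Security Standard; as an
illustrative sketch (the namespace name is an assumption, not part of this
commit), a namespace label like the following would have the Pod Security
admission controller enforce that baseline for every pod deployed into it:

apiVersion: v1
kind: Namespace
metadata:
  name: codetrans
  labels:
    pod-security.kubernetes.io/enforce: restricted
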
Changed file: CodeTrans manifest (Xeon/CPU variant)

@@ -8,10 +8,10 @@ kind: ConfigMap
metadata:
name: codetrans-llm-uservice-config
labels:
helm.sh/chart: llm-uservice-0.8.0
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: codetrans
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
data:
TGI_LLM_ENDPOINT: "http://codetrans-tgi"
@@ -20,9 +20,7 @@ data:
http_proxy: ""
https_proxy: ""
no_proxy: ""
LANGCHAIN_TRACING_V2: "false"
LANGCHAIN_API_KEY: insert-your-langchain-key-here
LANGCHAIN_PROJECT: "opea-llm-uservice"
LOGFLAG: ""
---
# Source: codetrans/charts/tgi/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
@@ -33,7 +31,7 @@ kind: ConfigMap
metadata:
name: codetrans-tgi-config
labels:
helm.sh/chart: tgi-0.8.0
helm.sh/chart: tgi-1.0.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codetrans
app.kubernetes.io/version: "2.1.0"
@@ -47,7 +45,6 @@ data:
no_proxy: ""
HABANA_LOGS: "/tmp/habana_logs"
NUMBA_CACHE_DIR: "/tmp"
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
HF_HOME: "/tmp/.cache/huggingface"
CUDA_GRAPHS: "0"
---
@@ -60,10 +57,10 @@ kind: Service
metadata:
name: codetrans-llm-uservice
labels:
helm.sh/chart: llm-uservice-0.8.0
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: codetrans
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
@@ -85,7 +82,7 @@ kind: Service
metadata:
name: codetrans-tgi
labels:
helm.sh/chart: tgi-0.8.0
helm.sh/chart: tgi-1.0.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codetrans
app.kubernetes.io/version: "2.1.0"
@@ -110,10 +107,10 @@ kind: Service
metadata:
name: codetrans
labels:
helm.sh/chart: codetrans-0.8.0
helm.sh/chart: codetrans-1.0.0
app.kubernetes.io/name: codetrans
app.kubernetes.io/instance: codetrans
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
@@ -135,10 +132,10 @@ kind: Deployment
metadata:
name: codetrans-llm-uservice
labels:
helm.sh/chart: llm-uservice-0.8.0
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: codetrans
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
@@ -213,12 +210,13 @@ kind: Deployment
metadata:
name: codetrans-tgi
labels:
helm.sh/chart: tgi-0.8.0
helm.sh/chart: tgi-1.0.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codetrans
app.kubernetes.io/version: "2.1.0"
app.kubernetes.io/managed-by: Helm
spec:
# use explicit replica counts only if HorizontalPodAutoscaler is disabled
replicas: 1
selector:
matchLabels:
@@ -238,7 +236,15 @@ spec:
- configMapRef:
name: codetrans-tgi-config
securityContext:
{}
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
@@ -271,9 +277,7 @@ spec:
{}
volumes:
- name: model-volume
hostPath:
path: /mnt/opea-models
type: Directory
emptyDir: {}
- name: tmp
emptyDir: {}
---
@@ -286,10 +290,10 @@ kind: Deployment
metadata:
name: codetrans
labels:
helm.sh/chart: codetrans-0.8.0
helm.sh/chart: codetrans-1.0.0
app.kubernetes.io/name: codetrans
app.kubernetes.io/instance: codetrans
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
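
Because the model cache now lives in an emptyDir, operators who still want
a shared, pre-populated cache on the node can patch the volume back at
deploy time. A hypothetical kustomize overlay (the local file name
codetrans.yaml and the volume index 0 are assumptions based on the manifest
shown above, where model-volume is listed first) might look like this:

# kustomization.yaml
resources:
  - codetrans.yaml
patches:
  - target:
      kind: Deployment
      name: codetrans-tgi
    patch: |-
      - op: replace
        path: /spec/template/spec/volumes/0
        value:
          name: model-volume
          hostPath:
            path: /mnt/opea-models
            type: Directory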