Update K8S manifest for ChatQnA/CodeGen/CodeTrans/DocSum
- Sync with docker-compose changes since v0.8 release
- Add K8S probes

Signed-off-by: Lianhao Lu <lianhao.lu@intel.com>
This commit is contained in:
@@ -11,17 +11,18 @@ metadata:
|
||||
helm.sh/chart: data-prep-0.8.0
|
||||
app.kubernetes.io/name: data-prep
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TEI_ENDPOINT: "http://chatqna-tei"
|
||||
EMBED_MODEL: ""
|
||||
REDIS_URL: "redis://chatqna-redis-vector-db:6379"
|
||||
INDEX_NAME: "rag-redis"
|
||||
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
http_proxy:
|
||||
https_proxy:
|
||||
no_proxy:
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
LANGCHAIN_TRACING_V2: "false"
|
||||
LANGCHAIN_API_KEY: "insert-your-langchain-key-here"
|
||||
LANGCHAIN_PROJECT: "opea-dataprep-service"
|
||||
@@ -38,13 +39,13 @@ metadata:
|
||||
helm.sh/chart: embedding-usvc-0.8.0
|
||||
app.kubernetes.io/name: embedding-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TEI_EMBEDDING_ENDPOINT: "http://chatqna-tei"
|
||||
http_proxy:
|
||||
https_proxy:
|
||||
no_proxy:
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
LANGCHAIN_TRACING_V2: "false"
|
||||
LANGCHAIN_API_KEY: insert-your-langchain-key-here
|
||||
LANGCHAIN_PROJECT: "opea-embedding-service"
|
||||
@@ -61,15 +62,15 @@ metadata:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TGI_LLM_ENDPOINT: "http://chatqna-tgi"
|
||||
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
http_proxy:
|
||||
https_proxy:
|
||||
no_proxy:
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
LANGCHAIN_TRACING_V2: "false"
|
||||
LANGCHAIN_API_KEY: insert-your-langchain-key-here
|
||||
LANGCHAIN_PROJECT: "opea-llm-uservice"
|
||||
@@ -86,13 +87,13 @@ metadata:
|
||||
helm.sh/chart: reranking-usvc-0.8.0
|
||||
app.kubernetes.io/name: reranking-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TEI_RERANKING_ENDPOINT: "http://chatqna-teirerank"
|
||||
http_proxy:
|
||||
https_proxy:
|
||||
no_proxy:
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
LANGCHAIN_TRACING_V2: "false"
|
||||
LANGCHAIN_API_KEY: "insert-your-langchain-key-here"
|
||||
LANGCHAIN_PROJECT: "opea-reranking-service"
|
||||
@@ -109,16 +110,17 @@ metadata:
|
||||
helm.sh/chart: retriever-usvc-0.8.0
|
||||
app.kubernetes.io/name: retriever-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TEI_EMBEDDING_ENDPOINT: "http://chatqna-tei"
|
||||
EMBED_MODEL: ""
|
||||
REDIS_URL: "redis://chatqna-redis-vector-db:6379"
|
||||
INDEX_NAME: "rag-redis"
|
||||
EASYOCR_MODULE_PATH: "/tmp/.EasyOCR"
|
||||
http_proxy:
|
||||
https_proxy:
|
||||
no_proxy:
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
LANGCHAIN_TRACING_V2: "false"
|
||||
LANGCHAIN_API_KEY: "insert-your-langchain-key-here"
|
||||
LANGCHAIN_PROJECT: "opea-retriever-service"
|
||||
@@ -136,14 +138,14 @@ metadata:
|
||||
helm.sh/chart: tei-0.8.0
|
||||
app.kubernetes.io/name: tei
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "1.2"
|
||||
app.kubernetes.io/version: "cpu-1.5"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
MODEL_ID: "BAAI/bge-base-en-v1.5"
|
||||
PORT: "2081"
|
||||
http_proxy:
|
||||
https_proxy:
|
||||
no_proxy:
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
NUMBA_CACHE_DIR: "/tmp"
|
||||
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
@@ -161,14 +163,14 @@ metadata:
|
||||
helm.sh/chart: teirerank-0.8.0
|
||||
app.kubernetes.io/name: teirerank
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "1.2"
|
||||
app.kubernetes.io/version: "cpu-1.5"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
MODEL_ID: "BAAI/bge-reranker-base"
|
||||
PORT: "2082"
|
||||
http_proxy:
|
||||
https_proxy:
|
||||
no_proxy:
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
NUMBA_CACHE_DIR: "/tmp"
|
||||
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
@@ -190,17 +192,16 @@ metadata:
|
||||
data:
|
||||
MODEL_ID: "Intel/neural-chat-7b-v3-3"
|
||||
PORT: "2080"
|
||||
HUGGING_FACE_HUB_TOKEN: "insert-your-huggingface-token-here"
|
||||
HF_TOKEN: "insert-your-huggingface-token-here"
|
||||
MAX_INPUT_TOKENS: "1024"
|
||||
MAX_TOTAL_TOKENS: "4096"
|
||||
http_proxy:
|
||||
https_proxy:
|
||||
no_proxy:
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
HABANA_LOGS: "/tmp/habana_logs"
|
||||
NUMBA_CACHE_DIR: "/tmp"
|
||||
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
MAX_INPUT_LENGTH: "1024"
|
||||
MAX_TOTAL_TOKENS: "2048"
|
||||
---
|
||||
# Source: chatqna/charts/data-prep/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
@@ -214,7 +215,7 @@ metadata:
|
||||
helm.sh/chart: data-prep-0.8.0
|
||||
app.kubernetes.io/name: data-prep
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -239,7 +240,7 @@ metadata:
|
||||
helm.sh/chart: embedding-usvc-0.8.0
|
||||
app.kubernetes.io/name: embedding-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -264,7 +265,7 @@ metadata:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -318,7 +319,7 @@ metadata:
|
||||
helm.sh/chart: reranking-usvc-0.8.0
|
||||
app.kubernetes.io/name: reranking-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -343,7 +344,7 @@ metadata:
|
||||
helm.sh/chart: retriever-usvc-0.8.0
|
||||
app.kubernetes.io/name: retriever-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -368,7 +369,7 @@ metadata:
|
||||
helm.sh/chart: tei-0.8.0
|
||||
app.kubernetes.io/name: tei
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "1.2"
|
||||
app.kubernetes.io/version: "cpu-1.5"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -393,7 +394,7 @@ metadata:
|
||||
helm.sh/chart: teirerank-0.8.0
|
||||
app.kubernetes.io/name: teirerank
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "1.2"
|
||||
app.kubernetes.io/version: "cpu-1.5"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -443,7 +444,7 @@ metadata:
|
||||
helm.sh/chart: chatqna-0.8.0
|
||||
app.kubernetes.io/name: chatqna
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -468,7 +469,7 @@ metadata:
|
||||
helm.sh/chart: data-prep-0.8.0
|
||||
app.kubernetes.io/name: data-prep
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
@@ -508,6 +509,26 @@ spec:
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: data-prep
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: data-prep
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: data-prep
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
@@ -526,7 +547,7 @@ metadata:
|
||||
helm.sh/chart: embedding-usvc-0.8.0
|
||||
app.kubernetes.io/name: embedding-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
@@ -566,6 +587,26 @@ spec:
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: embedding-usvc
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: embedding-usvc
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: embedding-usvc
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
@@ -584,7 +625,7 @@ metadata:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
@@ -624,14 +665,26 @@ spec:
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
startupProbe:
|
||||
exec:
|
||||
command:
|
||||
- curl
|
||||
- http://chatqna-tgi
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
@@ -722,7 +775,7 @@ metadata:
|
||||
helm.sh/chart: reranking-usvc-0.8.0
|
||||
app.kubernetes.io/name: reranking-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
@@ -762,6 +815,26 @@ spec:
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: reranking-usvc
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: reranking-usvc
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: reranking-usvc
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
@@ -780,7 +853,7 @@ metadata:
|
||||
helm.sh/chart: retriever-usvc-0.8.0
|
||||
app.kubernetes.io/name: retriever-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
@@ -820,14 +893,26 @@ spec:
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
startupProbe:
|
||||
exec:
|
||||
command:
|
||||
- curl
|
||||
- http://chatqna-tei
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: retriever-usvc
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: retriever-usvc
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: retriever-usvc
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
@@ -846,7 +931,7 @@ metadata:
|
||||
helm.sh/chart: tei-0.8.0
|
||||
app.kubernetes.io/name: tei
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "1.2"
|
||||
app.kubernetes.io/version: "cpu-1.5"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
@@ -871,6 +956,8 @@ spec:
|
||||
{}
|
||||
image: "ghcr.io/huggingface/tei-gaudi:synapse_1.16"
|
||||
imagePullPolicy: IfNotPresent
|
||||
args:
|
||||
- "--auto-truncate"
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
@@ -882,6 +969,26 @@ spec:
|
||||
- name: http
|
||||
containerPort: 2081
|
||||
protocol: TCP
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
httpGet:
|
||||
path: /health
|
||||
port: http
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /health
|
||||
port: http
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
httpGet:
|
||||
path: /health
|
||||
port: http
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
@@ -909,7 +1016,7 @@ metadata:
|
||||
helm.sh/chart: teirerank-0.8.0
|
||||
app.kubernetes.io/name: teirerank
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "1.2"
|
||||
app.kubernetes.io/version: "cpu-1.5"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
@@ -932,8 +1039,10 @@ spec:
|
||||
name: chatqna-teirerank-config
|
||||
securityContext:
|
||||
{}
|
||||
image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.2"
|
||||
image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.5"
|
||||
imagePullPolicy: IfNotPresent
|
||||
args:
|
||||
- "--auto-truncate"
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
@@ -945,6 +1054,26 @@ spec:
|
||||
- name: http
|
||||
containerPort: 2082
|
||||
protocol: TCP
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
httpGet:
|
||||
path: /health
|
||||
port: http
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /health
|
||||
port: http
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
httpGet:
|
||||
path: /health
|
||||
port: http
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
@@ -1005,6 +1134,23 @@ spec:
|
||||
- name: http
|
||||
containerPort: 2080
|
||||
protocol: TCP
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
tcpSocket:
|
||||
port: http
|
||||
readinessProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
tcpSocket:
|
||||
port: http
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
tcpSocket:
|
||||
port: http
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
@@ -1028,7 +1174,7 @@ metadata:
|
||||
helm.sh/chart: chatqna-0.8.0
|
||||
app.kubernetes.io/name: chatqna
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
|
||||
@@ -11,17 +11,18 @@ metadata:
|
||||
helm.sh/chart: data-prep-0.8.0
|
||||
app.kubernetes.io/name: data-prep
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TEI_ENDPOINT: "http://chatqna-tei"
|
||||
EMBED_MODEL: ""
|
||||
REDIS_URL: "redis://chatqna-redis-vector-db:6379"
|
||||
INDEX_NAME: "rag-redis"
|
||||
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
http_proxy:
|
||||
https_proxy:
|
||||
no_proxy:
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
LANGCHAIN_TRACING_V2: "false"
|
||||
LANGCHAIN_API_KEY: "insert-your-langchain-key-here"
|
||||
LANGCHAIN_PROJECT: "opea-dataprep-service"
|
||||
@@ -38,13 +39,13 @@ metadata:
|
||||
helm.sh/chart: embedding-usvc-0.8.0
|
||||
app.kubernetes.io/name: embedding-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TEI_EMBEDDING_ENDPOINT: "http://chatqna-tei"
|
||||
http_proxy:
|
||||
https_proxy:
|
||||
no_proxy:
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
LANGCHAIN_TRACING_V2: "false"
|
||||
LANGCHAIN_API_KEY: insert-your-langchain-key-here
|
||||
LANGCHAIN_PROJECT: "opea-embedding-service"
|
||||
@@ -61,15 +62,15 @@ metadata:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TGI_LLM_ENDPOINT: "http://chatqna-tgi"
|
||||
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
http_proxy:
|
||||
https_proxy:
|
||||
no_proxy:
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
LANGCHAIN_TRACING_V2: "false"
|
||||
LANGCHAIN_API_KEY: insert-your-langchain-key-here
|
||||
LANGCHAIN_PROJECT: "opea-llm-uservice"
|
||||
@@ -86,13 +87,13 @@ metadata:
|
||||
helm.sh/chart: reranking-usvc-0.8.0
|
||||
app.kubernetes.io/name: reranking-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TEI_RERANKING_ENDPOINT: "http://chatqna-teirerank"
|
||||
http_proxy:
|
||||
https_proxy:
|
||||
no_proxy:
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
LANGCHAIN_TRACING_V2: "false"
|
||||
LANGCHAIN_API_KEY: "insert-your-langchain-key-here"
|
||||
LANGCHAIN_PROJECT: "opea-reranking-service"
|
||||
@@ -109,16 +110,17 @@ metadata:
|
||||
helm.sh/chart: retriever-usvc-0.8.0
|
||||
app.kubernetes.io/name: retriever-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TEI_EMBEDDING_ENDPOINT: "http://chatqna-tei"
|
||||
EMBED_MODEL: ""
|
||||
REDIS_URL: "redis://chatqna-redis-vector-db:6379"
|
||||
INDEX_NAME: "rag-redis"
|
||||
EASYOCR_MODULE_PATH: "/tmp/.EasyOCR"
|
||||
http_proxy:
|
||||
https_proxy:
|
||||
no_proxy:
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
LANGCHAIN_TRACING_V2: "false"
|
||||
LANGCHAIN_API_KEY: "insert-your-langchain-key-here"
|
||||
LANGCHAIN_PROJECT: "opea-retriever-service"
|
||||
@@ -136,14 +138,14 @@ metadata:
|
||||
helm.sh/chart: tei-0.8.0
|
||||
app.kubernetes.io/name: tei
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "1.2"
|
||||
app.kubernetes.io/version: "cpu-1.5"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
MODEL_ID: "BAAI/bge-base-en-v1.5"
|
||||
PORT: "2081"
|
||||
http_proxy:
|
||||
https_proxy:
|
||||
no_proxy:
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
NUMBA_CACHE_DIR: "/tmp"
|
||||
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
@@ -161,14 +163,14 @@ metadata:
|
||||
helm.sh/chart: teirerank-0.8.0
|
||||
app.kubernetes.io/name: teirerank
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "1.2"
|
||||
app.kubernetes.io/version: "cpu-1.5"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
MODEL_ID: "BAAI/bge-reranker-base"
|
||||
PORT: "2082"
|
||||
http_proxy:
|
||||
https_proxy:
|
||||
no_proxy:
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
NUMBA_CACHE_DIR: "/tmp"
|
||||
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
@@ -190,18 +192,15 @@ metadata:
|
||||
data:
|
||||
MODEL_ID: "Intel/neural-chat-7b-v3-3"
|
||||
PORT: "2080"
|
||||
CUDA_GRAPHS: "0"
|
||||
HUGGING_FACE_HUB_TOKEN: "insert-your-huggingface-token-here"
|
||||
HF_TOKEN: "insert-your-huggingface-token-here"
|
||||
MAX_INPUT_TOKENS: "1024"
|
||||
MAX_TOTAL_TOKENS: "4096"
|
||||
http_proxy:
|
||||
https_proxy:
|
||||
no_proxy:
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
HABANA_LOGS: "/tmp/habana_logs"
|
||||
NUMBA_CACHE_DIR: "/tmp"
|
||||
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
CUDA_GRAPHS: "0"
|
||||
---
|
||||
# Source: chatqna/charts/data-prep/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
@@ -215,7 +214,7 @@ metadata:
|
||||
helm.sh/chart: data-prep-0.8.0
|
||||
app.kubernetes.io/name: data-prep
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -240,7 +239,7 @@ metadata:
|
||||
helm.sh/chart: embedding-usvc-0.8.0
|
||||
app.kubernetes.io/name: embedding-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -265,7 +264,7 @@ metadata:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -319,7 +318,7 @@ metadata:
|
||||
helm.sh/chart: reranking-usvc-0.8.0
|
||||
app.kubernetes.io/name: reranking-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -344,7 +343,7 @@ metadata:
|
||||
helm.sh/chart: retriever-usvc-0.8.0
|
||||
app.kubernetes.io/name: retriever-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -369,7 +368,7 @@ metadata:
|
||||
helm.sh/chart: tei-0.8.0
|
||||
app.kubernetes.io/name: tei
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "1.2"
|
||||
app.kubernetes.io/version: "cpu-1.5"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -394,7 +393,7 @@ metadata:
|
||||
helm.sh/chart: teirerank-0.8.0
|
||||
app.kubernetes.io/name: teirerank
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "1.2"
|
||||
app.kubernetes.io/version: "cpu-1.5"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -444,7 +443,7 @@ metadata:
|
||||
helm.sh/chart: chatqna-0.8.0
|
||||
app.kubernetes.io/name: chatqna
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -469,7 +468,7 @@ metadata:
|
||||
helm.sh/chart: data-prep-0.8.0
|
||||
app.kubernetes.io/name: data-prep
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
@@ -509,6 +508,26 @@ spec:
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: data-prep
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: data-prep
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: data-prep
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
@@ -527,7 +546,7 @@ metadata:
|
||||
helm.sh/chart: embedding-usvc-0.8.0
|
||||
app.kubernetes.io/name: embedding-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
@@ -567,6 +586,26 @@ spec:
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: embedding-usvc
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: embedding-usvc
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: embedding-usvc
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
@@ -585,7 +624,7 @@ metadata:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
@@ -625,14 +664,26 @@ spec:
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
startupProbe:
|
||||
exec:
|
||||
command:
|
||||
- curl
|
||||
- http://chatqna-tgi
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
@@ -723,7 +774,7 @@ metadata:
|
||||
helm.sh/chart: reranking-usvc-0.8.0
|
||||
app.kubernetes.io/name: reranking-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
@@ -763,6 +814,26 @@ spec:
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: reranking-usvc
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: reranking-usvc
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: reranking-usvc
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
@@ -781,7 +852,7 @@ metadata:
|
||||
helm.sh/chart: retriever-usvc-0.8.0
|
||||
app.kubernetes.io/name: retriever-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
@@ -821,14 +892,26 @@ spec:
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
startupProbe:
|
||||
exec:
|
||||
command:
|
||||
- curl
|
||||
- http://chatqna-tei
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: retriever-usvc
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: retriever-usvc
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: retriever-usvc
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
@@ -847,7 +930,7 @@ metadata:
|
||||
helm.sh/chart: tei-0.8.0
|
||||
app.kubernetes.io/name: tei
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "1.2"
|
||||
app.kubernetes.io/version: "cpu-1.5"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
@@ -870,8 +953,10 @@ spec:
|
||||
name: chatqna-tei-config
|
||||
securityContext:
|
||||
{}
|
||||
image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.2"
|
||||
image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.5"
|
||||
imagePullPolicy: IfNotPresent
|
||||
args:
|
||||
- "--auto-truncate"
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
@@ -883,6 +968,26 @@ spec:
|
||||
- name: http
|
||||
containerPort: 2081
|
||||
protocol: TCP
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
httpGet:
|
||||
path: /health
|
||||
port: http
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /health
|
||||
port: http
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
httpGet:
|
||||
path: /health
|
||||
port: http
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
@@ -909,7 +1014,7 @@ metadata:
|
||||
helm.sh/chart: teirerank-0.8.0
|
||||
app.kubernetes.io/name: teirerank
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "1.2"
|
||||
app.kubernetes.io/version: "cpu-1.5"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
@@ -932,8 +1037,10 @@ spec:
|
||||
name: chatqna-teirerank-config
|
||||
securityContext:
|
||||
{}
|
||||
image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.2"
|
||||
image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.5"
|
||||
imagePullPolicy: IfNotPresent
|
||||
args:
|
||||
- "--auto-truncate"
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
@@ -945,6 +1052,26 @@ spec:
|
||||
- name: http
|
||||
containerPort: 2082
|
||||
protocol: TCP
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
httpGet:
|
||||
path: /health
|
||||
port: http
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /health
|
||||
port: http
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
httpGet:
|
||||
path: /health
|
||||
port: http
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
@@ -1005,6 +1132,23 @@ spec:
|
||||
- name: http
|
||||
containerPort: 2080
|
||||
protocol: TCP
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
tcpSocket:
|
||||
port: http
|
||||
readinessProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
tcpSocket:
|
||||
port: http
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
tcpSocket:
|
||||
port: http
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
@@ -1027,7 +1171,7 @@ metadata:
|
||||
helm.sh/chart: chatqna-0.8.0
|
||||
app.kubernetes.io/name: chatqna
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
|
||||
@@ -86,7 +86,7 @@ function validate_chatqna() {
|
||||
echo "Checking response results, make sure the output is reasonable. "
|
||||
local status=false
|
||||
if [[ -f $LOGFILE ]] &&
|
||||
[[ $(grep -c "billion" $LOGFILE) != 0 ]]; then
|
||||
[[ $(grep -c "\[DONE\]" $LOGFILE) != 0 ]]; then
|
||||
status=true
|
||||
fi
|
||||
if [ $status == false ]; then
|
||||
|
||||
@@ -76,7 +76,7 @@ function validate_chatqna() {
|
||||
echo "Checking response results, make sure the output is reasonable. "
|
||||
local status=false
|
||||
if [[ -f $LOGFILE ]] &&
|
||||
[[ $(grep -c "billion" $LOGFILE) != 0 ]]; then
|
||||
[[ $(grep -c "\[DONE\]" $LOGFILE) != 0 ]]; then
|
||||
status=true
|
||||
fi
|
||||
if [ $status == false ]; then
|
||||
|
||||
@@ -11,15 +11,15 @@ metadata:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TGI_LLM_ENDPOINT: "http://codegen-tgi"
|
||||
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
http_proxy:
|
||||
https_proxy:
|
||||
no_proxy:
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
LANGCHAIN_TRACING_V2: "false"
|
||||
LANGCHAIN_API_KEY: insert-your-langchain-key-here
|
||||
LANGCHAIN_PROJECT: "opea-llm-uservice"
|
||||
@@ -36,22 +36,21 @@ metadata:
|
||||
helm.sh/chart: tgi-0.8.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "1.4"
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
MODEL_ID: "meta-llama/CodeLlama-7b-hf"
|
||||
PORT: "2080"
|
||||
HUGGING_FACE_HUB_TOKEN: "insert-your-huggingface-token-here"
|
||||
HF_TOKEN: "insert-your-huggingface-token-here"
|
||||
MAX_INPUT_TOKENS: "1024"
|
||||
MAX_TOTAL_TOKENS: "4096"
|
||||
http_proxy:
|
||||
https_proxy:
|
||||
no_proxy:
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
HABANA_LOGS: "/tmp/habana_logs"
|
||||
NUMBA_CACHE_DIR: "/tmp"
|
||||
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
MAX_INPUT_LENGTH: "1024"
|
||||
MAX_TOTAL_TOKENS: "2048"
|
||||
---
|
||||
# Source: codegen/charts/llm-uservice/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
@@ -65,7 +64,7 @@ metadata:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -90,7 +89,7 @@ metadata:
|
||||
helm.sh/chart: tgi-0.8.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "1.4"
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -115,7 +114,7 @@ metadata:
|
||||
helm.sh/chart: codegen-0.8.0
|
||||
app.kubernetes.io/name: codegen
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -140,7 +139,7 @@ metadata:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
@@ -180,14 +179,26 @@ spec:
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
startupProbe:
|
||||
exec:
|
||||
command:
|
||||
- curl
|
||||
- http://codegen-tgi
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
@@ -206,7 +217,7 @@ metadata:
|
||||
helm.sh/chart: tgi-0.8.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "1.4"
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
@@ -240,6 +251,23 @@ spec:
|
||||
- name: http
|
||||
containerPort: 2080
|
||||
protocol: TCP
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
tcpSocket:
|
||||
port: http
|
||||
readinessProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
tcpSocket:
|
||||
port: http
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
tcpSocket:
|
||||
port: http
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
@@ -263,7 +291,7 @@ metadata:
|
||||
helm.sh/chart: codegen-0.8.0
|
||||
app.kubernetes.io/name: codegen
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
|
||||
@@ -11,15 +11,15 @@ metadata:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TGI_LLM_ENDPOINT: "http://codegen-tgi"
|
||||
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
http_proxy:
|
||||
https_proxy:
|
||||
no_proxy:
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
LANGCHAIN_TRACING_V2: "false"
|
||||
LANGCHAIN_API_KEY: insert-your-langchain-key-here
|
||||
LANGCHAIN_PROJECT: "opea-llm-uservice"
|
||||
@@ -36,23 +36,20 @@ metadata:
|
||||
helm.sh/chart: tgi-0.8.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "1.4"
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
MODEL_ID: "meta-llama/CodeLlama-7b-hf"
|
||||
PORT: "2080"
|
||||
CUDA_GRAPHS: "0"
|
||||
HUGGING_FACE_HUB_TOKEN: "insert-your-huggingface-token-here"
|
||||
HF_TOKEN: "insert-your-huggingface-token-here"
|
||||
MAX_INPUT_TOKENS: "1024"
|
||||
MAX_TOTAL_TOKENS: "4096"
|
||||
http_proxy:
|
||||
https_proxy:
|
||||
no_proxy:
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
HABANA_LOGS: "/tmp/habana_logs"
|
||||
NUMBA_CACHE_DIR: "/tmp"
|
||||
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
CUDA_GRAPHS: "0"
|
||||
---
|
||||
# Source: codegen/charts/llm-uservice/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
@@ -66,7 +63,7 @@ metadata:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -91,7 +88,7 @@ metadata:
|
||||
helm.sh/chart: tgi-0.8.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "1.4"
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -116,7 +113,7 @@ metadata:
|
||||
helm.sh/chart: codegen-0.8.0
|
||||
app.kubernetes.io/name: codegen
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -141,7 +138,7 @@ metadata:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
@@ -181,14 +178,26 @@ spec:
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
startupProbe:
|
||||
exec:
|
||||
command:
|
||||
- curl
|
||||
- http://codegen-tgi
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
@@ -207,7 +216,7 @@ metadata:
|
||||
helm.sh/chart: tgi-0.8.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "1.4"
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
@@ -241,6 +250,23 @@ spec:
|
||||
- name: http
|
||||
containerPort: 2080
|
||||
protocol: TCP
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
tcpSocket:
|
||||
port: http
|
||||
readinessProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
tcpSocket:
|
||||
port: http
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
tcpSocket:
|
||||
port: http
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
@@ -263,7 +289,7 @@ metadata:
|
||||
helm.sh/chart: codegen-0.8.0
|
||||
app.kubernetes.io/name: codegen
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
|
||||
@@ -11,7 +11,7 @@ metadata:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TGI_LLM_ENDPOINT: "http://codetrans-tgi"
|
||||
@@ -41,10 +41,7 @@ metadata:
|
||||
data:
|
||||
MODEL_ID: "HuggingFaceH4/mistral-7b-grok"
|
||||
PORT: "2080"
|
||||
HUGGING_FACE_HUB_TOKEN: "insert-your-huggingface-token-here"
|
||||
HF_TOKEN: "insert-your-huggingface-token-here"
|
||||
MAX_INPUT_TOKENS: "1024"
|
||||
MAX_TOTAL_TOKENS: "4096"
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
@@ -52,6 +49,8 @@ data:
|
||||
NUMBA_CACHE_DIR: "/tmp"
|
||||
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
MAX_INPUT_LENGTH: "1024"
|
||||
MAX_TOTAL_TOKENS: "2048"
|
||||
---
|
||||
# Source: codetrans/charts/llm-uservice/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
@@ -65,7 +64,7 @@ metadata:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -115,7 +114,7 @@ metadata:
|
||||
helm.sh/chart: codetrans-0.8.0
|
||||
app.kubernetes.io/name: codetrans
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -140,7 +139,7 @@ metadata:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
@@ -180,14 +179,26 @@ spec:
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
startupProbe:
|
||||
exec:
|
||||
command:
|
||||
- curl
|
||||
- http://codetrans-tgi
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
@@ -240,6 +251,23 @@ spec:
|
||||
- name: http
|
||||
containerPort: 2080
|
||||
protocol: TCP
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
tcpSocket:
|
||||
port: http
|
||||
readinessProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
tcpSocket:
|
||||
port: http
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
tcpSocket:
|
||||
port: http
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
@@ -263,7 +291,7 @@ metadata:
|
||||
helm.sh/chart: codetrans-0.8.0
|
||||
app.kubernetes.io/name: codetrans
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
|
||||
@@ -11,7 +11,7 @@ metadata:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TGI_LLM_ENDPOINT: "http://codetrans-tgi"
|
||||
@@ -41,11 +41,7 @@ metadata:
|
||||
data:
|
||||
MODEL_ID: "HuggingFaceH4/mistral-7b-grok"
|
||||
PORT: "2080"
|
||||
CUDA_GRAPHS: "0"
|
||||
HUGGING_FACE_HUB_TOKEN: "insert-your-huggingface-token-here"
|
||||
HF_TOKEN: "insert-your-huggingface-token-here"
|
||||
MAX_INPUT_TOKENS: "1024"
|
||||
MAX_TOTAL_TOKENS: "4096"
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
@@ -53,6 +49,7 @@ data:
|
||||
NUMBA_CACHE_DIR: "/tmp"
|
||||
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
CUDA_GRAPHS: "0"
|
||||
---
|
||||
# Source: codetrans/charts/llm-uservice/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
@@ -66,7 +63,7 @@ metadata:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -116,7 +113,7 @@ metadata:
|
||||
helm.sh/chart: codetrans-0.8.0
|
||||
app.kubernetes.io/name: codetrans
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -141,7 +138,7 @@ metadata:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
@@ -181,14 +178,26 @@ spec:
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
startupProbe:
|
||||
exec:
|
||||
command:
|
||||
- curl
|
||||
- http://codetrans-tgi
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
@@ -241,6 +250,23 @@ spec:
|
||||
- name: http
|
||||
containerPort: 2080
|
||||
protocol: TCP
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
tcpSocket:
|
||||
port: http
|
||||
readinessProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
tcpSocket:
|
||||
port: http
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
tcpSocket:
|
||||
port: http
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
@@ -263,7 +289,7 @@ metadata:
|
||||
helm.sh/chart: codetrans-0.8.0
|
||||
app.kubernetes.io/name: codetrans
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
|
||||
@@ -11,7 +11,7 @@ metadata:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TGI_LLM_ENDPOINT: "http://docsum-tgi"
|
||||
@@ -41,10 +41,7 @@ metadata:
|
||||
data:
|
||||
MODEL_ID: "Intel/neural-chat-7b-v3-3"
|
||||
PORT: "2080"
|
||||
HUGGING_FACE_HUB_TOKEN: "insert-your-huggingface-token-here"
|
||||
HF_TOKEN: "insert-your-huggingface-token-here"
|
||||
MAX_INPUT_TOKENS: "1024"
|
||||
MAX_TOTAL_TOKENS: "4096"
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
@@ -52,6 +49,8 @@ data:
|
||||
NUMBA_CACHE_DIR: "/tmp"
|
||||
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
MAX_INPUT_LENGTH: "1024"
|
||||
MAX_TOTAL_TOKENS: "2048"
|
||||
---
|
||||
# Source: docsum/charts/llm-uservice/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
@@ -65,7 +64,7 @@ metadata:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -115,7 +114,7 @@ metadata:
|
||||
helm.sh/chart: docsum-0.8.0
|
||||
app.kubernetes.io/name: docsum
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -140,7 +139,7 @@ metadata:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
@@ -180,14 +179,26 @@ spec:
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
startupProbe:
|
||||
exec:
|
||||
command:
|
||||
- curl
|
||||
- http://docsum-tgi
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
@@ -240,6 +251,23 @@ spec:
|
||||
- name: http
|
||||
containerPort: 2080
|
||||
protocol: TCP
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
tcpSocket:
|
||||
port: http
|
||||
readinessProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
tcpSocket:
|
||||
port: http
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
tcpSocket:
|
||||
port: http
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
@@ -263,7 +291,7 @@ metadata:
|
||||
helm.sh/chart: docsum-0.8.0
|
||||
app.kubernetes.io/name: docsum
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
|
||||
@@ -11,7 +11,7 @@ metadata:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TGI_LLM_ENDPOINT: "http://docsum-tgi"
|
||||
@@ -41,11 +41,7 @@ metadata:
|
||||
data:
|
||||
MODEL_ID: "Intel/neural-chat-7b-v3-3"
|
||||
PORT: "2080"
|
||||
CUDA_GRAPHS: "0"
|
||||
HUGGING_FACE_HUB_TOKEN: "insert-your-huggingface-token-here"
|
||||
HF_TOKEN: "insert-your-huggingface-token-here"
|
||||
MAX_INPUT_TOKENS: "1024"
|
||||
MAX_TOTAL_TOKENS: "4096"
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
@@ -53,6 +49,7 @@ data:
|
||||
NUMBA_CACHE_DIR: "/tmp"
|
||||
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
CUDA_GRAPHS: "0"
|
||||
---
|
||||
# Source: docsum/charts/llm-uservice/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
@@ -66,7 +63,7 @@ metadata:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -116,7 +113,7 @@ metadata:
|
||||
helm.sh/chart: docsum-0.8.0
|
||||
app.kubernetes.io/name: docsum
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
@@ -141,7 +138,7 @@ metadata:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
@@ -181,14 +178,26 @@ spec:
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
startupProbe:
|
||||
exec:
|
||||
command:
|
||||
- curl
|
||||
- http://docsum-tgi
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
@@ -241,6 +250,23 @@ spec:
|
||||
- name: http
|
||||
containerPort: 2080
|
||||
protocol: TCP
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
tcpSocket:
|
||||
port: http
|
||||
readinessProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
tcpSocket:
|
||||
port: http
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
tcpSocket:
|
||||
port: http
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
@@ -263,7 +289,7 @@ metadata:
|
||||
helm.sh/chart: docsum-0.8.0
|
||||
app.kubernetes.io/name: docsum
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/version: "v0.8"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
|
||||
Reference in New Issue
Block a user