Add helm deployment instructions for GenAIExamples (#1373)
Add Helm deployment instructions for ChatQnA, AgentQnA, AudioQnA, CodeTrans, DocSum, FaqGen and VisualQnA. Signed-off-by: Dolpher Du <dolpher.du@intel.com>
This commit is contained in:
@@ -186,6 +186,10 @@ docker build -t opea/agent:latest --build-arg https_proxy=$https_proxy --build-a
|
||||
:::
|
||||
::::
|
||||
|
||||
## Deploy using Helm Chart
|
||||
|
||||
Refer to the [AgentQnA helm chart](./kubernetes/helm/README.md) for instructions on deploying AgentQnA on Kubernetes.
|
||||
|
||||
## Validate services
|
||||
|
||||
First look at logs of the agent docker containers:
|
||||
|
||||
11
AgentQnA/kubernetes/helm/README.md
Normal file
11
AgentQnA/kubernetes/helm/README.md
Normal file
@@ -0,0 +1,11 @@
|
||||
# Deploy AgentQnA on a Kubernetes cluster
|
||||
|
||||
- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information.
|
||||
- For more deployment options, refer to the [Helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme).
|
||||
|
||||
## Deploy on Gaudi
|
||||
|
||||
```
|
||||
export HFTOKEN="insert-your-huggingface-token-here"
|
||||
helm install agentqna oci://ghcr.io/opea-project/charts/agentqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f gaudi-values.yaml
|
||||
```
|
||||
38
AgentQnA/kubernetes/helm/gaudi-values.yaml
Normal file
38
AgentQnA/kubernetes/helm/gaudi-values.yaml
Normal file
@@ -0,0 +1,38 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# Accelerate inferencing in heaviest components to improve performance
|
||||
# by overriding their subchart values
|
||||
|
||||
tgi:
|
||||
enabled: true
|
||||
accelDevice: "gaudi"
|
||||
image:
|
||||
repository: ghcr.io/huggingface/tgi-gaudi
|
||||
tag: "2.0.6"
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 4
|
||||
MAX_INPUT_LENGTH: "4096"
|
||||
MAX_TOTAL_TOKENS: "8192"
|
||||
CUDA_GRAPHS: ""
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: "none"
|
||||
PT_HPU_ENABLE_LAZY_COLLECTIVES: "true"
|
||||
ENABLE_HPU_GRAPH: "true"
|
||||
LIMIT_HPU_GRAPH: "true"
|
||||
USE_FLASH_ATTENTION: "true"
|
||||
FLASH_ATTENTION_RECOMPUTE: "true"
|
||||
extraCmdArgs: ["--sharded","true","--num-shard","4"]
|
||||
livenessProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 1
|
||||
readinessProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 1
|
||||
startupProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 1
|
||||
failureThreshold: 120
|
||||
@@ -71,6 +71,10 @@ Refer to the [Gaudi Guide](./docker_compose/intel/hpu/gaudi/README.md) for instr
|
||||
|
||||
Refer to the [Xeon Guide](./docker_compose/intel/cpu/xeon/README.md) for instructions on deploying AudioQnA on Xeon.
|
||||
|
||||
## Deploy using Helm Chart
|
||||
|
||||
Refer to the [AudioQnA helm chart](./kubernetes/helm/README.md) for instructions on deploying AudioQnA on Kubernetes.
|
||||
|
||||
## Supported Models
|
||||
|
||||
### ASR
|
||||
|
||||
18
AudioQnA/kubernetes/helm/README.md
Normal file
18
AudioQnA/kubernetes/helm/README.md
Normal file
@@ -0,0 +1,18 @@
|
||||
# Deploy AudioQnA on a Kubernetes cluster
|
||||
|
||||
- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information.
|
||||
- For more deployment options, refer to the [Helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme).
|
||||
|
||||
## Deploy on Xeon
|
||||
|
||||
```
|
||||
export HFTOKEN="insert-your-huggingface-token-here"
|
||||
helm install audioqna oci://ghcr.io/opea-project/charts/audioqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml
|
||||
```
|
||||
|
||||
## Deploy on Gaudi
|
||||
|
||||
```
|
||||
export HFTOKEN="insert-your-huggingface-token-here"
|
||||
helm install audioqna oci://ghcr.io/opea-project/charts/audioqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f gaudi-values.yaml
|
||||
```
|
||||
5
AudioQnA/kubernetes/helm/cpu-values.yaml
Normal file
5
AudioQnA/kubernetes/helm/cpu-values.yaml
Normal file
@@ -0,0 +1,5 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
tgi:
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
43
AudioQnA/kubernetes/helm/gaudi-values.yaml
Normal file
43
AudioQnA/kubernetes/helm/gaudi-values.yaml
Normal file
@@ -0,0 +1,43 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
tgi:
|
||||
accelDevice: "gaudi"
|
||||
image:
|
||||
repository: ghcr.io/huggingface/tgi-gaudi
|
||||
tag: "2.0.6"
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
MAX_INPUT_LENGTH: "1024"
|
||||
MAX_TOTAL_TOKENS: "2048"
|
||||
CUDA_GRAPHS: ""
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
ENABLE_HPU_GRAPH: true
|
||||
LIMIT_HPU_GRAPH: true
|
||||
USE_FLASH_ATTENTION: true
|
||||
FLASH_ATTENTION_RECOMPUTE: true
|
||||
livenessProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 1
|
||||
readinessProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 1
|
||||
startupProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 1
|
||||
failureThreshold: 120
|
||||
|
||||
whisper:
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
|
||||
speecht5:
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
@@ -1,32 +0,0 @@
|
||||
# Deploy AudioQnA in a Kubernetes Cluster
|
||||
|
||||
> [NOTE]
|
||||
> The following values must be set before you can deploy:
|
||||
> HUGGINGFACEHUB_API_TOKEN
|
||||
> You can also customize the "MODEL_ID" and "model-volume"
|
||||
|
||||
## Deploy On Xeon
|
||||
```
|
||||
cd GenAIExamples/AudioQnA/kubernetes/intel/cpu/xeon/manifest
|
||||
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
|
||||
sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" audioqna.yaml
|
||||
kubectl apply -f audioqna.yaml
|
||||
```
|
||||
## Deploy On Gaudi
|
||||
```
|
||||
cd GenAIExamples/AudioQnA/kubernetes/intel/hpu/gaudi/manifest
|
||||
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
|
||||
sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" audioqna.yaml
|
||||
kubectl apply -f audioqna.yaml
|
||||
```
|
||||
|
||||
|
||||
## Verify Services
|
||||
|
||||
Make sure all the pods are running, and restart the audioqna-xxxx pod if necessary.
|
||||
|
||||
```bash
|
||||
kubectl get pods
|
||||
|
||||
curl http://${host_ip}:3008/v1/audioqna -X POST -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64}' -H 'Content-Type: application/json'
|
||||
```
|
||||
@@ -1,241 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: audio-qna-config
|
||||
namespace: default
|
||||
data:
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
|
||||
MEGA_SERVICE_HOST_IP: audioqna-backend-server-svc
|
||||
|
||||
WHISPER_SERVER_HOST_IP: whisper-svc
|
||||
WHISPER_SERVER_PORT: 7066
|
||||
SPEECHT5_SERVER_HOST_IP: speecht5-svc
|
||||
SPEECHT5_SERVER_PORT: 7055
|
||||
LLM_SERVER_HOST_IP: llm-svc
|
||||
LLM_SERVER_PORT: 3006
|
||||
|
||||
---
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: whisper-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: whisper-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: whisper-deploy
|
||||
spec:
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: whisper-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: audio-qna-config
|
||||
image: opea/whisper:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: whisper-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 7066
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: whisper-svc
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: whisper-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 7066
|
||||
targetPort: 7066
|
||||
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: speecht5-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: speecht5-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: speecht5-deploy
|
||||
spec:
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: speecht5-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: audio-qna-config
|
||||
image: opea/speecht5:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: speecht5-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 7055
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: speecht5-svc
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: speecht5-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 7055
|
||||
targetPort: 7055
|
||||
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-dependency-deploy
|
||||
spec:
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: audio-qna-config
|
||||
image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
|
||||
name: llm-dependency-deploy-demo
|
||||
securityContext:
|
||||
capabilities:
|
||||
add:
|
||||
- SYS_NICE
|
||||
args:
|
||||
- --model-id
|
||||
- $(LLM_MODEL_ID)
|
||||
- --max-input-length
|
||||
- '2048'
|
||||
- --max-total-tokens
|
||||
- '4096'
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
ports:
|
||||
- containerPort: 80
|
||||
serviceAccountName: default
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /home/sdp/cesg
|
||||
type: Directory
|
||||
- name: shm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: llm-dependency-svc
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 3006
|
||||
targetPort: 80
|
||||
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: audioqna-backend-server-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: audioqna-backend-server-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: audioqna-backend-server-deploy
|
||||
spec:
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: audioqna-backend-server-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: audio-qna-config
|
||||
image: opea/audioqna:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: audioqna-backend-server-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 8888
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: audioqna-backend-server-svc
|
||||
spec:
|
||||
type: NodePort
|
||||
selector:
|
||||
app: audioqna-backend-server-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 3008
|
||||
targetPort: 8888
|
||||
nodePort: 30666
|
||||
@@ -1,293 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: audio-qna-config
|
||||
namespace: default
|
||||
data:
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
|
||||
MEGA_SERVICE_HOST_IP: audioqna-backend-server-svc
|
||||
|
||||
WHISPER_SERVER_HOST_IP: whisper-svc
|
||||
WHISPER_SERVER_PORT: 7066
|
||||
SPEECHT5_SERVER_HOST_IP: speecht5-svc
|
||||
SPEECHT5_SERVER_PORT: 7055
|
||||
LLM_SERVER_HOST_IP: llm-svc
|
||||
LLM_SERVER_PORT: 3006
|
||||
|
||||
---
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: whisper-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: whisper-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: whisper-deploy
|
||||
spec:
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: whisper-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: audio-qna-config
|
||||
image: opea/whisper-gaudi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: whisper-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 7066
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: whisper-svc
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: whisper-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 7066
|
||||
targetPort: 7066
|
||||
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: speecht5-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: speecht5-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: speecht5-deploy
|
||||
spec:
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: speecht5-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: audio-qna-config
|
||||
image: opea/speecht5-gaudi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: speecht5-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 7055
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: speecht5-svc
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: speecht5-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 7055
|
||||
targetPort: 7055
|
||||
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-dependency-deploy
|
||||
spec:
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: audio-qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.6
|
||||
name: llm-dependency-deploy-demo
|
||||
securityContext:
|
||||
capabilities:
|
||||
add:
|
||||
- SYS_NICE
|
||||
args:
|
||||
- --model-id
|
||||
- $(LLM_MODEL_ID)
|
||||
- --max-input-length
|
||||
- '2048'
|
||||
- --max-total-tokens
|
||||
- '4096'
|
||||
- --max-batch-total-tokens
|
||||
- '65536'
|
||||
- --max-batch-prefill-tokens
|
||||
- '4096'
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: ENABLE_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: LIMIT_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: USE_FLASH_ATTENTION
|
||||
value: 'true'
|
||||
- name: FLASH_ATTENTION_RECOMPUTE
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: PREFILL_BATCH_BUCKET_SIZE
|
||||
value: "1"
|
||||
- name: BATCH_BUCKET_SIZE
|
||||
value: "8"
|
||||
serviceAccountName: default
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
- name: shm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: llm-dependency-svc
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 3006
|
||||
targetPort: 80
|
||||
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: audioqna-backend-server-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: audioqna-backend-server-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: audioqna-backend-server-deploy
|
||||
spec:
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: audioqna-backend-server-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: audio-qna-config
|
||||
image: opea/audioqna:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: audioqna-backend-server-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 8888
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: audioqna-backend-server-svc
|
||||
spec:
|
||||
type: NodePort
|
||||
selector:
|
||||
app: audioqna-backend-server-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 3008
|
||||
targetPort: 8888
|
||||
nodePort: 30666
|
||||
@@ -96,12 +96,12 @@ fi
|
||||
|
||||
case "$1" in
|
||||
install_AudioQnA)
|
||||
pushd AudioQnA/kubernetes/intel/hpu/gaudi/gmc
|
||||
pushd AudioQnA/kubernetes/gmc
|
||||
install_audioqa
|
||||
popd
|
||||
;;
|
||||
validate_AudioQnA)
|
||||
pushd AudioQnA/kubernetes/intel/hpu/gaudi/gmc
|
||||
pushd AudioQnA/kubernetes/gmc
|
||||
validate_audioqa
|
||||
popd
|
||||
;;
|
||||
|
||||
@@ -96,12 +96,12 @@ fi
|
||||
|
||||
case "$1" in
|
||||
install_AudioQnA)
|
||||
pushd AudioQnA/kubernetes/intel/cpu/xeon/gmc
|
||||
pushd AudioQnA/kubernetes/gmc
|
||||
install_audioqa
|
||||
popd
|
||||
;;
|
||||
validate_AudioQnA)
|
||||
pushd AudioQnA/kubernetes/intel/cpu/xeon/gmc
|
||||
pushd AudioQnA/kubernetes/gmc
|
||||
validate_audioqa
|
||||
popd
|
||||
;;
|
||||
|
||||
@@ -250,19 +250,9 @@ docker compose up -d
|
||||
|
||||
Refer to the [NVIDIA GPU Guide](./docker_compose/nvidia/gpu/README.md) for more instructions on building docker images from source.
|
||||
|
||||
### Deploy ChatQnA into Kubernetes on Xeon & Gaudi with GMC
|
||||
### Deploy ChatQnA on Kubernetes using Helm Chart
|
||||
|
||||
Refer to the [Kubernetes Guide](./kubernetes/intel/README_gmc.md) for instructions on deploying ChatQnA into Kubernetes on Xeon & Gaudi with GMC.
|
||||
|
||||
### Deploy ChatQnA into Kubernetes on Xeon & Gaudi without GMC
|
||||
|
||||
Refer to the [Kubernetes Guide](./kubernetes/intel/README.md) for instructions on deploying ChatQnA into Kubernetes on Xeon & Gaudi without GMC.
|
||||
|
||||
### Deploy ChatQnA into Kubernetes using Helm Chart
|
||||
|
||||
Install Helm (version >= 3.15) first. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information.
|
||||
|
||||
Refer to the [ChatQnA helm chart](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts/chatqna/README.md) for instructions on deploying ChatQnA into Kubernetes on Xeon & Gaudi.
|
||||
Refer to the [ChatQnA helm chart](./kubernetes/helm/README.md) for instructions on deploying ChatQnA on Kubernetes.
|
||||
|
||||
### Deploy ChatQnA on AI PC
|
||||
|
||||
|
||||
30
ChatQnA/kubernetes/helm/README.md
Normal file
30
ChatQnA/kubernetes/helm/README.md
Normal file
@@ -0,0 +1,30 @@
|
||||
# Deploy ChatQnA on a Kubernetes cluster
|
||||
|
||||
- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information.
|
||||
- For more deployment options, refer to the [Helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme).
|
||||
|
||||
## Deploy on Xeon
|
||||
|
||||
```
|
||||
export HFTOKEN="insert-your-huggingface-token-here"
|
||||
helm install chatqna oci://ghcr.io/opea-project/charts/chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml
|
||||
```
|
||||
|
||||
## Deploy on Gaudi
|
||||
|
||||
```
|
||||
export HFTOKEN="insert-your-huggingface-token-here"
|
||||
helm install chatqna oci://ghcr.io/opea-project/charts/chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f gaudi-values.yaml
|
||||
```
|
||||
|
||||
## Deploy variants of ChatQnA
|
||||
|
||||
ChatQnA is configurable, and you can enable or disable features by providing a custom `values.yaml` file.
|
||||
For example, to run with vLLM instead of TGI on Gaudi hardware, use the `gaudi-vllm-values.yaml` file:
|
||||
|
||||
```
|
||||
export HFTOKEN="insert-your-huggingface-token-here"
|
||||
helm install chatqna oci://ghcr.io/opea-project/charts/chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f gaudi-vllm-values.yaml
|
||||
```
|
||||
|
||||
See the other `*-values.yaml` files in this directory for more examples.
|
||||
109
ChatQnA/kubernetes/helm/cpu-values.yaml
Normal file
109
ChatQnA/kubernetes/helm/cpu-values.yaml
Normal file
@@ -0,0 +1,109 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# Override CPU resource request and probe timing values in specific subcharts
|
||||
#
|
||||
# RESOURCES
|
||||
#
|
||||
# Resource request matching actual resource usage (with enough slack)
|
||||
# is important when the service is scaled up, so that the right number of pods
|
||||
# get scheduled to the right nodes.
|
||||
#
|
||||
# Because resource usage depends on the used devices, model, data type
|
||||
# and SW versions, and this top-level chart has overrides for them,
|
||||
# resource requests need to be specified here too.
|
||||
#
|
||||
# To test service without resource request, use "resources: {}".
|
||||
#
|
||||
# PROBES
|
||||
#
|
||||
# Inferencing pods startup / warmup takes *much* longer on CPUs than
|
||||
# with acceleration devices, and their responses are also slower,
|
||||
# especially when node is running several instances of these services.
|
||||
#
|
||||
# Kubernetes restarting pod before its startup finishes, or not
|
||||
# sending it queries because it's not in ready state due to slow
|
||||
# readiness responses, really does NOT help in getting faster responses.
|
||||
#
|
||||
# => probe timings need to be increased when running on CPU.
|
||||
|
||||
tgi:
|
||||
# TODO: add Helm value also for TGI data type option:
|
||||
# https://github.com/opea-project/GenAIExamples/issues/330
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
|
||||
# Potentially suitable values for scaling CPU TGI 2.2 with Intel/neural-chat-7b-v3-3 @ 32-bit:
|
||||
resources:
|
||||
limits:
|
||||
cpu: 8
|
||||
memory: 70Gi
|
||||
requests:
|
||||
cpu: 6
|
||||
memory: 65Gi
|
||||
|
||||
livenessProbe:
|
||||
initialDelaySeconds: 8
|
||||
periodSeconds: 8
|
||||
failureThreshold: 24
|
||||
timeoutSeconds: 4
|
||||
readinessProbe:
|
||||
initialDelaySeconds: 16
|
||||
periodSeconds: 8
|
||||
timeoutSeconds: 4
|
||||
startupProbe:
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 5
|
||||
failureThreshold: 180
|
||||
timeoutSeconds: 2
|
||||
|
||||
teirerank:
|
||||
RERANK_MODEL_ID: "BAAI/bge-reranker-base"
|
||||
|
||||
# Potentially suitable values for scaling CPU TEI v1.5 with BAAI/bge-reranker-base model:
|
||||
resources:
|
||||
limits:
|
||||
cpu: 4
|
||||
memory: 30Gi
|
||||
requests:
|
||||
cpu: 2
|
||||
memory: 25Gi
|
||||
|
||||
livenessProbe:
|
||||
initialDelaySeconds: 8
|
||||
periodSeconds: 8
|
||||
failureThreshold: 24
|
||||
timeoutSeconds: 4
|
||||
readinessProbe:
|
||||
initialDelaySeconds: 8
|
||||
periodSeconds: 8
|
||||
timeoutSeconds: 4
|
||||
startupProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
failureThreshold: 120
|
||||
|
||||
tei:
|
||||
EMBEDDING_MODEL_ID: "BAAI/bge-base-en-v1.5"
|
||||
|
||||
# Potentially suitable values for scaling CPU TEI 1.5 with BAAI/bge-base-en-v1.5 model:
|
||||
resources:
|
||||
limits:
|
||||
cpu: 4
|
||||
memory: 4Gi
|
||||
requests:
|
||||
cpu: 2
|
||||
memory: 3Gi
|
||||
|
||||
livenessProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
failureThreshold: 24
|
||||
timeoutSeconds: 2
|
||||
readinessProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 2
|
||||
startupProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
failureThreshold: 120
|
||||
76
ChatQnA/kubernetes/helm/gaudi-values.yaml
Normal file
76
ChatQnA/kubernetes/helm/gaudi-values.yaml
Normal file
@@ -0,0 +1,76 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# Accelerate inferencing in heaviest components to improve performance
|
||||
# by overriding their subchart values
|
||||
|
||||
# TGI: largest bottleneck for ChatQnA
|
||||
tgi:
|
||||
accelDevice: "gaudi"
|
||||
image:
|
||||
repository: ghcr.io/huggingface/tgi-gaudi
|
||||
tag: "2.0.6"
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
# higher limits are needed with extra input tokens added by rerank
|
||||
MAX_INPUT_LENGTH: "2048"
|
||||
MAX_TOTAL_TOKENS: "4096"
|
||||
CUDA_GRAPHS: ""
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: "none"
|
||||
ENABLE_HPU_GRAPH: "true"
|
||||
LIMIT_HPU_GRAPH: "true"
|
||||
USE_FLASH_ATTENTION: "true"
|
||||
FLASH_ATTENTION_RECOMPUTE: "true"
|
||||
|
||||
livenessProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 1
|
||||
readinessProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 1
|
||||
startupProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 1
|
||||
failureThreshold: 120
|
||||
|
||||
# Reranking: second largest bottleneck when reranking is in use
|
||||
# (i.e. query context docs have been uploaded with data-prep)
|
||||
teirerank:
|
||||
accelDevice: "gaudi"
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: "none"
|
||||
MAX_WARMUP_SEQUENCE_LENGTH: "512"
|
||||
image:
|
||||
repository: ghcr.io/huggingface/tei-gaudi
|
||||
tag: 1.5.0
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
securityContext:
|
||||
readOnlyRootFilesystem: false
|
||||
livenessProbe:
|
||||
timeoutSeconds: 1
|
||||
readinessProbe:
|
||||
timeoutSeconds: 1
|
||||
|
||||
# Embedding: Second largest bottleneck without rerank
|
||||
# By default tei on gaudi is disabled.
|
||||
# tei:
|
||||
# accelDevice: "gaudi"
|
||||
# OMPI_MCA_btl_vader_single_copy_mechanism: "none"
|
||||
# MAX_WARMUP_SEQUENCE_LENGTH: "512"
|
||||
# image:
|
||||
# repository: ghcr.io/huggingface/tei-gaudi
|
||||
# tag: 1.5.0
|
||||
# resources:
|
||||
# limits:
|
||||
# habana.ai/gaudi: 1
|
||||
# securityContext:
|
||||
# readOnlyRootFilesystem: false
|
||||
# livenessProbe:
|
||||
# timeoutSeconds: 1
|
||||
# readinessProbe:
|
||||
# timeoutSeconds: 1
|
||||
62
ChatQnA/kubernetes/helm/gaudi-vllm-values.yaml
Normal file
62
ChatQnA/kubernetes/helm/gaudi-vllm-values.yaml
Normal file
@@ -0,0 +1,62 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# Accelerate inferencing in heaviest components to improve performance
|
||||
# by overriding their subchart values
|
||||
|
||||
tgi:
|
||||
enabled: false
|
||||
|
||||
vllm:
|
||||
enabled: true
|
||||
accelDevice: "gaudi"
|
||||
image:
|
||||
repository: opea/vllm-gaudi
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
startupProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 1
|
||||
failureThreshold: 120
|
||||
readinessProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 1
|
||||
livenessProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 1
|
||||
|
||||
PT_HPU_ENABLE_LAZY_COLLECTIVES: "true"
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: "none"
|
||||
|
||||
extraCmdArgs: [
|
||||
"--tensor-parallel-size", "1",
|
||||
"--block-size", "128",
|
||||
"--max-num-seqs", "256",
|
||||
"--max-seq_len-to-capture", "2048"
|
||||
]
|
||||
|
||||
|
||||
# Reranking: second largest bottleneck when reranking is in use
|
||||
# (i.e. query context docs have been uploaded with data-prep)
|
||||
#
|
||||
# TODO: could vLLM be used also for reranking / embedding?
|
||||
teirerank:
|
||||
accelDevice: "gaudi"
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: "none"
|
||||
MAX_WARMUP_SEQUENCE_LENGTH: "512"
|
||||
image:
|
||||
repository: ghcr.io/huggingface/tei-gaudi
|
||||
tag: 1.5.0
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
securityContext:
|
||||
readOnlyRootFilesystem: false
|
||||
livenessProbe:
|
||||
timeoutSeconds: 1
|
||||
readinessProbe:
|
||||
timeoutSeconds: 1
|
||||
108
ChatQnA/kubernetes/helm/guardrails-gaudi-values.yaml
Normal file
108
ChatQnA/kubernetes/helm/guardrails-gaudi-values.yaml
Normal file
@@ -0,0 +1,108 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
image:
|
||||
repository: opea/chatqna-guardrails
|
||||
|
||||
# guardrails related config
|
||||
guardrails-usvc:
|
||||
enabled: true
|
||||
# SAFETY_GUARD_ENDPOINT: "http://{{ .Release.Name }}-tgi-guardrails"
|
||||
SAFETY_GUARD_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B"
|
||||
|
||||
# gaudi related config
|
||||
# tei running on CPU by default
|
||||
# tei:
|
||||
# accelDevice: "gaudi"
|
||||
# image:
|
||||
# repository: ghcr.io/huggingface/tei-gaudi
|
||||
# tag: 1.5.0
|
||||
# resources:
|
||||
# limits:
|
||||
# habana.ai/gaudi: 1
|
||||
# securityContext:
|
||||
# readOnlyRootFilesystem: false
|
||||
# livenessProbe:
|
||||
# timeoutSeconds: 1
|
||||
# readinessProbe:
|
||||
# timeoutSeconds: 1
|
||||
|
||||
teirerank:
|
||||
accelDevice: "gaudi"
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: "none"
|
||||
MAX_WARMUP_SEQUENCE_LENGTH: "512"
|
||||
image:
|
||||
repository: ghcr.io/huggingface/tei-gaudi
|
||||
tag: "1.5.0"
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
securityContext:
|
||||
readOnlyRootFilesystem: false
|
||||
livenessProbe:
|
||||
timeoutSeconds: 1
|
||||
readinessProbe:
|
||||
timeoutSeconds: 1
|
||||
|
||||
tgi:
|
||||
accelDevice: "gaudi"
|
||||
image:
|
||||
repository: ghcr.io/huggingface/tgi-gaudi
|
||||
tag: "2.0.6"
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
# higher limits are needed with extra input tokens added by rerank
|
||||
MAX_INPUT_LENGTH: "2048"
|
||||
MAX_TOTAL_TOKENS: "4096"
|
||||
CUDA_GRAPHS: ""
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: "none"
|
||||
ENABLE_HPU_GRAPH: "true"
|
||||
LIMIT_HPU_GRAPH: "true"
|
||||
USE_FLASH_ATTENTION: "true"
|
||||
FLASH_ATTENTION_RECOMPUTE: "true"
|
||||
livenessProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 1
|
||||
readinessProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 1
|
||||
startupProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 1
|
||||
failureThreshold: 120
|
||||
|
||||
tgi-guardrails:
|
||||
enabled: true
|
||||
accelDevice: "gaudi"
|
||||
LLM_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B"
|
||||
image:
|
||||
repository: ghcr.io/huggingface/tgi-gaudi
|
||||
tag: "2.0.6"
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
MAX_INPUT_LENGTH: "1024"
|
||||
MAX_TOTAL_TOKENS: "2048"
|
||||
CUDA_GRAPHS: ""
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: "none"
|
||||
ENABLE_HPU_GRAPH: "true"
|
||||
LIMIT_HPU_GRAPH: "true"
|
||||
USE_FLASH_ATTENTION: "true"
|
||||
FLASH_ATTENTION_RECOMPUTE: "true"
|
||||
livenessProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 1
|
||||
readinessProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 1
|
||||
startupProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 1
|
||||
failureThreshold: 120
|
||||
14
ChatQnA/kubernetes/helm/guardrails-values.yaml
Normal file
14
ChatQnA/kubernetes/helm/guardrails-values.yaml
Normal file
@@ -0,0 +1,14 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
image:
|
||||
repository: opea/chatqna-guardrails
|
||||
|
||||
# guardrails related config
|
||||
guardrails-usvc:
|
||||
enabled: true
|
||||
# SAFETY_GUARD_ENDPOINT: "http://{{ .Release.Name }}-tgi-guardrails"
|
||||
SAFETY_GUARD_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B"
|
||||
tgi-guardrails:
|
||||
enabled: true
|
||||
LLM_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B"
|
||||
11
ChatQnA/kubernetes/helm/norerank-values.yaml
Normal file
11
ChatQnA/kubernetes/helm/norerank-values.yaml
Normal file
@@ -0,0 +1,11 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# Deploy ChatQnA without reranking: use the opea/chatqna-without-rerank image
|
||||
# and disable the teirerank subchart
|
||||
|
||||
image:
|
||||
repository: opea/chatqna-without-rerank
|
||||
|
||||
teirerank:
|
||||
enabled: false
|
||||
25
ChatQnA/kubernetes/helm/nv-values.yaml
Normal file
25
ChatQnA/kubernetes/helm/nv-values.yaml
Normal file
@@ -0,0 +1,25 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# To override values in subchart tgi
|
||||
tgi:
|
||||
accelDevice: "nvidia"
|
||||
image:
|
||||
repository: ghcr.io/huggingface/text-generation-inference
|
||||
tag: "2.2.0"
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/gpu: 1
|
||||
livenessProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 1
|
||||
readinessProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 1
|
||||
startupProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 1
|
||||
failureThreshold: 120
|
||||
12
ChatQnA/kubernetes/helm/withwrapper-values.yaml
Normal file
12
ChatQnA/kubernetes/helm/withwrapper-values.yaml
Normal file
@@ -0,0 +1,12 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
image:
|
||||
repository: opea/chatqna-wrapper
|
||||
|
||||
llm-uservice:
|
||||
enabled: true
|
||||
embedding-usvc:
|
||||
enabled: true
|
||||
reranking-usvc:
|
||||
enabled: true
|
||||
@@ -1,99 +0,0 @@
|
||||
# Deploy ChatQnA in Kubernetes Cluster
|
||||
|
||||
> [NOTE]
|
||||
> The following values must be set before you can deploy:
|
||||
> HUGGINGFACEHUB_API_TOKEN
|
||||
>
|
||||
> You can also customize the "MODEL_ID" if needed.
|
||||
>
|
||||
> You need to make sure you have created the directory `/mnt/opea-models` to save the cached model on the node where the ChatQnA workload is running. Otherwise, you need to modify the `chatqna.yaml` file to change the `model-volume` to a directory that exists on the node.
|
||||
>
|
||||
> File upload size limit: The maximum size for uploaded files is 10GB.
|
||||
|
||||
## Deploy On Xeon
|
||||
|
||||
```
|
||||
cd GenAIExamples/ChatQnA/kubernetes/intel/cpu/xeon/manifest
|
||||
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
|
||||
sed -i "s|insert-your-huggingface-token-here|${HUGGINGFACEHUB_API_TOKEN}|g" chatqna.yaml
|
||||
kubectl apply -f chatqna.yaml
|
||||
```
|
||||
|
||||
Newer CPUs such as Intel Cooper Lake and Sapphire Rapids support the [`bfloat16` data type](https://en.wikipedia.org/wiki/Bfloat16_floating-point_format). If you have such a CPU and the given model supports `bfloat16`, adding the `--dtype bfloat16` argument to the `huggingface/text-generation-inference` server halves its memory usage and speeds it up a bit. To use it, run the following commands:
|
||||
|
||||
```
|
||||
# label your node for scheduling the service on it automatically
|
||||
kubectl label node 'your-node-name' node-type=node-bfloat16
|
||||
|
||||
# add `nodeSelector` for the `huggingface/text-generation-inference` server at `chatqna_bf16.yaml`
|
||||
# create
|
||||
kubectl apply -f chatqna_bf16.yaml
|
||||
```
|
||||
|
||||
## Deploy On Gaudi
|
||||
|
||||
```
|
||||
cd GenAIExamples/ChatQnA/kubernetes/intel/hpu/gaudi/manifest
|
||||
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
|
||||
sed -i "s|insert-your-huggingface-token-here|${HUGGINGFACEHUB_API_TOKEN}|g" chatqna.yaml
|
||||
kubectl apply -f chatqna.yaml
|
||||
```
|
||||
|
||||
## Deploy on Xeon with Remote LLM Model
|
||||
|
||||
```
|
||||
cd GenAIExamples/ChatQnA/kubernetes/intel/cpu/xeon/manifest
|
||||
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
|
||||
export vLLM_ENDPOINT="Your Remote Inference Endpoint"
|
||||
sed -i "s|insert-your-huggingface-token-here|${HUGGINGFACEHUB_API_TOKEN}|g" chatqna-remote-inference.yaml
|
||||
sed -i "s|insert-your-remote-inference-endpoint|${vLLM_ENDPOINT}|g" chatqna-remote-inference.yaml
|
||||
```
|
||||
|
||||
### Additional Steps for Remote Endpoints with Authentication (If No Authentication Skip This Step)
|
||||
|
||||
If your remote inference endpoint is protected with OAuth Client Credentials authentication, update CLIENTID, CLIENT_SECRET and TOKEN_URL with the correct values in the "chatqna-llm-uservice-config" ConfigMap.
|
||||
|
||||
|
||||
|
||||
### Deploy
|
||||
```
|
||||
kubectl apply -f chatqna-remote-inference.yaml
|
||||
```
|
||||
|
||||
## Deploy on Gaudi with TEI, Rerank, and vLLM Models Running Remotely
|
||||
|
||||
```
|
||||
cd GenAIExamples/ChatQnA/kubernetes/intel/hpu/gaudi/manifest
|
||||
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
|
||||
export vLLM_ENDPOINT="Your Remote Inference Endpoint"
|
||||
export TEI_EMBEDDING_ENDPOINT="Your Remote TEI Embedding Endpoint"
|
||||
export TEI_RERANKING_ENDPOINT="Your Remote Reranking Endpoint"
|
||||
|
||||
sed -i "s|insert-your-huggingface-token-here|${HUGGINGFACEHUB_API_TOKEN}|g" chatqna-vllm-remote-inference.yaml
|
||||
sed -i "s|insert-your-remote-vllm-inference-endpoint|${vLLM_ENDPOINT}|g" chatqna-vllm-remote-inference.yaml
|
||||
sed -i "s|insert-your-remote-embedding-endpoint|${TEI_EMBEDDING_ENDPOINT}|g" chatqna-vllm-remote-inference.yaml
|
||||
sed -i "s|insert-your-remote-reranking-endpoint|${TEI_RERANKING_ENDPOINT}|g" chatqna-vllm-remote-inference.yaml
|
||||
```
|
||||
|
||||
### Additional Steps for Remote Endpoints with Authentication (If No Authentication Skip This Step)
|
||||
|
||||
If your remote inference endpoint is protected with OAuth Client Credentials authentication, update CLIENTID, CLIENT_SECRET and TOKEN_URL with the correct values in the "chatqna-llm-uservice-config", "chatqna-data-prep-config", "chatqna-embedding-usvc-config", "chatqna-reranking-usvc-config" and "chatqna-retriever-usvc-config" ConfigMaps.
|
||||
|
||||
### Deploy
|
||||
```
|
||||
kubectl apply -f chatqna-vllm-remote-inference.yaml
|
||||
```
|
||||
|
||||
## Verify Services
|
||||
|
||||
To verify the installation, run the command `kubectl get pod` to make sure all pods are running.
|
||||
|
||||
Then run the command `kubectl port-forward svc/chatqna 8888:8888` to expose the ChatQnA service for access.
|
||||
|
||||
Open another terminal and run the following command to verify that the service is working:
|
||||
|
||||
```console
|
||||
curl http://localhost:8888/v1/chatqna \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{"messages": "What is the revenue of Nike in 2023?"}'
|
||||
```
|
||||
@@ -1,53 +0,0 @@
|
||||
# Deploy ChatQnA in Kubernetes Cluster on Single Node environment (Minikube)
|
||||
|
||||
The following instructions are to deploy the ChatQnA example on a single Node using Kubernetes for testing purposes.
|
||||
## Minikube setup
|
||||
1. Install [Minikube](https://minikube.sigs.k8s.io/docs/start/) following the quickstart guide
|
||||
2. Install [kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl-linux/)
|
||||
3. Build the container images, following the steps under "Build Docker Images" section in the [docker-compose README](../../docker_compose/intel/cpu/xeon/README.md) to checkout [GenAIComps](https://github.com/opea-project/GenAIComps.git) and build other images with your changes for development.
|
||||
```bash
|
||||
# Example on building frontend Docker image
|
||||
cd GenAIExamples/ChatQnA/ui
|
||||
docker build --no-cache -t opea/chatqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile .
|
||||
# etc...
|
||||
```
|
||||
The built images should be visible in the local Docker registry. Other images which have not been built with your changes (or not present in your local Docker registry) will be pulled from [docker hub](https://hub.docker.com/u/opea) by Minikube later in step 6.
|
||||
```bash
|
||||
docker images | grep opea
|
||||
# REPOSITORY TAG IMAGE ID CREATED SIZE
|
||||
# opea/chatqna-ui latest 8f2fa2523b85 6 days ago 1.56GB
|
||||
# opea/chatqna latest 7f2602a7a266 6 days ago 821MB
|
||||
# ...
|
||||
```
|
||||
4. The built images must be imported into the Minikube registry from the local Docker registry. This can be done using `minikube image load <image>`.
|
||||
```bash
|
||||
minikube image load opea/chatqna
|
||||
minikube image load opea/chatqna-ui
|
||||
# etc...
|
||||
```
|
||||
5. Start the minikube cluster with `minikube start`, check that the minikube container (kicbase) is up with `docker ps`
|
||||
```bash
|
||||
docker ps
|
||||
# CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
|
||||
# de088666cef2 gcr.io/k8s-minikube/kicbase:v0.0.45 "/usr/local/bin/entr…" 2 days ago Up 2 days 127.0.0.1:49157->22/tcp... minikube
|
||||
```
|
||||
6. Deploy the ChatQnA application with `kubectl apply -f chatqna.yaml`, check that the opea pods are in a running state with `kubectl get pods`
|
||||
```bash
|
||||
kubectl get pods
|
||||
# NAME READY STATUS RESTARTS AGE
|
||||
# chatqna-78b4f5865-qbzms 1/1 Running 0 2d3h
|
||||
# chatqna-chatqna-ui-54c8dfb6cf-fll5g 1/1 Running 0 2d3h
|
||||
# etc...
|
||||
```
|
||||
|
||||
7. Forward the port of the chatqna service from Minikube to the host, and test the service as you would a normal k8s cluster deployment
|
||||
```bash
|
||||
# port-forward to expose the chatqna endpoint from within the minikube cluster
|
||||
kubectl port-forward svc/chatqna 8888:8888
|
||||
curl http://localhost:8888/v1/chatqna \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{"messages": "What is the revenue of Nike in 2023?"}'
|
||||
|
||||
# Similarly port-forward to expose the chatqna-ui endpoint and use the UI at <machine-external-ip>:5173 in your browser
|
||||
kubectl port-forward svc/chatqna-chatqna-ui 5173:5173
|
||||
```
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -197,12 +197,12 @@ fi
|
||||
|
||||
case "$1" in
|
||||
install_ChatQnA)
|
||||
pushd ChatQnA/kubernetes/intel/hpu/gaudi/gmc
|
||||
pushd ChatQnA/kubernetes/gmc
|
||||
install_chatqna
|
||||
popd
|
||||
;;
|
||||
validate_ChatQnA)
|
||||
pushd ChatQnA/kubernetes/intel/hpu/gaudi/gmc
|
||||
pushd ChatQnA/kubernetes/gmc
|
||||
validate_chatqna
|
||||
validate_chatqna_dataprep
|
||||
popd
|
||||
|
||||
@@ -199,12 +199,12 @@ fi
|
||||
|
||||
case "$1" in
|
||||
install_ChatQnA)
|
||||
pushd ChatQnA/kubernetes/intel/cpu/xeon/gmc
|
||||
pushd ChatQnA/kubernetes/gmc
|
||||
install_chatqna
|
||||
popd
|
||||
;;
|
||||
validate_ChatQnA)
|
||||
pushd ChatQnA/kubernetes/intel/cpu/xeon/gmc
|
||||
pushd ChatQnA/kubernetes/gmc
|
||||
validate_chatqna
|
||||
validate_chatqna_dataprep
|
||||
popd
|
||||
|
||||
@@ -145,9 +145,9 @@ docker compose up -d
|
||||
|
||||
Refer to the [Xeon Guide](./docker_compose/intel/cpu/xeon/README.md) for more instructions on building docker images from source.
|
||||
|
||||
### Deploy CodeGen into Kubernetes using Helm Chart
|
||||
### Deploy CodeGen on Kubernetes using Helm Chart
|
||||
|
||||
Refer to the [CodeGen helm chart](./kubernetes/helm/README.md) for instructions on deploying CodeGen into Kubernetes on Xeon & Gaudi.
|
||||
Refer to the [CodeGen helm chart](./kubernetes/helm/README.md) for instructions on deploying CodeGen on Kubernetes.
|
||||
|
||||
## Consume CodeGen Service
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# Deploy CodeGen on kubernetes cluster
|
||||
# Deploy CodeGen on Kubernetes cluster
|
||||
|
||||
- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information.
|
||||
- For more deploy options, refer to [helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme).
|
||||
|
||||
@@ -1,9 +1,5 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
image:
|
||||
repository: opea/codegen
|
||||
tag: "latest"
|
||||
|
||||
tgi:
|
||||
LLM_MODEL_ID: Qwen/Qwen2.5-Coder-7B-Instruct
|
||||
|
||||
@@ -136,19 +136,9 @@ docker compose up -d
|
||||
|
||||
Refer to the [Xeon Guide](./docker_compose/intel/cpu/xeon/README.md) for more instructions on building docker images from source.
|
||||
|
||||
### Deploy using Kubernetes with GMC
|
||||
### Deploy CodeTrans on Kubernetes using Helm Chart
|
||||
|
||||
Refer to the [Code Translation Kubernetes Guide](./kubernetes/intel/README_gmc.md)
|
||||
|
||||
### Deploy using Kubernetes without GMC
|
||||
|
||||
Refer to the [Code Translation Kubernetes Guide](./kubernetes/intel/README.md)
|
||||
|
||||
### Deploy CodeTrans into Kubernetes using Helm Chart
|
||||
|
||||
Install Helm (version >= 3.15) first. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information.
|
||||
|
||||
Refer to the [CodeTrans helm chart](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts/codetrans/README.md) for instructions on deploying CodeTrans into Kubernetes on Xeon & Gaudi.
|
||||
Refer to the [CodeTrans helm chart](./kubernetes/helm/README.md) for instructions on deploying CodeTrans on Kubernetes.
|
||||
|
||||
## Consume Code Translation Service
|
||||
|
||||
|
||||
18
CodeTrans/kubernetes/helm/README.md
Normal file
18
CodeTrans/kubernetes/helm/README.md
Normal file
@@ -0,0 +1,18 @@
|
||||
# Deploy CodeTrans on Kubernetes cluster
|
||||
|
||||
- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information.
|
||||
- For more deploy options, refer to [helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme).
|
||||
|
||||
## Deploy on Xeon
|
||||
|
||||
```
|
||||
export HFTOKEN="insert-your-huggingface-token-here"
|
||||
helm install codetrans oci://ghcr.io/opea-project/charts/codetrans --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml
|
||||
```
|
||||
|
||||
## Deploy on Gaudi
|
||||
|
||||
```
|
||||
export HFTOKEN="insert-your-huggingface-token-here"
|
||||
helm install codetrans oci://ghcr.io/opea-project/charts/codetrans --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f gaudi-values.yaml
|
||||
```
|
||||
5
CodeTrans/kubernetes/helm/cpu-values.yaml
Normal file
5
CodeTrans/kubernetes/helm/cpu-values.yaml
Normal file
@@ -0,0 +1,5 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
tgi:
|
||||
LLM_MODEL_ID: mistralai/Mistral-7B-Instruct-v0.3
|
||||
27
CodeTrans/kubernetes/helm/gaudi-values.yaml
Normal file
27
CodeTrans/kubernetes/helm/gaudi-values.yaml
Normal file
@@ -0,0 +1,27 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
tgi:
|
||||
accelDevice: "gaudi"
|
||||
image:
|
||||
repository: ghcr.io/huggingface/tgi-gaudi
|
||||
tag: "2.0.6"
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
MAX_INPUT_LENGTH: "1024"
|
||||
MAX_TOTAL_TOKENS: "2048"
|
||||
CUDA_GRAPHS: ""
|
||||
livenessProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 1
|
||||
readinessProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 1
|
||||
startupProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 1
|
||||
failureThreshold: 120
|
||||
@@ -1,59 +0,0 @@
|
||||
# Deploy CodeTrans in Kubernetes Cluster
|
||||
|
||||
> [NOTE]
|
||||
> The following values must be set before you can deploy:
|
||||
> HUGGINGFACEHUB_API_TOKEN
|
||||
>
|
||||
> You can also customize the "MODEL_ID" if needed.
|
||||
>
|
||||
> You need to make sure you have created the directory `/mnt/opea-models` to save the cached model on the node where the CodeTrans workload is running. Otherwise, you need to modify the `codetrans.yaml` file to change the `model-volume` to a directory that exists on the node.
|
||||
|
||||
## Required Models
|
||||
|
||||
By default, the LLM model is set to a default value as listed below:
|
||||
|
||||
|Service |Model |
|
||||
|---------|-------------------------|
|
||||
|LLM |mistralai/Mistral-7B-Instruct-v0.3|
|
||||
|
||||
Change the `MODEL_ID` in `codetrans.yaml` to suit your needs.
|
||||
|
||||
## Deploy On Xeon
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/CodeTrans/kubernetes/intel/cpu/xeon/manifest
|
||||
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
|
||||
sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" codetrans.yaml
|
||||
kubectl apply -f codetrans.yaml
|
||||
```
|
||||
|
||||
## Deploy On Gaudi
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/CodeTrans/kubernetes/intel/hpu/gaudi/manifest
|
||||
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
|
||||
sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" codetrans.yaml
|
||||
kubectl apply -f codetrans.yaml
|
||||
```
|
||||
|
||||
## Verify Services
|
||||
|
||||
To verify the installation, run the command `kubectl get pod` to make sure all pods are running.
|
||||
|
||||
Then run the command `kubectl port-forward svc/codetrans 7777:7777` to expose the CodeTrans service for access.
|
||||
|
||||
Open another terminal and run the following command to verify that the service is working:
|
||||
|
||||
```bash
|
||||
curl http://localhost:7777/v1/codetrans \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{"language_from": "Golang","language_to": "Python","source_code": "package main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n}"}'
|
||||
```
|
||||
|
||||
To consume the service using nginx, run the command below. The `${host_ip}` is the external ip of your server.
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:30789/v1/codetrans \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{"language_from": "Golang","language_to": "Python","source_code": "package main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n}"}'
|
||||
```
|
||||
@@ -1,543 +0,0 @@
|
||||
---
|
||||
# Source: codetrans/charts/codetrans-ui/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: codetrans-codetrans-ui-config
|
||||
labels:
|
||||
helm.sh/chart: codetrans-ui-1.0.0
|
||||
app.kubernetes.io/name: codetrans-ui
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
APP_BACKEND_SERVICE_ENDPOINT: "/v1/codetrans"
|
||||
APP_DATA_PREP_SERVICE_URL: "/v1/dataprep"
|
||||
CHAT_BASE_URL: "/v1/codetrans"
|
||||
UPLOAD_FILE_BASE_URL: "/v1/dataprep"
|
||||
GET_FILE: "/v1/dataprep/get_file"
|
||||
DELETE_FILE: "/v1/dataprep/delete_file"
|
||||
BASE_URL: "/v1/codetrans"
|
||||
DOC_BASE_URL: "/v1/codetrans"
|
||||
BASIC_URL: "/v1/codetrans"
|
||||
VITE_CODE_GEN_URL: "/v1/codetrans"
|
||||
VITE_DOC_SUM_URL: "/v1/codetrans"
|
||||
---
|
||||
# Source: codetrans/charts/llm-uservice/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: codetrans-llm-uservice-config
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-1.0.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TGI_LLM_ENDPOINT: "http://codetrans-tgi"
|
||||
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
LOGFLAG: ""
|
||||
---
|
||||
# Source: codetrans/charts/tgi/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: codetrans-tgi-config
|
||||
labels:
|
||||
helm.sh/chart: tgi-1.0.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
MODEL_ID: "mistralai/Mistral-7B-Instruct-v0.3"
|
||||
PORT: "2080"
|
||||
HF_TOKEN: "insert-your-huggingface-token-here"
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
HABANA_LOGS: "/tmp/habana_logs"
|
||||
NUMBA_CACHE_DIR: "/tmp"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
CUDA_GRAPHS: "0"
|
||||
---
|
||||
# Source: codetrans/templates/nginx-deployment.yaml
|
||||
apiVersion: v1
|
||||
data:
|
||||
default.conf: |+
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
|
||||
server {
|
||||
listen 80;
|
||||
listen [::]:80;
|
||||
|
||||
location /home {
|
||||
alias /usr/share/nginx/html/index.html;
|
||||
}
|
||||
|
||||
location / {
|
||||
proxy_pass http://codetrans-codetrans-ui:5174;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
}
|
||||
|
||||
location /v1/codetrans {
|
||||
proxy_pass http://codetrans:7777;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
}
|
||||
}
|
||||
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: codetrans-nginx-config
|
||||
---
|
||||
# Source: codetrans/charts/codetrans-ui/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: codetrans-codetrans-ui
|
||||
labels:
|
||||
helm.sh/chart: codetrans-ui-1.0.0
|
||||
app.kubernetes.io/name: codetrans-ui
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 5174
|
||||
targetPort: ui
|
||||
protocol: TCP
|
||||
name: ui
|
||||
selector:
|
||||
app.kubernetes.io/name: codetrans-ui
|
||||
app.kubernetes.io/instance: codetrans
|
||||
---
|
||||
# Source: codetrans/charts/llm-uservice/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: codetrans-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-1.0.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 9000
|
||||
targetPort: 9000
|
||||
protocol: TCP
|
||||
name: llm-uservice
|
||||
selector:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codetrans
|
||||
---
|
||||
# Source: codetrans/charts/tgi/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: codetrans-tgi
|
||||
labels:
|
||||
helm.sh/chart: tgi-1.0.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 80
|
||||
targetPort: 2080
|
||||
protocol: TCP
|
||||
name: tgi
|
||||
selector:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codetrans
|
||||
---
|
||||
# Source: codetrans/templates/nginx-deployment.yaml
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: codetrans-nginx
|
||||
spec:
|
||||
ports:
|
||||
- port: 80
|
||||
protocol: TCP
|
||||
targetPort: 80
|
||||
selector:
|
||||
app.kubernetes.io/name: codetrans
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app: codetrans-nginx
|
||||
type: NodePort
|
||||
---
|
||||
# Source: codetrans/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: codetrans
|
||||
labels:
|
||||
helm.sh/chart: codetrans-1.0.0
|
||||
app.kubernetes.io/name: codetrans
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 7777
|
||||
targetPort: 7777
|
||||
protocol: TCP
|
||||
name: codetrans
|
||||
selector:
|
||||
app.kubernetes.io/name: codetrans
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app: codetrans
|
||||
---
|
||||
# Source: codetrans/charts/codetrans-ui/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: codetrans-codetrans-ui
|
||||
labels:
|
||||
helm.sh/chart: codetrans-ui-1.0.0
|
||||
app.kubernetes.io/name: codetrans-ui
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: codetrans-ui
|
||||
app.kubernetes.io/instance: codetrans
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
helm.sh/chart: codetrans-ui-1.0.0
|
||||
app.kubernetes.io/name: codetrans-ui
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
securityContext:
|
||||
{}
|
||||
containers:
|
||||
- name: codetrans-ui
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: codetrans-codetrans-ui-config
|
||||
securityContext:
|
||||
{}
|
||||
image: "opea/codetrans-ui:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: ui
|
||||
containerPort: 5173
|
||||
protocol: TCP
|
||||
resources:
|
||||
{}
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
volumes:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Source: codetrans/charts/llm-uservice/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: codetrans-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-1.0.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codetrans
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codetrans
|
||||
spec:
|
||||
securityContext:
|
||||
{}
|
||||
containers:
|
||||
- name: codetrans
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: codetrans-llm-uservice-config
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: false
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/llm-textgen:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: llm-uservice
|
||||
containerPort: 9000
|
||||
protocol: TCP
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Source: codetrans/charts/tgi/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: codetrans-tgi
|
||||
labels:
|
||||
helm.sh/chart: tgi-1.0.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
# use explicit replica counts only if HorizontalPodAutoscaler is disabled
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codetrans
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codetrans
|
||||
spec:
|
||||
securityContext:
|
||||
{}
|
||||
containers:
|
||||
- name: tgi
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: codetrans-tgi-config
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 2080
|
||||
protocol: TCP
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
tcpSocket:
|
||||
port: http
|
||||
readinessProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
tcpSocket:
|
||||
port: http
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
tcpSocket:
|
||||
port: http
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
- name: model-volume
|
||||
emptyDir: {}
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Source: codetrans/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: codetrans
|
||||
labels:
|
||||
helm.sh/chart: codetrans-1.0.0
|
||||
app.kubernetes.io/name: codetrans
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
app: codetrans
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: codetrans
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app: codetrans
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: codetrans
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app: codetrans
|
||||
spec:
|
||||
securityContext:
|
||||
null
|
||||
containers:
|
||||
- name: codetrans
|
||||
env:
|
||||
- name: LLM_SERVICE_HOST_IP
|
||||
value: codetrans-llm-uservice
|
||||
#- name: MEGA_SERVICE_PORT
|
||||
# value: 7777
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/codetrans:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
ports:
|
||||
- name: codetrans
|
||||
containerPort: 7777
|
||||
protocol: TCP
|
||||
resources:
|
||||
null
|
||||
volumes:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Source: codetrans/templates/nginx-deployment.yaml
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: codetrans-nginx
|
||||
labels:
|
||||
helm.sh/chart: codetrans-1.0.0
|
||||
app.kubernetes.io/name: codetrans
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
app: codetrans-nginx
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: codetrans
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app: codetrans-nginx
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: codetrans
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app: codetrans-nginx
|
||||
spec:
|
||||
containers:
|
||||
- image: nginx:1.27.1
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: nginx
|
||||
volumeMounts:
|
||||
- mountPath: /etc/nginx/conf.d
|
||||
name: nginx-config-volume
|
||||
securityContext: {}
|
||||
volumes:
|
||||
- configMap:
|
||||
defaultMode: 420
|
||||
name: codetrans-nginx-config
|
||||
name: nginx-config-volume
|
||||
@@ -1,545 +0,0 @@
|
||||
---
|
||||
# Source: codetrans/charts/codetrans-ui/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: codetrans-codetrans-ui-config
|
||||
labels:
|
||||
helm.sh/chart: codetrans-ui-1.0.0
|
||||
app.kubernetes.io/name: codetrans-ui
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
APP_BACKEND_SERVICE_ENDPOINT: "/v1/codetrans"
|
||||
APP_DATA_PREP_SERVICE_URL: "/v1/dataprep"
|
||||
CHAT_BASE_URL: "/v1/codetrans"
|
||||
UPLOAD_FILE_BASE_URL: "/v1/dataprep"
|
||||
GET_FILE: "/v1/dataprep/get_file"
|
||||
DELETE_FILE: "/v1/dataprep/delete_file"
|
||||
BASE_URL: "/v1/codetrans"
|
||||
DOC_BASE_URL: "/v1/codetrans"
|
||||
BASIC_URL: "/v1/codetrans"
|
||||
VITE_CODE_GEN_URL: "/v1/codetrans"
|
||||
VITE_DOC_SUM_URL: "/v1/codetrans"
|
||||
---
|
||||
# Source: codetrans/charts/llm-uservice/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: codetrans-llm-uservice-config
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-1.0.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TGI_LLM_ENDPOINT: "http://codetrans-tgi"
|
||||
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
LOGFLAG: ""
|
||||
---
|
||||
# Source: codetrans/charts/tgi/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: codetrans-tgi-config
|
||||
labels:
|
||||
helm.sh/chart: tgi-1.0.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
MODEL_ID: "mistralai/Mistral-7B-Instruct-v0.3"
|
||||
PORT: "2080"
|
||||
HF_TOKEN: "insert-your-huggingface-token-here"
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
HABANA_LOGS: "/tmp/habana_logs"
|
||||
NUMBA_CACHE_DIR: "/tmp"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
MAX_INPUT_LENGTH: "1024"
|
||||
MAX_TOTAL_TOKENS: "2048"
|
||||
---
|
||||
# Source: codetrans/templates/nginx-deployment.yaml
|
||||
apiVersion: v1
|
||||
data:
|
||||
default.conf: |+
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
|
||||
server {
|
||||
listen 80;
|
||||
listen [::]:80;
|
||||
|
||||
location /home {
|
||||
alias /usr/share/nginx/html/index.html;
|
||||
}
|
||||
|
||||
location / {
|
||||
proxy_pass http://codetrans-codetrans-ui:5174;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
}
|
||||
|
||||
location /v1/codetrans {
|
||||
proxy_pass http://codetrans:7777;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
}
|
||||
}
|
||||
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: codetrans-nginx-config
|
||||
---
|
||||
# Source: codetrans/charts/codetrans-ui/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: codetrans-codetrans-ui
|
||||
labels:
|
||||
helm.sh/chart: codetrans-ui-1.0.0
|
||||
app.kubernetes.io/name: codetrans-ui
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 5174
|
||||
targetPort: ui
|
||||
protocol: TCP
|
||||
name: ui
|
||||
selector:
|
||||
app.kubernetes.io/name: codetrans-ui
|
||||
app.kubernetes.io/instance: codetrans
|
||||
---
|
||||
# Source: codetrans/charts/llm-uservice/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: codetrans-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-1.0.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 9000
|
||||
targetPort: 9000
|
||||
protocol: TCP
|
||||
name: llm-uservice
|
||||
selector:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codetrans
|
||||
---
|
||||
# Source: codetrans/charts/tgi/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: codetrans-tgi
|
||||
labels:
|
||||
helm.sh/chart: tgi-1.0.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 80
|
||||
targetPort: 2080
|
||||
protocol: TCP
|
||||
name: tgi
|
||||
selector:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codetrans
|
||||
---
|
||||
# Source: codetrans/templates/nginx-deployment.yaml
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: codetrans-nginx
|
||||
spec:
|
||||
ports:
|
||||
- port: 80
|
||||
protocol: TCP
|
||||
targetPort: 80
|
||||
selector:
|
||||
app.kubernetes.io/name: codetrans
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app: codetrans-nginx
|
||||
type: NodePort
|
||||
---
|
||||
# Source: codetrans/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: codetrans
|
||||
labels:
|
||||
helm.sh/chart: codetrans-1.0.0
|
||||
app.kubernetes.io/name: codetrans
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 7777
|
||||
targetPort: 7777
|
||||
protocol: TCP
|
||||
name: codetrans
|
||||
selector:
|
||||
app.kubernetes.io/name: codetrans
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app: codetrans
|
||||
---
|
||||
# Source: codetrans/charts/codetrans-ui/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: codetrans-codetrans-ui
|
||||
labels:
|
||||
helm.sh/chart: codetrans-ui-1.0.0
|
||||
app.kubernetes.io/name: codetrans-ui
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: codetrans-ui
|
||||
app.kubernetes.io/instance: codetrans
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
helm.sh/chart: codetrans-ui-1.0.0
|
||||
app.kubernetes.io/name: codetrans-ui
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
securityContext:
|
||||
{}
|
||||
containers:
|
||||
- name: codetrans-ui
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: codetrans-codetrans-ui-config
|
||||
securityContext:
|
||||
{}
|
||||
image: "opea/codetrans-ui:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: ui
|
||||
containerPort: 5173
|
||||
protocol: TCP
|
||||
resources:
|
||||
{}
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
volumes:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Source: codetrans/charts/llm-uservice/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: codetrans-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-1.0.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codetrans
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codetrans
|
||||
spec:
|
||||
securityContext:
|
||||
{}
|
||||
containers:
|
||||
- name: codetrans
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: codetrans-llm-uservice-config
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: false
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/llm-textgen:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: llm-uservice
|
||||
containerPort: 9000
|
||||
protocol: TCP
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Source: codetrans/charts/tgi/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: codetrans-tgi
|
||||
labels:
|
||||
helm.sh/chart: tgi-1.0.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
# use explicit replica counts only if HorizontalPodAutoscaler is disabled
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codetrans
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codetrans
|
||||
spec:
|
||||
securityContext:
|
||||
{}
|
||||
containers:
|
||||
- name: tgi
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: codetrans-tgi-config
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "ghcr.io/huggingface/tgi-gaudi:2.0.6"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 2080
|
||||
protocol: TCP
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
tcpSocket:
|
||||
port: http
|
||||
readinessProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
tcpSocket:
|
||||
port: http
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
tcpSocket:
|
||||
port: http
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
volumes:
|
||||
- name: model-volume
|
||||
emptyDir: {}
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Source: codetrans/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: codetrans
|
||||
labels:
|
||||
helm.sh/chart: codetrans-1.0.0
|
||||
app.kubernetes.io/name: codetrans
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
app: codetrans
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: codetrans
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app: codetrans
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: codetrans
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app: codetrans
|
||||
spec:
|
||||
securityContext:
|
||||
null
|
||||
containers:
|
||||
- name: codetrans
|
||||
env:
|
||||
- name: LLM_SERVICE_HOST_IP
|
||||
value: codetrans-llm-uservice
|
||||
#- name: MEGA_SERVICE_PORT
|
||||
# value: 7777
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/codetrans:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
ports:
|
||||
- name: codetrans
|
||||
containerPort: 7777
|
||||
protocol: TCP
|
||||
resources:
|
||||
null
|
||||
volumes:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Source: codetrans/templates/nginx-deployment.yaml
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: codetrans-nginx
|
||||
labels:
|
||||
helm.sh/chart: codetrans-1.0.0
|
||||
app.kubernetes.io/name: codetrans
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
app: codetrans-nginx
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: codetrans
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app: codetrans-nginx
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: codetrans
|
||||
app.kubernetes.io/instance: codetrans
|
||||
app: codetrans-nginx
|
||||
spec:
|
||||
containers:
|
||||
- image: nginx:1.27.1
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: nginx
|
||||
volumeMounts:
|
||||
- mountPath: /etc/nginx/conf.d
|
||||
name: nginx-config-volume
|
||||
securityContext: {}
|
||||
volumes:
|
||||
- configMap:
|
||||
defaultMode: 420
|
||||
name: codetrans-nginx-config
|
||||
name: nginx-config-volume
|
||||
@@ -111,12 +111,12 @@ fi
|
||||
|
||||
case "$1" in
|
||||
install_CodeTrans)
|
||||
pushd CodeTrans/kubernetes/intel/hpu/gaudi/gmc
|
||||
pushd CodeTrans/kubernetes/gmc
|
||||
install_codetrans
|
||||
popd
|
||||
;;
|
||||
validate_CodeTrans)
|
||||
pushd CodeTrans/kubernetes/intel/hpu/gaudi/gmc
|
||||
pushd CodeTrans/kubernetes/gmc
|
||||
validate_codetrans
|
||||
popd
|
||||
;;
|
||||
|
||||
@@ -111,12 +111,12 @@ fi
|
||||
|
||||
case "$1" in
|
||||
install_CodeTrans)
|
||||
pushd CodeTrans/kubernetes/intel/cpu/xeon/gmc
|
||||
pushd CodeTrans/kubernetes/gmc
|
||||
install_codetrans
|
||||
popd
|
||||
;;
|
||||
validate_CodeTrans)
|
||||
pushd CodeTrans/kubernetes/intel/cpu/xeon/gmc
|
||||
pushd CodeTrans/kubernetes/gmc
|
||||
validate_codetrans
|
||||
popd
|
||||
;;
|
||||
|
||||
@@ -72,19 +72,9 @@ docker compose -f compose.yaml up -d
|
||||
|
||||
Find the corresponding [compose.yaml](./docker_compose/intel/cpu/xeon/compose.yaml).
|
||||
|
||||
### Deploy using Kubernetes with GMC
|
||||
### Deploy DocSum on Kubernetes using Helm Chart
|
||||
|
||||
Refer to [Kubernetes deployment](./kubernetes/intel/README_gmc.md)
|
||||
|
||||
### Deploy using Kubernetes without GMC
|
||||
|
||||
Refer to [Kubernetes deployment](./kubernetes/intel/README.md)
|
||||
|
||||
### Deploy DocSum into Kubernetes using Helm Chart
|
||||
|
||||
Install Helm (version >= 3.15) first. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information.
|
||||
|
||||
Refer to the [DocSum helm chart](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts/docsum/README.md) for instructions on deploying DocSum into Kubernetes on Xeon & Gaudi.
|
||||
Refer to the [DocSum helm chart](./kubernetes/helm/README.md) for instructions on deploying DocSum on Kubernetes.
|
||||
|
||||
### Workflow of the deployed Document Summarization Service
|
||||
|
||||
|
||||
18
DocSum/kubernetes/helm/README.md
Normal file
18
DocSum/kubernetes/helm/README.md
Normal file
@@ -0,0 +1,18 @@
|
||||
# Deploy DocSum on Kubernetes cluster
|
||||
|
||||
- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information.
|
||||
- For more deployment options, refer to the [helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme).
|
||||
|
||||
## Deploy on Xeon
|
||||
|
||||
```
|
||||
export HFTOKEN="insert-your-huggingface-token-here"
|
||||
helm install docsum oci://ghcr.io/opea-project/charts/docsum --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml
|
||||
```
|
||||
|
||||
## Deploy on Gaudi
|
||||
|
||||
```
|
||||
export HFTOKEN="insert-your-huggingface-token-here"
|
||||
helm install docsum oci://ghcr.io/opea-project/charts/docsum --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f gaudi-values.yaml
|
||||
```
|
||||
5
DocSum/kubernetes/helm/cpu-values.yaml
Normal file
5
DocSum/kubernetes/helm/cpu-values.yaml
Normal file
@@ -0,0 +1,5 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
tgi:
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
31
DocSum/kubernetes/helm/gaudi-values.yaml
Normal file
31
DocSum/kubernetes/helm/gaudi-values.yaml
Normal file
@@ -0,0 +1,31 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
tgi:
|
||||
accelDevice: "gaudi"
|
||||
image:
|
||||
repository: ghcr.io/huggingface/tgi-gaudi
|
||||
tag: "2.0.6"
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
MAX_INPUT_LENGTH: "1024"
|
||||
MAX_TOTAL_TOKENS: "2048"
|
||||
CUDA_GRAPHS: ""
|
||||
ENABLE_HPU_GRAPH: true
|
||||
LIMIT_HPU_GRAPH: true
|
||||
USE_FLASH_ATTENTION: true
|
||||
FLASH_ATTENTION_RECOMPUTE: true
|
||||
livenessProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 1
|
||||
readinessProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 1
|
||||
startupProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 1
|
||||
failureThreshold: 120
|
||||
@@ -1,41 +0,0 @@
|
||||
# Deploy DocSum in Kubernetes Cluster
|
||||
|
||||
> [!NOTE]
|
||||
> The following values must be set before you can deploy:
|
||||
> HUGGINGFACEHUB_API_TOKEN
|
||||
>
|
||||
> You can also customize the "MODEL_ID" and "model-volume"
|
||||
>
|
||||
> You need to make sure you have created the directory `/mnt/opea-models` to save the cached model on the node where the DocSum workload is running. Otherwise, you need to modify the `docsum.yaml` file to change the `model-volume` to a directory that exists on the node.
|
||||
|
||||
## Deploy On Xeon
|
||||
|
||||
```
|
||||
cd GenAIExamples/DocSum/kubernetes/intel/cpu/xeon/manifest
|
||||
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
|
||||
sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" docsum.yaml
|
||||
kubectl apply -f docsum.yaml
|
||||
```
|
||||
|
||||
## Deploy On Gaudi
|
||||
|
||||
```
|
||||
cd GenAIExamples/DocSum/kubernetes/intel/hpu/gaudi/manifest
|
||||
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
|
||||
sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" docsum.yaml
|
||||
kubectl apply -f docsum.yaml
|
||||
```
|
||||
|
||||
## Verify Services
|
||||
|
||||
To verify the installation, run the command `kubectl get pod` to make sure all pods are running.
|
||||
|
||||
Then run the command `kubectl port-forward svc/docsum 8888:8888` to expose the DocSum service for access.
|
||||
|
||||
Open another terminal and run the following command to verify that the service is working:
|
||||
|
||||
```console
|
||||
curl http://localhost:8888/v1/docsum \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{"messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
|
||||
```
|
||||
@@ -1,667 +0,0 @@
|
||||
---
|
||||
# Source: docsum/charts/docsum-ui/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: docsum-docsum-ui-config
|
||||
labels:
|
||||
helm.sh/chart: docsum-ui-1.0.0
|
||||
app.kubernetes.io/name: docsum-ui
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
APP_BACKEND_SERVICE_ENDPOINT: "/v1/docsum"
|
||||
APP_DATA_PREP_SERVICE_URL: "/v1/dataprep"
|
||||
CHAT_BASE_URL: "/v1/docsum"
|
||||
UPLOAD_FILE_BASE_URL: "/v1/dataprep"
|
||||
GET_FILE: "/v1/dataprep/get_file"
|
||||
DELETE_FILE: "/v1/dataprep/delete_file"
|
||||
BASE_URL: "/v1/docsum"
|
||||
DOC_BASE_URL: "/v1/docsum"
|
||||
BASIC_URL: "/v1/docsum"
|
||||
VITE_CODE_GEN_URL: "/v1/docsum"
|
||||
VITE_DOC_SUM_URL: "/v1/docsum"
|
||||
---
|
||||
# Source: docsum/charts/llm-uservice/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: docsum-llm-uservice-config
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-1.0.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TGI_LLM_ENDPOINT: "http://docsum-tgi"
|
||||
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
LOGFLAG: ""
|
||||
---
|
||||
# Source: docsum/charts/tgi/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: docsum-tgi-config
|
||||
labels:
|
||||
helm.sh/chart: tgi-1.0.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
MODEL_ID: "Intel/neural-chat-7b-v3-3"
|
||||
PORT: "2080"
|
||||
HF_TOKEN: "insert-your-huggingface-token-here"
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
HABANA_LOGS: "/tmp/habana_logs"
|
||||
NUMBA_CACHE_DIR: "/tmp"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
CUDA_GRAPHS: "0"
|
||||
---
|
||||
# Source: docsum/templates/nginx-deployment.yaml
|
||||
apiVersion: v1
|
||||
data:
|
||||
default.conf: |+
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
|
||||
server {
|
||||
listen 80;
|
||||
listen [::]:80;
|
||||
|
||||
location /home {
|
||||
alias /usr/share/nginx/html/index.html;
|
||||
}
|
||||
|
||||
location / {
|
||||
proxy_pass http://docsum-docsum-ui:5174;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
}
|
||||
|
||||
location /v1/docsum {
|
||||
proxy_pass http://docsum:8888;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
}
|
||||
}
|
||||
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: docsum-nginx-config
|
||||
---
|
||||
# Source: docsum/charts/tgi/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: docsum-whisper-config
|
||||
labels:
|
||||
helm.sh/chart: whisper-1.0.0
|
||||
app.kubernetes.io/name: whisper
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
HABANA_LOGS: "/tmp/habana_logs"
|
||||
NUMBA_CACHE_DIR: "/tmp"
|
||||
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
---
|
||||
# Source: docsum/charts/tgi/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: docsum-whisper
|
||||
labels:
|
||||
helm.sh/chart: whisper-1.0.0
|
||||
app.kubernetes.io/name: whisper
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 7066
|
||||
targetPort: 7066
|
||||
protocol: TCP
|
||||
name: whisper
|
||||
selector:
|
||||
app.kubernetes.io/name: whisper
|
||||
app.kubernetes.io/instance: docsum
|
||||
---
|
||||
# Source: docsum/charts/docsum-ui/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: docsum-docsum-ui
|
||||
labels:
|
||||
helm.sh/chart: docsum-ui-1.0.0
|
||||
app.kubernetes.io/name: docsum-ui
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 5174
|
||||
targetPort: ui
|
||||
protocol: TCP
|
||||
name: ui
|
||||
selector:
|
||||
app.kubernetes.io/name: docsum-ui
|
||||
app.kubernetes.io/instance: docsum
|
||||
---
|
||||
# Source: docsum/charts/llm-uservice/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: docsum-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-1.0.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 9000
|
||||
targetPort: 9000
|
||||
protocol: TCP
|
||||
name: llm-uservice
|
||||
selector:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: docsum
|
||||
---
|
||||
# Source: docsum/charts/tgi/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: docsum-tgi
|
||||
labels:
|
||||
helm.sh/chart: tgi-1.0.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 80
|
||||
targetPort: 2080
|
||||
protocol: TCP
|
||||
name: tgi
|
||||
selector:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: docsum
|
||||
---
|
||||
# Source: docsum/templates/nginx-deployment.yaml
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: docsum-nginx
|
||||
spec:
|
||||
ports:
|
||||
- port: 80
|
||||
protocol: TCP
|
||||
targetPort: 80
|
||||
selector:
|
||||
app.kubernetes.io/name: docsum
|
||||
app.kubernetes.io/instance: docsum
|
||||
app: docsum-nginx
|
||||
type: NodePort
|
||||
---
|
||||
# Source: docsum/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: docsum
|
||||
labels:
|
||||
helm.sh/chart: docsum-1.0.0
|
||||
app.kubernetes.io/name: docsum
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 8888
|
||||
targetPort: 8888
|
||||
protocol: TCP
|
||||
name: docsum
|
||||
selector:
|
||||
app.kubernetes.io/name: docsum
|
||||
app.kubernetes.io/instance: docsum
|
||||
app: docsum
|
||||
---
|
||||
# Source: docsum/charts/docsum-ui/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: docsum-docsum-ui
|
||||
labels:
|
||||
helm.sh/chart: docsum-ui-1.0.0
|
||||
app.kubernetes.io/name: docsum-ui
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: docsum-ui
|
||||
app.kubernetes.io/instance: docsum
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
helm.sh/chart: docsum-ui-1.0.0
|
||||
app.kubernetes.io/name: docsum-ui
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
securityContext:
|
||||
{}
|
||||
containers:
|
||||
- name: docsum-ui
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: docsum-docsum-ui-config
|
||||
securityContext:
|
||||
{}
|
||||
image: "opea/docsum-ui:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: ui
|
||||
containerPort: 5173
|
||||
protocol: TCP
|
||||
resources:
|
||||
{}
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
volumes:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Source: docsum/charts/llm-uservice/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: docsum-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-1.0.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: docsum
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: docsum
|
||||
spec:
|
||||
securityContext:
|
||||
{}
|
||||
containers:
|
||||
- name: docsum
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: docsum-llm-uservice-config
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: false
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/llm-docsum-tgi:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: llm-uservice
|
||||
containerPort: 9000
|
||||
protocol: TCP
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Source: docsum/charts/tgi/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: docsum-tgi
|
||||
labels:
|
||||
helm.sh/chart: tgi-1.0.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
# use explicit replica counts only if HorizontalPodAutoscaler is disabled
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: docsum
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: docsum
|
||||
spec:
|
||||
securityContext:
|
||||
{}
|
||||
containers:
|
||||
- name: tgi
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: docsum-tgi-config
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 2080
|
||||
protocol: TCP
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
tcpSocket:
|
||||
port: http
|
||||
readinessProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
tcpSocket:
|
||||
port: http
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
tcpSocket:
|
||||
port: http
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
- name: model-volume
|
||||
emptyDir: {}
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Source: docsum/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: docsum
|
||||
labels:
|
||||
helm.sh/chart: docsum-1.0.0
|
||||
app.kubernetes.io/name: docsum
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
app: docsum
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: docsum
|
||||
app.kubernetes.io/instance: docsum
|
||||
app: docsum
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: docsum
|
||||
app.kubernetes.io/instance: docsum
|
||||
app: docsum
|
||||
spec:
|
||||
securityContext:
|
||||
null
|
||||
containers:
|
||||
- name: docsum
|
||||
env:
|
||||
- name: LLM_SERVICE_HOST_IP
|
||||
value: docsum-llm-uservice
|
||||
- name: DATA_SERVICE_HOST_IP
|
||||
value: docsum-multimedia2text
|
||||
#- name: MEGA_SERVICE_PORT
|
||||
# value: 8888
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/docsum:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
ports:
|
||||
- name: docsum
|
||||
containerPort: 8888
|
||||
protocol: TCP
|
||||
resources:
|
||||
null
|
||||
volumes:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Source: docsum/templates/nginx-deployment.yaml
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: docsum-nginx
|
||||
labels:
|
||||
helm.sh/chart: docsum-1.0.0
|
||||
app.kubernetes.io/name: docsum
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
app: docsum-nginx
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: docsum
|
||||
app.kubernetes.io/instance: docsum
|
||||
app: docsum-nginx
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: docsum
|
||||
app.kubernetes.io/instance: docsum
|
||||
app: docsum-nginx
|
||||
spec:
|
||||
containers:
|
||||
- image: nginx:1.27.1
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: nginx
|
||||
volumeMounts:
|
||||
- mountPath: /etc/nginx/conf.d
|
||||
name: nginx-config-volume
|
||||
securityContext: {}
|
||||
volumes:
|
||||
- configMap:
|
||||
defaultMode: 420
|
||||
name: docsum-nginx-config
|
||||
name: nginx-config-volume
|
||||
---
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: docsum-whisper
|
||||
labels:
|
||||
helm.sh/chart: whisper-1.0.0
|
||||
app.kubernetes.io/name: whisper
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
# use explicit replica counts only if HorizontalPodAutoscaler is disabled
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: whisper
|
||||
app.kubernetes.io/instance: docsum
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: whisper
|
||||
app.kubernetes.io/instance: docsum
|
||||
spec:
|
||||
securityContext:
|
||||
{}
|
||||
containers:
|
||||
- name: whisper
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: docsum-whisper-config
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/whisper:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 7066
|
||||
protocol: TCP
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
tcpSocket:
|
||||
port: http
|
||||
readinessProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
tcpSocket:
|
||||
port: http
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
tcpSocket:
|
||||
port: http
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
@@ -1,37 +0,0 @@
|
||||
# Deploy DocSum with ReactUI
|
||||
|
||||
This README provides a step-by-step guide on how to deploy DocSum with ReactUI, a popular React-based user interface library, in a Kubernetes cluster.
|
||||
|
||||
You can use react-docsum.yaml to deploy DocSum with ReactUI.
|
||||
```
|
||||
kubectl apply -f react-docsum.yaml
|
||||
```
|
||||
|
||||
## Prerequisites for Deploying DocSum with ReactUI
|
||||
Before deploying the react-docsum.yaml file, ensure that you have the following prerequisites in place:
|
||||
|
||||
1. Kubernetes installation: Make sure that you have Kubernetes installed.
|
||||
2. Configuration Values: Set the following values in react-docsum.yaml before proceeding with the deployment:
|
||||
a. HUGGINGFACEHUB_API_TOKEN (Your HuggingFace token to download your desired model from HuggingFace):
|
||||
```
|
||||
# You may set the HUGGINGFACEHUB_API_TOKEN using the following method:
|
||||
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
|
||||
cd GenAIExamples/DocSum/kubernetes/intel/cpu/xeon/manifest/ui/
|
||||
sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" react-docsum.yaml
|
||||
```
|
||||
b. Set the proxies based on your network configuration
|
||||
```
|
||||
# Look for the http_proxy, https_proxy, and no_proxy keys and fill in their values with your proxy configuration.
|
||||
```
|
||||
3. MODEL_ID and model-volume (OPTIONAL): You may also customize the "MODEL_ID" to use a different model, and the "model-volume" to change the volume to be mounted.
|
||||
4. After completing these, you can proceed with the deployment of the react-docsum.yaml file.
|
||||
|
||||
## Verify Services
|
||||
Make sure all the pods are running; you should see a total of 4 pods running:
|
||||
|
||||
- docsum
|
||||
- docsum-llm-uservice
|
||||
- docsum-react-ui
|
||||
- docsum-tgi
|
||||
|
||||
You may open up the UI by using the docsum-react-ui endpoint in the browser.
|
||||
@@ -1,283 +0,0 @@
|
||||
---
|
||||
# Source: codegen/charts/llm-uservice/charts/tgi/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: docsum-tgi
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.1.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "1.4"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 80
|
||||
targetPort: 80
|
||||
protocol: TCP
|
||||
name: tgi
|
||||
selector:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: docsum
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: docsum-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-0.1.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 9000
|
||||
targetPort: 9000
|
||||
protocol: TCP
|
||||
name: llm-uservice
|
||||
selector:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: docsum
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: docsum
|
||||
labels:
|
||||
helm.sh/chart: docsum-0.1.0
|
||||
app.kubernetes.io/name: docsum
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 8888
|
||||
targetPort: 8888
|
||||
protocol: TCP
|
||||
name: docsum
|
||||
selector:
|
||||
app.kubernetes.io/name: docsum
|
||||
app.kubernetes.io/instance: docsum
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: docsum-react-ui
|
||||
labels:
|
||||
helm.sh/chart: docsum-react-ui-0.1.0
|
||||
app.kubernetes.io/name: react-ui
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 80
|
||||
targetPort: 80
|
||||
protocol: TCP
|
||||
name: react-ui
|
||||
selector:
|
||||
app.kubernetes.io/name: react-ui
|
||||
app.kubernetes.io/instance: docsum
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: docsum-tgi
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.1.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "1.4"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: docsum
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: docsum
|
||||
spec:
|
||||
securityContext: {}
|
||||
containers:
|
||||
- name: tgi
|
||||
env:
|
||||
- name: MODEL_ID
|
||||
value: Intel/neural-chat-7b-v3-3
|
||||
- name: PORT
|
||||
value: "80"
|
||||
- name: CUDA_GRAPHS
|
||||
value: "0"
|
||||
- name: http_proxy
|
||||
value:
|
||||
- name: https_proxy
|
||||
value:
|
||||
- name: no_proxy
|
||||
value:
|
||||
securityContext: {}
|
||||
image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 80
|
||||
protocol: TCP
|
||||
resources: {}
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt
|
||||
type: Directory
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: docsum-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-0.1.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: docsum
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: docsum
|
||||
spec:
|
||||
securityContext: {}
|
||||
containers:
|
||||
- name: docsum
|
||||
env:
|
||||
- name: TGI_LLM_ENDPOINT
|
||||
value: "http://docsum-tgi:80"
|
||||
- name: HUGGINGFACEHUB_API_TOKEN
|
||||
value: "insert-your-huggingface-token-here"
|
||||
- name: http_proxy
|
||||
value:
|
||||
- name: https_proxy
|
||||
value:
|
||||
- name: no_proxy
|
||||
value:
|
||||
|
||||
securityContext: {}
|
||||
image: "opea/llm-docsum-tgi:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: llm-uservice
|
||||
containerPort: 9000
|
||||
protocol: TCP
|
||||
startupProbe:
|
||||
exec:
|
||||
command:
|
||||
- curl
|
||||
- http://docsum-tgi:80
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
failureThreshold: 120
|
||||
resources: {}
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: docsum
|
||||
labels:
|
||||
helm.sh/chart: docsum-0.1.0
|
||||
app.kubernetes.io/name: docsum
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: docsum
|
||||
app.kubernetes.io/instance: docsum
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: docsum
|
||||
app.kubernetes.io/instance: docsum
|
||||
spec:
|
||||
securityContext: null
|
||||
containers:
|
||||
- name: docsum
|
||||
env:
|
||||
- name: LLM_SERVICE_HOST_IP
|
||||
value: docsum-llm-uservice
|
||||
- name: http_proxy
|
||||
value:
|
||||
- name: https_proxy
|
||||
value:
|
||||
- name: no_proxy
|
||||
value:
|
||||
securityContext: null
|
||||
image: "opea/docsum:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: docsum
|
||||
containerPort: 8888
|
||||
protocol: TCP
|
||||
resources: null
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: docsum-react-ui
|
||||
labels:
|
||||
helm.sh/chart: docsum-react-ui-0.1.0
|
||||
app.kubernetes.io/name: react-ui
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: react-ui
|
||||
app.kubernetes.io/instance: docsum
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: react-ui
|
||||
app.kubernetes.io/instance: docsum
|
||||
spec:
|
||||
securityContext: null
|
||||
containers:
|
||||
- name: docsum-react-ui
|
||||
env:
|
||||
- name: DOC_BASE_URL
|
||||
value: "http://docsum:8888/v1/docsum"
|
||||
- name: http_proxy
|
||||
value:
|
||||
- name: https_proxy
|
||||
value:
|
||||
- name: no_proxy
|
||||
value:
|
||||
securityContext: null
|
||||
image: "opea/docsum-react-ui:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: react-ui
|
||||
containerPort: 80
|
||||
protocol: TCP
|
||||
resources: null
|
||||
@@ -1,671 +0,0 @@
|
||||
---
|
||||
# Source: docsum/charts/docsum-ui/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: docsum-docsum-ui-config
|
||||
labels:
|
||||
helm.sh/chart: docsum-ui-1.0.0
|
||||
app.kubernetes.io/name: docsum-ui
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
APP_BACKEND_SERVICE_ENDPOINT: "/v1/docsum"
|
||||
APP_DATA_PREP_SERVICE_URL: "/v1/dataprep"
|
||||
CHAT_BASE_URL: "/v1/docsum"
|
||||
UPLOAD_FILE_BASE_URL: "/v1/dataprep"
|
||||
GET_FILE: "/v1/dataprep/get_file"
|
||||
DELETE_FILE: "/v1/dataprep/delete_file"
|
||||
BASE_URL: "/v1/docsum"
|
||||
DOC_BASE_URL: "/v1/docsum"
|
||||
BASIC_URL: "/v1/docsum"
|
||||
VITE_CODE_GEN_URL: "/v1/docsum"
|
||||
VITE_DOC_SUM_URL: "/v1/docsum"
|
||||
---
|
||||
# Source: docsum/charts/llm-uservice/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: docsum-llm-uservice-config
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-1.0.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TGI_LLM_ENDPOINT: "http://docsum-tgi"
|
||||
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
LOGFLAG: ""
|
||||
---
|
||||
# Source: docsum/charts/tgi/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: docsum-tgi-config
|
||||
labels:
|
||||
helm.sh/chart: tgi-1.0.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
MODEL_ID: "Intel/neural-chat-7b-v3-3"
|
||||
PORT: "2080"
|
||||
HF_TOKEN: "insert-your-huggingface-token-here"
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
HABANA_LOGS: "/tmp/habana_logs"
|
||||
NUMBA_CACHE_DIR: "/tmp"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
MAX_INPUT_LENGTH: "1024"
|
||||
MAX_TOTAL_TOKENS: "2048"
|
||||
---
|
||||
# Source: docsum/templates/nginx-deployment.yaml
|
||||
apiVersion: v1
|
||||
data:
|
||||
default.conf: |+
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
|
||||
server {
|
||||
listen 80;
|
||||
listen [::]:80;
|
||||
|
||||
location /home {
|
||||
alias /usr/share/nginx/html/index.html;
|
||||
}
|
||||
|
||||
location / {
|
||||
proxy_pass http://docsum-docsum-ui:5174;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
}
|
||||
|
||||
location /v1/docsum {
|
||||
proxy_pass http://docsum:8888;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
}
|
||||
}
|
||||
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: docsum-nginx-config
|
||||
---
|
||||
# Source: docsum/charts/tgi/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: docsum-whisper-config
|
||||
labels:
|
||||
helm.sh/chart: whisper-1.0.0
|
||||
app.kubernetes.io/name: whisper
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
HABANA_LOGS: "/tmp/habana_logs"
|
||||
NUMBA_CACHE_DIR: "/tmp"
|
||||
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
---
|
||||
# Source: docsum/charts/tgi/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: docsum-whisper
|
||||
labels:
|
||||
helm.sh/chart: whisper-1.0.0
|
||||
app.kubernetes.io/name: whisper
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 7066
|
||||
targetPort: 7066
|
||||
protocol: TCP
|
||||
name: whisper
|
||||
selector:
|
||||
app.kubernetes.io/name: whisper
|
||||
app.kubernetes.io/instance: docsum
|
||||
---
|
||||
# Source: docsum/charts/docsum-ui/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: docsum-docsum-ui
|
||||
labels:
|
||||
helm.sh/chart: docsum-ui-1.0.0
|
||||
app.kubernetes.io/name: docsum-ui
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 5174
|
||||
targetPort: ui
|
||||
protocol: TCP
|
||||
name: ui
|
||||
selector:
|
||||
app.kubernetes.io/name: docsum-ui
|
||||
app.kubernetes.io/instance: docsum
|
||||
---
|
||||
# Source: docsum/charts/llm-uservice/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: docsum-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-1.0.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 9000
|
||||
targetPort: 9000
|
||||
protocol: TCP
|
||||
name: llm-uservice
|
||||
selector:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: docsum
|
||||
---
|
||||
# Source: docsum/charts/tgi/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: docsum-tgi
|
||||
labels:
|
||||
helm.sh/chart: tgi-1.0.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 80
|
||||
targetPort: 2080
|
||||
protocol: TCP
|
||||
name: tgi
|
||||
selector:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: docsum
|
||||
---
|
||||
# Source: docsum/templates/nginx-deployment.yaml
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: docsum-nginx
|
||||
spec:
|
||||
ports:
|
||||
- port: 80
|
||||
protocol: TCP
|
||||
targetPort: 80
|
||||
selector:
|
||||
app.kubernetes.io/name: docsum
|
||||
app.kubernetes.io/instance: docsum
|
||||
app: docsum-nginx
|
||||
type: NodePort
|
||||
---
|
||||
# Source: docsum/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: docsum
|
||||
labels:
|
||||
helm.sh/chart: docsum-1.0.0
|
||||
app.kubernetes.io/name: docsum
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 8888
|
||||
targetPort: 8888
|
||||
protocol: TCP
|
||||
name: docsum
|
||||
selector:
|
||||
app.kubernetes.io/name: docsum
|
||||
app.kubernetes.io/instance: docsum
|
||||
app: docsum
|
||||
---
|
||||
# Source: docsum/charts/docsum-ui/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: docsum-docsum-ui
|
||||
labels:
|
||||
helm.sh/chart: docsum-ui-1.0.0
|
||||
app.kubernetes.io/name: docsum-ui
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: docsum-ui
|
||||
app.kubernetes.io/instance: docsum
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
helm.sh/chart: docsum-ui-1.0.0
|
||||
app.kubernetes.io/name: docsum-ui
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
securityContext:
|
||||
{}
|
||||
containers:
|
||||
- name: docsum-ui
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: docsum-docsum-ui-config
|
||||
securityContext:
|
||||
{}
|
||||
image: "opea/docsum-ui:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: ui
|
||||
containerPort: 5173
|
||||
protocol: TCP
|
||||
resources:
|
||||
{}
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
volumes:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Source: docsum/charts/llm-uservice/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: docsum-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-1.0.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: docsum
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: docsum
|
||||
spec:
|
||||
securityContext:
|
||||
{}
|
||||
containers:
|
||||
- name: docsum
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: docsum-llm-uservice-config
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: false
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/llm-docsum-tgi:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: llm-uservice
|
||||
containerPort: 9000
|
||||
protocol: TCP
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Source: docsum/charts/tgi/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: docsum-tgi
|
||||
labels:
|
||||
helm.sh/chart: tgi-1.0.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
# use explicit replica counts only if HorizontalPodAutoscaler is disabled
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: docsum
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: docsum
|
||||
spec:
|
||||
securityContext:
|
||||
{}
|
||||
containers:
|
||||
- name: tgi
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: docsum-tgi-config
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "ghcr.io/huggingface/tgi-gaudi:2.0.6"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 2080
|
||||
protocol: TCP
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
tcpSocket:
|
||||
port: http
|
||||
readinessProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
tcpSocket:
|
||||
port: http
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
tcpSocket:
|
||||
port: http
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
volumes:
|
||||
- name: model-volume
|
||||
emptyDir: {}
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Source: docsum/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: docsum
|
||||
labels:
|
||||
helm.sh/chart: docsum-1.0.0
|
||||
app.kubernetes.io/name: docsum
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
app: docsum
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: docsum
|
||||
app.kubernetes.io/instance: docsum
|
||||
app: docsum
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: docsum
|
||||
app.kubernetes.io/instance: docsum
|
||||
app: docsum
|
||||
spec:
|
||||
securityContext:
|
||||
null
|
||||
containers:
|
||||
- name: docsum
|
||||
env:
|
||||
- name: LLM_SERVICE_HOST_IP
|
||||
value: docsum-llm-uservice
|
||||
- name: DATA_SERVICE_HOST_IP
|
||||
value: docsum-multimedia2text
|
||||
#- name: MEGA_SERVICE_PORT
|
||||
# value: 8888
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/docsum:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
ports:
|
||||
- name: docsum
|
||||
containerPort: 8888
|
||||
protocol: TCP
|
||||
resources:
|
||||
null
|
||||
volumes:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Source: docsum/templates/nginx-deployment.yaml
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: docsum-nginx
|
||||
labels:
|
||||
helm.sh/chart: docsum-1.0.0
|
||||
app.kubernetes.io/name: docsum
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
app: docsum-nginx
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: docsum
|
||||
app.kubernetes.io/instance: docsum
|
||||
app: docsum-nginx
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: docsum
|
||||
app.kubernetes.io/instance: docsum
|
||||
app: docsum-nginx
|
||||
spec:
|
||||
containers:
|
||||
- image: nginx:1.27.1
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: nginx
|
||||
volumeMounts:
|
||||
- mountPath: /etc/nginx/conf.d
|
||||
name: nginx-config-volume
|
||||
securityContext: {}
|
||||
volumes:
|
||||
- configMap:
|
||||
defaultMode: 420
|
||||
name: docsum-nginx-config
|
||||
name: nginx-config-volume
|
||||
---
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: docsum-whisper
|
||||
labels:
|
||||
helm.sh/chart: whisper-1.0.0
|
||||
app.kubernetes.io/name: whisper
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
# use explicit replica counts only if HorizontalPodAutoscaler is disabled
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: whisper
|
||||
app.kubernetes.io/instance: docsum
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: whisper
|
||||
app.kubernetes.io/instance: docsum
|
||||
spec:
|
||||
securityContext:
|
||||
{}
|
||||
containers:
|
||||
- name: whisper
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: docsum-whisper-config
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/whisper:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 7066
|
||||
protocol: TCP
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
tcpSocket:
|
||||
port: http
|
||||
readinessProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
tcpSocket:
|
||||
port: http
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
tcpSocket:
|
||||
port: http
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
volumes:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
@@ -112,12 +112,12 @@ fi
|
||||
|
||||
case "$1" in
|
||||
install_DocSum)
|
||||
pushd DocSum/kubernetes/intel/cpu/xeon/gmc
|
||||
pushd DocSum/kubernetes/gmc
|
||||
install_docsum
|
||||
popd
|
||||
;;
|
||||
validate_DocSum)
|
||||
pushd DocSum/kubernetes/intel/cpu/xeon/gmc
|
||||
pushd DocSum/kubernetes/gmc
|
||||
validate_docsum
|
||||
popd
|
||||
;;
|
||||
|
||||
@@ -64,3 +64,7 @@ Refer to the [Gaudi Guide](./docker_compose/intel/hpu/gaudi/README.md) for instr
|
||||
### Deploy FAQ Generation on Xeon
|
||||
|
||||
Refer to the [Xeon Guide](./docker_compose/intel/cpu/xeon/README.md) for instructions on deploying FAQ Generation on Xeon.
|
||||
|
||||
### Deploy FaqGen on Kubernetes using Helm Chart
|
||||
|
||||
Refer to the [FaqGen helm chart](./kubernetes/helm/README.md) for instructions on deploying FaqGen on Kubernetes.
|
||||
|
||||
18
FaqGen/kubernetes/helm/README.md
Normal file
18
FaqGen/kubernetes/helm/README.md
Normal file
@@ -0,0 +1,18 @@
|
||||
# Deploy FaqGen on Kubernetes cluster
|
||||
|
||||
- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information.
|
||||
- For more deployment options, refer to the [helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme).
|
||||
|
||||
## Deploy on Xeon
|
||||
|
||||
```
|
||||
export HFTOKEN="insert-your-huggingface-token-here"
|
||||
helm install faqgen oci://ghcr.io/opea-project/charts/faqgen --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml
|
||||
```
|
||||
|
||||
## Deploy on Gaudi
|
||||
|
||||
```
|
||||
export HFTOKEN="insert-your-huggingface-token-here"
|
||||
helm install faqgen oci://ghcr.io/opea-project/charts/faqgen --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f gaudi-values.yaml
|
||||
```
|
||||
5
FaqGen/kubernetes/helm/cpu-values.yaml
Normal file
5
FaqGen/kubernetes/helm/cpu-values.yaml
Normal file
@@ -0,0 +1,5 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
tgi:
|
||||
LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct
|
||||
27
FaqGen/kubernetes/helm/gaudi-values.yaml
Normal file
27
FaqGen/kubernetes/helm/gaudi-values.yaml
Normal file
@@ -0,0 +1,27 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
tgi:
|
||||
accelDevice: "gaudi"
|
||||
image:
|
||||
repository: ghcr.io/huggingface/tgi-gaudi
|
||||
tag: "2.0.6"
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
MAX_INPUT_LENGTH: "4096"
|
||||
MAX_TOTAL_TOKENS: "8192"
|
||||
CUDA_GRAPHS: "0"
|
||||
livenessProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 1
|
||||
readinessProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 1
|
||||
startupProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 1
|
||||
failureThreshold: 120
|
||||
@@ -1,55 +0,0 @@
|
||||
# Deploy FaqGen in Kubernetes Cluster
|
||||
|
||||
> [NOTE]
|
||||
> The following values must be set before you can deploy:
|
||||
> HUGGINGFACEHUB_API_TOKEN
|
||||
> You can also customize the "MODEL_ID" and "model-volume".
|
||||
|
||||
## Required Models
|
||||
We set "meta-llama/Meta-Llama-3-8B-Instruct" as the default model. If you want to use another model, change the "--model-id" argument in `xeon/faqgen.yaml` or `gaudi/faqgen.yaml`.
|
||||
```
|
||||
- --model-id
|
||||
- 'meta-llama/Meta-Llama-3-8B-Instruct'
|
||||
```
|
||||
|
||||
If you use gated models, you also need to provide a [huggingface token](https://huggingface.co/docs/hub/security-tokens) via the "HUGGINGFACEHUB_API_TOKEN" environment variable.
|
||||
|
||||
## Deploy On Xeon
|
||||
|
||||
```
|
||||
cd GenAIExamples/FaqGen/kubernetes/intel/cpu/xeon/manifest
|
||||
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
|
||||
sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" faqgen.yaml
|
||||
kubectl apply -f faqgen.yaml
|
||||
```
|
||||
|
||||
## Deploy On Gaudi
|
||||
|
||||
```
|
||||
cd GenAIExamples/FaqGen/kubernetes/intel/hpu/gaudi/manifest
|
||||
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
|
||||
sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" faqgen.yaml
|
||||
kubectl apply -f faqgen.yaml
|
||||
```
|
||||
|
||||
## Deploy UI
|
||||
|
||||
```
|
||||
cd GenAIExamples/FaqGen/kubernetes/manifests/
|
||||
kubectl get svc # get ip address
|
||||
ip_address="" # according to your svc address
|
||||
sed -i "s/insert_your_ip_here/${ip_address}/g" ui.yaml
|
||||
kubectl apply -f ui.yaml
|
||||
```
|
||||
|
||||
## Verify Services
|
||||
|
||||
Make sure all the pods are running, and restart the faqgen-xxxx pod if necessary.
|
||||
|
||||
```
|
||||
kubectl get pods
|
||||
port=7779 # 7779 for gaudi, 7778 for xeon
|
||||
curl http://${host_ip}:${port}/v1/faqgen -H "Content-Type: application/json" -d '{
|
||||
"messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."
|
||||
}'
|
||||
```
|
||||
@@ -1,36 +0,0 @@
|
||||
# Deploy FaqGen with ReactUI
|
||||
|
||||
This README provides a step-by-step guide on how to deploy FaqGen with ReactUI, a popular React-based user interface library, in a Kubernetes cluster.
|
||||
|
||||
You can use react-faqgen.yaml to deploy FaqGen with reactUI.
|
||||
```
|
||||
kubectl apply -f react-faqgen.yaml
|
||||
```
|
||||
|
||||
## Prerequisites for Deploying FaqGen with ReactUI
|
||||
Before deploying the react-faqgen.yaml file, ensure that you have the following prerequisites in place:
|
||||
|
||||
1. Kubernetes installation: Make sure that you have Kubernetes installed.
|
||||
2. Configuration Values: Set the following values in react-faqgen.yaml before proceeding with the deployment:
|
||||
a. HUGGINGFACEHUB_API_TOKEN (Your HuggingFace token to download your desired model from HuggingFace):
|
||||
```
|
||||
# You may set the HUGGINGFACEHUB_API_TOKEN via the following method:
|
||||
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
|
||||
cd GenAIExamples/FaqGen/kubernetes/intel/cpu/xeon/manifest/ui/
|
||||
sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" react-faqgen.yaml
|
||||
```
|
||||
b. Set the proxies based on your network configuration
|
||||
```
|
||||
# Look for http_proxy, https_proxy, no_proxy key and fill up the value with your proxy configuration.
|
||||
```
|
||||
3. MODEL_ID and model-volume (OPTIONAL): You may also customize "MODEL_ID" to use a different model, and "model-volume" to change the volume to be mounted.
|
||||
4. After completing these, you can proceed with the deployment of the react-faqgen.yaml file.
|
||||
|
||||
## Verify Services
|
||||
Make sure all the pods are running; you should see a total of 4 pods running:
|
||||
1. faqgen
|
||||
2. faqgen-llm-uservice
|
||||
3. faqgen-react-ui
|
||||
4. faqgen-tgi
|
||||
|
||||
You may open up the UI by using the faqgen-react-ui endpoint in the browser.
|
||||
@@ -1,167 +0,0 @@
|
||||
---
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: faq-tgi-cpu-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: faq-tgi-cpu-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: faq-tgi-cpu-deploy
|
||||
spec:
|
||||
hostIPC: true
|
||||
securityContext: {}
|
||||
containers:
|
||||
- name: faq-tgi-cpu-deploy-demo
|
||||
env:
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: "insert-your-huggingface-token-here"
|
||||
- name: PORT
|
||||
value: "80"
|
||||
image: ghcr.io/huggingface/text-generation-inference:1.4
|
||||
imagePullPolicy: IfNotPresent
|
||||
securityContext: {}
|
||||
args:
|
||||
- --model-id
|
||||
- 'meta-llama/Meta-Llama-3-8B-Instruct'
|
||||
- --cuda_graphs
|
||||
- '0'
|
||||
- --max-input-length
|
||||
- '3096'
|
||||
- --max-total-tokens
|
||||
- '4096'
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
ports:
|
||||
- containerPort: 80
|
||||
serviceAccountName: default
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /home/sdp/cesg
|
||||
type: Directory
|
||||
- name: shm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: faq-tgi-cpu-svc
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: faq-tgi-cpu-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 8011
|
||||
targetPort: 80
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: faq-micro-cpu-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: faq-micro-cpu-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: faq-micro-cpu-deploy
|
||||
spec:
|
||||
hostIPC: true
|
||||
containers:
|
||||
- name: faq-micro-cpu-deploy
|
||||
env:
|
||||
- name: TGI_LLM_ENDPOINT
|
||||
value: "http://faq-tgi-cpu-svc.default.svc.cluster.local:8011"
|
||||
- name: HUGGINGFACEHUB_API_TOKEN
|
||||
value: "insert-your-huggingface-token-here"
|
||||
image: opea/llm-faqgen-tgi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 9000
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: faq-micro-cpu-svc
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: faq-micro-cpu-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 9004
|
||||
targetPort: 9000
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: faq-mega-server-cpu-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: faq-mega-server-cpu-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: faq-mega-server-cpu-deploy
|
||||
spec:
|
||||
hostIPC: true
|
||||
containers:
|
||||
- name: faq-mega-server-cpu-deploy
|
||||
env:
|
||||
- name: LLM_SERVICE_HOST_IP
|
||||
value: faq-micro-cpu-svc
|
||||
- name: LLM_SERVICE_PORT
|
||||
value: "9004"
|
||||
- name: MEGA_SERVICE_HOST_IP
|
||||
value: faq-mega-server-cpu-svc
|
||||
- name: MEGA_SERVICE_PORT
|
||||
value: "7777"
|
||||
image: opea/faqgen:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 7777
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: faq-mega-server-cpu-svc
|
||||
spec:
|
||||
type: NodePort
|
||||
selector:
|
||||
app: faq-mega-server-cpu-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 7778
|
||||
targetPort: 7777
|
||||
nodePort: 30778
|
||||
@@ -1,282 +0,0 @@
|
||||
---
|
||||
# Source: codegen/charts/llm-uservice/charts/tgi/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: faqgen-tgi
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.1.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: faqgen
|
||||
app.kubernetes.io/version: "1.4"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 80
|
||||
targetPort: 80
|
||||
protocol: TCP
|
||||
name: tgi
|
||||
selector:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: faqgen
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: faqgen-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-0.1.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: faqgen
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 9000
|
||||
targetPort: 9000
|
||||
protocol: TCP
|
||||
name: llm-uservice
|
||||
selector:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: faqgen
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: faqgen
|
||||
labels:
|
||||
helm.sh/chart: faqgen-0.1.0
|
||||
app.kubernetes.io/name: faqgen
|
||||
app.kubernetes.io/instance: faqgen
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 8888
|
||||
targetPort: 8888
|
||||
protocol: TCP
|
||||
name: faqgen
|
||||
selector:
|
||||
app.kubernetes.io/name: faqgen
|
||||
app.kubernetes.io/instance: faqgen
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: faqgen-react-ui
|
||||
labels:
|
||||
helm.sh/chart: faqgen-react-ui-0.1.0
|
||||
app.kubernetes.io/name: react-ui
|
||||
app.kubernetes.io/instance: faqgen
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 80
|
||||
targetPort: 80
|
||||
protocol: TCP
|
||||
name: react-ui
|
||||
selector:
|
||||
app.kubernetes.io/name: react-ui
|
||||
app.kubernetes.io/instance: faqgen
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: faqgen-tgi
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.1.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: faqgen
|
||||
app.kubernetes.io/version: "1.4"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: faqgen
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: faqgen
|
||||
spec:
|
||||
securityContext: {}
|
||||
containers:
|
||||
- name: tgi
|
||||
env:
|
||||
- name: MODEL_ID
|
||||
value: Intel/neural-chat-7b-v3-3
|
||||
- name: PORT
|
||||
value: "80"
|
||||
- name: CUDA_GRAPHS
|
||||
value: "0"
|
||||
- name: http_proxy
|
||||
value:
|
||||
- name: https_proxy
|
||||
value:
|
||||
- name: no_proxy
|
||||
value:
|
||||
securityContext: {}
|
||||
image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 80
|
||||
protocol: TCP
|
||||
resources: {}
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt
|
||||
type: Directory
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: faqgen-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-0.1.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: faqgen
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: faqgen
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: faqgen
|
||||
spec:
|
||||
securityContext: {}
|
||||
containers:
|
||||
- name: faqgen
|
||||
env:
|
||||
- name: TGI_LLM_ENDPOINT
|
||||
value: "http://faqgen-tgi:80"
|
||||
- name: HUGGINGFACEHUB_API_TOKEN
|
||||
value: "insert-your-huggingface-token-here"
|
||||
- name: http_proxy
|
||||
value:
|
||||
- name: https_proxy
|
||||
value:
|
||||
- name: no_proxy
|
||||
value:
|
||||
securityContext: {}
|
||||
image: "opea/llm-faqgen-tgi:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: llm-uservice
|
||||
containerPort: 9000
|
||||
protocol: TCP
|
||||
startupProbe:
|
||||
exec:
|
||||
command:
|
||||
- curl
|
||||
- http://faqgen-tgi:80
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
failureThreshold: 120
|
||||
resources: {}
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: faqgen
|
||||
labels:
|
||||
helm.sh/chart: faqgen-0.1.0
|
||||
app.kubernetes.io/name: faqgen
|
||||
app.kubernetes.io/instance: faqgen
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: faqgen
|
||||
app.kubernetes.io/instance: faqgen
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: faqgen
|
||||
app.kubernetes.io/instance: faqgen
|
||||
spec:
|
||||
securityContext: null
|
||||
containers:
|
||||
- name: faqgen
|
||||
env:
|
||||
- name: LLM_SERVICE_HOST_IP
|
||||
value: faqgen-llm-uservice
|
||||
- name: http_proxy
|
||||
value:
|
||||
- name: https_proxy
|
||||
value:
|
||||
- name: no_proxy
|
||||
value:
|
||||
securityContext: null
|
||||
image: "opea/faqgen:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: faqgen
|
||||
containerPort: 8888
|
||||
protocol: TCP
|
||||
resources: null
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: faqgen-react-ui
|
||||
labels:
|
||||
helm.sh/chart: faqgen-react-ui-0.1.0
|
||||
app.kubernetes.io/name: react-ui
|
||||
app.kubernetes.io/instance: faqgen
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: react-ui
|
||||
app.kubernetes.io/instance: faqgen
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: react-ui
|
||||
app.kubernetes.io/instance: faqgen
|
||||
spec:
|
||||
securityContext: null
|
||||
containers:
|
||||
- name: faqgen-react-ui
|
||||
env:
|
||||
- name: FAQ_BASE_URL
|
||||
value: "http://faqgen:8888/v1/faqgen"
|
||||
- name: http_proxy
|
||||
value:
|
||||
- name: https_proxy
|
||||
value:
|
||||
- name: no_proxy
|
||||
value:
|
||||
securityContext: null
|
||||
image: "opea/faqgen-react-ui:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: react-ui
|
||||
containerPort: 80
|
||||
protocol: TCP
|
||||
resources: null
|
||||
@@ -1,46 +0,0 @@
|
||||
---
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: faq-mega-ui-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: faq-mega-ui-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: faq-mega-ui-deploy
|
||||
spec:
|
||||
hostIPC: true
|
||||
containers:
|
||||
- name: faq-mega-ui-deploy
|
||||
env:
|
||||
- name: FAQ_BASE_URL
|
||||
value: http://{insert_your_ip_here}:7779/v1/faqgen
|
||||
image: opea/faqgen-ui:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 5173
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: faq-mega-ui-svc
|
||||
spec:
|
||||
type: NodePort
|
||||
selector:
|
||||
app: faq-mega-ui-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 5175
|
||||
targetPort: 5173
|
||||
nodePort: 30175
|
||||
@@ -1,196 +0,0 @@
|
||||
---
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: faq-tgi-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: faq-tgi-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: faq-tgi-deploy
|
||||
spec:
|
||||
hostIPC: true
|
||||
containers:
|
||||
- name: faq-tgi-deploy-demo
|
||||
env:
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: "insert-your-huggingface-token-here"
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: PREFILL_BATCH_BUCKET_SIZE
|
||||
value: "1"
|
||||
- name: BATCH_BUCKET_SIZE
|
||||
value: "8"
|
||||
- name: PORT
|
||||
value: "80"
|
||||
- name: ENABLE_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: LIMIT_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: USE_FLASH_ATTENTION
|
||||
value: 'true'
|
||||
- name: FLASH_ATTENTION_RECOMPUTE
|
||||
value: 'true'
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.6
|
||||
imagePullPolicy: IfNotPresent
|
||||
securityContext:
|
||||
capabilities:
|
||||
add:
|
||||
- SYS_NICE
|
||||
args:
|
||||
- --model-id
|
||||
- 'meta-llama/Meta-Llama-3-8B-Instruct'
|
||||
- --cuda_graphs
|
||||
- '0'
|
||||
- --max-input-length
|
||||
- '3096'
|
||||
- --max-total-tokens
|
||||
- '4096'
|
||||
- --max-batch-total-tokens
|
||||
- '65536'
|
||||
- --max-batch-prefill-tokens
|
||||
- '4096'
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
serviceAccountName: default
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
- name: shm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: faq-tgi-svc
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: faq-tgi-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 8010
|
||||
targetPort: 80
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: faq-micro-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: faq-micro-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: faq-micro-deploy
|
||||
spec:
|
||||
hostIPC: true
|
||||
containers:
|
||||
- name: faq-micro-deploy
|
||||
env:
|
||||
- name: TGI_LLM_ENDPOINT
|
||||
value: "http://faq-tgi-svc.default.svc.cluster.local:8010"
|
||||
- name: HUGGINGFACEHUB_API_TOKEN
|
||||
value: "insert-your-huggingface-token-here"
|
||||
image: opea/llm-faqgen-tgi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 9000
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: faq-micro-svc
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: faq-micro-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 9003
|
||||
targetPort: 9000
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: faq-mega-server-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: faq-mega-server-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: faq-mega-server-deploy
|
||||
spec:
|
||||
hostIPC: true
|
||||
containers:
|
||||
- name: faq-mega-server-deploy
|
||||
env:
|
||||
- name: LLM_SERVICE_HOST_IP
|
||||
value: faq-micro-svc
|
||||
- name: LLM_SERVICE_PORT
|
||||
value: "9003"
|
||||
- name: MEGA_SERVICE_HOST_IP
|
||||
value: faq-mega-server-svc
|
||||
- name: MEGA_SERVICE_PORT
|
||||
value: "7777"
|
||||
image: opea/faqgen:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 7777
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: faq-mega-server-svc
|
||||
spec:
|
||||
type: NodePort
|
||||
selector:
|
||||
app: faq-mega-server-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 7779
|
||||
targetPort: 7777
|
||||
nodePort: 30779
|
||||
@@ -1,46 +0,0 @@
|
||||
---
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: faq-mega-ui-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: faq-mega-ui-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: faq-mega-ui-deploy
|
||||
spec:
|
||||
hostIPC: true
|
||||
containers:
|
||||
- name: faq-mega-ui-deploy
|
||||
env:
|
||||
- name: FAQ_BASE_URL
|
||||
value: http://{insert_your_ip_here}:7779/v1/faqgen
|
||||
image: opea/faqgen-ui:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 5173
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: faq-mega-ui-svc
|
||||
spec:
|
||||
type: NodePort
|
||||
selector:
|
||||
app: faq-mega-ui-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 5175
|
||||
targetPort: 5173
|
||||
nodePort: 30175
|
||||
24
README.md
24
README.md
@@ -25,11 +25,11 @@ Deployment are based on released docker images by default, check [docker image l
|
||||
#### Prerequisite
|
||||
|
||||
- For Docker Compose based deployment, you should have docker compose installed. Refer to [docker compose install](https://docs.docker.com/compose/install/).
|
||||
- For Kubernetes based deployment, we provide 3 ways from the easiest manifests to powerful [GMC](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector/README.md) based deployment.
|
||||
- For Kubernetes based deployment, you can use either [Helm](https://helm.sh) or [GMC](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector/README.md).
|
||||
|
||||
- You should have a Kubernetes cluster ready for use. If not, you can refer to [k8s install](https://github.com/opea-project/docs/tree/main/guide/installation/k8s_install/README.md) to deploy one.
|
||||
- (Optional) You should have GMC installed to your kubernetes cluster if you want to try with GMC. Refer to [GMC install](https://github.com/opea-project/docs/blob/main/guide/installation/gmc_install/gmc_install.md) for more information.
|
||||
- (Optional) You should have Helm (version >= 3.15) installed if you want to deploy with Helm Charts. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information.
|
||||
- (Optional) You should have GMC installed to your kubernetes cluster if you want to try with GMC. Refer to [GMC install](https://github.com/opea-project/docs/blob/main/guide/installation/gmc_install/gmc_install.md) for more information.
|
||||
|
||||
- Recommended Hardware Reference
|
||||
|
||||
@@ -44,16 +44,16 @@ Deployment are based on released docker images by default, check [docker image l
|
||||
#### Deploy Examples
|
||||
|
||||
| Use Case | Docker Compose<br/>Deployment on Xeon | Docker Compose<br/>Deployment on Gaudi | Kubernetes with Helm Charts | Kubernetes with GMC |
|
||||
| ----------------- | ------------------------------------------------------------------------------ | ---------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------ |
|
||||
| ChatQnA | [Xeon Instructions](ChatQnA/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](ChatQnA/docker_compose/intel/hpu/gaudi/README.md) | [ChatQnA with Helm Charts](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts/chatqna/README.md) | [ChatQnA with GMC](ChatQnA/kubernetes/intel/README_gmc.md) |
|
||||
| CodeGen | [Xeon Instructions](CodeGen/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](CodeGen/docker_compose/intel/hpu/gaudi/README.md) | [CodeGen with Helm Charts](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts/codegen/README.md) | [CodeGen with GMC](CodeGen/kubernetes/gmc/README.md) |
|
||||
| CodeTrans | [Xeon Instructions](CodeTrans/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](CodeTrans/docker_compose/intel/hpu/gaudi/README.md) | [CodeTrans with Helm Charts](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts/codetrans/README.md) | [CodeTrans with GMC](CodeTrans/kubernetes/intel/README_gmc.md) |
|
||||
| DocSum | [Xeon Instructions](DocSum/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](DocSum/docker_compose/intel/hpu/gaudi/README.md) | [DocSum with Helm Charts](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts/docsum/README.md) | [DocSum with GMC](DocSum/kubernetes/intel/README_gmc.md) |
|
||||
| SearchQnA | [Xeon Instructions](SearchQnA/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](SearchQnA/docker_compose/intel/hpu/gaudi/README.md) | Not Supported | [SearchQnA with GMC](SearchQnA/kubernetes/intel/README_gmc.md) |
|
||||
| FaqGen | [Xeon Instructions](FaqGen/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](FaqGen/docker_compose/intel/hpu/gaudi/README.md) | Not Supported | [FaqGen with GMC](FaqGen/kubernetes/intel/README_gmc.md) |
|
||||
| Translation | [Xeon Instructions](Translation/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](Translation/docker_compose/intel/hpu/gaudi/README.md) | Not Supported | [Translation with GMC](Translation/kubernetes/intel/README_gmc.md) |
|
||||
| AudioQnA | [Xeon Instructions](AudioQnA/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](AudioQnA/docker_compose/intel/hpu/gaudi/README.md) | Not Supported | [AudioQnA with GMC](AudioQnA/kubernetes/intel/README_gmc.md) |
|
||||
| VisualQnA | [Xeon Instructions](VisualQnA/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](VisualQnA/docker_compose/intel/hpu/gaudi/README.md) | Not Supported | [VisualQnA with GMC](VisualQnA/kubernetes/intel/README_gmc.md) |
|
||||
| ----------------- | ------------------------------------------------------------------------------ | ---------------------------------------------------------------------------- | ----------------------------------------------------------------- | ------------------------------------------------------------ |
|
||||
| ChatQnA | [Xeon Instructions](ChatQnA/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](ChatQnA/docker_compose/intel/hpu/gaudi/README.md) | [ChatQnA with Helm Charts](ChatQnA/kubernetes/helm/README.md) | [ChatQnA with GMC](ChatQnA/kubernetes/gmc/README.md) |
|
||||
| CodeGen | [Xeon Instructions](CodeGen/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](CodeGen/docker_compose/intel/hpu/gaudi/README.md) | [CodeGen with Helm Charts](CodeGen/kubernetes/helm/README.md) | [CodeGen with GMC](CodeGen/kubernetes/gmc/README.md) |
|
||||
| CodeTrans | [Xeon Instructions](CodeTrans/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](CodeTrans/docker_compose/intel/hpu/gaudi/README.md) | [CodeTrans with Helm Charts](CodeTrans/kubernetes/helm/README.md) | [CodeTrans with GMC](CodeTrans/kubernetes/gmc/README.md) |
|
||||
| DocSum | [Xeon Instructions](DocSum/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](DocSum/docker_compose/intel/hpu/gaudi/README.md) | [DocSum with Helm Charts](DocSum/kubernetes/helm/README.md) | [DocSum with GMC](DocSum/kubernetes/gmc/README.md) |
|
||||
| SearchQnA | [Xeon Instructions](SearchQnA/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](SearchQnA/docker_compose/intel/hpu/gaudi/README.md) | Not Supported | [SearchQnA with GMC](SearchQnA/kubernetes/gmc/README.md) |
|
||||
| FaqGen | [Xeon Instructions](FaqGen/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](FaqGen/docker_compose/intel/hpu/gaudi/README.md) | [FaqGen with Helm Charts](FaqGen/kubernetes/helm/README.md) | [FaqGen with GMC](FaqGen/kubernetes/gmc/README.md) |
|
||||
| Translation | [Xeon Instructions](Translation/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](Translation/docker_compose/intel/hpu/gaudi/README.md) | Not Supported | [Translation with GMC](Translation/kubernetes/gmc/README.md) |
|
||||
| AudioQnA | [Xeon Instructions](AudioQnA/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](AudioQnA/docker_compose/intel/hpu/gaudi/README.md) | [AudioQnA with Helm Charts](AudioQnA/kubernetes/helm/README.md) | [AudioQnA with GMC](AudioQnA/kubernetes/gmc/README.md) |
|
||||
| VisualQnA | [Xeon Instructions](VisualQnA/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](VisualQnA/docker_compose/intel/hpu/gaudi/README.md) | [VisualQnA with Helm Charts](VisualQnA/kubernetes/helm/README.md) | [VisualQnA with GMC](VisualQnA/kubernetes/gmc/README.md) |
|
||||
| MultimodalQnA | [Xeon Instructions](MultimodalQnA/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](MultimodalQnA/docker_compose/intel/hpu/gaudi/README.md) | Not Supported | Not Supported |
|
||||
| ProductivitySuite | [Xeon Instructions](ProductivitySuite/docker_compose/intel/cpu/xeon/README.md) | Not Supported | Not Supported | Not Supported |
|
||||
|
||||
|
||||
@@ -112,12 +112,12 @@ fi
|
||||
|
||||
case "$1" in
|
||||
install_Translation)
|
||||
pushd Translation/kubernetes/intel/hpu/gaudi/gmc
|
||||
pushd Translation/kubernetes/gmc
|
||||
install_translation
|
||||
popd
|
||||
;;
|
||||
validate_Translation)
|
||||
pushd Translation/kubernetes/intel/hpu/gaudi/gmc
|
||||
pushd Translation/kubernetes/gmc
|
||||
validate_translation
|
||||
popd
|
||||
;;
|
||||
|
||||
@@ -140,3 +140,7 @@ Find the corresponding [compose.yaml](./docker_compose/intel/cpu/xeon/compose.ya
|
||||
cd GenAIExamples/VisualQnA/docker_compose/intel/cpu/xeon/
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
### Deploy VisualQnA on Kubernetes using Helm Chart
|
||||
|
||||
Refer to the [VisualQnA helm chart](./kubernetes/helm/README.md) for instructions on deploying VisualQnA on Kubernetes.
|
||||
|
||||
18
VisualQnA/kubernetes/helm/README.md
Normal file
18
VisualQnA/kubernetes/helm/README.md
Normal file
@@ -0,0 +1,18 @@
|
||||
# Deploy VisualQnA on Kubernetes cluster
|
||||
|
||||
- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information.
|
||||
- For more deployment options, refer to the [helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme).
|
||||
|
||||
## Deploy on Xeon
|
||||
|
||||
```
|
||||
export HFTOKEN="insert-your-huggingface-token-here"
|
||||
helm install visualqna oci://ghcr.io/opea-project/charts/visualqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml
|
||||
```
|
||||
|
||||
## Deploy on Gaudi
|
||||
|
||||
```
|
||||
export HFTOKEN="insert-your-huggingface-token-here"
|
||||
helm install visualqna oci://ghcr.io/opea-project/charts/visualqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f gaudi-values.yaml
|
||||
```
|
||||
7
VisualQnA/kubernetes/helm/cpu-values.yaml
Normal file
7
VisualQnA/kubernetes/helm/cpu-values.yaml
Normal file
@@ -0,0 +1,7 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
tgi:
|
||||
MAX_INPUT_LENGTH: "4096"
|
||||
MAX_TOTAL_TOKENS: "8192"
|
||||
LLM_MODEL_ID: llava-hf/llava-v1.6-mistral-7b-hf
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user