Update manifest for FaqGen (#582)
* update tgi version Signed-off-by: Xinyao Wang <xinyao.wang@intel.com> * add k8s for faq Signed-off-by: Xinyao Wang <xinyao.wang@intel.com> * add benchmark for faq Signed-off-by: Xinyao Wang <xinyao.wang@intel.com> * refine k8s for faq Signed-off-by: Xinyao Wang <xinyao.wang@intel.com> * add tuning for faq Signed-off-by: Xinyao Wang <xinyao.wang@intel.com> * add prompts with different length for faq Signed-off-by: Xinyao Wang <xinyao.wang@intel.com> * add tgi docker for llama3.1 Signed-off-by: Xinyao Wang <xinyao.wang@intel.com> * remove useless code Signed-off-by: Xinyao Wang <xinyao.wang@intel.com> * remove nodeselector Signed-off-by: Xinyao Wang <xinyao.wang@intel.com> * remove hg token Signed-off-by: Xinyao Wang <xinyao.wang@intel.com> * refine code structure Signed-off-by: Xinyao Wang <xinyao.wang@intel.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix readme Signed-off-by: Xinyao Wang <xinyao.wang@intel.com> --------- Signed-off-by: Xinyao Wang <xinyao.wang@intel.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
This commit is contained in:
@@ -16,7 +16,7 @@ cd GenAIComps
|
||||
As TGI Gaudi has been officially published as a Docker image, we simply need to pull it:
|
||||
|
||||
```bash
|
||||
docker pull ghcr.io/huggingface/tgi-gaudi:1.2.1
|
||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
|
||||
```
|
||||
|
||||
### 2. Build LLM Image
|
||||
@@ -56,7 +56,7 @@ docker build -t opea/faqgen-react-ui:latest --build-arg https_proxy=$https_proxy
|
||||
|
||||
Then run the command `docker images`; you should see the following Docker images:
|
||||
|
||||
1. `ghcr.io/huggingface/tgi-gaudi:1.2.1`
|
||||
1. `ghcr.io/huggingface/tgi-gaudi:2.0.1`
|
||||
2. `opea/llm-faqgen-tgi:latest`
|
||||
3. `opea/faqgen:latest`
|
||||
4. `opea/faqgen-ui:latest`
|
||||
|
||||
@@ -17,12 +17,14 @@ services:
|
||||
https_proxy: ${https_proxy}
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
PREFILL_BATCH_BUCKET_SIZE: 1
|
||||
BATCH_BUCKET_SIZE: 8
|
||||
runtime: habana
|
||||
cap_add:
|
||||
- SYS_NICE
|
||||
ipc: host
|
||||
command: --model-id ${LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048
|
||||
command: --model-id ${LLM_MODEL_ID} --max-input-length 2048 --max-total-tokens 4096 --max-batch-total-tokens 65536 --max-batch-prefill-tokens 4096
|
||||
llm_faqgen:
|
||||
image: opea/llm-faqgen-tgi:latest
|
||||
container_name: llm-faqgen-server
|
||||
|
||||
@@ -23,13 +23,24 @@ sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" faqg
|
||||
kubectl apply -f faqgen.yaml
|
||||
```
|
||||
|
||||
## Deploy UI
|
||||
|
||||
```
|
||||
cd GenAIExamples/FaqGen/kubernetes/manifests/
|
||||
kubectl get svc # get ip address
|
||||
ip_address="" # according to your svc address
|
||||
sed -i "s/{insert_your_ip_here}/${ip_address}/g" ui.yaml
|
||||
kubectl apply -f ui.yaml
|
||||
```
|
||||
|
||||
## Verify Services
|
||||
|
||||
Make sure all the pods are running, and restart the faqgen-xxxx pod if necessary.
|
||||
|
||||
```
|
||||
kubectl get pods
|
||||
curl http://${host_ip}:8888/v1/faqgen -H "Content-Type: application/json" -d '{
|
||||
port=7779 # 7779 for gaudi, 7778 for xeon
|
||||
curl http://${host_ip}:${port}/v1/faqgen -H "Content-Type: application/json" -d '{
|
||||
"messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."
|
||||
}'
|
||||
```
|
||||
|
||||
@@ -1,216 +1,186 @@
|
||||
---
|
||||
# Source: codegen/charts/llm-uservice/charts/tgi/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: faqgen-tgi
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.1.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: faqgen
|
||||
app.kubernetes.io/version: "1.4"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 80
|
||||
targetPort: 80
|
||||
protocol: TCP
|
||||
name: tgi
|
||||
selector:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: faqgen
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: faqgen-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-0.1.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: faqgen
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 9000
|
||||
targetPort: 9000
|
||||
protocol: TCP
|
||||
name: llm-uservice
|
||||
selector:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: faqgen
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: faqgen
|
||||
labels:
|
||||
helm.sh/chart: faqgen-0.1.0
|
||||
app.kubernetes.io/name: faqgen
|
||||
app.kubernetes.io/instance: faqgen
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 8888
|
||||
targetPort: 8888
|
||||
protocol: TCP
|
||||
name: faqgen
|
||||
selector:
|
||||
app.kubernetes.io/name: faqgen
|
||||
app.kubernetes.io/instance: faqgen
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: faqgen-tgi
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.1.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: faqgen
|
||||
app.kubernetes.io/version: "1.4"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
name: faq-tgi-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: faqgen
|
||||
app: faq-tgi-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: faqgen
|
||||
app: faq-tgi-deploy
|
||||
spec:
|
||||
securityContext: {}
|
||||
hostIPC: true
|
||||
containers:
|
||||
- name: tgi
|
||||
env:
|
||||
- name: MODEL_ID
|
||||
value: Intel/neural-chat-7b-v3-3
|
||||
- name: PORT
|
||||
value: "80"
|
||||
- name: http_proxy
|
||||
value:
|
||||
- name: https_proxy
|
||||
value:
|
||||
- name: no_proxy
|
||||
value:
|
||||
securityContext: {}
|
||||
image: "ghcr.io/huggingface/text-generation-inference:1.4"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 80
|
||||
protocol: TCP
|
||||
resources: {}
|
||||
- name: faq-tgi-deploy-demo
|
||||
env:
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: "insert-your-huggingface-token-here"
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: PREFILL_BATCH_BUCKET_SIZE
|
||||
value: "1"
|
||||
- name: BATCH_BUCKET_SIZE
|
||||
value: "8"
|
||||
- name: PORT
|
||||
value: "80"
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
|
||||
imagePullPolicy: IfNotPresent
|
||||
securityContext:
|
||||
capabilities:
|
||||
add:
|
||||
- SYS_NICE
|
||||
args:
|
||||
- --model-id
|
||||
- 'meta-llama/Meta-Llama-3-8B-Instruct'
|
||||
- --max-input-length
|
||||
- '3096'
|
||||
- --max-total-tokens
|
||||
- '4096'
|
||||
- --max-batch-total-tokens
|
||||
- '65536'
|
||||
- --max-batch-prefill-tokens
|
||||
- '4096'
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
serviceAccountName: default
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt
|
||||
type: Directory
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /home/sdp/cesg
|
||||
type: Directory
|
||||
- name: shm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: faq-tgi-svc
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: faq-tgi-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 8010
|
||||
targetPort: 80
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: faqgen-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-0.1.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: faqgen
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
name: faq-micro-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: faqgen
|
||||
app: faq-micro-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: faqgen
|
||||
app: faq-micro-deploy
|
||||
spec:
|
||||
securityContext: {}
|
||||
hostIPC: true
|
||||
containers:
|
||||
- name: faqgen
|
||||
- name: faq-micro-deploy
|
||||
env:
|
||||
- name: TGI_LLM_ENDPOINT
|
||||
value: "http://faqgen-tgi:80"
|
||||
value: "http://faq-tgi-svc.default.svc.cluster.local:8010"
|
||||
- name: HUGGINGFACEHUB_API_TOKEN
|
||||
value: "insert-your-huggingface-token-here"
|
||||
- name: http_proxy
|
||||
value:
|
||||
- name: https_proxy
|
||||
value:
|
||||
- name: no_proxy
|
||||
value:
|
||||
securityContext: {}
|
||||
image: "opea/llm-faqgen-tgi:latest"
|
||||
image: opea/llm-faqgen-tgi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
args: null
|
||||
ports:
|
||||
- name: llm-uservice
|
||||
containerPort: 9000
|
||||
protocol: TCP
|
||||
startupProbe:
|
||||
exec:
|
||||
command:
|
||||
- curl
|
||||
- http://faqgen-tgi:80
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
failureThreshold: 120
|
||||
resources: {}
|
||||
- containerPort: 9000
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: faq-micro-svc
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: faq-micro-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 9003
|
||||
targetPort: 9000
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: faqgen
|
||||
labels:
|
||||
helm.sh/chart: faqgen-0.1.0
|
||||
app.kubernetes.io/name: faqgen
|
||||
app.kubernetes.io/instance: faqgen
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
name: faq-mega-server-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: faqgen
|
||||
app.kubernetes.io/instance: faqgen
|
||||
app: faq-mega-server-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app.kubernetes.io/name: faqgen
|
||||
app.kubernetes.io/instance: faqgen
|
||||
app: faq-mega-server-deploy
|
||||
spec:
|
||||
securityContext: null
|
||||
hostIPC: true
|
||||
containers:
|
||||
- name: faqgen
|
||||
- name: faq-mega-server-deploy
|
||||
env:
|
||||
- name: LLM_SERVICE_HOST_IP
|
||||
value: faqgen-llm-uservice
|
||||
- name: http_proxy
|
||||
value:
|
||||
- name: https_proxy
|
||||
value:
|
||||
- name: no_proxy
|
||||
value:
|
||||
securityContext: null
|
||||
image: "opea/faqgen:latest"
|
||||
value: faq-micro-svc
|
||||
- name: LLM_SERVICE_PORT
|
||||
value: "9003"
|
||||
- name: MEGA_SERVICE_HOST_IP
|
||||
value: faq-mega-server-svc
|
||||
- name: MEGA_SERVICE_PORT
|
||||
value: "7777"
|
||||
image: opea/faqgen:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
args: null
|
||||
ports:
|
||||
- name: faqgen
|
||||
containerPort: 8888
|
||||
protocol: TCP
|
||||
resources: null
|
||||
- containerPort: 7777
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: faq-mega-server-svc
|
||||
spec:
|
||||
type: NodePort
|
||||
selector:
|
||||
app: faq-mega-server-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 7779
|
||||
targetPort: 7777
|
||||
nodePort: 30779
|
||||
|
||||
46
FaqGen/kubernetes/manifests/ui.yaml
Normal file
46
FaqGen/kubernetes/manifests/ui.yaml
Normal file
@@ -0,0 +1,46 @@
|
||||
---
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: faq-mega-ui-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: faq-mega-ui-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: faq-mega-ui-deploy
|
||||
spec:
|
||||
hostIPC: true
|
||||
containers:
|
||||
- name: faq-mega-ui-deploy
|
||||
env:
|
||||
- name: DOC_BASE_URL
|
||||
value: http://{insert_your_ip_here}:7779/v1/faqgen
|
||||
image: opea/faqgen-ui:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 5173
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: faq-mega-ui-svc
|
||||
spec:
|
||||
type: NodePort
|
||||
selector:
|
||||
app: faq-mega-ui-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 5175
|
||||
targetPort: 5173
|
||||
nodePort: 30175
|
||||
@@ -1,216 +1,165 @@
|
||||
---
|
||||
# Source: codegen/charts/llm-uservice/charts/tgi/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: faqgen-tgi
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.1.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: faqgen
|
||||
app.kubernetes.io/version: "1.4"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 80
|
||||
targetPort: 80
|
||||
protocol: TCP
|
||||
name: tgi
|
||||
selector:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: faqgen
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: faqgen-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-0.1.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: faqgen
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 9000
|
||||
targetPort: 9000
|
||||
protocol: TCP
|
||||
name: llm-uservice
|
||||
selector:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: faqgen
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: faqgen
|
||||
labels:
|
||||
helm.sh/chart: faqgen-0.1.0
|
||||
app.kubernetes.io/name: faqgen
|
||||
app.kubernetes.io/instance: faqgen
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 8888
|
||||
targetPort: 8888
|
||||
protocol: TCP
|
||||
name: faqgen
|
||||
selector:
|
||||
app.kubernetes.io/name: faqgen
|
||||
app.kubernetes.io/instance: faqgen
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: faqgen-tgi
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.1.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: faqgen
|
||||
app.kubernetes.io/version: "1.4"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
name: faq-tgi-cpu-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: faqgen
|
||||
app: faq-tgi-cpu-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: faqgen
|
||||
app: faq-tgi-cpu-deploy
|
||||
spec:
|
||||
hostIPC: true
|
||||
securityContext: {}
|
||||
containers:
|
||||
- name: tgi
|
||||
env:
|
||||
- name: MODEL_ID
|
||||
value: Intel/neural-chat-7b-v3-3
|
||||
- name: PORT
|
||||
value: "80"
|
||||
- name: http_proxy
|
||||
value:
|
||||
- name: https_proxy
|
||||
value:
|
||||
- name: no_proxy
|
||||
value:
|
||||
securityContext: {}
|
||||
image: "ghcr.io/huggingface/text-generation-inference:1.4"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 80
|
||||
protocol: TCP
|
||||
resources: {}
|
||||
- name: faq-tgi-cpu-deploy-demo
|
||||
env:
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: "insert-your-huggingface-token-here"
|
||||
- name: PORT
|
||||
value: "80"
|
||||
image: ghcr.io/huggingface/text-generation-inference:1.4
|
||||
imagePullPolicy: IfNotPresent
|
||||
securityContext: {}
|
||||
args:
|
||||
- --model-id
|
||||
- 'meta-llama/Meta-Llama-3-8B-Instruct'
|
||||
- --max-input-length
|
||||
- '3096'
|
||||
- --max-total-tokens
|
||||
- '4096'
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
ports:
|
||||
- containerPort: 80
|
||||
serviceAccountName: default
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt
|
||||
type: Directory
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /home/sdp/cesg
|
||||
type: Directory
|
||||
- name: shm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: faq-tgi-cpu-svc
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: faq-tgi-cpu-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 8011
|
||||
targetPort: 80
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: faqgen-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-0.1.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: faqgen
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
name: faq-micro-cpu-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: faqgen
|
||||
app: faq-micro-cpu-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: faqgen
|
||||
app: faq-micro-cpu-deploy
|
||||
spec:
|
||||
securityContext: {}
|
||||
hostIPC: true
|
||||
containers:
|
||||
- name: faqgen
|
||||
- name: faq-micro-cpu-deploy
|
||||
env:
|
||||
- name: TGI_LLM_ENDPOINT
|
||||
value: "http://faqgen-tgi:80"
|
||||
value: "http://faq-tgi-cpu-svc.default.svc.cluster.local:8011"
|
||||
- name: HUGGINGFACEHUB_API_TOKEN
|
||||
value: "insert-your-huggingface-token-here"
|
||||
- name: http_proxy
|
||||
value:
|
||||
- name: https_proxy
|
||||
value:
|
||||
- name: no_proxy
|
||||
value:
|
||||
securityContext: {}
|
||||
image: "opea/llm-faqgen-tgi:latest"
|
||||
image: opea/llm-faqgen-tgi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
args: null
|
||||
ports:
|
||||
- name: llm-uservice
|
||||
containerPort: 9000
|
||||
protocol: TCP
|
||||
startupProbe:
|
||||
exec:
|
||||
command:
|
||||
- curl
|
||||
- http://faqgen-tgi:80
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
failureThreshold: 120
|
||||
resources: {}
|
||||
- containerPort: 9000
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: faq-micro-cpu-svc
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: faq-micro-cpu-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 9004
|
||||
targetPort: 9000
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: faqgen
|
||||
labels:
|
||||
helm.sh/chart: faqgen-0.1.0
|
||||
app.kubernetes.io/name: faqgen
|
||||
app.kubernetes.io/instance: faqgen
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
name: faq-mega-server-cpu-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: faqgen
|
||||
app.kubernetes.io/instance: faqgen
|
||||
app: faq-mega-server-cpu-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app.kubernetes.io/name: faqgen
|
||||
app.kubernetes.io/instance: faqgen
|
||||
app: faq-mega-server-cpu-deploy
|
||||
spec:
|
||||
securityContext: null
|
||||
hostIPC: true
|
||||
containers:
|
||||
- name: faqgen
|
||||
- name: faq-mega-server-cpu-deploy
|
||||
env:
|
||||
- name: LLM_SERVICE_HOST_IP
|
||||
value: faqgen-llm-uservice
|
||||
- name: http_proxy
|
||||
value:
|
||||
- name: https_proxy
|
||||
value:
|
||||
- name: no_proxy
|
||||
value:
|
||||
securityContext: null
|
||||
image: "opea/faqgen:latest"
|
||||
value: faq-micro-cpu-svc
|
||||
- name: LLM_SERVICE_PORT
|
||||
value: "9004"
|
||||
- name: MEGA_SERVICE_HOST_IP
|
||||
value: faq-mega-server-cpu-svc
|
||||
- name: MEGA_SERVICE_PORT
|
||||
value: "7777"
|
||||
image: opea/faqgen:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
args: null
|
||||
ports:
|
||||
- name: faqgen
|
||||
containerPort: 8888
|
||||
protocol: TCP
|
||||
resources: null
|
||||
- containerPort: 7777
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: faq-mega-server-cpu-svc
|
||||
spec:
|
||||
type: NodePort
|
||||
selector:
|
||||
app: faq-mega-server-cpu-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 7778
|
||||
targetPort: 7777
|
||||
nodePort: 30778
|
||||
|
||||
Reference in New Issue
Block a user