Update manifest for FaqGen (#582)
* update tgi version Signed-off-by: Xinyao Wang <xinyao.wang@intel.com> * add k8s for faq Signed-off-by: Xinyao Wang <xinyao.wang@intel.com> * add benchmark for faq Signed-off-by: Xinyao Wang <xinyao.wang@intel.com> * refine k8s for faq Signed-off-by: Xinyao Wang <xinyao.wang@intel.com> * add tuning for faq Signed-off-by: Xinyao Wang <xinyao.wang@intel.com> * add prompts with different length for faq Signed-off-by: Xinyao Wang <xinyao.wang@intel.com> * add tgi docker for llama3.1 Signed-off-by: Xinyao Wang <xinyao.wang@intel.com> * remove useless code Signed-off-by: Xinyao Wang <xinyao.wang@intel.com> * remove nodeselector Signed-off-by: Xinyao Wang <xinyao.wang@intel.com> * remove hg token Signed-off-by: Xinyao Wang <xinyao.wang@intel.com> * refine code structure Signed-off-by: Xinyao Wang <xinyao.wang@intel.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix readme Signed-off-by: Xinyao Wang <xinyao.wang@intel.com> --------- Signed-off-by: Xinyao Wang <xinyao.wang@intel.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
This commit is contained in:
@@ -16,7 +16,7 @@ cd GenAIComps
|
|||||||
As TGI Gaudi has been officially published as a Docker image, we simply need to pull it:
|
As TGI Gaudi has been officially published as a Docker image, we simply need to pull it:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker pull ghcr.io/huggingface/tgi-gaudi:1.2.1
|
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
|
||||||
```
|
```
|
||||||
|
|
||||||
### 2. Build LLM Image
|
### 2. Build LLM Image
|
||||||
@@ -56,7 +56,7 @@ docker build -t opea/faqgen-react-ui:latest --build-arg https_proxy=$https_proxy
|
|||||||
|
|
||||||
Then run the command `docker images`, you will have the following Docker Images:
|
Then run the command `docker images`, you will have the following Docker Images:
|
||||||
|
|
||||||
1. `ghcr.io/huggingface/tgi-gaudi:1.2.1`
|
1. `ghcr.io/huggingface/tgi-gaudi:2.0.1`
|
||||||
2. `opea/llm-faqgen-tgi:latest`
|
2. `opea/llm-faqgen-tgi:latest`
|
||||||
3. `opea/faqgen:latest`
|
3. `opea/faqgen:latest`
|
||||||
4. `opea/faqgen-ui:latest`
|
4. `opea/faqgen-ui:latest`
|
||||||
|
|||||||
@@ -17,12 +17,14 @@ services:
|
|||||||
https_proxy: ${https_proxy}
|
https_proxy: ${https_proxy}
|
||||||
HABANA_VISIBLE_DEVICES: all
|
HABANA_VISIBLE_DEVICES: all
|
||||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||||
|
PREFILL_BATCH_BUCKET_SIZE: 1
|
||||||
|
BATCH_BUCKET_SIZE: 8
|
||||||
runtime: habana
|
runtime: habana
|
||||||
cap_add:
|
cap_add:
|
||||||
- SYS_NICE
|
- SYS_NICE
|
||||||
ipc: host
|
ipc: host
|
||||||
command: --model-id ${LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048
|
command: --model-id ${LLM_MODEL_ID} --max-input-length 2048 --max-total-tokens 4096 --max-batch-total-tokens 65536 --max-batch-prefill-tokens 4096
|
||||||
llm_faqgen:
|
llm_faqgen:
|
||||||
image: opea/llm-faqgen-tgi:latest
|
image: opea/llm-faqgen-tgi:latest
|
||||||
container_name: llm-faqgen-server
|
container_name: llm-faqgen-server
|
||||||
|
|||||||
@@ -23,13 +23,24 @@ sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" faqg
|
|||||||
kubectl apply -f faqgen.yaml
|
kubectl apply -f faqgen.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Deploy UI
|
||||||
|
|
||||||
|
```
|
||||||
|
cd GenAIExamples/FaqGen/kubernetes/manifests/
|
||||||
|
kubectl get svc # get ip address
|
||||||
|
ip_address="" # according to your svc address
|
||||||
|
sed -i "s/{insert_your_ip_here}/${ip_address}/g" ui.yaml
|
||||||
|
kubectl apply -f ui.yaml
|
||||||
|
```
|
||||||
|
|
||||||
## Verify Services
|
## Verify Services
|
||||||
|
|
||||||
Make sure all the pods are running, and restart the faqgen-xxxx pod if necessary.
|
Make sure all the pods are running, and restart the faqgen-xxxx pod if necessary.
|
||||||
|
|
||||||
```
|
```
|
||||||
kubectl get pods
|
kubectl get pods
|
||||||
curl http://${host_ip}:8888/v1/faqgen -H "Content-Type: application/json" -d '{
|
port=7779 # 7779 for gaudi, 7778 for xeon
|
||||||
|
curl http://${host_ip}:${port}/v1/faqgen -H "Content-Type: application/json" -d '{
|
||||||
"messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."
|
"messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."
|
||||||
}'
|
}'
|
||||||
```
|
```
|
||||||
|
|||||||
@@ -1,216 +1,186 @@
|
|||||||
---
|
---
|
||||||
# Source: codegen/charts/llm-uservice/charts/tgi/templates/service.yaml
|
|
||||||
# Copyright (C) 2024 Intel Corporation
|
# Copyright (C) 2024 Intel Corporation
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
name: faqgen-tgi
|
|
||||||
labels:
|
|
||||||
helm.sh/chart: tgi-0.1.0
|
|
||||||
app.kubernetes.io/name: tgi
|
|
||||||
app.kubernetes.io/instance: faqgen
|
|
||||||
app.kubernetes.io/version: "1.4"
|
|
||||||
app.kubernetes.io/managed-by: Helm
|
|
||||||
spec:
|
|
||||||
type: ClusterIP
|
|
||||||
ports:
|
|
||||||
- port: 80
|
|
||||||
targetPort: 80
|
|
||||||
protocol: TCP
|
|
||||||
name: tgi
|
|
||||||
selector:
|
|
||||||
app.kubernetes.io/name: tgi
|
|
||||||
app.kubernetes.io/instance: faqgen
|
|
||||||
---
|
|
||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
name: faqgen-llm-uservice
|
|
||||||
labels:
|
|
||||||
helm.sh/chart: llm-uservice-0.1.0
|
|
||||||
app.kubernetes.io/name: llm-uservice
|
|
||||||
app.kubernetes.io/instance: faqgen
|
|
||||||
app.kubernetes.io/version: "1.0.0"
|
|
||||||
app.kubernetes.io/managed-by: Helm
|
|
||||||
spec:
|
|
||||||
type: ClusterIP
|
|
||||||
ports:
|
|
||||||
- port: 9000
|
|
||||||
targetPort: 9000
|
|
||||||
protocol: TCP
|
|
||||||
name: llm-uservice
|
|
||||||
selector:
|
|
||||||
app.kubernetes.io/name: llm-uservice
|
|
||||||
app.kubernetes.io/instance: faqgen
|
|
||||||
---
|
|
||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
name: faqgen
|
|
||||||
labels:
|
|
||||||
helm.sh/chart: faqgen-0.1.0
|
|
||||||
app.kubernetes.io/name: faqgen
|
|
||||||
app.kubernetes.io/instance: faqgen
|
|
||||||
app.kubernetes.io/version: "1.0.0"
|
|
||||||
app.kubernetes.io/managed-by: Helm
|
|
||||||
spec:
|
|
||||||
type: ClusterIP
|
|
||||||
ports:
|
|
||||||
- port: 8888
|
|
||||||
targetPort: 8888
|
|
||||||
protocol: TCP
|
|
||||||
name: faqgen
|
|
||||||
selector:
|
|
||||||
app.kubernetes.io/name: faqgen
|
|
||||||
app.kubernetes.io/instance: faqgen
|
|
||||||
---
|
|
||||||
apiVersion: apps/v1
|
apiVersion: apps/v1
|
||||||
kind: Deployment
|
kind: Deployment
|
||||||
metadata:
|
metadata:
|
||||||
name: faqgen-tgi
|
name: faq-tgi-deploy
|
||||||
labels:
|
namespace: default
|
||||||
helm.sh/chart: tgi-0.1.0
|
|
||||||
app.kubernetes.io/name: tgi
|
|
||||||
app.kubernetes.io/instance: faqgen
|
|
||||||
app.kubernetes.io/version: "1.4"
|
|
||||||
app.kubernetes.io/managed-by: Helm
|
|
||||||
spec:
|
spec:
|
||||||
replicas: 1
|
replicas: 1
|
||||||
selector:
|
selector:
|
||||||
matchLabels:
|
matchLabels:
|
||||||
app.kubernetes.io/name: tgi
|
app: faq-tgi-deploy
|
||||||
app.kubernetes.io/instance: faqgen
|
|
||||||
template:
|
template:
|
||||||
metadata:
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
labels:
|
labels:
|
||||||
app.kubernetes.io/name: tgi
|
app: faq-tgi-deploy
|
||||||
app.kubernetes.io/instance: faqgen
|
|
||||||
spec:
|
spec:
|
||||||
securityContext: {}
|
hostIPC: true
|
||||||
containers:
|
containers:
|
||||||
- name: tgi
|
- name: faq-tgi-deploy-demo
|
||||||
env:
|
env:
|
||||||
- name: MODEL_ID
|
- name: HUGGING_FACE_HUB_TOKEN
|
||||||
value: Intel/neural-chat-7b-v3-3
|
value: "insert-your-huggingface-token-here"
|
||||||
- name: PORT
|
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||||
value: "80"
|
value: none
|
||||||
- name: http_proxy
|
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||||
value:
|
value: 'true'
|
||||||
- name: https_proxy
|
- name: runtime
|
||||||
value:
|
value: habana
|
||||||
- name: no_proxy
|
- name: HABANA_VISIBLE_DEVICES
|
||||||
value:
|
value: all
|
||||||
securityContext: {}
|
- name: PREFILL_BATCH_BUCKET_SIZE
|
||||||
image: "ghcr.io/huggingface/text-generation-inference:1.4"
|
value: "1"
|
||||||
imagePullPolicy: IfNotPresent
|
- name: BATCH_BUCKET_SIZE
|
||||||
volumeMounts:
|
value: "8"
|
||||||
- mountPath: /data
|
- name: PORT
|
||||||
name: model-volume
|
value: "80"
|
||||||
ports:
|
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
|
||||||
- name: http
|
imagePullPolicy: IfNotPresent
|
||||||
containerPort: 80
|
securityContext:
|
||||||
protocol: TCP
|
capabilities:
|
||||||
resources: {}
|
add:
|
||||||
|
- SYS_NICE
|
||||||
|
args:
|
||||||
|
- --model-id
|
||||||
|
- 'meta-llama/Meta-Llama-3-8B-Instruct'
|
||||||
|
- --max-input-length
|
||||||
|
- '3096'
|
||||||
|
- --max-total-tokens
|
||||||
|
- '4096'
|
||||||
|
- --max-batch-total-tokens
|
||||||
|
- '65536'
|
||||||
|
- --max-batch-prefill-tokens
|
||||||
|
- '4096'
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /data
|
||||||
|
name: model-volume
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: shm
|
||||||
|
ports:
|
||||||
|
- containerPort: 80
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
habana.ai/gaudi: 1
|
||||||
|
serviceAccountName: default
|
||||||
volumes:
|
volumes:
|
||||||
- name: model-volume
|
- name: model-volume
|
||||||
hostPath:
|
hostPath:
|
||||||
path: /mnt
|
path: /home/sdp/cesg
|
||||||
type: Directory
|
type: Directory
|
||||||
|
- name: shm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: 1Gi
|
||||||
|
---
|
||||||
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: faq-tgi-svc
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: faq-tgi-deploy
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 8010
|
||||||
|
targetPort: 80
|
||||||
---
|
---
|
||||||
apiVersion: apps/v1
|
apiVersion: apps/v1
|
||||||
kind: Deployment
|
kind: Deployment
|
||||||
metadata:
|
metadata:
|
||||||
name: faqgen-llm-uservice
|
name: faq-micro-deploy
|
||||||
labels:
|
namespace: default
|
||||||
helm.sh/chart: llm-uservice-0.1.0
|
|
||||||
app.kubernetes.io/name: llm-uservice
|
|
||||||
app.kubernetes.io/instance: faqgen
|
|
||||||
app.kubernetes.io/version: "1.0.0"
|
|
||||||
app.kubernetes.io/managed-by: Helm
|
|
||||||
spec:
|
spec:
|
||||||
replicas: 1
|
replicas: 1
|
||||||
selector:
|
selector:
|
||||||
matchLabels:
|
matchLabels:
|
||||||
app.kubernetes.io/name: llm-uservice
|
app: faq-micro-deploy
|
||||||
app.kubernetes.io/instance: faqgen
|
|
||||||
template:
|
template:
|
||||||
metadata:
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
labels:
|
labels:
|
||||||
app.kubernetes.io/name: llm-uservice
|
app: faq-micro-deploy
|
||||||
app.kubernetes.io/instance: faqgen
|
|
||||||
spec:
|
spec:
|
||||||
securityContext: {}
|
hostIPC: true
|
||||||
containers:
|
containers:
|
||||||
- name: faqgen
|
- name: faq-micro-deploy
|
||||||
env:
|
env:
|
||||||
- name: TGI_LLM_ENDPOINT
|
- name: TGI_LLM_ENDPOINT
|
||||||
value: "http://faqgen-tgi:80"
|
value: "http://faq-tgi-svc.default.svc.cluster.local:8010"
|
||||||
- name: HUGGINGFACEHUB_API_TOKEN
|
- name: HUGGINGFACEHUB_API_TOKEN
|
||||||
value: "insert-your-huggingface-token-here"
|
value: "insert-your-huggingface-token-here"
|
||||||
- name: http_proxy
|
image: opea/llm-faqgen-tgi:latest
|
||||||
value:
|
|
||||||
- name: https_proxy
|
|
||||||
value:
|
|
||||||
- name: no_proxy
|
|
||||||
value:
|
|
||||||
securityContext: {}
|
|
||||||
image: "opea/llm-faqgen-tgi:latest"
|
|
||||||
imagePullPolicy: IfNotPresent
|
imagePullPolicy: IfNotPresent
|
||||||
|
args: null
|
||||||
ports:
|
ports:
|
||||||
- name: llm-uservice
|
- containerPort: 9000
|
||||||
containerPort: 9000
|
serviceAccountName: default
|
||||||
protocol: TCP
|
---
|
||||||
startupProbe:
|
kind: Service
|
||||||
exec:
|
apiVersion: v1
|
||||||
command:
|
metadata:
|
||||||
- curl
|
name: faq-micro-svc
|
||||||
- http://faqgen-tgi:80
|
spec:
|
||||||
initialDelaySeconds: 5
|
type: ClusterIP
|
||||||
periodSeconds: 5
|
selector:
|
||||||
failureThreshold: 120
|
app: faq-micro-deploy
|
||||||
resources: {}
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 9003
|
||||||
|
targetPort: 9000
|
||||||
---
|
---
|
||||||
apiVersion: apps/v1
|
apiVersion: apps/v1
|
||||||
kind: Deployment
|
kind: Deployment
|
||||||
metadata:
|
metadata:
|
||||||
name: faqgen
|
name: faq-mega-server-deploy
|
||||||
labels:
|
namespace: default
|
||||||
helm.sh/chart: faqgen-0.1.0
|
|
||||||
app.kubernetes.io/name: faqgen
|
|
||||||
app.kubernetes.io/instance: faqgen
|
|
||||||
app.kubernetes.io/version: "1.0.0"
|
|
||||||
app.kubernetes.io/managed-by: Helm
|
|
||||||
spec:
|
spec:
|
||||||
replicas: 1
|
replicas: 1
|
||||||
selector:
|
selector:
|
||||||
matchLabels:
|
matchLabels:
|
||||||
app.kubernetes.io/name: faqgen
|
app: faq-mega-server-deploy
|
||||||
app.kubernetes.io/instance: faqgen
|
|
||||||
template:
|
template:
|
||||||
metadata:
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
labels:
|
labels:
|
||||||
app.kubernetes.io/name: faqgen
|
app: faq-mega-server-deploy
|
||||||
app.kubernetes.io/instance: faqgen
|
|
||||||
spec:
|
spec:
|
||||||
securityContext: null
|
hostIPC: true
|
||||||
containers:
|
containers:
|
||||||
- name: faqgen
|
- name: faq-mega-server-deploy
|
||||||
env:
|
env:
|
||||||
- name: LLM_SERVICE_HOST_IP
|
- name: LLM_SERVICE_HOST_IP
|
||||||
value: faqgen-llm-uservice
|
value: faq-micro-svc
|
||||||
- name: http_proxy
|
- name: LLM_SERVICE_PORT
|
||||||
value:
|
value: "9003"
|
||||||
- name: https_proxy
|
- name: MEGA_SERVICE_HOST_IP
|
||||||
value:
|
value: faq-mega-server-svc
|
||||||
- name: no_proxy
|
- name: MEGA_SERVICE_PORT
|
||||||
value:
|
value: "7777"
|
||||||
securityContext: null
|
image: opea/faqgen:latest
|
||||||
image: "opea/faqgen:latest"
|
|
||||||
imagePullPolicy: IfNotPresent
|
imagePullPolicy: IfNotPresent
|
||||||
|
args: null
|
||||||
ports:
|
ports:
|
||||||
- name: faqgen
|
- containerPort: 7777
|
||||||
containerPort: 8888
|
serviceAccountName: default
|
||||||
protocol: TCP
|
---
|
||||||
resources: null
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: faq-mega-server-svc
|
||||||
|
spec:
|
||||||
|
type: NodePort
|
||||||
|
selector:
|
||||||
|
app: faq-mega-server-deploy
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 7779
|
||||||
|
targetPort: 7777
|
||||||
|
nodePort: 30779
|
||||||
|
|||||||
46
FaqGen/kubernetes/manifests/ui.yaml
Normal file
46
FaqGen/kubernetes/manifests/ui.yaml
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
---
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: faq-mega-ui-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: faq-mega-ui-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: faq-mega-ui-deploy
|
||||||
|
spec:
|
||||||
|
hostIPC: true
|
||||||
|
containers:
|
||||||
|
- name: faq-mega-ui-deploy
|
||||||
|
env:
|
||||||
|
- name: DOC_BASE_URL
|
||||||
|
value: http://{insert_your_ip_here}:7779/v1/faqgen
|
||||||
|
image: opea/faqgen-ui:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
args: null
|
||||||
|
ports:
|
||||||
|
- containerPort: 5173
|
||||||
|
serviceAccountName: default
|
||||||
|
---
|
||||||
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: faq-mega-ui-svc
|
||||||
|
spec:
|
||||||
|
type: NodePort
|
||||||
|
selector:
|
||||||
|
app: faq-mega-ui-deploy
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 5175
|
||||||
|
targetPort: 5173
|
||||||
|
nodePort: 30175
|
||||||
@@ -1,216 +1,165 @@
|
|||||||
---
|
---
|
||||||
# Source: codegen/charts/llm-uservice/charts/tgi/templates/service.yaml
|
|
||||||
# Copyright (C) 2024 Intel Corporation
|
# Copyright (C) 2024 Intel Corporation
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
name: faqgen-tgi
|
|
||||||
labels:
|
|
||||||
helm.sh/chart: tgi-0.1.0
|
|
||||||
app.kubernetes.io/name: tgi
|
|
||||||
app.kubernetes.io/instance: faqgen
|
|
||||||
app.kubernetes.io/version: "1.4"
|
|
||||||
app.kubernetes.io/managed-by: Helm
|
|
||||||
spec:
|
|
||||||
type: ClusterIP
|
|
||||||
ports:
|
|
||||||
- port: 80
|
|
||||||
targetPort: 80
|
|
||||||
protocol: TCP
|
|
||||||
name: tgi
|
|
||||||
selector:
|
|
||||||
app.kubernetes.io/name: tgi
|
|
||||||
app.kubernetes.io/instance: faqgen
|
|
||||||
---
|
|
||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
name: faqgen-llm-uservice
|
|
||||||
labels:
|
|
||||||
helm.sh/chart: llm-uservice-0.1.0
|
|
||||||
app.kubernetes.io/name: llm-uservice
|
|
||||||
app.kubernetes.io/instance: faqgen
|
|
||||||
app.kubernetes.io/version: "1.0.0"
|
|
||||||
app.kubernetes.io/managed-by: Helm
|
|
||||||
spec:
|
|
||||||
type: ClusterIP
|
|
||||||
ports:
|
|
||||||
- port: 9000
|
|
||||||
targetPort: 9000
|
|
||||||
protocol: TCP
|
|
||||||
name: llm-uservice
|
|
||||||
selector:
|
|
||||||
app.kubernetes.io/name: llm-uservice
|
|
||||||
app.kubernetes.io/instance: faqgen
|
|
||||||
---
|
|
||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
name: faqgen
|
|
||||||
labels:
|
|
||||||
helm.sh/chart: faqgen-0.1.0
|
|
||||||
app.kubernetes.io/name: faqgen
|
|
||||||
app.kubernetes.io/instance: faqgen
|
|
||||||
app.kubernetes.io/version: "1.0.0"
|
|
||||||
app.kubernetes.io/managed-by: Helm
|
|
||||||
spec:
|
|
||||||
type: ClusterIP
|
|
||||||
ports:
|
|
||||||
- port: 8888
|
|
||||||
targetPort: 8888
|
|
||||||
protocol: TCP
|
|
||||||
name: faqgen
|
|
||||||
selector:
|
|
||||||
app.kubernetes.io/name: faqgen
|
|
||||||
app.kubernetes.io/instance: faqgen
|
|
||||||
---
|
|
||||||
apiVersion: apps/v1
|
apiVersion: apps/v1
|
||||||
kind: Deployment
|
kind: Deployment
|
||||||
metadata:
|
metadata:
|
||||||
name: faqgen-tgi
|
name: faq-tgi-cpu-deploy
|
||||||
labels:
|
namespace: default
|
||||||
helm.sh/chart: tgi-0.1.0
|
|
||||||
app.kubernetes.io/name: tgi
|
|
||||||
app.kubernetes.io/instance: faqgen
|
|
||||||
app.kubernetes.io/version: "1.4"
|
|
||||||
app.kubernetes.io/managed-by: Helm
|
|
||||||
spec:
|
spec:
|
||||||
replicas: 1
|
replicas: 1
|
||||||
selector:
|
selector:
|
||||||
matchLabels:
|
matchLabels:
|
||||||
app.kubernetes.io/name: tgi
|
app: faq-tgi-cpu-deploy
|
||||||
app.kubernetes.io/instance: faqgen
|
|
||||||
template:
|
template:
|
||||||
metadata:
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
labels:
|
labels:
|
||||||
app.kubernetes.io/name: tgi
|
app: faq-tgi-cpu-deploy
|
||||||
app.kubernetes.io/instance: faqgen
|
|
||||||
spec:
|
spec:
|
||||||
|
hostIPC: true
|
||||||
securityContext: {}
|
securityContext: {}
|
||||||
containers:
|
containers:
|
||||||
- name: tgi
|
- name: faq-tgi-cpu-deploy-demo
|
||||||
env:
|
env:
|
||||||
- name: MODEL_ID
|
- name: HUGGING_FACE_HUB_TOKEN
|
||||||
value: Intel/neural-chat-7b-v3-3
|
value: "insert-your-huggingface-token-here"
|
||||||
- name: PORT
|
- name: PORT
|
||||||
value: "80"
|
value: "80"
|
||||||
- name: http_proxy
|
image: ghcr.io/huggingface/text-generation-inference:1.4
|
||||||
value:
|
imagePullPolicy: IfNotPresent
|
||||||
- name: https_proxy
|
securityContext: {}
|
||||||
value:
|
args:
|
||||||
- name: no_proxy
|
- --model-id
|
||||||
value:
|
- 'meta-llama/Meta-Llama-3-8B-Instruct'
|
||||||
securityContext: {}
|
- --max-input-length
|
||||||
image: "ghcr.io/huggingface/text-generation-inference:1.4"
|
- '3096'
|
||||||
imagePullPolicy: IfNotPresent
|
- --max-total-tokens
|
||||||
volumeMounts:
|
- '4096'
|
||||||
- mountPath: /data
|
volumeMounts:
|
||||||
name: model-volume
|
- mountPath: /data
|
||||||
ports:
|
name: model-volume
|
||||||
- name: http
|
- mountPath: /dev/shm
|
||||||
containerPort: 80
|
name: shm
|
||||||
protocol: TCP
|
ports:
|
||||||
resources: {}
|
- containerPort: 80
|
||||||
|
serviceAccountName: default
|
||||||
volumes:
|
volumes:
|
||||||
- name: model-volume
|
- name: model-volume
|
||||||
hostPath:
|
hostPath:
|
||||||
path: /mnt
|
path: /home/sdp/cesg
|
||||||
type: Directory
|
type: Directory
|
||||||
|
- name: shm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: 1Gi
|
||||||
|
---
|
||||||
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: faq-tgi-cpu-svc
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: faq-tgi-cpu-deploy
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 8011
|
||||||
|
targetPort: 80
|
||||||
---
|
---
|
||||||
apiVersion: apps/v1
|
apiVersion: apps/v1
|
||||||
kind: Deployment
|
kind: Deployment
|
||||||
metadata:
|
metadata:
|
||||||
name: faqgen-llm-uservice
|
name: faq-micro-cpu-deploy
|
||||||
labels:
|
namespace: default
|
||||||
helm.sh/chart: llm-uservice-0.1.0
|
|
||||||
app.kubernetes.io/name: llm-uservice
|
|
||||||
app.kubernetes.io/instance: faqgen
|
|
||||||
app.kubernetes.io/version: "1.0.0"
|
|
||||||
app.kubernetes.io/managed-by: Helm
|
|
||||||
spec:
|
spec:
|
||||||
replicas: 1
|
replicas: 1
|
||||||
selector:
|
selector:
|
||||||
matchLabels:
|
matchLabels:
|
||||||
app.kubernetes.io/name: llm-uservice
|
app: faq-micro-cpu-deploy
|
||||||
app.kubernetes.io/instance: faqgen
|
|
||||||
template:
|
template:
|
||||||
metadata:
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
labels:
|
labels:
|
||||||
app.kubernetes.io/name: llm-uservice
|
app: faq-micro-cpu-deploy
|
||||||
app.kubernetes.io/instance: faqgen
|
|
||||||
spec:
|
spec:
|
||||||
securityContext: {}
|
hostIPC: true
|
||||||
containers:
|
containers:
|
||||||
- name: faqgen
|
- name: faq-micro-cpu-deploy
|
||||||
env:
|
env:
|
||||||
- name: TGI_LLM_ENDPOINT
|
- name: TGI_LLM_ENDPOINT
|
||||||
value: "http://faqgen-tgi:80"
|
value: "http://faq-tgi-cpu-svc.default.svc.cluster.local:8011"
|
||||||
- name: HUGGINGFACEHUB_API_TOKEN
|
- name: HUGGINGFACEHUB_API_TOKEN
|
||||||
value: "insert-your-huggingface-token-here"
|
value: "insert-your-huggingface-token-here"
|
||||||
- name: http_proxy
|
image: opea/llm-faqgen-tgi:latest
|
||||||
value:
|
|
||||||
- name: https_proxy
|
|
||||||
value:
|
|
||||||
- name: no_proxy
|
|
||||||
value:
|
|
||||||
securityContext: {}
|
|
||||||
image: "opea/llm-faqgen-tgi:latest"
|
|
||||||
imagePullPolicy: IfNotPresent
|
imagePullPolicy: IfNotPresent
|
||||||
|
args: null
|
||||||
ports:
|
ports:
|
||||||
- name: llm-uservice
|
- containerPort: 9000
|
||||||
containerPort: 9000
|
serviceAccountName: default
|
||||||
protocol: TCP
|
---
|
||||||
startupProbe:
|
kind: Service
|
||||||
exec:
|
apiVersion: v1
|
||||||
command:
|
metadata:
|
||||||
- curl
|
name: faq-micro-cpu-svc
|
||||||
- http://faqgen-tgi:80
|
spec:
|
||||||
initialDelaySeconds: 5
|
type: ClusterIP
|
||||||
periodSeconds: 5
|
selector:
|
||||||
failureThreshold: 120
|
app: faq-micro-cpu-deploy
|
||||||
resources: {}
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 9004
|
||||||
|
targetPort: 9000
|
||||||
---
|
---
|
||||||
apiVersion: apps/v1
|
apiVersion: apps/v1
|
||||||
kind: Deployment
|
kind: Deployment
|
||||||
metadata:
|
metadata:
|
||||||
name: faqgen
|
name: faq-mega-server-cpu-deploy
|
||||||
labels:
|
namespace: default
|
||||||
helm.sh/chart: faqgen-0.1.0
|
|
||||||
app.kubernetes.io/name: faqgen
|
|
||||||
app.kubernetes.io/instance: faqgen
|
|
||||||
app.kubernetes.io/version: "1.0.0"
|
|
||||||
app.kubernetes.io/managed-by: Helm
|
|
||||||
spec:
|
spec:
|
||||||
replicas: 1
|
replicas: 1
|
||||||
selector:
|
selector:
|
||||||
matchLabels:
|
matchLabels:
|
||||||
app.kubernetes.io/name: faqgen
|
app: faq-mega-server-cpu-deploy
|
||||||
app.kubernetes.io/instance: faqgen
|
|
||||||
template:
|
template:
|
||||||
metadata:
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
labels:
|
labels:
|
||||||
app.kubernetes.io/name: faqgen
|
app: faq-mega-server-cpu-deploy
|
||||||
app.kubernetes.io/instance: faqgen
|
|
||||||
spec:
|
spec:
|
||||||
securityContext: null
|
hostIPC: true
|
||||||
containers:
|
containers:
|
||||||
- name: faqgen
|
- name: faq-mega-server-cpu-deploy
|
||||||
env:
|
env:
|
||||||
- name: LLM_SERVICE_HOST_IP
|
- name: LLM_SERVICE_HOST_IP
|
||||||
value: faqgen-llm-uservice
|
value: faq-micro-cpu-svc
|
||||||
- name: http_proxy
|
- name: LLM_SERVICE_PORT
|
||||||
value:
|
value: "9004"
|
||||||
- name: https_proxy
|
- name: MEGA_SERVICE_HOST_IP
|
||||||
value:
|
value: faq-mega-server-cpu-svc
|
||||||
- name: no_proxy
|
- name: MEGA_SERVICE_PORT
|
||||||
value:
|
value: "7777"
|
||||||
securityContext: null
|
image: opea/faqgen:latest
|
||||||
image: "opea/faqgen:latest"
|
|
||||||
imagePullPolicy: IfNotPresent
|
imagePullPolicy: IfNotPresent
|
||||||
|
args: null
|
||||||
ports:
|
ports:
|
||||||
- name: faqgen
|
- containerPort: 7777
|
||||||
containerPort: 8888
|
serviceAccountName: default
|
||||||
protocol: TCP
|
---
|
||||||
resources: null
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: faq-mega-server-cpu-svc
|
||||||
|
spec:
|
||||||
|
type: NodePort
|
||||||
|
selector:
|
||||||
|
app: faq-mega-server-cpu-deploy
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 7778
|
||||||
|
targetPort: 7777
|
||||||
|
nodePort: 30778
|
||||||
|
|||||||
Reference in New Issue
Block a user