Files
GenAIExamples/FaqGen/kubernetes/manifests/xeon/faqgen.yaml
NeuralChatBot e7b000eca5 Freeze OPEA images tag
Signed-off-by: NeuralChatBot <grp_neural_chat_bot@intel.com>
2024-08-25 16:28:59 +00:00

168 lines
3.7 KiB
YAML

---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# Deployment running the Hugging Face text-generation-inference (TGI) image.
# Pods carry the label app=faq-tgi-cpu-deploy, which the faq-tgi-cpu-svc
# Service selects on.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: faq-tgi-cpu-deploy
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: faq-tgi-cpu-deploy
  template:
    metadata:
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
      labels:
        app: faq-tgi-cpu-deploy
    spec:
      # Shares the host IPC namespace with the pod.
      hostIPC: true
      securityContext: {}
      containers:
        - name: faq-tgi-cpu-deploy-demo
          env:
            # Placeholder value: replace with a real Hugging Face token before
            # applying. NOTE(review): consider sourcing this from a Secret
            # instead of committing a token in plain text.
            - name: HUGGING_FACE_HUB_TOKEN
              value: "insert-your-huggingface-token-here"
            # Matches containerPort below.
            - name: PORT
              value: "80"
          image: ghcr.io/huggingface/text-generation-inference:1.4
          imagePullPolicy: IfNotPresent
          securityContext: {}
          args:
            - --model-id
            - 'meta-llama/Meta-Llama-3-8B-Instruct'
            # The launcher flag is spelled with a hyphen; the underscore form
            # (--cuda_graphs) is not a recognised option.
            # NOTE(review): '0' presumably disables CUDA graphs for this
            # CPU-only deployment - confirm against the TGI launcher docs.
            - --cuda-graphs
            - '0'
            - --max-input-length
            - '3096'
            - --max-total-tokens
            - '4096'
          volumeMounts:
            - mountPath: /data
              name: model-volume
            - mountPath: /dev/shm
              name: shm
          ports:
            - containerPort: 80
      serviceAccountName: default
      volumes:
        # Node-local directory mounted at /data. With type Directory the path
        # must already exist on the node; adjust it for your environment.
        - name: model-volume
          hostPath:
            path: /home/sdp/cesg
            type: Directory
        # Memory-backed emptyDir mounted at /dev/shm, capped at 1Gi.
        - name: shm
          emptyDir:
            medium: Memory
            sizeLimit: 1Gi
---
# ClusterIP Service in front of the TGI pods: port 8011 forwards to
# container port 80 on pods labelled app=faq-tgi-cpu-deploy.
apiVersion: v1
kind: Service
metadata:
  name: faq-tgi-cpu-svc
  # Pinned explicitly: the selected Deployment lives in "default" and the
  # FAQ microservice addresses this Service as
  # faq-tgi-cpu-svc.default.svc.cluster.local, so it must not follow the
  # caller's current kubectl namespace.
  namespace: default
spec:
  type: ClusterIP
  selector:
    app: faq-tgi-cpu-deploy
  ports:
    - name: service
      port: 8011
      targetPort: 80
---
# Deployment for the FAQ-generation LLM microservice (opea/llm-faqgen-tgi).
# Pods carry the label app=faq-micro-cpu-deploy, which the faq-micro-cpu-svc
# Service selects on.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: faq-micro-cpu-deploy
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: faq-micro-cpu-deploy
  template:
    metadata:
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
      labels:
        app: faq-micro-cpu-deploy
    spec:
      # NOTE(review): host IPC is enabled here as on the TGI pod; confirm this
      # service actually needs it.
      hostIPC: true
      containers:
        - name: faq-micro-cpu-deploy
          env:
            # Points at the faq-tgi-cpu-svc Service (port 8011) in the
            # "default" namespace.
            - name: TGI_LLM_ENDPOINT
              value: "http://faq-tgi-cpu-svc.default.svc.cluster.local:8011"
            # Placeholder value: replace with a real Hugging Face token before
            # applying.
            - name: HUGGINGFACEHUB_API_TOKEN
              value: "insert-your-huggingface-token-here"
          image: opea/llm-faqgen-tgi:v0.9
          imagePullPolicy: IfNotPresent
          # No args override: the image's default command arguments are used.
          ports:
            - containerPort: 9000
      serviceAccountName: default
---
# ClusterIP Service in front of the FAQ microservice pods: port 9004 forwards
# to container port 9000 on pods labelled app=faq-micro-cpu-deploy.
apiVersion: v1
kind: Service
metadata:
  name: faq-micro-cpu-svc
  # Pinned explicitly: a Service only selects pods in its own namespace, and
  # both the selected Deployment and the mega server that looks this Service
  # up by its short name live in "default".
  namespace: default
spec:
  type: ClusterIP
  selector:
    app: faq-micro-cpu-deploy
  ports:
    - name: service
      port: 9004
      targetPort: 9000
---
# Deployment for the FaqGen mega service (opea/faqgen). Pods carry the label
# app=faq-mega-server-cpu-deploy, which the faq-mega-server-cpu-svc Service
# selects on.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: faq-mega-server-cpu-deploy
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: faq-mega-server-cpu-deploy
  template:
    metadata:
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
      labels:
        app: faq-mega-server-cpu-deploy
    spec:
      # NOTE(review): host IPC is enabled here as on the TGI pod; confirm this
      # service actually needs it.
      hostIPC: true
      containers:
        - name: faq-mega-server-cpu-deploy
          env:
            # Service name and port of the FAQ LLM microservice
            # (faq-micro-cpu-svc exposes port 9004).
            - name: LLM_SERVICE_HOST_IP
              value: faq-micro-cpu-svc
            - name: LLM_SERVICE_PORT
              value: "9004"
            - name: MEGA_SERVICE_HOST_IP
              value: faq-mega-server-cpu-svc
            # Matches containerPort below.
            - name: MEGA_SERVICE_PORT
              value: "7777"
          image: opea/faqgen:v0.9
          imagePullPolicy: IfNotPresent
          # No args override: the image's default command arguments are used.
          ports:
            - containerPort: 7777
      serviceAccountName: default
---
# NodePort Service exposing the mega service: node port 30778 and Service
# port 7778 both forward to container port 7777 on pods labelled
# app=faq-mega-server-cpu-deploy.
apiVersion: v1
kind: Service
metadata:
  name: faq-mega-server-cpu-svc
  # Pinned explicitly: a Service only selects pods in its own namespace, and
  # the selected Deployment is declared in "default".
  namespace: default
spec:
  type: NodePort
  selector:
    app: faq-mega-server-cpu-deploy
  ports:
    - name: service
      port: 7778
      targetPort: 7777
      nodePort: 30778