[ChatQnA] Add no_wrapper benchmarking and update legacy manifests (#767)

Co-authored-by: Zhenzhong1 <zhenzhong.xu@intel.com>
This commit is contained in:
Sihan Chen
2024-09-14 16:17:15 +08:00
committed by GitHub
parent bc4bbfa849
commit 06696c8e58
27 changed files with 8746 additions and 1961 deletions

View File

@@ -31,6 +31,4 @@ USER user
WORKDIR /home/user
RUN echo 'ulimit -S -n 999999' >> ~/.bashrc
ENTRYPOINT ["python", "chatqna_no_wrapper.py"]

View File

@@ -0,0 +1,34 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
FROM python:3.11-slim
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
libgl1-mesa-glx \
libjemalloc-dev \
vim \
git
RUN useradd -m -s /bin/bash user && \
mkdir -p /home/user && \
chown -R user /home/user/
WORKDIR /home/user/
RUN git clone https://github.com/opea-project/GenAIComps.git
WORKDIR /home/user/GenAIComps
RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt && \
pip install --no-cache-dir langchain_core
COPY ./chatqna_no_wrapper.py /home/user/chatqna_no_wrapper.py
ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps
USER user
WORKDIR /home/user
ENTRYPOINT ["python", "chatqna_no_wrapper.py", "--without-rerank"]

View File

@@ -37,6 +37,17 @@ Results will be displayed in the terminal and saved as CSV file named `1_stats.c
- Set up kubectl on the master node with access to the Kubernetes cluster.
- Install Python 3.8+ on the master node for running the stress tool.
- Ensure all nodes have a local /mnt/models folder, which will be mounted by the pods.
- Ensure that the container's ulimit can meet the the number of requests.
```bash
# The way to modify the containered ulimit:
sudo systemctl edit containerd
# Add two lines:
[Service]
LimitNOFILE=65536:1048576
sudo systemctl daemon-reload; sudo systemctl restart containerd
```
### Kubernetes Cluster Example

View File

@@ -2,33 +2,27 @@
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
data:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
EMBEDDING_SERVICE_HOST_IP: embedding-svc
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
INDEX_NAME: rag-redis
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
LLM_SERVICE_HOST_IP: llm-svc
NODE_SELECTOR: chatqna-opea
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
RERANK_MODEL_ID: BAAI/bge-reranker-base
RERANK_SERVICE_HOST_IP: reranking-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
kind: ConfigMap
metadata:
name: qna-config
namespace: default
data:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
RERANK_MODEL_ID: BAAI/bge-reranker-base
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
INDEX_NAME: rag-redis
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
EMBEDDING_SERVICE_HOST_IP: embedding-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
RERANK_SERVICE_HOST_IP: reranking-svc
NODE_SELECTOR: chatqna-opea
LLM_SERVICE_HOST_IP: llm-svc
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -46,16 +40,6 @@ spec:
labels:
app: chatqna-backend-server-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
@@ -63,33 +47,35 @@ spec:
image: opea/chatqna:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
args: null
ports:
- containerPort: 8888
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: chatqna-backend-server-svc
namespace: default
spec:
type: NodePort
selector:
app: chatqna-backend-server-deploy
ports:
- name: service
nodePort: 30888
port: 8888
targetPort: 8888
nodePort: 30888
selector:
app: chatqna-backend-server-deploy
type: NodePort
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -107,70 +93,41 @@ spec:
labels:
app: dataprep-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: dataprep-deploy
hostIPC: true
containers:
- env:
- name: REDIS_URL
valueFrom:
configMapKeyRef:
name: qna-config
key: REDIS_URL
- name: TEI_ENDPOINT
valueFrom:
configMapKeyRef:
name: qna-config
key: TEI_EMBEDDING_ENDPOINT
- name: INDEX_NAME
valueFrom:
configMapKeyRef:
name: qna-config
key: INDEX_NAME
- envFrom:
- configMapRef:
name: qna-config
image: opea/dataprep-redis:latest
imagePullPolicy: IfNotPresent
name: dataprep-deploy
args: null
ports:
- containerPort: 6007
- containerPort: 6008
- containerPort: 6009
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: dataprep-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: dataprep-svc
namespace: default
spec:
type: ClusterIP
selector:
app: dataprep-deploy
ports:
- name: port1
port: 6007
targetPort: 6007
- name: port2
port: 6008
targetPort: 6008
- name: port3
port: 6009
targetPort: 6009
selector:
app: dataprep-deploy
type: ClusterIP
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -188,57 +145,59 @@ spec:
labels:
app: embedding-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
containers:
- envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
name: embedding-dependency-deploy
args:
- args:
- --model-id
- $(EMBEDDING_MODEL_ID)
- --auto-truncate
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
imagePullPolicy: IfNotPresent
name: embedding-dependency-deploy
ports:
- containerPort: 80
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
ports:
- containerPort: 80
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: embedding-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- name: model-volume
hostPath:
- hostPath:
path: /mnt/models
type: Directory
- name: shm
emptyDir:
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: embedding-dependency-svc
namespace: default
spec:
type: ClusterIP
selector:
app: embedding-dependency-deploy
ports:
- name: service
port: 6006
targetPort: 80
selector:
app: embedding-dependency-deploy
type: ClusterIP
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -256,16 +215,6 @@ spec:
labels:
app: embedding-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: embedding-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
@@ -273,32 +222,34 @@ spec:
image: opea/embedding-tei:latest
imagePullPolicy: IfNotPresent
name: embedding-deploy
args: null
ports:
- containerPort: 6000
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: embedding-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: embedding-svc
namespace: default
spec:
type: ClusterIP
selector:
app: embedding-deploy
ports:
- name: service
port: 6000
targetPort: 6000
selector:
app: embedding-deploy
type: ClusterIP
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -316,20 +267,8 @@ spec:
labels:
app: llm-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
name: llm-dependency-deploy-demo
securityContext:
capabilities:
add:
- SYS_NICE
args:
- args:
- --model-id
- $(LLM_MODEL_ID)
- --max-input-length
@@ -340,16 +279,6 @@ spec:
- '65536'
- --max-batch-prefill-tokens
- '4096'
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
@@ -361,38 +290,61 @@ spec:
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
securityContext:
capabilities:
add:
- SYS_NICE
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: llm-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- name: model-volume
hostPath:
- hostPath:
path: /mnt/models
type: Directory
- name: shm
emptyDir:
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: llm-dependency-svc
namespace: default
spec:
type: ClusterIP
selector:
app: llm-dependency-deploy
ports:
- name: service
port: 9009
targetPort: 80
selector:
app: llm-dependency-deploy
type: ClusterIP
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -410,16 +362,6 @@ spec:
labels:
app: llm-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: llm-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
@@ -427,32 +369,34 @@ spec:
image: opea/llm-tgi:latest
imagePullPolicy: IfNotPresent
name: llm-deploy
args: null
ports:
- containerPort: 9000
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: llm-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: llm-svc
namespace: default
spec:
type: ClusterIP
selector:
app: llm-deploy
ports:
- name: service
port: 9000
targetPort: 9000
selector:
app: llm-deploy
type: ClusterIP
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -470,35 +414,11 @@ spec:
labels:
app: reranking-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: reranking-dependency-deploy
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/tei-gaudi:latest
name: reranking-dependency-deploy
args:
- args:
- --model-id
- $(RERANK_MODEL_ID)
- --auto-truncate
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
@@ -512,38 +432,57 @@ spec:
value: ${HF_TOKEN}
- name: MAX_WARMUP_SEQUENCE_LENGTH
value: '512'
envFrom:
- configMapRef:
name: qna-config
image: opea/tei-gaudi:latest
imagePullPolicy: IfNotPresent
name: reranking-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: reranking-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- name: model-volume
hostPath:
- hostPath:
path: /mnt/models
type: Directory
- name: shm
emptyDir:
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: reranking-dependency-svc
namespace: default
spec:
type: ClusterIP
selector:
app: reranking-dependency-deploy
ports:
- name: service
port: 8808
targetPort: 80
selector:
app: reranking-dependency-deploy
type: ClusterIP
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -561,16 +500,6 @@ spec:
labels:
app: reranking-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: reranking-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
@@ -578,32 +507,34 @@ spec:
image: opea/reranking-tei:latest
imagePullPolicy: IfNotPresent
name: reranking-deploy
args: null
ports:
- containerPort: 8000
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: reranking-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: reranking-svc
namespace: default
spec:
type: ClusterIP
selector:
app: reranking-deploy
ports:
- name: service
port: 8000
targetPort: 8000
selector:
app: reranking-deploy
type: ClusterIP
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -621,67 +552,41 @@ spec:
labels:
app: retriever-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: retriever-deploy
hostIPC: true
containers:
- env:
- name: REDIS_URL
valueFrom:
configMapKeyRef:
name: qna-config
key: REDIS_URL
- name: TEI_EMBEDDING_ENDPOINT
valueFrom:
configMapKeyRef:
name: qna-config
key: TEI_EMBEDDING_ENDPOINT
- name: HUGGINGFACEHUB_API_TOKEN
valueFrom:
configMapKeyRef:
name: qna-config
key: HUGGINGFACEHUB_API_TOKEN
- name: INDEX_NAME
valueFrom:
configMapKeyRef:
name: qna-config
key: INDEX_NAME
- envFrom:
- configMapRef:
name: qna-config
image: opea/retriever-redis:latest
imagePullPolicy: IfNotPresent
name: retriever-deploy
args: null
ports:
- containerPort: 7000
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: retriever-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: retriever-svc
namespace: default
spec:
type: ClusterIP
selector:
app: retriever-deploy
ports:
- name: service
port: 7000
targetPort: 7000
selector:
app: retriever-deploy
type: ClusterIP
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -694,24 +599,32 @@ spec:
app: vector-db
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: vector-db
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: vector-db
containers:
- name: vector-db
- envFrom:
- configMapRef:
name: qna-config
image: redis/redis-stack:7.2.0-v9
imagePullPolicy: IfNotPresent
name: vector-db
ports:
- containerPort: 6379
- containerPort: 8001
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: vector-db
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
@@ -719,9 +632,6 @@ metadata:
name: vector-db
namespace: default
spec:
type: ClusterIP
selector:
app: vector-db
ports:
- name: vector-db-service
port: 6379
@@ -729,6 +639,7 @@ spec:
- name: vector-db-insight
port: 8001
targetPort: 8001
selector:
app: vector-db
type: ClusterIP
---

View File

@@ -2,33 +2,27 @@
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
data:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
EMBEDDING_SERVICE_HOST_IP: embedding-svc
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
INDEX_NAME: rag-redis
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
LLM_SERVICE_HOST_IP: llm-svc
NODE_SELECTOR: chatqna-opea
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
RERANK_MODEL_ID: BAAI/bge-reranker-base
RERANK_SERVICE_HOST_IP: reranking-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
kind: ConfigMap
metadata:
name: qna-config
namespace: default
data:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
RERANK_MODEL_ID: BAAI/bge-reranker-base
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
INDEX_NAME: rag-redis
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
EMBEDDING_SERVICE_HOST_IP: embedding-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
RERANK_SERVICE_HOST_IP: reranking-svc
NODE_SELECTOR: chatqna-opea
LLM_SERVICE_HOST_IP: llm-svc
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -46,16 +40,6 @@ spec:
labels:
app: chatqna-backend-server-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
@@ -63,33 +47,35 @@ spec:
image: opea/chatqna:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
args: null
ports:
- containerPort: 8888
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: chatqna-backend-server-svc
namespace: default
spec:
type: NodePort
selector:
app: chatqna-backend-server-deploy
ports:
- name: service
nodePort: 30888
port: 8888
targetPort: 8888
nodePort: 30888
selector:
app: chatqna-backend-server-deploy
type: NodePort
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -107,70 +93,41 @@ spec:
labels:
app: dataprep-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: dataprep-deploy
hostIPC: true
containers:
- env:
- name: REDIS_URL
valueFrom:
configMapKeyRef:
name: qna-config
key: REDIS_URL
- name: TEI_ENDPOINT
valueFrom:
configMapKeyRef:
name: qna-config
key: TEI_EMBEDDING_ENDPOINT
- name: INDEX_NAME
valueFrom:
configMapKeyRef:
name: qna-config
key: INDEX_NAME
- envFrom:
- configMapRef:
name: qna-config
image: opea/dataprep-redis:latest
imagePullPolicy: IfNotPresent
name: dataprep-deploy
args: null
ports:
- containerPort: 6007
- containerPort: 6008
- containerPort: 6009
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: dataprep-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: dataprep-svc
namespace: default
spec:
type: ClusterIP
selector:
app: dataprep-deploy
ports:
- name: port1
port: 6007
targetPort: 6007
- name: port2
port: 6008
targetPort: 6008
- name: port3
port: 6009
targetPort: 6009
selector:
app: dataprep-deploy
type: ClusterIP
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -188,57 +145,59 @@ spec:
labels:
app: embedding-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
containers:
- envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
name: embedding-dependency-deploy
args:
- args:
- --model-id
- $(EMBEDDING_MODEL_ID)
- --auto-truncate
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
imagePullPolicy: IfNotPresent
name: embedding-dependency-deploy
ports:
- containerPort: 80
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
ports:
- containerPort: 80
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: embedding-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- name: model-volume
hostPath:
- hostPath:
path: /mnt/models
type: Directory
- name: shm
emptyDir:
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: embedding-dependency-svc
namespace: default
spec:
type: ClusterIP
selector:
app: embedding-dependency-deploy
ports:
- name: service
port: 6006
targetPort: 80
selector:
app: embedding-dependency-deploy
type: ClusterIP
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -256,16 +215,6 @@ spec:
labels:
app: embedding-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: embedding-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
@@ -273,32 +222,34 @@ spec:
image: opea/embedding-tei:latest
imagePullPolicy: IfNotPresent
name: embedding-deploy
args: null
ports:
- containerPort: 6000
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: embedding-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: embedding-svc
namespace: default
spec:
type: ClusterIP
selector:
app: embedding-deploy
ports:
- name: service
port: 6000
targetPort: 6000
selector:
app: embedding-deploy
type: ClusterIP
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -316,20 +267,8 @@ spec:
labels:
app: llm-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
name: llm-dependency-deploy-demo
securityContext:
capabilities:
add:
- SYS_NICE
args:
- args:
- --model-id
- $(LLM_MODEL_ID)
- --max-input-length
@@ -340,16 +279,6 @@ spec:
- '65536'
- --max-batch-prefill-tokens
- '4096'
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
@@ -361,38 +290,61 @@ spec:
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
securityContext:
capabilities:
add:
- SYS_NICE
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: llm-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- name: model-volume
hostPath:
- hostPath:
path: /mnt/models
type: Directory
- name: shm
emptyDir:
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: llm-dependency-svc
namespace: default
spec:
type: ClusterIP
selector:
app: llm-dependency-deploy
ports:
- name: service
port: 9009
targetPort: 80
selector:
app: llm-dependency-deploy
type: ClusterIP
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -410,16 +362,6 @@ spec:
labels:
app: llm-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: llm-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
@@ -427,32 +369,34 @@ spec:
image: opea/llm-tgi:latest
imagePullPolicy: IfNotPresent
name: llm-deploy
args: null
ports:
- containerPort: 9000
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: llm-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: llm-svc
namespace: default
spec:
type: ClusterIP
selector:
app: llm-deploy
ports:
- name: service
port: 9000
targetPort: 9000
selector:
app: llm-deploy
type: ClusterIP
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -470,35 +414,11 @@ spec:
labels:
app: reranking-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: reranking-dependency-deploy
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/tei-gaudi:latest
name: reranking-dependency-deploy
args:
- args:
- --model-id
- $(RERANK_MODEL_ID)
- --auto-truncate
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
@@ -512,38 +432,57 @@ spec:
value: ${HF_TOKEN}
- name: MAX_WARMUP_SEQUENCE_LENGTH
value: '512'
envFrom:
- configMapRef:
name: qna-config
image: opea/tei-gaudi:latest
imagePullPolicy: IfNotPresent
name: reranking-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: reranking-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- name: model-volume
hostPath:
- hostPath:
path: /mnt/models
type: Directory
- name: shm
emptyDir:
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: reranking-dependency-svc
namespace: default
spec:
type: ClusterIP
selector:
app: reranking-dependency-deploy
ports:
- name: service
port: 8808
targetPort: 80
selector:
app: reranking-dependency-deploy
type: ClusterIP
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -561,16 +500,6 @@ spec:
labels:
app: reranking-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: reranking-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
@@ -578,32 +507,34 @@ spec:
image: opea/reranking-tei:latest
imagePullPolicy: IfNotPresent
name: reranking-deploy
args: null
ports:
- containerPort: 8000
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: reranking-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: reranking-svc
namespace: default
spec:
type: ClusterIP
selector:
app: reranking-deploy
ports:
- name: service
port: 8000
targetPort: 8000
selector:
app: reranking-deploy
type: ClusterIP
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -621,67 +552,41 @@ spec:
labels:
app: retriever-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: retriever-deploy
hostIPC: true
containers:
- env:
- name: REDIS_URL
valueFrom:
configMapKeyRef:
name: qna-config
key: REDIS_URL
- name: TEI_EMBEDDING_ENDPOINT
valueFrom:
configMapKeyRef:
name: qna-config
key: TEI_EMBEDDING_ENDPOINT
- name: HUGGINGFACEHUB_API_TOKEN
valueFrom:
configMapKeyRef:
name: qna-config
key: HUGGINGFACEHUB_API_TOKEN
- name: INDEX_NAME
valueFrom:
configMapKeyRef:
name: qna-config
key: INDEX_NAME
- envFrom:
- configMapRef:
name: qna-config
image: opea/retriever-redis:latest
imagePullPolicy: IfNotPresent
name: retriever-deploy
args: null
ports:
- containerPort: 7000
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: retriever-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: retriever-svc
namespace: default
spec:
type: ClusterIP
selector:
app: retriever-deploy
ports:
- name: service
port: 7000
targetPort: 7000
selector:
app: retriever-deploy
type: ClusterIP
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -694,24 +599,32 @@ spec:
app: vector-db
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: vector-db
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: vector-db
containers:
- name: vector-db
- envFrom:
- configMapRef:
name: qna-config
image: redis/redis-stack:7.2.0-v9
imagePullPolicy: IfNotPresent
name: vector-db
ports:
- containerPort: 6379
- containerPort: 8001
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: vector-db
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
@@ -719,9 +632,6 @@ metadata:
name: vector-db
namespace: default
spec:
type: ClusterIP
selector:
app: vector-db
ports:
- name: vector-db-service
port: 6379
@@ -729,6 +639,7 @@ spec:
- name: vector-db-insight
port: 8001
targetPort: 8001
selector:
app: vector-db
type: ClusterIP
---

View File

@@ -2,33 +2,27 @@
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
data:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
EMBEDDING_SERVICE_HOST_IP: embedding-svc
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
INDEX_NAME: rag-redis
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
LLM_SERVICE_HOST_IP: llm-svc
NODE_SELECTOR: chatqna-opea
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
RERANK_MODEL_ID: BAAI/bge-reranker-base
RERANK_SERVICE_HOST_IP: reranking-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
kind: ConfigMap
metadata:
name: qna-config
namespace: default
data:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
RERANK_MODEL_ID: BAAI/bge-reranker-base
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
INDEX_NAME: rag-redis
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
EMBEDDING_SERVICE_HOST_IP: embedding-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
RERANK_SERVICE_HOST_IP: reranking-svc
NODE_SELECTOR: chatqna-opea
LLM_SERVICE_HOST_IP: llm-svc
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -46,16 +40,6 @@ spec:
labels:
app: chatqna-backend-server-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
@@ -63,33 +47,35 @@ spec:
image: opea/chatqna:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
args: null
ports:
- containerPort: 8888
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: chatqna-backend-server-svc
namespace: default
spec:
type: NodePort
selector:
app: chatqna-backend-server-deploy
ports:
- name: service
nodePort: 30888
port: 8888
targetPort: 8888
nodePort: 30888
selector:
app: chatqna-backend-server-deploy
type: NodePort
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -107,70 +93,41 @@ spec:
labels:
app: dataprep-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: dataprep-deploy
hostIPC: true
containers:
- env:
- name: REDIS_URL
valueFrom:
configMapKeyRef:
name: qna-config
key: REDIS_URL
- name: TEI_ENDPOINT
valueFrom:
configMapKeyRef:
name: qna-config
key: TEI_EMBEDDING_ENDPOINT
- name: INDEX_NAME
valueFrom:
configMapKeyRef:
name: qna-config
key: INDEX_NAME
- envFrom:
- configMapRef:
name: qna-config
image: opea/dataprep-redis:latest
imagePullPolicy: IfNotPresent
name: dataprep-deploy
args: null
ports:
- containerPort: 6007
- containerPort: 6008
- containerPort: 6009
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: dataprep-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: dataprep-svc
namespace: default
spec:
type: ClusterIP
selector:
app: dataprep-deploy
ports:
- name: port1
port: 6007
targetPort: 6007
- name: port2
port: 6008
targetPort: 6008
- name: port3
port: 6009
targetPort: 6009
selector:
app: dataprep-deploy
type: ClusterIP
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -188,57 +145,59 @@ spec:
labels:
app: embedding-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
containers:
- envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
name: embedding-dependency-deploy
args:
- args:
- --model-id
- $(EMBEDDING_MODEL_ID)
- --auto-truncate
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
imagePullPolicy: IfNotPresent
name: embedding-dependency-deploy
ports:
- containerPort: 80
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
ports:
- containerPort: 80
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: embedding-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- name: model-volume
hostPath:
- hostPath:
path: /mnt/models
type: Directory
- name: shm
emptyDir:
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: embedding-dependency-svc
namespace: default
spec:
type: ClusterIP
selector:
app: embedding-dependency-deploy
ports:
- name: service
port: 6006
targetPort: 80
selector:
app: embedding-dependency-deploy
type: ClusterIP
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -256,16 +215,6 @@ spec:
labels:
app: embedding-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: embedding-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
@@ -273,32 +222,34 @@ spec:
image: opea/embedding-tei:latest
imagePullPolicy: IfNotPresent
name: embedding-deploy
args: null
ports:
- containerPort: 6000
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: embedding-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: embedding-svc
namespace: default
spec:
type: ClusterIP
selector:
app: embedding-deploy
ports:
- name: service
port: 6000
targetPort: 6000
selector:
app: embedding-deploy
type: ClusterIP
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -316,20 +267,8 @@ spec:
labels:
app: llm-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
name: llm-dependency-deploy-demo
securityContext:
capabilities:
add:
- SYS_NICE
args:
- args:
- --model-id
- $(LLM_MODEL_ID)
- --max-input-length
@@ -340,16 +279,6 @@ spec:
- '65536'
- --max-batch-prefill-tokens
- '4096'
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
@@ -361,38 +290,61 @@ spec:
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
securityContext:
capabilities:
add:
- SYS_NICE
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: llm-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- name: model-volume
hostPath:
- hostPath:
path: /mnt/models
type: Directory
- name: shm
emptyDir:
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: llm-dependency-svc
namespace: default
spec:
type: ClusterIP
selector:
app: llm-dependency-deploy
ports:
- name: service
port: 9009
targetPort: 80
selector:
app: llm-dependency-deploy
type: ClusterIP
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -410,16 +362,6 @@ spec:
labels:
app: llm-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: llm-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
@@ -427,32 +369,34 @@ spec:
image: opea/llm-tgi:latest
imagePullPolicy: IfNotPresent
name: llm-deploy
args: null
ports:
- containerPort: 9000
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: llm-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: llm-svc
namespace: default
spec:
type: ClusterIP
selector:
app: llm-deploy
ports:
- name: service
port: 9000
targetPort: 9000
selector:
app: llm-deploy
type: ClusterIP
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -470,35 +414,11 @@ spec:
labels:
app: reranking-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: reranking-dependency-deploy
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/tei-gaudi:latest
name: reranking-dependency-deploy
args:
- args:
- --model-id
- $(RERANK_MODEL_ID)
- --auto-truncate
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
@@ -512,38 +432,57 @@ spec:
value: ${HF_TOKEN}
- name: MAX_WARMUP_SEQUENCE_LENGTH
value: '512'
envFrom:
- configMapRef:
name: qna-config
image: opea/tei-gaudi:latest
imagePullPolicy: IfNotPresent
name: reranking-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: reranking-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- name: model-volume
hostPath:
- hostPath:
path: /mnt/models
type: Directory
- name: shm
emptyDir:
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: reranking-dependency-svc
namespace: default
spec:
type: ClusterIP
selector:
app: reranking-dependency-deploy
ports:
- name: service
port: 8808
targetPort: 80
selector:
app: reranking-dependency-deploy
type: ClusterIP
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -561,16 +500,6 @@ spec:
labels:
app: reranking-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: reranking-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
@@ -578,32 +507,34 @@ spec:
image: opea/reranking-tei:latest
imagePullPolicy: IfNotPresent
name: reranking-deploy
args: null
ports:
- containerPort: 8000
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: reranking-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: reranking-svc
namespace: default
spec:
type: ClusterIP
selector:
app: reranking-deploy
ports:
- name: service
port: 8000
targetPort: 8000
selector:
app: reranking-deploy
type: ClusterIP
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -621,67 +552,41 @@ spec:
labels:
app: retriever-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: retriever-deploy
hostIPC: true
containers:
- env:
- name: REDIS_URL
valueFrom:
configMapKeyRef:
name: qna-config
key: REDIS_URL
- name: TEI_EMBEDDING_ENDPOINT
valueFrom:
configMapKeyRef:
name: qna-config
key: TEI_EMBEDDING_ENDPOINT
- name: HUGGINGFACEHUB_API_TOKEN
valueFrom:
configMapKeyRef:
name: qna-config
key: HUGGINGFACEHUB_API_TOKEN
- name: INDEX_NAME
valueFrom:
configMapKeyRef:
name: qna-config
key: INDEX_NAME
- envFrom:
- configMapRef:
name: qna-config
image: opea/retriever-redis:latest
imagePullPolicy: IfNotPresent
name: retriever-deploy
args: null
ports:
- containerPort: 7000
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: retriever-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: retriever-svc
namespace: default
spec:
type: ClusterIP
selector:
app: retriever-deploy
ports:
- name: service
port: 7000
targetPort: 7000
selector:
app: retriever-deploy
type: ClusterIP
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -694,24 +599,32 @@ spec:
app: vector-db
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: vector-db
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: vector-db
containers:
- name: vector-db
- envFrom:
- configMapRef:
name: qna-config
image: redis/redis-stack:7.2.0-v9
imagePullPolicy: IfNotPresent
name: vector-db
ports:
- containerPort: 6379
- containerPort: 8001
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: vector-db
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
@@ -719,9 +632,6 @@ metadata:
name: vector-db
namespace: default
spec:
type: ClusterIP
selector:
app: vector-db
ports:
- name: vector-db-service
port: 6379
@@ -729,6 +639,7 @@ spec:
- name: vector-db-insight
port: 8001
targetPort: 8001
selector:
app: vector-db
type: ClusterIP
---

View File

@@ -0,0 +1,489 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
data:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
INDEX_NAME: rag-redis
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
LLM_SERVER_HOST_IP: llm-dependency-svc
NODE_SELECTOR: chatqna-opea
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
RERANK_MODEL_ID: BAAI/bge-reranker-base
RERANK_SERVER_HOST_IP: reranking-dependency-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
kind: ConfigMap
metadata:
name: qna-config
namespace: default
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-backend-server-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: chatqna-backend-server-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: chatqna-backend-server-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna-no-wrapper:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
ports:
- containerPort: 8888
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: chatqna-backend-server-svc
namespace: default
spec:
ports:
- name: service
nodePort: 30888
port: 8888
targetPort: 8888
selector:
app: chatqna-backend-server-deploy
type: NodePort
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: dataprep-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: dataprep-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: dataprep-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/dataprep-redis:latest
imagePullPolicy: IfNotPresent
name: dataprep-deploy
ports:
- containerPort: 6007
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: dataprep-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: dataprep-svc
namespace: default
spec:
ports:
- name: port1
port: 6007
targetPort: 6007
selector:
app: dataprep-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-dependency-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: embedding-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(EMBEDDING_MODEL_ID)
- --auto-truncate
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
imagePullPolicy: IfNotPresent
name: embedding-dependency-deploy
ports:
- containerPort: 80
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: embedding-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: embedding-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 6006
targetPort: 80
selector:
app: embedding-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-dependency-deploy
namespace: default
spec:
replicas: 63
selector:
matchLabels:
app: llm-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(LLM_MODEL_ID)
- --max-input-length
- '2048'
- --max-total-tokens
- '4096'
- --max-batch-total-tokens
- '65536'
- --max-batch-prefill-tokens
- '4096'
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
securityContext:
capabilities:
add:
- SYS_NICE
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: llm-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: llm-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 9009
targetPort: 80
selector:
app: llm-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: reranking-dependency-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: reranking-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: reranking-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(RERANK_MODEL_ID)
- --auto-truncate
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
- name: MAX_WARMUP_SEQUENCE_LENGTH
value: '512'
envFrom:
- configMapRef:
name: qna-config
image: opea/tei-gaudi:latest
imagePullPolicy: IfNotPresent
name: reranking-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: reranking-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: reranking-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 8808
targetPort: 80
selector:
app: reranking-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: retriever-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: retriever-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: retriever-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/retriever-redis:latest
imagePullPolicy: IfNotPresent
name: retriever-deploy
ports:
- containerPort: 7000
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: retriever-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: retriever-svc
namespace: default
spec:
ports:
- name: service
port: 7000
targetPort: 7000
selector:
app: retriever-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: vector-db
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: vector-db
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: vector-db
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: redis/redis-stack:7.2.0-v9
imagePullPolicy: IfNotPresent
name: vector-db
ports:
- containerPort: 6379
- containerPort: 8001
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: vector-db
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: vector-db
namespace: default
spec:
ports:
- name: vector-db-service
port: 6379
targetPort: 6379
- name: vector-db-insight
port: 8001
targetPort: 8001
selector:
app: vector-db
type: ClusterIP
---

View File

@@ -0,0 +1,489 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
data:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
INDEX_NAME: rag-redis
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
LLM_SERVER_HOST_IP: llm-dependency-svc
NODE_SELECTOR: chatqna-opea
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
RERANK_MODEL_ID: BAAI/bge-reranker-base
RERANK_SERVER_HOST_IP: reranking-dependency-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
kind: ConfigMap
metadata:
name: qna-config
namespace: default
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-backend-server-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: chatqna-backend-server-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: chatqna-backend-server-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna-no-wrapper:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
ports:
- containerPort: 8888
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: chatqna-backend-server-svc
namespace: default
spec:
ports:
- name: service
nodePort: 30888
port: 8888
targetPort: 8888
selector:
app: chatqna-backend-server-deploy
type: NodePort
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: dataprep-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: dataprep-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: dataprep-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/dataprep-redis:latest
imagePullPolicy: IfNotPresent
name: dataprep-deploy
ports:
- containerPort: 6007
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: dataprep-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: dataprep-svc
namespace: default
spec:
ports:
- name: port1
port: 6007
targetPort: 6007
selector:
app: dataprep-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-dependency-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: embedding-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(EMBEDDING_MODEL_ID)
- --auto-truncate
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
imagePullPolicy: IfNotPresent
name: embedding-dependency-deploy
ports:
- containerPort: 80
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: embedding-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: embedding-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 6006
targetPort: 80
selector:
app: embedding-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-dependency-deploy
namespace: default
spec:
replicas: 31
selector:
matchLabels:
app: llm-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(LLM_MODEL_ID)
- --max-input-length
- '2048'
- --max-total-tokens
- '4096'
- --max-batch-total-tokens
- '65536'
- --max-batch-prefill-tokens
- '4096'
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
securityContext:
capabilities:
add:
- SYS_NICE
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: llm-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: llm-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 9009
targetPort: 80
selector:
app: llm-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: reranking-dependency-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: reranking-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: reranking-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(RERANK_MODEL_ID)
- --auto-truncate
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
- name: MAX_WARMUP_SEQUENCE_LENGTH
value: '512'
envFrom:
- configMapRef:
name: qna-config
image: opea/tei-gaudi:latest
imagePullPolicy: IfNotPresent
name: reranking-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: reranking-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: reranking-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 8808
targetPort: 80
selector:
app: reranking-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: retriever-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: retriever-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: retriever-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/retriever-redis:latest
imagePullPolicy: IfNotPresent
name: retriever-deploy
ports:
- containerPort: 7000
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: retriever-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: retriever-svc
namespace: default
spec:
ports:
- name: service
port: 7000
targetPort: 7000
selector:
app: retriever-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: vector-db
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: vector-db
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: vector-db
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: redis/redis-stack:7.2.0-v9
imagePullPolicy: IfNotPresent
name: vector-db
ports:
- containerPort: 6379
- containerPort: 8001
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: vector-db
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: vector-db
namespace: default
spec:
ports:
- name: vector-db-service
port: 6379
targetPort: 6379
- name: vector-db-insight
port: 8001
targetPort: 8001
selector:
app: vector-db
type: ClusterIP
---

View File

@@ -0,0 +1,489 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
data:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
INDEX_NAME: rag-redis
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
LLM_SERVER_HOST_IP: llm-dependency-svc
NODE_SELECTOR: chatqna-opea
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
RERANK_MODEL_ID: BAAI/bge-reranker-base
RERANK_SERVER_HOST_IP: reranking-dependency-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
kind: ConfigMap
metadata:
name: qna-config
namespace: default
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-backend-server-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: chatqna-backend-server-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: chatqna-backend-server-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna-no-wrapper:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
ports:
- containerPort: 8888
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: chatqna-backend-server-svc
namespace: default
spec:
ports:
- name: service
nodePort: 30888
port: 8888
targetPort: 8888
selector:
app: chatqna-backend-server-deploy
type: NodePort
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: dataprep-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: dataprep-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: dataprep-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/dataprep-redis:latest
imagePullPolicy: IfNotPresent
name: dataprep-deploy
ports:
- containerPort: 6007
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: dataprep-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: dataprep-svc
namespace: default
spec:
ports:
- name: port1
port: 6007
targetPort: 6007
selector:
app: dataprep-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-dependency-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: embedding-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(EMBEDDING_MODEL_ID)
- --auto-truncate
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
imagePullPolicy: IfNotPresent
name: embedding-dependency-deploy
ports:
- containerPort: 80
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: embedding-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: embedding-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 6006
targetPort: 80
selector:
app: embedding-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-dependency-deploy
namespace: default
spec:
replicas: 7
selector:
matchLabels:
app: llm-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(LLM_MODEL_ID)
- --max-input-length
- '2048'
- --max-total-tokens
- '4096'
- --max-batch-total-tokens
- '65536'
- --max-batch-prefill-tokens
- '4096'
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
securityContext:
capabilities:
add:
- SYS_NICE
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: llm-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: llm-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 9009
targetPort: 80
selector:
app: llm-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: reranking-dependency-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: reranking-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: reranking-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(RERANK_MODEL_ID)
- --auto-truncate
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
- name: MAX_WARMUP_SEQUENCE_LENGTH
value: '512'
envFrom:
- configMapRef:
name: qna-config
image: opea/tei-gaudi:latest
imagePullPolicy: IfNotPresent
name: reranking-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: reranking-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: reranking-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 8808
targetPort: 80
selector:
app: reranking-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: retriever-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: retriever-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: retriever-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/retriever-redis:latest
imagePullPolicy: IfNotPresent
name: retriever-deploy
ports:
- containerPort: 7000
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: retriever-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: retriever-svc
namespace: default
spec:
ports:
- name: service
port: 7000
targetPort: 7000
selector:
app: retriever-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: vector-db
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: vector-db
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: vector-db
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: redis/redis-stack:7.2.0-v9
imagePullPolicy: IfNotPresent
name: vector-db
ports:
- containerPort: 6379
- containerPort: 8001
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: vector-db
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: vector-db
namespace: default
spec:
ports:
- name: vector-db-service
port: 6379
targetPort: 6379
- name: vector-db-insight
port: 8001
targetPort: 8001
selector:
app: vector-db
type: ClusterIP
---

View File

@@ -0,0 +1,489 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
data:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
INDEX_NAME: rag-redis
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
LLM_SERVER_HOST_IP: llm-dependency-svc
NODE_SELECTOR: chatqna-opea
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
RERANK_MODEL_ID: BAAI/bge-reranker-base
RERANK_SERVER_HOST_IP: reranking-dependency-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
kind: ConfigMap
metadata:
name: qna-config
namespace: default
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-backend-server-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: chatqna-backend-server-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: chatqna-backend-server-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna-no-wrapper:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
ports:
- containerPort: 8888
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: chatqna-backend-server-svc
namespace: default
spec:
ports:
- name: service
nodePort: 30888
port: 8888
targetPort: 8888
selector:
app: chatqna-backend-server-deploy
type: NodePort
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: dataprep-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: dataprep-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: dataprep-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/dataprep-redis:latest
imagePullPolicy: IfNotPresent
name: dataprep-deploy
ports:
- containerPort: 6007
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: dataprep-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: dataprep-svc
namespace: default
spec:
ports:
- name: port1
port: 6007
targetPort: 6007
selector:
app: dataprep-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-dependency-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: embedding-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(EMBEDDING_MODEL_ID)
- --auto-truncate
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
imagePullPolicy: IfNotPresent
name: embedding-dependency-deploy
ports:
- containerPort: 80
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: embedding-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: embedding-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 6006
targetPort: 80
selector:
app: embedding-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-dependency-deploy
namespace: default
spec:
replicas: 15
selector:
matchLabels:
app: llm-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(LLM_MODEL_ID)
- --max-input-length
- '2048'
- --max-total-tokens
- '4096'
- --max-batch-total-tokens
- '65536'
- --max-batch-prefill-tokens
- '4096'
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
securityContext:
capabilities:
add:
- SYS_NICE
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: llm-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: llm-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 9009
targetPort: 80
selector:
app: llm-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: reranking-dependency-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: reranking-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: reranking-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(RERANK_MODEL_ID)
- --auto-truncate
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
- name: MAX_WARMUP_SEQUENCE_LENGTH
value: '512'
envFrom:
- configMapRef:
name: qna-config
image: opea/tei-gaudi:latest
imagePullPolicy: IfNotPresent
name: reranking-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: reranking-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: reranking-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 8808
targetPort: 80
selector:
app: reranking-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: retriever-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: retriever-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: retriever-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/retriever-redis:latest
imagePullPolicy: IfNotPresent
name: retriever-deploy
ports:
- containerPort: 7000
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: retriever-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: retriever-svc
namespace: default
spec:
ports:
- name: service
port: 7000
targetPort: 7000
selector:
app: retriever-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: vector-db
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: vector-db
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: vector-db
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: redis/redis-stack:7.2.0-v9
imagePullPolicy: IfNotPresent
name: vector-db
ports:
- containerPort: 6379
- containerPort: 8001
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: vector-db
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: vector-db
namespace: default
spec:
ports:
- name: vector-db-service
port: 6379
targetPort: 6379
- name: vector-db-insight
port: 8001
targetPort: 8001
selector:
app: vector-db
type: ClusterIP
---

View File

@@ -0,0 +1,403 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
data:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
INDEX_NAME: rag-redis
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
LLM_SERVER_HOST_IP: llm-dependency-svc
NODE_SELECTOR: chatqna-opea
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
RERANK_MODEL_ID: BAAI/bge-reranker-base
RERANK_SERVER_HOST_IP: reranking-dependency-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
kind: ConfigMap
metadata:
name: qna-config
namespace: default
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-backend-server-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: chatqna-backend-server-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: chatqna-backend-server-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna-no-wrapper-without-rerank:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
ports:
- containerPort: 8888
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: chatqna-backend-server-svc
namespace: default
spec:
ports:
- name: service
nodePort: 30888
port: 8888
targetPort: 8888
selector:
app: chatqna-backend-server-deploy
type: NodePort
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: dataprep-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: dataprep-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: dataprep-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/dataprep-redis:latest
imagePullPolicy: IfNotPresent
name: dataprep-deploy
ports:
- containerPort: 6007
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: dataprep-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: dataprep-svc
namespace: default
spec:
ports:
- name: port1
port: 6007
targetPort: 6007
selector:
app: dataprep-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-dependency-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: embedding-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(EMBEDDING_MODEL_ID)
- --auto-truncate
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
imagePullPolicy: IfNotPresent
name: embedding-dependency-deploy
ports:
- containerPort: 80
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: embedding-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: embedding-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 6006
targetPort: 80
selector:
app: embedding-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-dependency-deploy
namespace: default
spec:
replicas: 64
selector:
matchLabels:
app: llm-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(LLM_MODEL_ID)
- --max-input-length
- '2048'
- --max-total-tokens
- '4096'
- --max-batch-total-tokens
- '65536'
- --max-batch-prefill-tokens
- '4096'
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
securityContext:
capabilities:
add:
- SYS_NICE
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: llm-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: llm-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 9009
targetPort: 80
selector:
app: llm-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: retriever-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: retriever-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: retriever-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/retriever-redis:latest
imagePullPolicy: IfNotPresent
name: retriever-deploy
ports:
- containerPort: 7000
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: retriever-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: retriever-svc
namespace: default
spec:
ports:
- name: service
port: 7000
targetPort: 7000
selector:
app: retriever-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: vector-db
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: vector-db
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: vector-db
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: redis/redis-stack:7.2.0-v9
imagePullPolicy: IfNotPresent
name: vector-db
ports:
- containerPort: 6379
- containerPort: 8001
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: vector-db
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: vector-db
namespace: default
spec:
ports:
- name: vector-db-service
port: 6379
targetPort: 6379
- name: vector-db-insight
port: 8001
targetPort: 8001
selector:
app: vector-db
type: ClusterIP
---

View File

@@ -0,0 +1,403 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
data:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
INDEX_NAME: rag-redis
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
LLM_SERVER_HOST_IP: llm-dependency-svc
NODE_SELECTOR: chatqna-opea
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
RERANK_MODEL_ID: BAAI/bge-reranker-base
RERANK_SERVER_HOST_IP: reranking-dependency-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
kind: ConfigMap
metadata:
name: qna-config
namespace: default
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-backend-server-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: chatqna-backend-server-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: chatqna-backend-server-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna-no-wrapper-without-rerank:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
ports:
- containerPort: 8888
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: chatqna-backend-server-svc
namespace: default
spec:
ports:
- name: service
nodePort: 30888
port: 8888
targetPort: 8888
selector:
app: chatqna-backend-server-deploy
type: NodePort
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: dataprep-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: dataprep-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: dataprep-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/dataprep-redis:latest
imagePullPolicy: IfNotPresent
name: dataprep-deploy
ports:
- containerPort: 6007
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: dataprep-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: dataprep-svc
namespace: default
spec:
ports:
- name: port1
port: 6007
targetPort: 6007
selector:
app: dataprep-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-dependency-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: embedding-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(EMBEDDING_MODEL_ID)
- --auto-truncate
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
imagePullPolicy: IfNotPresent
name: embedding-dependency-deploy
ports:
- containerPort: 80
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: embedding-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: embedding-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 6006
targetPort: 80
selector:
app: embedding-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-dependency-deploy
namespace: default
spec:
replicas: 32
selector:
matchLabels:
app: llm-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(LLM_MODEL_ID)
- --max-input-length
- '2048'
- --max-total-tokens
- '4096'
- --max-batch-total-tokens
- '65536'
- --max-batch-prefill-tokens
- '4096'
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
securityContext:
capabilities:
add:
- SYS_NICE
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: llm-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: llm-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 9009
targetPort: 80
selector:
app: llm-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: retriever-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: retriever-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: retriever-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/retriever-redis:latest
imagePullPolicy: IfNotPresent
name: retriever-deploy
ports:
- containerPort: 7000
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: retriever-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: retriever-svc
namespace: default
spec:
ports:
- name: service
port: 7000
targetPort: 7000
selector:
app: retriever-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: vector-db
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: vector-db
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: vector-db
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: redis/redis-stack:7.2.0-v9
imagePullPolicy: IfNotPresent
name: vector-db
ports:
- containerPort: 6379
- containerPort: 8001
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: vector-db
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: vector-db
namespace: default
spec:
ports:
- name: vector-db-service
port: 6379
targetPort: 6379
- name: vector-db-insight
port: 8001
targetPort: 8001
selector:
app: vector-db
type: ClusterIP
---

View File

@@ -0,0 +1,403 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
data:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
INDEX_NAME: rag-redis
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
LLM_SERVER_HOST_IP: llm-dependency-svc
NODE_SELECTOR: chatqna-opea
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
RERANK_MODEL_ID: BAAI/bge-reranker-base
RERANK_SERVER_HOST_IP: reranking-dependency-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
kind: ConfigMap
metadata:
name: qna-config
namespace: default
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-backend-server-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: chatqna-backend-server-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: chatqna-backend-server-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna-no-wrapper-without-rerank:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
ports:
- containerPort: 8888
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: chatqna-backend-server-svc
namespace: default
spec:
ports:
- name: service
nodePort: 30888
port: 8888
targetPort: 8888
selector:
app: chatqna-backend-server-deploy
type: NodePort
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: dataprep-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: dataprep-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: dataprep-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/dataprep-redis:latest
imagePullPolicy: IfNotPresent
name: dataprep-deploy
ports:
- containerPort: 6007
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: dataprep-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: dataprep-svc
namespace: default
spec:
ports:
- name: port1
port: 6007
targetPort: 6007
selector:
app: dataprep-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-dependency-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: embedding-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(EMBEDDING_MODEL_ID)
- --auto-truncate
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
imagePullPolicy: IfNotPresent
name: embedding-dependency-deploy
ports:
- containerPort: 80
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: embedding-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: embedding-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 6006
targetPort: 80
selector:
app: embedding-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-dependency-deploy
namespace: default
spec:
replicas: 8
selector:
matchLabels:
app: llm-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(LLM_MODEL_ID)
- --max-input-length
- '2048'
- --max-total-tokens
- '4096'
- --max-batch-total-tokens
- '65536'
- --max-batch-prefill-tokens
- '4096'
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
securityContext:
capabilities:
add:
- SYS_NICE
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: llm-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: llm-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 9009
targetPort: 80
selector:
app: llm-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: retriever-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: retriever-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: retriever-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/retriever-redis:latest
imagePullPolicy: IfNotPresent
name: retriever-deploy
ports:
- containerPort: 7000
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: retriever-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: retriever-svc
namespace: default
spec:
ports:
- name: service
port: 7000
targetPort: 7000
selector:
app: retriever-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: vector-db
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: vector-db
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: vector-db
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: redis/redis-stack:7.2.0-v9
imagePullPolicy: IfNotPresent
name: vector-db
ports:
- containerPort: 6379
- containerPort: 8001
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: vector-db
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: vector-db
namespace: default
spec:
ports:
- name: vector-db-service
port: 6379
targetPort: 6379
- name: vector-db-insight
port: 8001
targetPort: 8001
selector:
app: vector-db
type: ClusterIP
---

View File

@@ -0,0 +1,403 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
data:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
INDEX_NAME: rag-redis
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
LLM_SERVER_HOST_IP: llm-dependency-svc
NODE_SELECTOR: chatqna-opea
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
RERANK_MODEL_ID: BAAI/bge-reranker-base
RERANK_SERVER_HOST_IP: reranking-dependency-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
kind: ConfigMap
metadata:
name: qna-config
namespace: default
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-backend-server-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: chatqna-backend-server-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: chatqna-backend-server-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna-no-wrapper-without-rerank:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
ports:
- containerPort: 8888
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: chatqna-backend-server-svc
namespace: default
spec:
ports:
- name: service
nodePort: 30888
port: 8888
targetPort: 8888
selector:
app: chatqna-backend-server-deploy
type: NodePort
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: dataprep-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: dataprep-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: dataprep-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/dataprep-redis:latest
imagePullPolicy: IfNotPresent
name: dataprep-deploy
ports:
- containerPort: 6007
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: dataprep-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: dataprep-svc
namespace: default
spec:
ports:
- name: port1
port: 6007
targetPort: 6007
selector:
app: dataprep-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-dependency-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: embedding-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(EMBEDDING_MODEL_ID)
- --auto-truncate
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
imagePullPolicy: IfNotPresent
name: embedding-dependency-deploy
ports:
- containerPort: 80
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: embedding-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: embedding-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 6006
targetPort: 80
selector:
app: embedding-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-dependency-deploy
namespace: default
spec:
replicas: 16
selector:
matchLabels:
app: llm-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(LLM_MODEL_ID)
- --max-input-length
- '2048'
- --max-total-tokens
- '4096'
- --max-batch-total-tokens
- '65536'
- --max-batch-prefill-tokens
- '4096'
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
securityContext:
capabilities:
add:
- SYS_NICE
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: llm-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: llm-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 9009
targetPort: 80
selector:
app: llm-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: retriever-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: retriever-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: retriever-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/retriever-redis:latest
imagePullPolicy: IfNotPresent
name: retriever-deploy
ports:
- containerPort: 7000
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: retriever-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: retriever-svc
namespace: default
spec:
ports:
- name: service
port: 7000
targetPort: 7000
selector:
app: retriever-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: vector-db
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: vector-db
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: vector-db
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: redis/redis-stack:7.2.0-v9
imagePullPolicy: IfNotPresent
name: vector-db
ports:
- containerPort: 6379
- containerPort: 8001
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: vector-db
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: vector-db
namespace: default
spec:
ports:
- name: vector-db-service
port: 6379
targetPort: 6379
- name: vector-db-insight
port: 8001
targetPort: 8001
selector:
app: vector-db
type: ClusterIP
---

View File

@@ -2,33 +2,27 @@
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
data:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
EMBEDDING_SERVICE_HOST_IP: embedding-svc
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
INDEX_NAME: rag-redis
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
LLM_SERVICE_HOST_IP: llm-svc
NODE_SELECTOR: chatqna-opea
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
RERANK_MODEL_ID: BAAI/bge-reranker-base
RERANK_SERVICE_HOST_IP: reranking-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
kind: ConfigMap
metadata:
name: qna-config
namespace: default
data:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
RERANK_MODEL_ID: BAAI/bge-reranker-base
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
INDEX_NAME: rag-redis
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
EMBEDDING_SERVICE_HOST_IP: embedding-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
RERANK_SERVICE_HOST_IP: reranking-svc
NODE_SELECTOR: chatqna-opea
LLM_SERVICE_HOST_IP: llm-svc
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -46,16 +40,6 @@ spec:
labels:
app: chatqna-backend-server-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
@@ -63,40 +47,42 @@ spec:
image: opea/chatqna:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
args: null
ports:
- containerPort: 8888
resources:
limits:
cpu: 8
memory: 4000Mi
memory: 8000Mi
requests:
cpu: 8
memory: 4000Mi
memory: 8000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: chatqna-backend-server-svc
namespace: default
spec:
type: NodePort
selector:
app: chatqna-backend-server-deploy
ports:
- name: service
nodePort: 30888
port: 8888
targetPort: 8888
nodePort: 30888
selector:
app: chatqna-backend-server-deploy
type: NodePort
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -114,70 +100,41 @@ spec:
labels:
app: dataprep-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: dataprep-deploy
hostIPC: true
containers:
- env:
- name: REDIS_URL
valueFrom:
configMapKeyRef:
name: qna-config
key: REDIS_URL
- name: TEI_ENDPOINT
valueFrom:
configMapKeyRef:
name: qna-config
key: TEI_EMBEDDING_ENDPOINT
- name: INDEX_NAME
valueFrom:
configMapKeyRef:
name: qna-config
key: INDEX_NAME
- envFrom:
- configMapRef:
name: qna-config
image: opea/dataprep-redis:latest
imagePullPolicy: IfNotPresent
name: dataprep-deploy
args: null
ports:
- containerPort: 6007
- containerPort: 6008
- containerPort: 6009
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: dataprep-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: dataprep-svc
namespace: default
spec:
type: ClusterIP
selector:
app: dataprep-deploy
ports:
- name: port1
port: 6007
targetPort: 6007
- name: port2
port: 6008
targetPort: 6008
- name: port3
port: 6009
targetPort: 6009
selector:
app: dataprep-deploy
type: ClusterIP
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -195,23 +152,17 @@ spec:
labels:
app: embedding-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
containers:
- envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
name: embedding-dependency-deploy
args:
- args:
- --model-id
- $(EMBEDDING_MODEL_ID)
- --auto-truncate
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
imagePullPolicy: IfNotPresent
name: embedding-dependency-deploy
ports:
- containerPort: 80
resources:
@@ -221,38 +172,46 @@ spec:
requests:
cpu: 80
memory: 20000Mi
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: embedding-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- name: model-volume
hostPath:
- hostPath:
path: /mnt/models
type: Directory
- name: shm
emptyDir:
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: embedding-dependency-svc
namespace: default
spec:
type: ClusterIP
selector:
app: embedding-dependency-deploy
ports:
- name: service
port: 6006
targetPort: 80
selector:
app: embedding-dependency-deploy
type: ClusterIP
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -270,16 +229,6 @@ spec:
labels:
app: embedding-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: embedding-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
@@ -287,37 +236,38 @@ spec:
image: opea/embedding-tei:latest
imagePullPolicy: IfNotPresent
name: embedding-deploy
args: null
ports:
- containerPort: 6000
resources:
limits:
cpu: 4
requests:
cpu: 4
memory: 4000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: embedding-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: embedding-svc
namespace: default
spec:
type: ClusterIP
selector:
app: embedding-deploy
ports:
- name: service
port: 6000
targetPort: 6000
selector:
app: embedding-deploy
type: ClusterIP
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -335,20 +285,8 @@ spec:
labels:
app: llm-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
name: llm-dependency-deploy-demo
securityContext:
capabilities:
add:
- SYS_NICE
args:
- args:
- --model-id
- $(LLM_MODEL_ID)
- --max-input-length
@@ -359,16 +297,6 @@ spec:
- '65536'
- --max-batch-prefill-tokens
- '4096'
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
@@ -380,38 +308,61 @@ spec:
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
securityContext:
capabilities:
add:
- SYS_NICE
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: llm-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- name: model-volume
hostPath:
- hostPath:
path: /mnt/models
type: Directory
- name: shm
emptyDir:
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: llm-dependency-svc
namespace: default
spec:
type: ClusterIP
selector:
app: llm-dependency-deploy
ports:
- name: service
port: 9009
targetPort: 80
selector:
app: llm-dependency-deploy
type: ClusterIP
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -429,16 +380,6 @@ spec:
labels:
app: llm-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: llm-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
@@ -446,37 +387,38 @@ spec:
image: opea/llm-tgi:latest
imagePullPolicy: IfNotPresent
name: llm-deploy
args: null
ports:
- containerPort: 9000
resources:
limits:
cpu: 4
requests:
cpu: 4
memory: 4000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: llm-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: llm-svc
namespace: default
spec:
type: ClusterIP
selector:
app: llm-deploy
ports:
- name: service
port: 9000
targetPort: 9000
selector:
app: llm-deploy
type: ClusterIP
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -494,35 +436,11 @@ spec:
labels:
app: reranking-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: reranking-dependency-deploy
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/tei-gaudi:latest
name: reranking-dependency-deploy
args:
- args:
- --model-id
- $(RERANK_MODEL_ID)
- --auto-truncate
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
@@ -536,38 +454,57 @@ spec:
value: ${HF_TOKEN}
- name: MAX_WARMUP_SEQUENCE_LENGTH
value: '512'
envFrom:
- configMapRef:
name: qna-config
image: opea/tei-gaudi:latest
imagePullPolicy: IfNotPresent
name: reranking-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: reranking-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- name: model-volume
hostPath:
- hostPath:
path: /mnt/models
type: Directory
- name: shm
emptyDir:
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: reranking-dependency-svc
namespace: default
spec:
type: ClusterIP
selector:
app: reranking-dependency-deploy
ports:
- name: service
port: 8808
targetPort: 80
selector:
app: reranking-dependency-deploy
type: ClusterIP
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -585,16 +522,6 @@ spec:
labels:
app: reranking-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: reranking-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
@@ -602,37 +529,38 @@ spec:
image: opea/reranking-tei:latest
imagePullPolicy: IfNotPresent
name: reranking-deploy
args: null
ports:
- containerPort: 8000
resources:
limits:
cpu: 4
requests:
cpu: 4
memory: 4000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: reranking-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: reranking-svc
namespace: default
spec:
type: ClusterIP
selector:
app: reranking-deploy
ports:
- name: service
port: 8000
targetPort: 8000
selector:
app: reranking-deploy
type: ClusterIP
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -650,74 +578,45 @@ spec:
labels:
app: retriever-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: retriever-deploy
hostIPC: true
containers:
- env:
- name: REDIS_URL
valueFrom:
configMapKeyRef:
name: qna-config
key: REDIS_URL
- name: TEI_EMBEDDING_ENDPOINT
valueFrom:
configMapKeyRef:
name: qna-config
key: TEI_EMBEDDING_ENDPOINT
- name: HUGGINGFACEHUB_API_TOKEN
valueFrom:
configMapKeyRef:
name: qna-config
key: HUGGINGFACEHUB_API_TOKEN
- name: INDEX_NAME
valueFrom:
configMapKeyRef:
name: qna-config
key: INDEX_NAME
- envFrom:
- configMapRef:
name: qna-config
image: opea/retriever-redis:latest
imagePullPolicy: IfNotPresent
name: retriever-deploy
args: null
ports:
- containerPort: 7000
resources:
limits:
cpu: 8
memory: 2500Mi
requests:
cpu: 8
memory: 2500Mi
cpu: 4
memory: 4000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: retriever-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: retriever-svc
namespace: default
spec:
type: ClusterIP
selector:
app: retriever-deploy
ports:
- name: service
port: 7000
targetPort: 7000
selector:
app: retriever-deploy
type: ClusterIP
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -730,24 +629,32 @@ spec:
app: vector-db
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: vector-db
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: vector-db
containers:
- name: vector-db
- envFrom:
- configMapRef:
name: qna-config
image: redis/redis-stack:7.2.0-v9
imagePullPolicy: IfNotPresent
name: vector-db
ports:
- containerPort: 6379
- containerPort: 8001
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: vector-db
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
@@ -755,9 +662,6 @@ metadata:
name: vector-db
namespace: default
spec:
type: ClusterIP
selector:
app: vector-db
ports:
- name: vector-db-service
port: 6379
@@ -765,6 +669,7 @@ spec:
- name: vector-db-insight
port: 8001
targetPort: 8001
selector:
app: vector-db
type: ClusterIP
---

View File

@@ -2,33 +2,27 @@
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
data:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
EMBEDDING_SERVICE_HOST_IP: embedding-svc
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
INDEX_NAME: rag-redis
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
LLM_SERVICE_HOST_IP: llm-svc
NODE_SELECTOR: chatqna-opea
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
RERANK_MODEL_ID: BAAI/bge-reranker-base
RERANK_SERVICE_HOST_IP: reranking-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
kind: ConfigMap
metadata:
name: qna-config
namespace: default
data:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
RERANK_MODEL_ID: BAAI/bge-reranker-base
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
INDEX_NAME: rag-redis
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
EMBEDDING_SERVICE_HOST_IP: embedding-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
RERANK_SERVICE_HOST_IP: reranking-svc
NODE_SELECTOR: chatqna-opea
LLM_SERVICE_HOST_IP: llm-svc
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -46,16 +40,6 @@ spec:
labels:
app: chatqna-backend-server-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
@@ -63,40 +47,42 @@ spec:
image: opea/chatqna:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
args: null
ports:
- containerPort: 8888
resources:
limits:
cpu: 8
memory: 4000Mi
memory: 8000Mi
requests:
cpu: 8
memory: 4000Mi
memory: 8000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: chatqna-backend-server-svc
namespace: default
spec:
type: NodePort
selector:
app: chatqna-backend-server-deploy
ports:
- name: service
nodePort: 30888
port: 8888
targetPort: 8888
nodePort: 30888
selector:
app: chatqna-backend-server-deploy
type: NodePort
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -114,70 +100,41 @@ spec:
labels:
app: dataprep-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: dataprep-deploy
hostIPC: true
containers:
- env:
- name: REDIS_URL
valueFrom:
configMapKeyRef:
name: qna-config
key: REDIS_URL
- name: TEI_ENDPOINT
valueFrom:
configMapKeyRef:
name: qna-config
key: TEI_EMBEDDING_ENDPOINT
- name: INDEX_NAME
valueFrom:
configMapKeyRef:
name: qna-config
key: INDEX_NAME
- envFrom:
- configMapRef:
name: qna-config
image: opea/dataprep-redis:latest
imagePullPolicy: IfNotPresent
name: dataprep-deploy
args: null
ports:
- containerPort: 6007
- containerPort: 6008
- containerPort: 6009
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: dataprep-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: dataprep-svc
namespace: default
spec:
type: ClusterIP
selector:
app: dataprep-deploy
ports:
- name: port1
port: 6007
targetPort: 6007
- name: port2
port: 6008
targetPort: 6008
- name: port3
port: 6009
targetPort: 6009
selector:
app: dataprep-deploy
type: ClusterIP
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -195,23 +152,17 @@ spec:
labels:
app: embedding-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
containers:
- envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
name: embedding-dependency-deploy
args:
- args:
- --model-id
- $(EMBEDDING_MODEL_ID)
- --auto-truncate
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
imagePullPolicy: IfNotPresent
name: embedding-dependency-deploy
ports:
- containerPort: 80
resources:
@@ -221,38 +172,46 @@ spec:
requests:
cpu: 80
memory: 20000Mi
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: embedding-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- name: model-volume
hostPath:
- hostPath:
path: /mnt/models
type: Directory
- name: shm
emptyDir:
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: embedding-dependency-svc
namespace: default
spec:
type: ClusterIP
selector:
app: embedding-dependency-deploy
ports:
- name: service
port: 6006
targetPort: 80
selector:
app: embedding-dependency-deploy
type: ClusterIP
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -270,16 +229,6 @@ spec:
labels:
app: embedding-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: embedding-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
@@ -287,37 +236,38 @@ spec:
image: opea/embedding-tei:latest
imagePullPolicy: IfNotPresent
name: embedding-deploy
args: null
ports:
- containerPort: 6000
resources:
limits:
cpu: 4
requests:
cpu: 4
memory: 4000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: embedding-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: embedding-svc
namespace: default
spec:
type: ClusterIP
selector:
app: embedding-deploy
ports:
- name: service
port: 6000
targetPort: 6000
selector:
app: embedding-deploy
type: ClusterIP
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -335,20 +285,8 @@ spec:
labels:
app: llm-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
name: llm-dependency-deploy-demo
securityContext:
capabilities:
add:
- SYS_NICE
args:
- args:
- --model-id
- $(LLM_MODEL_ID)
- --max-input-length
@@ -359,16 +297,6 @@ spec:
- '65536'
- --max-batch-prefill-tokens
- '4096'
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
@@ -380,38 +308,61 @@ spec:
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
securityContext:
capabilities:
add:
- SYS_NICE
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: llm-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- name: model-volume
hostPath:
- hostPath:
path: /mnt/models
type: Directory
- name: shm
emptyDir:
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: llm-dependency-svc
namespace: default
spec:
type: ClusterIP
selector:
app: llm-dependency-deploy
ports:
- name: service
port: 9009
targetPort: 80
selector:
app: llm-dependency-deploy
type: ClusterIP
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -429,16 +380,6 @@ spec:
labels:
app: llm-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: llm-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
@@ -446,37 +387,38 @@ spec:
image: opea/llm-tgi:latest
imagePullPolicy: IfNotPresent
name: llm-deploy
args: null
ports:
- containerPort: 9000
resources:
limits:
cpu: 4
requests:
cpu: 4
memory: 4000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: llm-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: llm-svc
namespace: default
spec:
type: ClusterIP
selector:
app: llm-deploy
ports:
- name: service
port: 9000
targetPort: 9000
selector:
app: llm-deploy
type: ClusterIP
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -494,35 +436,11 @@ spec:
labels:
app: reranking-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: reranking-dependency-deploy
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/tei-gaudi:latest
name: reranking-dependency-deploy
args:
- args:
- --model-id
- $(RERANK_MODEL_ID)
- --auto-truncate
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
@@ -536,38 +454,57 @@ spec:
value: ${HF_TOKEN}
- name: MAX_WARMUP_SEQUENCE_LENGTH
value: '512'
envFrom:
- configMapRef:
name: qna-config
image: opea/tei-gaudi:latest
imagePullPolicy: IfNotPresent
name: reranking-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: reranking-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- name: model-volume
hostPath:
- hostPath:
path: /mnt/models
type: Directory
- name: shm
emptyDir:
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: reranking-dependency-svc
namespace: default
spec:
type: ClusterIP
selector:
app: reranking-dependency-deploy
ports:
- name: service
port: 8808
targetPort: 80
selector:
app: reranking-dependency-deploy
type: ClusterIP
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -585,16 +522,6 @@ spec:
labels:
app: reranking-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: reranking-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
@@ -602,37 +529,38 @@ spec:
image: opea/reranking-tei:latest
imagePullPolicy: IfNotPresent
name: reranking-deploy
args: null
ports:
- containerPort: 8000
resources:
limits:
cpu: 4
requests:
cpu: 4
memory: 4000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: reranking-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: reranking-svc
namespace: default
spec:
type: ClusterIP
selector:
app: reranking-deploy
ports:
- name: service
port: 8000
targetPort: 8000
selector:
app: reranking-deploy
type: ClusterIP
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -650,74 +578,45 @@ spec:
labels:
app: retriever-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: retriever-deploy
hostIPC: true
containers:
- env:
- name: REDIS_URL
valueFrom:
configMapKeyRef:
name: qna-config
key: REDIS_URL
- name: TEI_EMBEDDING_ENDPOINT
valueFrom:
configMapKeyRef:
name: qna-config
key: TEI_EMBEDDING_ENDPOINT
- name: HUGGINGFACEHUB_API_TOKEN
valueFrom:
configMapKeyRef:
name: qna-config
key: HUGGINGFACEHUB_API_TOKEN
- name: INDEX_NAME
valueFrom:
configMapKeyRef:
name: qna-config
key: INDEX_NAME
- envFrom:
- configMapRef:
name: qna-config
image: opea/retriever-redis:latest
imagePullPolicy: IfNotPresent
name: retriever-deploy
args: null
ports:
- containerPort: 7000
resources:
limits:
cpu: 8
memory: 2500Mi
requests:
cpu: 8
memory: 2500Mi
cpu: 4
memory: 4000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: retriever-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: retriever-svc
namespace: default
spec:
type: ClusterIP
selector:
app: retriever-deploy
ports:
- name: service
port: 7000
targetPort: 7000
selector:
app: retriever-deploy
type: ClusterIP
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -730,24 +629,32 @@ spec:
app: vector-db
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: vector-db
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: vector-db
containers:
- name: vector-db
- envFrom:
- configMapRef:
name: qna-config
image: redis/redis-stack:7.2.0-v9
imagePullPolicy: IfNotPresent
name: vector-db
ports:
- containerPort: 6379
- containerPort: 8001
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: vector-db
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
@@ -755,9 +662,6 @@ metadata:
name: vector-db
namespace: default
spec:
type: ClusterIP
selector:
app: vector-db
ports:
- name: vector-db-service
port: 6379
@@ -765,6 +669,7 @@ spec:
- name: vector-db-insight
port: 8001
targetPort: 8001
selector:
app: vector-db
type: ClusterIP
---

View File

@@ -2,33 +2,27 @@
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
data:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
EMBEDDING_SERVICE_HOST_IP: embedding-svc
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
INDEX_NAME: rag-redis
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
LLM_SERVICE_HOST_IP: llm-svc
NODE_SELECTOR: chatqna-opea
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
RERANK_MODEL_ID: BAAI/bge-reranker-base
RERANK_SERVICE_HOST_IP: reranking-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
kind: ConfigMap
metadata:
name: qna-config
namespace: default
data:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
RERANK_MODEL_ID: BAAI/bge-reranker-base
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
INDEX_NAME: rag-redis
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
EMBEDDING_SERVICE_HOST_IP: embedding-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
RERANK_SERVICE_HOST_IP: reranking-svc
NODE_SELECTOR: chatqna-opea
LLM_SERVICE_HOST_IP: llm-svc
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -46,16 +40,6 @@ spec:
labels:
app: chatqna-backend-server-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
@@ -63,40 +47,42 @@ spec:
image: opea/chatqna:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
args: null
ports:
- containerPort: 8888
resources:
limits:
cpu: 8
memory: 4000Mi
memory: 8000Mi
requests:
cpu: 8
memory: 4000Mi
memory: 8000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: chatqna-backend-server-svc
namespace: default
spec:
type: NodePort
selector:
app: chatqna-backend-server-deploy
ports:
- name: service
nodePort: 30888
port: 8888
targetPort: 8888
nodePort: 30888
selector:
app: chatqna-backend-server-deploy
type: NodePort
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -114,70 +100,41 @@ spec:
labels:
app: dataprep-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: dataprep-deploy
hostIPC: true
containers:
- env:
- name: REDIS_URL
valueFrom:
configMapKeyRef:
name: qna-config
key: REDIS_URL
- name: TEI_ENDPOINT
valueFrom:
configMapKeyRef:
name: qna-config
key: TEI_EMBEDDING_ENDPOINT
- name: INDEX_NAME
valueFrom:
configMapKeyRef:
name: qna-config
key: INDEX_NAME
- envFrom:
- configMapRef:
name: qna-config
image: opea/dataprep-redis:latest
imagePullPolicy: IfNotPresent
name: dataprep-deploy
args: null
ports:
- containerPort: 6007
- containerPort: 6008
- containerPort: 6009
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: dataprep-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: dataprep-svc
namespace: default
spec:
type: ClusterIP
selector:
app: dataprep-deploy
ports:
- name: port1
port: 6007
targetPort: 6007
- name: port2
port: 6008
targetPort: 6008
- name: port3
port: 6009
targetPort: 6009
selector:
app: dataprep-deploy
type: ClusterIP
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -195,23 +152,17 @@ spec:
labels:
app: embedding-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
containers:
- envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
name: embedding-dependency-deploy
args:
- args:
- --model-id
- $(EMBEDDING_MODEL_ID)
- --auto-truncate
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
imagePullPolicy: IfNotPresent
name: embedding-dependency-deploy
ports:
- containerPort: 80
resources:
@@ -221,38 +172,46 @@ spec:
requests:
cpu: 80
memory: 20000Mi
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: embedding-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- name: model-volume
hostPath:
- hostPath:
path: /mnt/models
type: Directory
- name: shm
emptyDir:
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: embedding-dependency-svc
namespace: default
spec:
type: ClusterIP
selector:
app: embedding-dependency-deploy
ports:
- name: service
port: 6006
targetPort: 80
selector:
app: embedding-dependency-deploy
type: ClusterIP
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -270,16 +229,6 @@ spec:
labels:
app: embedding-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: embedding-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
@@ -287,37 +236,38 @@ spec:
image: opea/embedding-tei:latest
imagePullPolicy: IfNotPresent
name: embedding-deploy
args: null
ports:
- containerPort: 6000
resources:
limits:
cpu: 4
requests:
cpu: 4
memory: 4000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: embedding-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: embedding-svc
namespace: default
spec:
type: ClusterIP
selector:
app: embedding-deploy
ports:
- name: service
port: 6000
targetPort: 6000
selector:
app: embedding-deploy
type: ClusterIP
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -335,20 +285,8 @@ spec:
labels:
app: llm-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
name: llm-dependency-deploy-demo
securityContext:
capabilities:
add:
- SYS_NICE
args:
- args:
- --model-id
- $(LLM_MODEL_ID)
- --max-input-length
@@ -359,16 +297,6 @@ spec:
- '65536'
- --max-batch-prefill-tokens
- '4096'
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
@@ -380,38 +308,61 @@ spec:
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
securityContext:
capabilities:
add:
- SYS_NICE
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: llm-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- name: model-volume
hostPath:
- hostPath:
path: /mnt/models
type: Directory
- name: shm
emptyDir:
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: llm-dependency-svc
namespace: default
spec:
type: ClusterIP
selector:
app: llm-dependency-deploy
ports:
- name: service
port: 9009
targetPort: 80
selector:
app: llm-dependency-deploy
type: ClusterIP
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -429,16 +380,6 @@ spec:
labels:
app: llm-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: llm-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
@@ -446,37 +387,38 @@ spec:
image: opea/llm-tgi:latest
imagePullPolicy: IfNotPresent
name: llm-deploy
args: null
ports:
- containerPort: 9000
resources:
limits:
cpu: 4
requests:
cpu: 4
memory: 4000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: llm-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: llm-svc
namespace: default
spec:
type: ClusterIP
selector:
app: llm-deploy
ports:
- name: service
port: 9000
targetPort: 9000
selector:
app: llm-deploy
type: ClusterIP
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -494,35 +436,11 @@ spec:
labels:
app: reranking-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: reranking-dependency-deploy
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/tei-gaudi:latest
name: reranking-dependency-deploy
args:
- args:
- --model-id
- $(RERANK_MODEL_ID)
- --auto-truncate
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
@@ -536,38 +454,57 @@ spec:
value: ${HF_TOKEN}
- name: MAX_WARMUP_SEQUENCE_LENGTH
value: '512'
envFrom:
- configMapRef:
name: qna-config
image: opea/tei-gaudi:latest
imagePullPolicy: IfNotPresent
name: reranking-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: reranking-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- name: model-volume
hostPath:
- hostPath:
path: /mnt/models
type: Directory
- name: shm
emptyDir:
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: reranking-dependency-svc
namespace: default
spec:
type: ClusterIP
selector:
app: reranking-dependency-deploy
ports:
- name: service
port: 8808
targetPort: 80
selector:
app: reranking-dependency-deploy
type: ClusterIP
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -585,16 +522,6 @@ spec:
labels:
app: reranking-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: reranking-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
@@ -602,37 +529,38 @@ spec:
image: opea/reranking-tei:latest
imagePullPolicy: IfNotPresent
name: reranking-deploy
args: null
ports:
- containerPort: 8000
resources:
limits:
cpu: 4
requests:
cpu: 4
memory: 4000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: reranking-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: reranking-svc
namespace: default
spec:
type: ClusterIP
selector:
app: reranking-deploy
ports:
- name: service
port: 8000
targetPort: 8000
selector:
app: reranking-deploy
type: ClusterIP
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -650,74 +578,45 @@ spec:
labels:
app: retriever-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: retriever-deploy
hostIPC: true
containers:
- env:
- name: REDIS_URL
valueFrom:
configMapKeyRef:
name: qna-config
key: REDIS_URL
- name: TEI_EMBEDDING_ENDPOINT
valueFrom:
configMapKeyRef:
name: qna-config
key: TEI_EMBEDDING_ENDPOINT
- name: HUGGINGFACEHUB_API_TOKEN
valueFrom:
configMapKeyRef:
name: qna-config
key: HUGGINGFACEHUB_API_TOKEN
- name: INDEX_NAME
valueFrom:
configMapKeyRef:
name: qna-config
key: INDEX_NAME
- envFrom:
- configMapRef:
name: qna-config
image: opea/retriever-redis:latest
imagePullPolicy: IfNotPresent
name: retriever-deploy
args: null
ports:
- containerPort: 7000
resources:
limits:
cpu: 8
memory: 2500Mi
requests:
cpu: 8
memory: 2500Mi
cpu: 4
memory: 4000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: retriever-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
kind: Service
apiVersion: v1
kind: Service
metadata:
name: retriever-svc
namespace: default
spec:
type: ClusterIP
selector:
app: retriever-deploy
ports:
- name: service
port: 7000
targetPort: 7000
selector:
app: retriever-deploy
type: ClusterIP
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -730,24 +629,32 @@ spec:
app: vector-db
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: vector-db
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: vector-db
containers:
- name: vector-db
- envFrom:
- configMapRef:
name: qna-config
image: redis/redis-stack:7.2.0-v9
imagePullPolicy: IfNotPresent
name: vector-db
ports:
- containerPort: 6379
- containerPort: 8001
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: vector-db
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
@@ -755,9 +662,6 @@ metadata:
name: vector-db
namespace: default
spec:
type: ClusterIP
selector:
app: vector-db
ports:
- name: vector-db-service
port: 6379
@@ -765,6 +669,7 @@ spec:
- name: vector-db-insight
port: 8001
targetPort: 8001
selector:
app: vector-db
type: ClusterIP
---

View File

@@ -0,0 +1,507 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
data:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
INDEX_NAME: rag-redis
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
LLM_SERVER_HOST_IP: llm-dependency-svc
NODE_SELECTOR: chatqna-opea
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
RERANK_MODEL_ID: BAAI/bge-reranker-base
RERANK_SERVER_HOST_IP: reranking-dependency-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
kind: ConfigMap
metadata:
name: qna-config
namespace: default
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-backend-server-deploy
namespace: default
spec:
replicas: 8
selector:
matchLabels:
app: chatqna-backend-server-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: chatqna-backend-server-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna-no-wrapper:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
ports:
- containerPort: 8888
resources:
limits:
cpu: 8
memory: 8000Mi
requests:
cpu: 8
memory: 8000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: chatqna-backend-server-svc
namespace: default
spec:
ports:
- name: service
nodePort: 30888
port: 8888
targetPort: 8888
selector:
app: chatqna-backend-server-deploy
type: NodePort
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: dataprep-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: dataprep-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: dataprep-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/dataprep-redis:latest
imagePullPolicy: IfNotPresent
name: dataprep-deploy
ports:
- containerPort: 6007
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: dataprep-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: dataprep-svc
namespace: default
spec:
ports:
- name: port1
port: 6007
targetPort: 6007
selector:
app: dataprep-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-dependency-deploy
namespace: default
spec:
replicas: 8
selector:
matchLabels:
app: embedding-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(EMBEDDING_MODEL_ID)
- --auto-truncate
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
imagePullPolicy: IfNotPresent
name: embedding-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
cpu: 80
memory: 20000Mi
requests:
cpu: 80
memory: 20000Mi
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: embedding-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: embedding-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 6006
targetPort: 80
selector:
app: embedding-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-dependency-deploy
namespace: default
spec:
replicas: 63
selector:
matchLabels:
app: llm-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(LLM_MODEL_ID)
- --max-input-length
- '1280'
- --max-total-tokens
- '2048'
- --max-batch-total-tokens
- '65536'
- --max-batch-prefill-tokens
- '4096'
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
securityContext:
capabilities:
add:
- SYS_NICE
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: llm-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: llm-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 9009
targetPort: 80
selector:
app: llm-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: reranking-dependency-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: reranking-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: reranking-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(RERANK_MODEL_ID)
- --auto-truncate
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
- name: MAX_WARMUP_SEQUENCE_LENGTH
value: '512'
envFrom:
- configMapRef:
name: qna-config
image: opea/tei-gaudi:latest
imagePullPolicy: IfNotPresent
name: reranking-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: reranking-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: reranking-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 8808
targetPort: 80
selector:
app: reranking-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: retriever-deploy
namespace: default
spec:
replicas: 8
selector:
matchLabels:
app: retriever-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: retriever-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/retriever-redis:latest
imagePullPolicy: IfNotPresent
name: retriever-deploy
ports:
- containerPort: 7000
resources:
requests:
cpu: 4
memory: 4000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: retriever-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: retriever-svc
namespace: default
spec:
ports:
- name: service
port: 7000
targetPort: 7000
selector:
app: retriever-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: vector-db
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: vector-db
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: vector-db
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: redis/redis-stack:7.2.0-v9
imagePullPolicy: IfNotPresent
name: vector-db
ports:
- containerPort: 6379
- containerPort: 8001
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: vector-db
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: vector-db
namespace: default
spec:
ports:
- name: vector-db-service
port: 6379
targetPort: 6379
- name: vector-db-insight
port: 8001
targetPort: 8001
selector:
app: vector-db
type: ClusterIP
---

View File

@@ -0,0 +1,507 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
data:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
INDEX_NAME: rag-redis
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
LLM_SERVER_HOST_IP: llm-dependency-svc
NODE_SELECTOR: chatqna-opea
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
RERANK_MODEL_ID: BAAI/bge-reranker-base
RERANK_SERVER_HOST_IP: reranking-dependency-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
kind: ConfigMap
metadata:
name: qna-config
namespace: default
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-backend-server-deploy
namespace: default
spec:
replicas: 4
selector:
matchLabels:
app: chatqna-backend-server-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: chatqna-backend-server-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna-no-wrapper:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
ports:
- containerPort: 8888
resources:
limits:
cpu: 8
memory: 8000Mi
requests:
cpu: 8
memory: 8000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: chatqna-backend-server-svc
namespace: default
spec:
ports:
- name: service
nodePort: 30888
port: 8888
targetPort: 8888
selector:
app: chatqna-backend-server-deploy
type: NodePort
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: dataprep-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: dataprep-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: dataprep-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/dataprep-redis:latest
imagePullPolicy: IfNotPresent
name: dataprep-deploy
ports:
- containerPort: 6007
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: dataprep-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: dataprep-svc
namespace: default
spec:
ports:
- name: port1
port: 6007
targetPort: 6007
selector:
app: dataprep-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-dependency-deploy
namespace: default
spec:
replicas: 4
selector:
matchLabels:
app: embedding-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(EMBEDDING_MODEL_ID)
- --auto-truncate
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
imagePullPolicy: IfNotPresent
name: embedding-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
cpu: 80
memory: 20000Mi
requests:
cpu: 80
memory: 20000Mi
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: embedding-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: embedding-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 6006
targetPort: 80
selector:
app: embedding-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-dependency-deploy
namespace: default
spec:
replicas: 31
selector:
matchLabels:
app: llm-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(LLM_MODEL_ID)
- --max-input-length
- '1280'
- --max-total-tokens
- '2048'
- --max-batch-total-tokens
- '65536'
- --max-batch-prefill-tokens
- '4096'
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
securityContext:
capabilities:
add:
- SYS_NICE
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: llm-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: llm-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 9009
targetPort: 80
selector:
app: llm-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: reranking-dependency-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: reranking-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: reranking-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(RERANK_MODEL_ID)
- --auto-truncate
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
- name: MAX_WARMUP_SEQUENCE_LENGTH
value: '512'
envFrom:
- configMapRef:
name: qna-config
image: opea/tei-gaudi:latest
imagePullPolicy: IfNotPresent
name: reranking-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: reranking-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: reranking-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 8808
targetPort: 80
selector:
app: reranking-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: retriever-deploy
namespace: default
spec:
replicas: 4
selector:
matchLabels:
app: retriever-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: retriever-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/retriever-redis:latest
imagePullPolicy: IfNotPresent
name: retriever-deploy
ports:
- containerPort: 7000
resources:
requests:
cpu: 4
memory: 4000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: retriever-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: retriever-svc
namespace: default
spec:
ports:
- name: service
port: 7000
targetPort: 7000
selector:
app: retriever-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: vector-db
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: vector-db
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: vector-db
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: redis/redis-stack:7.2.0-v9
imagePullPolicy: IfNotPresent
name: vector-db
ports:
- containerPort: 6379
- containerPort: 8001
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: vector-db
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: vector-db
namespace: default
spec:
ports:
- name: vector-db-service
port: 6379
targetPort: 6379
- name: vector-db-insight
port: 8001
targetPort: 8001
selector:
app: vector-db
type: ClusterIP
---

View File

@@ -0,0 +1,507 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
data:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
INDEX_NAME: rag-redis
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
LLM_SERVER_HOST_IP: llm-dependency-svc
NODE_SELECTOR: chatqna-opea
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
RERANK_MODEL_ID: BAAI/bge-reranker-base
RERANK_SERVER_HOST_IP: reranking-dependency-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
kind: ConfigMap
metadata:
name: qna-config
namespace: default
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-backend-server-deploy
namespace: default
spec:
replicas: 2
selector:
matchLabels:
app: chatqna-backend-server-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: chatqna-backend-server-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna-no-wrapper:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
ports:
- containerPort: 8888
resources:
limits:
cpu: 8
memory: 8000Mi
requests:
cpu: 8
memory: 8000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: chatqna-backend-server-svc
namespace: default
spec:
ports:
- name: service
nodePort: 30888
port: 8888
targetPort: 8888
selector:
app: chatqna-backend-server-deploy
type: NodePort
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: dataprep-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: dataprep-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: dataprep-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/dataprep-redis:latest
imagePullPolicy: IfNotPresent
name: dataprep-deploy
ports:
- containerPort: 6007
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: dataprep-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: dataprep-svc
namespace: default
spec:
ports:
- name: port1
port: 6007
targetPort: 6007
selector:
app: dataprep-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-dependency-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: embedding-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(EMBEDDING_MODEL_ID)
- --auto-truncate
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
imagePullPolicy: IfNotPresent
name: embedding-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
cpu: 80
memory: 20000Mi
requests:
cpu: 80
memory: 20000Mi
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: embedding-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: embedding-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 6006
targetPort: 80
selector:
app: embedding-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-dependency-deploy
namespace: default
spec:
replicas: 7
selector:
matchLabels:
app: llm-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(LLM_MODEL_ID)
- --max-input-length
- '1280'
- --max-total-tokens
- '2048'
- --max-batch-total-tokens
- '65536'
- --max-batch-prefill-tokens
- '4096'
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
securityContext:
capabilities:
add:
- SYS_NICE
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: llm-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: llm-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 9009
targetPort: 80
selector:
app: llm-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: reranking-dependency-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: reranking-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: reranking-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(RERANK_MODEL_ID)
- --auto-truncate
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
- name: MAX_WARMUP_SEQUENCE_LENGTH
value: '512'
envFrom:
- configMapRef:
name: qna-config
image: opea/tei-gaudi:latest
imagePullPolicy: IfNotPresent
name: reranking-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: reranking-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: reranking-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 8808
targetPort: 80
selector:
app: reranking-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: retriever-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: retriever-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: retriever-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/retriever-redis:latest
imagePullPolicy: IfNotPresent
name: retriever-deploy
ports:
- containerPort: 7000
resources:
requests:
cpu: 4
memory: 4000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: retriever-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: retriever-svc
namespace: default
spec:
ports:
- name: service
port: 7000
targetPort: 7000
selector:
app: retriever-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: vector-db
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: vector-db
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: vector-db
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: redis/redis-stack:7.2.0-v9
imagePullPolicy: IfNotPresent
name: vector-db
ports:
- containerPort: 6379
- containerPort: 8001
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: vector-db
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: vector-db
namespace: default
spec:
ports:
- name: vector-db-service
port: 6379
targetPort: 6379
- name: vector-db-insight
port: 8001
targetPort: 8001
selector:
app: vector-db
type: ClusterIP
---

View File

@@ -0,0 +1,507 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
data:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
INDEX_NAME: rag-redis
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
LLM_SERVER_HOST_IP: llm-dependency-svc
NODE_SELECTOR: chatqna-opea
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
RERANK_MODEL_ID: BAAI/bge-reranker-base
RERANK_SERVER_HOST_IP: reranking-dependency-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
kind: ConfigMap
metadata:
name: qna-config
namespace: default
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-backend-server-deploy
namespace: default
spec:
replicas: 2
selector:
matchLabels:
app: chatqna-backend-server-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: chatqna-backend-server-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna-no-wrapper:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
ports:
- containerPort: 8888
resources:
limits:
cpu: 8
memory: 8000Mi
requests:
cpu: 8
memory: 8000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: chatqna-backend-server-svc
namespace: default
spec:
ports:
- name: service
nodePort: 30888
port: 8888
targetPort: 8888
selector:
app: chatqna-backend-server-deploy
type: NodePort
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: dataprep-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: dataprep-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: dataprep-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/dataprep-redis:latest
imagePullPolicy: IfNotPresent
name: dataprep-deploy
ports:
- containerPort: 6007
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: dataprep-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: dataprep-svc
namespace: default
spec:
ports:
- name: port1
port: 6007
targetPort: 6007
selector:
app: dataprep-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-dependency-deploy
namespace: default
spec:
replicas: 2
selector:
matchLabels:
app: embedding-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(EMBEDDING_MODEL_ID)
- --auto-truncate
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
imagePullPolicy: IfNotPresent
name: embedding-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
cpu: 80
memory: 20000Mi
requests:
cpu: 80
memory: 20000Mi
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: embedding-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: embedding-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 6006
targetPort: 80
selector:
app: embedding-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-dependency-deploy
namespace: default
spec:
replicas: 15
selector:
matchLabels:
app: llm-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(LLM_MODEL_ID)
- --max-input-length
- '1280'
- --max-total-tokens
- '2048'
- --max-batch-total-tokens
- '65536'
- --max-batch-prefill-tokens
- '4096'
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
securityContext:
capabilities:
add:
- SYS_NICE
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: llm-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: llm-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 9009
targetPort: 80
selector:
app: llm-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: reranking-dependency-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: reranking-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: reranking-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(RERANK_MODEL_ID)
- --auto-truncate
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
- name: MAX_WARMUP_SEQUENCE_LENGTH
value: '512'
envFrom:
- configMapRef:
name: qna-config
image: opea/tei-gaudi:latest
imagePullPolicy: IfNotPresent
name: reranking-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: reranking-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: reranking-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 8808
targetPort: 80
selector:
app: reranking-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: retriever-deploy
namespace: default
spec:
replicas: 2
selector:
matchLabels:
app: retriever-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: retriever-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/retriever-redis:latest
imagePullPolicy: IfNotPresent
name: retriever-deploy
ports:
- containerPort: 7000
resources:
requests:
cpu: 4
memory: 4000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: retriever-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: retriever-svc
namespace: default
spec:
ports:
- name: service
port: 7000
targetPort: 7000
selector:
app: retriever-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: vector-db
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: vector-db
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: vector-db
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: redis/redis-stack:7.2.0-v9
imagePullPolicy: IfNotPresent
name: vector-db
ports:
- containerPort: 6379
- containerPort: 8001
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: vector-db
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: vector-db
namespace: default
spec:
ports:
- name: vector-db-service
port: 6379
targetPort: 6379
- name: vector-db-insight
port: 8001
targetPort: 8001
selector:
app: vector-db
type: ClusterIP
---

View File

@@ -0,0 +1,421 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
data:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
INDEX_NAME: rag-redis
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
LLM_SERVER_HOST_IP: llm-dependency-svc
NODE_SELECTOR: chatqna-opea
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
RERANK_MODEL_ID: BAAI/bge-reranker-base
RERANK_SERVER_HOST_IP: reranking-dependency-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
kind: ConfigMap
metadata:
name: qna-config
namespace: default
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-backend-server-deploy
namespace: default
spec:
replicas: 4
selector:
matchLabels:
app: chatqna-backend-server-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: chatqna-backend-server-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna-no-wrapper-without-rerank:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
ports:
- containerPort: 8888
resources:
limits:
cpu: 8
memory: 8000Mi
requests:
cpu: 8
memory: 8000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: chatqna-backend-server-svc
namespace: default
spec:
ports:
- name: service
nodePort: 30888
port: 8888
targetPort: 8888
selector:
app: chatqna-backend-server-deploy
type: NodePort
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: dataprep-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: dataprep-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: dataprep-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/dataprep-redis:latest
imagePullPolicy: IfNotPresent
name: dataprep-deploy
ports:
- containerPort: 6007
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: dataprep-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: dataprep-svc
namespace: default
spec:
ports:
- name: port1
port: 6007
targetPort: 6007
selector:
app: dataprep-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-dependency-deploy
namespace: default
spec:
replicas: 4
selector:
matchLabels:
app: embedding-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(EMBEDDING_MODEL_ID)
- --auto-truncate
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
imagePullPolicy: IfNotPresent
name: embedding-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
cpu: 80
memory: 20000Mi
requests:
cpu: 80
memory: 20000Mi
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: embedding-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: embedding-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 6006
targetPort: 80
selector:
app: embedding-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-dependency-deploy
namespace: default
spec:
replicas: 64
selector:
matchLabels:
app: llm-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(LLM_MODEL_ID)
- --max-input-length
- '1280'
- --max-total-tokens
- '2048'
- --max-batch-total-tokens
- '65536'
- --max-batch-prefill-tokens
- '4096'
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
securityContext:
capabilities:
add:
- SYS_NICE
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: llm-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: llm-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 9009
targetPort: 80
selector:
app: llm-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: retriever-deploy
namespace: default
spec:
replicas: 4
selector:
matchLabels:
app: retriever-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: retriever-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/retriever-redis:latest
imagePullPolicy: IfNotPresent
name: retriever-deploy
ports:
- containerPort: 7000
resources:
requests:
cpu: 4
memory: 4000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: retriever-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: retriever-svc
namespace: default
spec:
ports:
- name: service
port: 7000
targetPort: 7000
selector:
app: retriever-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: vector-db
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: vector-db
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: vector-db
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: redis/redis-stack:7.2.0-v9
imagePullPolicy: IfNotPresent
name: vector-db
ports:
- containerPort: 6379
- containerPort: 8001
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: vector-db
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: vector-db
namespace: default
spec:
ports:
- name: vector-db-service
port: 6379
targetPort: 6379
- name: vector-db-insight
port: 8001
targetPort: 8001
selector:
app: vector-db
type: ClusterIP
---

View File

@@ -0,0 +1,421 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
data:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
INDEX_NAME: rag-redis
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
LLM_SERVER_HOST_IP: llm-dependency-svc
NODE_SELECTOR: chatqna-opea
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
RERANK_MODEL_ID: BAAI/bge-reranker-base
RERANK_SERVER_HOST_IP: reranking-dependency-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
kind: ConfigMap
metadata:
name: qna-config
namespace: default
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-backend-server-deploy
namespace: default
spec:
replicas: 4
selector:
matchLabels:
app: chatqna-backend-server-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: chatqna-backend-server-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna-no-wrapper-without-rerank:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
ports:
- containerPort: 8888
resources:
limits:
cpu: 8
memory: 8000Mi
requests:
cpu: 8
memory: 8000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: chatqna-backend-server-svc
namespace: default
spec:
ports:
- name: service
nodePort: 30888
port: 8888
targetPort: 8888
selector:
app: chatqna-backend-server-deploy
type: NodePort
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: dataprep-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: dataprep-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: dataprep-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/dataprep-redis:latest
imagePullPolicy: IfNotPresent
name: dataprep-deploy
ports:
- containerPort: 6007
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: dataprep-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: dataprep-svc
namespace: default
spec:
ports:
- name: port1
port: 6007
targetPort: 6007
selector:
app: dataprep-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-dependency-deploy
namespace: default
spec:
replicas: 4
selector:
matchLabels:
app: embedding-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(EMBEDDING_MODEL_ID)
- --auto-truncate
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
imagePullPolicy: IfNotPresent
name: embedding-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
cpu: 80
memory: 20000Mi
requests:
cpu: 80
memory: 20000Mi
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: embedding-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: embedding-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 6006
targetPort: 80
selector:
app: embedding-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-dependency-deploy
namespace: default
spec:
replicas: 32
selector:
matchLabels:
app: llm-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(LLM_MODEL_ID)
- --max-input-length
- '1280'
- --max-total-tokens
- '2048'
- --max-batch-total-tokens
- '65536'
- --max-batch-prefill-tokens
- '4096'
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
securityContext:
capabilities:
add:
- SYS_NICE
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: llm-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: llm-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 9009
targetPort: 80
selector:
app: llm-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: retriever-deploy
namespace: default
spec:
replicas: 4
selector:
matchLabels:
app: retriever-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: retriever-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/retriever-redis:latest
imagePullPolicy: IfNotPresent
name: retriever-deploy
ports:
- containerPort: 7000
resources:
requests:
cpu: 4
memory: 4000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: retriever-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: retriever-svc
namespace: default
spec:
ports:
- name: service
port: 7000
targetPort: 7000
selector:
app: retriever-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: vector-db
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: vector-db
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: vector-db
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: redis/redis-stack:7.2.0-v9
imagePullPolicy: IfNotPresent
name: vector-db
ports:
- containerPort: 6379
- containerPort: 8001
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: vector-db
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: vector-db
namespace: default
spec:
ports:
- name: vector-db-service
port: 6379
targetPort: 6379
- name: vector-db-insight
port: 8001
targetPort: 8001
selector:
app: vector-db
type: ClusterIP
---

View File

@@ -0,0 +1,421 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
data:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
INDEX_NAME: rag-redis
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
LLM_SERVER_HOST_IP: llm-dependency-svc
NODE_SELECTOR: chatqna-opea
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
RERANK_MODEL_ID: BAAI/bge-reranker-base
RERANK_SERVER_HOST_IP: reranking-dependency-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
kind: ConfigMap
metadata:
name: qna-config
namespace: default
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-backend-server-deploy
namespace: default
spec:
replicas: 2
selector:
matchLabels:
app: chatqna-backend-server-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: chatqna-backend-server-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna-no-wrapper-without-rerank:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
ports:
- containerPort: 8888
resources:
limits:
cpu: 8
memory: 8000Mi
requests:
cpu: 8
memory: 8000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: chatqna-backend-server-svc
namespace: default
spec:
ports:
- name: service
nodePort: 30888
port: 8888
targetPort: 8888
selector:
app: chatqna-backend-server-deploy
type: NodePort
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: dataprep-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: dataprep-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: dataprep-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/dataprep-redis:latest
imagePullPolicy: IfNotPresent
name: dataprep-deploy
ports:
- containerPort: 6007
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: dataprep-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: dataprep-svc
namespace: default
spec:
ports:
- name: port1
port: 6007
targetPort: 6007
selector:
app: dataprep-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-dependency-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: embedding-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(EMBEDDING_MODEL_ID)
- --auto-truncate
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
imagePullPolicy: IfNotPresent
name: embedding-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
cpu: 80
memory: 20000Mi
requests:
cpu: 80
memory: 20000Mi
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: embedding-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: embedding-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 6006
targetPort: 80
selector:
app: embedding-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-dependency-deploy
namespace: default
spec:
replicas: 8
selector:
matchLabels:
app: llm-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(LLM_MODEL_ID)
- --max-input-length
- '1280'
- --max-total-tokens
- '2048'
- --max-batch-total-tokens
- '65536'
- --max-batch-prefill-tokens
- '4096'
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
securityContext:
capabilities:
add:
- SYS_NICE
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: llm-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: llm-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 9009
targetPort: 80
selector:
app: llm-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: retriever-deploy
namespace: default
spec:
replicas: 2
selector:
matchLabels:
app: retriever-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: retriever-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/retriever-redis:latest
imagePullPolicy: IfNotPresent
name: retriever-deploy
ports:
- containerPort: 7000
resources:
requests:
cpu: 4
memory: 4000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: retriever-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: retriever-svc
namespace: default
spec:
ports:
- name: service
port: 7000
targetPort: 7000
selector:
app: retriever-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: vector-db
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: vector-db
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: vector-db
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: redis/redis-stack:7.2.0-v9
imagePullPolicy: IfNotPresent
name: vector-db
ports:
- containerPort: 6379
- containerPort: 8001
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: vector-db
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: vector-db
namespace: default
spec:
ports:
- name: vector-db-service
port: 6379
targetPort: 6379
- name: vector-db-insight
port: 8001
targetPort: 8001
selector:
app: vector-db
type: ClusterIP
---

View File

@@ -0,0 +1,421 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
data:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
INDEX_NAME: rag-redis
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
LLM_SERVER_HOST_IP: llm-dependency-svc
NODE_SELECTOR: chatqna-opea
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
RERANK_MODEL_ID: BAAI/bge-reranker-base
RERANK_SERVER_HOST_IP: reranking-dependency-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
kind: ConfigMap
metadata:
name: qna-config
namespace: default
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-backend-server-deploy
namespace: default
spec:
replicas: 2
selector:
matchLabels:
app: chatqna-backend-server-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: chatqna-backend-server-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna-no-wrapper-without-rerank:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
ports:
- containerPort: 8888
resources:
limits:
cpu: 8
memory: 8000Mi
requests:
cpu: 8
memory: 8000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: chatqna-backend-server-svc
namespace: default
spec:
ports:
- name: service
nodePort: 30888
port: 8888
targetPort: 8888
selector:
app: chatqna-backend-server-deploy
type: NodePort
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: dataprep-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: dataprep-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: dataprep-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/dataprep-redis:latest
imagePullPolicy: IfNotPresent
name: dataprep-deploy
ports:
- containerPort: 6007
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: dataprep-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: dataprep-svc
namespace: default
spec:
ports:
- name: port1
port: 6007
targetPort: 6007
selector:
app: dataprep-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-dependency-deploy
namespace: default
spec:
replicas: 2
selector:
matchLabels:
app: embedding-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(EMBEDDING_MODEL_ID)
- --auto-truncate
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
imagePullPolicy: IfNotPresent
name: embedding-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
cpu: 80
memory: 20000Mi
requests:
cpu: 80
memory: 20000Mi
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: embedding-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: embedding-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 6006
targetPort: 80
selector:
app: embedding-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-dependency-deploy
namespace: default
spec:
replicas: 16
selector:
matchLabels:
app: llm-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(LLM_MODEL_ID)
- --max-input-length
- '1280'
- --max-total-tokens
- '2048'
- --max-batch-total-tokens
- '65536'
- --max-batch-prefill-tokens
- '4096'
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
securityContext:
capabilities:
add:
- SYS_NICE
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: llm-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: llm-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 9009
targetPort: 80
selector:
app: llm-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: retriever-deploy
namespace: default
spec:
replicas: 2
selector:
matchLabels:
app: retriever-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: retriever-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/retriever-redis:latest
imagePullPolicy: IfNotPresent
name: retriever-deploy
ports:
- containerPort: 7000
resources:
requests:
cpu: 4
memory: 4000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: retriever-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: retriever-svc
namespace: default
spec:
ports:
- name: service
port: 7000
targetPort: 7000
selector:
app: retriever-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: vector-db
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: vector-db
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: vector-db
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: redis/redis-stack:7.2.0-v9
imagePullPolicy: IfNotPresent
name: vector-db
ports:
- containerPort: 6379
- containerPort: 8001
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: vector-db
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: vector-db
namespace: default
spec:
ports:
- name: vector-db-service
port: 6379
targetPort: 6379
- name: vector-db-insight
port: 8001
targetPort: 8001
selector:
app: vector-db
type: ClusterIP
---

View File

@@ -1,6 +1,7 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import argparse
import json
import os
import re
@@ -95,7 +96,7 @@ def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_di
next_data["texts"] = [doc["text"] for doc in data["retrieved_docs"]]
else:
# forward to llm
if not docs:
if not docs and with_rerank:
# delete the rerank from retriever -> rerank -> llm
for ds in reversed(runtime_graph.downstream(cur_node)):
for nds in runtime_graph.downstream(ds):
@@ -260,6 +261,13 @@ class ChatQnAService:
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--without-rerank", action="store_true")
args = parser.parse_args()
chatqna = ChatQnAService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT)
chatqna.add_remote_service()
# chatqna.add_remote_service_without_rerank()
if args.without_rerank:
chatqna.add_remote_service_without_rerank()
else:
chatqna.add_remote_service()

View File

@@ -29,6 +29,12 @@ services:
dockerfile: ./Dockerfile.no_wrapper
extends: chatqna
image: ${REGISTRY:-opea}/chatqna-no-wrapper:${TAG:-latest}
chatqna-no-wrapper-without-rerank:
build:
context: ../
dockerfile: ./Dockerfile.no_wrapper_without_rerank
extends: chatqna
image: ${REGISTRY:-opea}/chatqna-no-wrapper-without-rerank:${TAG:-latest}
chatqna-ui:
build:
context: ../ui