Compare commits
9 Commits
v1.0_aise_
...
llama3.2_s
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e6fde1456d | ||
|
|
954a22051b | ||
|
|
6f4b00f829 | ||
|
|
3fb60608b3 | ||
|
|
c35fe0b429 | ||
|
|
28f5e4a268 | ||
|
|
d55a33dda1 | ||
|
|
daf2a4fad7 | ||
|
|
3ce395582b |
13
.github/workflows/_example-workflow.yml
vendored
13
.github/workflows/_example-workflow.yml
vendored
@@ -46,33 +46,30 @@ jobs:
|
||||
- name: Clean Up Working Directory
|
||||
run: sudo rm -rf ${{github.workspace}}/*
|
||||
|
||||
- name: Get checkout ref
|
||||
- name: Get Checkout Ref
|
||||
run: |
|
||||
if [ "${{ github.event_name }}" == "pull_request" ] || [ "${{ github.event_name }}" == "pull_request_target" ]; then
|
||||
echo "CHECKOUT_REF=refs/pull/${{ github.event.number }}/merge" >> $GITHUB_ENV
|
||||
else
|
||||
echo "CHECKOUT_REF=${{ github.ref }}" >> $GITHUB_ENV
|
||||
fi
|
||||
echo "checkout ref ${{ env.CHECKOUT_REF }}"
|
||||
|
||||
- name: Checkout out Repo
|
||||
- name: Checkout out GenAIExamples
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
ref: ${{ env.CHECKOUT_REF }}
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Clone required Repo
|
||||
- name: Clone Required Repo
|
||||
run: |
|
||||
cd ${{ github.workspace }}/${{ inputs.example }}/docker_image_build
|
||||
docker_compose_path=${{ github.workspace }}/${{ inputs.example }}/docker_image_build/build.yaml
|
||||
if [[ $(grep -c "tei-gaudi:" ${docker_compose_path}) != 0 ]]; then
|
||||
git clone https://github.com/huggingface/tei-gaudi.git
|
||||
fi
|
||||
if [[ $(grep -c "vllm:" ${docker_compose_path}) != 0 ]]; then
|
||||
git clone https://github.com/vllm-project/vllm.git
|
||||
cd vllm && git rev-parse HEAD && cd ../
|
||||
fi
|
||||
git clone https://github.com/opea-project/GenAIComps.git
|
||||
cd GenAIComps && git checkout ${{ inputs.opea_branch }} && cd ../
|
||||
cd GenAIComps && git checkout ${{ inputs.opea_branch }} && git rev-parse HEAD && cd ../
|
||||
|
||||
- name: Build Image
|
||||
if: ${{ fromJSON(inputs.build) }}
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
|
||||
services:
|
||||
tgi-server:
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
container_name: tgi-server
|
||||
ports:
|
||||
- "8085:80"
|
||||
@@ -13,12 +13,16 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
PT_HPU_ENABLE_LAZY_COLLECTIVES: true
|
||||
ENABLE_HPU_GRAPH: true
|
||||
LIMIT_HPU_GRAPH: true
|
||||
USE_FLASH_ATTENTION: true
|
||||
FLASH_ATTENTION_RECOMPUTE: true
|
||||
runtime: habana
|
||||
cap_add:
|
||||
- SYS_NICE
|
||||
|
||||
@@ -51,7 +51,7 @@ services:
|
||||
environment:
|
||||
TTS_ENDPOINT: ${TTS_ENDPOINT}
|
||||
tgi-service:
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
container_name: tgi-gaudi-server
|
||||
ports:
|
||||
- "3006:80"
|
||||
@@ -61,11 +61,15 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
ENABLE_HPU_GRAPH: true
|
||||
LIMIT_HPU_GRAPH: true
|
||||
USE_FLASH_ATTENTION: true
|
||||
FLASH_ATTENTION_RECOMPUTE: true
|
||||
runtime: habana
|
||||
cap_add:
|
||||
- SYS_NICE
|
||||
|
||||
@@ -25,7 +25,7 @@ The AudioQnA uses the below prebuilt images if you choose a Xeon deployment
|
||||
Should you desire to use the Gaudi accelerator, two alternate images are used for the embedding and LLM services.
|
||||
For Gaudi:
|
||||
|
||||
- tgi-service: ghcr.io/huggingface/tgi-gaudi:1.2.1
|
||||
- tgi-service: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
- whisper-gaudi: opea/whisper-gaudi:latest
|
||||
- speecht5-gaudi: opea/speecht5-gaudi:latest
|
||||
|
||||
|
||||
@@ -247,7 +247,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: audio-qna-config
|
||||
image: ghcr.io/huggingface/text-generation-inference:2.2.0
|
||||
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
|
||||
name: llm-dependency-deploy-demo
|
||||
securityContext:
|
||||
capabilities:
|
||||
|
||||
@@ -271,7 +271,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: audio-qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
name: llm-dependency-deploy-demo
|
||||
securityContext:
|
||||
capabilities:
|
||||
@@ -303,6 +303,14 @@ spec:
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: ENABLE_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: LIMIT_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: USE_FLASH_ATTENTION
|
||||
value: 'true'
|
||||
- name: FLASH_ATTENTION_RECOMPUTE
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
@@ -315,7 +323,7 @@ spec:
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /home/sdp/cesg
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
- name: shm
|
||||
emptyDir:
|
||||
|
||||
@@ -22,7 +22,7 @@ function build_docker_images() {
|
||||
service_list="audioqna whisper-gaudi asr llm-tgi speecht5-gaudi tts"
|
||||
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
|
||||
|
||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
|
||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
docker images && sleep 1s
|
||||
}
|
||||
|
||||
|
||||
@@ -22,7 +22,7 @@ function build_docker_images() {
|
||||
service_list="audioqna whisper asr llm-tgi speecht5 tts"
|
||||
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
|
||||
|
||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
|
||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
docker images && sleep 1s
|
||||
}
|
||||
|
||||
|
||||
@@ -72,7 +72,7 @@ docker pull opea/chatqna-ui:latest
|
||||
|
||||
In the following cases, you can build the Docker image from source yourself.
|
||||
|
||||
- Failed to download the docker image. (The essential Docker image `opea/nginx` has not yet been released, users need to build this image first)
|
||||
- Failed to download the docker image.
|
||||
|
||||
- If you want to use a specific version of the Docker image.
|
||||
|
||||
|
||||
@@ -0,0 +1,653 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
data:
|
||||
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||
EMBEDDING_SERVICE_HOST_IP: embedding-svc
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
INDEX_NAME: rag-redis
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
LLM_SERVICE_HOST_IP: llm-svc
|
||||
NODE_SELECTOR: chatqna-opea
|
||||
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||
RERANK_SERVICE_HOST_IP: reranking-svc
|
||||
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: qna-config
|
||||
namespace: default
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-backend-server-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: chatqna-backend-server-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
- containerPort: 8888
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: chatqna-backend-server-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
nodePort: 30888
|
||||
port: 8888
|
||||
targetPort: 8888
|
||||
selector:
|
||||
app: chatqna-backend-server-deploy
|
||||
type: NodePort
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: dataprep-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: dataprep-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/dataprep-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: dataprep-deploy
|
||||
ports:
|
||||
- containerPort: 6007
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: dataprep-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: port1
|
||||
port: 6007
|
||||
targetPort: 6007
|
||||
selector:
|
||||
app: dataprep-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(EMBEDDING_MODEL_ID)
|
||||
- --auto-truncate
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: embedding-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 6006
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: embedding-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/embedding-tei:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-deploy
|
||||
ports:
|
||||
- containerPort: 6000
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: embedding-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 6000
|
||||
targetPort: 6000
|
||||
selector:
|
||||
app: embedding-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 31
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(LLM_MODEL_ID)
|
||||
- --max-input-length
|
||||
- '2048'
|
||||
- --max-total-tokens
|
||||
- '4096'
|
||||
- --max-batch-total-tokens
|
||||
- '65536'
|
||||
- --max-batch-prefill-tokens
|
||||
- '4096'
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
- name: ENABLE_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: LIMIT_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: USE_FLASH_ATTENTION
|
||||
value: 'true'
|
||||
- name: FLASH_ATTENTION_RECOMPUTE
|
||||
value: 'true'
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
securityContext:
|
||||
capabilities:
|
||||
add:
|
||||
- SYS_NICE
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: llm-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 9009
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/llm-tgi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-deploy
|
||||
ports:
|
||||
- containerPort: 9000
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: llm-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 9000
|
||||
targetPort: 9000
|
||||
selector:
|
||||
app: llm-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: reranking-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: reranking-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: reranking-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(RERANK_MODEL_ID)
|
||||
- --auto-truncate
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HF_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
- name: MAX_WARMUP_SEQUENCE_LENGTH
|
||||
value: '512'
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/tei-gaudi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: reranking-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: reranking-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 8808
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: reranking-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: reranking-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: reranking-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: reranking-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/reranking-tei:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-deploy
|
||||
ports:
|
||||
- containerPort: 8000
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: reranking-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: reranking-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 8000
|
||||
targetPort: 8000
|
||||
selector:
|
||||
app: reranking-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: retriever-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: retriever-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/retriever-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: retriever-deploy
|
||||
ports:
|
||||
- containerPort: 7000
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: retriever-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 7000
|
||||
targetPort: 7000
|
||||
selector:
|
||||
app: retriever-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: vector-db
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: redis/redis-stack:7.2.0-v9
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: vector-db
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
- containerPort: 8001
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: vector-db-service
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
- name: vector-db-insight
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
selector:
|
||||
app: vector-db
|
||||
type: ClusterIP
|
||||
---
|
||||
@@ -0,0 +1,653 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
data:
|
||||
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||
EMBEDDING_SERVICE_HOST_IP: embedding-svc
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
INDEX_NAME: rag-redis
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
LLM_SERVICE_HOST_IP: llm-svc
|
||||
NODE_SELECTOR: chatqna-opea
|
||||
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||
RERANK_SERVICE_HOST_IP: reranking-svc
|
||||
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: qna-config
|
||||
namespace: default
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-backend-server-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: chatqna-backend-server-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
- containerPort: 8888
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: chatqna-backend-server-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
nodePort: 30888
|
||||
port: 8888
|
||||
targetPort: 8888
|
||||
selector:
|
||||
app: chatqna-backend-server-deploy
|
||||
type: NodePort
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: dataprep-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: dataprep-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/dataprep-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: dataprep-deploy
|
||||
ports:
|
||||
- containerPort: 6007
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: dataprep-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: port1
|
||||
port: 6007
|
||||
targetPort: 6007
|
||||
selector:
|
||||
app: dataprep-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(EMBEDDING_MODEL_ID)
|
||||
- --auto-truncate
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: embedding-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 6006
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: embedding-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/embedding-tei:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-deploy
|
||||
ports:
|
||||
- containerPort: 6000
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: embedding-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 6000
|
||||
targetPort: 6000
|
||||
selector:
|
||||
app: embedding-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 7
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(LLM_MODEL_ID)
|
||||
- --max-input-length
|
||||
- '2048'
|
||||
- --max-total-tokens
|
||||
- '4096'
|
||||
- --max-batch-total-tokens
|
||||
- '65536'
|
||||
- --max-batch-prefill-tokens
|
||||
- '4096'
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
- name: ENABLE_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: LIMIT_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: USE_FLASH_ATTENTION
|
||||
value: 'true'
|
||||
- name: FLASH_ATTENTION_RECOMPUTE
|
||||
value: 'true'
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
securityContext:
|
||||
capabilities:
|
||||
add:
|
||||
- SYS_NICE
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: llm-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 9009
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/llm-tgi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-deploy
|
||||
ports:
|
||||
- containerPort: 9000
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: llm-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 9000
|
||||
targetPort: 9000
|
||||
selector:
|
||||
app: llm-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: reranking-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: reranking-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: reranking-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(RERANK_MODEL_ID)
|
||||
- --auto-truncate
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HF_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
- name: MAX_WARMUP_SEQUENCE_LENGTH
|
||||
value: '512'
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/tei-gaudi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: reranking-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: reranking-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 8808
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: reranking-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: reranking-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: reranking-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: reranking-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/reranking-tei:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-deploy
|
||||
ports:
|
||||
- containerPort: 8000
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: reranking-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: reranking-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 8000
|
||||
targetPort: 8000
|
||||
selector:
|
||||
app: reranking-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: retriever-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: retriever-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/retriever-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: retriever-deploy
|
||||
ports:
|
||||
- containerPort: 7000
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: retriever-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 7000
|
||||
targetPort: 7000
|
||||
selector:
|
||||
app: retriever-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: vector-db
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: redis/redis-stack:7.2.0-v9
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: vector-db
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
- containerPort: 8001
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: vector-db-service
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
- name: vector-db-insight
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
selector:
|
||||
app: vector-db
|
||||
type: ClusterIP
|
||||
---
|
||||
@@ -0,0 +1,653 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
data:
|
||||
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||
EMBEDDING_SERVICE_HOST_IP: embedding-svc
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
INDEX_NAME: rag-redis
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
LLM_SERVICE_HOST_IP: llm-svc
|
||||
NODE_SELECTOR: chatqna-opea
|
||||
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||
RERANK_SERVICE_HOST_IP: reranking-svc
|
||||
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: qna-config
|
||||
namespace: default
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-backend-server-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: chatqna-backend-server-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
- containerPort: 8888
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: chatqna-backend-server-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
nodePort: 30888
|
||||
port: 8888
|
||||
targetPort: 8888
|
||||
selector:
|
||||
app: chatqna-backend-server-deploy
|
||||
type: NodePort
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: dataprep-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: dataprep-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/dataprep-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: dataprep-deploy
|
||||
ports:
|
||||
- containerPort: 6007
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: dataprep-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: port1
|
||||
port: 6007
|
||||
targetPort: 6007
|
||||
selector:
|
||||
app: dataprep-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(EMBEDDING_MODEL_ID)
|
||||
- --auto-truncate
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: embedding-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 6006
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: embedding-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/embedding-tei:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-deploy
|
||||
ports:
|
||||
- containerPort: 6000
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: embedding-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 6000
|
||||
targetPort: 6000
|
||||
selector:
|
||||
app: embedding-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 15
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(LLM_MODEL_ID)
|
||||
- --max-input-length
|
||||
- '2048'
|
||||
- --max-total-tokens
|
||||
- '4096'
|
||||
- --max-batch-total-tokens
|
||||
- '65536'
|
||||
- --max-batch-prefill-tokens
|
||||
- '4096'
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
- name: ENABLE_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: LIMIT_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: USE_FLASH_ATTENTION
|
||||
value: 'true'
|
||||
- name: FLASH_ATTENTION_RECOMPUTE
|
||||
value: 'true'
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
securityContext:
|
||||
capabilities:
|
||||
add:
|
||||
- SYS_NICE
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: llm-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 9009
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/llm-tgi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-deploy
|
||||
ports:
|
||||
- containerPort: 9000
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: llm-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 9000
|
||||
targetPort: 9000
|
||||
selector:
|
||||
app: llm-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: reranking-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: reranking-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: reranking-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(RERANK_MODEL_ID)
|
||||
- --auto-truncate
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HF_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
- name: MAX_WARMUP_SEQUENCE_LENGTH
|
||||
value: '512'
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/tei-gaudi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: reranking-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: reranking-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 8808
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: reranking-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: reranking-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: reranking-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: reranking-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/reranking-tei:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-deploy
|
||||
ports:
|
||||
- containerPort: 8000
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: reranking-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: reranking-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 8000
|
||||
targetPort: 8000
|
||||
selector:
|
||||
app: reranking-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: retriever-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: retriever-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/retriever-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: retriever-deploy
|
||||
ports:
|
||||
- containerPort: 7000
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: retriever-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 7000
|
||||
targetPort: 7000
|
||||
selector:
|
||||
app: retriever-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: vector-db
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: redis/redis-stack:7.2.0-v9
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: vector-db
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
- containerPort: 8001
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: vector-db-service
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
- name: vector-db-insight
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
selector:
|
||||
app: vector-db
|
||||
type: ClusterIP
|
||||
---
|
||||
@@ -0,0 +1,742 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: qna-config
|
||||
namespace: default
|
||||
data:
|
||||
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||
INDEX_NAME: rag-redis
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
EMBEDDING_SERVICE_HOST_IP: embedding-svc
|
||||
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||
RERANK_SERVICE_HOST_IP: reranking-svc
|
||||
NODE_SELECTOR: chatqna-opea
|
||||
LLM_SERVICE_HOST_IP: llm-svc
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-backend-server-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: chatqna-backend-server-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna-without-rerank:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 8888
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: chatqna-backend-server-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: NodePort
|
||||
selector:
|
||||
app: chatqna-backend-server-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 8888
|
||||
targetPort: 8888
|
||||
nodePort: 30888
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: dataprep-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: dataprep-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- env:
|
||||
- name: REDIS_URL
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: REDIS_URL
|
||||
- name: TEI_ENDPOINT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: TEI_EMBEDDING_ENDPOINT
|
||||
- name: INDEX_NAME
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: INDEX_NAME
|
||||
image: opea/dataprep-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: dataprep-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 6007
|
||||
- containerPort: 6008
|
||||
- containerPort: 6009
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: dataprep-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: dataprep-deploy
|
||||
ports:
|
||||
- name: port1
|
||||
port: 6007
|
||||
targetPort: 6007
|
||||
- name: port2
|
||||
port: 6008
|
||||
targetPort: 6008
|
||||
- name: port3
|
||||
port: 6009
|
||||
targetPort: 6009
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-dependency-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
name: embedding-dependency-deploy
|
||||
args:
|
||||
- --model-id
|
||||
- $(EMBEDDING_MODEL_ID)
|
||||
- --auto-truncate
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
ports:
|
||||
- containerPort: 80
|
||||
serviceAccountName: default
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
- name: shm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: embedding-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: embedding-dependency-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 6006
|
||||
targetPort: 80
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/embedding-tei:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 6000
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: embedding-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: embedding-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 6000
|
||||
targetPort: 6000
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 32
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-dependency-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
name: llm-dependency-deploy-demo
|
||||
securityContext:
|
||||
capabilities:
|
||||
add:
|
||||
- SYS_NICE
|
||||
args:
|
||||
- --model-id
|
||||
- $(LLM_MODEL_ID)
|
||||
- --max-input-length
|
||||
- '2048'
|
||||
- --max-total-tokens
|
||||
- '4096'
|
||||
- --max-batch-total-tokens
|
||||
- '65536'
|
||||
- --max-batch-prefill-tokens
|
||||
- '4096'
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
- name: ENABLE_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: LIMIT_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: USE_FLASH_ATTENTION
|
||||
value: 'true'
|
||||
- name: FLASH_ATTENTION_RECOMPUTE
|
||||
value: 'true'
|
||||
serviceAccountName: default
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
- name: shm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: llm-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 9009
|
||||
targetPort: 80
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/llm-tgi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 9000
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: llm-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: llm-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 9000
|
||||
targetPort: 9000
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: reranking-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: reranking-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: reranking-dependency-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: reranking-dependency-deploy
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||
name: reranking-dependency-deploy
|
||||
args:
|
||||
- --model-id
|
||||
- $(RERANK_MODEL_ID)
|
||||
- --auto-truncate
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HF_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
- name: MAX_WARMUP_SEQUENCE_LENGTH
|
||||
value: '512'
|
||||
serviceAccountName: default
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
- name: shm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: reranking-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: reranking-dependency-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 8808
|
||||
targetPort: 80
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: reranking-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: reranking-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: reranking-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: reranking-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/reranking-tei:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 8000
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: reranking-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: reranking-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 8000
|
||||
targetPort: 8000
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: retriever-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: retriever-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- env:
|
||||
- name: REDIS_URL
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: REDIS_URL
|
||||
- name: TEI_EMBEDDING_ENDPOINT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: TEI_EMBEDDING_ENDPOINT
|
||||
- name: HUGGINGFACEHUB_API_TOKEN
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: HUGGINGFACEHUB_API_TOKEN
|
||||
- name: INDEX_NAME
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: INDEX_NAME
|
||||
image: opea/retriever-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: retriever-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 7000
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: retriever-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: retriever-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 7000
|
||||
targetPort: 7000
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: vector-db
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
containers:
|
||||
- name: vector-db
|
||||
image: redis/redis-stack:7.2.0-v9
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
- containerPort: 8001
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: vector-db
|
||||
ports:
|
||||
- name: vector-db-service
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
- name: vector-db-insight
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
|
||||
|
||||
---
|
||||
@@ -0,0 +1,591 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: qna-config
|
||||
namespace: default
|
||||
data:
|
||||
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||
INDEX_NAME: rag-redis
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
EMBEDDING_SERVICE_HOST_IP: embedding-svc
|
||||
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||
RERANK_SERVICE_HOST_IP: reranking-svc
|
||||
NODE_SELECTOR: chatqna-opea
|
||||
LLM_SERVICE_HOST_IP: llm-svc
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-backend-server-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: chatqna-backend-server-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna-without-rerank:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 8888
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: chatqna-backend-server-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: NodePort
|
||||
selector:
|
||||
app: chatqna-backend-server-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 8888
|
||||
targetPort: 8888
|
||||
nodePort: 30888
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: dataprep-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: dataprep-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- env:
|
||||
- name: REDIS_URL
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: REDIS_URL
|
||||
- name: TEI_ENDPOINT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: TEI_EMBEDDING_ENDPOINT
|
||||
- name: INDEX_NAME
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: INDEX_NAME
|
||||
image: opea/dataprep-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: dataprep-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 6007
|
||||
- containerPort: 6008
|
||||
- containerPort: 6009
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: dataprep-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: dataprep-deploy
|
||||
ports:
|
||||
- name: port1
|
||||
port: 6007
|
||||
targetPort: 6007
|
||||
- name: port2
|
||||
port: 6008
|
||||
targetPort: 6008
|
||||
- name: port3
|
||||
port: 6009
|
||||
targetPort: 6009
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-dependency-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
name: embedding-dependency-deploy
|
||||
args:
|
||||
- --model-id
|
||||
- $(EMBEDDING_MODEL_ID)
|
||||
- --auto-truncate
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
ports:
|
||||
- containerPort: 80
|
||||
serviceAccountName: default
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
- name: shm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: embedding-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: embedding-dependency-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 6006
|
||||
targetPort: 80
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/embedding-tei:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 6000
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: embedding-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: embedding-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 6000
|
||||
targetPort: 6000
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 8
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-dependency-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
name: llm-dependency-deploy-demo
|
||||
securityContext:
|
||||
capabilities:
|
||||
add:
|
||||
- SYS_NICE
|
||||
args:
|
||||
- --model-id
|
||||
- $(LLM_MODEL_ID)
|
||||
- --max-input-length
|
||||
- '2048'
|
||||
- --max-total-tokens
|
||||
- '4096'
|
||||
- --max-batch-total-tokens
|
||||
- '65536'
|
||||
- --max-batch-prefill-tokens
|
||||
- '4096'
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
- name: ENABLE_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: LIMIT_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: USE_FLASH_ATTENTION
|
||||
value: 'true'
|
||||
- name: FLASH_ATTENTION_RECOMPUTE
|
||||
value: 'true'
|
||||
serviceAccountName: default
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
- name: shm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: llm-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 9009
|
||||
targetPort: 80
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/llm-tgi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 9000
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: llm-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: llm-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 9000
|
||||
targetPort: 9000
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: retriever-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: retriever-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- env:
|
||||
- name: REDIS_URL
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: REDIS_URL
|
||||
- name: TEI_EMBEDDING_ENDPOINT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: TEI_EMBEDDING_ENDPOINT
|
||||
- name: HUGGINGFACEHUB_API_TOKEN
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: HUGGINGFACEHUB_API_TOKEN
|
||||
- name: INDEX_NAME
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: INDEX_NAME
|
||||
image: opea/retriever-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: retriever-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 7000
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: retriever-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: retriever-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 7000
|
||||
targetPort: 7000
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: vector-db
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
containers:
|
||||
- name: vector-db
|
||||
image: redis/redis-stack:7.2.0-v9
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
- containerPort: 8001
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: vector-db
|
||||
ports:
|
||||
- name: vector-db-service
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
- name: vector-db-insight
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
|
||||
|
||||
---
|
||||
@@ -0,0 +1,591 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: qna-config
|
||||
namespace: default
|
||||
data:
|
||||
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||
INDEX_NAME: rag-redis
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
EMBEDDING_SERVICE_HOST_IP: embedding-svc
|
||||
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||
RERANK_SERVICE_HOST_IP: reranking-svc
|
||||
NODE_SELECTOR: chatqna-opea
|
||||
LLM_SERVICE_HOST_IP: llm-svc
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-backend-server-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: chatqna-backend-server-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna-without-rerank:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 8888
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: chatqna-backend-server-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: NodePort
|
||||
selector:
|
||||
app: chatqna-backend-server-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 8888
|
||||
targetPort: 8888
|
||||
nodePort: 30888
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: dataprep-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: dataprep-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- env:
|
||||
- name: REDIS_URL
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: REDIS_URL
|
||||
- name: TEI_ENDPOINT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: TEI_EMBEDDING_ENDPOINT
|
||||
- name: INDEX_NAME
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: INDEX_NAME
|
||||
image: opea/dataprep-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: dataprep-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 6007
|
||||
- containerPort: 6008
|
||||
- containerPort: 6009
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: dataprep-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: dataprep-deploy
|
||||
ports:
|
||||
- name: port1
|
||||
port: 6007
|
||||
targetPort: 6007
|
||||
- name: port2
|
||||
port: 6008
|
||||
targetPort: 6008
|
||||
- name: port3
|
||||
port: 6009
|
||||
targetPort: 6009
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-dependency-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
name: embedding-dependency-deploy
|
||||
args:
|
||||
- --model-id
|
||||
- $(EMBEDDING_MODEL_ID)
|
||||
- --auto-truncate
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
ports:
|
||||
- containerPort: 80
|
||||
serviceAccountName: default
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
- name: shm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: embedding-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: embedding-dependency-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 6006
|
||||
targetPort: 80
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/embedding-tei:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 6000
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: embedding-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: embedding-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 6000
|
||||
targetPort: 6000
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 16
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-dependency-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
name: llm-dependency-deploy-demo
|
||||
securityContext:
|
||||
capabilities:
|
||||
add:
|
||||
- SYS_NICE
|
||||
args:
|
||||
- --model-id
|
||||
- $(LLM_MODEL_ID)
|
||||
- --max-input-length
|
||||
- '2048'
|
||||
- --max-total-tokens
|
||||
- '4096'
|
||||
- --max-batch-total-tokens
|
||||
- '65536'
|
||||
- --max-batch-prefill-tokens
|
||||
- '4096'
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
- name: ENABLE_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: LIMIT_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: USE_FLASH_ATTENTION
|
||||
value: 'true'
|
||||
- name: FLASH_ATTENTION_RECOMPUTE
|
||||
value: 'true'
|
||||
serviceAccountName: default
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
- name: shm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: llm-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 9009
|
||||
targetPort: 80
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/llm-tgi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 9000
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: llm-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: llm-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 9000
|
||||
targetPort: 9000
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: retriever-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: retriever-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- env:
|
||||
- name: REDIS_URL
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: REDIS_URL
|
||||
- name: TEI_EMBEDDING_ENDPOINT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: TEI_EMBEDDING_ENDPOINT
|
||||
- name: HUGGINGFACEHUB_API_TOKEN
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: HUGGINGFACEHUB_API_TOKEN
|
||||
- name: INDEX_NAME
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: INDEX_NAME
|
||||
image: opea/retriever-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: retriever-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 7000
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: retriever-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: retriever-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 7000
|
||||
targetPort: 7000
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: vector-db
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
containers:
|
||||
- name: vector-db
|
||||
image: redis/redis-stack:7.2.0-v9
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
- containerPort: 8001
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: vector-db
|
||||
ports:
|
||||
- name: vector-db-service
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
- name: vector-db-insight
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
|
||||
|
||||
---
|
||||
@@ -29,6 +29,8 @@ Results will be displayed in the terminal and saved as CSV file named `1_stats.c
|
||||
|
||||
## Getting Started
|
||||
|
||||
We recommend using Kubernetes to deploy the ChatQnA service, as it offers benefits such as load balancing and improved scalability. However, you can also deploy the service using Docker if that better suits your needs. Below is a description of Kubernetes deployment and benchmarking. For instructions on deploying and benchmarking with Docker, please refer to [this section](#benchmark-with-docker).
|
||||
|
||||
### Prerequisites
|
||||
|
||||
- Install Kubernetes by following [this guide](https://github.com/opea-project/docs/blob/main/guide/installation/k8s_install/k8s_install_kubespray.md).
|
||||
@@ -187,10 +189,13 @@ curl -X POST "http://${cluster_ip}:6007/v1/dataprep" \
|
||||
|
||||
###### 3.2 Run Benchmark Test
|
||||
|
||||
We copy the configuration file [benchmark.yaml](./benchmark.yaml) to `GenAIEval/evals/benchmark/benchmark.yaml` and config `test_suite_config.user_queries` and `test_suite_config.test_output_dir`.
|
||||
We copy the configuration file [benchmark.yaml](./benchmark.yaml) to `GenAIEval/evals/benchmark/benchmark.yaml` and configure `test_suite_config.deployment_type`, `test_suite_config.service_ip`, `test_suite_config.service_port`, `test_suite_config.user_queries` and `test_suite_config.test_output_dir`.
|
||||
|
||||
```bash
|
||||
export USER_QUERIES="[4, 8, 16, 640]"
|
||||
export DEPLOYMENT_TYPE="k8s"
|
||||
export SERVICE_IP=None
|
||||
export SERVICE_PORT=None
|
||||
export USER_QUERIES="[640, 640, 640, 640]"
|
||||
export TEST_OUTPUT_DIR="/home/sdp/benchmark_output/node_1"
|
||||
envsubst < ./benchmark.yaml > GenAIEval/evals/benchmark/benchmark.yaml
|
||||
```
|
||||
@@ -237,20 +242,22 @@ kubectl apply -f .
|
||||
|
||||
##### 3. Run tests
|
||||
|
||||
We copy the configuration file [benchmark.yaml](./benchmark.yaml) to `GenAIEval/evals/benchmark/benchmark.yaml` and config `test_suite_config.user_queries` and `test_suite_config.test_output_dir`.
|
||||
We copy the configuration file [benchmark.yaml](./benchmark.yaml) to `GenAIEval/evals/benchmark/benchmark.yaml` and configure `test_suite_config.deployment_type`, `test_suite_config.service_ip`, `test_suite_config.service_port`, `test_suite_config.user_queries` and `test_suite_config.test_output_dir`.
|
||||
|
||||
```bash
|
||||
export USER_QUERIES="[4, 8, 16, 1280]"
|
||||
````bash
|
||||
export DEPLOYMENT_TYPE="k8s"
|
||||
export SERVICE_IP=None
|
||||
export SERVICE_PORT=None
|
||||
export USER_QUERIES="[1280, 1280, 1280, 1280]"
|
||||
export TEST_OUTPUT_DIR="/home/sdp/benchmark_output/node_2"
|
||||
envsubst < ./benchmark.yaml > GenAIEval/evals/benchmark/benchmark.yaml
|
||||
```
|
||||
|
||||
And then run the benchmark tool by:
|
||||
|
||||
```bash
|
||||
cd GenAIEval/evals/benchmark
|
||||
python benchmark.py
|
||||
```
|
||||
````
|
||||
|
||||
##### 4. Data collection
|
||||
|
||||
@@ -286,10 +293,13 @@ kubectl apply -f .
|
||||
|
||||
##### 3. Run tests
|
||||
|
||||
We copy the configuration file [benchmark.yaml](./benchmark.yaml) to `GenAIEval/evals/benchmark/benchmark.yaml` and config `test_suite_config.user_queries` and `test_suite_config.test_output_dir`.
|
||||
We copy the configuration file [benchmark.yaml](./benchmark.yaml) to `GenAIEval/evals/benchmark/benchmark.yaml` and configure `test_suite_config.deployment_type`, `test_suite_config.service_ip`, `test_suite_config.service_port`, `test_suite_config.user_queries` and `test_suite_config.test_output_dir`.
|
||||
|
||||
```bash
|
||||
export USER_QUERIES="[4, 8, 16, 2560]"
|
||||
export DEPLOYMENT_TYPE="k8s"
|
||||
export SERVICE_IP=None
|
||||
export SERVICE_PORT=None
|
||||
export USER_QUERIES="[2560, 2560, 2560, 2560]"
|
||||
export TEST_OUTPUT_DIR="/home/sdp/benchmark_output/node_4"
|
||||
envsubst < ./benchmark.yaml > GenAIEval/evals/benchmark/benchmark.yaml
|
||||
```
|
||||
@@ -313,3 +323,80 @@ cd GenAIExamples/ChatQnA/benchmark/performance/tuned/with_rerank/single_gaudi
|
||||
kubectl delete -f .
|
||||
kubectl label nodes k8s-master k8s-worker1 k8s-worker2 k8s-worker3 node-type-
|
||||
```
|
||||
|
||||
## Benchmark with Docker
|
||||
|
||||
### Deploy ChatQnA service with Docker
|
||||
|
||||
In order to set up the environment correctly, you'll need to configure essential environment variables and, if applicable, proxy-related variables.
|
||||
|
||||
```bash
|
||||
# Example: host_ip="192.168.1.1"
|
||||
export host_ip="External_Public_IP"
|
||||
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
|
||||
export no_proxy="Your_No_Proxy"
|
||||
export http_proxy="Your_HTTP_Proxy"
|
||||
export https_proxy="Your_HTTPs_Proxy"
|
||||
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
|
||||
```
|
||||
|
||||
#### Deploy ChatQnA on Gaudi
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/ChatQnA/docker_compose/intel/hpu/gaudi/
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
Refer to the [Gaudi Guide](../../docker_compose/intel/hpu/gaudi/README.md) to build docker images from source.
|
||||
|
||||
#### Deploy ChatQnA on Xeon
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/ChatQnA/docker_compose/intel/cpu/xeon/
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
Refer to the [Xeon Guide](../../docker_compose/intel/cpu/xeon/README.md) for more instructions on building docker images from source.
|
||||
|
||||
#### Deploy ChatQnA on NVIDIA GPU
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/ChatQnA/docker_compose/nvidia/gpu/
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
Refer to the [NVIDIA GPU Guide](../../docker_compose/nvidia/gpu/README.md) for more instructions on building docker images from source.
|
||||
|
||||
### Run tests
|
||||
|
||||
We copy the configuration file [benchmark.yaml](./benchmark.yaml) to `GenAIEval/evals/benchmark/benchmark.yaml` and configure `test_suite_config.deployment_type`, `test_suite_config.service_ip`, `test_suite_config.service_port`, `test_suite_config.user_queries` and `test_suite_config.test_output_dir`.
|
||||
|
||||
```bash
|
||||
export DEPLOYMENT_TYPE="docker"
|
||||
export SERVICE_IP="ChatQnA Service IP"
|
||||
export SERVICE_PORT="ChatQnA Service Port"
|
||||
export USER_QUERIES="[640, 640, 640, 640]"
|
||||
export TEST_OUTPUT_DIR="/home/sdp/benchmark_output/docker"
|
||||
envsubst < ./benchmark.yaml > GenAIEval/evals/benchmark/benchmark.yaml
|
||||
```
|
||||
|
||||
And then run the benchmark tool by:
|
||||
|
||||
```bash
|
||||
cd GenAIEval/evals/benchmark
|
||||
python benchmark.py
|
||||
```
|
||||
|
||||
### Data collection
|
||||
|
||||
All test results are written to the folder `/home/sdp/benchmark_output/docker`, which is the value set for the environment variable `TEST_OUTPUT_DIR` in the previous steps.
|
||||
|
||||
### Clean up
|
||||
|
||||
Taking Gaudi as an example, use the command below to clean up the system.
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/ChatQnA/docker_compose/intel/hpu/gaudi
|
||||
docker compose stop && docker compose rm -f
|
||||
echo y | docker system prune
|
||||
```
|
||||
|
||||
@@ -3,6 +3,9 @@
|
||||
|
||||
test_suite_config: # Overall configuration settings for the test suite
|
||||
examples: ["chatqna"] # The specific test cases being tested, e.g., chatqna, codegen, codetrans, faqgen, audioqna, visualqna
|
||||
deployment_type: ${DEPLOYMENT_TYPE} # Default is "k8s", can also be "docker"
|
||||
service_ip: ${SERVICE_IP} # Leave as None for k8s, specify for Docker
|
||||
service_port: ${SERVICE_PORT} # Leave as None for k8s, specify for Docker
|
||||
concurrent_level: 5 # The concurrency level, adjustable based on requirements
|
||||
user_queries: ${USER_QUERIES} # Number of test requests at each concurrency level
|
||||
random_prompt: false # Use random prompts if true, fixed prompts if false
|
||||
|
||||
@@ -167,10 +167,10 @@ spec:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
cpu: 80
|
||||
cpu: 76
|
||||
memory: 20000Mi
|
||||
requests:
|
||||
cpu: 80
|
||||
cpu: 76
|
||||
memory: 20000Mi
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
|
||||
@@ -167,10 +167,10 @@ spec:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
cpu: 80
|
||||
cpu: 76
|
||||
memory: 20000Mi
|
||||
requests:
|
||||
cpu: 80
|
||||
cpu: 76
|
||||
memory: 20000Mi
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
|
||||
@@ -167,10 +167,10 @@ spec:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
cpu: 80
|
||||
cpu: 76
|
||||
memory: 20000Mi
|
||||
requests:
|
||||
cpu: 80
|
||||
cpu: 76
|
||||
memory: 20000Mi
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
|
||||
@@ -167,10 +167,10 @@ spec:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
cpu: 80
|
||||
cpu: 76
|
||||
memory: 20000Mi
|
||||
requests:
|
||||
cpu: 80
|
||||
cpu: 76
|
||||
memory: 20000Mi
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
|
||||
@@ -167,10 +167,10 @@ spec:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
cpu: 80
|
||||
cpu: 76
|
||||
memory: 20000Mi
|
||||
requests:
|
||||
cpu: 80
|
||||
cpu: 76
|
||||
memory: 20000Mi
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
|
||||
@@ -167,10 +167,10 @@ spec:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
cpu: 80
|
||||
cpu: 76
|
||||
memory: 20000Mi
|
||||
requests:
|
||||
cpu: 80
|
||||
cpu: 76
|
||||
memory: 20000Mi
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
|
||||
@@ -167,10 +167,10 @@ spec:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
cpu: 80
|
||||
cpu: 76
|
||||
memory: 20000Mi
|
||||
requests:
|
||||
cpu: 80
|
||||
cpu: 76
|
||||
memory: 20000Mi
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
|
||||
@@ -167,10 +167,10 @@ spec:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
cpu: 80
|
||||
cpu: 76
|
||||
memory: 20000Mi
|
||||
requests:
|
||||
cpu: 80
|
||||
cpu: 76
|
||||
memory: 20000Mi
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
|
||||
@@ -0,0 +1,683 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
data:
|
||||
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||
EMBEDDING_SERVICE_HOST_IP: embedding-svc
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
INDEX_NAME: rag-redis
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
LLM_SERVICE_HOST_IP: llm-svc
|
||||
NODE_SELECTOR: chatqna-opea
|
||||
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||
RERANK_SERVICE_HOST_IP: reranking-svc
|
||||
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: qna-config
|
||||
namespace: default
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-backend-server-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 4
|
||||
selector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: chatqna-backend-server-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
- containerPort: 8888
|
||||
resources:
|
||||
limits:
|
||||
cpu: 8
|
||||
memory: 8000Mi
|
||||
requests:
|
||||
cpu: 8
|
||||
memory: 8000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: chatqna-backend-server-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
nodePort: 30888
|
||||
port: 8888
|
||||
targetPort: 8888
|
||||
selector:
|
||||
app: chatqna-backend-server-deploy
|
||||
type: NodePort
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: dataprep-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: dataprep-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/dataprep-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: dataprep-deploy
|
||||
ports:
|
||||
- containerPort: 6007
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: dataprep-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: port1
|
||||
port: 6007
|
||||
targetPort: 6007
|
||||
selector:
|
||||
app: dataprep-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 4
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(EMBEDDING_MODEL_ID)
|
||||
- --auto-truncate
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
cpu: 76
|
||||
memory: 20000Mi
|
||||
requests:
|
||||
cpu: 76
|
||||
memory: 20000Mi
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: embedding-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 6006
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: embedding-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 4
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/embedding-tei:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-deploy
|
||||
ports:
|
||||
- containerPort: 6000
|
||||
resources:
|
||||
requests:
|
||||
cpu: 4
|
||||
memory: 4000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: embedding-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 6000
|
||||
targetPort: 6000
|
||||
selector:
|
||||
app: embedding-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 31
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(LLM_MODEL_ID)
|
||||
- --max-input-length
|
||||
- '1024'
|
||||
- --max-total-tokens
|
||||
- '2048'
|
||||
- --max-batch-total-tokens
|
||||
- '65536'
|
||||
- --max-batch-prefill-tokens
|
||||
- '4096'
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
- name: ENABLE_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: LIMIT_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: USE_FLASH_ATTENTION
|
||||
value: 'true'
|
||||
- name: FLASH_ATTENTION_RECOMPUTE
|
||||
value: 'true'
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
securityContext:
|
||||
capabilities:
|
||||
add:
|
||||
- SYS_NICE
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: llm-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 9009
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 4
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/llm-tgi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-deploy
|
||||
ports:
|
||||
- containerPort: 9000
|
||||
resources:
|
||||
requests:
|
||||
cpu: 4
|
||||
memory: 4000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: llm-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 9000
|
||||
targetPort: 9000
|
||||
selector:
|
||||
app: llm-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: reranking-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: reranking-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: reranking-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(RERANK_MODEL_ID)
|
||||
- --auto-truncate
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HF_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
- name: MAX_WARMUP_SEQUENCE_LENGTH
|
||||
value: '512'
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/tei-gaudi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: reranking-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: reranking-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 8808
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: reranking-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: reranking-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 4
|
||||
selector:
|
||||
matchLabels:
|
||||
app: reranking-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: reranking-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/reranking-tei:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-deploy
|
||||
ports:
|
||||
- containerPort: 8000
|
||||
resources:
|
||||
requests:
|
||||
cpu: 4
|
||||
memory: 4000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: reranking-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: reranking-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 8000
|
||||
targetPort: 8000
|
||||
selector:
|
||||
app: reranking-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: retriever-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 4
|
||||
selector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: retriever-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/retriever-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: retriever-deploy
|
||||
ports:
|
||||
- containerPort: 7000
|
||||
resources:
|
||||
requests:
|
||||
cpu: 4
|
||||
memory: 4000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: retriever-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 7000
|
||||
targetPort: 7000
|
||||
selector:
|
||||
app: retriever-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: vector-db
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: redis/redis-stack:7.2.0-v9
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: vector-db
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
- containerPort: 8001
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: vector-db-service
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
- name: vector-db-insight
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
selector:
|
||||
app: vector-db
|
||||
type: ClusterIP
|
||||
---
|
||||
@@ -0,0 +1,683 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
data:
|
||||
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||
EMBEDDING_SERVICE_HOST_IP: embedding-svc
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
INDEX_NAME: rag-redis
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
LLM_SERVICE_HOST_IP: llm-svc
|
||||
NODE_SELECTOR: chatqna-opea
|
||||
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||
RERANK_SERVICE_HOST_IP: reranking-svc
|
||||
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: qna-config
|
||||
namespace: default
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-backend-server-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: chatqna-backend-server-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
- containerPort: 8888
|
||||
resources:
|
||||
limits:
|
||||
cpu: 8
|
||||
memory: 8000Mi
|
||||
requests:
|
||||
cpu: 8
|
||||
memory: 8000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: chatqna-backend-server-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
nodePort: 30888
|
||||
port: 8888
|
||||
targetPort: 8888
|
||||
selector:
|
||||
app: chatqna-backend-server-deploy
|
||||
type: NodePort
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: dataprep-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: dataprep-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/dataprep-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: dataprep-deploy
|
||||
ports:
|
||||
- containerPort: 6007
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: dataprep-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: port1
|
||||
port: 6007
|
||||
targetPort: 6007
|
||||
selector:
|
||||
app: dataprep-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(EMBEDDING_MODEL_ID)
|
||||
- --auto-truncate
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
cpu: 76
|
||||
memory: 20000Mi
|
||||
requests:
|
||||
cpu: 76
|
||||
memory: 20000Mi
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: embedding-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 6006
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: embedding-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/embedding-tei:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-deploy
|
||||
ports:
|
||||
- containerPort: 6000
|
||||
resources:
|
||||
requests:
|
||||
cpu: 4
|
||||
memory: 4000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: embedding-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 6000
|
||||
targetPort: 6000
|
||||
selector:
|
||||
app: embedding-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 7
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(LLM_MODEL_ID)
|
||||
- --max-input-length
|
||||
- '1024'
|
||||
- --max-total-tokens
|
||||
- '2048'
|
||||
- --max-batch-total-tokens
|
||||
- '65536'
|
||||
- --max-batch-prefill-tokens
|
||||
- '4096'
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
- name: ENABLE_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: LIMIT_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: USE_FLASH_ATTENTION
|
||||
value: 'true'
|
||||
- name: FLASH_ATTENTION_RECOMPUTE
|
||||
value: 'true'
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
securityContext:
|
||||
capabilities:
|
||||
add:
|
||||
- SYS_NICE
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: llm-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 9009
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/llm-tgi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-deploy
|
||||
ports:
|
||||
- containerPort: 9000
|
||||
resources:
|
||||
requests:
|
||||
cpu: 4
|
||||
memory: 4000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: llm-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 9000
|
||||
targetPort: 9000
|
||||
selector:
|
||||
app: llm-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: reranking-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: reranking-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: reranking-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(RERANK_MODEL_ID)
|
||||
- --auto-truncate
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HF_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
- name: MAX_WARMUP_SEQUENCE_LENGTH
|
||||
value: '512'
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/tei-gaudi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: reranking-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: reranking-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 8808
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: reranking-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: reranking-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: reranking-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: reranking-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/reranking-tei:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-deploy
|
||||
ports:
|
||||
- containerPort: 8000
|
||||
resources:
|
||||
requests:
|
||||
cpu: 4
|
||||
memory: 4000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: reranking-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: reranking-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 8000
|
||||
targetPort: 8000
|
||||
selector:
|
||||
app: reranking-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: retriever-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: retriever-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/retriever-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: retriever-deploy
|
||||
ports:
|
||||
- containerPort: 7000
|
||||
resources:
|
||||
requests:
|
||||
cpu: 4
|
||||
memory: 4000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: retriever-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 7000
|
||||
targetPort: 7000
|
||||
selector:
|
||||
app: retriever-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: vector-db
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: redis/redis-stack:7.2.0-v9
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: vector-db
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
- containerPort: 8001
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: vector-db-service
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
- name: vector-db-insight
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
selector:
|
||||
app: vector-db
|
||||
type: ClusterIP
|
||||
---
|
||||
@@ -0,0 +1,683 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
data:
|
||||
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||
EMBEDDING_SERVICE_HOST_IP: embedding-svc
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
INDEX_NAME: rag-redis
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
LLM_SERVICE_HOST_IP: llm-svc
|
||||
NODE_SELECTOR: chatqna-opea
|
||||
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||
RERANK_SERVICE_HOST_IP: reranking-svc
|
||||
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: qna-config
|
||||
namespace: default
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-backend-server-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: chatqna-backend-server-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
- containerPort: 8888
|
||||
resources:
|
||||
limits:
|
||||
cpu: 8
|
||||
memory: 8000Mi
|
||||
requests:
|
||||
cpu: 8
|
||||
memory: 8000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: chatqna-backend-server-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
nodePort: 30888
|
||||
port: 8888
|
||||
targetPort: 8888
|
||||
selector:
|
||||
app: chatqna-backend-server-deploy
|
||||
type: NodePort
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: dataprep-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: dataprep-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/dataprep-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: dataprep-deploy
|
||||
ports:
|
||||
- containerPort: 6007
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: dataprep-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: port1
|
||||
port: 6007
|
||||
targetPort: 6007
|
||||
selector:
|
||||
app: dataprep-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(EMBEDDING_MODEL_ID)
|
||||
- --auto-truncate
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
cpu: 76
|
||||
memory: 20000Mi
|
||||
requests:
|
||||
cpu: 76
|
||||
memory: 20000Mi
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: embedding-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 6006
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: embedding-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/embedding-tei:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-deploy
|
||||
ports:
|
||||
- containerPort: 6000
|
||||
resources:
|
||||
requests:
|
||||
cpu: 4
|
||||
memory: 4000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: embedding-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 6000
|
||||
targetPort: 6000
|
||||
selector:
|
||||
app: embedding-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 15
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(LLM_MODEL_ID)
|
||||
- --max-input-length
|
||||
- '1024'
|
||||
- --max-total-tokens
|
||||
- '2048'
|
||||
- --max-batch-total-tokens
|
||||
- '65536'
|
||||
- --max-batch-prefill-tokens
|
||||
- '4096'
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
- name: ENABLE_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: LIMIT_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: USE_FLASH_ATTENTION
|
||||
value: 'true'
|
||||
- name: FLASH_ATTENTION_RECOMPUTE
|
||||
value: 'true'
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
securityContext:
|
||||
capabilities:
|
||||
add:
|
||||
- SYS_NICE
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: llm-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 9009
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/llm-tgi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-deploy
|
||||
ports:
|
||||
- containerPort: 9000
|
||||
resources:
|
||||
requests:
|
||||
cpu: 4
|
||||
memory: 4000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: llm-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 9000
|
||||
targetPort: 9000
|
||||
selector:
|
||||
app: llm-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: reranking-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: reranking-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: reranking-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(RERANK_MODEL_ID)
|
||||
- --auto-truncate
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HF_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
- name: MAX_WARMUP_SEQUENCE_LENGTH
|
||||
value: '512'
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/tei-gaudi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: reranking-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: reranking-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 8808
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: reranking-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: reranking-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: reranking-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: reranking-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/reranking-tei:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-deploy
|
||||
ports:
|
||||
- containerPort: 8000
|
||||
resources:
|
||||
requests:
|
||||
cpu: 4
|
||||
memory: 4000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: reranking-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: reranking-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 8000
|
||||
targetPort: 8000
|
||||
selector:
|
||||
app: reranking-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: retriever-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: retriever-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/retriever-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: retriever-deploy
|
||||
ports:
|
||||
- containerPort: 7000
|
||||
resources:
|
||||
requests:
|
||||
cpu: 4
|
||||
memory: 4000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: retriever-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 7000
|
||||
targetPort: 7000
|
||||
selector:
|
||||
app: retriever-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: vector-db
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: redis/redis-stack:7.2.0-v9
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: vector-db
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
- containerPort: 8001
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: vector-db-service
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
- name: vector-db-insight
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
selector:
|
||||
app: vector-db
|
||||
type: ClusterIP
|
||||
---
|
||||
@@ -0,0 +1,622 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: qna-config
|
||||
namespace: default
|
||||
data:
|
||||
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||
INDEX_NAME: rag-redis
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
EMBEDDING_SERVICE_HOST_IP: embedding-svc
|
||||
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||
RERANK_SERVICE_HOST_IP: reranking-svc
|
||||
NODE_SELECTOR: chatqna-opea
|
||||
LLM_SERVICE_HOST_IP: llm-svc
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-backend-server-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 4
|
||||
selector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: chatqna-backend-server-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna-without-rerank:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 8888
|
||||
resources:
|
||||
limits:
|
||||
cpu: 8
|
||||
memory: 4000Mi
|
||||
requests:
|
||||
cpu: 8
|
||||
memory: 4000Mi
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: chatqna-backend-server-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: NodePort
|
||||
selector:
|
||||
app: chatqna-backend-server-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 8888
|
||||
targetPort: 8888
|
||||
nodePort: 30888
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: dataprep-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: dataprep-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- env:
|
||||
- name: REDIS_URL
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: REDIS_URL
|
||||
- name: TEI_ENDPOINT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: TEI_EMBEDDING_ENDPOINT
|
||||
- name: INDEX_NAME
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: INDEX_NAME
|
||||
image: opea/dataprep-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: dataprep-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 6007
|
||||
- containerPort: 6008
|
||||
- containerPort: 6009
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: dataprep-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: dataprep-deploy
|
||||
ports:
|
||||
- name: port1
|
||||
port: 6007
|
||||
targetPort: 6007
|
||||
- name: port2
|
||||
port: 6008
|
||||
targetPort: 6008
|
||||
- name: port3
|
||||
port: 6009
|
||||
targetPort: 6009
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 4
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-dependency-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
name: embedding-dependency-deploy
|
||||
args:
|
||||
- --model-id
|
||||
- $(EMBEDDING_MODEL_ID)
|
||||
- --auto-truncate
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
cpu: 76
|
||||
memory: 20000Mi
|
||||
requests:
|
||||
cpu: 76
|
||||
memory: 20000Mi
|
||||
serviceAccountName: default
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
- name: shm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: embedding-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: embedding-dependency-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 6006
|
||||
targetPort: 80
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 4
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/embedding-tei:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 6000
|
||||
resources:
|
||||
limits:
|
||||
cpu: 4
|
||||
requests:
|
||||
cpu: 4
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: embedding-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: embedding-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 6000
|
||||
targetPort: 6000
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 32
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-dependency-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
name: llm-dependency-deploy-demo
|
||||
securityContext:
|
||||
capabilities:
|
||||
add:
|
||||
- SYS_NICE
|
||||
args:
|
||||
- --model-id
|
||||
- $(LLM_MODEL_ID)
|
||||
- --max-input-length
|
||||
- '1024'
|
||||
- --max-total-tokens
|
||||
- '2048'
|
||||
- --max-batch-total-tokens
|
||||
- '65536'
|
||||
- --max-batch-prefill-tokens
|
||||
- '4096'
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
- name: ENABLE_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: LIMIT_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: USE_FLASH_ATTENTION
|
||||
value: 'true'
|
||||
- name: FLASH_ATTENTION_RECOMPUTE
|
||||
value: 'true'
|
||||
serviceAccountName: default
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
- name: shm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: llm-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 9009
|
||||
targetPort: 80
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 4
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/llm-tgi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 9000
|
||||
resources:
|
||||
limits:
|
||||
cpu: 4
|
||||
requests:
|
||||
cpu: 4
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: llm-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: llm-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 9000
|
||||
targetPort: 9000
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: retriever-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 4
|
||||
selector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: retriever-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- env:
|
||||
- name: REDIS_URL
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: REDIS_URL
|
||||
- name: TEI_EMBEDDING_ENDPOINT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: TEI_EMBEDDING_ENDPOINT
|
||||
- name: HUGGINGFACEHUB_API_TOKEN
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: HUGGINGFACEHUB_API_TOKEN
|
||||
- name: INDEX_NAME
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: INDEX_NAME
|
||||
image: opea/retriever-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: retriever-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 7000
|
||||
resources:
|
||||
limits:
|
||||
cpu: 8
|
||||
memory: 2500Mi
|
||||
requests:
|
||||
cpu: 8
|
||||
memory: 2500Mi
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: retriever-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: retriever-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 7000
|
||||
targetPort: 7000
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: vector-db
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
containers:
|
||||
- name: vector-db
|
||||
image: redis/redis-stack:7.2.0-v9
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
- containerPort: 8001
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: vector-db
|
||||
ports:
|
||||
- name: vector-db-service
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
- name: vector-db-insight
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
|
||||
|
||||
---
|
||||
@@ -0,0 +1,622 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: qna-config
|
||||
namespace: default
|
||||
data:
|
||||
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||
INDEX_NAME: rag-redis
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
EMBEDDING_SERVICE_HOST_IP: embedding-svc
|
||||
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||
RERANK_SERVICE_HOST_IP: reranking-svc
|
||||
NODE_SELECTOR: chatqna-opea
|
||||
LLM_SERVICE_HOST_IP: llm-svc
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-backend-server-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: chatqna-backend-server-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna-without-rerank:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 8888
|
||||
resources:
|
||||
limits:
|
||||
cpu: 8
|
||||
memory: 4000Mi
|
||||
requests:
|
||||
cpu: 8
|
||||
memory: 4000Mi
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: chatqna-backend-server-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: NodePort
|
||||
selector:
|
||||
app: chatqna-backend-server-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 8888
|
||||
targetPort: 8888
|
||||
nodePort: 30888
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: dataprep-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: dataprep-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- env:
|
||||
- name: REDIS_URL
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: REDIS_URL
|
||||
- name: TEI_ENDPOINT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: TEI_EMBEDDING_ENDPOINT
|
||||
- name: INDEX_NAME
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: INDEX_NAME
|
||||
image: opea/dataprep-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: dataprep-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 6007
|
||||
- containerPort: 6008
|
||||
- containerPort: 6009
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: dataprep-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: dataprep-deploy
|
||||
ports:
|
||||
- name: port1
|
||||
port: 6007
|
||||
targetPort: 6007
|
||||
- name: port2
|
||||
port: 6008
|
||||
targetPort: 6008
|
||||
- name: port3
|
||||
port: 6009
|
||||
targetPort: 6009
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-dependency-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
name: embedding-dependency-deploy
|
||||
args:
|
||||
- --model-id
|
||||
- $(EMBEDDING_MODEL_ID)
|
||||
- --auto-truncate
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
cpu: 76
|
||||
memory: 20000Mi
|
||||
requests:
|
||||
cpu: 76
|
||||
memory: 20000Mi
|
||||
serviceAccountName: default
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
- name: shm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: embedding-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: embedding-dependency-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 6006
|
||||
targetPort: 80
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/embedding-tei:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 6000
|
||||
resources:
|
||||
limits:
|
||||
cpu: 4
|
||||
requests:
|
||||
cpu: 4
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: embedding-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: embedding-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 6000
|
||||
targetPort: 6000
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 8
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-dependency-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
name: llm-dependency-deploy-demo
|
||||
securityContext:
|
||||
capabilities:
|
||||
add:
|
||||
- SYS_NICE
|
||||
args:
|
||||
- --model-id
|
||||
- $(LLM_MODEL_ID)
|
||||
- --max-input-length
|
||||
- '1024'
|
||||
- --max-total-tokens
|
||||
- '2048'
|
||||
- --max-batch-total-tokens
|
||||
- '65536'
|
||||
- --max-batch-prefill-tokens
|
||||
- '4096'
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
- name: ENABLE_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: LIMIT_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: USE_FLASH_ATTENTION
|
||||
value: 'true'
|
||||
- name: FLASH_ATTENTION_RECOMPUTE
|
||||
value: 'true'
|
||||
serviceAccountName: default
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
- name: shm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: llm-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 9009
|
||||
targetPort: 80
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/llm-tgi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 9000
|
||||
resources:
|
||||
limits:
|
||||
cpu: 4
|
||||
requests:
|
||||
cpu: 4
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: llm-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: llm-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 9000
|
||||
targetPort: 9000
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: retriever-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: retriever-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- env:
|
||||
- name: REDIS_URL
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: REDIS_URL
|
||||
- name: TEI_EMBEDDING_ENDPOINT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: TEI_EMBEDDING_ENDPOINT
|
||||
- name: HUGGINGFACEHUB_API_TOKEN
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: HUGGINGFACEHUB_API_TOKEN
|
||||
- name: INDEX_NAME
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: INDEX_NAME
|
||||
image: opea/retriever-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: retriever-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 7000
|
||||
resources:
|
||||
limits:
|
||||
cpu: 8
|
||||
memory: 2500Mi
|
||||
requests:
|
||||
cpu: 8
|
||||
memory: 2500Mi
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: retriever-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: retriever-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 7000
|
||||
targetPort: 7000
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: vector-db
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
containers:
|
||||
- name: vector-db
|
||||
image: redis/redis-stack:7.2.0-v9
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
- containerPort: 8001
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: vector-db
|
||||
ports:
|
||||
- name: vector-db-service
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
- name: vector-db-insight
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
|
||||
|
||||
---
|
||||
@@ -0,0 +1,622 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: qna-config
|
||||
namespace: default
|
||||
data:
|
||||
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||
INDEX_NAME: rag-redis
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
EMBEDDING_SERVICE_HOST_IP: embedding-svc
|
||||
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||
RERANK_SERVICE_HOST_IP: reranking-svc
|
||||
NODE_SELECTOR: chatqna-opea
|
||||
LLM_SERVICE_HOST_IP: llm-svc
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-backend-server-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: chatqna-backend-server-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna-without-rerank:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 8888
|
||||
resources:
|
||||
limits:
|
||||
cpu: 8
|
||||
memory: 4000Mi
|
||||
requests:
|
||||
cpu: 8
|
||||
memory: 4000Mi
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: chatqna-backend-server-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: NodePort
|
||||
selector:
|
||||
app: chatqna-backend-server-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 8888
|
||||
targetPort: 8888
|
||||
nodePort: 30888
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: dataprep-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: dataprep-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- env:
|
||||
- name: REDIS_URL
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: REDIS_URL
|
||||
- name: TEI_ENDPOINT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: TEI_EMBEDDING_ENDPOINT
|
||||
- name: INDEX_NAME
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: INDEX_NAME
|
||||
image: opea/dataprep-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: dataprep-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 6007
|
||||
- containerPort: 6008
|
||||
- containerPort: 6009
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: dataprep-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: dataprep-deploy
|
||||
ports:
|
||||
- name: port1
|
||||
port: 6007
|
||||
targetPort: 6007
|
||||
- name: port2
|
||||
port: 6008
|
||||
targetPort: 6008
|
||||
- name: port3
|
||||
port: 6009
|
||||
targetPort: 6009
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-dependency-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
name: embedding-dependency-deploy
|
||||
args:
|
||||
- --model-id
|
||||
- $(EMBEDDING_MODEL_ID)
|
||||
- --auto-truncate
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
cpu: 76
|
||||
memory: 20000Mi
|
||||
requests:
|
||||
cpu: 76
|
||||
memory: 20000Mi
|
||||
serviceAccountName: default
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
- name: shm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: embedding-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: embedding-dependency-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 6006
|
||||
targetPort: 80
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/embedding-tei:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 6000
|
||||
resources:
|
||||
limits:
|
||||
cpu: 4
|
||||
requests:
|
||||
cpu: 4
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: embedding-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: embedding-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 6000
|
||||
targetPort: 6000
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 16
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-dependency-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
name: llm-dependency-deploy-demo
|
||||
securityContext:
|
||||
capabilities:
|
||||
add:
|
||||
- SYS_NICE
|
||||
args:
|
||||
- --model-id
|
||||
- $(LLM_MODEL_ID)
|
||||
- --max-input-length
|
||||
- '1024'
|
||||
- --max-total-tokens
|
||||
- '2048'
|
||||
- --max-batch-total-tokens
|
||||
- '65536'
|
||||
- --max-batch-prefill-tokens
|
||||
- '4096'
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
- name: ENABLE_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: LIMIT_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: USE_FLASH_ATTENTION
|
||||
value: 'true'
|
||||
- name: FLASH_ATTENTION_RECOMPUTE
|
||||
value: 'true'
|
||||
serviceAccountName: default
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
- name: shm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: llm-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 9009
|
||||
targetPort: 80
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/llm-tgi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 9000
|
||||
resources:
|
||||
limits:
|
||||
cpu: 4
|
||||
requests:
|
||||
cpu: 4
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: llm-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: llm-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 9000
|
||||
targetPort: 9000
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: retriever-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: retriever-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- env:
|
||||
- name: REDIS_URL
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: REDIS_URL
|
||||
- name: TEI_EMBEDDING_ENDPOINT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: TEI_EMBEDDING_ENDPOINT
|
||||
- name: HUGGINGFACEHUB_API_TOKEN
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: HUGGINGFACEHUB_API_TOKEN
|
||||
- name: INDEX_NAME
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: INDEX_NAME
|
||||
image: opea/retriever-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: retriever-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 7000
|
||||
resources:
|
||||
limits:
|
||||
cpu: 8
|
||||
memory: 2500Mi
|
||||
requests:
|
||||
cpu: 8
|
||||
memory: 2500Mi
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: retriever-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: retriever-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 7000
|
||||
targetPort: 7000
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: vector-db
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
containers:
|
||||
- name: vector-db
|
||||
image: redis/redis-stack:7.2.0-v9
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
- containerPort: 8001
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: vector-db
|
||||
ports:
|
||||
- name: vector-db-service
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
- name: vector-db-insight
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
|
||||
|
||||
---
|
||||
@@ -19,7 +19,7 @@ opea_micro_services:
|
||||
tei-embedding-service:
|
||||
host: ${TEI_EMBEDDING_SERVICE_IP}
|
||||
ports: ${TEI_EMBEDDING_SERVICE_PORT}
|
||||
image: opea/tei-gaudi:latest
|
||||
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||
volumes:
|
||||
- "./data:/data"
|
||||
runtime: habana
|
||||
@@ -48,7 +48,7 @@ opea_micro_services:
|
||||
tgi-service:
|
||||
host: ${TGI_SERVICE_IP}
|
||||
ports: ${TGI_SERVICE_PORT}
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
volumes:
|
||||
- "./data:/data"
|
||||
runtime: habana
|
||||
@@ -56,10 +56,13 @@ opea_micro_services:
|
||||
- SYS_NICE
|
||||
ipc: host
|
||||
environment:
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
ENABLE_HPU_GRAPH: true
|
||||
LIMIT_HPU_GRAPH: true
|
||||
USE_FLASH_ATTENTION: true
|
||||
FLASH_ATTENTION_RECOMPUTE: true
|
||||
model-id: ${LLM_MODEL_ID}
|
||||
llm:
|
||||
host: ${LLM_SERVICE_HOST_IP}
|
||||
|
||||
@@ -49,7 +49,7 @@ docker pull opea/chatqna-ui:latest
|
||||
|
||||
In the following cases, you can build the Docker image from source yourself.
|
||||
|
||||
- Failed to download the docker image. (The essential Docker image `opea/nginx` has not yet been released, users need to build this image first)
|
||||
- Failed to download the docker image.
|
||||
|
||||
- If you want to use a specific version of Docker image.
|
||||
|
||||
@@ -233,7 +233,7 @@ For users in China who are unable to download models directly from Huggingface,
|
||||
export HF_TOKEN=${your_hf_token}
|
||||
export HF_ENDPOINT="https://hf-mirror.com"
|
||||
model_name="Intel/neural-chat-7b-v3-3"
|
||||
docker run -p 8008:80 -v ./data:/data --name tgi-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.2.0 --model-id $model_name
|
||||
docker run -p 8008:80 -v ./data:/data --name tgi-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --shm-size 1g ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu --model-id $model_name
|
||||
```
|
||||
|
||||
2. Offline
|
||||
@@ -247,7 +247,7 @@ For users in China who are unable to download models directly from Huggingface,
|
||||
```bash
|
||||
export HF_TOKEN=${your_hf_token}
|
||||
export model_path="/path/to/model"
|
||||
docker run -p 8008:80 -v $model_path:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.2.0 --model-id /data
|
||||
docker run -p 8008:80 -v $model_path:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu --model-id /data
|
||||
```
|
||||
|
||||
### Setup Environment Variables
|
||||
|
||||
@@ -69,7 +69,7 @@ services:
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
restart: unless-stopped
|
||||
tei-reranking-service:
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
container_name: tei-reranking-server
|
||||
ports:
|
||||
- "6041:80"
|
||||
|
||||
@@ -25,7 +25,7 @@ services:
|
||||
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
tei-embedding-service:
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
container_name: tei-embedding-server
|
||||
ports:
|
||||
- "6006:80"
|
||||
@@ -75,7 +75,7 @@ services:
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
restart: unless-stopped
|
||||
tei-reranking-service:
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
container_name: tei-reranking-server
|
||||
ports:
|
||||
- "8808:80"
|
||||
|
||||
@@ -50,7 +50,7 @@ docker pull opea/chatqna-ui:latest
|
||||
|
||||
In the following cases, you can build the Docker image from source yourself.
|
||||
|
||||
- Failed to download the docker image. (The essential Docker image `opea/nginx` has not yet been released, users need to build this image first)
|
||||
- Failed to download the docker image.
|
||||
|
||||
- If you want to use a specific version of Docker image.
|
||||
|
||||
|
||||
@@ -25,7 +25,7 @@ services:
|
||||
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
tei-embedding-service:
|
||||
image: ${REGISTRY:-opea}/tei-gaudi:${TAG:-latest}
|
||||
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||
container_name: tei-embedding-gaudi-server
|
||||
ports:
|
||||
- "8090:80"
|
||||
@@ -108,7 +108,7 @@ services:
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
restart: unless-stopped
|
||||
tgi-service:
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
container_name: tgi-gaudi-server
|
||||
ports:
|
||||
- "8005:80"
|
||||
@@ -118,11 +118,15 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
HABANA_VISIBLE_DEVICES: ${llm_service_devices}
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
ENABLE_HPU_GRAPH: true
|
||||
LIMIT_HPU_GRAPH: true
|
||||
USE_FLASH_ATTENTION: true
|
||||
FLASH_ATTENTION_RECOMPUTE: true
|
||||
runtime: habana
|
||||
cap_add:
|
||||
- SYS_NICE
|
||||
|
||||
@@ -25,7 +25,7 @@ services:
|
||||
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
tgi-guardrails-service:
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
container_name: tgi-guardrails-server
|
||||
ports:
|
||||
- "8088:80"
|
||||
@@ -35,11 +35,15 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
ENABLE_HPU_GRAPH: true
|
||||
LIMIT_HPU_GRAPH: true
|
||||
USE_FLASH_ATTENTION: true
|
||||
FLASH_ATTENTION_RECOMPUTE: true
|
||||
runtime: habana
|
||||
cap_add:
|
||||
- SYS_NICE
|
||||
@@ -60,7 +64,7 @@ services:
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
restart: unless-stopped
|
||||
tei-embedding-service:
|
||||
image: ${REGISTRY:-opea}/tei-gaudi:${TAG:-latest}
|
||||
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||
container_name: tei-embedding-gaudi-server
|
||||
ports:
|
||||
- "8090:80"
|
||||
@@ -141,7 +145,7 @@ services:
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
restart: unless-stopped
|
||||
tgi-service:
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
container_name: tgi-gaudi-server
|
||||
ports:
|
||||
- "8008:80"
|
||||
@@ -151,11 +155,15 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
ENABLE_HPU_GRAPH: true
|
||||
LIMIT_HPU_GRAPH: true
|
||||
USE_FLASH_ATTENTION: true
|
||||
FLASH_ATTENTION_RECOMPUTE: true
|
||||
runtime: habana
|
||||
cap_add:
|
||||
- SYS_NICE
|
||||
|
||||
@@ -25,7 +25,7 @@ services:
|
||||
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
tei-embedding-service:
|
||||
image: ${REGISTRY:-opea}/tei-gaudi:${TAG:-latest}
|
||||
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||
container_name: tei-embedding-gaudi-server
|
||||
ports:
|
||||
- "8090:80"
|
||||
@@ -108,7 +108,7 @@ services:
|
||||
# HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
# restart: unless-stopped
|
||||
tgi-service:
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
container_name: tgi-gaudi-server
|
||||
ports:
|
||||
- "8005:80"
|
||||
@@ -118,11 +118,15 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
ENABLE_HPU_GRAPH: true
|
||||
LIMIT_HPU_GRAPH: true
|
||||
USE_FLASH_ATTENTION: true
|
||||
FLASH_ATTENTION_RECOMPUTE: true
|
||||
runtime: habana
|
||||
cap_add:
|
||||
- SYS_NICE
|
||||
|
||||
@@ -25,7 +25,7 @@ services:
|
||||
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
tei-embedding-service:
|
||||
image: ${REGISTRY:-opea}/tei-gaudi:${TAG:-latest}
|
||||
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||
container_name: tei-embedding-gaudi-server
|
||||
ports:
|
||||
- "8090:80"
|
||||
@@ -73,7 +73,7 @@ services:
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
restart: unless-stopped
|
||||
tei-reranking-service:
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
container_name: tei-reranking-gaudi-server
|
||||
ports:
|
||||
- "8808:80"
|
||||
|
||||
@@ -25,7 +25,7 @@ services:
|
||||
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
tei-embedding-service:
|
||||
image: ${REGISTRY:-opea}/tei-gaudi:${TAG:-latest}
|
||||
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||
container_name: tei-embedding-gaudi-server
|
||||
ports:
|
||||
- "8090:80"
|
||||
@@ -73,7 +73,7 @@ services:
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
restart: unless-stopped
|
||||
tei-reranking-service:
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
container_name: tei-reranking-gaudi-server
|
||||
ports:
|
||||
- "8808:80"
|
||||
|
||||
@@ -25,7 +25,7 @@ services:
|
||||
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
tei-embedding-service:
|
||||
image: ${REGISTRY:-opea}/tei-gaudi:${TAG:-latest}
|
||||
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||
container_name: tei-embedding-gaudi-server
|
||||
ports:
|
||||
- "8090:80"
|
||||
@@ -75,7 +75,7 @@ services:
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
restart: unless-stopped
|
||||
tgi-service:
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
container_name: tgi-gaudi-server
|
||||
ports:
|
||||
- "8005:80"
|
||||
@@ -85,11 +85,15 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
ENABLE_HPU_GRAPH: true
|
||||
LIMIT_HPU_GRAPH: true
|
||||
USE_FLASH_ATTENTION: true
|
||||
FLASH_ATTENTION_RECOMPUTE: true
|
||||
runtime: habana
|
||||
cap_add:
|
||||
- SYS_NICE
|
||||
|
||||
@@ -56,16 +56,16 @@ f810f3b4d329 opea/embedding-tei:latest "python e
|
||||
2fa17d84605f opea/dataprep-redis:latest "python prepare_doc_…" 2 minutes ago Up 2 minutes 0.0.0.0:6007->6007/tcp, :::6007->6007/tcp dataprep-redis-server
|
||||
69e1fb59e92c opea/retriever-redis:latest "/home/user/comps/re…" 2 minutes ago Up 2 minutes 0.0.0.0:7000->7000/tcp, :::7000->7000/tcp retriever-redis-server
|
||||
313b9d14928a opea/reranking-tei:latest "python reranking_te…" 2 minutes ago Up 2 minutes 0.0.0.0:8000->8000/tcp, :::8000->8000/tcp reranking-tei-gaudi-server
|
||||
05c40b636239 ghcr.io/huggingface/tgi-gaudi:1.2.1 "text-generation-lau…" 2 minutes ago Exited (1) About a minute ago tgi-gaudi-server
|
||||
174bd43fa6b5 opea/tei-gaudi:latest "text-embeddings-rou…" 2 minutes ago Up 2 minutes 0.0.0.0:8090->80/tcp, :::8090->80/tcp tei-embedding-gaudi-server
|
||||
05c40b636239 ghcr.io/huggingface/tgi-gaudi:2.0.5 "text-generation-lau…" 2 minutes ago Exited (1) About a minute ago tgi-gaudi-server
|
||||
174bd43fa6b5 ghcr.io/huggingface/tei-gaudi:latest "text-embeddings-rou…" 2 minutes ago Up 2 minutes 0.0.0.0:8090->80/tcp, :::8090->80/tcp tei-embedding-gaudi-server
|
||||
74084469aa33 redis/redis-stack:7.2.0-v9 "/entrypoint.sh" 2 minutes ago Up 2 minutes 0.0.0.0:6379->6379/tcp, :::6379->6379/tcp, 0.0.0.0:8001->8001/tcp, :::8001->8001/tcp redis-vector-db
|
||||
88399dbc9e43 ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 "text-embeddings-rou…" 2 minutes ago Up 2 minutes 0.0.0.0:8808->80/tcp, :::8808->80/tcp tei-reranking-gaudi-server
|
||||
```
|
||||
|
||||
In this case, `ghcr.io/huggingface/tgi-gaudi:1.2.1` Existed.
|
||||
In this case, `ghcr.io/huggingface/tgi-gaudi:2.0.5` has exited.
|
||||
|
||||
```
|
||||
05c40b636239 ghcr.io/huggingface/tgi-gaudi:1.2.1 "text-generation-lau…" 2 minutes ago Exited (1) About a minute ago tgi-gaudi-server
|
||||
05c40b636239 ghcr.io/huggingface/tgi-gaudi:2.0.5 "text-generation-lau…" 2 minutes ago Exited (1) About a minute ago tgi-gaudi-server
|
||||
```
|
||||
|
||||
Next we can check the container logs to get to know what happened during the docker start.
|
||||
@@ -76,7 +76,7 @@ Check the log of container by:
|
||||
|
||||
`docker logs <CONTAINER ID> -t`
|
||||
|
||||
View the logs of `ghcr.io/huggingface/tgi-gaudi:1.2.1`
|
||||
View the logs of `ghcr.io/huggingface/tgi-gaudi:2.0.5`
|
||||
|
||||
`docker logs 05c40b636239 -t`
|
||||
|
||||
@@ -105,7 +105,7 @@ So just make sure the devices are available.
|
||||
Here is another failure example:
|
||||
|
||||
```
|
||||
f7a08f9867f9 ghcr.io/huggingface/tgi-gaudi:1.2.1 "text-generation-lau…" 16 seconds ago Exited (2) 14 seconds ago tgi-gaudi-server
|
||||
f7a08f9867f9 ghcr.io/huggingface/tgi-gaudi:2.0.5 "text-generation-lau…" 16 seconds ago Exited (2) 14 seconds ago tgi-gaudi-server
|
||||
```
|
||||
|
||||
Check the log by `docker logs f7a08f9867f9 -t`.
|
||||
@@ -122,7 +122,7 @@ View the docker input parameters in `./ChatQnA/docker_compose/intel/hpu/gaudi/co
|
||||
|
||||
```
|
||||
tgi-service:
|
||||
image: ghcr.io/huggingface/tgi-gaudi:1.2.1
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
container_name: tgi-gaudi-server
|
||||
ports:
|
||||
- "8008:80"
|
||||
@@ -131,9 +131,13 @@ View the docker input parameters in `./ChatQnA/docker_compose/intel/hpu/gaudi/co
|
||||
environment:
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
ENABLE_HPU_GRAPH: true
|
||||
LIMIT_HPU_GRAPH: true
|
||||
USE_FLASH_ATTENTION: true
|
||||
FLASH_ATTENTION_RECOMPUTE: true
|
||||
runtime: habana
|
||||
cap_add:
|
||||
- SYS_NICE
|
||||
|
||||
@@ -50,7 +50,7 @@ docker pull opea/chatqna-ui:latest
|
||||
|
||||
In the following cases, you can build the Docker image from source yourself.
|
||||
|
||||
- Failed to download the docker image. (The essential Docker image `opea/nginx` has not yet been released, users need to build this image first)
|
||||
- Failed to download the docker image.
|
||||
|
||||
- If you want to use a specific version of Docker image.
|
||||
|
||||
|
||||
@@ -125,12 +125,6 @@ services:
|
||||
dockerfile: comps/guardrails/llama_guard/langchain/Dockerfile
|
||||
extends: chatqna
|
||||
image: ${REGISTRY:-opea}/guardrails-tgi:${TAG:-latest}
|
||||
tei-gaudi:
|
||||
build:
|
||||
context: tei-gaudi
|
||||
dockerfile: Dockerfile-hpu
|
||||
extends: chatqna
|
||||
image: ${REGISTRY:-opea}/tei-gaudi:${TAG:-latest}
|
||||
vllm:
|
||||
build:
|
||||
context: vllm
|
||||
|
||||
@@ -27,8 +27,8 @@ The ChatQnA uses the below prebuilt images if you choose a Xeon deployment
|
||||
Should you desire to use the Gaudi accelerator, two alternate images are used for the embedding and llm services.
|
||||
For Gaudi:
|
||||
|
||||
- tei-embedding-service: opea/tei-gaudi:latest
|
||||
- tgi-service: ghcr.io/huggingface/tgi-gaudi:1.2.1
|
||||
- tei-embedding-service: ghcr.io/huggingface/tei-gaudi:latest
|
||||
- tgi-service: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
|
||||
> [NOTE]
|
||||
> Please refer to [Xeon README](https://github.com/opea-project/GenAIExamples/blob/main/ChatQnA/docker_compose/intel/cpu/xeon/README.md) or [Gaudi README](https://github.com/opea-project/GenAIExamples/blob/main/ChatQnA/docker_compose/intel/hpu/gaudi/README.md) to build the OPEA images. These too will be available on Docker Hub soon to simplify use.
|
||||
|
||||
@@ -1474,7 +1474,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "ghcr.io/huggingface/text-generation-inference:2.2.0"
|
||||
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
@@ -1554,7 +1554,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "ghcr.io/huggingface/text-generation-inference:2.2.0"
|
||||
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
|
||||
@@ -1477,7 +1477,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "ghcr.io/huggingface/tgi-gaudi:2.0.1"
|
||||
image: "ghcr.io/huggingface/tgi-gaudi:2.0.5"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
@@ -1558,7 +1558,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "ghcr.io/huggingface/tgi-gaudi:2.0.1"
|
||||
image: "ghcr.io/huggingface/tgi-gaudi:2.0.5"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
|
||||
@@ -1298,7 +1298,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "ghcr.io/huggingface/tgi-gaudi:2.0.1"
|
||||
image: "ghcr.io/huggingface/tgi-gaudi:2.0.5"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
|
||||
@@ -17,14 +17,14 @@ ip_address=$(hostname -I | awk '{print $1}')
|
||||
function build_docker_images() {
|
||||
cd $WORKPATH/docker_image_build
|
||||
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
|
||||
git clone https://github.com/huggingface/tei-gaudi
|
||||
|
||||
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
||||
service_list="chatqna-guardrails chatqna-ui dataprep-redis embedding-tei retriever-redis reranking-tei llm-tgi tei-gaudi guardrails-tgi"
|
||||
service_list="chatqna-guardrails chatqna-ui dataprep-redis embedding-tei retriever-redis reranking-tei llm-tgi guardrails-tgi"
|
||||
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
|
||||
|
||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
|
||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
docker pull ghcr.io/huggingface/tei-gaudi:latest
|
||||
|
||||
docker images && sleep 1s
|
||||
}
|
||||
|
||||
@@ -17,14 +17,14 @@ ip_address=$(hostname -I | awk '{print $1}')
|
||||
function build_docker_images() {
|
||||
cd $WORKPATH/docker_image_build
|
||||
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
|
||||
git clone https://github.com/huggingface/tei-gaudi
|
||||
|
||||
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
||||
service_list="chatqna-no-wrapper chatqna-ui dataprep-redis retriever-redis tei-gaudi"
|
||||
service_list="chatqna-no-wrapper chatqna-ui dataprep-redis retriever-redis"
|
||||
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
|
||||
|
||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
|
||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
docker pull ghcr.io/huggingface/tei-gaudi:latest
|
||||
|
||||
docker images && sleep 1s
|
||||
}
|
||||
|
||||
@@ -22,7 +22,7 @@ function build_docker_images() {
|
||||
service_list="chatqna-no-wrapper chatqna-ui chatqna-conversation-ui dataprep-redis retriever-redis"
|
||||
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
|
||||
|
||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
|
||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
|
||||
docker images && sleep 1s
|
||||
|
||||
@@ -17,14 +17,14 @@ ip_address=$(hostname -I | awk '{print $1}')
|
||||
function build_docker_images() {
|
||||
cd $WORKPATH/docker_image_build
|
||||
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
|
||||
git clone https://github.com/huggingface/tei-gaudi
|
||||
|
||||
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
||||
service_list="chatqna chatqna-ui dataprep-redis embedding-tei retriever-redis reranking-tei llm-tgi tei-gaudi nginx"
|
||||
service_list="chatqna chatqna-ui dataprep-redis embedding-tei retriever-redis reranking-tei llm-tgi nginx"
|
||||
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
|
||||
|
||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
|
||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
docker pull ghcr.io/huggingface/tei-gaudi:latest
|
||||
|
||||
docker images && sleep 1s
|
||||
}
|
||||
|
||||
@@ -22,7 +22,7 @@ function build_docker_images() {
|
||||
service_list="chatqna chatqna-ui chatqna-conversation-ui dataprep-redis embedding-tei retriever-redis reranking-tei llm-tgi nginx"
|
||||
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
|
||||
|
||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
|
||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
|
||||
docker images && sleep 1s
|
||||
|
||||
@@ -17,13 +17,13 @@ ip_address=$(hostname -I | awk '{print $1}')
|
||||
function build_docker_images() {
|
||||
cd $WORKPATH/docker_image_build
|
||||
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
|
||||
git clone https://github.com/huggingface/tei-gaudi
|
||||
|
||||
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
||||
service_list="chatqna chatqna-ui dataprep-redis embedding-tei retriever-redis reranking-tei tei-gaudi llm-vllm-hpu llm-vllm"
|
||||
service_list="chatqna chatqna-ui dataprep-redis embedding-tei retriever-redis reranking-tei llm-vllm-hpu llm-vllm"
|
||||
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
|
||||
|
||||
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
|
||||
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
docker pull ghcr.io/huggingface/tei-gaudi:latest
|
||||
docker images && sleep 1s
|
||||
}
|
||||
|
||||
|
||||
@@ -23,7 +23,7 @@ function build_docker_images() {
|
||||
service_list="chatqna chatqna-ui dataprep-redis embedding-tei retriever-redis reranking-tei llm-vllm vllm"
|
||||
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
|
||||
|
||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
|
||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
|
||||
docker images && sleep 1s
|
||||
|
||||
@@ -17,13 +17,13 @@ ip_address=$(hostname -I | awk '{print $1}')
|
||||
function build_docker_images() {
|
||||
cd $WORKPATH/docker_image_build
|
||||
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
|
||||
git clone https://github.com/huggingface/tei-gaudi
|
||||
|
||||
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
||||
service_list="chatqna chatqna-ui dataprep-redis embedding-tei retriever-redis reranking-tei tei-gaudi llm-vllm-ray-hpu llm-vllm-ray"
|
||||
service_list="chatqna chatqna-ui dataprep-redis embedding-tei retriever-redis reranking-tei llm-vllm-ray-hpu llm-vllm-ray"
|
||||
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
|
||||
|
||||
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
|
||||
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
docker pull ghcr.io/huggingface/tei-gaudi:latest
|
||||
docker images && sleep 1s
|
||||
}
|
||||
|
||||
|
||||
@@ -17,14 +17,14 @@ ip_address=$(hostname -I | awk '{print $1}')
|
||||
function build_docker_images() {
|
||||
cd $WORKPATH/docker_image_build
|
||||
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
|
||||
git clone https://github.com/huggingface/tei-gaudi
|
||||
|
||||
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
||||
service_list="chatqna-without-rerank chatqna-ui dataprep-redis embedding-tei retriever-redis llm-tgi tei-gaudi"
|
||||
service_list="chatqna-without-rerank chatqna-ui dataprep-redis embedding-tei retriever-redis llm-tgi"
|
||||
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
|
||||
|
||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
|
||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
docker pull ghcr.io/huggingface/tei-gaudi:latest
|
||||
|
||||
docker images && sleep 1s
|
||||
}
|
||||
|
||||
@@ -22,7 +22,7 @@ function build_docker_images() {
|
||||
service_list="chatqna-without-rerank chatqna-ui chatqna-conversation-ui dataprep-redis embedding-tei retriever-redis llm-tgi"
|
||||
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
|
||||
|
||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
|
||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
|
||||
docker images && sleep 1s
|
||||
|
||||
@@ -6,7 +6,7 @@ opea_micro_services:
|
||||
tgi-service:
|
||||
host: ${TGI_SERVICE_IP}
|
||||
ports: ${TGI_SERVICE_PORT}
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
volumes:
|
||||
- "./data:/data"
|
||||
runtime: habana
|
||||
@@ -17,7 +17,11 @@ opea_micro_services:
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
ENABLE_HPU_GRAPH: true
|
||||
LIMIT_HPU_GRAPH: true
|
||||
USE_FLASH_ATTENTION: true
|
||||
FLASH_ATTENTION_RECOMPUTE: true
|
||||
model-id: ${LLM_MODEL_ID}
|
||||
llm:
|
||||
host: ${LLM_SERVICE_HOST_IP}
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
|
||||
services:
|
||||
tgi-service:
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
container_name: tgi-gaudi-server
|
||||
ports:
|
||||
- "8028:80"
|
||||
@@ -15,7 +15,11 @@ services:
|
||||
https_proxy: ${https_proxy}
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
ENABLE_HPU_GRAPH: true
|
||||
LIMIT_HPU_GRAPH: true
|
||||
USE_FLASH_ATTENTION: true
|
||||
FLASH_ATTENTION_RECOMPUTE: true
|
||||
runtime: habana
|
||||
cap_add:
|
||||
- SYS_NICE
|
||||
|
||||
@@ -405,7 +405,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "ghcr.io/huggingface/tgi-gaudi:2.0.1"
|
||||
image: "ghcr.io/huggingface/tgi-gaudi:2.0.5"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
|
||||
@@ -22,7 +22,7 @@ function build_docker_images() {
|
||||
service_list="codegen codegen-ui llm-tgi"
|
||||
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
|
||||
|
||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
|
||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
docker images && sleep 1s
|
||||
}
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@ opea_micro_services:
|
||||
tgi-service:
|
||||
host: ${TGI_SERVICE_IP}
|
||||
ports: ${TGI_SERVICE_PORT}
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
volumes:
|
||||
- "./data:/data"
|
||||
runtime: habana
|
||||
@@ -17,7 +17,11 @@ opea_micro_services:
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
ENABLE_HPU_GRAPH: true
|
||||
LIMIT_HPU_GRAPH: true
|
||||
USE_FLASH_ATTENTION: true
|
||||
FLASH_ATTENTION_RECOMPUTE: true
|
||||
model-id: ${LLM_MODEL_ID}
|
||||
llm:
|
||||
host: ${LLM_SERVICE_HOST_IP}
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
|
||||
services:
|
||||
tgi-service:
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
container_name: codetrans-tgi-service
|
||||
ports:
|
||||
- "8008:80"
|
||||
@@ -15,7 +15,11 @@ services:
|
||||
https_proxy: ${https_proxy}
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
ENABLE_HPU_GRAPH: true
|
||||
LIMIT_HPU_GRAPH: true
|
||||
USE_FLASH_ATTENTION: true
|
||||
FLASH_ATTENTION_RECOMPUTE: true
|
||||
runtime: habana
|
||||
cap_add:
|
||||
- SYS_NICE
|
||||
|
||||
@@ -405,7 +405,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "ghcr.io/huggingface/tgi-gaudi:2.0.1"
|
||||
image: "ghcr.io/huggingface/tgi-gaudi:2.0.5"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
|
||||
@@ -22,7 +22,7 @@ function build_docker_images() {
|
||||
service_list="codetrans codetrans-ui llm-tgi nginx"
|
||||
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
|
||||
|
||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
|
||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
docker images && sleep 1s
|
||||
}
|
||||
|
||||
|
||||
@@ -28,7 +28,7 @@ services:
|
||||
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
tei-embedding-service:
|
||||
image: ${REGISTRY:-opea}/tei-gaudi:${TAG:-latest}
|
||||
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||
container_name: tei-embedding-gaudi-server
|
||||
ports:
|
||||
- "8090:80"
|
||||
|
||||
@@ -35,9 +35,3 @@ services:
|
||||
dockerfile: comps/dataprep/redis/langchain/Dockerfile
|
||||
extends: doc-index-retriever
|
||||
image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest}
|
||||
tei-gaudi:
|
||||
build:
|
||||
context: tei-gaudi
|
||||
dockerfile: Dockerfile-hpu
|
||||
extends: doc-index-retriever
|
||||
image: ${REGISTRY:-opea}/tei-gaudi:${TAG:-latest}
|
||||
|
||||
@@ -19,14 +19,12 @@ function build_docker_images() {
|
||||
if [ ! -d "GenAIComps" ] ; then
|
||||
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
|
||||
fi
|
||||
if [ ! -d "tei-gaudi" ] ; then
|
||||
git clone https://github.com/huggingface/tei-gaudi
|
||||
fi
|
||||
|
||||
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
||||
docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log
|
||||
|
||||
docker pull redis/redis-stack:7.2.0-v9
|
||||
docker pull ghcr.io/huggingface/tei-gaudi:latest
|
||||
docker images && sleep 1s
|
||||
}
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
|
||||
services:
|
||||
tgi-service:
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
container_name: tgi-gaudi-server
|
||||
ports:
|
||||
- "8008:80"
|
||||
@@ -11,8 +11,11 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
ENABLE_HPU_GRAPH: true
|
||||
LIMIT_HPU_GRAPH: true
|
||||
USE_FLASH_ATTENTION: true
|
||||
FLASH_ATTENTION_RECOMPUTE: true
|
||||
volumes:
|
||||
- "./data:/data"
|
||||
runtime: habana
|
||||
|
||||
@@ -6,7 +6,7 @@ opea_micro_services:
|
||||
tgi-service:
|
||||
host: ${TGI_SERVICE_IP}
|
||||
ports: ${TGI_SERVICE_PORT}
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
volumes:
|
||||
- "./data:/data"
|
||||
runtime: habana
|
||||
@@ -17,7 +17,11 @@ opea_micro_services:
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
ENABLE_HPU_GRAPH: true
|
||||
LIMIT_HPU_GRAPH: true
|
||||
USE_FLASH_ATTENTION: true
|
||||
FLASH_ATTENTION_RECOMPUTE: true
|
||||
model-id: ${LLM_MODEL_ID}
|
||||
llm:
|
||||
host: ${LLM_SERVICE_HOST_IP}
|
||||
|
||||
@@ -7,9 +7,9 @@ Install GMC in your Kubernetes cluster, if you have not already done so, by foll
|
||||
|
||||
The DocSum application is defined as a Custom Resource (CR) file that the above GMC operator acts upon. It first checks if the microservices listed in the CR yaml file are running, if not it starts them and then proceeds to connect them. When the DocSum RAG pipeline is ready, the service endpoint details are returned, letting you use the application. Should you use "kubectl get pods" commands you will see all the component microservices, in particular embedding, retriever, rerank, and llm.
|
||||
|
||||
The DocSum pipeline uses prebuilt images. The Xeon version uses the prebuilt image llm-docsum-tgi:latest which internally leverages the
|
||||
the image ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu. The service is called tgi-svc. Meanwhile, the Gaudi version launches the
|
||||
service tgi-gaudi-svc, which uses the image ghcr.io/huggingface/tgi-gaudi:1.2.1. Both TGI model services serve the model specified in the LLM_MODEL_ID variable that is exported by you. In the below example we use Intel/neural-chat-7b-v3-3.
|
||||
The DocSum pipeline uses prebuilt images. The Xeon version uses the prebuilt image `llm-docsum-tgi:latest` which internally leverages the
|
||||
image `ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu`. The service is called tgi-svc. Meanwhile, the Gaudi version launches the
|
||||
service tgi-gaudi-svc, which uses the image `ghcr.io/huggingface/tgi-gaudi:2.0.5`. Both TGI model services serve the model specified in the LLM_MODEL_ID variable that is exported by you. In the below example we use `Intel/neural-chat-7b-v3-3`.
|
||||
|
||||
[NOTE]
|
||||
Refer to [Docker Xeon README](https://github.com/opea-project/GenAIExamples/blob/main/DocSum/docker_compose/intel/cpu/xeon/README.md) or
|
||||
@@ -17,7 +17,7 @@ Refer to [Docker Xeon README](https://github.com/opea-project/GenAIExamples/blob
|
||||
These will be available on Docker Hub soon, simplifying installation.
|
||||
|
||||
## Deploy the RAG pipeline
|
||||
This involves deploying the application pipeline custom resource. You can use docsum_xeon.yaml if you have just a Xeon cluster or docsum_gaudi.yaml if you have a Gaudi cluster.
|
||||
This involves deploying the application pipeline custom resource. You can use `docsum_xeon.yaml` if you have just a Xeon cluster or `docsum_gaudi.yaml` if you have a Gaudi cluster.
|
||||
|
||||
1. Setup Environment variables. These are specific to the user. Skip the proxy settings if you are not operating behind one.
|
||||
|
||||
|
||||
@@ -405,7 +405,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "ghcr.io/huggingface/tgi-gaudi:2.0.1"
|
||||
image: "ghcr.io/huggingface/tgi-gaudi:2.0.5"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
|
||||
@@ -22,7 +22,7 @@ function build_docker_images() {
|
||||
service_list="docsum docsum-ui llm-docsum-tgi"
|
||||
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
|
||||
|
||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
|
||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
docker images && sleep 1s
|
||||
}
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
|
||||
services:
|
||||
tgi-service:
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
container_name: tgi-gaudi-server
|
||||
ports:
|
||||
- "8008:80"
|
||||
@@ -18,6 +18,10 @@ services:
|
||||
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
PREFILL_BATCH_BUCKET_SIZE: 1
|
||||
BATCH_BUCKET_SIZE: 8
|
||||
ENABLE_HPU_GRAPH: true
|
||||
LIMIT_HPU_GRAPH: true
|
||||
USE_FLASH_ATTENTION: true
|
||||
FLASH_ATTENTION_RECOMPUTE: true
|
||||
runtime: habana
|
||||
cap_add:
|
||||
- SYS_NICE
|
||||
|
||||
@@ -6,7 +6,7 @@ opea_micro_services:
|
||||
tgi-service:
|
||||
host: ${TGI_SERVICE_IP}
|
||||
ports: ${TGI_SERVICE_PORT}
|
||||
image: ghcr.io/huggingface/tgi-gaudi:1.2.1
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
volumes:
|
||||
- "./data:/data"
|
||||
runtime: habana
|
||||
@@ -14,10 +14,13 @@ opea_micro_services:
|
||||
- SYS_NICE
|
||||
ipc: host
|
||||
environment:
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
ENABLE_HPU_GRAPH: true
|
||||
LIMIT_HPU_GRAPH: true
|
||||
USE_FLASH_ATTENTION: true
|
||||
FLASH_ATTENTION_RECOMPUTE: true
|
||||
model-id: ${LLM_MODEL_ID}
|
||||
llm:
|
||||
host: ${LLM_SERVICE_HOST_IP}
|
||||
|
||||
@@ -39,7 +39,15 @@ spec:
|
||||
value: "8"
|
||||
- name: PORT
|
||||
value: "80"
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
|
||||
- name: ENABLE_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: LIMIT_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: USE_FLASH_ATTENTION
|
||||
value: 'true'
|
||||
- name: FLASH_ATTENTION_RECOMPUTE
|
||||
value: 'true'
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
securityContext:
|
||||
capabilities:
|
||||
@@ -72,7 +80,7 @@ spec:
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /home/sdp/cesg
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
- name: shm
|
||||
emptyDir:
|
||||
|
||||
@@ -22,7 +22,7 @@ function build_docker_images() {
|
||||
service_list="faqgen faqgen-ui llm-faqgen-tgi"
|
||||
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
|
||||
|
||||
docker pull ghcr.io/huggingface/tgi-gaudi:1.2.1
|
||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
docker images && sleep 1s
|
||||
}
|
||||
|
||||
|
||||
@@ -1,23 +1,39 @@
|
||||
# OPEA Productivity Suite Application
|
||||
# Productivity Suite Application
|
||||
|
||||
OPEA Productivity Suite streamlines your workflow to boost productivity. It leverages the OPEA microservices to provide a comprehensive suite of features to cater to the diverse needs of modern enterprises.
|
||||
Productivity Suite is a tool designed to streamline your workflow and boost productivity! Our application leverages the power of OPEA microservices to deliver a comprehensive suite of features tailored to meet the diverse needs of modern enterprises.
|
||||
|
||||
## Key Features
|
||||
---
|
||||
|
||||
- Chat with Documents: Engage in intelligent conversations with your documents using our advanced RAG Capabilities. Our Retrieval-Augmented Generation (RAG) model allows you to ask questions, receive relevant information, and gain insights from your documents in real-time.
|
||||
## 🛠️ Key Features
|
||||
|
||||
- Content Summarization: Save time and effort by automatically summarizing lengthy documents or articles, enabling you to quickly grasp the key takeaways.
|
||||
### 💬 Chat with Documents
|
||||
|
||||
- FAQ Generation: Effortlessly create comprehensive FAQs based on your documents, ensuring that your users have access to the information they need.
|
||||
Engage in intelligent conversations with your documents using our advanced **Retrieval-Augmented Generation (RAG)** capabilities. Ask questions, receive relevant information, and gain insights from your documents in real-time!
|
||||
|
||||
- Code Generation: Boost your coding productivity with our code generation feature. Simply provide a description of the functionality you require, and the application will generate the corresponding code snippets, saving you valuable time and effort.
|
||||
### 📄 Content Summarization
|
||||
|
||||
- User Context Management: Maintain a seamless workflow by managing your user's context within the application. Our context management system keeps track of your documents and chat history, allowing for personalized experiences.
|
||||
Summarize lengthy documents or articles, enabling you to grasp key takeaways quickly. Save time and effort with our intelligent summarization feature!
|
||||
|
||||
- Identity and access management: uses the open source platform Keycloak for single sign-on identity and access management.
|
||||
### ❓ FAQ Generation
|
||||
|
||||
Refer to the [Keycloak Configuration Guide](./docker_compose/intel/cpu/xeon/keycloak_setup_guide.md) for instructions to setup Keycloak.
|
||||
Effortlessly create comprehensive FAQs based on your documents. Ensure your users have access to the information they need with minimal effort!
|
||||
|
||||
Refer to the [Xeon Guide](./docker_compose/intel/cpu/xeon/README.md) for instructions to build docker images from source and running the application via docker compose.
|
||||
### 💻 Code Generation
|
||||
|
||||
Refer to the [Xeon Kubernetes Guide](./kubernetes/intel/README.md) for instructions to deploy the application via kubernetes.
|
||||
Boost your coding productivity by providing a description of the functionality you require. Our application generates corresponding code snippets, saving you valuable time and effort!
|
||||
|
||||
### 🎛️ User Context Management
|
||||
|
||||
Maintain a seamless workflow by managing your user's context within the application. Our context management system keeps track of documents and chat history for a personalized experience.
|
||||
|
||||
### 🔐 Identity and Access Management
|
||||
|
||||
Utilizes the open-source platform **Keycloak** for single sign-on identity and access management. This ensures secure and convenient access to your productivity tools.
|
||||
|
||||
---
|
||||
|
||||
## 📚 Setup Guide
|
||||
|
||||
- **[Keycloak Configuration Guide](./docker_compose/intel/cpu/xeon/keycloak_setup_guide.md)**: Instructions to set up Keycloak for identity and access management.
|
||||
- **[Xeon Guide](./docker_compose/intel/cpu/xeon/README.md)**: Instructions to build Docker images from source and run the application via Docker Compose.
|
||||
- **[Xeon Kubernetes Guide](./kubernetes/intel/README.md)**: Instructions to deploy the application via Kubernetes.
|
||||
|
||||
@@ -2,7 +2,9 @@
|
||||
|
||||
This document outlines the deployment process for OPEA Productivity Suite utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on Intel Xeon server and [GenAIExamples](https://github.com/opea-project/GenAIExamples.git) solutions. The steps include Docker image creation, container deployment via Docker Compose, and service execution to integrate microservices such as `embedding`, `retriever`, `rerank`, and `llm`. We will publish the Docker images to Docker Hub soon, which will simplify the deployment process for this service.
|
||||
|
||||
## 🚀 Build Docker Images
|
||||
---
|
||||
|
||||
## 🐳 Build Docker Images
|
||||
|
||||
First, build the Docker images locally and install the associated Python package.
|
||||
|
||||
@@ -38,15 +40,12 @@ docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_pr
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/dataprep-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain/Dockerfile .
|
||||
|
||||
```
|
||||
|
||||
### 6. Build Prompt Registry Image
|
||||
|
||||
```bash
|
||||
docker build -t opea/promptregistry-mongo-server:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/prompt_registry/mongo/Dockerfile .
|
||||
|
||||
|
||||
```
|
||||
|
||||
### 7. Build Chat History Image
|
||||
@@ -100,6 +99,8 @@ cd GenAIExamples/ProductivitySuite/ui
|
||||
docker build --no-cache -t ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml docker/Dockerfile.react .
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🚀 Start Microservices
|
||||
|
||||
### Setup Environment Variables
|
||||
@@ -184,17 +185,19 @@ Note: Please replace with `host_ip` with you external IP address, do not use loc
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/ProductivitySuite/docker_compose/intel/cpu/xeon
|
||||
```
|
||||
|
||||
```bash
|
||||
docker compose -f compose.yaml up -d
|
||||
```
|
||||
|
||||
### Setup Keycloak
|
||||
---
|
||||
|
||||
Please refer to [keycloak_setup_guide](keycloak_setup_guide.md) for more detail related to Keycloak configuration setup.
|
||||
### 🔐 Setup Keycloak
|
||||
|
||||
### Validate Microservices
|
||||
Please refer to **[keycloak_setup_guide](keycloak_setup_guide.md)** for more detail related to Keycloak configuration setup.
|
||||
|
||||
---
|
||||
|
||||
### ✅ Validate Microservices
|
||||
|
||||
1. TEI Embedding Service
|
||||
|
||||
@@ -474,6 +477,8 @@ Please refer to [keycloak_setup_guide](keycloak_setup_guide.md) for more detail
|
||||
"user": "test", "id":"{Conversation id to Delete}"}'
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🚀 Launch the UI
|
||||
|
||||
To access the frontend, open the following URL in your browser: http://{host_ip}:5174. By default, the UI runs on port 80 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `compose.yaml` file as shown below:
|
||||
@@ -490,57 +495,60 @@ Here is an example of running Productivity Suite
|
||||

|
||||

|
||||
|
||||
## 🧐 Features
|
||||
---
|
||||
|
||||
## 🛠️ Key Features
|
||||
|
||||
Here are some of the project's features:
|
||||
|
||||
### CHAT QNA
|
||||
### 💬ChatQnA
|
||||
|
||||
- Start a Text Chat:Initiate a text chat with the ability to input written conversations, where the dialogue content can also be customized based on uploaded files.
|
||||
- Context Awareness: The AI assistant maintains the context of the conversation, understanding references to previous statements or questions. This allows for more natural and coherent exchanges.
|
||||
- **Start a Text Chat**: Initiate a text chat with the ability to input written conversations, where the dialogue content can also be customized based on uploaded files.
|
||||
- **Context Awareness**: The AI assistant maintains the context of the conversation, understanding references to previous statements or questions. This allows for more natural and coherent exchanges.
|
||||
|
||||
### DATA SOURCE
|
||||
### 🎛️ Data Source
|
||||
|
||||
- The choice between uploading locally or copying a remote link. Chat according to uploaded knowledge base.
|
||||
- Uploaded File would get listed and user would be able add or remove file/links
|
||||
- **File Upload or Remote Link**: The choice between uploading locally or copying a remote link. Chat according to uploaded knowledge base.
|
||||
- **File Management**: Uploaded files are listed, and the user can add or remove files/links.
|
||||
|
||||
#### Screen Shot
|
||||
#### Screenshots
|
||||
|
||||

|
||||
|
||||
- Clear: Clear the record of the current dialog box without retaining the contents of the dialog box.
|
||||
- Chat history: Historical chat records can still be retained after refreshing, making it easier for users to view the context.
|
||||
- Conversational Chat : The application maintains a history of the conversation, allowing users to review previous messages and the AI to refer back to earlier points in the dialogue when necessary.
|
||||
- **Clear Chat**: Clear the record of the current dialog box without retaining the contents of the dialog box.
|
||||
- **Chat history**: Historical chat records can still be retained after refreshing, making it easier for users to view the context.
|
||||
- **Conversational Chat**: The application maintains a history of the conversation, allowing users to review previous messages and the AI to refer back to earlier points in the dialogue when necessary.
|
||||
|
||||
#### Screen Shots
|
||||
#### Screenshots
|
||||
|
||||

|
||||

|
||||
|
||||
### CODEGEN
|
||||
### 💻 Codegen
|
||||
|
||||
- Generate code: generate the corresponding code based on the current user's input.
|
||||
- **Generate code**: generate the corresponding code based on the current user's input.
|
||||
|
||||
Screen Shot
|
||||

|
||||
#### Screenshots
|
||||
|
||||
### DOC SUMMARY
|
||||

|
||||
|
||||
- Summarizing Uploaded Files: Upload files from their local device, then click 'Generate Summary' to summarize the content of the uploaded file. The summary will be displayed on the 'Summary' box.
|
||||
- Summarizing Text via Pasting: Paste the text to be summarized into the text box, then click 'Generate Summary' to produce a condensed summary of the content, which will be displayed in the 'Summary' box on the right.
|
||||
- Scroll to Bottom: The summarized content will automatically scroll to the bottom.
|
||||
### 📚 Document Summarization
|
||||
|
||||
#### Screen Shot
|
||||
- **Summarizing Uploaded Files**: Upload files from their local device, then click 'Generate Summary' to summarize the content of the uploaded file. The summary will be displayed on the 'Summary' box.
|
||||
- **Summarizing Text via Pasting**: Paste the text to be summarized into the text box, then click 'Generate Summary' to produce a condensed summary of the content, which will be displayed in the 'Summary' box on the right.
|
||||
- **Scroll to Bottom**: The summarized content will automatically scroll to the bottom.
|
||||
|
||||
#### Screenshots
|
||||
|
||||

|
||||

|
||||
|
||||
### FAQ Generator
|
||||
### ❓ FAQ Generator
|
||||
|
||||
- Generate FAQs from Text via Pasting: Paste the text to into the text box, then click 'Generate FAQ' to produce a condensed FAQ of the content, which will be displayed in the 'FAQ' box below.
|
||||
- **Generate FAQs from Text via Pasting**: Paste the text into the text box, then click 'Generate FAQ' to produce a condensed FAQ of the content, which will be displayed in the 'FAQ' box below.
|
||||
|
||||
- Generate FAQs from Text via txt file Upload: Upload the file in the Upload bar, then click 'Generate FAQ' to produce a condensed FAQ of the content, which will be displayed in the 'FAQ' box below.
|
||||
- **Generate FAQs from Text via txt file Upload**: Upload the file in the Upload bar, then click 'Generate FAQ' to produce a condensed FAQ of the content, which will be displayed in the 'FAQ' box below.
|
||||
|
||||
#### Screen Shot
|
||||
#### Screenshots
|
||||
|
||||

|
||||
|
||||
@@ -72,9 +72,7 @@ services:
|
||||
REDIS_URL: ${REDIS_URL}
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
|
||||
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
|
||||
LANGCHAIN_PROJECT: "opea-retriever-service"
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
restart: unless-stopped
|
||||
tei-reranking-service:
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
|
||||
@@ -1,21 +1,27 @@
|
||||
# Keycloak Configuration Setup
|
||||
# 🔐 Keycloak Configuration Setup
|
||||
|
||||
This document show you step-by-step how to configure Keycloak settings.
|
||||
This README document provides a comprehensive, step-by-step guide on how to configure **Keycloak** settings. The user management is facilitated via Keycloak, and the configuration is outlined below:
|
||||
|
||||
The user management is done via Keycloak and the configuration steps look like this:
|
||||
|
||||
1. Access the Keycloak admin console via url http:${host_ip}:8080 or endpoint that exposed from your kubernetes cluster to configure user. Use default username(admin) and password(admin) to login.
|
||||
1. Access the Keycloak admin console via the URL http://${host_ip}:8080, or via the endpoint exposed from your Kubernetes cluster, to configure users. Use the default username (**admin**) and password (**admin**) to log in.
|
||||

|
||||
|
||||
2. Create a new realm named **productivitysuite** within Keycloak.
|
||||

|
||||
|
||||

|
||||
|
||||
3. Create a new client called **productivitysuite** with default configurations.
|
||||

|
||||
4. Select the **productivitysuite** client that created just now. Insert your ProductivitySuite UI url endpoint into "Valid redirect URIs" and "Web origins" field. Example as screenshot below:
|
||||
|
||||
4. Select the **productivitysuite** client that you just created. Insert your ProductivitySuite UI url endpoint into **"Valid redirect URIs"** and **"Web origins"** field. Refer to screenshot below as an example:
|
||||

|
||||
5. From the left pane select the Realm roles and create a new role name as user and another new role as viewer.
|
||||
|
||||
5. From the left pane, select the Realm roles and create a new role named **user** and another new role as **viewer**.
|
||||

|
||||
6. Create a new user name as for example mary and another user as bob. Set passwords for both users (set 'Temporary' to 'Off'). Select Role mapping on the top, assign the user role to mary and assign the viewer role to bob.
|
||||
|
||||
6. Create a new user named, for example, **mary** and another user as **bob**. Set passwords for both users (set **'Temporary'** to **'Off'**). Select **Role mapping** at the top, then assign the user role to mary and the viewer role to bob.
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# Deploy ProductivitySuite with ReactUI
|
||||
# 🚀 Deploy ProductivitySuite with ReactUI
|
||||
|
||||
The document outlines the deployment steps for ProductivitySuite via Kubernetes cluster while utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline components and ReactUI, a popular React-based user interface library.
|
||||
|
||||
@@ -16,12 +16,14 @@ In ProductivitySuite, it consists of following pipelines/examples and components
|
||||
- keycloak
|
||||
```
|
||||
|
||||
## Prerequisites for Deploying ProductivitySuite with ReactUI
|
||||
---
|
||||
|
||||
## ⚠️ Prerequisites for Deploying ProductivitySuite with ReactUI
|
||||
To begin with, ensure that you have following prerequisites in place:
|
||||
|
||||
1. Kubernetes installation: Make sure that you have Kubernetes installed.
|
||||
2. Images: Make sure you have all the images ready for the examples and components stated above. You may refer to [README](../../docker_compose/intel/cpu/xeon/README.md) for steps to build the images.
|
||||
3. Configuration Values: Set the following values in all the yaml files before proceeding with the deployment:
|
||||
1. ☸ Kubernetes installation: Make sure that you have Kubernetes installed.
|
||||
2. 🐳 Images: Make sure you have all the images ready for the examples and components stated above. You may refer to [README](../../docker_compose/intel/cpu/xeon/README.md) for steps to build the images.
|
||||
3. 🔧 Configuration Values: Set the following values in all the yaml files before proceeding with the deployment:
|
||||
|
||||
a. HUGGINGFACEHUB_API_TOKEN (Your HuggingFace token to download your desired model from HuggingFace):
|
||||
```
|
||||
@@ -42,20 +44,26 @@ To begin with, ensure that you have following prerequisites in place:
|
||||
# Look for ENDPOINT in the yaml and insert all the url endpoint for all the required backend service.
|
||||
```
|
||||
|
||||
4. MODEL_ID and model-volume (OPTIONAL): You may as well customize the "MODEL_ID" to use different model and model-volume for the volume to be mounted.
|
||||
4. MODEL_ID and model-volume **(OPTIONAL)**: You may as well customize the "MODEL_ID" to use different model and model-volume for the volume to be mounted.
|
||||
5. After finishing the steps above, you can proceed with the deployment of the yaml file.
|
||||
|
||||
## Deploying ProductivitySuite
|
||||
---
|
||||
|
||||
## 🌐 Deploying ProductivitySuite
|
||||
You can use yaml files in xeon folder to deploy ProductivitySuite with reactUI.
|
||||
```
|
||||
cd GenAIExamples/ProductivitySuite/kubernetes/intel/cpu/xeon/manifests/
|
||||
kubectl apply -f *.yaml
|
||||
```
|
||||
|
||||
## User Management via Keycloak Configuration
|
||||
Please refer to [keycloak_setup_guide](../../docker_compose/intel/cpu/xeon/keycloak_setup_guide.md) for more detail related to Keycloak configuration setup.
|
||||
---
|
||||
|
||||
## Verify Services
|
||||
## 🔐 User Management via Keycloak Configuration
|
||||
Please refer to **[keycloak_setup_guide](../../docker_compose/intel/cpu/xeon/keycloak_setup_guide.md)** for more detail related to Keycloak configuration setup.
|
||||
|
||||
---
|
||||
|
||||
## ✅ Verify Services
|
||||
To verify the installation, run command 'kubectl get pod' to make sure all pods are running.
|
||||
|
||||
To view all the available services, run command 'kubectl get svc' to obtain ports that need to be used as backend service endpoints in productivity_suite_reactui.yaml.
|
||||
|
||||
@@ -993,7 +993,7 @@ spec:
|
||||
name: chatqna-tgi-config
|
||||
securityContext:
|
||||
{}
|
||||
image: "ghcr.io/huggingface/text-generation-inference:2.1.0"
|
||||
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
|
||||
@@ -229,7 +229,7 @@ spec:
|
||||
name: codegen-tgi-config
|
||||
securityContext:
|
||||
{}
|
||||
image: "ghcr.io/huggingface/text-generation-inference:1.4"
|
||||
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
|
||||
@@ -229,7 +229,7 @@ spec:
|
||||
name: docsum-tgi-config
|
||||
securityContext:
|
||||
{}
|
||||
image: "ghcr.io/huggingface/text-generation-inference:2.1.0"
|
||||
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
|
||||
@@ -138,7 +138,7 @@ spec:
|
||||
- configMapRef:
|
||||
name: faqgen-tgi-config
|
||||
securityContext: {}
|
||||
image: "ghcr.io/huggingface/text-generation-inference:2.1.0"
|
||||
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
|
||||
@@ -53,20 +53,20 @@ function start_services() {
|
||||
export TGI_LLM_ENDPOINT_CODEGEN="http://${ip_address}:8028"
|
||||
export TGI_LLM_ENDPOINT_FAQGEN="http://${ip_address}:9009"
|
||||
export TGI_LLM_ENDPOINT_DOCSUM="http://${ip_address}:9009"
|
||||
export BACKEND_SERVICE_ENDPOINT_CHATQNA="http://${host_ip}:8888/v1/chatqna"
|
||||
export BACKEND_SERVICE_ENDPOINT_FAQGEN="http://${host_ip}:8889/v1/faqgen"
|
||||
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6009/v1/dataprep/delete_file"
|
||||
export BACKEND_SERVICE_ENDPOINT_CODEGEN="http://${host_ip}:7778/v1/codegen"
|
||||
export BACKEND_SERVICE_ENDPOINT_DOCSUM="http://${host_ip}:8890/v1/docsum"
|
||||
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
|
||||
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6008/v1/dataprep/get_file"
|
||||
export CHAT_HISTORY_CREATE_ENDPOINT="http://${host_ip}:6012/v1/chathistory/create"
|
||||
export CHAT_HISTORY_CREATE_ENDPOINT="http://${host_ip}:6012/v1/chathistory/create"
|
||||
export CHAT_HISTORY_DELETE_ENDPOINT="http://${host_ip}:6012/v1/chathistory/delete"
|
||||
export CHAT_HISTORY_GET_ENDPOINT="http://${host_ip}:6012/v1/chathistory/get"
|
||||
export PROMPT_SERVICE_GET_ENDPOINT="http://${host_ip}:6015/v1/prompt/get"
|
||||
export PROMPT_SERVICE_CREATE_ENDPOINT="http://${host_ip}:6015/v1/prompt/create"
|
||||
export KEYCLOAK_SERVICE_ENDPOINT="http://${host_ip}:8080"
|
||||
export BACKEND_SERVICE_ENDPOINT_CHATQNA="http://${ip_address}:8888/v1/chatqna"
|
||||
export BACKEND_SERVICE_ENDPOINT_FAQGEN="http://${ip_address}:8889/v1/faqgen"
|
||||
export DATAPREP_DELETE_FILE_ENDPOINT="http://${ip_address}:6009/v1/dataprep/delete_file"
|
||||
export BACKEND_SERVICE_ENDPOINT_CODEGEN="http://${ip_address}:7778/v1/codegen"
|
||||
export BACKEND_SERVICE_ENDPOINT_DOCSUM="http://${ip_address}:8890/v1/docsum"
|
||||
export DATAPREP_SERVICE_ENDPOINT="http://${ip_address}:6007/v1/dataprep"
|
||||
export DATAPREP_GET_FILE_ENDPOINT="http://${ip_address}:6008/v1/dataprep/get_file"
|
||||
export CHAT_HISTORY_CREATE_ENDPOINT="http://${ip_address}:6012/v1/chathistory/create"
|
||||
export CHAT_HISTORY_CREATE_ENDPOINT="http://${ip_address}:6012/v1/chathistory/create"
|
||||
export CHAT_HISTORY_DELETE_ENDPOINT="http://${ip_address}:6012/v1/chathistory/delete"
|
||||
export CHAT_HISTORY_GET_ENDPOINT="http://${ip_address}:6012/v1/chathistory/get"
|
||||
export PROMPT_SERVICE_GET_ENDPOINT="http://${ip_address}:6015/v1/prompt/get"
|
||||
export PROMPT_SERVICE_CREATE_ENDPOINT="http://${ip_address}:6015/v1/prompt/create"
|
||||
export KEYCLOAK_SERVICE_ENDPOINT="http://${ip_address}:8080"
|
||||
export MONGO_HOST=${ip_address}
|
||||
export MONGO_PORT=27017
|
||||
export DB_NAME="opea"
|
||||
@@ -235,7 +235,7 @@ function validate_microservices() {
|
||||
|
||||
# FAQGen llm microservice
|
||||
validate_service \
|
||||
"${ip_address}:${LLM_SERVICE_HOST_PORT_FAQGEN}/v1/faqgen" \
|
||||
"${ip_address}:9002/v1/faqgen" \
|
||||
"data: " \
|
||||
"llm_faqgen" \
|
||||
"llm-faqgen-server" \
|
||||
@@ -243,7 +243,7 @@ function validate_microservices() {
|
||||
|
||||
# Docsum llm microservice
|
||||
validate_service \
|
||||
"${ip_address}:${LLM_SERVICE_HOST_PORT_DOCSUM}/v1/chat/docsum" \
|
||||
"${ip_address}:9003/v1/chat/docsum" \
|
||||
"data: " \
|
||||
"llm_docsum" \
|
||||
"llm-docsum-server" \
|
||||
@@ -251,7 +251,7 @@ function validate_microservices() {
|
||||
|
||||
# CodeGen llm microservice
|
||||
validate_service \
|
||||
"${ip_address}:${LLM_SERVICE_HOST_PORT_CODEGEN}/v1/chat/completions" \
|
||||
"${ip_address}:9001/v1/chat/completions" \
|
||||
"data: " \
|
||||
"llm_codegen" \
|
||||
"llm-tgi-server-codegen" \
|
||||
|
||||
@@ -32,18 +32,7 @@ docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$ht
|
||||
docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile .
|
||||
```
|
||||
|
||||
### 5. Build TEI Gaudi Image
|
||||
|
||||
Since a TEI Gaudi Docker image hasn't been published, we'll need to build it from the [tei-guadi](https://github.com/huggingface/tei-gaudi) repository.
|
||||
|
||||
```bash
|
||||
git clone https://github.com/huggingface/tei-gaudi
|
||||
cd tei-gaudi/
|
||||
docker build --no-cache -f Dockerfile-hpu -t opea/tei-gaudi:latest .
|
||||
cd ../..
|
||||
```
|
||||
|
||||
### 6. Build MegaService Docker Image
|
||||
### 5. Build MegaService Docker Image
|
||||
|
||||
To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `searchqna.py` Python script. Build the MegaService Docker image using the command below:
|
||||
|
||||
@@ -62,12 +51,11 @@ docker build --no-cache -t opea/searchqna:latest --build-arg https_proxy=$https_
|
||||
|
||||
Then run the command `docker images`, you will have
|
||||
|
||||
1. `opea/tei-gaudi:latest`
|
||||
2. `opea/embedding-tei:latest`
|
||||
3. `opea/web-retriever-chroma:latest`
|
||||
4. `opea/reranking-tei:latest`
|
||||
5. `opea/llm-tgi:latest`
|
||||
6. `opea/searchqna:latest`
|
||||
1. `opea/embedding-tei:latest`
|
||||
2. `opea/web-retriever-chroma:latest`
|
||||
3. `opea/reranking-tei:latest`
|
||||
4. `opea/llm-tgi:latest`
|
||||
5. `opea/searchqna:latest`
|
||||
|
||||
## 🚀 Set the environment variables
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
|
||||
services:
|
||||
tei-embedding-service:
|
||||
image: ${REGISTRY:-opea}/tei-gaudi:${TAG:-latest}
|
||||
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||
container_name: tei-embedding-gaudi-server
|
||||
ports:
|
||||
- "3001:80"
|
||||
@@ -80,7 +80,7 @@ services:
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
restart: unless-stopped
|
||||
tgi-service:
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
container_name: tgi-gaudi-server
|
||||
ports:
|
||||
- "3006:80"
|
||||
@@ -90,11 +90,15 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
ENABLE_HPU_GRAPH: true
|
||||
LIMIT_HPU_GRAPH: true
|
||||
USE_FLASH_ATTENTION: true
|
||||
FLASH_ATTENTION_RECOMPUTE: true
|
||||
runtime: habana
|
||||
cap_add:
|
||||
- SYS_NICE
|
||||
|
||||
@@ -41,9 +41,3 @@ services:
|
||||
dockerfile: comps/llms/text-generation/tgi/Dockerfile
|
||||
extends: searchqna
|
||||
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
|
||||
tei-gaudi:
|
||||
build:
|
||||
context: tei-gaudi
|
||||
dockerfile: Dockerfile-hpu
|
||||
extends: searchqna
|
||||
image: ${REGISTRY:-opea}/tei-gaudi:${TAG:-latest}
|
||||
|
||||
@@ -17,14 +17,14 @@ ip_address=$(hostname -I | awk '{print $1}')
|
||||
function build_docker_images() {
|
||||
cd $WORKPATH/docker_image_build
|
||||
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
|
||||
git clone https://github.com/huggingface/tei-gaudi
|
||||
|
||||
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
||||
service_list="searchqna searchqna-ui embedding-tei web-retriever-chroma reranking-tei llm-tgi tei-gaudi"
|
||||
service_list="searchqna searchqna-ui embedding-tei web-retriever-chroma reranking-tei llm-tgi"
|
||||
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
|
||||
|
||||
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
|
||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
docker pull ghcr.io/huggingface/tei-gaudi:latest
|
||||
docker images && sleep 1s
|
||||
}
|
||||
|
||||
@@ -73,10 +73,10 @@ function start_services() {
|
||||
|
||||
|
||||
function validate_megaservice() {
|
||||
result=$(http_proxy="" curl http://${ip_address}:3008/v1/searchqna -XPOST -d '{"messages": "How many gold medals does USA win in olympics 2024? Give me also the source link.", "stream": "False"}' -H 'Content-Type: application/json')
|
||||
result=$(http_proxy="" curl http://${ip_address}:3008/v1/searchqna -XPOST -d '{"messages": "What is black myth wukong?", "stream": "False"}' -H 'Content-Type: application/json')
|
||||
echo $result
|
||||
|
||||
if [[ $result == *"2024"* ]]; then
|
||||
if [[ $result == *"the"* ]]; then
|
||||
docker logs web-retriever-chroma-server > ${LOG_PATH}/web-retriever-chroma-server.log
|
||||
docker logs searchqna-gaudi-backend-server > ${LOG_PATH}/searchqna-gaudi-backend-server.log
|
||||
docker logs tei-embedding-gaudi-server > ${LOG_PATH}/tei-embedding-gaudi-server.log
|
||||
|
||||
@@ -71,10 +71,10 @@ function start_services() {
|
||||
|
||||
|
||||
function validate_megaservice() {
|
||||
result=$(http_proxy="" curl http://${ip_address}:3008/v1/searchqna -XPOST -d '{"messages": "How many gold medals does USA win in olympics 2024? Give me also the source link.", "stream": "False"}' -H 'Content-Type: application/json')
|
||||
result=$(http_proxy="" curl http://${ip_address}:3008/v1/searchqna -XPOST -d '{"messages": "What is black myth wukong?", "stream": "False"}' -H 'Content-Type: application/json')
|
||||
echo $result
|
||||
|
||||
if [[ $result == *"2024"* ]]; then
|
||||
if [[ $result == *"the"* ]]; then
|
||||
docker logs web-retriever-chroma-server
|
||||
docker logs searchqna-xeon-backend-server
|
||||
echo "Result correct."
|
||||
|
||||
@@ -3,18 +3,23 @@
|
||||
|
||||
services:
|
||||
tgi-service:
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
container_name: tgi-gaudi-server
|
||||
ports:
|
||||
- "8008:80"
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
ENABLE_HPU_GRAPH: true
|
||||
LIMIT_HPU_GRAPH: true
|
||||
USE_FLASH_ATTENTION: true
|
||||
FLASH_ATTENTION_RECOMPUTE: true
|
||||
runtime: habana
|
||||
cap_add:
|
||||
- SYS_NICE
|
||||
@@ -31,6 +36,7 @@ services:
|
||||
- "9000:9000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
|
||||
@@ -47,6 +53,7 @@ services:
|
||||
ports:
|
||||
- "8888:8888"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
|
||||
@@ -61,6 +68,7 @@ services:
|
||||
ports:
|
||||
- "5173:5173"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- BASE_URL=${BACKEND_SERVICE_ENDPOINT}
|
||||
|
||||
@@ -22,7 +22,7 @@ function build_docker_images() {
|
||||
service_list="translation translation-ui llm-tgi nginx"
|
||||
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
|
||||
|
||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
|
||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
docker images && sleep 1s
|
||||
}
|
||||
|
||||
@@ -166,7 +166,7 @@ function main() {
|
||||
|
||||
validate_microservices
|
||||
validate_megaservice
|
||||
validate_frontend
|
||||
#validate_frontend
|
||||
|
||||
stop_docker
|
||||
echo y | docker system prune
|
||||
|
||||
@@ -6,7 +6,7 @@ opea_micro_services:
|
||||
tgi-service:
|
||||
host: ${TGI_SERVICE_IP}
|
||||
ports: ${TGI_SERVICE_PORT}
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
volumes:
|
||||
- "./data:/data"
|
||||
runtime: habana
|
||||
@@ -14,10 +14,17 @@ opea_micro_services:
|
||||
- SYS_NICE
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
ENABLE_HPU_GRAPH: true
|
||||
LIMIT_HPU_GRAPH: true
|
||||
USE_FLASH_ATTENTION: true
|
||||
FLASH_ATTENTION_RECOMPUTE: true
|
||||
model-id: ${LLM_MODEL_ID}
|
||||
llm:
|
||||
host: ${LLM_SERVICE_HOST_IP}
|
||||
@@ -25,6 +32,9 @@ opea_micro_services:
|
||||
image: opea/llm-tgi:latest
|
||||
endpoint: /v1/chat/completions
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
ui:
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
|
||||
services:
|
||||
llava-tgi-service:
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
container_name: tgi-llava-gaudi-server
|
||||
ports:
|
||||
- "8399:80"
|
||||
@@ -17,6 +17,11 @@ services:
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
ENABLE_HPU_GRAPH: true
|
||||
LIMIT_HPU_GRAPH: true
|
||||
USE_FLASH_ATTENTION: true
|
||||
FLASH_ATTENTION_RECOMPUTE: true
|
||||
runtime: habana
|
||||
cap_add:
|
||||
- SYS_NICE
|
||||
|
||||
@@ -216,7 +216,7 @@ spec:
|
||||
name: visualqna-tgi-config
|
||||
securityContext:
|
||||
{}
|
||||
image: "ghcr.io/huggingface/text-generation-inference:2.2.0"
|
||||
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
|
||||
@@ -21,7 +21,7 @@ function build_docker_images() {
|
||||
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
||||
docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log
|
||||
|
||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.4
|
||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
docker images && sleep 1s
|
||||
}
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user