Compare commits

...

14 Commits

Author SHA1 Message Date
Zhenzhong1
cc5be6c6a3 add manifests wit wrapper 2024-09-28 20:00:53 -07:00
sri-intel
2de1bfc5bb Bug fix for issue #881 (#882)
Signed-off-by: srinarayan-srikanthan <srinarayan.srikanthan@intel.com>
2024-09-27 13:06:02 +08:00
sri-intel
75df2c9979 docker install instruction for csp (#843)
Signed-off-by: sri <srinarayan.srikanthan@intel.com>
Signed-off-by: srinarayan-srikanthan <srinarayan.srikanthan@intel.com>
2024-09-27 13:00:10 +08:00
minmin-intel
62e06a0aff Update DocIndexRetriever Example to allow user passing in retriever/reranker params (#880)
Signed-off-by: minmin-intel <minmin.hou@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-09-27 10:21:54 +08:00
jotpalch
bd32b03e3c Doc: Update folder path to correct location in "Deploy ChatQnA in Kubernetes" (#875) 2024-09-26 14:38:22 +08:00
xiguiw
9d0b49c2d6 [doc] Update AIPC document (#874)
Signed-off-by: Wang, Xigui <xigui.wang@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-09-26 14:28:16 +08:00
XinyaoWa
75ce2a3ca6 remove old knowledgegraphs link (#876)
Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>
2024-09-26 14:23:42 +08:00
David Kinder
99c10933b4 doc: fix doc heading (#873)
Signed-off-by: David B. Kinder <david.b.kinder@intel.com>
2024-09-26 12:33:57 +09:00
Pranav Singh
8bcd82e82d [docs] Fixes Typo in Gaudi Docker Images Setup (#868)
Signed-off-by: Pranav Singh <pranav.singh@intel.com>
2024-09-25 17:47:59 +08:00
Zhenzhong1
c1038d2193 [ChatQnA] Deploy ChatQnA for benchmarking with different configurations. (#870) 2024-09-25 16:47:44 +08:00
lvliang-intel
33b9d4e421 Remove redundant code and update tgi version (#871)
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
2024-09-25 15:33:33 +08:00
chen, suyue
c9553c6f9a Clean up CI jobs (#872)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-09-25 14:59:14 +08:00
David Kinder
3e796ba73d doc: fix missing references to README.md (#860)
Signed-off-by: David B. Kinder <david.b.kinder@intel.com>
2024-09-24 21:40:42 +08:00
Hoong Tee, Yeoh
5ed776709d [docker_images_list]: Update images information (#864)
Signed-off-by: Yeoh, Hoong Tee <hoong.tee.yeoh@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-09-24 21:39:42 +08:00
96 changed files with 9034 additions and 294 deletions

View File

@@ -11,23 +11,23 @@ on:
required: true
type: string
examples:
default: "Translation"
description: 'List of examples to publish [AudioQnA,ChatQnA,CodeGen,CodeTrans,DocSum,FaqGen,SearchQnA,Translation]'
default: ""
description: 'List of examples to publish [AgentQnA,AudioQnA,ChatQnA,CodeGen,CodeTrans,DocIndexRetriever,DocSum,FaqGen,InstructionTuning,MultimodalQnA,ProductivitySuite,RerankFinetuning,SearchQnA,Translation,VideoQnA,VisualQnA]'
required: false
type: string
images:
default: "gmcmanager,gmcrouter"
description: 'List of images to publish [gmcmanager,gmcrouter, ...]'
default: ""
description: 'List of images to publish [gmcmanager,gmcrouter]'
required: false
type: string
tag:
default: "v0.9"
description: "Tag to publish"
default: "rc"
description: "Tag to publish, like [1.0rc]"
required: true
type: string
publish_tags:
default: "latest,v0.9"
description: 'Tag list apply to publish images'
default: "latest,1.x"
description: "Tag list apply to publish images, like [latest,1.0]"
required: false
type: string

View File

@@ -11,13 +11,13 @@ on:
required: true
type: string
examples:
default: "ChatQnA"
description: 'List of examples to scan [AudioQnA,ChatQnA,CodeGen,CodeTrans,DocSum,FaqGen,SearchQnA,Translation]'
default: ""
description: 'List of examples to publish "AgentQnA,AudioQnA,ChatQnA,CodeGen,CodeTrans,DocIndexRetriever,DocSum,FaqGen,InstructionTuning,MultimodalQnA,ProductivitySuite,RerankFinetuning,SearchQnA,Translation,VideoQnA,VisualQnA"'
required: false
type: string
images:
default: "gmcmanager,gmcrouter"
description: 'List of images to scan [gmcmanager,gmcrouter, ...]'
default: ""
description: 'List of images to publish "gmcmanager,gmcrouter"'
required: false
type: string
tag:

View File

@@ -1,54 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: Manifests Validate
on:
pull_request:
branches: [main]
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
paths:
- "**/kubernetes/manifests/**"
- .github/workflows/manifest-validate.yml
workflow_dispatch:
# If there is a new commit, the previous jobs will be canceled
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
env:
MANIFEST_DIR: "manifests"
jobs:
manifests-validate:
runs-on: ubuntu-latest
steps:
- name: Checkout out Repo
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: changed files
id: changed_files
run: |
set -xe
changed_folder=$(git diff --name-only ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }} | \
grep "kubernetes/manifests" | grep -vE '.github|README.md|*.txt|*.sh' | cut -d'/' -f1 | sort -u )
echo "changed_folder: $changed_folder"
if [ -z "$changed_folder" ]; then
echo "No changes in manifests folder"
echo "SKIP=true" >> $GITHUB_OUTPUT
exit 0
fi
echo "SKIP=false" >> $GITHUB_OUTPUT
for folder in $changed_folder; do
folder_str="$folder_str $folder/kubernetes/manifests/"
done
echo "folder_str=$folder_str"
echo "folder_str=$folder_str" >> $GITHUB_ENV
- uses: docker://ghcr.io/yannh/kubeconform:latest
if: steps.changed_files.outputs.SKIP == 'false'
with:
args: "-summary -output json ${{env.folder_str}}"

View File

@@ -9,7 +9,6 @@ on:
paths:
- "**.py"
- "**Dockerfile"
workflow_dispatch:
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}-on-push

View File

@@ -18,8 +18,6 @@ repos:
SearchQnA/ui/svelte/tsconfig.json|
DocSum/ui/svelte/tsconfig.json
)$
- id: check-yaml
args: [--allow-multiple-documents]
- id: debug-statements
- id: requirements-txt-fixer
- id: trailing-whitespace

View File

@@ -103,4 +103,4 @@ curl http://${ip_address}:9090/v1/chat/completions -X POST -H "Content-Type: app
## How to register your own tools with agent
You can take a look at the tools yaml and python files in this example. For more details, please refer to the "Provide your own tools" section in the instructions [here](https://github.com/opea-project/GenAIComps/tree/main/comps/agent/langchain#5-customize-agent-strategy).
You can take a look at the tools yaml and python files in this example. For more details, please refer to the "Provide your own tools" section in the instructions [here](https://github.com/opea-project/GenAIComps/tree/main/comps/agent/langchain/README.md#5-customize-agent-strategy).

View File

@@ -14,7 +14,7 @@ We evaluate the WER (Word Error Rate) metric of the ASR microservice.
### Launch ASR microservice
Launch the ASR microserice with the following commands. For more details please refer to [doc](https://github.com/opea-project/GenAIComps/tree/main/comps/asr).
Launch the ASR microserice with the following commands. For more details please refer to [doc](https://github.com/opea-project/GenAIComps/tree/main/comps/asr/whisper/README.md).
```bash
git clone https://github.com/opea-project/GenAIComps

View File

@@ -4,7 +4,7 @@ This document outlines the deployment process for a AudioQnA application utilizi
The AudioQnA Service leverages a Kubernetes operator called genai-microservices-connector(GMC). GMC supports connecting microservices to create pipelines based on the specification in the pipeline yaml file in addition to allowing the user to dynamically control which model is used in a service such as an LLM or embedder. The underlying pipeline language also supports using external services that may be running in public or private cloud elsewhere.
Install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in Section "Getting Started" at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector). Soon as we publish images to Docker Hub, at which point no builds will be required, simplifying install.
Install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in Section "Getting Started" at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector/README.md). Soon as we publish images to Docker Hub, at which point no builds will be required, simplifying install.
The AudioQnA application is defined as a Custom Resource (CR) file that the above GMC operator acts upon. It first checks if the microservices listed in the CR yaml file are running, if not starts them and then proceeds to connect them. When the AudioQnA pipeline is ready, the service endpoint details are returned, letting you use the application. Should you use "kubectl get pods" commands you will see all the component microservices, in particular `asr`, `tts`, and `llm`.

View File

@@ -16,6 +16,8 @@ Quick Start Deployment Steps:
2. Run Docker Compose.
3. Consume the ChatQnA Service.
Note: If you do not have docker installed you can run this script to install docker : `bash docker_compose/install_docker.sh`
### Quick Start: 1.Setup Environment Variable
To set up environment variables for deploying ChatQnA services, follow these steps:
@@ -240,7 +242,7 @@ Refer to the [Kubernetes Guide](./kubernetes/intel/README.md) for instructions o
Install Helm (version >= 3.15) first. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information.
Refer to the [ChatQnA helm chart](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts/chatqna) for instructions on deploying ChatQnA into Kubernetes on Xeon & Gaudi.
Refer to the [ChatQnA helm chart](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts/chatqna/README.md) for instructions on deploying ChatQnA into Kubernetes on Xeon & Gaudi.
### Deploy ChatQnA on AI PC
@@ -306,7 +308,7 @@ Two ways of consuming ChatQnA Service:
## Troubleshooting
1. If you get errors like "Access Denied", [validate micro service](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/docker_compose/intel/cpu/xeon#validate-microservices) first. A simple example:
1. If you get errors like "Access Denied", [validate micro service](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/docker_compose/intel/cpu/xeon/README.md#validate-microservices) first. A simple example:
```bash
http_proxy="" curl ${host_ip}:6006/embed -X POST -d '{"inputs":"What is Deep Learning?"}' -H 'Content-Type: application/json'

View File

@@ -275,10 +275,6 @@ spec:
- '2048'
- --max-total-tokens
- '4096'
- --max-batch-total-tokens
- '65536'
- --max-batch-prefill-tokens
- '4096'
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
@@ -288,20 +284,12 @@ spec:
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HUGGING_FACE_HUB_TOKEN
- name: HF_TOKEN
value: ${HF_TOKEN}
- name: ENABLE_HPU_GRAPH
value: 'true'
- name: LIMIT_HPU_GRAPH
value: 'true'
- name: USE_FLASH_ATTENTION
value: 'true'
- name: FLASH_ATTENTION_RECOMPUTE
value: 'true'
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:

View File

@@ -275,10 +275,6 @@ spec:
- '2048'
- --max-total-tokens
- '4096'
- --max-batch-total-tokens
- '65536'
- --max-batch-prefill-tokens
- '4096'
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
@@ -288,20 +284,12 @@ spec:
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HUGGING_FACE_HUB_TOKEN
- name: HF_TOKEN
value: ${HF_TOKEN}
- name: ENABLE_HPU_GRAPH
value: 'true'
- name: LIMIT_HPU_GRAPH
value: 'true'
- name: USE_FLASH_ATTENTION
value: 'true'
- name: FLASH_ATTENTION_RECOMPUTE
value: 'true'
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:

View File

@@ -275,10 +275,6 @@ spec:
- '2048'
- --max-total-tokens
- '4096'
- --max-batch-total-tokens
- '65536'
- --max-batch-prefill-tokens
- '4096'
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
@@ -288,20 +284,12 @@ spec:
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HUGGING_FACE_HUB_TOKEN
- name: HF_TOKEN
value: ${HF_TOKEN}
- name: ENABLE_HPU_GRAPH
value: 'true'
- name: LIMIT_HPU_GRAPH
value: 'true'
- name: USE_FLASH_ATTENTION
value: 'true'
- name: FLASH_ATTENTION_RECOMPUTE
value: 'true'
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:

View File

@@ -323,7 +323,7 @@ spec:
- envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
name: llm-dependency-deploy-demo
securityContext:
capabilities:
@@ -336,10 +336,6 @@ spec:
- '2048'
- --max-total-tokens
- '4096'
- --max-batch-total-tokens
- '65536'
- --max-batch-prefill-tokens
- '4096'
volumeMounts:
- mountPath: /data
name: model-volume
@@ -359,16 +355,8 @@ spec:
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HUGGING_FACE_HUB_TOKEN
- name: HF_TOKEN
value: ${HF_TOKEN}
- name: ENABLE_HPU_GRAPH
value: 'true'
- name: LIMIT_HPU_GRAPH
value: 'true'
- name: USE_FLASH_ATTENTION
value: 'true'
- name: FLASH_ATTENTION_RECOMPUTE
value: 'true'
serviceAccountName: default
volumes:
- name: model-volume
@@ -491,7 +479,7 @@ spec:
- envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tei-gaudi:latest
image: opea/tei-gaudi:latest
name: reranking-dependency-deploy
args:
- --model-id

View File

@@ -323,7 +323,7 @@ spec:
- envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
name: llm-dependency-deploy-demo
securityContext:
capabilities:
@@ -336,10 +336,6 @@ spec:
- '2048'
- --max-total-tokens
- '4096'
- --max-batch-total-tokens
- '65536'
- --max-batch-prefill-tokens
- '4096'
volumeMounts:
- mountPath: /data
name: model-volume
@@ -359,16 +355,8 @@ spec:
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HUGGING_FACE_HUB_TOKEN
- name: HF_TOKEN
value: ${HF_TOKEN}
- name: ENABLE_HPU_GRAPH
value: 'true'
- name: LIMIT_HPU_GRAPH
value: 'true'
- name: USE_FLASH_ATTENTION
value: 'true'
- name: FLASH_ATTENTION_RECOMPUTE
value: 'true'
serviceAccountName: default
volumes:
- name: model-volume

View File

@@ -323,7 +323,7 @@ spec:
- envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
name: llm-dependency-deploy-demo
securityContext:
capabilities:
@@ -336,10 +336,6 @@ spec:
- '2048'
- --max-total-tokens
- '4096'
- --max-batch-total-tokens
- '65536'
- --max-batch-prefill-tokens
- '4096'
volumeMounts:
- mountPath: /data
name: model-volume
@@ -359,16 +355,8 @@ spec:
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HUGGING_FACE_HUB_TOKEN
- name: HF_TOKEN
value: ${HF_TOKEN}
- name: ENABLE_HPU_GRAPH
value: 'true'
- name: LIMIT_HPU_GRAPH
value: 'true'
- name: USE_FLASH_ATTENTION
value: 'true'
- name: FLASH_ATTENTION_RECOMPUTE
value: 'true'
serviceAccountName: default
volumes:
- name: model-volume

View File

@@ -90,7 +90,7 @@ find . -name '*.yaml' -type f -exec sed -i "s#\$(RERANK_MODEL_ID)#${RERANK_MODEL
### Benchmark tool preparation
The test uses the [benchmark tool](https://github.com/opea-project/GenAIEval/tree/main/evals/benchmark) to do performance test. We need to set up benchmark tool at the master node of Kubernetes which is k8s-master.
The test uses the [benchmark tool](https://github.com/opea-project/GenAIEval/tree/main/evals/benchmark/README.md) to do performance test. We need to set up benchmark tool at the master node of Kubernetes which is k8s-master.
```bash
# on k8s-master node

View File

@@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/

View File

@@ -0,0 +1,27 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v2
name: chatqna-charts
description: A Helm chart for Kubernetes
# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 1.0
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "1.16.0"

View File

@@ -0,0 +1,38 @@
# ChatQnA Deployment
This document guides you through deploying ChatQnA pipelines using Helm charts. Helm charts simplify managing Kubernetes applications by packaging configuration and resources.
## Getting Started
### Preparation
```bash
# on k8s-master node
cd GenAIExamples/ChatQnA/benchmark/performance/helm_charts
# Replace <your token> with your actual Hugging Face token and run the following command:
HUGGINGFACE_TOKEN=<your token>
find . -name '*.yaml' -type f -exec sed -i "s#\${HF_TOKEN}#${HUGGINGFACE_TOKEN}#g" {} \;
# Replace the following placeholders with the desired model IDs:
LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
EMBEDDING_MODEL_ID=BAAI/bge-base-en-v1.5
RERANK_MODEL_ID=BAAI/bge-reranker-base
find . -name '*.yaml' -type f -exec sed -i "s#\$(LLM_MODEL_ID)#${LLM_MODEL_ID}#g" {} \;
find . -name '*.yaml' -type f -exec sed -i "s#\$(EMBEDDING_MODEL_ID)#${EMBEDDING_MODEL_ID}#g" {} \;
find . -name '*.yaml' -type f -exec sed -i "s#\$(RERANK_MODEL_ID)#${RERANK_MODEL_ID}#g" {} \;
```
### ChatQnA Installation
```bash
# Deploy a ChatQnA pipeline using the specified YAML configuration.
# To deploy with different configurations, simply provide a different YAML file.
helm install chatqna helm_charts/ -f helm_charts/oob_single_node.yaml
# Tips: To display rendered manifests according to the given yaml.
helm template chatqna helm_charts/ -f helm_charts/oob_single_node.yaml
```
Notes: The provided [BKC manifests](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/benchmark) for single, two, and four node Kubernetes clusters are generated using this tool.

View File

@@ -0,0 +1,237 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
config:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
INDEX_NAME: rag-redis
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
LLM_SERVER_HOST_IP: llm-dependency-svc
NODE_SELECTOR: chatqna-opea
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
RERANK_MODEL_ID: BAAI/bge-reranker-base
RERANK_SERVER_HOST_IP: reranking-dependency-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
deployments:
- name: chatqna-backend-server-deploy
spec:
image_name: opea/chatqna-no-wrapper
image_tag: latest
replicas: 1
ports:
- containerPort: 8888
- name: dataprep-deploy
spec:
image_name: opea/dataprep-redis
image_tag: latest
replicas: 1
ports:
- containerPort: 6007
- name: vector-db
spec:
image_name: redis/redis-stack
image_tag: 7.2.0-v9
replicas: 1
ports:
- containerPort: 6379
- containerPort: 8001
- name: retriever-deploy
spec:
image_name: opea/retriever-redis
image_tag: latest
replicas: 1
ports:
- containerPort: 7000
- name: embedding-dependency-deploy
spec:
image_name: ghcr.io/huggingface/text-embeddings-inference
image_tag: cpu-1.5
replicas: 1
ports:
- containerPort: 80
args:
- name: "--model-id"
value: $(EMBEDDING_MODEL_ID)
- name: "--auto-truncate"
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
- name: reranking-dependency-deploy
spec:
image_name: opea/tei-gaudi
image_tag: latest
replicas: 1
resources:
limits:
habana.ai/gaudi: 1
args:
- name: "--model-id"
- value: $(RERANK_MODEL_ID)
- name: "--auto-truncate"
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: "true"
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
- name: MAX_WARMUP_SEQUENCE_LENGTH
value: "512"
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
- name: llm-dependency-deploy
spec:
image_name: ghcr.io/huggingface/tgi-gaudi
image_tag: 2.0.4
replicas: 7
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
args:
- name: "--model-id"
value: $(LLM_MODEL_ID)
- name: "--max-input-length"
value: "2048"
- name: "--max-total-tokens"
value: "4096"
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: "true"
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
services:
- name: chatqna-backend-server-svc
spec:
ports:
- name: service
nodePort: 30888
port: 8888
targetPort: 8888
selector:
app: chatqna-backend-server-deploy
type: NodePort
- name: dataprep-svc
spec:
ports:
- name: port1
port: 6007
targetPort: 6007
selector:
app: dataprep-deploy
type: ClusterIP
- name: embedding-dependency-svc
spec:
ports:
- name: service
port: 6006
targetPort: 80
selector:
app: embedding-dependency-deploy
type: ClusterIP
- name: llm-dependency-svc
spec:
ports:
- name: service
port: 9009
targetPort: 80
selector:
app: llm-dependency-deploy
type: ClusterIP
- name: reranking-dependency-svc
spec:
ports:
- name: service
port: 8808
targetPort: 80
selector:
app: reranking-dependency-deploy
type: ClusterIP
- name: retriever-svc
spec:
ports:
- name: service
port: 7000
targetPort: 7000
selector:
app: retriever-deploy
type: ClusterIP
- name: vector-db
spec:
ports:
- name: vector-db-service
port: 6379
targetPort: 6379
- name: vector-db-insight
port: 8001
targetPort: 8001
selector:
app: vector-db
type: ClusterIP

View File

@@ -0,0 +1,25 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: qna-config
namespace: default
data:
EMBEDDING_MODEL_ID: {{ .Values.config.EMBEDDING_MODEL_ID }}
EMBEDDING_SERVER_HOST_IP: {{ .Values.config.EMBEDDING_SERVER_HOST_IP }}
HUGGINGFACEHUB_API_TOKEN: {{ .Values.config.HUGGINGFACEHUB_API_TOKEN }}
INDEX_NAME: {{ .Values.config.INDEX_NAME }}
LLM_MODEL_ID: {{ .Values.config.LLM_MODEL_ID }}
LLM_SERVER_HOST_IP: {{ .Values.config.LLM_SERVER_HOST_IP }}
NODE_SELECTOR: {{ .Values.config.NODE_SELECTOR }}
REDIS_URL: {{ .Values.config.REDIS_URL }}
RERANK_MODEL_ID: {{ .Values.config.RERANK_MODEL_ID }}
RERANK_SERVER_HOST_IP: {{ .Values.config.RERANK_SERVER_HOST_IP }}
RETRIEVER_SERVICE_HOST_IP: {{ .Values.config.RETRIEVER_SERVICE_HOST_IP }}
TEI_EMBEDDING_ENDPOINT: {{ .Values.config.TEI_EMBEDDING_ENDPOINT }}
TEI_ENDPOINT: {{ .Values.config.TEI_ENDPOINT }}
TEI_RERANKING_ENDPOINT: {{ .Values.config.TEI_RERANKING_ENDPOINT }}
TGI_LLM_ENDPOINT: {{ .Values.config.TGI_LLM_ENDPOINT }}
---

View File

@@ -0,0 +1,108 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
{{- range $deployment := .Values.deployments }}
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ $deployment.name }}
namespace: default
spec:
replicas: {{ $deployment.spec.replicas }}
selector:
matchLabels:
app: {{ $deployment.name }}
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: {{ $deployment.name }}
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
{{- if $deployment.spec.args }}
args:
{{- range $arg := $deployment.spec.args }}
{{- if $arg.name }}
- {{ $arg.name }}
{{- end }}
{{- if $arg.value }}
- "{{ $arg.value }}"
{{- end }}
{{- end }}
{{- end }}
{{- if $deployment.spec.env }}
env:
{{- range $env := $deployment.spec.env }}
- name: {{ $env.name }}
value: "{{ $env.value }}"
{{- end }}
{{- end }}
image: {{ $deployment.spec.image_name }}:{{ $deployment.spec.image_tag }}
imagePullPolicy: IfNotPresent
name: {{ $deployment.name }}
{{- if $deployment.spec.ports }}
ports:
{{- range $port := $deployment.spec.ports }}
{{- range $port_name, $port_id := $port }}
- {{ $port_name }}: {{ $port_id }}
{{- end }}
{{- end }}
{{- end }}
{{- if $deployment.spec.resources }}
resources:
{{- range $resourceType, $resource := $deployment.spec.resources }}
{{ $resourceType }}:
{{- range $limitType, $limit := $resource }}
{{ $limitType }}: {{ $limit }}
{{- end }}
{{- end }}
{{- end }}
{{- if $deployment.spec.volumeMounts }}
volumeMounts:
{{- range $volumeMount := $deployment.spec.volumeMounts }}
- mountPath: {{ $volumeMount.mountPath }}
name: {{ $volumeMount.name }}
{{- end }}
{{- end }}
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: {{ $deployment.name }}
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
{{- if $deployment.spec.volumes }}
volumes:
{{- range $index, $volume := $deployment.spec.volumes }}
- name: {{ $volume.name }}
{{- if $volume.hostPath }}
hostPath:
path: {{ $volume.hostPath.path }}
type: {{ $volume.hostPath.type }}
{{- else if $volume.emptyDir }}
emptyDir:
medium: {{ $volume.emptyDir.medium }}
sizeLimit: {{ $volume.emptyDir.sizeLimit }}
{{- end }}
{{- end }}
{{- end }}
---
{{- end }}

View File

@@ -0,0 +1,24 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
{{- range $service := .Values.services }}
apiVersion: v1
kind: Service
metadata:
name: {{ $service.name }}
namespace: default
spec:
ports:
{{- range $port := $service.spec.ports }}
- name: {{ $port.name }}
{{- range $port_name, $port_id := $port }}
{{- if ne $port_name "name"}}
{{ $port_name }}: {{ $port_id }}
{{- end }}
{{- end }}
{{- end }}
selector:
app: {{ $service.spec.selector.app }}
type: {{ $service.spec.type }}
---
{{- end }}

View File

@@ -0,0 +1,259 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
config:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
INDEX_NAME: rag-redis
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
LLM_SERVER_HOST_IP: llm-dependency-svc
NODE_SELECTOR: chatqna-opea
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
RERANK_MODEL_ID: BAAI/bge-reranker-base
RERANK_SERVER_HOST_IP: reranking-dependency-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
deployments:
- name: chatqna-backend-server-deploy
spec:
image_name: opea/chatqna-no-wrapper
image_tag: latest
replicas: 2
ports:
- containerPort: 8888
resources:
limits:
cpu: "8"
memory: "8000Mi"
requests:
cpu: "8"
memory: "8000Mi"
- name: dataprep-deploy
spec:
image_name: opea/dataprep-redis
image_tag: latest
replicas: 1
ports:
- containerPort: 6007
- name: vector-db
spec:
image_name: redis/redis-stack
image_tag: 7.2.0-v9
replicas: 1
ports:
- containerPort: 6379
- containerPort: 8001
- name: retriever-deploy
spec:
image_name: opea/retriever-redis
image_tag: latest
replicas: 2
ports:
- containerPort: 7000
resources:
requests:
cpu: "4"
memory: "4000Mi"
- name: embedding-dependency-deploy
spec:
image_name: ghcr.io/huggingface/text-embeddings-inference
image_tag: cpu-1.5
replicas: 1
ports:
- containerPort: 80
args:
- name: "--model-id"
value: $(EMBEDDING_MODEL_ID)
- name: "--auto-truncate"
resources:
limits:
cpu: "80"
memory: "20000Mi"
requests:
cpu: "80"
memory: "20000Mi"
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
- name: reranking-dependency-deploy
spec:
image_name: opea/tei-gaudi
image_tag: latest
replicas: 1
resources:
limits:
habana.ai/gaudi: 1
args:
- name: "--model-id"
- value: $(RERANK_MODEL_ID)
- name: "--auto-truncate"
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: "true"
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
- name: MAX_WARMUP_SEQUENCE_LENGTH
value: "512"
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
- name: llm-dependency-deploy
spec:
image_name: ghcr.io/huggingface/tgi-gaudi
image_tag: 2.0.4
replicas: 7
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
args:
- name: "--model-id"
value: $(LLM_MODEL_ID)
- name: "--max-input-length"
value: "1280"
- name: "--max-total-tokens"
value: "2048"
- name: "--max-batch-total-tokens"
value: "65536"
- name: "--max-batch-prefill-tokens"
value: "4096"
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: "true"
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
services:
- name: chatqna-backend-server-svc
spec:
ports:
- name: service
nodePort: 30888
port: 8888
targetPort: 8888
selector:
app: chatqna-backend-server-deploy
type: NodePort
- name: dataprep-svc
spec:
ports:
- name: port1
port: 6007
targetPort: 6007
selector:
app: dataprep-deploy
type: ClusterIP
- name: embedding-dependency-svc
spec:
ports:
- name: service
port: 6006
targetPort: 80
selector:
app: embedding-dependency-deploy
type: ClusterIP
- name: llm-dependency-svc
spec:
ports:
- name: service
port: 9009
targetPort: 80
selector:
app: llm-dependency-deploy
type: ClusterIP
- name: reranking-dependency-svc
spec:
ports:
- name: service
port: 8808
targetPort: 80
selector:
app: reranking-dependency-deploy
type: ClusterIP
- name: retriever-svc
spec:
ports:
- name: service
port: 7000
targetPort: 7000
selector:
app: retriever-deploy
type: ClusterIP
- name: vector-db
spec:
ports:
- name: vector-db-service
port: 6379
targetPort: 6379
- name: vector-db-insight
port: 8001
targetPort: 8001
selector:
app: vector-db
type: ClusterIP

View File

@@ -0,0 +1,237 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
config:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
INDEX_NAME: rag-redis
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
LLM_SERVER_HOST_IP: llm-dependency-svc
NODE_SELECTOR: chatqna-opea
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
RERANK_MODEL_ID: BAAI/bge-reranker-base
RERANK_SERVER_HOST_IP: reranking-dependency-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
deployments:
- name: chatqna-backend-server-deploy
spec:
image_name: opea/chatqna-no-wrapper
image_tag: latest
replicas: 1
ports:
- containerPort: 8888
- name: dataprep-deploy
spec:
image_name: opea/dataprep-redis
image_tag: latest
replicas: 1
ports:
- containerPort: 6007
- name: vector-db
spec:
image_name: redis/redis-stack
image_tag: 7.2.0-v9
replicas: 1
ports:
- containerPort: 6379
- containerPort: 8001
- name: retriever-deploy
spec:
image_name: opea/retriever-redis
image_tag: latest
replicas: 1
ports:
- containerPort: 7000
- name: embedding-dependency-deploy
spec:
image_name: ghcr.io/huggingface/text-embeddings-inference
image_tag: cpu-1.5
replicas: 1
ports:
- containerPort: 80
args:
- name: "--model-id"
value: $(EMBEDDING_MODEL_ID)
- name: "--auto-truncate"
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
- name: reranking-dependency-deploy
spec:
image_name: opea/tei-gaudi
image_tag: latest
replicas: 1
resources:
limits:
habana.ai/gaudi: 1
args:
- name: "--model-id"
- value: $(RERANK_MODEL_ID)
- name: "--auto-truncate"
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: "true"
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
- name: MAX_WARMUP_SEQUENCE_LENGTH
value: "512"
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
- name: llm-dependency-deploy
spec:
image_name: ghcr.io/huggingface/tgi-gaudi
image_tag: 2.0.4
replicas: 7
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
args:
- name: "--model-id"
value: $(LLM_MODEL_ID)
- name: "--max-input-length"
value: "2048"
- name: "--max-total-tokens"
value: "4096"
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: "true"
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
services:
- name: chatqna-backend-server-svc
spec:
ports:
- name: service
nodePort: 30888
port: 8888
targetPort: 8888
selector:
app: chatqna-backend-server-deploy
type: NodePort
- name: dataprep-svc
spec:
ports:
- name: port1
port: 6007
targetPort: 6007
selector:
app: dataprep-deploy
type: ClusterIP
- name: embedding-dependency-svc
spec:
ports:
- name: service
port: 6006
targetPort: 80
selector:
app: embedding-dependency-deploy
type: ClusterIP
- name: llm-dependency-svc
spec:
ports:
- name: service
port: 9009
targetPort: 80
selector:
app: llm-dependency-deploy
type: ClusterIP
- name: reranking-dependency-svc
spec:
ports:
- name: service
port: 8808
targetPort: 80
selector:
app: reranking-dependency-deploy
type: ClusterIP
- name: retriever-svc
spec:
ports:
- name: service
port: 7000
targetPort: 7000
selector:
app: retriever-deploy
type: ClusterIP
- name: vector-db
spec:
ports:
- name: vector-db-service
port: 6379
targetPort: 6379
- name: vector-db-insight
port: 8001
targetPort: 8001
selector:
app: vector-db
type: ClusterIP

View File

@@ -237,7 +237,7 @@ spec:
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:

View File

@@ -237,7 +237,7 @@ spec:
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:

View File

@@ -237,7 +237,7 @@ spec:
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:

View File

@@ -237,7 +237,7 @@ spec:
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:

View File

@@ -237,7 +237,7 @@ spec:
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:

View File

@@ -237,7 +237,7 @@ spec:
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:

View File

@@ -237,7 +237,7 @@ spec:
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:

View File

@@ -237,7 +237,7 @@ spec:
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:

View File

@@ -0,0 +1,641 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
data:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
EMBEDDING_SERVICE_HOST_IP: embedding-svc
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
INDEX_NAME: rag-redis
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
LLM_SERVICE_HOST_IP: llm-svc
NODE_SELECTOR: chatqna-opea
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
RERANK_MODEL_ID: BAAI/bge-reranker-base
RERANK_SERVICE_HOST_IP: reranking-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
kind: ConfigMap
metadata:
name: qna-config
namespace: default
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-backend-server-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: chatqna-backend-server-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: chatqna-backend-server-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
ports:
- containerPort: 8888
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: chatqna-backend-server-svc
namespace: default
spec:
ports:
- name: service
nodePort: 30888
port: 8888
targetPort: 8888
selector:
app: chatqna-backend-server-deploy
type: NodePort
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: dataprep-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: dataprep-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: dataprep-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/dataprep-redis:latest
imagePullPolicy: IfNotPresent
name: dataprep-deploy
ports:
- containerPort: 6007
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: dataprep-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: dataprep-svc
namespace: default
spec:
ports:
- name: port1
port: 6007
targetPort: 6007
selector:
app: dataprep-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-dependency-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: embedding-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(EMBEDDING_MODEL_ID)
- --auto-truncate
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
imagePullPolicy: IfNotPresent
name: embedding-dependency-deploy
ports:
- containerPort: 80
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: embedding-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: embedding-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 6006
targetPort: 80
selector:
app: embedding-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: embedding-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/embedding-tei:latest
imagePullPolicy: IfNotPresent
name: embedding-deploy
ports:
- containerPort: 6000
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: embedding-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: embedding-svc
namespace: default
spec:
ports:
- name: service
port: 6000
targetPort: 6000
selector:
app: embedding-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-dependency-deploy
namespace: default
spec:
replicas: 31
selector:
matchLabels:
app: llm-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(LLM_MODEL_ID)
- --max-input-length
- '2048'
- --max-total-tokens
- '4096'
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
securityContext:
capabilities:
add:
- SYS_NICE
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: llm-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: llm-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 9009
targetPort: 80
selector:
app: llm-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: llm-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/llm-tgi:latest
imagePullPolicy: IfNotPresent
name: llm-deploy
ports:
- containerPort: 9000
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: llm-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: llm-svc
namespace: default
spec:
ports:
- name: service
port: 9000
targetPort: 9000
selector:
app: llm-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: reranking-dependency-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: reranking-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: reranking-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(RERANK_MODEL_ID)
- --auto-truncate
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
- name: MAX_WARMUP_SEQUENCE_LENGTH
value: '512'
envFrom:
- configMapRef:
name: qna-config
image: opea/tei-gaudi:latest
imagePullPolicy: IfNotPresent
name: reranking-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: reranking-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: reranking-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 8808
targetPort: 80
selector:
app: reranking-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: reranking-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: reranking-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: reranking-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/reranking-tei:latest
imagePullPolicy: IfNotPresent
name: reranking-deploy
ports:
- containerPort: 8000
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: reranking-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: reranking-svc
namespace: default
spec:
ports:
- name: service
port: 8000
targetPort: 8000
selector:
app: reranking-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: retriever-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: retriever-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: retriever-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/retriever-redis:latest
imagePullPolicy: IfNotPresent
name: retriever-deploy
ports:
- containerPort: 7000
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: retriever-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: retriever-svc
namespace: default
spec:
ports:
- name: service
port: 7000
targetPort: 7000
selector:
app: retriever-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: vector-db
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: vector-db
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: vector-db
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: redis/redis-stack:7.2.0-v9
imagePullPolicy: IfNotPresent
name: vector-db
ports:
- containerPort: 6379
- containerPort: 8001
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: vector-db
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: vector-db
namespace: default
spec:
ports:
- name: vector-db-service
port: 6379
targetPort: 6379
- name: vector-db-insight
port: 8001
targetPort: 8001
selector:
app: vector-db
type: ClusterIP
---

View File

@@ -0,0 +1,641 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
data:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
EMBEDDING_SERVICE_HOST_IP: embedding-svc
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
INDEX_NAME: rag-redis
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
LLM_SERVICE_HOST_IP: llm-svc
NODE_SELECTOR: chatqna-opea
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
RERANK_MODEL_ID: BAAI/bge-reranker-base
RERANK_SERVICE_HOST_IP: reranking-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
kind: ConfigMap
metadata:
name: qna-config
namespace: default
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-backend-server-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: chatqna-backend-server-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: chatqna-backend-server-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
ports:
- containerPort: 8888
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: chatqna-backend-server-svc
namespace: default
spec:
ports:
- name: service
nodePort: 30888
port: 8888
targetPort: 8888
selector:
app: chatqna-backend-server-deploy
type: NodePort
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: dataprep-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: dataprep-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: dataprep-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/dataprep-redis:latest
imagePullPolicy: IfNotPresent
name: dataprep-deploy
ports:
- containerPort: 6007
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: dataprep-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: dataprep-svc
namespace: default
spec:
ports:
- name: port1
port: 6007
targetPort: 6007
selector:
app: dataprep-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-dependency-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: embedding-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(EMBEDDING_MODEL_ID)
- --auto-truncate
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
imagePullPolicy: IfNotPresent
name: embedding-dependency-deploy
ports:
- containerPort: 80
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: embedding-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: embedding-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 6006
targetPort: 80
selector:
app: embedding-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: embedding-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/embedding-tei:latest
imagePullPolicy: IfNotPresent
name: embedding-deploy
ports:
- containerPort: 6000
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: embedding-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: embedding-svc
namespace: default
spec:
ports:
- name: service
port: 6000
targetPort: 6000
selector:
app: embedding-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-dependency-deploy
namespace: default
spec:
replicas: 7
selector:
matchLabels:
app: llm-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(LLM_MODEL_ID)
- --max-input-length
- '2048'
- --max-total-tokens
- '4096'
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
securityContext:
capabilities:
add:
- SYS_NICE
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: llm-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: llm-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 9009
targetPort: 80
selector:
app: llm-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: llm-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/llm-tgi:latest
imagePullPolicy: IfNotPresent
name: llm-deploy
ports:
- containerPort: 9000
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: llm-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: llm-svc
namespace: default
spec:
ports:
- name: service
port: 9000
targetPort: 9000
selector:
app: llm-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: reranking-dependency-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: reranking-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: reranking-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(RERANK_MODEL_ID)
- --auto-truncate
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
- name: MAX_WARMUP_SEQUENCE_LENGTH
value: '512'
envFrom:
- configMapRef:
name: qna-config
image: opea/tei-gaudi:latest
imagePullPolicy: IfNotPresent
name: reranking-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: reranking-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: reranking-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 8808
targetPort: 80
selector:
app: reranking-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: reranking-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: reranking-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: reranking-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/reranking-tei:latest
imagePullPolicy: IfNotPresent
name: reranking-deploy
ports:
- containerPort: 8000
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: reranking-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: reranking-svc
namespace: default
spec:
ports:
- name: service
port: 8000
targetPort: 8000
selector:
app: reranking-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: retriever-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: retriever-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: retriever-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/retriever-redis:latest
imagePullPolicy: IfNotPresent
name: retriever-deploy
ports:
- containerPort: 7000
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: retriever-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: retriever-svc
namespace: default
spec:
ports:
- name: service
port: 7000
targetPort: 7000
selector:
app: retriever-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: vector-db
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: vector-db
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: vector-db
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: redis/redis-stack:7.2.0-v9
imagePullPolicy: IfNotPresent
name: vector-db
ports:
- containerPort: 6379
- containerPort: 8001
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: vector-db
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: vector-db
namespace: default
spec:
ports:
- name: vector-db-service
port: 6379
targetPort: 6379
- name: vector-db-insight
port: 8001
targetPort: 8001
selector:
app: vector-db
type: ClusterIP
---

View File

@@ -0,0 +1,641 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
data:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
EMBEDDING_SERVICE_HOST_IP: embedding-svc
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
INDEX_NAME: rag-redis
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
LLM_SERVICE_HOST_IP: llm-svc
NODE_SELECTOR: chatqna-opea
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
RERANK_MODEL_ID: BAAI/bge-reranker-base
RERANK_SERVICE_HOST_IP: reranking-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
kind: ConfigMap
metadata:
name: qna-config
namespace: default
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-backend-server-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: chatqna-backend-server-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: chatqna-backend-server-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
ports:
- containerPort: 8888
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: chatqna-backend-server-svc
namespace: default
spec:
ports:
- name: service
nodePort: 30888
port: 8888
targetPort: 8888
selector:
app: chatqna-backend-server-deploy
type: NodePort
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: dataprep-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: dataprep-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: dataprep-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/dataprep-redis:latest
imagePullPolicy: IfNotPresent
name: dataprep-deploy
ports:
- containerPort: 6007
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: dataprep-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: dataprep-svc
namespace: default
spec:
ports:
- name: port1
port: 6007
targetPort: 6007
selector:
app: dataprep-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-dependency-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: embedding-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(EMBEDDING_MODEL_ID)
- --auto-truncate
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
imagePullPolicy: IfNotPresent
name: embedding-dependency-deploy
ports:
- containerPort: 80
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: embedding-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: embedding-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 6006
targetPort: 80
selector:
app: embedding-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: embedding-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/embedding-tei:latest
imagePullPolicy: IfNotPresent
name: embedding-deploy
ports:
- containerPort: 6000
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: embedding-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: embedding-svc
namespace: default
spec:
ports:
- name: service
port: 6000
targetPort: 6000
selector:
app: embedding-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-dependency-deploy
namespace: default
spec:
replicas: 15
selector:
matchLabels:
app: llm-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(LLM_MODEL_ID)
- --max-input-length
- '2048'
- --max-total-tokens
- '4096'
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
securityContext:
capabilities:
add:
- SYS_NICE
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: llm-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: llm-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 9009
targetPort: 80
selector:
app: llm-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: llm-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/llm-tgi:latest
imagePullPolicy: IfNotPresent
name: llm-deploy
ports:
- containerPort: 9000
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: llm-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: llm-svc
namespace: default
spec:
ports:
- name: service
port: 9000
targetPort: 9000
selector:
app: llm-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: reranking-dependency-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: reranking-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: reranking-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(RERANK_MODEL_ID)
- --auto-truncate
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
- name: MAX_WARMUP_SEQUENCE_LENGTH
value: '512'
envFrom:
- configMapRef:
name: qna-config
image: opea/tei-gaudi:latest
imagePullPolicy: IfNotPresent
name: reranking-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: reranking-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: reranking-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 8808
targetPort: 80
selector:
app: reranking-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: reranking-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: reranking-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: reranking-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/reranking-tei:latest
imagePullPolicy: IfNotPresent
name: reranking-deploy
ports:
- containerPort: 8000
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: reranking-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: reranking-svc
namespace: default
spec:
ports:
- name: service
port: 8000
targetPort: 8000
selector:
app: reranking-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: retriever-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: retriever-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: retriever-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/retriever-redis:latest
imagePullPolicy: IfNotPresent
name: retriever-deploy
ports:
- containerPort: 7000
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: retriever-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: retriever-svc
namespace: default
spec:
ports:
- name: service
port: 7000
targetPort: 7000
selector:
app: retriever-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: vector-db
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: vector-db
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: vector-db
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: redis/redis-stack:7.2.0-v9
imagePullPolicy: IfNotPresent
name: vector-db
ports:
- containerPort: 6379
- containerPort: 8001
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: vector-db
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: vector-db
namespace: default
spec:
ports:
- name: vector-db-service
port: 6379
targetPort: 6379
- name: vector-db-insight
port: 8001
targetPort: 8001
selector:
app: vector-db
type: ClusterIP
---

View File

@@ -0,0 +1,730 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: qna-config
namespace: default
data:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
RERANK_MODEL_ID: BAAI/bge-reranker-base
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
INDEX_NAME: rag-redis
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
EMBEDDING_SERVICE_HOST_IP: embedding-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
RERANK_SERVICE_HOST_IP: reranking-svc
NODE_SELECTOR: chatqna-opea
LLM_SERVICE_HOST_IP: llm-svc
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-backend-server-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: chatqna-backend-server-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: chatqna-backend-server-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna-without-rerank:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
args: null
ports:
- containerPort: 8888
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: chatqna-backend-server-svc
namespace: default
spec:
type: NodePort
selector:
app: chatqna-backend-server-deploy
ports:
- name: service
port: 8888
targetPort: 8888
nodePort: 30888
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: dataprep-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: dataprep-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: dataprep-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: dataprep-deploy
hostIPC: true
containers:
- env:
- name: REDIS_URL
valueFrom:
configMapKeyRef:
name: qna-config
key: REDIS_URL
- name: TEI_ENDPOINT
valueFrom:
configMapKeyRef:
name: qna-config
key: TEI_EMBEDDING_ENDPOINT
- name: INDEX_NAME
valueFrom:
configMapKeyRef:
name: qna-config
key: INDEX_NAME
image: opea/dataprep-redis:latest
imagePullPolicy: IfNotPresent
name: dataprep-deploy
args: null
ports:
- containerPort: 6007
- containerPort: 6008
- containerPort: 6009
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: dataprep-svc
namespace: default
spec:
type: ClusterIP
selector:
app: dataprep-deploy
ports:
- name: port1
port: 6007
targetPort: 6007
- name: port2
port: 6008
targetPort: 6008
- name: port3
port: 6009
targetPort: 6009
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-dependency-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: embedding-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
containers:
- envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
name: embedding-dependency-deploy
args:
- --model-id
- $(EMBEDDING_MODEL_ID)
- --auto-truncate
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
ports:
- containerPort: 80
serviceAccountName: default
volumes:
- name: model-volume
hostPath:
path: /mnt/models
type: Directory
- name: shm
emptyDir:
medium: Memory
sizeLimit: 1Gi
---
kind: Service
apiVersion: v1
metadata:
name: embedding-dependency-svc
namespace: default
spec:
type: ClusterIP
selector:
app: embedding-dependency-deploy
ports:
- name: service
port: 6006
targetPort: 80
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: embedding-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: embedding-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/embedding-tei:latest
imagePullPolicy: IfNotPresent
name: embedding-deploy
args: null
ports:
- containerPort: 6000
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: embedding-svc
namespace: default
spec:
type: ClusterIP
selector:
app: embedding-deploy
ports:
- name: service
port: 6000
targetPort: 6000
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-dependency-deploy
namespace: default
spec:
replicas: 32
selector:
matchLabels:
app: llm-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
name: llm-dependency-deploy-demo
securityContext:
capabilities:
add:
- SYS_NICE
args:
- --model-id
- $(LLM_MODEL_ID)
- --max-input-length
- '2048'
- --max-total-tokens
- '4096'
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
serviceAccountName: default
volumes:
- name: model-volume
hostPath:
path: /mnt/models
type: Directory
- name: shm
emptyDir:
medium: Memory
sizeLimit: 1Gi
---
kind: Service
apiVersion: v1
metadata:
name: llm-dependency-svc
namespace: default
spec:
type: ClusterIP
selector:
app: llm-dependency-deploy
ports:
- name: service
port: 9009
targetPort: 80
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: llm-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: llm-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/llm-tgi:latest
imagePullPolicy: IfNotPresent
name: llm-deploy
args: null
ports:
- containerPort: 9000
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: llm-svc
namespace: default
spec:
type: ClusterIP
selector:
app: llm-deploy
ports:
- name: service
port: 9000
targetPort: 9000
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: reranking-dependency-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: reranking-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: reranking-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: reranking-dependency-deploy
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/tei-gaudi:latest
name: reranking-dependency-deploy
args:
- --model-id
- $(RERANK_MODEL_ID)
- --auto-truncate
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
- name: MAX_WARMUP_SEQUENCE_LENGTH
value: '512'
serviceAccountName: default
volumes:
- name: model-volume
hostPath:
path: /mnt/models
type: Directory
- name: shm
emptyDir:
medium: Memory
sizeLimit: 1Gi
---
kind: Service
apiVersion: v1
metadata:
name: reranking-dependency-svc
namespace: default
spec:
type: ClusterIP
selector:
app: reranking-dependency-deploy
ports:
- name: service
port: 8808
targetPort: 80
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: reranking-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: reranking-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: reranking-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: reranking-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/reranking-tei:latest
imagePullPolicy: IfNotPresent
name: reranking-deploy
args: null
ports:
- containerPort: 8000
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: reranking-svc
namespace: default
spec:
type: ClusterIP
selector:
app: reranking-deploy
ports:
- name: service
port: 8000
targetPort: 8000
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: retriever-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: retriever-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: retriever-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: retriever-deploy
hostIPC: true
containers:
- env:
- name: REDIS_URL
valueFrom:
configMapKeyRef:
name: qna-config
key: REDIS_URL
- name: TEI_EMBEDDING_ENDPOINT
valueFrom:
configMapKeyRef:
name: qna-config
key: TEI_EMBEDDING_ENDPOINT
- name: HUGGINGFACEHUB_API_TOKEN
valueFrom:
configMapKeyRef:
name: qna-config
key: HUGGINGFACEHUB_API_TOKEN
- name: INDEX_NAME
valueFrom:
configMapKeyRef:
name: qna-config
key: INDEX_NAME
image: opea/retriever-redis:latest
imagePullPolicy: IfNotPresent
name: retriever-deploy
args: null
ports:
- containerPort: 7000
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: retriever-svc
namespace: default
spec:
type: ClusterIP
selector:
app: retriever-deploy
ports:
- name: service
port: 7000
targetPort: 7000
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: vector-db
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: vector-db
template:
metadata:
labels:
app: vector-db
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: vector-db
containers:
- name: vector-db
image: redis/redis-stack:7.2.0-v9
ports:
- containerPort: 6379
- containerPort: 8001
---
apiVersion: v1
kind: Service
metadata:
name: vector-db
namespace: default
spec:
type: ClusterIP
selector:
app: vector-db
ports:
- name: vector-db-service
port: 6379
targetPort: 6379
- name: vector-db-insight
port: 8001
targetPort: 8001
---

View File

@@ -0,0 +1,579 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: qna-config
namespace: default
data:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
RERANK_MODEL_ID: BAAI/bge-reranker-base
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
INDEX_NAME: rag-redis
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
EMBEDDING_SERVICE_HOST_IP: embedding-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
RERANK_SERVICE_HOST_IP: reranking-svc
NODE_SELECTOR: chatqna-opea
LLM_SERVICE_HOST_IP: llm-svc
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-backend-server-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: chatqna-backend-server-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: chatqna-backend-server-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna-without-rerank:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
args: null
ports:
- containerPort: 8888
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: chatqna-backend-server-svc
namespace: default
spec:
type: NodePort
selector:
app: chatqna-backend-server-deploy
ports:
- name: service
port: 8888
targetPort: 8888
nodePort: 30888
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: dataprep-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: dataprep-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: dataprep-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: dataprep-deploy
hostIPC: true
containers:
- env:
- name: REDIS_URL
valueFrom:
configMapKeyRef:
name: qna-config
key: REDIS_URL
- name: TEI_ENDPOINT
valueFrom:
configMapKeyRef:
name: qna-config
key: TEI_EMBEDDING_ENDPOINT
- name: INDEX_NAME
valueFrom:
configMapKeyRef:
name: qna-config
key: INDEX_NAME
image: opea/dataprep-redis:latest
imagePullPolicy: IfNotPresent
name: dataprep-deploy
args: null
ports:
- containerPort: 6007
- containerPort: 6008
- containerPort: 6009
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: dataprep-svc
namespace: default
spec:
type: ClusterIP
selector:
app: dataprep-deploy
ports:
- name: port1
port: 6007
targetPort: 6007
- name: port2
port: 6008
targetPort: 6008
- name: port3
port: 6009
targetPort: 6009
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-dependency-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: embedding-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
containers:
- envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
name: embedding-dependency-deploy
args:
- --model-id
- $(EMBEDDING_MODEL_ID)
- --auto-truncate
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
ports:
- containerPort: 80
serviceAccountName: default
volumes:
- name: model-volume
hostPath:
path: /mnt/models
type: Directory
- name: shm
emptyDir:
medium: Memory
sizeLimit: 1Gi
---
kind: Service
apiVersion: v1
metadata:
name: embedding-dependency-svc
namespace: default
spec:
type: ClusterIP
selector:
app: embedding-dependency-deploy
ports:
- name: service
port: 6006
targetPort: 80
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: embedding-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: embedding-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/embedding-tei:latest
imagePullPolicy: IfNotPresent
name: embedding-deploy
args: null
ports:
- containerPort: 6000
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: embedding-svc
namespace: default
spec:
type: ClusterIP
selector:
app: embedding-deploy
ports:
- name: service
port: 6000
targetPort: 6000
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-dependency-deploy
namespace: default
spec:
replicas: 8
selector:
matchLabels:
app: llm-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
name: llm-dependency-deploy-demo
securityContext:
capabilities:
add:
- SYS_NICE
args:
- --model-id
- $(LLM_MODEL_ID)
- --max-input-length
- '2048'
- --max-total-tokens
- '4096'
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
serviceAccountName: default
volumes:
- name: model-volume
hostPath:
path: /mnt/models
type: Directory
- name: shm
emptyDir:
medium: Memory
sizeLimit: 1Gi
---
kind: Service
apiVersion: v1
metadata:
name: llm-dependency-svc
namespace: default
spec:
type: ClusterIP
selector:
app: llm-dependency-deploy
ports:
- name: service
port: 9009
targetPort: 80
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: llm-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: llm-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/llm-tgi:latest
imagePullPolicy: IfNotPresent
name: llm-deploy
args: null
ports:
- containerPort: 9000
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: llm-svc
namespace: default
spec:
type: ClusterIP
selector:
app: llm-deploy
ports:
- name: service
port: 9000
targetPort: 9000
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: retriever-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: retriever-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: retriever-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: retriever-deploy
hostIPC: true
containers:
- env:
- name: REDIS_URL
valueFrom:
configMapKeyRef:
name: qna-config
key: REDIS_URL
- name: TEI_EMBEDDING_ENDPOINT
valueFrom:
configMapKeyRef:
name: qna-config
key: TEI_EMBEDDING_ENDPOINT
- name: HUGGINGFACEHUB_API_TOKEN
valueFrom:
configMapKeyRef:
name: qna-config
key: HUGGINGFACEHUB_API_TOKEN
- name: INDEX_NAME
valueFrom:
configMapKeyRef:
name: qna-config
key: INDEX_NAME
image: opea/retriever-redis:latest
imagePullPolicy: IfNotPresent
name: retriever-deploy
args: null
ports:
- containerPort: 7000
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: retriever-svc
namespace: default
spec:
type: ClusterIP
selector:
app: retriever-deploy
ports:
- name: service
port: 7000
targetPort: 7000
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: vector-db
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: vector-db
template:
metadata:
labels:
app: vector-db
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: vector-db
containers:
- name: vector-db
image: redis/redis-stack:7.2.0-v9
ports:
- containerPort: 6379
- containerPort: 8001
---
apiVersion: v1
kind: Service
metadata:
name: vector-db
namespace: default
spec:
type: ClusterIP
selector:
app: vector-db
ports:
- name: vector-db-service
port: 6379
targetPort: 6379
- name: vector-db-insight
port: 8001
targetPort: 8001
---

View File

@@ -0,0 +1,579 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: qna-config
namespace: default
data:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
RERANK_MODEL_ID: BAAI/bge-reranker-base
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
INDEX_NAME: rag-redis
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
EMBEDDING_SERVICE_HOST_IP: embedding-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
RERANK_SERVICE_HOST_IP: reranking-svc
NODE_SELECTOR: chatqna-opea
LLM_SERVICE_HOST_IP: llm-svc
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-backend-server-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: chatqna-backend-server-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: chatqna-backend-server-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna-without-rerank:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
args: null
ports:
- containerPort: 8888
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: chatqna-backend-server-svc
namespace: default
spec:
type: NodePort
selector:
app: chatqna-backend-server-deploy
ports:
- name: service
port: 8888
targetPort: 8888
nodePort: 30888
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: dataprep-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: dataprep-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: dataprep-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: dataprep-deploy
hostIPC: true
containers:
- env:
- name: REDIS_URL
valueFrom:
configMapKeyRef:
name: qna-config
key: REDIS_URL
- name: TEI_ENDPOINT
valueFrom:
configMapKeyRef:
name: qna-config
key: TEI_EMBEDDING_ENDPOINT
- name: INDEX_NAME
valueFrom:
configMapKeyRef:
name: qna-config
key: INDEX_NAME
image: opea/dataprep-redis:latest
imagePullPolicy: IfNotPresent
name: dataprep-deploy
args: null
ports:
- containerPort: 6007
- containerPort: 6008
- containerPort: 6009
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: dataprep-svc
namespace: default
spec:
type: ClusterIP
selector:
app: dataprep-deploy
ports:
- name: port1
port: 6007
targetPort: 6007
- name: port2
port: 6008
targetPort: 6008
- name: port3
port: 6009
targetPort: 6009
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-dependency-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: embedding-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
containers:
- envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
name: embedding-dependency-deploy
args:
- --model-id
- $(EMBEDDING_MODEL_ID)
- --auto-truncate
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
ports:
- containerPort: 80
serviceAccountName: default
volumes:
- name: model-volume
hostPath:
path: /mnt/models
type: Directory
- name: shm
emptyDir:
medium: Memory
sizeLimit: 1Gi
---
kind: Service
apiVersion: v1
metadata:
name: embedding-dependency-svc
namespace: default
spec:
type: ClusterIP
selector:
app: embedding-dependency-deploy
ports:
- name: service
port: 6006
targetPort: 80
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: embedding-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: embedding-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/embedding-tei:latest
imagePullPolicy: IfNotPresent
name: embedding-deploy
args: null
ports:
- containerPort: 6000
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: embedding-svc
namespace: default
spec:
type: ClusterIP
selector:
app: embedding-deploy
ports:
- name: service
port: 6000
targetPort: 6000
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-dependency-deploy
namespace: default
spec:
replicas: 16
selector:
matchLabels:
app: llm-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
name: llm-dependency-deploy-demo
securityContext:
capabilities:
add:
- SYS_NICE
args:
- --model-id
- $(LLM_MODEL_ID)
- --max-input-length
- '2048'
- --max-total-tokens
- '4096'
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
serviceAccountName: default
volumes:
- name: model-volume
hostPath:
path: /mnt/models
type: Directory
- name: shm
emptyDir:
medium: Memory
sizeLimit: 1Gi
---
kind: Service
apiVersion: v1
metadata:
name: llm-dependency-svc
namespace: default
spec:
type: ClusterIP
selector:
app: llm-dependency-deploy
ports:
- name: service
port: 9009
targetPort: 80
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: llm-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: llm-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/llm-tgi:latest
imagePullPolicy: IfNotPresent
name: llm-deploy
args: null
ports:
- containerPort: 9000
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: llm-svc
namespace: default
spec:
type: ClusterIP
selector:
app: llm-deploy
ports:
- name: service
port: 9000
targetPort: 9000
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: retriever-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: retriever-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: retriever-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: retriever-deploy
hostIPC: true
containers:
- env:
- name: REDIS_URL
valueFrom:
configMapKeyRef:
name: qna-config
key: REDIS_URL
- name: TEI_EMBEDDING_ENDPOINT
valueFrom:
configMapKeyRef:
name: qna-config
key: TEI_EMBEDDING_ENDPOINT
- name: HUGGINGFACEHUB_API_TOKEN
valueFrom:
configMapKeyRef:
name: qna-config
key: HUGGINGFACEHUB_API_TOKEN
- name: INDEX_NAME
valueFrom:
configMapKeyRef:
name: qna-config
key: INDEX_NAME
image: opea/retriever-redis:latest
imagePullPolicy: IfNotPresent
name: retriever-deploy
args: null
ports:
- containerPort: 7000
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: retriever-svc
namespace: default
spec:
type: ClusterIP
selector:
app: retriever-deploy
ports:
- name: service
port: 7000
targetPort: 7000
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: vector-db
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: vector-db
template:
metadata:
labels:
app: vector-db
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: vector-db
containers:
- name: vector-db
image: redis/redis-stack:7.2.0-v9
ports:
- containerPort: 6379
- containerPort: 8001
---
apiVersion: v1
kind: Service
metadata:
name: vector-db
namespace: default
spec:
type: ClusterIP
selector:
app: vector-db
ports:
- name: vector-db-service
port: 6379
targetPort: 6379
- name: vector-db-insight
port: 8001
targetPort: 8001
---

View File

@@ -255,7 +255,7 @@ spec:
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:

View File

@@ -255,7 +255,7 @@ spec:
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:

View File

@@ -255,7 +255,7 @@ spec:
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:

View File

@@ -255,7 +255,7 @@ spec:
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:

View File

@@ -255,7 +255,7 @@ spec:
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:

View File

@@ -255,7 +255,7 @@ spec:
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:

View File

@@ -255,7 +255,7 @@ spec:
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:

View File

@@ -255,7 +255,7 @@ spec:
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:

View File

@@ -306,20 +306,12 @@ spec:
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HUGGING_FACE_HUB_TOKEN
- name: HF_TOKEN
value: ${HF_TOKEN}
- name: ENABLE_HPU_GRAPH
value: 'true'
- name: LIMIT_HPU_GRAPH
value: 'true'
- name: USE_FLASH_ATTENTION
value: 'true'
- name: FLASH_ATTENTION_RECOMPUTE
value: 'true'
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:

View File

@@ -306,20 +306,12 @@ spec:
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HUGGING_FACE_HUB_TOKEN
- name: HF_TOKEN
value: ${HF_TOKEN}
- name: ENABLE_HPU_GRAPH
value: 'true'
- name: LIMIT_HPU_GRAPH
value: 'true'
- name: USE_FLASH_ATTENTION
value: 'true'
- name: FLASH_ATTENTION_RECOMPUTE
value: 'true'
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:

View File

@@ -306,20 +306,12 @@ spec:
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HUGGING_FACE_HUB_TOKEN
- name: HF_TOKEN
value: ${HF_TOKEN}
- name: ENABLE_HPU_GRAPH
value: 'true'
- name: LIMIT_HPU_GRAPH
value: 'true'
- name: USE_FLASH_ATTENTION
value: 'true'
- name: FLASH_ATTENTION_RECOMPUTE
value: 'true'
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:

View File

@@ -342,7 +342,7 @@ spec:
- envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
name: llm-dependency-deploy-demo
securityContext:
capabilities:
@@ -378,16 +378,8 @@ spec:
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HUGGING_FACE_HUB_TOKEN
- name: HF_TOKEN
value: ${HF_TOKEN}
- name: ENABLE_HPU_GRAPH
value: 'true'
- name: LIMIT_HPU_GRAPH
value: 'true'
- name: USE_FLASH_ATTENTION
value: 'true'
- name: FLASH_ATTENTION_RECOMPUTE
value: 'true'
serviceAccountName: default
volumes:
- name: model-volume

View File

@@ -342,7 +342,7 @@ spec:
- envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
name: llm-dependency-deploy-demo
securityContext:
capabilities:
@@ -378,16 +378,8 @@ spec:
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HUGGING_FACE_HUB_TOKEN
- name: HF_TOKEN
value: ${HF_TOKEN}
- name: ENABLE_HPU_GRAPH
value: 'true'
- name: LIMIT_HPU_GRAPH
value: 'true'
- name: USE_FLASH_ATTENTION
value: 'true'
- name: FLASH_ATTENTION_RECOMPUTE
value: 'true'
serviceAccountName: default
volumes:
- name: model-volume

View File

@@ -342,7 +342,7 @@ spec:
- envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
name: llm-dependency-deploy-demo
securityContext:
capabilities:
@@ -378,16 +378,8 @@ spec:
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HUGGING_FACE_HUB_TOKEN
- name: HF_TOKEN
value: ${HF_TOKEN}
- name: ENABLE_HPU_GRAPH
value: 'true'
- name: LIMIT_HPU_GRAPH
value: 'true'
- name: USE_FLASH_ATTENTION
value: 'true'
- name: FLASH_ATTENTION_RECOMPUTE
value: 'true'
serviceAccountName: default
volumes:
- name: model-volume

View File

@@ -0,0 +1,675 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
data:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
EMBEDDING_SERVICE_HOST_IP: embedding-svc
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
INDEX_NAME: rag-redis
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
LLM_SERVICE_HOST_IP: llm-svc
NODE_SELECTOR: chatqna-opea
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
RERANK_MODEL_ID: BAAI/bge-reranker-base
RERANK_SERVICE_HOST_IP: reranking-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
kind: ConfigMap
metadata:
name: qna-config
namespace: default
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-backend-server-deploy
namespace: default
spec:
replicas: 4
selector:
matchLabels:
app: chatqna-backend-server-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: chatqna-backend-server-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
ports:
- containerPort: 8888
resources:
limits:
cpu: 8
memory: 8000Mi
requests:
cpu: 8
memory: 8000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: chatqna-backend-server-svc
namespace: default
spec:
ports:
- name: service
nodePort: 30888
port: 8888
targetPort: 8888
selector:
app: chatqna-backend-server-deploy
type: NodePort
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: dataprep-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: dataprep-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: dataprep-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/dataprep-redis:latest
imagePullPolicy: IfNotPresent
name: dataprep-deploy
ports:
- containerPort: 6007
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: dataprep-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: dataprep-svc
namespace: default
spec:
ports:
- name: port1
port: 6007
targetPort: 6007
selector:
app: dataprep-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-dependency-deploy
namespace: default
spec:
replicas: 4
selector:
matchLabels:
app: embedding-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(EMBEDDING_MODEL_ID)
- --auto-truncate
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
imagePullPolicy: IfNotPresent
name: embedding-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
cpu: 76
memory: 20000Mi
requests:
cpu: 76
memory: 20000Mi
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: embedding-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: embedding-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 6006
targetPort: 80
selector:
app: embedding-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-deploy
namespace: default
spec:
replicas: 4
selector:
matchLabels:
app: embedding-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/embedding-tei:latest
imagePullPolicy: IfNotPresent
name: embedding-deploy
ports:
- containerPort: 6000
resources:
requests:
cpu: 4
memory: 4000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: embedding-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: embedding-svc
namespace: default
spec:
ports:
- name: service
port: 6000
targetPort: 6000
selector:
app: embedding-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-dependency-deploy
namespace: default
spec:
replicas: 31
selector:
matchLabels:
app: llm-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(LLM_MODEL_ID)
- --max-input-length
- '1024'
- --max-total-tokens
- '2048'
- --max-batch-total-tokens
- '65536'
- --max-batch-prefill-tokens
- '4096'
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
securityContext:
capabilities:
add:
- SYS_NICE
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: llm-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: llm-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 9009
targetPort: 80
selector:
app: llm-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-deploy
namespace: default
spec:
replicas: 4
selector:
matchLabels:
app: llm-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/llm-tgi:latest
imagePullPolicy: IfNotPresent
name: llm-deploy
ports:
- containerPort: 9000
resources:
requests:
cpu: 4
memory: 4000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: llm-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: llm-svc
namespace: default
spec:
ports:
- name: service
port: 9000
targetPort: 9000
selector:
app: llm-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: reranking-dependency-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: reranking-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: reranking-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(RERANK_MODEL_ID)
- --auto-truncate
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
- name: MAX_WARMUP_SEQUENCE_LENGTH
value: '512'
envFrom:
- configMapRef:
name: qna-config
image: opea/tei-gaudi:latest
imagePullPolicy: IfNotPresent
name: reranking-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: reranking-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: reranking-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 8808
targetPort: 80
selector:
app: reranking-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: reranking-deploy
namespace: default
spec:
replicas: 4
selector:
matchLabels:
app: reranking-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: reranking-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/reranking-tei:latest
imagePullPolicy: IfNotPresent
name: reranking-deploy
ports:
- containerPort: 8000
resources:
requests:
cpu: 4
memory: 4000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: reranking-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: reranking-svc
namespace: default
spec:
ports:
- name: service
port: 8000
targetPort: 8000
selector:
app: reranking-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: retriever-deploy
namespace: default
spec:
replicas: 4
selector:
matchLabels:
app: retriever-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: retriever-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/retriever-redis:latest
imagePullPolicy: IfNotPresent
name: retriever-deploy
ports:
- containerPort: 7000
resources:
requests:
cpu: 4
memory: 4000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: retriever-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: retriever-svc
namespace: default
spec:
ports:
- name: service
port: 7000
targetPort: 7000
selector:
app: retriever-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: vector-db
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: vector-db
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: vector-db
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: redis/redis-stack:7.2.0-v9
imagePullPolicy: IfNotPresent
name: vector-db
ports:
- containerPort: 6379
- containerPort: 8001
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: vector-db
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: vector-db
namespace: default
spec:
ports:
- name: vector-db-service
port: 6379
targetPort: 6379
- name: vector-db-insight
port: 8001
targetPort: 8001
selector:
app: vector-db
type: ClusterIP
---

View File

@@ -0,0 +1,675 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
data:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
EMBEDDING_SERVICE_HOST_IP: embedding-svc
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
INDEX_NAME: rag-redis
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
LLM_SERVICE_HOST_IP: llm-svc
NODE_SELECTOR: chatqna-opea
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
RERANK_MODEL_ID: BAAI/bge-reranker-base
RERANK_SERVICE_HOST_IP: reranking-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
kind: ConfigMap
metadata:
name: qna-config
namespace: default
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-backend-server-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: chatqna-backend-server-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: chatqna-backend-server-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
ports:
- containerPort: 8888
resources:
limits:
cpu: 8
memory: 8000Mi
requests:
cpu: 8
memory: 8000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: chatqna-backend-server-svc
namespace: default
spec:
ports:
- name: service
nodePort: 30888
port: 8888
targetPort: 8888
selector:
app: chatqna-backend-server-deploy
type: NodePort
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: dataprep-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: dataprep-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: dataprep-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/dataprep-redis:latest
imagePullPolicy: IfNotPresent
name: dataprep-deploy
ports:
- containerPort: 6007
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: dataprep-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: dataprep-svc
namespace: default
spec:
ports:
- name: port1
port: 6007
targetPort: 6007
selector:
app: dataprep-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-dependency-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: embedding-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(EMBEDDING_MODEL_ID)
- --auto-truncate
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
imagePullPolicy: IfNotPresent
name: embedding-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
cpu: 76
memory: 20000Mi
requests:
cpu: 76
memory: 20000Mi
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: embedding-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: embedding-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 6006
targetPort: 80
selector:
app: embedding-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: embedding-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/embedding-tei:latest
imagePullPolicy: IfNotPresent
name: embedding-deploy
ports:
- containerPort: 6000
resources:
requests:
cpu: 4
memory: 4000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: embedding-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: embedding-svc
namespace: default
spec:
ports:
- name: service
port: 6000
targetPort: 6000
selector:
app: embedding-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-dependency-deploy
namespace: default
spec:
replicas: 7
selector:
matchLabels:
app: llm-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(LLM_MODEL_ID)
- --max-input-length
- '1024'
- --max-total-tokens
- '2048'
- --max-batch-total-tokens
- '65536'
- --max-batch-prefill-tokens
- '4096'
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
securityContext:
capabilities:
add:
- SYS_NICE
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: llm-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: llm-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 9009
targetPort: 80
selector:
app: llm-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: llm-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/llm-tgi:latest
imagePullPolicy: IfNotPresent
name: llm-deploy
ports:
- containerPort: 9000
resources:
requests:
cpu: 4
memory: 4000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: llm-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: llm-svc
namespace: default
spec:
ports:
- name: service
port: 9000
targetPort: 9000
selector:
app: llm-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: reranking-dependency-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: reranking-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: reranking-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(RERANK_MODEL_ID)
- --auto-truncate
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
- name: MAX_WARMUP_SEQUENCE_LENGTH
value: '512'
envFrom:
- configMapRef:
name: qna-config
image: opea/tei-gaudi:latest
imagePullPolicy: IfNotPresent
name: reranking-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: reranking-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: reranking-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 8808
targetPort: 80
selector:
app: reranking-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: reranking-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: reranking-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: reranking-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/reranking-tei:latest
imagePullPolicy: IfNotPresent
name: reranking-deploy
ports:
- containerPort: 8000
resources:
requests:
cpu: 4
memory: 4000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: reranking-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: reranking-svc
namespace: default
spec:
ports:
- name: service
port: 8000
targetPort: 8000
selector:
app: reranking-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: retriever-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: retriever-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: retriever-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/retriever-redis:latest
imagePullPolicy: IfNotPresent
name: retriever-deploy
ports:
- containerPort: 7000
resources:
requests:
cpu: 4
memory: 4000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: retriever-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: retriever-svc
namespace: default
spec:
ports:
- name: service
port: 7000
targetPort: 7000
selector:
app: retriever-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: vector-db
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: vector-db
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: vector-db
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: redis/redis-stack:7.2.0-v9
imagePullPolicy: IfNotPresent
name: vector-db
ports:
- containerPort: 6379
- containerPort: 8001
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: vector-db
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: vector-db
namespace: default
spec:
ports:
- name: vector-db-service
port: 6379
targetPort: 6379
- name: vector-db-insight
port: 8001
targetPort: 8001
selector:
app: vector-db
type: ClusterIP
---

View File

@@ -0,0 +1,675 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
data:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
EMBEDDING_SERVICE_HOST_IP: embedding-svc
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
INDEX_NAME: rag-redis
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
LLM_SERVICE_HOST_IP: llm-svc
NODE_SELECTOR: chatqna-opea
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
RERANK_MODEL_ID: BAAI/bge-reranker-base
RERANK_SERVICE_HOST_IP: reranking-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
kind: ConfigMap
metadata:
name: qna-config
namespace: default
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-backend-server-deploy
namespace: default
spec:
replicas: 2
selector:
matchLabels:
app: chatqna-backend-server-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: chatqna-backend-server-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
ports:
- containerPort: 8888
resources:
limits:
cpu: 8
memory: 8000Mi
requests:
cpu: 8
memory: 8000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: chatqna-backend-server-svc
namespace: default
spec:
ports:
- name: service
nodePort: 30888
port: 8888
targetPort: 8888
selector:
app: chatqna-backend-server-deploy
type: NodePort
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: dataprep-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: dataprep-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: dataprep-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/dataprep-redis:latest
imagePullPolicy: IfNotPresent
name: dataprep-deploy
ports:
- containerPort: 6007
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: dataprep-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: dataprep-svc
namespace: default
spec:
ports:
- name: port1
port: 6007
targetPort: 6007
selector:
app: dataprep-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-dependency-deploy
namespace: default
spec:
replicas: 2
selector:
matchLabels:
app: embedding-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(EMBEDDING_MODEL_ID)
- --auto-truncate
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
imagePullPolicy: IfNotPresent
name: embedding-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
cpu: 76
memory: 20000Mi
requests:
cpu: 76
memory: 20000Mi
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: embedding-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: embedding-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 6006
targetPort: 80
selector:
app: embedding-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-deploy
namespace: default
spec:
replicas: 2
selector:
matchLabels:
app: embedding-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/embedding-tei:latest
imagePullPolicy: IfNotPresent
name: embedding-deploy
ports:
- containerPort: 6000
resources:
requests:
cpu: 4
memory: 4000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: embedding-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: embedding-svc
namespace: default
spec:
ports:
- name: service
port: 6000
targetPort: 6000
selector:
app: embedding-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-dependency-deploy
namespace: default
spec:
replicas: 15
selector:
matchLabels:
app: llm-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(LLM_MODEL_ID)
- --max-input-length
- '1024'
- --max-total-tokens
- '2048'
- --max-batch-total-tokens
- '65536'
- --max-batch-prefill-tokens
- '4096'
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
securityContext:
capabilities:
add:
- SYS_NICE
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: llm-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: llm-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 9009
targetPort: 80
selector:
app: llm-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-deploy
namespace: default
spec:
replicas: 2
selector:
matchLabels:
app: llm-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/llm-tgi:latest
imagePullPolicy: IfNotPresent
name: llm-deploy
ports:
- containerPort: 9000
resources:
requests:
cpu: 4
memory: 4000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: llm-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: llm-svc
namespace: default
spec:
ports:
- name: service
port: 9000
targetPort: 9000
selector:
app: llm-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: reranking-dependency-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: reranking-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: reranking-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(RERANK_MODEL_ID)
- --auto-truncate
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
- name: MAX_WARMUP_SEQUENCE_LENGTH
value: '512'
envFrom:
- configMapRef:
name: qna-config
image: opea/tei-gaudi:latest
imagePullPolicy: IfNotPresent
name: reranking-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: reranking-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: reranking-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 8808
targetPort: 80
selector:
app: reranking-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: reranking-deploy
namespace: default
spec:
replicas: 2
selector:
matchLabels:
app: reranking-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: reranking-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/reranking-tei:latest
imagePullPolicy: IfNotPresent
name: reranking-deploy
ports:
- containerPort: 8000
resources:
requests:
cpu: 4
memory: 4000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: reranking-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: reranking-svc
namespace: default
spec:
ports:
- name: service
port: 8000
targetPort: 8000
selector:
app: reranking-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: retriever-deploy
namespace: default
spec:
replicas: 2
selector:
matchLabels:
app: retriever-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: retriever-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/retriever-redis:latest
imagePullPolicy: IfNotPresent
name: retriever-deploy
ports:
- containerPort: 7000
resources:
requests:
cpu: 4
memory: 4000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: retriever-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: retriever-svc
namespace: default
spec:
ports:
- name: service
port: 7000
targetPort: 7000
selector:
app: retriever-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: vector-db
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: vector-db
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: vector-db
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: redis/redis-stack:7.2.0-v9
imagePullPolicy: IfNotPresent
name: vector-db
ports:
- containerPort: 6379
- containerPort: 8001
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: vector-db
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: vector-db
namespace: default
spec:
ports:
- name: vector-db-service
port: 6379
targetPort: 6379
- name: vector-db-insight
port: 8001
targetPort: 8001
selector:
app: vector-db
type: ClusterIP
---

View File

@@ -0,0 +1,614 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: qna-config
namespace: default
data:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
RERANK_MODEL_ID: BAAI/bge-reranker-base
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
INDEX_NAME: rag-redis
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
EMBEDDING_SERVICE_HOST_IP: embedding-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
RERANK_SERVICE_HOST_IP: reranking-svc
NODE_SELECTOR: chatqna-opea
LLM_SERVICE_HOST_IP: llm-svc
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-backend-server-deploy
namespace: default
spec:
replicas: 4
selector:
matchLabels:
app: chatqna-backend-server-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: chatqna-backend-server-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna-without-rerank:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
args: null
ports:
- containerPort: 8888
resources:
limits:
cpu: 8
memory: 4000Mi
requests:
cpu: 8
memory: 4000Mi
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: chatqna-backend-server-svc
namespace: default
spec:
type: NodePort
selector:
app: chatqna-backend-server-deploy
ports:
- name: service
port: 8888
targetPort: 8888
nodePort: 30888
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: dataprep-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: dataprep-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: dataprep-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: dataprep-deploy
hostIPC: true
containers:
- env:
- name: REDIS_URL
valueFrom:
configMapKeyRef:
name: qna-config
key: REDIS_URL
- name: TEI_ENDPOINT
valueFrom:
configMapKeyRef:
name: qna-config
key: TEI_EMBEDDING_ENDPOINT
- name: INDEX_NAME
valueFrom:
configMapKeyRef:
name: qna-config
key: INDEX_NAME
image: opea/dataprep-redis:latest
imagePullPolicy: IfNotPresent
name: dataprep-deploy
args: null
ports:
- containerPort: 6007
- containerPort: 6008
- containerPort: 6009
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: dataprep-svc
namespace: default
spec:
type: ClusterIP
selector:
app: dataprep-deploy
ports:
- name: port1
port: 6007
targetPort: 6007
- name: port2
port: 6008
targetPort: 6008
- name: port3
port: 6009
targetPort: 6009
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-dependency-deploy
namespace: default
spec:
replicas: 4
selector:
matchLabels:
app: embedding-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
containers:
- envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
name: embedding-dependency-deploy
args:
- --model-id
- $(EMBEDDING_MODEL_ID)
- --auto-truncate
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
ports:
- containerPort: 80
resources:
limits:
cpu: 76
memory: 20000Mi
requests:
cpu: 76
memory: 20000Mi
serviceAccountName: default
volumes:
- name: model-volume
hostPath:
path: /mnt/models
type: Directory
- name: shm
emptyDir:
medium: Memory
sizeLimit: 1Gi
---
kind: Service
apiVersion: v1
metadata:
name: embedding-dependency-svc
namespace: default
spec:
type: ClusterIP
selector:
app: embedding-dependency-deploy
ports:
- name: service
port: 6006
targetPort: 80
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-deploy
namespace: default
spec:
replicas: 4
selector:
matchLabels:
app: embedding-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: embedding-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/embedding-tei:latest
imagePullPolicy: IfNotPresent
name: embedding-deploy
args: null
ports:
- containerPort: 6000
resources:
limits:
cpu: 4
requests:
cpu: 4
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: embedding-svc
namespace: default
spec:
type: ClusterIP
selector:
app: embedding-deploy
ports:
- name: service
port: 6000
targetPort: 6000
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-dependency-deploy
namespace: default
spec:
replicas: 32
selector:
matchLabels:
app: llm-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
name: llm-dependency-deploy-demo
securityContext:
capabilities:
add:
- SYS_NICE
args:
- --model-id
- $(LLM_MODEL_ID)
- --max-input-length
- '1024'
- --max-total-tokens
- '2048'
- --max-batch-total-tokens
- '65536'
- --max-batch-prefill-tokens
- '4096'
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
serviceAccountName: default
volumes:
- name: model-volume
hostPath:
path: /mnt/models
type: Directory
- name: shm
emptyDir:
medium: Memory
sizeLimit: 1Gi
---
kind: Service
apiVersion: v1
metadata:
name: llm-dependency-svc
namespace: default
spec:
type: ClusterIP
selector:
app: llm-dependency-deploy
ports:
- name: service
port: 9009
targetPort: 80
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-deploy
namespace: default
spec:
replicas: 4
selector:
matchLabels:
app: llm-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: llm-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/llm-tgi:latest
imagePullPolicy: IfNotPresent
name: llm-deploy
args: null
ports:
- containerPort: 9000
resources:
limits:
cpu: 4
requests:
cpu: 4
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: llm-svc
namespace: default
spec:
type: ClusterIP
selector:
app: llm-deploy
ports:
- name: service
port: 9000
targetPort: 9000
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: retriever-deploy
namespace: default
spec:
replicas: 4
selector:
matchLabels:
app: retriever-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: retriever-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: retriever-deploy
hostIPC: true
containers:
- env:
- name: REDIS_URL
valueFrom:
configMapKeyRef:
name: qna-config
key: REDIS_URL
- name: TEI_EMBEDDING_ENDPOINT
valueFrom:
configMapKeyRef:
name: qna-config
key: TEI_EMBEDDING_ENDPOINT
- name: HUGGINGFACEHUB_API_TOKEN
valueFrom:
configMapKeyRef:
name: qna-config
key: HUGGINGFACEHUB_API_TOKEN
- name: INDEX_NAME
valueFrom:
configMapKeyRef:
name: qna-config
key: INDEX_NAME
image: opea/retriever-redis:latest
imagePullPolicy: IfNotPresent
name: retriever-deploy
args: null
ports:
- containerPort: 7000
resources:
limits:
cpu: 8
memory: 2500Mi
requests:
cpu: 8
memory: 2500Mi
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: retriever-svc
namespace: default
spec:
type: ClusterIP
selector:
app: retriever-deploy
ports:
- name: service
port: 7000
targetPort: 7000
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: vector-db
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: vector-db
template:
metadata:
labels:
app: vector-db
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: vector-db
containers:
- name: vector-db
image: redis/redis-stack:7.2.0-v9
ports:
- containerPort: 6379
- containerPort: 8001
---
apiVersion: v1
kind: Service
metadata:
name: vector-db
namespace: default
spec:
type: ClusterIP
selector:
app: vector-db
ports:
- name: vector-db-service
port: 6379
targetPort: 6379
- name: vector-db-insight
port: 8001
targetPort: 8001
---

View File

@@ -0,0 +1,614 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: qna-config
namespace: default
data:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
RERANK_MODEL_ID: BAAI/bge-reranker-base
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
INDEX_NAME: rag-redis
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
EMBEDDING_SERVICE_HOST_IP: embedding-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
RERANK_SERVICE_HOST_IP: reranking-svc
NODE_SELECTOR: chatqna-opea
LLM_SERVICE_HOST_IP: llm-svc
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-backend-server-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: chatqna-backend-server-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: chatqna-backend-server-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna-without-rerank:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
args: null
ports:
- containerPort: 8888
resources:
limits:
cpu: 8
memory: 4000Mi
requests:
cpu: 8
memory: 4000Mi
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: chatqna-backend-server-svc
namespace: default
spec:
type: NodePort
selector:
app: chatqna-backend-server-deploy
ports:
- name: service
port: 8888
targetPort: 8888
nodePort: 30888
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: dataprep-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: dataprep-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: dataprep-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: dataprep-deploy
hostIPC: true
containers:
- env:
- name: REDIS_URL
valueFrom:
configMapKeyRef:
name: qna-config
key: REDIS_URL
- name: TEI_ENDPOINT
valueFrom:
configMapKeyRef:
name: qna-config
key: TEI_EMBEDDING_ENDPOINT
- name: INDEX_NAME
valueFrom:
configMapKeyRef:
name: qna-config
key: INDEX_NAME
image: opea/dataprep-redis:latest
imagePullPolicy: IfNotPresent
name: dataprep-deploy
args: null
ports:
- containerPort: 6007
- containerPort: 6008
- containerPort: 6009
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: dataprep-svc
namespace: default
spec:
type: ClusterIP
selector:
app: dataprep-deploy
ports:
- name: port1
port: 6007
targetPort: 6007
- name: port2
port: 6008
targetPort: 6008
- name: port3
port: 6009
targetPort: 6009
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-dependency-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: embedding-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
containers:
- envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
name: embedding-dependency-deploy
args:
- --model-id
- $(EMBEDDING_MODEL_ID)
- --auto-truncate
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
ports:
- containerPort: 80
resources:
limits:
cpu: 76
memory: 20000Mi
requests:
cpu: 76
memory: 20000Mi
serviceAccountName: default
volumes:
- name: model-volume
hostPath:
path: /mnt/models
type: Directory
- name: shm
emptyDir:
medium: Memory
sizeLimit: 1Gi
---
kind: Service
apiVersion: v1
metadata:
name: embedding-dependency-svc
namespace: default
spec:
type: ClusterIP
selector:
app: embedding-dependency-deploy
ports:
- name: service
port: 6006
targetPort: 80
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: embedding-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: embedding-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/embedding-tei:latest
imagePullPolicy: IfNotPresent
name: embedding-deploy
args: null
ports:
- containerPort: 6000
resources:
limits:
cpu: 4
requests:
cpu: 4
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: embedding-svc
namespace: default
spec:
type: ClusterIP
selector:
app: embedding-deploy
ports:
- name: service
port: 6000
targetPort: 6000
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-dependency-deploy
namespace: default
spec:
replicas: 8
selector:
matchLabels:
app: llm-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
name: llm-dependency-deploy-demo
securityContext:
capabilities:
add:
- SYS_NICE
args:
- --model-id
- $(LLM_MODEL_ID)
- --max-input-length
- '1024'
- --max-total-tokens
- '2048'
- --max-batch-total-tokens
- '65536'
- --max-batch-prefill-tokens
- '4096'
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
serviceAccountName: default
volumes:
- name: model-volume
hostPath:
path: /mnt/models
type: Directory
- name: shm
emptyDir:
medium: Memory
sizeLimit: 1Gi
---
kind: Service
apiVersion: v1
metadata:
name: llm-dependency-svc
namespace: default
spec:
type: ClusterIP
selector:
app: llm-dependency-deploy
ports:
- name: service
port: 9009
targetPort: 80
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: llm-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: llm-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/llm-tgi:latest
imagePullPolicy: IfNotPresent
name: llm-deploy
args: null
ports:
- containerPort: 9000
resources:
limits:
cpu: 4
requests:
cpu: 4
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: llm-svc
namespace: default
spec:
type: ClusterIP
selector:
app: llm-deploy
ports:
- name: service
port: 9000
targetPort: 9000
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: retriever-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: retriever-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: retriever-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: retriever-deploy
hostIPC: true
containers:
- env:
- name: REDIS_URL
valueFrom:
configMapKeyRef:
name: qna-config
key: REDIS_URL
- name: TEI_EMBEDDING_ENDPOINT
valueFrom:
configMapKeyRef:
name: qna-config
key: TEI_EMBEDDING_ENDPOINT
- name: HUGGINGFACEHUB_API_TOKEN
valueFrom:
configMapKeyRef:
name: qna-config
key: HUGGINGFACEHUB_API_TOKEN
- name: INDEX_NAME
valueFrom:
configMapKeyRef:
name: qna-config
key: INDEX_NAME
image: opea/retriever-redis:latest
imagePullPolicy: IfNotPresent
name: retriever-deploy
args: null
ports:
- containerPort: 7000
resources:
limits:
cpu: 8
memory: 2500Mi
requests:
cpu: 8
memory: 2500Mi
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: retriever-svc
namespace: default
spec:
type: ClusterIP
selector:
app: retriever-deploy
ports:
- name: service
port: 7000
targetPort: 7000
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: vector-db
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: vector-db
template:
metadata:
labels:
app: vector-db
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: vector-db
containers:
- name: vector-db
image: redis/redis-stack:7.2.0-v9
ports:
- containerPort: 6379
- containerPort: 8001
---
apiVersion: v1
kind: Service
metadata:
name: vector-db
namespace: default
spec:
type: ClusterIP
selector:
app: vector-db
ports:
- name: vector-db-service
port: 6379
targetPort: 6379
- name: vector-db-insight
port: 8001
targetPort: 8001
---

View File

@@ -0,0 +1,614 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: qna-config
namespace: default
data:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
RERANK_MODEL_ID: BAAI/bge-reranker-base
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
INDEX_NAME: rag-redis
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
EMBEDDING_SERVICE_HOST_IP: embedding-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
RERANK_SERVICE_HOST_IP: reranking-svc
NODE_SELECTOR: chatqna-opea
LLM_SERVICE_HOST_IP: llm-svc
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-backend-server-deploy
namespace: default
spec:
replicas: 2
selector:
matchLabels:
app: chatqna-backend-server-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: chatqna-backend-server-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna-without-rerank:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
args: null
ports:
- containerPort: 8888
resources:
limits:
cpu: 8
memory: 4000Mi
requests:
cpu: 8
memory: 4000Mi
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: chatqna-backend-server-svc
namespace: default
spec:
type: NodePort
selector:
app: chatqna-backend-server-deploy
ports:
- name: service
port: 8888
targetPort: 8888
nodePort: 30888
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: dataprep-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: dataprep-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: dataprep-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: dataprep-deploy
hostIPC: true
containers:
- env:
- name: REDIS_URL
valueFrom:
configMapKeyRef:
name: qna-config
key: REDIS_URL
- name: TEI_ENDPOINT
valueFrom:
configMapKeyRef:
name: qna-config
key: TEI_EMBEDDING_ENDPOINT
- name: INDEX_NAME
valueFrom:
configMapKeyRef:
name: qna-config
key: INDEX_NAME
image: opea/dataprep-redis:latest
imagePullPolicy: IfNotPresent
name: dataprep-deploy
args: null
ports:
- containerPort: 6007
- containerPort: 6008
- containerPort: 6009
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: dataprep-svc
namespace: default
spec:
type: ClusterIP
selector:
app: dataprep-deploy
ports:
- name: port1
port: 6007
targetPort: 6007
- name: port2
port: 6008
targetPort: 6008
- name: port3
port: 6009
targetPort: 6009
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-dependency-deploy
namespace: default
spec:
replicas: 2
selector:
matchLabels:
app: embedding-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
containers:
- envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
name: embedding-dependency-deploy
args:
- --model-id
- $(EMBEDDING_MODEL_ID)
- --auto-truncate
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
ports:
- containerPort: 80
resources:
limits:
cpu: 76
memory: 20000Mi
requests:
cpu: 76
memory: 20000Mi
serviceAccountName: default
volumes:
- name: model-volume
hostPath:
path: /mnt/models
type: Directory
- name: shm
emptyDir:
medium: Memory
sizeLimit: 1Gi
---
kind: Service
apiVersion: v1
metadata:
name: embedding-dependency-svc
namespace: default
spec:
type: ClusterIP
selector:
app: embedding-dependency-deploy
ports:
- name: service
port: 6006
targetPort: 80
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-deploy
namespace: default
spec:
replicas: 2
selector:
matchLabels:
app: embedding-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: embedding-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/embedding-tei:latest
imagePullPolicy: IfNotPresent
name: embedding-deploy
args: null
ports:
- containerPort: 6000
resources:
limits:
cpu: 4
requests:
cpu: 4
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: embedding-svc
namespace: default
spec:
type: ClusterIP
selector:
app: embedding-deploy
ports:
- name: service
port: 6000
targetPort: 6000
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-dependency-deploy
namespace: default
spec:
replicas: 16
selector:
matchLabels:
app: llm-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
name: llm-dependency-deploy-demo
securityContext:
capabilities:
add:
- SYS_NICE
args:
- --model-id
- $(LLM_MODEL_ID)
- --max-input-length
- '1024'
- --max-total-tokens
- '2048'
- --max-batch-total-tokens
- '65536'
- --max-batch-prefill-tokens
- '4096'
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
serviceAccountName: default
volumes:
- name: model-volume
hostPath:
path: /mnt/models
type: Directory
- name: shm
emptyDir:
medium: Memory
sizeLimit: 1Gi
---
kind: Service
apiVersion: v1
metadata:
name: llm-dependency-svc
namespace: default
spec:
type: ClusterIP
selector:
app: llm-dependency-deploy
ports:
- name: service
port: 9009
targetPort: 80
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-deploy
namespace: default
spec:
replicas: 2
selector:
matchLabels:
app: llm-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: llm-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/llm-tgi:latest
imagePullPolicy: IfNotPresent
name: llm-deploy
args: null
ports:
- containerPort: 9000
resources:
limits:
cpu: 4
requests:
cpu: 4
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: llm-svc
namespace: default
spec:
type: ClusterIP
selector:
app: llm-deploy
ports:
- name: service
port: 9000
targetPort: 9000
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: retriever-deploy
namespace: default
spec:
replicas: 2
selector:
matchLabels:
app: retriever-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: retriever-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: retriever-deploy
hostIPC: true
containers:
- env:
- name: REDIS_URL
valueFrom:
configMapKeyRef:
name: qna-config
key: REDIS_URL
- name: TEI_EMBEDDING_ENDPOINT
valueFrom:
configMapKeyRef:
name: qna-config
key: TEI_EMBEDDING_ENDPOINT
- name: HUGGINGFACEHUB_API_TOKEN
valueFrom:
configMapKeyRef:
name: qna-config
key: HUGGINGFACEHUB_API_TOKEN
- name: INDEX_NAME
valueFrom:
configMapKeyRef:
name: qna-config
key: INDEX_NAME
image: opea/retriever-redis:latest
imagePullPolicy: IfNotPresent
name: retriever-deploy
args: null
ports:
- containerPort: 7000
resources:
limits:
cpu: 8
memory: 2500Mi
requests:
cpu: 8
memory: 2500Mi
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: retriever-svc
namespace: default
spec:
type: ClusterIP
selector:
app: retriever-deploy
ports:
- name: service
port: 7000
targetPort: 7000
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: vector-db
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: vector-db
template:
metadata:
labels:
app: vector-db
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: vector-db
containers:
- name: vector-db
image: redis/redis-stack:7.2.0-v9
ports:
- containerPort: 6379
- containerPort: 8001
---
apiVersion: v1
kind: Service
metadata:
name: vector-db
namespace: default
spec:
type: ClusterIP
selector:
app: vector-db
ports:
- name: vector-db-service
port: 6379
targetPort: 6379
- name: vector-db-insight
port: 8001
targetPort: 8001
---

View File

@@ -0,0 +1,35 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# Update the package index
sudo apt-get -y update
# Install prerequisites
sudo apt-get -y install ca-certificates curl
# Create the directory for the Docker GPG key
sudo install -m 0755 -d /etc/apt/keyrings
# Add Docker's official GPG key
sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc
# Set permissions for the GPG key
sudo chmod a+r /etc/apt/keyrings/docker.asc
# Add Docker repository to the sources list
echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \
$(. /etc/os-release && echo "$VERSION_CODENAME") stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
# Update the package index with Docker packages
sudo apt-get -y update
# Install Docker packages
sudo apt-get -y install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
# add existing user
sudo usermod -aG docker $USER
# Optional: Verify that Docker is installed correctly
sudo docker --version

View File

@@ -11,36 +11,84 @@ git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
```
If you are in a proxy environment, set the proxy-related environment variables:
export http_proxy="Your_HTTP_Proxy"
export https_proxy="Your_HTTPs_Proxy"
### 1. Build Embedding Image
```bash
docker build --no-cache -t opea/embedding-tei:latest -f comps/embeddings/tei/langchain/Dockerfile .
docker build --no-cache -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/tei/langchain/Dockerfile .
```
### 2. Build Retriever Image
```bash
docker build --no-cache -t opea/retriever-redis:latest -f comps/retrievers/redis/langchain/Dockerfile .
docker build --no-cache -t opea/retriever-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/redis/langchain/Dockerfile .
```
### 3. Build Rerank Image
```bash
docker build --no-cache -t opea/reranking-tei:latest -f comps/reranks/tei/Dockerfile .
docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/tei/Dockerfile .
```
### 4. Build LLM Image
### 4. Set up Ollama Service and Build LLM Image
We use [Ollama](https://ollama.com/) as our LLM service for AIPC. Please pre-download Ollama on your PC.
We use [Ollama](https://ollama.com/) as our LLM service for AIPC.
Please set up Ollama on your PC follow the instructions. This will set the entrypoint needed for the Ollama to suit the ChatQnA examples.
#### 4.1 Set Up Ollama LLM Service
Install Ollama service with one command
curl -fsSL https://ollama.com/install.sh | sh
##### Set Ollama Service Configuration
Ollama Service Configuration file is /etc/systemd/system/ollama.service. Edit the file to set OLLAMA_HOST environment (Replace **${host_ip}** with your host IPV4).
```
Environment="OLLAMA_HOST=${host_ip}:11434"
```
##### Set https_proxy environment for Ollama
if your system access network through proxy, add https_proxy in Ollama Service Configuration file
```
Environment="https_proxy="Your_HTTPS_Proxy"
```
##### Restart Ollam services
```
$ sudo systemctl daemon-reload
$ sudo systemctl restart ollama.service
```
##### Pull LLM model
```
#export OLLAMA_HOST=http://${host_ip}:11434
#ollama pull llam3
#ollama lists
NAME ID SIZE MODIFIED
llama3:latest 365c0bd3c000 4.7 GB 5 days ago
```
#### 4.2 Build LLM Image
```bash
docker build --no-cache -t opea/llm-ollama:latest -f comps/llms/text-generation/ollama/langchain/Dockerfile .
docker build --no-cache -t opea/llm-ollama:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/ollama/langchain/Dockerfile .
```
### 5. Build Dataprep Image
```bash
docker build --no-cache -t opea/dataprep-redis:latest -f comps/dataprep/redis/langchain/Dockerfile .
docker build --no-cache -t opea/dataprep-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain/Dockerfile .
cd ..
```
@@ -61,7 +109,7 @@ Build frontend Docker image via below command:
```bash
cd GenAIExamples/ChatQnA/ui
docker build --no-cache -t opea/chatqna-ui:latest -f ./docker/Dockerfile .
docker build --no-cache -t opea/chatqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile .
cd ../../../..
```

View File

@@ -10,6 +10,7 @@ export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006"
export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
export TGI_LLM_ENDPOINT="http://${host_ip}:9009"
export vLLM_LLM_ENDPOINT="http://${host_ip}:9009"
export REDIS_URL="redis://${host_ip}:6379"
export INDEX_NAME="rag-redis"
export REDIS_HOST=${host_ip}

View File

@@ -13,7 +13,7 @@
## Deploy On Xeon
```
cd GenAIExamples/ChatQnA/kubernetes/intel/cpu/xeon/manifests
cd GenAIExamples/ChatQnA/kubernetes/intel/cpu/xeon/manifest
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" chatqna.yaml
kubectl apply -f chatqna.yaml
@@ -33,7 +33,7 @@ kubectl apply -f chatqna_bf16.yaml
## Deploy On Gaudi
```
cd GenAIExamples/ChatQnA/kubernetes/intel/hpu/gaudi/manifests
cd GenAIExamples/ChatQnA/kubernetes/intel/hpu/gaudi/manifest
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" chatqna.yaml
kubectl apply -f chatqna.yaml

View File

@@ -2,9 +2,9 @@
This document outlines the deployment process for a ChatQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline components on Intel Xeon server and Gaudi machines.
The ChatQnA Service leverages a Kubernetes operator called genai-microservices-connector(GMC). GMC supports connecting microservices to create pipelines based on the specification in the pipeline yaml file in addition to allowing the user to dynamically control which model is used in a service such as an LLM or embedder. The underlying pipeline language also supports using external services that may be running in public or private cloud elsewhere.
The ChatQnA Service leverages a Kubernetes operator called genai-microservices-connector (GMC). GMC supports connecting microservices to create pipelines based on the specification in the pipeline yaml file in addition to allowing the user to dynamically control which model is used in a service such as an LLM or embedder. The underlying pipeline language also supports using external services that may be running in public or private cloud elsewhere.
Install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in Section "Getting Started" at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector). Soon as we publish images to Docker Hub, at which point no builds will be required, simplifying install.
Install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in Section "Getting Started" at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector/README.md). Soon as we publish images to Docker Hub, at which point no builds will be required, simplifying install.
The ChatQnA application is defined as a Custom Resource (CR) file that the above GMC operator acts upon. It first checks if the microservices listed in the CR yaml file are running, if not starts them and then proceeds to connect them. When the ChatQnA RAG pipeline is ready, the service endpoint details are returned, letting you use the application. Should you use "kubectl get pods" commands you will see all the component microservices, in particular `embedding`, `retriever`, `rerank`, and `llm`.

View File

@@ -1294,7 +1294,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "ghcr.io/huggingface/tei-gaudi:synapse_1.16"
image: "ghcr.io/huggingface/tei-gaudi:latest"
imagePullPolicy: IfNotPresent
args:
- "--auto-truncate"

View File

@@ -1115,7 +1115,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "ghcr.io/huggingface/tei-gaudi:synapse_1.16"
image: "ghcr.io/huggingface/tei-gaudi:latest"
imagePullPolicy: IfNotPresent
args:
- "--auto-truncate"

View File

@@ -106,7 +106,7 @@ Refer to the [Kubernetes Guide](./kubernetes/intel/README.md) for instructions o
Install Helm (version >= 3.15) first. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information.
Refer to the [CodeGen helm chart](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts/codegen) for instructions on deploying CodeGen into Kubernetes on Xeon & Gaudi.
Refer to the [CodeGen helm chart](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts/codegen/README.md) for instructions on deploying CodeGen into Kubernetes on Xeon & Gaudi.
## Consume CodeGen Service
@@ -128,7 +128,7 @@ Two ways of consuming CodeGen Service:
## Troubleshooting
1. If you get errors like "Access Denied", [validate micro service](https://github.com/opea-project/GenAIExamples/tree/main/CodeGen/docker_compose/intel/cpu/xeon#validate-microservices) first. A simple example:
1. If you get errors like "Access Denied", [validate micro service](https://github.com/opea-project/GenAIExamples/tree/main/CodeGen/docker_compose/intel/cpu/xeon/README.md#validate-microservices) first. A simple example:
```bash
http_proxy=""

View File

@@ -8,7 +8,7 @@ We evaluate accuracy by [bigcode-evaluation-harness](https://github.com/bigcode-
### Launch CodeGen microservice
Please refer to [CodeGen Examples](https://github.com/opea-project/GenAIExamples/tree/main/CodeGen), follow the guide to deploy CodeGen megeservice.
Please refer to [CodeGen Examples](https://github.com/opea-project/GenAIExamples/tree/main/CodeGen/README.md), follow the guide to deploy CodeGen megeservice.
Use `curl` command to test codegen service and ensure that it has started properly

View File

@@ -2,7 +2,7 @@
This document outlines the deployment process for a Code Generation (CodeGen) application that utilizes the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice components on Intel Xeon servers and Gaudi machines.
Install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in Section "Getting Started" at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector#readme). We will soon publish images to Docker Hub, at which point no builds will be required, further simplifying install.
Install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in Section "Getting Started" at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector/README.md). We will soon publish images to Docker Hub, at which point no builds will be required, further simplifying install.
If you have only Intel Xeon machines you could use the codegen_xeon.yaml file or if you have a Gaudi cluster you could use codegen_gaudi.yaml
In the below example we illustrate on Xeon.

View File

@@ -99,7 +99,7 @@ Refer to the [Code Translation Kubernetes Guide](./kubernetes/intel/README.md)
Install Helm (version >= 3.15) first. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information.
Refer to the [CodeTrans helm chart](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts/codetrans) for instructions on deploying CodeTrans into Kubernetes on Xeon & Gaudi.
Refer to the [CodeTrans helm chart](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts/codetrans/README.md) for instructions on deploying CodeTrans into Kubernetes on Xeon & Gaudi.
## Consume Code Translation Service
@@ -121,7 +121,7 @@ By default, the UI runs on port 5173 internally.
## Troubleshooting
1. If you get errors like "Access Denied", [validate micro service](https://github.com/opea-project/GenAIExamples/tree/main/CodeTrans/docker_compose/intel/cpu/xeon#validate-microservices) first. A simple example:
1. If you get errors like "Access Denied", [validate micro service](https://github.com/opea-project/GenAIExamples/tree/main/CodeTrans/docker_compose/intel/cpu/xeon/README.md#validate-microservices) first. A simple example:
```bash
http_proxy=""

View File

@@ -2,7 +2,7 @@
This document outlines the deployment process for a Code Translation (CodeTran) application that utilizes the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice components on Intel Xeon servers and Gaudi machines.
Please install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in Section "Getting Started" at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector#readme). We will soon publish images to Docker Hub, at which point no builds will be required, further simplifying install.
Please install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in Section "Getting Started" at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector/README.md). We will soon publish images to Docker Hub, at which point no builds will be required, further simplifying install.
If you have only Intel Xeon machines you could use the codetrans_xeon.yaml file or if you have a Gaudi cluster you could use codetrans_gaudi.yaml
In the below example we illustrate on Xeon.

View File

@@ -1,8 +1,22 @@
# DocRetriever Application
DocRetriever are the most widely adopted use case for leveraging the different methodologies to match user query against a set of free-text records. DocRetriever is essential to RAG system, which bridges the knowledge gap by dynamically fetching relevant information from external sources, ensuring that responses generated remain factual and current. The core of this architecture are vector databases, which are instrumental in enabling efficient and semantic retrieval of information. These databases store data as vectors, allowing RAG to swiftly access the most pertinent documents or data points based on semantic similarity.
DocRetriever is the most widely adopted use case for leveraging the different methodologies to match user query against a set of free-text records. DocRetriever is essential to RAG system, which bridges the knowledge gap by dynamically fetching relevant information from external sources, ensuring that responses generated remain factual and current. The core of this architecture are vector databases, which are instrumental in enabling efficient and semantic retrieval of information. These databases store data as vectors, allowing RAG to swiftly access the most pertinent documents or data points based on semantic similarity.
## We provided DocRetriever with different deployment infra
- [docker xeon version](docker_compose/intel/cpu/xeon/) => minimum endpoints, easy to setup
- [docker gaudi version](docker_compose/intel/hpu/gaudi/) => with extra tei_gaudi endpoint, faster
- [docker xeon version](docker_compose/intel/cpu/xeon/README.md) => minimum endpoints, easy to setup
- [docker gaudi version](docker_compose/intel/hpu/gaudi/README.md) => with extra tei_gaudi endpoint, faster
## We allow users to set retriever/reranker hyperparams via requests
Example usage:
```python
url = "http://{host_ip}:{port}/v1/retrievaltool".format(host_ip=host_ip, port=port)
payload = {
"messages": query,
"k": 5, # retriever top k
"top_n": 2, # reranker top n
}
response = requests.post(url, json=payload)
```

View File

@@ -79,13 +79,26 @@ Retrieval from KnowledgeBase
```bash
curl http://${host_ip}:8889/v1/retrievaltool -X POST -H "Content-Type: application/json" -d '{
"text": "Explain the OPEA project?"
"messages": "Explain the OPEA project?"
}'
# expected output
{"id":"354e62c703caac8c547b3061433ec5e8","reranked_docs":[{"id":"06d5a5cefc06cf9a9e0b5fa74a9f233c","text":"Close SearchsearchMenu WikiNewsCommunity Daysx-twitter linkedin github searchStreamlining implementation of enterprise-grade Generative AIEfficiently integrate secure, performant, and cost-effective Generative AI workflows into business value.TODAYOPEA..."}],"initial_query":"Explain the OPEA project?"}
```
**Note**: `messages` is the required field. You can also pass in parameters for the retriever and reranker in the request. The parameters that can changed are listed below.
1. retriever
* search_type: str = "similarity"
* k: int = 4
* distance_threshold: Optional[float] = None
* fetch_k: int = 20
* lambda_mult: float = 0.5
* score_threshold: float = 0.2
2. reranker
* top_n: int = 1
## 5. Trouble shooting
1. check all containers are alive

View File

@@ -74,13 +74,30 @@ services:
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
restart: unless-stopped
tei-reranking-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: tei-reranking-server
ports:
- "8808:80"
volumes:
- "./data:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
reranking:
image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
container_name: reranking-tei-xeon-server
depends_on:
- tei-reranking-service
ports:
- "8000:8000"
ipc: host
entrypoint: python local_reranking.py
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}

View File

@@ -80,13 +80,26 @@ Retrieval from KnowledgeBase
```bash
curl http://${host_ip}:8889/v1/retrievaltool -X POST -H "Content-Type: application/json" -d '{
"text": "Explain the OPEA project?"
"messages": "Explain the OPEA project?"
}'
# expected output
{"id":"354e62c703caac8c547b3061433ec5e8","reranked_docs":[{"id":"06d5a5cefc06cf9a9e0b5fa74a9f233c","text":"Close SearchsearchMenu WikiNewsCommunity Daysx-twitter linkedin github searchStreamlining implementation of enterprise-grade Generative AIEfficiently integrate secure, performant, and cost-effective Generative AI workflows into business value.TODAYOPEA..."}],"initial_query":"Explain the OPEA project?"}
```
**Note**: `messages` is the required field. You can also pass in parameters for the retriever and reranker in the request. The parameters that can changed are listed below.
1. retriever
* search_type: str = "similarity"
* k: int = 4
* distance_threshold: Optional[float] = None
* fetch_k: int = 20
* lambda_mult: float = 0.5
* score_threshold: float = 0.2
2. reranker
* top_n: int = 1
## 5. Trouble shooting
1. check all containers are alive

View File

@@ -77,13 +77,30 @@ services:
REDIS_URL: ${REDIS_URL}
INDEX_NAME: ${INDEX_NAME}
restart: unless-stopped
tei-reranking-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: tei-reranking-gaudi-server
ports:
- "8808:80"
volumes:
- "./data:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
reranking:
image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
container_name: reranking-tei-gaudi-server
depends_on:
- tei-reranking-service
ports:
- "8000:8000"
ipc: host
entrypoint: python local_reranking.py
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}

View File

@@ -0,0 +1,71 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import argparse
import requests
def search_knowledge_base(query: str, url: str, request_type="chat_completion") -> str:
"""Search the knowledge base for a specific query."""
print(url)
proxies = {"http": ""}
if request_type == "chat_completion":
print("Sending chat completion request")
payload = {
"messages": query,
"k": 5,
"top_n": 2,
}
else:
print("Sending text request")
payload = {
"text": query,
}
response = requests.post(url, json=payload, proxies=proxies)
print(response)
if "documents" in response.json():
docs = response.json()["documents"]
context = ""
for i, doc in enumerate(docs):
if i == 0:
context = str(i) + ": " + doc
else:
context += "\n" + str(i) + ": " + doc
# print(context)
return context
elif "text" in response.json():
return response.json()["text"]
elif "reranked_docs" in response.json():
docs = response.json()["reranked_docs"]
context = ""
for i, doc in enumerate(docs):
if i == 0:
context = doc["text"]
else:
context += "\n" + doc["text"]
# print(context)
return context
else:
return "Error parsing response from the knowledge base."
def main():
parser = argparse.ArgumentParser(description="Index data")
parser.add_argument("--host_ip", type=str, default="localhost", help="Host IP")
parser.add_argument("--port", type=int, default=8889, help="Port")
parser.add_argument("--request_type", type=str, default="chat_completion", help="Test type")
args = parser.parse_args()
print(args)
host_ip = args.host_ip
port = args.port
url = "http://{host_ip}:{port}/v1/retrievaltool".format(host_ip=host_ip, port=port)
response = search_knowledge_base("OPEA", url, request_type=args.request_type)
print(response)
if __name__ == "__main__":
main()

View File

@@ -64,7 +64,7 @@ function validate() {
}
function validate_megaservice() {
echo "Testing DataPrep Service"
echo "=========Ingest data=================="
local CONTENT=$(curl -X POST "http://${ip_address}:6007/v1/dataprep" \
-H "Content-Type: multipart/form-data" \
-F 'link_list=["https://opea.dev"]')
@@ -78,7 +78,7 @@ function validate_megaservice() {
fi
# Curl the Mega Service
echo "Testing retriever service"
echo "==============Testing retriever service: Text Request================="
local CONTENT=$(curl http://${ip_address}:8889/v1/retrievaltool -X POST -H "Content-Type: application/json" -d '{
"text": "Explain the OPEA project?"
}')
@@ -93,6 +93,21 @@ function validate_megaservice() {
docker logs doc-index-retriever-server | tee -a ${LOG_PATH}/doc-index-retriever-service-gaudi.log
exit 1
fi
echo "==============Testing retriever service: ChatCompletion Request================"
cd $WORKPATH/tests
local CONTENT=$(python test.py --host_ip ${ip_address} --request_type chat_completion)
local EXIT_CODE=$(validate "$CONTENT" "OPEA" "doc-index-retriever-service-gaudi")
echo "$EXIT_CODE"
local EXIT_CODE="${EXIT_CODE:0-1}"
echo "return value is $EXIT_CODE"
if [ "$EXIT_CODE" == "1" ]; then
docker logs tei-embedding-gaudi-server | tee -a ${LOG_PATH}/doc-index-retriever-service-gaudi.log
docker logs retriever-redis-server | tee -a ${LOG_PATH}/doc-index-retriever-service-gaudi.log
docker logs reranking-tei-server | tee -a ${LOG_PATH}/doc-index-retriever-service-gaudi.log
docker logs doc-index-retriever-server | tee -a ${LOG_PATH}/doc-index-retriever-service-gaudi.log
exit 1
fi
}
function stop_docker() {

View File

@@ -63,8 +63,8 @@ function validate() {
}
function validate_megaservice() {
echo "Testing DataPrep Service"
local CONTENT=$(curl -X POST "http://${ip_address}:6007/v1/dataprep" \
echo "===========Ingest data=================="
local CONTENT=$(http_proxy="" curl -X POST "http://${ip_address}:6007/v1/dataprep" \
-H "Content-Type: multipart/form-data" \
-F 'link_list=["https://opea.dev"]')
local EXIT_CODE=$(validate "$CONTENT" "Data preparation succeeded" "dataprep-redis-service-xeon")
@@ -77,16 +77,32 @@ function validate_megaservice() {
fi
# Curl the Mega Service
echo "Testing retriever service"
echo "================Testing retriever service: Default params================"
local CONTENT=$(curl http://${ip_address}:8889/v1/retrievaltool -X POST -H "Content-Type: application/json" -d '{
"text": "Explain the OPEA project?"
"messages": "Explain the OPEA project?"
}')
local EXIT_CODE=$(validate "$CONTENT" "OPEA" "doc-index-retriever-service-xeon")
echo "$EXIT_CODE"
local EXIT_CODE="${EXIT_CODE:0-1}"
echo "return value is $EXIT_CODE"
if [ "$EXIT_CODE" == "1" ]; then
docker logs tei-embedding-xeon-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log
docker logs tei-embedding-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log
docker logs retriever-redis-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log
docker logs reranking-tei-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log
docker logs doc-index-retriever-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log
exit 1
fi
echo "================Testing retriever service: ChatCompletion Request================"
cd $WORKPATH/tests
local CONTENT=$(python test.py --host_ip ${ip_address} --request_type chat_completion)
local EXIT_CODE=$(validate "$CONTENT" "OPEA" "doc-index-retriever-service-xeon")
echo "$EXIT_CODE"
local EXIT_CODE="${EXIT_CODE:0-1}"
echo "return value is $EXIT_CODE"
if [ "$EXIT_CODE" == "1" ]; then
docker logs tei-embedding-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log
docker logs retriever-redis-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log
docker logs reranking-tei-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log
docker logs doc-index-retriever-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log

View File

@@ -21,7 +21,7 @@ Currently we support two ways of deploying Document Summarization services with
docker pull opea/docsum:latest
```
2. Start services using the docker images `built from source`: [Guide](./docker_compose)
2. Start services using the docker images `built from source`: [Guide](https://github.com/opea-project/GenAIExamples/tree/main/DocSum/docker_compose)
### Required Models
@@ -98,7 +98,7 @@ Refer to [Kubernetes deployment](./kubernetes/intel/README.md)
Install Helm (version >= 3.15) first. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information.
Refer to the [DocSum helm chart](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts/docsum) for instructions on deploying DocSum into Kubernetes on Xeon & Gaudi.
Refer to the [DocSum helm chart](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts/docsum/README.md) for instructions on deploying DocSum into Kubernetes on Xeon & Gaudi.
### Workflow of the deployed Document Summarization Service
@@ -143,7 +143,7 @@ Two ways of consuming Document Summarization Service:
## Troubleshooting
1. If you get errors like "Access Denied", [validate micro service](https://github.com/opea-project/GenAIExamples/tree/main/DocSum/docker_compose/intel/cpu/xeon#validate-microservices) first. A simple example:
1. If you get errors like "Access Denied", [validate micro service](https://github.com/opea-project/GenAIExamples/tree/main/DocSum/docker_compose/intel/cpu/xeon/README.md#validate-microservices) first. A simple example:
```bash
http_proxy=""

View File

@@ -3,7 +3,7 @@
This document outlines the deployment process for a Document Summary (DocSum) application that utilizes the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice components on Intel Xeon servers and Gaudi machines.
The DocSum Service leverages a Kubernetes operator called genai-microservices-connector(GMC). GMC supports connecting microservices to create pipelines based on the specification in the pipeline yaml file, in addition it allows the user to dynamically control which model is used in a service such as an LLM or embedder. The underlying pipeline language also supports using external services that may be running in public or private clouds elsewhere.
Install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in Section "Getting Started" at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector#readme). We will soon publish images to Docker Hub, at which point no builds will be required, further simplifying install.
Install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in Section "Getting Started" at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector/README.md). We will soon publish images to Docker Hub, at which point no builds will be required, further simplifying install.
The DocSum application is defined as a Custom Resource (CR) file that the above GMC operator acts upon. It first checks if the microservices listed in the CR yaml file are running, if not it starts them and then proceeds to connect them. When the DocSum RAG pipeline is ready, the service endpoint details are returned, letting you use the application. Should you use "kubectl get pods" commands you will see all the component microservices, in particular embedding, retriever, rerank, and llm.

View File

@@ -16,7 +16,7 @@ python get_context.py
### Launch FaQGen microservice
Please refer to [FaQGen microservice](https://github.com/opea-project/GenAIComps/tree/main/comps/llms/faq-generation/tgi), set up an microservice endpoint.
Please refer to [FaQGen microservice](https://github.com/opea-project/GenAIComps/tree/main/comps/llms/faq-generation/tgi/langchain/README.md), set up an microservice endpoint.
```
export FAQ_ENDPOINT = "http://${your_ip}:9000/v1/faqgen"

View File

@@ -20,7 +20,7 @@ docker run -it --rm \
--ipc=host \
-e HTTPS_PROXY=$https_proxy \
-e HTTP_PROXY=$https_proxy \
ghcr.io/huggingface/tgi-gaudi:2.0.1 \
ghcr.io/huggingface/tgi-gaudi:2.0.5 \
--model-id $model_name \
--max-input-tokens $max_input_tokens \
--max-total-tokens $max_total_tokens \

View File

@@ -9,9 +9,9 @@ Generative AI Examples is licensed under [Apache License Version 2.0](http://www
This software includes components that have separate copyright notices and licensing terms.
Your use of the source code for these components is subject to the terms and conditions of the following licenses.
- [Third Party Programs](/third-party-programs.txt)
- [Third Party Programs](third-party-programs.txt)
See the accompanying [license](/LICENSE) file for full license text and copyright notices.
See the accompanying [license](LICENSE) file for full license text and copyright notices.
## Citation

View File

@@ -6,7 +6,7 @@ This document outlines the deployment process for a MultimodalQnA application ut
Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below.
**Export the value of the public IP address of your Xeon server to the `host_ip` environment variable**
**Export the value of the public IP address of your Gaudi server to the `host_ip` environment variable**
> Change the External_Public_IP below with the actual IPV4 value

View File

@@ -68,7 +68,7 @@ services:
INDEX_NAME: ${INDEX_NAME}
restart: unless-stopped
tgi-gaudi:
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
container_name: tgi-llava-gaudi-server
ports:
- "8399:80"

View File

@@ -24,7 +24,7 @@ function build_docker_images() {
service_list="multimodalqna multimodalqna-ui embedding-multimodal-bridgetower embedding-multimodal retriever-multimodal-redis lvm-tgi dataprep-multimodal-redis"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.4
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
docker images && sleep 1s
}

View File

@@ -30,8 +30,8 @@ Deployment are based on released docker images by default, check [docker image l
#### Prerequisite
- For Docker Compose based deployment, you should have docker compose installed. Refer to [docker compose install](https://docs.docker.com/compose/install/).
- For Kubernetes based deployment, we provide 3 ways from the easiest manifests to powerful [GMC](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector) based deployment.
- You should have a kubernetes cluster ready for use. If not, you can refer to [k8s install](https://github.com/opea-project/docs/tree/main/guide/installation/k8s_install) to deploy one.
- For Kubernetes based deployment, we provide 3 ways from the easiest manifests to powerful [GMC](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector/README.md) based deployment.
- You should have a kubernetes cluster ready for use. If not, you can refer to [k8s install](https://github.com/opea-project/docs/tree/main/guide/installation/k8s_install/README.md) to deploy one.
- (Optional) You should have GMC installed to your kubernetes cluster if you want to try with GMC. Refer to [GMC install](https://github.com/opea-project/docs/blob/main/guide/installation/gmc_install/gmc_install.md) for more information.
- (Optional) You should have Helm (version >= 3.15) installed if you want to deploy with Helm Charts. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information.
@@ -68,4 +68,4 @@ Thank you for being a part of this journey. We can't wait to see what we can ach
- [Code of Conduct](https://github.com/opea-project/docs/tree/main/community/CODE_OF_CONDUCT.md)
- [Security Policy](https://github.com/opea-project/docs/tree/main/community/SECURITY.md)
- [Legal Information](/LEGAL_INFORMATION.md)
- [Legal Information](LEGAL_INFORMATION.md)

View File

@@ -32,7 +32,7 @@ Currently we support two ways of deploying SearchQnA services with docker compos
docker pull opea/searchqna:latest
```
2. Start services using the docker images `built from source`: [Guide](./docker_compose)
2. Start services using the docker images `built from source`: [Guide](https://github.com/opea-project/GenAIExamples/tree/main/SearchQnA/docker_compose/)
### Setup Environment Variable
@@ -110,7 +110,7 @@ Two ways of consuming SearchQnA Service:
## Troubleshooting
1. If you get errors like "Access Denied", [validate micro service](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/docker_compose/intel/cpu/xeon#validate-microservices) first. A simple example:
1. If you get errors like "Access Denied", [validate micro service](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/docker_compose/intel/cpu/xeon/README.md#validate-microservices) first. A simple example:
```bash
http_proxy=""

View File

@@ -2,7 +2,7 @@
This document outlines the deployment process for a Code Generation (SearchQnA) application that utilizes the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice components on Intel Xeon servers and Gaudi machines.
Install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in Section "Getting Started" at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector#readme). We will soon publish images to Docker Hub, at which point no builds will be required, further simplifying install.
Install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in Section "Getting Started" at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector/README.md). We will soon publish images to Docker Hub, at which point no builds will be required, further simplifying install.
If you have only Intel Xeon machines you could use the searchQnA_xeon.yaml file or if you have a Gaudi cluster you could use searchQnA_gaudi.yaml
In the below example we illustrate on Xeon.

View File

@@ -2,7 +2,7 @@
This document outlines the deployment process for a Code Generation (Translation) application that utilizes the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice components on Intel Xeon servers and Gaudi machines.
Please install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in Section "Getting Started" at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector#readme). We will soon publish images to Docker Hub, at which point no builds will be required, further simplifying install.
Please install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in Section "Getting Started" at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector/README.md). We will soon publish images to Docker Hub, at which point no builds will be required, further simplifying install.
If you have only Intel Xeon machines you could use the translation_xeon.yaml file or if you have a Gaudi cluster you could use translation_gaudi.yaml
In the below example we illustrate on Xeon.

View File

@@ -362,7 +362,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "ghcr.io/huggingface/tgi-gaudi:2.0.1"
image: "ghcr.io/huggingface/tgi-gaudi:2.0.5"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data

View File

@@ -2,7 +2,7 @@
This document outlines the deployment process for a Visual Question Answering (VisualQnA) application that utilizes the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice components on Intel Xeon servers and Gaudi machines.
Please install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in Section "Getting Started" at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector#readme). We will soon publish images to Docker Hub, at which point no builds will be required, further simplifying install.
Please install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in Section "Getting Started" at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector/README.md). We will soon publish images to Docker Hub, at which point no builds will be required, further simplifying install.
If you have only Intel Xeon machines you could use the visualqna_xeon.yaml file or if you have a Gaudi cluster you could use visualqna_gaudi.yaml
In the below example we illustrate on Xeon.

View File

@@ -16,7 +16,6 @@ Take ChatQnA for example. ChatQnA is a chatbot application service based on the
| [opea/chatqna-without-rerank](https://hub.docker.com/r/opea/chatqna-without-rerank) | | |
| [opea/chat-conversation-service](https://hub.docker.com/r/opea/chat-conversation-service) | | |
| [opea/chat-conversation-ui-service](https://hub.docker.com/r/opea/chat-conversation-ui-service) | | |
| [opea/chat-history-service](https://hub.docker.com/r/opea/chat-history-service) | | |
| [opea/codegen](https://hub.docker.com/r/opea/codegen) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/CodeGen/Dockerfile) | The docker image served as the codegen gateway to provide service of the automatic creation of source code from a higher-level representation |
| [opea/codegen-ui](https://hub.docker.com/r/opea/codegen-ui) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/CodeGen/ui/docker/Dockerfile) | The docker image acted as the codegen UI entry for facilitating interaction with users for automatically generating code from user's description |
| [opea/codegen-react-ui](https://hub.docker.com/r/opea/codegen-react-ui) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/CodeGen/ui/docker/Dockerfile.react) | The purpose of the docker image is to provide a user interface for Codegen using React. It allows generating the appropriate code based on the current user input. |
@@ -48,7 +47,6 @@ Take ChatQnA for example. ChatQnA is a chatbot application service based on the
| [opea/guardrails-tgi](https://hub.docker.com/r/opea/guardrails-tgi) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/guardrails/llama_guard/langchain/Dockerfile) | The docker image exposed the OPEA guardrail microservice to provide content review for GenAI application use |
| [opea/guardrails-pii-detection](https://hub.docker.com/r/opea/guardrails-pii-detection) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/guardrails/pii_detection/Dockerfile) | The docker image exposed the OPEA guardrail microservice to provide PII detection for GenAI application use |
| [opea/habanalabs](https://hub.docker.com/r/opea/habanalabs) | | |
| [opea/knowledge_graphs](https://hub.docker.com/r/opea/knowledge_graphs) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/knowledgegraphs/langchain/Dockerfile) | The docker image served as knowledge graph gateway to enhance question answering with graph knowledge searching. |
| [opea/llm-docsum-tgi](https://hub.docker.com/r/opea/llm-docsum-tgi) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/llms/summarization/tgi/langchain/Dockerfile) | This docker image is designed to build a document summarization microservice using the HuggingFace Text Generation Inference(TGI) framework. The microservice accepts document input and generates a document summary. |
| [opea/llm-faqgen-tgi](https://hub.docker.com/r/opea/llm-faqgen-tgi) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/llms/faq-generation/tgi/langchain/Dockerfile) | This docker image is designed to build a frequently asked questions microservice using the HuggingFace Text Generation Inference(TGI) framework. The microservice accepts document input and generates a FAQ. |
| [opea/llm-ollama](https://hub.docker.com/r/opea/llm-ollama) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/llms/text-generation/ollama/langchain/Dockerfile) | The docker image exposed the OPEA LLM microservice based on ollama for GenAI application use |
@@ -57,7 +55,8 @@ Take ChatQnA for example. ChatQnA is a chatbot application service based on the
| [opea/llm-vllm-hpu](https://hub.docker.com/r/opea/llm-vllm-hpu) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/llms/text-generation/vllm/langchain/dependency/Dockerfile.intel_hpu) | The docker image exposed the OPEA LLM microservice upon vLLM docker image for use by GenAI apps on the Gaudi |
| [opea/llm-vllm-ray](https://hub.docker.com/r/opea/llm-vllm-ray) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/llms/text-generation/vllm/ray/Dockerfile) | The docker image exposes the OPEA LLM microservices Ray-based upon the vLLM Docker image for GenAI application use |
| [opea/llm-vllm-ray-hpu](https://hub.docker.com/r/opea/llm-vllm-ray-hpu) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/llms/text-generation/vllm/ray/dependency/Dockerfile) | The docker image exposes Ray-based OPEA LLM microservices upon the vLLM Docker image for use by GenAI applications on the Gaudi |
| [opea/prompt-registry-service](https://hub.docker.com/r/opea/prompt-registry-service) | | |
| [opea/chat-history-service](https://hub.docker.com/r/opea/chat-history-service) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/chathistory/mongo/Dockerfile) | The docker image exposes OPEA Chat History microservice which based on MongoDB database, designed to allow user to store, retrieve and manage chat conversations |
| [opea/prompt-registry-service](https://hub.docker.com/r/opea/prompt-registry-service) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/prompt_registry/mongo/Dockerfile) | The docker image exposes the OPEA Prompt Registry microservices which based on MongoDB database, designed to store and retrieve user's preferred prompts |
| [opea/reranking-tei](https://hub.docker.com/r/opea/reranking-tei) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/reranks/tei/Dockerfile) | The docker image exposed the OPEA reranking microservice based on tei docker image for GenAI application use |
| [opea/retriever-qdrant](https://hub.docker.com/r/opea/retriever-qdrant) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/retrievers/qdrant/haystack/Dockerfile) | The docker image exposed the OPEA retrieval microservice based on qdrant vectordb for GenAI application use |
| [opea/retriever-redis](https://hub.docker.com/r/opea/retriever-redis) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/retrievers/redis/langchain/Dockerfile) | The docker image exposed the OPEA retrieval microservice based on redis vectordb for GenAI application use |