Compare commits
40 Commits
add_gatewa
...
1.1_with_w
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cc5be6c6a3 | ||
|
|
2de1bfc5bb | ||
|
|
75df2c9979 | ||
|
|
62e06a0aff | ||
|
|
bd32b03e3c | ||
|
|
9d0b49c2d6 | ||
|
|
75ce2a3ca6 | ||
|
|
99c10933b4 | ||
|
|
8bcd82e82d | ||
|
|
c1038d2193 | ||
|
|
33b9d4e421 | ||
|
|
c9553c6f9a | ||
|
|
3e796ba73d | ||
|
|
5ed776709d | ||
|
|
954a22051b | ||
|
|
6f4b00f829 | ||
|
|
3fb60608b3 | ||
|
|
c35fe0b429 | ||
|
|
28f5e4a268 | ||
|
|
d55a33dda1 | ||
|
|
daf2a4fad7 | ||
|
|
3ce395582b | ||
|
|
7eaab93d0b | ||
|
|
bc817700b9 | ||
|
|
bd811bd622 | ||
|
|
05f9828e77 | ||
|
|
6c364487d3 | ||
|
|
21e215c5d5 | ||
|
|
a09395e4a4 | ||
|
|
f04f061f8c | ||
|
|
872e93e4bd | ||
|
|
2f03a3a894 | ||
|
|
372d78c2ac | ||
|
|
933c3d3445 | ||
|
|
88829c9381 | ||
|
|
d85ec0947c | ||
|
|
dc94026d98 | ||
|
|
1e130314d9 | ||
|
|
b205dc7571 | ||
|
|
3b70fb0d42 |
4
.github/CODEOWNERS
vendored
Normal file → Executable file
4
.github/CODEOWNERS
vendored
Normal file → Executable file
@@ -3,10 +3,10 @@
|
|||||||
/ChatQnA/ liang1.lv@intel.com
|
/ChatQnA/ liang1.lv@intel.com
|
||||||
/CodeGen/ liang1.lv@intel.com
|
/CodeGen/ liang1.lv@intel.com
|
||||||
/CodeTrans/ sihan.chen@intel.com
|
/CodeTrans/ sihan.chen@intel.com
|
||||||
/DocSum/ sihan.chen@intel.com
|
/DocSum/ letong.han@intel.com
|
||||||
/DocIndexRetriever/ xuhui.ren@intel.com chendi.xue@intel.com
|
/DocIndexRetriever/ xuhui.ren@intel.com chendi.xue@intel.com
|
||||||
/FaqGen/ xinyao.wang@intel.com
|
/FaqGen/ xinyao.wang@intel.com
|
||||||
/SearchQnA/ letong.han@intel.com
|
/SearchQnA/ sihan.chen@intel.com
|
||||||
/Translation/ liang1.lv@intel.com
|
/Translation/ liang1.lv@intel.com
|
||||||
/VisualQnA/ liang1.lv@intel.com
|
/VisualQnA/ liang1.lv@intel.com
|
||||||
/ProductivitySuite/ hoong.tee.yeoh@intel.com
|
/ProductivitySuite/ hoong.tee.yeoh@intel.com
|
||||||
|
|||||||
13
.github/workflows/_example-workflow.yml
vendored
13
.github/workflows/_example-workflow.yml
vendored
@@ -46,33 +46,30 @@ jobs:
|
|||||||
- name: Clean Up Working Directory
|
- name: Clean Up Working Directory
|
||||||
run: sudo rm -rf ${{github.workspace}}/*
|
run: sudo rm -rf ${{github.workspace}}/*
|
||||||
|
|
||||||
- name: Get checkout ref
|
- name: Get Checkout Ref
|
||||||
run: |
|
run: |
|
||||||
if [ "${{ github.event_name }}" == "pull_request" ] || [ "${{ github.event_name }}" == "pull_request_target" ]; then
|
if [ "${{ github.event_name }}" == "pull_request" ] || [ "${{ github.event_name }}" == "pull_request_target" ]; then
|
||||||
echo "CHECKOUT_REF=refs/pull/${{ github.event.number }}/merge" >> $GITHUB_ENV
|
echo "CHECKOUT_REF=refs/pull/${{ github.event.number }}/merge" >> $GITHUB_ENV
|
||||||
else
|
else
|
||||||
echo "CHECKOUT_REF=${{ github.ref }}" >> $GITHUB_ENV
|
echo "CHECKOUT_REF=${{ github.ref }}" >> $GITHUB_ENV
|
||||||
fi
|
fi
|
||||||
echo "checkout ref ${{ env.CHECKOUT_REF }}"
|
|
||||||
|
|
||||||
- name: Checkout out Repo
|
- name: Checkout out GenAIExamples
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
ref: ${{ env.CHECKOUT_REF }}
|
ref: ${{ env.CHECKOUT_REF }}
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
|
|
||||||
- name: Clone required Repo
|
- name: Clone Required Repo
|
||||||
run: |
|
run: |
|
||||||
cd ${{ github.workspace }}/${{ inputs.example }}/docker_image_build
|
cd ${{ github.workspace }}/${{ inputs.example }}/docker_image_build
|
||||||
docker_compose_path=${{ github.workspace }}/${{ inputs.example }}/docker_image_build/build.yaml
|
docker_compose_path=${{ github.workspace }}/${{ inputs.example }}/docker_image_build/build.yaml
|
||||||
if [[ $(grep -c "tei-gaudi:" ${docker_compose_path}) != 0 ]]; then
|
|
||||||
git clone https://github.com/huggingface/tei-gaudi.git
|
|
||||||
fi
|
|
||||||
if [[ $(grep -c "vllm:" ${docker_compose_path}) != 0 ]]; then
|
if [[ $(grep -c "vllm:" ${docker_compose_path}) != 0 ]]; then
|
||||||
git clone https://github.com/vllm-project/vllm.git
|
git clone https://github.com/vllm-project/vllm.git
|
||||||
|
cd vllm && git rev-parse HEAD && cd ../
|
||||||
fi
|
fi
|
||||||
git clone https://github.com/opea-project/GenAIComps.git
|
git clone https://github.com/opea-project/GenAIComps.git
|
||||||
cd GenAIComps && git checkout ${{ inputs.opea_branch }} && cd ../
|
cd GenAIComps && git checkout ${{ inputs.opea_branch }} && git rev-parse HEAD && cd ../
|
||||||
|
|
||||||
- name: Build Image
|
- name: Build Image
|
||||||
if: ${{ fromJSON(inputs.build) }}
|
if: ${{ fromJSON(inputs.build) }}
|
||||||
|
|||||||
16
.github/workflows/manual-docker-publish.yml
vendored
16
.github/workflows/manual-docker-publish.yml
vendored
@@ -11,23 +11,23 @@ on:
|
|||||||
required: true
|
required: true
|
||||||
type: string
|
type: string
|
||||||
examples:
|
examples:
|
||||||
default: "Translation"
|
default: ""
|
||||||
description: 'List of examples to publish [AudioQnA,ChatQnA,CodeGen,CodeTrans,DocSum,FaqGen,SearchQnA,Translation]'
|
description: 'List of examples to publish [AgentQnA,AudioQnA,ChatQnA,CodeGen,CodeTrans,DocIndexRetriever,DocSum,FaqGen,InstructionTuning,MultimodalQnA,ProductivitySuite,RerankFinetuning,SearchQnA,Translation,VideoQnA,VisualQnA]'
|
||||||
required: false
|
required: false
|
||||||
type: string
|
type: string
|
||||||
images:
|
images:
|
||||||
default: "gmcmanager,gmcrouter"
|
default: ""
|
||||||
description: 'List of images to publish [gmcmanager,gmcrouter, ...]'
|
description: 'List of images to publish [gmcmanager,gmcrouter]'
|
||||||
required: false
|
required: false
|
||||||
type: string
|
type: string
|
||||||
tag:
|
tag:
|
||||||
default: "v0.9"
|
default: "rc"
|
||||||
description: "Tag to publish"
|
description: "Tag to publish, like [1.0rc]"
|
||||||
required: true
|
required: true
|
||||||
type: string
|
type: string
|
||||||
publish_tags:
|
publish_tags:
|
||||||
default: "latest,v0.9"
|
default: "latest,1.x"
|
||||||
description: 'Tag list apply to publish images'
|
description: "Tag list apply to publish images, like [latest,1.0]"
|
||||||
required: false
|
required: false
|
||||||
type: string
|
type: string
|
||||||
|
|
||||||
|
|||||||
8
.github/workflows/manual-docker-scan.yml
vendored
8
.github/workflows/manual-docker-scan.yml
vendored
@@ -11,13 +11,13 @@ on:
|
|||||||
required: true
|
required: true
|
||||||
type: string
|
type: string
|
||||||
examples:
|
examples:
|
||||||
default: "ChatQnA"
|
default: ""
|
||||||
description: 'List of examples to scan [AudioQnA,ChatQnA,CodeGen,CodeTrans,DocSum,FaqGen,SearchQnA,Translation]'
|
description: 'List of examples to publish "AgentQnA,AudioQnA,ChatQnA,CodeGen,CodeTrans,DocIndexRetriever,DocSum,FaqGen,InstructionTuning,MultimodalQnA,ProductivitySuite,RerankFinetuning,SearchQnA,Translation,VideoQnA,VisualQnA"'
|
||||||
required: false
|
required: false
|
||||||
type: string
|
type: string
|
||||||
images:
|
images:
|
||||||
default: "gmcmanager,gmcrouter"
|
default: ""
|
||||||
description: 'List of images to scan [gmcmanager,gmcrouter, ...]'
|
description: 'List of images to publish "gmcmanager,gmcrouter"'
|
||||||
required: false
|
required: false
|
||||||
type: string
|
type: string
|
||||||
tag:
|
tag:
|
||||||
|
|||||||
54
.github/workflows/pr-manifest-validate.yml
vendored
54
.github/workflows/pr-manifest-validate.yml
vendored
@@ -1,54 +0,0 @@
|
|||||||
# Copyright (C) 2024 Intel Corporation
|
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
|
||||||
|
|
||||||
name: Manifests Validate
|
|
||||||
|
|
||||||
on:
|
|
||||||
pull_request:
|
|
||||||
branches: [main]
|
|
||||||
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
|
|
||||||
paths:
|
|
||||||
- "**/kubernetes/manifests/**"
|
|
||||||
- .github/workflows/manifest-validate.yml
|
|
||||||
workflow_dispatch:
|
|
||||||
|
|
||||||
# If there is a new commit, the previous jobs will be canceled
|
|
||||||
concurrency:
|
|
||||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
|
||||||
cancel-in-progress: true
|
|
||||||
|
|
||||||
env:
|
|
||||||
MANIFEST_DIR: "manifests"
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
manifests-validate:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- name: Checkout out Repo
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
fetch-depth: 0
|
|
||||||
|
|
||||||
- name: changed files
|
|
||||||
id: changed_files
|
|
||||||
run: |
|
|
||||||
set -xe
|
|
||||||
changed_folder=$(git diff --name-only ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }} | \
|
|
||||||
grep "kubernetes/manifests" | grep -vE '.github|README.md|*.txt|*.sh' | cut -d'/' -f1 | sort -u )
|
|
||||||
echo "changed_folder: $changed_folder"
|
|
||||||
if [ -z "$changed_folder" ]; then
|
|
||||||
echo "No changes in manifests folder"
|
|
||||||
echo "SKIP=true" >> $GITHUB_OUTPUT
|
|
||||||
exit 0
|
|
||||||
fi
|
|
||||||
echo "SKIP=false" >> $GITHUB_OUTPUT
|
|
||||||
for folder in $changed_folder; do
|
|
||||||
folder_str="$folder_str $folder/kubernetes/manifests/"
|
|
||||||
done
|
|
||||||
echo "folder_str=$folder_str"
|
|
||||||
echo "folder_str=$folder_str" >> $GITHUB_ENV
|
|
||||||
|
|
||||||
- uses: docker://ghcr.io/yannh/kubeconform:latest
|
|
||||||
if: steps.changed_files.outputs.SKIP == 'false'
|
|
||||||
with:
|
|
||||||
args: "-summary -output json ${{env.folder_str}}"
|
|
||||||
2
.github/workflows/pr-path-detection.yml
vendored
2
.github/workflows/pr-path-detection.yml
vendored
@@ -136,7 +136,7 @@ jobs:
|
|||||||
if [ "$response_retry" -eq 200 ]; then
|
if [ "$response_retry" -eq 200 ]; then
|
||||||
echo "*****Retry successfully*****"
|
echo "*****Retry successfully*****"
|
||||||
else
|
else
|
||||||
echo "Invalid link from $real_path: $url_dev"
|
echo "Invalid path from ${{github.workspace}}/$refer_path: $png_path"
|
||||||
fail="TRUE"
|
fail="TRUE"
|
||||||
fi
|
fi
|
||||||
else
|
else
|
||||||
|
|||||||
1
.github/workflows/push-image-build.yml
vendored
1
.github/workflows/push-image-build.yml
vendored
@@ -9,7 +9,6 @@ on:
|
|||||||
paths:
|
paths:
|
||||||
- "**.py"
|
- "**.py"
|
||||||
- "**Dockerfile"
|
- "**Dockerfile"
|
||||||
workflow_dispatch:
|
|
||||||
|
|
||||||
concurrency:
|
concurrency:
|
||||||
group: ${{ github.workflow }}-${{ github.ref }}-on-push
|
group: ${{ github.workflow }}-${{ github.ref }}-on-push
|
||||||
|
|||||||
@@ -18,8 +18,6 @@ repos:
|
|||||||
SearchQnA/ui/svelte/tsconfig.json|
|
SearchQnA/ui/svelte/tsconfig.json|
|
||||||
DocSum/ui/svelte/tsconfig.json
|
DocSum/ui/svelte/tsconfig.json
|
||||||
)$
|
)$
|
||||||
- id: check-yaml
|
|
||||||
args: [--allow-multiple-documents]
|
|
||||||
- id: debug-statements
|
- id: debug-statements
|
||||||
- id: requirements-txt-fixer
|
- id: requirements-txt-fixer
|
||||||
- id: trailing-whitespace
|
- id: trailing-whitespace
|
||||||
|
|||||||
@@ -103,4 +103,4 @@ curl http://${ip_address}:9090/v1/chat/completions -X POST -H "Content-Type: app
|
|||||||
|
|
||||||
## How to register your own tools with agent
|
## How to register your own tools with agent
|
||||||
|
|
||||||
You can take a look at the tools yaml and python files in this example. For more details, please refer to the "Provide your own tools" section in the instructions [here](https://github.com/opea-project/GenAIComps/tree/main/comps/agent/langchain#5-customize-agent-strategy).
|
You can take a look at the tools yaml and python files in this example. For more details, please refer to the "Provide your own tools" section in the instructions [here](https://github.com/opea-project/GenAIComps/tree/main/comps/agent/langchain/README.md#5-customize-agent-strategy).
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
|
|
||||||
services:
|
services:
|
||||||
tgi-server:
|
tgi-server:
|
||||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||||
container_name: tgi-server
|
container_name: tgi-server
|
||||||
ports:
|
ports:
|
||||||
- "8085:80"
|
- "8085:80"
|
||||||
@@ -13,12 +13,16 @@ services:
|
|||||||
no_proxy: ${no_proxy}
|
no_proxy: ${no_proxy}
|
||||||
http_proxy: ${http_proxy}
|
http_proxy: ${http_proxy}
|
||||||
https_proxy: ${https_proxy}
|
https_proxy: ${https_proxy}
|
||||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||||
HABANA_VISIBLE_DEVICES: all
|
HABANA_VISIBLE_DEVICES: all
|
||||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||||
PT_HPU_ENABLE_LAZY_COLLECTIVES: true
|
PT_HPU_ENABLE_LAZY_COLLECTIVES: true
|
||||||
|
ENABLE_HPU_GRAPH: true
|
||||||
|
LIMIT_HPU_GRAPH: true
|
||||||
|
USE_FLASH_ATTENTION: true
|
||||||
|
FLASH_ATTENTION_RECOMPUTE: true
|
||||||
runtime: habana
|
runtime: habana
|
||||||
cap_add:
|
cap_add:
|
||||||
- SYS_NICE
|
- SYS_NICE
|
||||||
|
|||||||
51
AudioQnA/benchmark/accuracy/README.md
Normal file
51
AudioQnA/benchmark/accuracy/README.md
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
# AudioQnA Accuracy Evaluation
|
||||||
|
|
||||||
|
AudioQnA is an example that demonstrates the integration of Generative AI (GenAI) models for performing question-answering (QnA) on audio, and it includes Automatic Speech Recognition (ASR) and Text-to-Speech (TTS). The following is the pipeline for evaluating the ASR accuracy.
|
||||||
|
|
||||||
|
## Dataset
|
||||||
|
|
||||||
|
We evaluate the ASR accuracy on the test set of the LibriSpeech [dataset](https://huggingface.co/datasets/andreagasparini/librispeech_test_only), which contains 2620 records of audio and text.
|
||||||
|
|
||||||
|
## Metrics
|
||||||
|
|
||||||
|
We evaluate the WER (Word Error Rate) metric of the ASR microservice.
|
||||||
|
|
||||||
|
## Evaluation
|
||||||
|
|
||||||
|
### Launch ASR microservice
|
||||||
|
|
||||||
|
Launch the ASR microservice with the following commands. For more details, please refer to the [doc](https://github.com/opea-project/GenAIComps/tree/main/comps/asr/whisper/README.md).
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git clone https://github.com/opea-project/GenAIComps
|
||||||
|
cd GenAIComps
|
||||||
|
docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/Dockerfile .
|
||||||
|
# select the model you want to evaluate by changing the value of --model_name_or_path
|
||||||
|
docker run -p 7066:7066 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/whisper:latest --model_name_or_path "openai/whisper-tiny"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Evaluate
|
||||||
|
|
||||||
|
Install dependencies:
|
||||||
|
|
||||||
|
```
|
||||||
|
pip install -r requirements.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
Evaluate the accuracy of the ASR model:
|
||||||
|
|
||||||
|
```py
|
||||||
|
# validate the offline model
|
||||||
|
# python local_eval.py
|
||||||
|
# validate the online asr microservice accuracy
|
||||||
|
python online_eval.py
|
||||||
|
```
|
||||||
|
|
||||||
|
### Performance Result
|
||||||
|
|
||||||
|
Here are the test results for your reference:
|
||||||
|
|| WER |
|
||||||
|
| --- | ---- |
|
||||||
|
|whisper-large-v2| 2.87|
|
||||||
|
|whisper-large| 2.7 |
|
||||||
|
|whisper-medium| 3.45 |
|
||||||
35
AudioQnA/benchmark/accuracy/local_eval.py
Normal file
35
AudioQnA/benchmark/accuracy/local_eval.py
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
import torch
|
||||||
|
from datasets import load_dataset
|
||||||
|
from evaluate import load
|
||||||
|
from transformers import WhisperForConditionalGeneration, WhisperProcessor
|
||||||
|
|
||||||
|
device = "cuda" if torch.cuda.is_available() else "cpu"
|
||||||
|
|
||||||
|
MODEL_NAME = "openai/whisper-large-v2"
|
||||||
|
|
||||||
|
librispeech_test_clean = load_dataset(
|
||||||
|
"andreagasparini/librispeech_test_only", "clean", split="test", trust_remote_code=True
|
||||||
|
)
|
||||||
|
processor = WhisperProcessor.from_pretrained(MODEL_NAME)
|
||||||
|
model = WhisperForConditionalGeneration.from_pretrained(MODEL_NAME).to(device)
|
||||||
|
|
||||||
|
|
||||||
|
def map_to_pred(batch):
|
||||||
|
audio = batch["audio"]
|
||||||
|
input_features = processor(audio["array"], sampling_rate=audio["sampling_rate"], return_tensors="pt").input_features
|
||||||
|
batch["reference"] = processor.tokenizer._normalize(batch["text"])
|
||||||
|
|
||||||
|
with torch.no_grad():
|
||||||
|
predicted_ids = model.generate(input_features.to(device))[0]
|
||||||
|
transcription = processor.decode(predicted_ids)
|
||||||
|
batch["prediction"] = processor.tokenizer._normalize(transcription)
|
||||||
|
return batch
|
||||||
|
|
||||||
|
|
||||||
|
result = librispeech_test_clean.map(map_to_pred)
|
||||||
|
|
||||||
|
wer = load("wer")
|
||||||
|
print(100 * wer.compute(references=result["reference"], predictions=result["prediction"]))
|
||||||
56
AudioQnA/benchmark/accuracy/online_eval.py
Normal file
56
AudioQnA/benchmark/accuracy/online_eval.py
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
import base64
|
||||||
|
import json
|
||||||
|
|
||||||
|
import requests
|
||||||
|
import torch
|
||||||
|
from datasets import load_dataset
|
||||||
|
from evaluate import load
|
||||||
|
from pydub import AudioSegment
|
||||||
|
from transformers import WhisperForConditionalGeneration, WhisperProcessor
|
||||||
|
|
||||||
|
MODEL_NAME = "openai/whisper-large-v2"
|
||||||
|
processor = WhisperProcessor.from_pretrained(MODEL_NAME)
|
||||||
|
|
||||||
|
librispeech_test_clean = load_dataset(
|
||||||
|
"andreagasparini/librispeech_test_only", "clean", split="test", trust_remote_code=True
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def map_to_pred(batch):
|
||||||
|
batch["reference"] = processor.tokenizer._normalize(batch["text"])
|
||||||
|
|
||||||
|
file_path = batch["file"]
|
||||||
|
# process the file_path
|
||||||
|
pidx = file_path.rfind("/")
|
||||||
|
sidx = file_path.rfind(".")
|
||||||
|
|
||||||
|
file_path_prefix = file_path[: pidx + 1]
|
||||||
|
file_path_suffix = file_path[sidx:]
|
||||||
|
file_path_mid = file_path[pidx + 1 : sidx]
|
||||||
|
splits = file_path_mid.split("-")
|
||||||
|
file_path_mid = f"LibriSpeech/test-clean/{splits[0]}/{splits[1]}/{file_path_mid}"
|
||||||
|
|
||||||
|
file_path = file_path_prefix + file_path_mid + file_path_suffix
|
||||||
|
|
||||||
|
audio = AudioSegment.from_file(file_path)
|
||||||
|
audio.export("tmp.wav")
|
||||||
|
with open("tmp.wav", "rb") as f:
|
||||||
|
test_audio_base64_str = base64.b64encode(f.read()).decode("utf-8")
|
||||||
|
|
||||||
|
inputs = {"audio": test_audio_base64_str}
|
||||||
|
endpoint = "http://localhost:7066/v1/asr"
|
||||||
|
response = requests.post(url=endpoint, data=json.dumps(inputs), proxies={"http": None})
|
||||||
|
|
||||||
|
result_str = response.json()["asr_result"]
|
||||||
|
|
||||||
|
batch["prediction"] = processor.tokenizer._normalize(result_str)
|
||||||
|
return batch
|
||||||
|
|
||||||
|
|
||||||
|
result = librispeech_test_clean.map(map_to_pred)
|
||||||
|
|
||||||
|
wer = load("wer")
|
||||||
|
print(100 * wer.compute(references=result["reference"], predictions=result["prediction"]))
|
||||||
8
AudioQnA/benchmark/accuracy/requirements.txt
Normal file
8
AudioQnA/benchmark/accuracy/requirements.txt
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
datasets
|
||||||
|
evaluate
|
||||||
|
jiwer
|
||||||
|
librosa
|
||||||
|
pydub
|
||||||
|
soundfile
|
||||||
|
torch
|
||||||
|
transformers
|
||||||
@@ -108,7 +108,7 @@ curl http://${host_ip}:3006/generate \
|
|||||||
# llm microservice
|
# llm microservice
|
||||||
curl http://${host_ip}:3007/v1/chat/completions\
|
curl http://${host_ip}:3007/v1/chat/completions\
|
||||||
-X POST \
|
-X POST \
|
||||||
-d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \
|
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \
|
||||||
-H 'Content-Type: application/json'
|
-H 'Content-Type: application/json'
|
||||||
|
|
||||||
# speecht5 service
|
# speecht5 service
|
||||||
|
|||||||
@@ -108,7 +108,7 @@ curl http://${host_ip}:3006/generate \
|
|||||||
# llm microservice
|
# llm microservice
|
||||||
curl http://${host_ip}:3007/v1/chat/completions\
|
curl http://${host_ip}:3007/v1/chat/completions\
|
||||||
-X POST \
|
-X POST \
|
||||||
-d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \
|
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \
|
||||||
-H 'Content-Type: application/json'
|
-H 'Content-Type: application/json'
|
||||||
|
|
||||||
# speecht5 service
|
# speecht5 service
|
||||||
|
|||||||
@@ -51,7 +51,7 @@ services:
|
|||||||
environment:
|
environment:
|
||||||
TTS_ENDPOINT: ${TTS_ENDPOINT}
|
TTS_ENDPOINT: ${TTS_ENDPOINT}
|
||||||
tgi-service:
|
tgi-service:
|
||||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
|
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||||
container_name: tgi-gaudi-server
|
container_name: tgi-gaudi-server
|
||||||
ports:
|
ports:
|
||||||
- "3006:80"
|
- "3006:80"
|
||||||
@@ -61,11 +61,15 @@ services:
|
|||||||
no_proxy: ${no_proxy}
|
no_proxy: ${no_proxy}
|
||||||
http_proxy: ${http_proxy}
|
http_proxy: ${http_proxy}
|
||||||
https_proxy: ${https_proxy}
|
https_proxy: ${https_proxy}
|
||||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||||
HABANA_VISIBLE_DEVICES: all
|
HABANA_VISIBLE_DEVICES: all
|
||||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||||
|
ENABLE_HPU_GRAPH: true
|
||||||
|
LIMIT_HPU_GRAPH: true
|
||||||
|
USE_FLASH_ATTENTION: true
|
||||||
|
FLASH_ATTENTION_RECOMPUTE: true
|
||||||
runtime: habana
|
runtime: habana
|
||||||
cap_add:
|
cap_add:
|
||||||
- SYS_NICE
|
- SYS_NICE
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ This document outlines the deployment process for a AudioQnA application utilizi
|
|||||||
|
|
||||||
The AudioQnA Service leverages a Kubernetes operator called genai-microservices-connector(GMC). GMC supports connecting microservices to create pipelines based on the specification in the pipeline yaml file in addition to allowing the user to dynamically control which model is used in a service such as an LLM or embedder. The underlying pipeline language also supports using external services that may be running in public or private cloud elsewhere.
|
The AudioQnA Service leverages a Kubernetes operator called genai-microservices-connector(GMC). GMC supports connecting microservices to create pipelines based on the specification in the pipeline yaml file in addition to allowing the user to dynamically control which model is used in a service such as an LLM or embedder. The underlying pipeline language also supports using external services that may be running in public or private cloud elsewhere.
|
||||||
|
|
||||||
Install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in Section "Getting Started" at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector). Soon as we publish images to Docker Hub, at which point no builds will be required, simplifying install.
|
Install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in Section "Getting Started" at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector/README.md). Soon as we publish images to Docker Hub, at which point no builds will be required, simplifying install.
|
||||||
|
|
||||||
|
|
||||||
The AudioQnA application is defined as a Custom Resource (CR) file that the above GMC operator acts upon. It first checks if the microservices listed in the CR yaml file are running, if not starts them and then proceeds to connect them. When the AudioQnA pipeline is ready, the service endpoint details are returned, letting you use the application. Should you use "kubectl get pods" commands you will see all the component microservices, in particular `asr`, `tts`, and `llm`.
|
The AudioQnA application is defined as a Custom Resource (CR) file that the above GMC operator acts upon. It first checks if the microservices listed in the CR yaml file are running, if not starts them and then proceeds to connect them. When the AudioQnA pipeline is ready, the service endpoint details are returned, letting you use the application. Should you use "kubectl get pods" commands you will see all the component microservices, in particular `asr`, `tts`, and `llm`.
|
||||||
@@ -25,7 +25,7 @@ The AudioQnA uses the below prebuilt images if you choose a Xeon deployment
|
|||||||
Should you desire to use the Gaudi accelerator, two alternate images are used for the embedding and llm services.
|
Should you desire to use the Gaudi accelerator, two alternate images are used for the embedding and llm services.
|
||||||
For Gaudi:
|
For Gaudi:
|
||||||
|
|
||||||
- tgi-service: ghcr.io/huggingface/tgi-gaudi:1.2.1
|
- tgi-service: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||||
- whisper-gaudi: opea/whisper-gaudi:latest
|
- whisper-gaudi: opea/whisper-gaudi:latest
|
||||||
- speecht5-gaudi: opea/speecht5-gaudi:latest
|
- speecht5-gaudi: opea/speecht5-gaudi:latest
|
||||||
|
|
||||||
|
|||||||
@@ -247,7 +247,7 @@ spec:
|
|||||||
- envFrom:
|
- envFrom:
|
||||||
- configMapRef:
|
- configMapRef:
|
||||||
name: audio-qna-config
|
name: audio-qna-config
|
||||||
image: ghcr.io/huggingface/text-generation-inference:2.2.0
|
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
|
||||||
name: llm-dependency-deploy-demo
|
name: llm-dependency-deploy-demo
|
||||||
securityContext:
|
securityContext:
|
||||||
capabilities:
|
capabilities:
|
||||||
|
|||||||
@@ -271,7 +271,7 @@ spec:
|
|||||||
- envFrom:
|
- envFrom:
|
||||||
- configMapRef:
|
- configMapRef:
|
||||||
name: audio-qna-config
|
name: audio-qna-config
|
||||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
|
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||||
name: llm-dependency-deploy-demo
|
name: llm-dependency-deploy-demo
|
||||||
securityContext:
|
securityContext:
|
||||||
capabilities:
|
capabilities:
|
||||||
@@ -303,6 +303,14 @@ spec:
|
|||||||
value: none
|
value: none
|
||||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||||
value: 'true'
|
value: 'true'
|
||||||
|
- name: ENABLE_HPU_GRAPH
|
||||||
|
value: 'true'
|
||||||
|
- name: LIMIT_HPU_GRAPH
|
||||||
|
value: 'true'
|
||||||
|
- name: USE_FLASH_ATTENTION
|
||||||
|
value: 'true'
|
||||||
|
- name: FLASH_ATTENTION_RECOMPUTE
|
||||||
|
value: 'true'
|
||||||
- name: runtime
|
- name: runtime
|
||||||
value: habana
|
value: habana
|
||||||
- name: HABANA_VISIBLE_DEVICES
|
- name: HABANA_VISIBLE_DEVICES
|
||||||
@@ -315,7 +323,7 @@ spec:
|
|||||||
volumes:
|
volumes:
|
||||||
- name: model-volume
|
- name: model-volume
|
||||||
hostPath:
|
hostPath:
|
||||||
path: /home/sdp/cesg
|
path: /mnt/models
|
||||||
type: Directory
|
type: Directory
|
||||||
- name: shm
|
- name: shm
|
||||||
emptyDir:
|
emptyDir:
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ function build_docker_images() {
|
|||||||
service_list="audioqna whisper-gaudi asr llm-tgi speecht5-gaudi tts"
|
service_list="audioqna whisper-gaudi asr llm-tgi speecht5-gaudi tts"
|
||||||
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
|
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
|
||||||
|
|
||||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
|
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||||
docker images && sleep 1s
|
docker images && sleep 1s
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ function build_docker_images() {
|
|||||||
service_list="audioqna whisper asr llm-tgi speecht5 tts"
|
service_list="audioqna whisper asr llm-tgi speecht5 tts"
|
||||||
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
|
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
|
||||||
|
|
||||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
|
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||||
docker images && sleep 1s
|
docker images && sleep 1s
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ function validate_audioqa() {
|
|||||||
export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name})
|
export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name})
|
||||||
echo "$CLIENT_POD"
|
echo "$CLIENT_POD"
|
||||||
accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='audioqa')].status.accessUrl}")
|
accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='audioqa')].status.accessUrl}")
|
||||||
byte_str=$(kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -s -X POST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "parameters":{"max_new_tokens":64, "do_sample": true, "streaming":false}}' -H 'Content-Type: application/json' | jq .byte_str)
|
byte_str=$(kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -s -X POST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "parameters":{"max_tokens":64, "do_sample": true, "streaming":false}}' -H 'Content-Type: application/json' | jq .byte_str)
|
||||||
echo "$byte_str" > $LOG_PATH/curl_audioqa.log
|
echo "$byte_str" > $LOG_PATH/curl_audioqa.log
|
||||||
if [ -z "$byte_str" ]; then
|
if [ -z "$byte_str" ]; then
|
||||||
echo "audioqa failed, please check the logs in ${LOG_PATH}!"
|
echo "audioqa failed, please check the logs in ${LOG_PATH}!"
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ function validate_audioqa() {
|
|||||||
export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name})
|
export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name})
|
||||||
echo "$CLIENT_POD"
|
echo "$CLIENT_POD"
|
||||||
accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='audioqa')].status.accessUrl}")
|
accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='audioqa')].status.accessUrl}")
|
||||||
byte_str=$(kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -s -X POST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "parameters":{"max_new_tokens":64, "do_sample": true, "streaming":false}}' -H 'Content-Type: application/json' | jq .byte_str)
|
byte_str=$(kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -s -X POST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "parameters":{"max_tokens":64, "do_sample": true, "streaming":false}}' -H 'Content-Type: application/json' | jq .byte_str)
|
||||||
echo "$byte_str" > $LOG_PATH/curl_audioqa.log
|
echo "$byte_str" > $LOG_PATH/curl_audioqa.log
|
||||||
if [ -z "$byte_str" ]; then
|
if [ -z "$byte_str" ]; then
|
||||||
echo "audioqa failed, please check the logs in ${LOG_PATH}!"
|
echo "audioqa failed, please check the logs in ${LOG_PATH}!"
|
||||||
|
|||||||
@@ -22,7 +22,6 @@ RUN pip install --no-cache-dir --upgrade pip && \
|
|||||||
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt
|
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt
|
||||||
|
|
||||||
COPY ./chatqna.py /home/user/chatqna.py
|
COPY ./chatqna.py /home/user/chatqna.py
|
||||||
COPY ./gateway.py /home/user/gateway.py
|
|
||||||
|
|
||||||
ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps
|
ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps
|
||||||
|
|
||||||
|
|||||||
@@ -16,6 +16,8 @@ Quick Start Deployment Steps:
|
|||||||
2. Run Docker Compose.
|
2. Run Docker Compose.
|
||||||
3. Consume the ChatQnA Service.
|
3. Consume the ChatQnA Service.
|
||||||
|
|
||||||
|
Note: If you do not have Docker installed, you can run this script to install it: `bash docker_compose/install_docker.sh`
|
||||||
|
|
||||||
### Quick Start: 1.Setup Environment Variable
|
### Quick Start: 1.Setup Environment Variable
|
||||||
|
|
||||||
To set up environment variables for deploying ChatQnA services, follow these steps:
|
To set up environment variables for deploying ChatQnA services, follow these steps:
|
||||||
@@ -53,6 +55,7 @@ To set up environment variables for deploying ChatQnA services, follow these ste
|
|||||||
### Quick Start: 2.Run Docker Compose
|
### Quick Start: 2.Run Docker Compose
|
||||||
|
|
||||||
Select the compose.yaml file that matches your hardware.
|
Select the compose.yaml file that matches your hardware.
|
||||||
|
|
||||||
CPU example:
|
CPU example:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
@@ -69,9 +72,13 @@ docker pull opea/chatqna:latest
|
|||||||
docker pull opea/chatqna-ui:latest
|
docker pull opea/chatqna-ui:latest
|
||||||
```
|
```
|
||||||
|
|
||||||
If you want to build docker by yourself, please refer to `built from source`: [Guide](docker_compose/intel/cpu/xeon/README.md).
|
In the following cases, you can build the Docker image from source yourself.
|
||||||
|
|
||||||
> Note: The optional docker image **opea/chatqna-without-rerank:latest** has not been published yet, users need to build this docker image from source.
|
- The Docker image fails to download.
|
||||||
|
|
||||||
|
- You want to use a specific version of the Docker image.
|
||||||
|
|
||||||
|
Please refer to the 'Build Docker Images' in [Guide](docker_compose/intel/cpu/xeon/README.md).
|
||||||
|
|
||||||
### QuickStart: 3.Consume the ChatQnA Service
|
### QuickStart: 3.Consume the ChatQnA Service
|
||||||
|
|
||||||
@@ -235,7 +242,7 @@ Refer to the [Kubernetes Guide](./kubernetes/intel/README.md) for instructions o
|
|||||||
|
|
||||||
Install Helm (version >= 3.15) first. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information.
|
Install Helm (version >= 3.15) first. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information.
|
||||||
|
|
||||||
Refer to the [ChatQnA helm chart](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts/chatqna) for instructions on deploying ChatQnA into Kubernetes on Xeon & Gaudi.
|
Refer to the [ChatQnA helm chart](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts/chatqna/README.md) for instructions on deploying ChatQnA into Kubernetes on Xeon & Gaudi.
|
||||||
|
|
||||||
### Deploy ChatQnA on AI PC
|
### Deploy ChatQnA on AI PC
|
||||||
|
|
||||||
@@ -245,7 +252,9 @@ Refer to the [AI PC Guide](./docker_compose/intel/cpu/aipc/README.md) for instru
|
|||||||
|
|
||||||
Refer to the [Intel Technology enabling for Openshift readme](https://github.com/intel/intel-technology-enabling-for-openshift/blob/main/workloads/opea/chatqna/README.md) for instructions to deploy ChatQnA prototype on RHOCP with [Red Hat OpenShift AI (RHOAI)](https://www.redhat.com/en/technologies/cloud-computing/openshift/openshift-ai).
|
Refer to the [Intel Technology enabling for Openshift readme](https://github.com/intel/intel-technology-enabling-for-openshift/blob/main/workloads/opea/chatqna/README.md) for instructions to deploy ChatQnA prototype on RHOCP with [Red Hat OpenShift AI (RHOAI)](https://www.redhat.com/en/technologies/cloud-computing/openshift/openshift-ai).
|
||||||
|
|
||||||
## Consume ChatQnA Service
|
## Consume ChatQnA Service with RAG
|
||||||
|
|
||||||
|
### Check Service Status
|
||||||
|
|
||||||
Before consuming ChatQnA Service, make sure the TGI/vLLM service is ready (which takes up to 2 minutes to start).
|
Before consuming ChatQnA Service, make sure the TGI/vLLM service is ready (which takes up to 2 minutes to start).
|
||||||
|
|
||||||
@@ -260,6 +269,23 @@ Consume ChatQnA service until you get the TGI response like below.
|
|||||||
2024-09-03T02:47:53.402023Z INFO text_generation_router::server: router/src/server.rs:2311: Connected
|
2024-09-03T02:47:53.402023Z INFO text_generation_router::server: router/src/server.rs:2311: Connected
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Upload RAG Files (Optional)
|
||||||
|
|
||||||
|
To chat with retrieved information, you need to upload a file using `Dataprep` service.
|
||||||
|
|
||||||
|
Here is an example of `Nike 2023` pdf.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# download pdf file
|
||||||
|
wget https://raw.githubusercontent.com/opea-project/GenAIComps/main/comps/retrievers/redis/data/nke-10k-2023.pdf
|
||||||
|
# upload pdf file with dataprep
|
||||||
|
curl -X POST "http://${host_ip}:6007/v1/dataprep" \
|
||||||
|
-H "Content-Type: multipart/form-data" \
|
||||||
|
-F "files=@./nke-10k-2023.pdf"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Consume Chat Service
|
||||||
|
|
||||||
Two ways of consuming ChatQnA Service:
|
Two ways of consuming ChatQnA Service:
|
||||||
|
|
||||||
1. Use cURL command on terminal
|
1. Use cURL command on terminal
|
||||||
@@ -282,7 +308,7 @@ Two ways of consuming ChatQnA Service:
|
|||||||
|
|
||||||
## Troubleshooting
|
## Troubleshooting
|
||||||
|
|
||||||
1. If you get errors like "Access Denied", [validate micro service](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/docker_compose/intel/cpu/xeon#validate-microservices) first. A simple example:
|
1. If you get errors like "Access Denied", [validate micro service](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/docker_compose/intel/cpu/xeon/README.md#validate-microservices) first. A simple example:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
http_proxy="" curl ${host_ip}:6006/embed -X POST -d '{"inputs":"What is Deep Learning?"}' -H 'Content-Type: application/json'
|
http_proxy="" curl ${host_ip}:6006/embed -X POST -d '{"inputs":"What is Deep Learning?"}' -H 'Content-Type: application/json'
|
||||||
|
|||||||
@@ -275,10 +275,6 @@ spec:
|
|||||||
- '2048'
|
- '2048'
|
||||||
- --max-total-tokens
|
- --max-total-tokens
|
||||||
- '4096'
|
- '4096'
|
||||||
- --max-batch-total-tokens
|
|
||||||
- '65536'
|
|
||||||
- --max-batch-prefill-tokens
|
|
||||||
- '4096'
|
|
||||||
env:
|
env:
|
||||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||||
value: none
|
value: none
|
||||||
@@ -275,10 +275,6 @@ spec:
|
|||||||
- '2048'
|
- '2048'
|
||||||
- --max-total-tokens
|
- --max-total-tokens
|
||||||
- '4096'
|
- '4096'
|
||||||
- --max-batch-total-tokens
|
|
||||||
- '65536'
|
|
||||||
- --max-batch-prefill-tokens
|
|
||||||
- '4096'
|
|
||||||
env:
|
env:
|
||||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||||
value: none
|
value: none
|
||||||
@@ -275,10 +275,6 @@ spec:
|
|||||||
- '2048'
|
- '2048'
|
||||||
- --max-total-tokens
|
- --max-total-tokens
|
||||||
- '4096'
|
- '4096'
|
||||||
- --max-batch-total-tokens
|
|
||||||
- '65536'
|
|
||||||
- --max-batch-prefill-tokens
|
|
||||||
- '4096'
|
|
||||||
env:
|
env:
|
||||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||||
value: none
|
value: none
|
||||||
@@ -336,10 +336,6 @@ spec:
|
|||||||
- '2048'
|
- '2048'
|
||||||
- --max-total-tokens
|
- --max-total-tokens
|
||||||
- '4096'
|
- '4096'
|
||||||
- --max-batch-total-tokens
|
|
||||||
- '65536'
|
|
||||||
- --max-batch-prefill-tokens
|
|
||||||
- '4096'
|
|
||||||
volumeMounts:
|
volumeMounts:
|
||||||
- mountPath: /data
|
- mountPath: /data
|
||||||
name: model-volume
|
name: model-volume
|
||||||
@@ -336,10 +336,6 @@ spec:
|
|||||||
- '2048'
|
- '2048'
|
||||||
- --max-total-tokens
|
- --max-total-tokens
|
||||||
- '4096'
|
- '4096'
|
||||||
- --max-batch-total-tokens
|
|
||||||
- '65536'
|
|
||||||
- --max-batch-prefill-tokens
|
|
||||||
- '4096'
|
|
||||||
volumeMounts:
|
volumeMounts:
|
||||||
- mountPath: /data
|
- mountPath: /data
|
||||||
name: model-volume
|
name: model-volume
|
||||||
@@ -336,10 +336,6 @@ spec:
|
|||||||
- '2048'
|
- '2048'
|
||||||
- --max-total-tokens
|
- --max-total-tokens
|
||||||
- '4096'
|
- '4096'
|
||||||
- --max-batch-total-tokens
|
|
||||||
- '65536'
|
|
||||||
- --max-batch-prefill-tokens
|
|
||||||
- '4096'
|
|
||||||
volumeMounts:
|
volumeMounts:
|
||||||
- mountPath: /data
|
- mountPath: /data
|
||||||
name: model-volume
|
name: model-volume
|
||||||
@@ -29,6 +29,8 @@ Results will be displayed in the terminal and saved as CSV file named `1_stats.c
|
|||||||
|
|
||||||
## Getting Started
|
## Getting Started
|
||||||
|
|
||||||
|
We recommend using Kubernetes to deploy the ChatQnA service, as it offers benefits such as load balancing and improved scalability. However, you can also deploy the service using Docker if that better suits your needs. Below is a description of Kubernetes deployment and benchmarking. For instructions on deploying and benchmarking with Docker, please refer to [this section](#benchmark-with-docker).
|
||||||
|
|
||||||
### Prerequisites
|
### Prerequisites
|
||||||
|
|
||||||
- Install Kubernetes by following [this guide](https://github.com/opea-project/docs/blob/main/guide/installation/k8s_install/k8s_install_kubespray.md).
|
- Install Kubernetes by following [this guide](https://github.com/opea-project/docs/blob/main/guide/installation/k8s_install/k8s_install_kubespray.md).
|
||||||
@@ -67,7 +69,7 @@ We have created the [BKC manifest](https://github.com/opea-project/GenAIExamples
|
|||||||
```bash
|
```bash
|
||||||
# on k8s-master node
|
# on k8s-master node
|
||||||
git clone https://github.com/opea-project/GenAIExamples.git
|
git clone https://github.com/opea-project/GenAIExamples.git
|
||||||
cd GenAIExamples/ChatQnA/benchmark
|
cd GenAIExamples/ChatQnA/benchmark/performance
|
||||||
|
|
||||||
# replace the image tag from latest to v0.9 since we want to test with v0.9 release
|
# replace the image tag from latest to v0.9 since we want to test with v0.9 release
|
||||||
IMAGE_TAG=v0.9
|
IMAGE_TAG=v0.9
|
||||||
@@ -88,7 +90,7 @@ find . -name '*.yaml' -type f -exec sed -i "s#\$(RERANK_MODEL_ID)#${RERANK_MODEL
|
|||||||
|
|
||||||
### Benchmark tool preparation
|
### Benchmark tool preparation
|
||||||
|
|
||||||
The test uses the [benchmark tool](https://github.com/opea-project/GenAIEval/tree/main/evals/benchmark) to do performance test. We need to set up benchmark tool at the master node of Kubernetes which is k8s-master.
|
The test uses the [benchmark tool](https://github.com/opea-project/GenAIEval/tree/main/evals/benchmark/README.md) to do performance test. We need to set up benchmark tool at the master node of Kubernetes which is k8s-master.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# on k8s-master node
|
# on k8s-master node
|
||||||
@@ -144,11 +146,11 @@ kubectl label nodes k8s-worker1 node-type=chatqna-opea
|
|||||||
|
|
||||||
##### 2. Install ChatQnA
|
##### 2. Install ChatQnA
|
||||||
|
|
||||||
Go to [BKC manifest](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/benchmark/tuned/with_rerank/single_gaudi) and apply to K8s.
|
Go to [BKC manifest](./tuned/with_rerank/single_gaudi) and apply to K8s.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# on k8s-master node
|
# on k8s-master node
|
||||||
cd GenAIExamples/ChatQnA/benchmark/tuned/with_rerank/single_gaudi
|
cd GenAIExamples/ChatQnA/benchmark/performance/tuned/with_rerank/single_gaudi
|
||||||
kubectl apply -f .
|
kubectl apply -f .
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -187,10 +189,13 @@ curl -X POST "http://${cluster_ip}:6007/v1/dataprep" \
|
|||||||
|
|
||||||
###### 3.2 Run Benchmark Test
|
###### 3.2 Run Benchmark Test
|
||||||
|
|
||||||
We copy the configuration file [benchmark.yaml](./benchmark.yaml) to `GenAIEval/evals/benchmark/benchmark.yaml` and config `test_suite_config.user_queries` and `test_suite_config.test_output_dir`.
|
We copy the configuration file [benchmark.yaml](./benchmark.yaml) to `GenAIEval/evals/benchmark/benchmark.yaml` and config `test_suite_config.deployment_type`, `test_suite_config.service_ip`, `test_suite_config.service_port`, `test_suite_config.user_queries` and `test_suite_config.test_output_dir`.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
export USER_QUERIES="[4, 8, 16, 640]"
|
export DEPLOYMENT_TYPE="k8s"
|
||||||
|
export SERVICE_IP=None
|
||||||
|
export SERVICE_PORT=None
|
||||||
|
export USER_QUERIES="[640, 640, 640, 640]"
|
||||||
export TEST_OUTPUT_DIR="/home/sdp/benchmark_output/node_1"
|
export TEST_OUTPUT_DIR="/home/sdp/benchmark_output/node_1"
|
||||||
envsubst < ./benchmark.yaml > GenAIEval/evals/benchmark/benchmark.yaml
|
envsubst < ./benchmark.yaml > GenAIEval/evals/benchmark/benchmark.yaml
|
||||||
```
|
```
|
||||||
@@ -210,7 +215,7 @@ All the test results will come to this folder `/home/sdp/benchmark_output/node_1
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
# on k8s-master node
|
# on k8s-master node
|
||||||
cd GenAIExamples/ChatQnA/benchmark/tuned/with_rerank/single_gaudi
|
cd GenAIExamples/ChatQnA/benchmark/performance/tuned/with_rerank/single_gaudi
|
||||||
kubectl delete -f .
|
kubectl delete -f .
|
||||||
kubectl label nodes k8s-worker1 node-type-
|
kubectl label nodes k8s-worker1 node-type-
|
||||||
```
|
```
|
||||||
@@ -227,30 +232,32 @@ kubectl label nodes k8s-worker1 k8s-worker2 node-type=chatqna-opea
|
|||||||
|
|
||||||
##### 2. Install ChatQnA
|
##### 2. Install ChatQnA
|
||||||
|
|
||||||
Go to [BKC manifest](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/benchmark/tuned/with_rerank/two_gaudi) and apply to K8s.
|
Go to [BKC manifest](./tuned/with_rerank/two_gaudi) and apply to K8s.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# on k8s-master node
|
# on k8s-master node
|
||||||
cd GenAIExamples/ChatQnA/benchmark/tuned/with_rerank/two_gaudi
|
cd GenAIExamples/ChatQnA/benchmark/performance/tuned/with_rerank/two_gaudi
|
||||||
kubectl apply -f .
|
kubectl apply -f .
|
||||||
```
|
```
|
||||||
|
|
||||||
##### 3. Run tests
|
##### 3. Run tests
|
||||||
|
|
||||||
We copy the configuration file [benchmark.yaml](./benchmark.yaml) to `GenAIEval/evals/benchmark/benchmark.yaml` and config `test_suite_config.user_queries` and `test_suite_config.test_output_dir`.
|
We copy the configuration file [benchmark.yaml](./benchmark.yaml) to `GenAIEval/evals/benchmark/benchmark.yaml` and config `test_suite_config.deployment_type`, `test_suite_config.service_ip`, `test_suite_config.service_port`, `test_suite_config.user_queries` and `test_suite_config.test_output_dir`.
|
||||||
|
|
||||||
```bash
|
````bash
|
||||||
export USER_QUERIES="[4, 8, 16, 1280]"
|
export DEPLOYMENT_TYPE="k8s"
|
||||||
|
export SERVICE_IP=None
|
||||||
|
export SERVICE_PORT=None
|
||||||
|
export USER_QUERIES="[1280, 1280, 1280, 1280]"
|
||||||
export TEST_OUTPUT_DIR="/home/sdp/benchmark_output/node_2"
|
export TEST_OUTPUT_DIR="/home/sdp/benchmark_output/node_2"
|
||||||
envsubst < ./benchmark.yaml > GenAIEval/evals/benchmark/benchmark.yaml
|
envsubst < ./benchmark.yaml > GenAIEval/evals/benchmark/benchmark.yaml
|
||||||
```
|
|
||||||
|
|
||||||
And then run the benchmark tool by:
|
And then run the benchmark tool by:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
cd GenAIEval/evals/benchmark
|
cd GenAIEval/evals/benchmark
|
||||||
python benchmark.py
|
python benchmark.py
|
||||||
```
|
````
|
||||||
|
|
||||||
##### 4. Data collection
|
##### 4. Data collection
|
||||||
|
|
||||||
@@ -276,20 +283,23 @@ kubectl label nodes k8s-master k8s-worker1 k8s-worker2 k8s-worker3 node-type=cha
|
|||||||
|
|
||||||
##### 2. Install ChatQnA
|
##### 2. Install ChatQnA
|
||||||
|
|
||||||
Go to [BKC manifest](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/benchmark/tuned/with_rerank/four_gaudi) and apply to K8s.
|
Go to [BKC manifest](./tuned/with_rerank/four_gaudi) and apply to K8s.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# on k8s-master node
|
# on k8s-master node
|
||||||
cd GenAIExamples/ChatQnA/benchmark/tuned/with_rerank/four_gaudi
|
cd GenAIExamples/ChatQnA/benchmark/performance/tuned/with_rerank/four_gaudi
|
||||||
kubectl apply -f .
|
kubectl apply -f .
|
||||||
```
|
```
|
||||||
|
|
||||||
##### 3. Run tests
|
##### 3. Run tests
|
||||||
|
|
||||||
We copy the configuration file [benchmark.yaml](./benchmark.yaml) to `GenAIEval/evals/benchmark/benchmark.yaml` and config `test_suite_config.user_queries` and `test_suite_config.test_output_dir`.
|
We copy the configuration file [benchmark.yaml](./benchmark.yaml) to `GenAIEval/evals/benchmark/benchmark.yaml` and config `test_suite_config.deployment_type`, `test_suite_config.service_ip`, `test_suite_config.service_port`, `test_suite_config.user_queries` and `test_suite_config.test_output_dir`.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
export USER_QUERIES="[4, 8, 16, 2560]"
|
export DEPLOYMENT_TYPE="k8s"
|
||||||
|
export SERVICE_IP=None
|
||||||
|
export SERVICE_PORT=None
|
||||||
|
export USER_QUERIES="[2560, 2560, 2560, 2560]"
|
||||||
export TEST_OUTPUT_DIR="/home/sdp/benchmark_output/node_4"
|
export TEST_OUTPUT_DIR="/home/sdp/benchmark_output/node_4"
|
||||||
envsubst < ./benchmark.yaml > GenAIEval/evals/benchmark/benchmark.yaml
|
envsubst < ./benchmark.yaml > GenAIEval/evals/benchmark/benchmark.yaml
|
||||||
```
|
```
|
||||||
@@ -309,11 +319,84 @@ All the test results will come to this folder `/home/sdp/benchmark_output/node_4
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
# on k8s-master node
|
# on k8s-master node
|
||||||
cd GenAIExamples/ChatQnA/benchmark/tuned/with_rerank/single_gaudi
|
cd GenAIExamples/ChatQnA/benchmark/performance/tuned/with_rerank/four_gaudi
|
||||||
kubectl delete -f .
|
kubectl delete -f .
|
||||||
kubectl label nodes k8s-master k8s-worker1 k8s-worker2 k8s-worker3 node-type-
|
kubectl label nodes k8s-master k8s-worker1 k8s-worker2 k8s-worker3 node-type-
|
||||||
```
|
```
|
||||||
|
|
||||||
#### 6. Results
|
## Benchmark with Docker
|
||||||
|
|
||||||
Check OOB performance data [here](/opea_release_data.md#chatqna), tuned performance data will be released soon.
|
### Deploy ChatQnA service with Docker
|
||||||
|
|
||||||
|
In order to set up the environment correctly, you'll need to configure essential environment variables and, if applicable, proxy-related variables.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Example: host_ip="192.168.1.1"
|
||||||
|
export host_ip="External_Public_IP"
|
||||||
|
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
|
||||||
|
export no_proxy="Your_No_Proxy"
|
||||||
|
export http_proxy="Your_HTTP_Proxy"
|
||||||
|
export https_proxy="Your_HTTPs_Proxy"
|
||||||
|
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Deploy ChatQnA on Gaudi
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd GenAIExamples/ChatQnA/docker_compose/intel/hpu/gaudi/
|
||||||
|
docker compose up -d
|
||||||
|
```
|
||||||
|
|
||||||
|
Refer to the [Gaudi Guide](../../docker_compose/intel/hpu/gaudi/README.md) to build docker images from source.
|
||||||
|
|
||||||
|
#### Deploy ChatQnA on Xeon
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd GenAIExamples/ChatQnA/docker_compose/intel/cpu/xeon/
|
||||||
|
docker compose up -d
|
||||||
|
```
|
||||||
|
|
||||||
|
Refer to the [Xeon Guide](../../docker_compose/intel/cpu/xeon/README.md) for more instructions on building docker images from source.
|
||||||
|
|
||||||
|
#### Deploy ChatQnA on NVIDIA GPU
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd GenAIExamples/ChatQnA/docker_compose/nvidia/gpu/
|
||||||
|
docker compose up -d
|
||||||
|
```
|
||||||
|
|
||||||
|
Refer to the [NVIDIA GPU Guide](../../docker_compose/nvidia/gpu/README.md) for more instructions on building docker images from source.
|
||||||
|
|
||||||
|
### Run tests
|
||||||
|
|
||||||
|
We copy the configuration file [benchmark.yaml](./benchmark.yaml) to `GenAIEval/evals/benchmark/benchmark.yaml` and config `test_suite_config.deployment_type`, `test_suite_config.service_ip`, `test_suite_config.service_port`, `test_suite_config.user_queries` and `test_suite_config.test_output_dir`.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export DEPLOYMENT_TYPE="docker"
|
||||||
|
export SERVICE_IP="ChatQnA Service IP"
|
||||||
|
export SERVICE_PORT="ChatQnA Service Port"
|
||||||
|
export USER_QUERIES="[640, 640, 640, 640]"
|
||||||
|
export TEST_OUTPUT_DIR="/home/sdp/benchmark_output/docker"
|
||||||
|
envsubst < ./benchmark.yaml > GenAIEval/evals/benchmark/benchmark.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
And then run the benchmark tool by:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd GenAIEval/evals/benchmark
|
||||||
|
python benchmark.py
|
||||||
|
```
|
||||||
|
|
||||||
|
### Data collection
|
||||||
|
|
||||||
|
All the test results will come to this folder `/home/sdp/benchmark_output/docker` configured by the environment variable `TEST_OUTPUT_DIR` in previous steps.
|
||||||
|
|
||||||
|
### Clean up
|
||||||
|
|
||||||
|
Taking Gaudi as an example, use the commands below to clean up the system.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd GenAIExamples/ChatQnA/docker_compose/intel/hpu/gaudi
|
||||||
|
docker compose stop && docker compose rm -f
|
||||||
|
echo y | docker system prune
|
||||||
|
```
|
||||||
@@ -3,6 +3,9 @@
|
|||||||
|
|
||||||
test_suite_config: # Overall configuration settings for the test suite
|
test_suite_config: # Overall configuration settings for the test suite
|
||||||
examples: ["chatqna"] # The specific test cases being tested, e.g., chatqna, codegen, codetrans, faqgen, audioqna, visualqna
|
examples: ["chatqna"] # The specific test cases being tested, e.g., chatqna, codegen, codetrans, faqgen, audioqna, visualqna
|
||||||
|
deployment_type: ${DEPLOYMENT_TYPE} # Default is "k8s", can also be "docker"
|
||||||
|
service_ip: ${SERVICE_IP} # Leave as None for k8s, specify for Docker
|
||||||
|
service_port: ${SERVICE_PORT} # Leave as None for k8s, specify for Docker
|
||||||
concurrent_level: 5 # The concurrency level, adjustable based on requirements
|
concurrent_level: 5 # The concurrency level, adjustable based on requirements
|
||||||
user_queries: ${USER_QUERIES} # Number of test requests at each concurrency level
|
user_queries: ${USER_QUERIES} # Number of test requests at each concurrency level
|
||||||
random_prompt: false # Use random prompts if true, fixed prompts if false
|
random_prompt: false # Use random prompts if true, fixed prompts if false
|
||||||
@@ -41,7 +44,7 @@ test_cases:
|
|||||||
run_test: false
|
run_test: false
|
||||||
service_name: "llm-svc" # Replace with your service name
|
service_name: "llm-svc" # Replace with your service name
|
||||||
parameters:
|
parameters:
|
||||||
max_new_tokens: 128
|
max_tokens: 128
|
||||||
temperature: 0.01
|
temperature: 0.01
|
||||||
top_k: 10
|
top_k: 10
|
||||||
top_p: 0.95
|
top_p: 0.95
|
||||||
23
ChatQnA/benchmark/performance/helm_charts/.helmignore
Normal file
23
ChatQnA/benchmark/performance/helm_charts/.helmignore
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
# Patterns to ignore when building packages.
|
||||||
|
# This supports shell glob matching, relative path matching, and
|
||||||
|
# negation (prefixed with !). Only one pattern per line.
|
||||||
|
.DS_Store
|
||||||
|
# Common VCS dirs
|
||||||
|
.git/
|
||||||
|
.gitignore
|
||||||
|
.bzr/
|
||||||
|
.bzrignore
|
||||||
|
.hg/
|
||||||
|
.hgignore
|
||||||
|
.svn/
|
||||||
|
# Common backup files
|
||||||
|
*.swp
|
||||||
|
*.bak
|
||||||
|
*.tmp
|
||||||
|
*.orig
|
||||||
|
*~
|
||||||
|
# Various IDEs
|
||||||
|
.project
|
||||||
|
.idea/
|
||||||
|
*.tmproj
|
||||||
|
.vscode/
|
||||||
27
ChatQnA/benchmark/performance/helm_charts/Chart.yaml
Normal file
27
ChatQnA/benchmark/performance/helm_charts/Chart.yaml
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: v2
|
||||||
|
name: chatqna-charts
|
||||||
|
description: A Helm chart for Kubernetes
|
||||||
|
|
||||||
|
# A chart can be either an 'application' or a 'library' chart.
|
||||||
|
#
|
||||||
|
# Application charts are a collection of templates that can be packaged into versioned archives
|
||||||
|
# to be deployed.
|
||||||
|
#
|
||||||
|
# Library charts provide useful utilities or functions for the chart developer. They're included as
|
||||||
|
# a dependency of application charts to inject those utilities and functions into the rendering
|
||||||
|
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
|
||||||
|
type: application
|
||||||
|
|
||||||
|
# This is the chart version. This version number should be incremented each time you make changes
|
||||||
|
# to the chart and its templates, including the app version.
|
||||||
|
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||||
|
version: 1.0.0
|
||||||
|
|
||||||
|
# This is the version number of the application being deployed. This version number should be
|
||||||
|
# incremented each time you make changes to the application. Versions are not expected to
|
||||||
|
# follow Semantic Versioning. They should reflect the version the application is using.
|
||||||
|
# It is recommended to use it with quotes.
|
||||||
|
appVersion: "1.16.0"
|
||||||
38
ChatQnA/benchmark/performance/helm_charts/README.md
Normal file
38
ChatQnA/benchmark/performance/helm_charts/README.md
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
# ChatQnA Deployment
|
||||||
|
|
||||||
|
This document guides you through deploying ChatQnA pipelines using Helm charts. Helm charts simplify managing Kubernetes applications by packaging configuration and resources.
|
||||||
|
|
||||||
|
## Getting Started
|
||||||
|
|
||||||
|
### Preparation
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# on k8s-master node
|
||||||
|
cd GenAIExamples/ChatQnA/benchmark/performance/helm_charts
|
||||||
|
|
||||||
|
# Replace <your token> with your actual Hugging Face token and run the following command:
|
||||||
|
HUGGINGFACE_TOKEN=<your token>
|
||||||
|
find . -name '*.yaml' -type f -exec sed -i "s#\${HF_TOKEN}#${HUGGINGFACE_TOKEN}#g" {} \;
|
||||||
|
|
||||||
|
# Replace the following placeholders with the desired model IDs:
|
||||||
|
LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
|
||||||
|
EMBEDDING_MODEL_ID=BAAI/bge-base-en-v1.5
|
||||||
|
RERANK_MODEL_ID=BAAI/bge-reranker-base
|
||||||
|
find . -name '*.yaml' -type f -exec sed -i "s#\$(LLM_MODEL_ID)#${LLM_MODEL_ID}#g" {} \;
|
||||||
|
find . -name '*.yaml' -type f -exec sed -i "s#\$(EMBEDDING_MODEL_ID)#${EMBEDDING_MODEL_ID}#g" {} \;
|
||||||
|
find . -name '*.yaml' -type f -exec sed -i "s#\$(RERANK_MODEL_ID)#${RERANK_MODEL_ID}#g" {} \;
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
### ChatQnA Installation
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Deploy a ChatQnA pipeline using the specified YAML configuration.
|
||||||
|
# To deploy with different configurations, simply provide a different YAML file.
|
||||||
|
helm install chatqna helm_charts/ -f helm_charts/oob_single_node.yaml
|
||||||
|
|
||||||
|
# Tips: To display rendered manifests according to the given yaml.
|
||||||
|
helm template chatqna helm_charts/ -f helm_charts/oob_single_node.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
Notes: The provided [BKC manifests](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/benchmark) for single, two, and four node Kubernetes clusters are generated using this tool.
|
||||||
237
ChatQnA/benchmark/performance/helm_charts/oob_single_node.yaml
Normal file
237
ChatQnA/benchmark/performance/helm_charts/oob_single_node.yaml
Normal file
@@ -0,0 +1,237 @@
|
|||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
config:
|
||||||
|
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||||
|
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
|
||||||
|
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||||
|
INDEX_NAME: rag-redis
|
||||||
|
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||||
|
LLM_SERVER_HOST_IP: llm-dependency-svc
|
||||||
|
NODE_SELECTOR: chatqna-opea
|
||||||
|
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||||
|
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||||
|
RERANK_SERVER_HOST_IP: reranking-dependency-svc
|
||||||
|
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||||
|
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||||
|
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||||
|
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||||
|
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||||
|
|
||||||
|
deployments:
|
||||||
|
- name: chatqna-backend-server-deploy
|
||||||
|
spec:
|
||||||
|
image_name: opea/chatqna-no-wrapper
|
||||||
|
image_tag: latest
|
||||||
|
replicas: 1
|
||||||
|
ports:
|
||||||
|
- containerPort: 8888
|
||||||
|
|
||||||
|
- name: dataprep-deploy
|
||||||
|
spec:
|
||||||
|
image_name: opea/dataprep-redis
|
||||||
|
image_tag: latest
|
||||||
|
replicas: 1
|
||||||
|
ports:
|
||||||
|
- containerPort: 6007
|
||||||
|
|
||||||
|
- name: vector-db
|
||||||
|
spec:
|
||||||
|
image_name: redis/redis-stack
|
||||||
|
image_tag: 7.2.0-v9
|
||||||
|
replicas: 1
|
||||||
|
ports:
|
||||||
|
- containerPort: 6379
|
||||||
|
- containerPort: 8001
|
||||||
|
|
||||||
|
- name: retriever-deploy
|
||||||
|
spec:
|
||||||
|
image_name: opea/retriever-redis
|
||||||
|
image_tag: latest
|
||||||
|
replicas: 1
|
||||||
|
ports:
|
||||||
|
- containerPort: 7000
|
||||||
|
|
||||||
|
- name: embedding-dependency-deploy
|
||||||
|
spec:
|
||||||
|
image_name: ghcr.io/huggingface/text-embeddings-inference
|
||||||
|
image_tag: cpu-1.5
|
||||||
|
replicas: 1
|
||||||
|
ports:
|
||||||
|
- containerPort: 80
|
||||||
|
args:
|
||||||
|
- name: "--model-id"
|
||||||
|
value: $(EMBEDDING_MODEL_ID)
|
||||||
|
- name: "--auto-truncate"
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /data
|
||||||
|
name: model-volume
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: shm
|
||||||
|
volumes:
|
||||||
|
- hostPath:
|
||||||
|
path: /mnt/models
|
||||||
|
type: Directory
|
||||||
|
name: model-volume
|
||||||
|
- emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: 1Gi
|
||||||
|
name: shm
|
||||||
|
|
||||||
|
# Reranking model server (TEI on Gaudi); reached through reranking-dependency-svc.
- name: reranking-dependency-deploy
  spec:
    image_name: opea/tei-gaudi
    image_tag: latest
    replicas: 1
    resources:
      limits:
        habana.ai/gaudi: 1
    # Each entry is a flag `name` with an optional `value`; the deployment
    # template emits the name and then the value as consecutive container args.
    # The flag and its value are kept in ONE entry, like every other args list
    # in this file.
    args:
      - name: "--model-id"
        value: $(RERANK_MODEL_ID)
      - name: "--auto-truncate"
    env:
      - name: OMPI_MCA_btl_vader_single_copy_mechanism
        value: none
      - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
        value: "true"
      - name: runtime
        value: habana
      - name: HABANA_VISIBLE_DEVICES
        value: all
      # ${HF_TOKEN} is a placeholder replaced by the sed step in the README.
      - name: HF_TOKEN
        value: ${HF_TOKEN}
      - name: MAX_WARMUP_SEQUENCE_LENGTH
        value: "512"
    volumeMounts:
      - mountPath: /data
        name: model-volume
      - mountPath: /dev/shm
        name: shm
    volumes:
      - hostPath:
          path: /mnt/models
          type: Directory
        name: model-volume
      - emptyDir:
          medium: Memory
          sizeLimit: 1Gi
        name: shm
|
||||||
|
|
||||||
|
- name: llm-dependency-deploy
|
||||||
|
spec:
|
||||||
|
image_name: ghcr.io/huggingface/tgi-gaudi
|
||||||
|
image_tag: 2.0.4
|
||||||
|
replicas: 7
|
||||||
|
ports:
|
||||||
|
- containerPort: 80
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
habana.ai/gaudi: 1
|
||||||
|
args:
|
||||||
|
- name: "--model-id"
|
||||||
|
value: $(LLM_MODEL_ID)
|
||||||
|
- name: "--max-input-length"
|
||||||
|
value: "2048"
|
||||||
|
- name: "--max-total-tokens"
|
||||||
|
value: "4096"
|
||||||
|
env:
|
||||||
|
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||||
|
value: none
|
||||||
|
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||||
|
value: "true"
|
||||||
|
- name: runtime
|
||||||
|
value: habana
|
||||||
|
- name: HABANA_VISIBLE_DEVICES
|
||||||
|
value: all
|
||||||
|
- name: HF_TOKEN
|
||||||
|
value: ${HF_TOKEN}
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /data
|
||||||
|
name: model-volume
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: shm
|
||||||
|
volumes:
|
||||||
|
- hostPath:
|
||||||
|
path: /mnt/models
|
||||||
|
type: Directory
|
||||||
|
name: model-volume
|
||||||
|
- emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: 1Gi
|
||||||
|
name: shm
|
||||||
|
|
||||||
|
services:
|
||||||
|
- name: chatqna-backend-server-svc
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
nodePort: 30888
|
||||||
|
port: 8888
|
||||||
|
targetPort: 8888
|
||||||
|
selector:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
type: NodePort
|
||||||
|
|
||||||
|
- name: dataprep-svc
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: port1
|
||||||
|
port: 6007
|
||||||
|
targetPort: 6007
|
||||||
|
selector:
|
||||||
|
app: dataprep-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
|
||||||
|
- name: embedding-dependency-svc
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 6006
|
||||||
|
targetPort: 80
|
||||||
|
selector:
|
||||||
|
app: embedding-dependency-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
|
||||||
|
- name: llm-dependency-svc
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 9009
|
||||||
|
targetPort: 80
|
||||||
|
selector:
|
||||||
|
app: llm-dependency-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
|
||||||
|
- name: reranking-dependency-svc
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 8808
|
||||||
|
targetPort: 80
|
||||||
|
selector:
|
||||||
|
app: reranking-dependency-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
|
||||||
|
- name: retriever-svc
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 7000
|
||||||
|
targetPort: 7000
|
||||||
|
selector:
|
||||||
|
app: retriever-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
|
||||||
|
- name: vector-db
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: vector-db-service
|
||||||
|
port: 6379
|
||||||
|
targetPort: 6379
|
||||||
|
- name: vector-db-insight
|
||||||
|
port: 8001
|
||||||
|
targetPort: 8001
|
||||||
|
selector:
|
||||||
|
app: vector-db
|
||||||
|
type: ClusterIP
|
||||||
@@ -0,0 +1,25 @@
|
|||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
# ConfigMap holding the shared ChatQnA settings taken from .Values.config.
# The deployment template loads it into every container via envFrom.
# ConfigMap data values must be strings, so every value is passed through
# `quote`: numeric, boolean-looking or empty settings would otherwise render
# as non-string YAML scalars and be rejected by the API server.
apiVersion: v1
kind: ConfigMap
metadata:
  name: qna-config
  namespace: default
data:
  EMBEDDING_MODEL_ID: {{ .Values.config.EMBEDDING_MODEL_ID | quote }}
  EMBEDDING_SERVER_HOST_IP: {{ .Values.config.EMBEDDING_SERVER_HOST_IP | quote }}
  HUGGINGFACEHUB_API_TOKEN: {{ .Values.config.HUGGINGFACEHUB_API_TOKEN | quote }}
  INDEX_NAME: {{ .Values.config.INDEX_NAME | quote }}
  LLM_MODEL_ID: {{ .Values.config.LLM_MODEL_ID | quote }}
  LLM_SERVER_HOST_IP: {{ .Values.config.LLM_SERVER_HOST_IP | quote }}
  NODE_SELECTOR: {{ .Values.config.NODE_SELECTOR | quote }}
  REDIS_URL: {{ .Values.config.REDIS_URL | quote }}
  RERANK_MODEL_ID: {{ .Values.config.RERANK_MODEL_ID | quote }}
  RERANK_SERVER_HOST_IP: {{ .Values.config.RERANK_SERVER_HOST_IP | quote }}
  RETRIEVER_SERVICE_HOST_IP: {{ .Values.config.RETRIEVER_SERVICE_HOST_IP | quote }}
  TEI_EMBEDDING_ENDPOINT: {{ .Values.config.TEI_EMBEDDING_ENDPOINT | quote }}
  TEI_ENDPOINT: {{ .Values.config.TEI_ENDPOINT | quote }}
  TEI_RERANKING_ENDPOINT: {{ .Values.config.TEI_RERANKING_ENDPOINT | quote }}
  TGI_LLM_ENDPOINT: {{ .Values.config.TGI_LLM_ENDPOINT | quote }}
---
|
||||||
@@ -0,0 +1,108 @@
|
|||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
# Renders one apps/v1 Deployment per entry of .Values.deployments.
# Every entry provides `name` and a `spec` with image_name, image_tag and
# replicas; args, env, ports, resources, volumeMounts and volumes are optional.
{{- range $deployment := .Values.deployments }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ $deployment.name }}
  namespace: default
spec:
  replicas: {{ $deployment.spec.replicas }}
  selector:
    matchLabels:
      app: {{ $deployment.name }}
  template:
    metadata:
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
      labels:
        app: {{ $deployment.name }}
    spec:
      containers:
      - envFrom:
        - configMapRef:
            name: qna-config
        {{- if $deployment.spec.args }}
        args:
        {{- range $arg := $deployment.spec.args }}
        {{- if $arg.name }}
        - {{ $arg.name | quote }}
        {{- end }}
        {{- /* Emit the value whenever it is set and non-empty, so values such as 0 or false are not dropped. */}}
        {{- if and (not (kindIs "invalid" $arg.value)) (ne (toString $arg.value) "") }}
        - {{ $arg.value | quote }}
        {{- end }}
        {{- end }}
        {{- end }}

        {{- if $deployment.spec.env }}
        env:
        {{- range $env := $deployment.spec.env }}
        - name: {{ $env.name }}
          value: {{ $env.value | quote }}
        {{- end }}
        {{- end }}

        image: "{{ $deployment.spec.image_name }}:{{ $deployment.spec.image_tag }}"
        imagePullPolicy: IfNotPresent
        name: {{ $deployment.name }}

        {{- /* toYaml keeps each port entry as a single list item even when it has several keys. */}}
        {{- if $deployment.spec.ports }}
        ports:
        {{- toYaml $deployment.spec.ports | nindent 8 }}
        {{- end }}

        {{- if $deployment.spec.resources }}
        resources:
          {{- toYaml $deployment.spec.resources | nindent 10 }}
        {{- end }}

        {{- if $deployment.spec.volumeMounts }}
        volumeMounts:
        {{- range $volumeMount := $deployment.spec.volumeMounts }}
        - mountPath: {{ $volumeMount.mountPath }}
          name: {{ $volumeMount.name }}
        {{- end }}
        {{- end }}

      hostIPC: true
      {{- /* NOTE(review): the node label is hard-coded; config.NODE_SELECTOR carries the same value but is not read here - confirm whether it should drive this. */}}
      nodeSelector:
        node-type: chatqna-opea
      serviceAccountName: default
      topologySpreadConstraints:
      - labelSelector:
          matchLabels:
            app: {{ $deployment.name }}
        maxSkew: 1
        topologyKey: kubernetes.io/hostname
        whenUnsatisfiable: ScheduleAnyway

      {{- /* Only hostPath and emptyDir volume sources are rendered. */}}
      {{- if $deployment.spec.volumes }}
      volumes:
      {{- range $index, $volume := $deployment.spec.volumes }}
      - name: {{ $volume.name }}
        {{- if $volume.hostPath }}
        hostPath:
          path: {{ $volume.hostPath.path }}
          type: {{ $volume.hostPath.type }}
        {{- else if $volume.emptyDir }}
        emptyDir:
          medium: {{ $volume.emptyDir.medium }}
          sizeLimit: {{ $volume.emptyDir.sizeLimit }}
        {{- end }}
      {{- end }}
      {{- end }}

---
{{- end }}
|
||||||
@@ -0,0 +1,24 @@
|
|||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
# Renders one v1 Service per entry of .Values.services.
# Each port entry is written with its `name` first, followed by all of its
# remaining keys (port, targetPort, nodePort, ...) exactly as given in values.
{{- range $svc := .Values.services }}
apiVersion: v1
kind: Service
metadata:
  name: {{ $svc.name }}
  namespace: default
spec:
  ports:
  {{- range $entry := $svc.spec.ports }}
  - name: {{ $entry.name }}
    {{- range $field, $setting := omit $entry "name" }}
    {{ $field }}: {{ $setting }}
    {{- end }}
  {{- end }}
  selector:
    app: {{ $svc.spec.selector.app }}
  type: {{ $svc.spec.type }}
---
{{- end }}
|
||||||
259
ChatQnA/benchmark/performance/helm_charts/tuned_single_node.yaml
Normal file
259
ChatQnA/benchmark/performance/helm_charts/tuned_single_node.yaml
Normal file
@@ -0,0 +1,259 @@
|
|||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
config:
|
||||||
|
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||||
|
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
|
||||||
|
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||||
|
INDEX_NAME: rag-redis
|
||||||
|
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||||
|
LLM_SERVER_HOST_IP: llm-dependency-svc
|
||||||
|
NODE_SELECTOR: chatqna-opea
|
||||||
|
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||||
|
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||||
|
RERANK_SERVER_HOST_IP: reranking-dependency-svc
|
||||||
|
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||||
|
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||||
|
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||||
|
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||||
|
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||||
|
|
||||||
|
deployments:
|
||||||
|
- name: chatqna-backend-server-deploy
|
||||||
|
spec:
|
||||||
|
image_name: opea/chatqna-no-wrapper
|
||||||
|
image_tag: latest
|
||||||
|
replicas: 2
|
||||||
|
ports:
|
||||||
|
- containerPort: 8888
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: "8"
|
||||||
|
memory: "8000Mi"
|
||||||
|
requests:
|
||||||
|
cpu: "8"
|
||||||
|
memory: "8000Mi"
|
||||||
|
|
||||||
|
- name: dataprep-deploy
|
||||||
|
spec:
|
||||||
|
image_name: opea/dataprep-redis
|
||||||
|
image_tag: latest
|
||||||
|
replicas: 1
|
||||||
|
ports:
|
||||||
|
- containerPort: 6007
|
||||||
|
|
||||||
|
- name: vector-db
|
||||||
|
spec:
|
||||||
|
image_name: redis/redis-stack
|
||||||
|
image_tag: 7.2.0-v9
|
||||||
|
replicas: 1
|
||||||
|
ports:
|
||||||
|
- containerPort: 6379
|
||||||
|
- containerPort: 8001
|
||||||
|
|
||||||
|
- name: retriever-deploy
|
||||||
|
spec:
|
||||||
|
image_name: opea/retriever-redis
|
||||||
|
image_tag: latest
|
||||||
|
replicas: 2
|
||||||
|
ports:
|
||||||
|
- containerPort: 7000
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: "4"
|
||||||
|
memory: "4000Mi"
|
||||||
|
|
||||||
|
- name: embedding-dependency-deploy
|
||||||
|
spec:
|
||||||
|
image_name: ghcr.io/huggingface/text-embeddings-inference
|
||||||
|
image_tag: cpu-1.5
|
||||||
|
replicas: 1
|
||||||
|
ports:
|
||||||
|
- containerPort: 80
|
||||||
|
args:
|
||||||
|
- name: "--model-id"
|
||||||
|
value: $(EMBEDDING_MODEL_ID)
|
||||||
|
- name: "--auto-truncate"
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: "80"
|
||||||
|
memory: "20000Mi"
|
||||||
|
requests:
|
||||||
|
cpu: "80"
|
||||||
|
memory: "20000Mi"
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /data
|
||||||
|
name: model-volume
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: shm
|
||||||
|
volumes:
|
||||||
|
- hostPath:
|
||||||
|
path: /mnt/models
|
||||||
|
type: Directory
|
||||||
|
name: model-volume
|
||||||
|
- emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: 1Gi
|
||||||
|
name: shm
|
||||||
|
|
||||||
|
# Reranking model server (TEI on Gaudi); reached through reranking-dependency-svc.
- name: reranking-dependency-deploy
  spec:
    image_name: opea/tei-gaudi
    image_tag: latest
    replicas: 1
    resources:
      limits:
        habana.ai/gaudi: 1
    # Each entry is a flag `name` with an optional `value`; the deployment
    # template emits the name and then the value as consecutive container args.
    # The flag and its value are kept in ONE entry, like every other args list
    # in this file.
    args:
      - name: "--model-id"
        value: $(RERANK_MODEL_ID)
      - name: "--auto-truncate"
    env:
      - name: OMPI_MCA_btl_vader_single_copy_mechanism
        value: none
      - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
        value: "true"
      - name: runtime
        value: habana
      - name: HABANA_VISIBLE_DEVICES
        value: all
      # ${HF_TOKEN} is a placeholder replaced by the sed step in the README.
      - name: HF_TOKEN
        value: ${HF_TOKEN}
      - name: MAX_WARMUP_SEQUENCE_LENGTH
        value: "512"
    volumeMounts:
      - mountPath: /data
        name: model-volume
      - mountPath: /dev/shm
        name: shm
    volumes:
      - hostPath:
          path: /mnt/models
          type: Directory
        name: model-volume
      - emptyDir:
          medium: Memory
          sizeLimit: 1Gi
        name: shm
|
||||||
|
|
||||||
|
- name: llm-dependency-deploy
|
||||||
|
spec:
|
||||||
|
image_name: ghcr.io/huggingface/tgi-gaudi
|
||||||
|
image_tag: 2.0.4
|
||||||
|
replicas: 7
|
||||||
|
ports:
|
||||||
|
- containerPort: 80
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
habana.ai/gaudi: 1
|
||||||
|
args:
|
||||||
|
- name: "--model-id"
|
||||||
|
value: $(LLM_MODEL_ID)
|
||||||
|
- name: "--max-input-length"
|
||||||
|
value: "1280"
|
||||||
|
- name: "--max-total-tokens"
|
||||||
|
value: "2048"
|
||||||
|
- name: "--max-batch-total-tokens"
|
||||||
|
value: "65536"
|
||||||
|
- name: "--max-batch-prefill-tokens"
|
||||||
|
value: "4096"
|
||||||
|
env:
|
||||||
|
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||||
|
value: none
|
||||||
|
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||||
|
value: "true"
|
||||||
|
- name: runtime
|
||||||
|
value: habana
|
||||||
|
- name: HABANA_VISIBLE_DEVICES
|
||||||
|
value: all
|
||||||
|
- name: HF_TOKEN
|
||||||
|
value: ${HF_TOKEN}
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /data
|
||||||
|
name: model-volume
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: shm
|
||||||
|
volumes:
|
||||||
|
- hostPath:
|
||||||
|
path: /mnt/models
|
||||||
|
type: Directory
|
||||||
|
name: model-volume
|
||||||
|
- emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: 1Gi
|
||||||
|
name: shm
|
||||||
|
|
||||||
|
services:
|
||||||
|
- name: chatqna-backend-server-svc
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
nodePort: 30888
|
||||||
|
port: 8888
|
||||||
|
targetPort: 8888
|
||||||
|
selector:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
type: NodePort
|
||||||
|
|
||||||
|
- name: dataprep-svc
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: port1
|
||||||
|
port: 6007
|
||||||
|
targetPort: 6007
|
||||||
|
selector:
|
||||||
|
app: dataprep-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
|
||||||
|
- name: embedding-dependency-svc
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 6006
|
||||||
|
targetPort: 80
|
||||||
|
selector:
|
||||||
|
app: embedding-dependency-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
|
||||||
|
- name: llm-dependency-svc
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 9009
|
||||||
|
targetPort: 80
|
||||||
|
selector:
|
||||||
|
app: llm-dependency-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
|
||||||
|
- name: reranking-dependency-svc
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 8808
|
||||||
|
targetPort: 80
|
||||||
|
selector:
|
||||||
|
app: reranking-dependency-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
|
||||||
|
- name: retriever-svc
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 7000
|
||||||
|
targetPort: 7000
|
||||||
|
selector:
|
||||||
|
app: retriever-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
|
||||||
|
- name: vector-db
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: vector-db-service
|
||||||
|
port: 6379
|
||||||
|
targetPort: 6379
|
||||||
|
- name: vector-db-insight
|
||||||
|
port: 8001
|
||||||
|
targetPort: 8001
|
||||||
|
selector:
|
||||||
|
app: vector-db
|
||||||
|
type: ClusterIP
|
||||||
237
ChatQnA/benchmark/performance/helm_charts/values.yaml
Normal file
237
ChatQnA/benchmark/performance/helm_charts/values.yaml
Normal file
@@ -0,0 +1,237 @@
|
|||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
config:
|
||||||
|
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||||
|
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
|
||||||
|
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||||
|
INDEX_NAME: rag-redis
|
||||||
|
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||||
|
LLM_SERVER_HOST_IP: llm-dependency-svc
|
||||||
|
NODE_SELECTOR: chatqna-opea
|
||||||
|
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||||
|
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||||
|
RERANK_SERVER_HOST_IP: reranking-dependency-svc
|
||||||
|
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||||
|
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||||
|
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||||
|
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||||
|
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||||
|
|
||||||
|
deployments:
|
||||||
|
- name: chatqna-backend-server-deploy
|
||||||
|
spec:
|
||||||
|
image_name: opea/chatqna-no-wrapper
|
||||||
|
image_tag: latest
|
||||||
|
replicas: 1
|
||||||
|
ports:
|
||||||
|
- containerPort: 8888
|
||||||
|
|
||||||
|
- name: dataprep-deploy
|
||||||
|
spec:
|
||||||
|
image_name: opea/dataprep-redis
|
||||||
|
image_tag: latest
|
||||||
|
replicas: 1
|
||||||
|
ports:
|
||||||
|
- containerPort: 6007
|
||||||
|
|
||||||
|
- name: vector-db
|
||||||
|
spec:
|
||||||
|
image_name: redis/redis-stack
|
||||||
|
image_tag: 7.2.0-v9
|
||||||
|
replicas: 1
|
||||||
|
ports:
|
||||||
|
- containerPort: 6379
|
||||||
|
- containerPort: 8001
|
||||||
|
|
||||||
|
- name: retriever-deploy
|
||||||
|
spec:
|
||||||
|
image_name: opea/retriever-redis
|
||||||
|
image_tag: latest
|
||||||
|
replicas: 1
|
||||||
|
ports:
|
||||||
|
- containerPort: 7000
|
||||||
|
|
||||||
|
- name: embedding-dependency-deploy
|
||||||
|
spec:
|
||||||
|
image_name: ghcr.io/huggingface/text-embeddings-inference
|
||||||
|
image_tag: cpu-1.5
|
||||||
|
replicas: 1
|
||||||
|
ports:
|
||||||
|
- containerPort: 80
|
||||||
|
args:
|
||||||
|
- name: "--model-id"
|
||||||
|
value: $(EMBEDDING_MODEL_ID)
|
||||||
|
- name: "--auto-truncate"
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /data
|
||||||
|
name: model-volume
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: shm
|
||||||
|
volumes:
|
||||||
|
- hostPath:
|
||||||
|
path: /mnt/models
|
||||||
|
type: Directory
|
||||||
|
name: model-volume
|
||||||
|
- emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: 1Gi
|
||||||
|
name: shm
|
||||||
|
|
||||||
|
# Reranking model server (TEI on Gaudi); reached through reranking-dependency-svc.
- name: reranking-dependency-deploy
  spec:
    image_name: opea/tei-gaudi
    image_tag: latest
    replicas: 1
    resources:
      limits:
        habana.ai/gaudi: 1
    # Each entry is a flag `name` with an optional `value`; the deployment
    # template emits the name and then the value as consecutive container args.
    # The flag and its value are kept in ONE entry, like every other args list
    # in this file.
    args:
      - name: "--model-id"
        value: $(RERANK_MODEL_ID)
      - name: "--auto-truncate"
    env:
      - name: OMPI_MCA_btl_vader_single_copy_mechanism
        value: none
      - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
        value: "true"
      - name: runtime
        value: habana
      - name: HABANA_VISIBLE_DEVICES
        value: all
      # ${HF_TOKEN} is a placeholder replaced by the sed step in the README.
      - name: HF_TOKEN
        value: ${HF_TOKEN}
      - name: MAX_WARMUP_SEQUENCE_LENGTH
        value: "512"
    volumeMounts:
      - mountPath: /data
        name: model-volume
      - mountPath: /dev/shm
        name: shm
    volumes:
      - hostPath:
          path: /mnt/models
          type: Directory
        name: model-volume
      - emptyDir:
          medium: Memory
          sizeLimit: 1Gi
        name: shm
|
||||||
|
|
||||||
|
- name: llm-dependency-deploy
|
||||||
|
spec:
|
||||||
|
image_name: ghcr.io/huggingface/tgi-gaudi
|
||||||
|
image_tag: 2.0.4
|
||||||
|
replicas: 7
|
||||||
|
ports:
|
||||||
|
- containerPort: 80
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
habana.ai/gaudi: 1
|
||||||
|
args:
|
||||||
|
- name: "--model-id"
|
||||||
|
value: $(LLM_MODEL_ID)
|
||||||
|
- name: "--max-input-length"
|
||||||
|
value: "2048"
|
||||||
|
- name: "--max-total-tokens"
|
||||||
|
value: "4096"
|
||||||
|
env:
|
||||||
|
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||||
|
value: none
|
||||||
|
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||||
|
value: "true"
|
||||||
|
- name: runtime
|
||||||
|
value: habana
|
||||||
|
- name: HABANA_VISIBLE_DEVICES
|
||||||
|
value: all
|
||||||
|
- name: HF_TOKEN
|
||||||
|
value: ${HF_TOKEN}
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /data
|
||||||
|
name: model-volume
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: shm
|
||||||
|
volumes:
|
||||||
|
- hostPath:
|
||||||
|
path: /mnt/models
|
||||||
|
type: Directory
|
||||||
|
name: model-volume
|
||||||
|
- emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: 1Gi
|
||||||
|
name: shm
|
||||||
|
|
||||||
|
services:
|
||||||
|
- name: chatqna-backend-server-svc
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
nodePort: 30888
|
||||||
|
port: 8888
|
||||||
|
targetPort: 8888
|
||||||
|
selector:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
type: NodePort
|
||||||
|
|
||||||
|
- name: dataprep-svc
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: port1
|
||||||
|
port: 6007
|
||||||
|
targetPort: 6007
|
||||||
|
selector:
|
||||||
|
app: dataprep-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
|
||||||
|
- name: embedding-dependency-svc
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 6006
|
||||||
|
targetPort: 80
|
||||||
|
selector:
|
||||||
|
app: embedding-dependency-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
|
||||||
|
- name: llm-dependency-svc
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 9009
|
||||||
|
targetPort: 80
|
||||||
|
selector:
|
||||||
|
app: llm-dependency-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
|
||||||
|
- name: reranking-dependency-svc
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 8808
|
||||||
|
targetPort: 80
|
||||||
|
selector:
|
||||||
|
app: reranking-dependency-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
|
||||||
|
- name: retriever-svc
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 7000
|
||||||
|
targetPort: 7000
|
||||||
|
selector:
|
||||||
|
app: retriever-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
|
||||||
|
- name: vector-db
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: vector-db-service
|
||||||
|
port: 6379
|
||||||
|
targetPort: 6379
|
||||||
|
- name: vector-db-insight
|
||||||
|
port: 8001
|
||||||
|
targetPort: 8001
|
||||||
|
selector:
|
||||||
|
app: vector-db
|
||||||
|
type: ClusterIP
|
||||||
@@ -237,7 +237,7 @@ spec:
|
|||||||
envFrom:
|
envFrom:
|
||||||
- configMapRef:
|
- configMapRef:
|
||||||
name: qna-config
|
name: qna-config
|
||||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||||
imagePullPolicy: IfNotPresent
|
imagePullPolicy: IfNotPresent
|
||||||
name: llm-dependency-deploy
|
name: llm-dependency-deploy
|
||||||
ports:
|
ports:
|
||||||
@@ -237,7 +237,7 @@ spec:
|
|||||||
envFrom:
|
envFrom:
|
||||||
- configMapRef:
|
- configMapRef:
|
||||||
name: qna-config
|
name: qna-config
|
||||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||||
imagePullPolicy: IfNotPresent
|
imagePullPolicy: IfNotPresent
|
||||||
name: llm-dependency-deploy
|
name: llm-dependency-deploy
|
||||||
ports:
|
ports:
|
||||||
@@ -237,7 +237,7 @@ spec:
|
|||||||
envFrom:
|
envFrom:
|
||||||
- configMapRef:
|
- configMapRef:
|
||||||
name: qna-config
|
name: qna-config
|
||||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||||
imagePullPolicy: IfNotPresent
|
imagePullPolicy: IfNotPresent
|
||||||
name: llm-dependency-deploy
|
name: llm-dependency-deploy
|
||||||
ports:
|
ports:
|
||||||
@@ -237,7 +237,7 @@ spec:
|
|||||||
envFrom:
|
envFrom:
|
||||||
- configMapRef:
|
- configMapRef:
|
||||||
name: qna-config
|
name: qna-config
|
||||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||||
imagePullPolicy: IfNotPresent
|
imagePullPolicy: IfNotPresent
|
||||||
name: llm-dependency-deploy
|
name: llm-dependency-deploy
|
||||||
ports:
|
ports:
|
||||||
@@ -237,7 +237,7 @@ spec:
|
|||||||
envFrom:
|
envFrom:
|
||||||
- configMapRef:
|
- configMapRef:
|
||||||
name: qna-config
|
name: qna-config
|
||||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||||
imagePullPolicy: IfNotPresent
|
imagePullPolicy: IfNotPresent
|
||||||
name: llm-dependency-deploy
|
name: llm-dependency-deploy
|
||||||
ports:
|
ports:
|
||||||
@@ -237,7 +237,7 @@ spec:
|
|||||||
envFrom:
|
envFrom:
|
||||||
- configMapRef:
|
- configMapRef:
|
||||||
name: qna-config
|
name: qna-config
|
||||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||||
imagePullPolicy: IfNotPresent
|
imagePullPolicy: IfNotPresent
|
||||||
name: llm-dependency-deploy
|
name: llm-dependency-deploy
|
||||||
ports:
|
ports:
|
||||||
@@ -237,7 +237,7 @@ spec:
|
|||||||
envFrom:
|
envFrom:
|
||||||
- configMapRef:
|
- configMapRef:
|
||||||
name: qna-config
|
name: qna-config
|
||||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||||
imagePullPolicy: IfNotPresent
|
imagePullPolicy: IfNotPresent
|
||||||
name: llm-dependency-deploy
|
name: llm-dependency-deploy
|
||||||
ports:
|
ports:
|
||||||
@@ -237,7 +237,7 @@ spec:
|
|||||||
envFrom:
|
envFrom:
|
||||||
- configMapRef:
|
- configMapRef:
|
||||||
name: qna-config
|
name: qna-config
|
||||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||||
imagePullPolicy: IfNotPresent
|
imagePullPolicy: IfNotPresent
|
||||||
name: llm-dependency-deploy
|
name: llm-dependency-deploy
|
||||||
ports:
|
ports:
|
||||||
@@ -0,0 +1,641 @@
|
|||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: v1
|
||||||
|
data:
|
||||||
|
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||||
|
EMBEDDING_SERVICE_HOST_IP: embedding-svc
|
||||||
|
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||||
|
INDEX_NAME: rag-redis
|
||||||
|
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||||
|
LLM_SERVICE_HOST_IP: llm-svc
|
||||||
|
NODE_SELECTOR: chatqna-opea
|
||||||
|
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||||
|
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||||
|
RERANK_SERVICE_HOST_IP: reranking-svc
|
||||||
|
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||||
|
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||||
|
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||||
|
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||||
|
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: qna-config
|
||||||
|
namespace: default
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: chatqna-backend-server-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/chatqna:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: chatqna-backend-server-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 8888
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: chatqna-backend-server-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
nodePort: 30888
|
||||||
|
port: 8888
|
||||||
|
targetPort: 8888
|
||||||
|
selector:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
type: NodePort
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: dataprep-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: dataprep-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: dataprep-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/dataprep-redis:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: dataprep-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 6007
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: dataprep-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: dataprep-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: port1
|
||||||
|
port: 6007
|
||||||
|
targetPort: 6007
|
||||||
|
selector:
|
||||||
|
app: dataprep-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: embedding-dependency-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: embedding-dependency-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: embedding-dependency-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- args:
|
||||||
|
- --model-id
|
||||||
|
- $(EMBEDDING_MODEL_ID)
|
||||||
|
- --auto-truncate
|
||||||
|
envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: embedding-dependency-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 80
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /data
|
||||||
|
name: model-volume
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: shm
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: embedding-dependency-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
volumes:
|
||||||
|
- hostPath:
|
||||||
|
path: /mnt/models
|
||||||
|
type: Directory
|
||||||
|
name: model-volume
|
||||||
|
- emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: 1Gi
|
||||||
|
name: shm
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: embedding-dependency-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 6006
|
||||||
|
targetPort: 80
|
||||||
|
selector:
|
||||||
|
app: embedding-dependency-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: embedding-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: embedding-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: embedding-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/embedding-tei:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: embedding-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 6000
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: embedding-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: embedding-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 6000
|
||||||
|
targetPort: 6000
|
||||||
|
selector:
|
||||||
|
app: embedding-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: llm-dependency-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 31
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: llm-dependency-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: llm-dependency-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- args:
|
||||||
|
- --model-id
|
||||||
|
- $(LLM_MODEL_ID)
|
||||||
|
- --max-input-length
|
||||||
|
- '2048'
|
||||||
|
- --max-total-tokens
|
||||||
|
- '4096'
|
||||||
|
env:
|
||||||
|
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||||
|
value: none
|
||||||
|
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||||
|
value: 'true'
|
||||||
|
- name: runtime
|
||||||
|
value: habana
|
||||||
|
- name: HABANA_VISIBLE_DEVICES
|
||||||
|
value: all
|
||||||
|
- name: HF_TOKEN
|
||||||
|
value: ${HF_TOKEN}
|
||||||
|
envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: llm-dependency-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 80
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
habana.ai/gaudi: 1
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add:
|
||||||
|
- SYS_NICE
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /data
|
||||||
|
name: model-volume
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: shm
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: llm-dependency-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
volumes:
|
||||||
|
- hostPath:
|
||||||
|
path: /mnt/models
|
||||||
|
type: Directory
|
||||||
|
name: model-volume
|
||||||
|
- emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: 1Gi
|
||||||
|
name: shm
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: llm-dependency-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 9009
|
||||||
|
targetPort: 80
|
||||||
|
selector:
|
||||||
|
app: llm-dependency-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: llm-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: llm-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: llm-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/llm-tgi:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: llm-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 9000
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: llm-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: llm-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 9000
|
||||||
|
targetPort: 9000
|
||||||
|
selector:
|
||||||
|
app: llm-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: reranking-dependency-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: reranking-dependency-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: reranking-dependency-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- args:
|
||||||
|
- --model-id
|
||||||
|
- $(RERANK_MODEL_ID)
|
||||||
|
- --auto-truncate
|
||||||
|
env:
|
||||||
|
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||||
|
value: none
|
||||||
|
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||||
|
value: 'true'
|
||||||
|
- name: runtime
|
||||||
|
value: habana
|
||||||
|
- name: HABANA_VISIBLE_DEVICES
|
||||||
|
value: all
|
||||||
|
- name: HF_TOKEN
|
||||||
|
value: ${HF_TOKEN}
|
||||||
|
- name: MAX_WARMUP_SEQUENCE_LENGTH
|
||||||
|
value: '512'
|
||||||
|
envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/tei-gaudi:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: reranking-dependency-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 80
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
habana.ai/gaudi: 1
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /data
|
||||||
|
name: model-volume
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: shm
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: reranking-dependency-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
volumes:
|
||||||
|
- hostPath:
|
||||||
|
path: /mnt/models
|
||||||
|
type: Directory
|
||||||
|
name: model-volume
|
||||||
|
- emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: 1Gi
|
||||||
|
name: shm
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: reranking-dependency-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 8808
|
||||||
|
targetPort: 80
|
||||||
|
selector:
|
||||||
|
app: reranking-dependency-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: reranking-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: reranking-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: reranking-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/reranking-tei:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: reranking-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 8000
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: reranking-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: reranking-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 8000
|
||||||
|
targetPort: 8000
|
||||||
|
selector:
|
||||||
|
app: reranking-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: retriever-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: retriever-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: retriever-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/retriever-redis:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: retriever-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 7000
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: retriever-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: retriever-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 7000
|
||||||
|
targetPort: 7000
|
||||||
|
selector:
|
||||||
|
app: retriever-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: vector-db
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: vector-db
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: vector-db
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: redis/redis-stack:7.2.0-v9
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: vector-db
|
||||||
|
ports:
|
||||||
|
- containerPort: 6379
|
||||||
|
- containerPort: 8001
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: vector-db
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: vector-db
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: vector-db-service
|
||||||
|
port: 6379
|
||||||
|
targetPort: 6379
|
||||||
|
- name: vector-db-insight
|
||||||
|
port: 8001
|
||||||
|
targetPort: 8001
|
||||||
|
selector:
|
||||||
|
app: vector-db
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
@@ -0,0 +1,641 @@
|
|||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: v1
|
||||||
|
data:
|
||||||
|
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||||
|
EMBEDDING_SERVICE_HOST_IP: embedding-svc
|
||||||
|
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||||
|
INDEX_NAME: rag-redis
|
||||||
|
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||||
|
LLM_SERVICE_HOST_IP: llm-svc
|
||||||
|
NODE_SELECTOR: chatqna-opea
|
||||||
|
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||||
|
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||||
|
RERANK_SERVICE_HOST_IP: reranking-svc
|
||||||
|
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||||
|
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||||
|
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||||
|
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||||
|
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: qna-config
|
||||||
|
namespace: default
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: chatqna-backend-server-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/chatqna:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: chatqna-backend-server-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 8888
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: chatqna-backend-server-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
nodePort: 30888
|
||||||
|
port: 8888
|
||||||
|
targetPort: 8888
|
||||||
|
selector:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
type: NodePort
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: dataprep-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: dataprep-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: dataprep-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/dataprep-redis:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: dataprep-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 6007
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: dataprep-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: dataprep-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: port1
|
||||||
|
port: 6007
|
||||||
|
targetPort: 6007
|
||||||
|
selector:
|
||||||
|
app: dataprep-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: embedding-dependency-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: embedding-dependency-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: embedding-dependency-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- args:
|
||||||
|
- --model-id
|
||||||
|
- $(EMBEDDING_MODEL_ID)
|
||||||
|
- --auto-truncate
|
||||||
|
envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: embedding-dependency-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 80
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /data
|
||||||
|
name: model-volume
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: shm
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: embedding-dependency-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
volumes:
|
||||||
|
- hostPath:
|
||||||
|
path: /mnt/models
|
||||||
|
type: Directory
|
||||||
|
name: model-volume
|
||||||
|
- emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: 1Gi
|
||||||
|
name: shm
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: embedding-dependency-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 6006
|
||||||
|
targetPort: 80
|
||||||
|
selector:
|
||||||
|
app: embedding-dependency-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: embedding-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: embedding-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: embedding-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/embedding-tei:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: embedding-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 6000
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: embedding-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: embedding-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 6000
|
||||||
|
targetPort: 6000
|
||||||
|
selector:
|
||||||
|
app: embedding-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: llm-dependency-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 7
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: llm-dependency-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: llm-dependency-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- args:
|
||||||
|
- --model-id
|
||||||
|
- $(LLM_MODEL_ID)
|
||||||
|
- --max-input-length
|
||||||
|
- '2048'
|
||||||
|
- --max-total-tokens
|
||||||
|
- '4096'
|
||||||
|
env:
|
||||||
|
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||||
|
value: none
|
||||||
|
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||||
|
value: 'true'
|
||||||
|
- name: runtime
|
||||||
|
value: habana
|
||||||
|
- name: HABANA_VISIBLE_DEVICES
|
||||||
|
value: all
|
||||||
|
- name: HF_TOKEN
|
||||||
|
value: ${HF_TOKEN}
|
||||||
|
envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: llm-dependency-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 80
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
habana.ai/gaudi: 1
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add:
|
||||||
|
- SYS_NICE
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /data
|
||||||
|
name: model-volume
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: shm
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: llm-dependency-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
volumes:
|
||||||
|
- hostPath:
|
||||||
|
path: /mnt/models
|
||||||
|
type: Directory
|
||||||
|
name: model-volume
|
||||||
|
- emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: 1Gi
|
||||||
|
name: shm
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: llm-dependency-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 9009
|
||||||
|
targetPort: 80
|
||||||
|
selector:
|
||||||
|
app: llm-dependency-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: llm-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: llm-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: llm-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/llm-tgi:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: llm-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 9000
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: llm-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: llm-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 9000
|
||||||
|
targetPort: 9000
|
||||||
|
selector:
|
||||||
|
app: llm-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: reranking-dependency-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: reranking-dependency-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: reranking-dependency-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- args:
|
||||||
|
- --model-id
|
||||||
|
- $(RERANK_MODEL_ID)
|
||||||
|
- --auto-truncate
|
||||||
|
env:
|
||||||
|
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||||
|
value: none
|
||||||
|
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||||
|
value: 'true'
|
||||||
|
- name: runtime
|
||||||
|
value: habana
|
||||||
|
- name: HABANA_VISIBLE_DEVICES
|
||||||
|
value: all
|
||||||
|
- name: HF_TOKEN
|
||||||
|
value: ${HF_TOKEN}
|
||||||
|
- name: MAX_WARMUP_SEQUENCE_LENGTH
|
||||||
|
value: '512'
|
||||||
|
envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/tei-gaudi:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: reranking-dependency-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 80
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
habana.ai/gaudi: 1
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /data
|
||||||
|
name: model-volume
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: shm
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: reranking-dependency-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
volumes:
|
||||||
|
- hostPath:
|
||||||
|
path: /mnt/models
|
||||||
|
type: Directory
|
||||||
|
name: model-volume
|
||||||
|
- emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: 1Gi
|
||||||
|
name: shm
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: reranking-dependency-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 8808
|
||||||
|
targetPort: 80
|
||||||
|
selector:
|
||||||
|
app: reranking-dependency-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: reranking-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: reranking-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: reranking-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/reranking-tei:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: reranking-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 8000
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: reranking-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: reranking-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 8000
|
||||||
|
targetPort: 8000
|
||||||
|
selector:
|
||||||
|
app: reranking-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: retriever-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: retriever-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: retriever-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/retriever-redis:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: retriever-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 7000
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: retriever-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: retriever-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 7000
|
||||||
|
targetPort: 7000
|
||||||
|
selector:
|
||||||
|
app: retriever-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: vector-db
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: vector-db
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: vector-db
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: redis/redis-stack:7.2.0-v9
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: vector-db
|
||||||
|
ports:
|
||||||
|
- containerPort: 6379
|
||||||
|
- containerPort: 8001
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: vector-db
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: vector-db
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: vector-db-service
|
||||||
|
port: 6379
|
||||||
|
targetPort: 6379
|
||||||
|
- name: vector-db-insight
|
||||||
|
port: 8001
|
||||||
|
targetPort: 8001
|
||||||
|
selector:
|
||||||
|
app: vector-db
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
@@ -0,0 +1,641 @@
|
|||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: v1
|
||||||
|
data:
|
||||||
|
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||||
|
EMBEDDING_SERVICE_HOST_IP: embedding-svc
|
||||||
|
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||||
|
INDEX_NAME: rag-redis
|
||||||
|
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||||
|
LLM_SERVICE_HOST_IP: llm-svc
|
||||||
|
NODE_SELECTOR: chatqna-opea
|
||||||
|
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||||
|
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||||
|
RERANK_SERVICE_HOST_IP: reranking-svc
|
||||||
|
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||||
|
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||||
|
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||||
|
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||||
|
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: qna-config
|
||||||
|
namespace: default
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: chatqna-backend-server-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/chatqna:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: chatqna-backend-server-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 8888
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: chatqna-backend-server-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
nodePort: 30888
|
||||||
|
port: 8888
|
||||||
|
targetPort: 8888
|
||||||
|
selector:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
type: NodePort
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: dataprep-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: dataprep-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: dataprep-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/dataprep-redis:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: dataprep-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 6007
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: dataprep-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: dataprep-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: port1
|
||||||
|
port: 6007
|
||||||
|
targetPort: 6007
|
||||||
|
selector:
|
||||||
|
app: dataprep-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: embedding-dependency-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: embedding-dependency-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: embedding-dependency-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- args:
|
||||||
|
- --model-id
|
||||||
|
- $(EMBEDDING_MODEL_ID)
|
||||||
|
- --auto-truncate
|
||||||
|
envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: embedding-dependency-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 80
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /data
|
||||||
|
name: model-volume
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: shm
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: embedding-dependency-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
volumes:
|
||||||
|
- hostPath:
|
||||||
|
path: /mnt/models
|
||||||
|
type: Directory
|
||||||
|
name: model-volume
|
||||||
|
- emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: 1Gi
|
||||||
|
name: shm
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: embedding-dependency-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 6006
|
||||||
|
targetPort: 80
|
||||||
|
selector:
|
||||||
|
app: embedding-dependency-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: embedding-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: embedding-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: embedding-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/embedding-tei:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: embedding-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 6000
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: embedding-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: embedding-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 6000
|
||||||
|
targetPort: 6000
|
||||||
|
selector:
|
||||||
|
app: embedding-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: llm-dependency-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 15
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: llm-dependency-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: llm-dependency-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- args:
|
||||||
|
- --model-id
|
||||||
|
- $(LLM_MODEL_ID)
|
||||||
|
- --max-input-length
|
||||||
|
- '2048'
|
||||||
|
- --max-total-tokens
|
||||||
|
- '4096'
|
||||||
|
env:
|
||||||
|
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||||
|
value: none
|
||||||
|
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||||
|
value: 'true'
|
||||||
|
- name: runtime
|
||||||
|
value: habana
|
||||||
|
- name: HABANA_VISIBLE_DEVICES
|
||||||
|
value: all
|
||||||
|
- name: HF_TOKEN
|
||||||
|
value: ${HF_TOKEN}
|
||||||
|
envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: llm-dependency-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 80
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
habana.ai/gaudi: 1
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add:
|
||||||
|
- SYS_NICE
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /data
|
||||||
|
name: model-volume
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: shm
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: llm-dependency-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
volumes:
|
||||||
|
- hostPath:
|
||||||
|
path: /mnt/models
|
||||||
|
type: Directory
|
||||||
|
name: model-volume
|
||||||
|
- emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: 1Gi
|
||||||
|
name: shm
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: llm-dependency-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 9009
|
||||||
|
targetPort: 80
|
||||||
|
selector:
|
||||||
|
app: llm-dependency-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: llm-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: llm-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: llm-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/llm-tgi:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: llm-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 9000
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: llm-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: llm-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 9000
|
||||||
|
targetPort: 9000
|
||||||
|
selector:
|
||||||
|
app: llm-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: reranking-dependency-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: reranking-dependency-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: reranking-dependency-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- args:
|
||||||
|
- --model-id
|
||||||
|
- $(RERANK_MODEL_ID)
|
||||||
|
- --auto-truncate
|
||||||
|
env:
|
||||||
|
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||||
|
value: none
|
||||||
|
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||||
|
value: 'true'
|
||||||
|
- name: runtime
|
||||||
|
value: habana
|
||||||
|
- name: HABANA_VISIBLE_DEVICES
|
||||||
|
value: all
|
||||||
|
- name: HF_TOKEN
|
||||||
|
value: ${HF_TOKEN}
|
||||||
|
- name: MAX_WARMUP_SEQUENCE_LENGTH
|
||||||
|
value: '512'
|
||||||
|
envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/tei-gaudi:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: reranking-dependency-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 80
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
habana.ai/gaudi: 1
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /data
|
||||||
|
name: model-volume
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: shm
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: reranking-dependency-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
volumes:
|
||||||
|
- hostPath:
|
||||||
|
path: /mnt/models
|
||||||
|
type: Directory
|
||||||
|
name: model-volume
|
||||||
|
- emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: 1Gi
|
||||||
|
name: shm
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: reranking-dependency-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 8808
|
||||||
|
targetPort: 80
|
||||||
|
selector:
|
||||||
|
app: reranking-dependency-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: reranking-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: reranking-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: reranking-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/reranking-tei:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: reranking-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 8000
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: reranking-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: reranking-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 8000
|
||||||
|
targetPort: 8000
|
||||||
|
selector:
|
||||||
|
app: reranking-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: retriever-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: retriever-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: retriever-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/retriever-redis:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: retriever-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 7000
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: retriever-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: retriever-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 7000
|
||||||
|
targetPort: 7000
|
||||||
|
selector:
|
||||||
|
app: retriever-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: vector-db
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: vector-db
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: vector-db
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: redis/redis-stack:7.2.0-v9
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: vector-db
|
||||||
|
ports:
|
||||||
|
- containerPort: 6379
|
||||||
|
- containerPort: 8001
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: vector-db
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: vector-db
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: vector-db-service
|
||||||
|
port: 6379
|
||||||
|
targetPort: 6379
|
||||||
|
- name: vector-db-insight
|
||||||
|
port: 8001
|
||||||
|
targetPort: 8001
|
||||||
|
selector:
|
||||||
|
app: vector-db
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
@@ -0,0 +1,730 @@
|
|||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: v1
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: qna-config
|
||||||
|
namespace: default
|
||||||
|
data:
|
||||||
|
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||||
|
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||||
|
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||||
|
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||||
|
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||||
|
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||||
|
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||||
|
INDEX_NAME: rag-redis
|
||||||
|
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||||
|
EMBEDDING_SERVICE_HOST_IP: embedding-svc
|
||||||
|
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||||
|
RERANK_SERVICE_HOST_IP: reranking-svc
|
||||||
|
NODE_SELECTOR: chatqna-opea
|
||||||
|
LLM_SERVICE_HOST_IP: llm-svc
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: chatqna-backend-server-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
hostIPC: true
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/chatqna-without-rerank:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: chatqna-backend-server-deploy
|
||||||
|
args: null
|
||||||
|
ports:
|
||||||
|
- containerPort: 8888
|
||||||
|
serviceAccountName: default
|
||||||
|
---
|
||||||
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: chatqna-backend-server-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: NodePort
|
||||||
|
selector:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 8888
|
||||||
|
targetPort: 8888
|
||||||
|
nodePort: 30888
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: dataprep-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: dataprep-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: dataprep-deploy
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: dataprep-deploy
|
||||||
|
hostIPC: true
|
||||||
|
containers:
|
||||||
|
- env:
|
||||||
|
- name: REDIS_URL
|
||||||
|
valueFrom:
|
||||||
|
configMapKeyRef:
|
||||||
|
name: qna-config
|
||||||
|
key: REDIS_URL
|
||||||
|
- name: TEI_ENDPOINT
|
||||||
|
valueFrom:
|
||||||
|
configMapKeyRef:
|
||||||
|
name: qna-config
|
||||||
|
key: TEI_EMBEDDING_ENDPOINT
|
||||||
|
- name: INDEX_NAME
|
||||||
|
valueFrom:
|
||||||
|
configMapKeyRef:
|
||||||
|
name: qna-config
|
||||||
|
key: INDEX_NAME
|
||||||
|
image: opea/dataprep-redis:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: dataprep-deploy
|
||||||
|
args: null
|
||||||
|
ports:
|
||||||
|
- containerPort: 6007
|
||||||
|
- containerPort: 6008
|
||||||
|
- containerPort: 6009
|
||||||
|
serviceAccountName: default
|
||||||
|
---
|
||||||
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: dataprep-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: dataprep-deploy
|
||||||
|
ports:
|
||||||
|
- name: port1
|
||||||
|
port: 6007
|
||||||
|
targetPort: 6007
|
||||||
|
- name: port2
|
||||||
|
port: 6008
|
||||||
|
targetPort: 6008
|
||||||
|
- name: port3
|
||||||
|
port: 6009
|
||||||
|
targetPort: 6009
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: embedding-dependency-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: embedding-dependency-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: embedding-dependency-deploy
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||||
|
name: embedding-dependency-deploy
|
||||||
|
args:
|
||||||
|
- --model-id
|
||||||
|
- $(EMBEDDING_MODEL_ID)
|
||||||
|
- --auto-truncate
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /data
|
||||||
|
name: model-volume
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: shm
|
||||||
|
ports:
|
||||||
|
- containerPort: 80
|
||||||
|
serviceAccountName: default
|
||||||
|
volumes:
|
||||||
|
- name: model-volume
|
||||||
|
hostPath:
|
||||||
|
path: /mnt/models
|
||||||
|
type: Directory
|
||||||
|
- name: shm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: 1Gi
|
||||||
|
---
|
||||||
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: embedding-dependency-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: embedding-dependency-deploy
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 6006
|
||||||
|
targetPort: 80
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: embedding-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: embedding-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: embedding-deploy
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: embedding-deploy
|
||||||
|
hostIPC: true
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/embedding-tei:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: embedding-deploy
|
||||||
|
args: null
|
||||||
|
ports:
|
||||||
|
- containerPort: 6000
|
||||||
|
serviceAccountName: default
|
||||||
|
---
|
||||||
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: embedding-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: embedding-deploy
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 6000
|
||||||
|
targetPort: 6000
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: llm-dependency-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 32
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: llm-dependency-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: llm-dependency-deploy
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
hostIPC: true
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
|
||||||
|
name: llm-dependency-deploy-demo
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add:
|
||||||
|
- SYS_NICE
|
||||||
|
args:
|
||||||
|
- --model-id
|
||||||
|
- $(LLM_MODEL_ID)
|
||||||
|
- --max-input-length
|
||||||
|
- '2048'
|
||||||
|
- --max-total-tokens
|
||||||
|
- '4096'
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /data
|
||||||
|
name: model-volume
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: shm
|
||||||
|
ports:
|
||||||
|
- containerPort: 80
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
habana.ai/gaudi: 1
|
||||||
|
env:
|
||||||
|
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||||
|
value: none
|
||||||
|
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||||
|
value: 'true'
|
||||||
|
- name: runtime
|
||||||
|
value: habana
|
||||||
|
- name: HABANA_VISIBLE_DEVICES
|
||||||
|
value: all
|
||||||
|
- name: HF_TOKEN
|
||||||
|
value: ${HF_TOKEN}
|
||||||
|
serviceAccountName: default
|
||||||
|
volumes:
|
||||||
|
- name: model-volume
|
||||||
|
hostPath:
|
||||||
|
path: /mnt/models
|
||||||
|
type: Directory
|
||||||
|
- name: shm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: 1Gi
|
||||||
|
---
|
||||||
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: llm-dependency-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: llm-dependency-deploy
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 9009
|
||||||
|
targetPort: 80
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: llm-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: llm-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: llm-deploy
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: llm-deploy
|
||||||
|
hostIPC: true
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/llm-tgi:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: llm-deploy
|
||||||
|
args: null
|
||||||
|
ports:
|
||||||
|
- containerPort: 9000
|
||||||
|
serviceAccountName: default
|
||||||
|
---
|
||||||
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: llm-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: llm-deploy
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 9000
|
||||||
|
targetPort: 9000
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: reranking-dependency-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: reranking-dependency-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: reranking-dependency-deploy
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: reranking-dependency-deploy
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/tei-gaudi:latest
|
||||||
|
name: reranking-dependency-deploy
|
||||||
|
args:
|
||||||
|
- --model-id
|
||||||
|
- $(RERANK_MODEL_ID)
|
||||||
|
- --auto-truncate
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /data
|
||||||
|
name: model-volume
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: shm
|
||||||
|
ports:
|
||||||
|
- containerPort: 80
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
habana.ai/gaudi: 1
|
||||||
|
env:
|
||||||
|
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||||
|
value: none
|
||||||
|
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||||
|
value: 'true'
|
||||||
|
- name: runtime
|
||||||
|
value: habana
|
||||||
|
- name: HABANA_VISIBLE_DEVICES
|
||||||
|
value: all
|
||||||
|
- name: HF_TOKEN
|
||||||
|
value: ${HF_TOKEN}
|
||||||
|
- name: MAX_WARMUP_SEQUENCE_LENGTH
|
||||||
|
value: '512'
|
||||||
|
serviceAccountName: default
|
||||||
|
volumes:
|
||||||
|
- name: model-volume
|
||||||
|
hostPath:
|
||||||
|
path: /mnt/models
|
||||||
|
type: Directory
|
||||||
|
- name: shm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: 1Gi
|
||||||
|
---
|
||||||
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: reranking-dependency-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: reranking-dependency-deploy
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 8808
|
||||||
|
targetPort: 80
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: reranking-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: reranking-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: reranking-deploy
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: reranking-deploy
|
||||||
|
hostIPC: true
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/reranking-tei:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: reranking-deploy
|
||||||
|
args: null
|
||||||
|
ports:
|
||||||
|
- containerPort: 8000
|
||||||
|
serviceAccountName: default
|
||||||
|
---
|
||||||
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: reranking-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: reranking-deploy
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 8000
|
||||||
|
targetPort: 8000
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: retriever-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: retriever-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: retriever-deploy
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: retriever-deploy
|
||||||
|
hostIPC: true
|
||||||
|
containers:
|
||||||
|
- env:
|
||||||
|
- name: REDIS_URL
|
||||||
|
valueFrom:
|
||||||
|
configMapKeyRef:
|
||||||
|
name: qna-config
|
||||||
|
key: REDIS_URL
|
||||||
|
- name: TEI_EMBEDDING_ENDPOINT
|
||||||
|
valueFrom:
|
||||||
|
configMapKeyRef:
|
||||||
|
name: qna-config
|
||||||
|
key: TEI_EMBEDDING_ENDPOINT
|
||||||
|
- name: HUGGINGFACEHUB_API_TOKEN
|
||||||
|
valueFrom:
|
||||||
|
configMapKeyRef:
|
||||||
|
name: qna-config
|
||||||
|
key: HUGGINGFACEHUB_API_TOKEN
|
||||||
|
- name: INDEX_NAME
|
||||||
|
valueFrom:
|
||||||
|
configMapKeyRef:
|
||||||
|
name: qna-config
|
||||||
|
key: INDEX_NAME
|
||||||
|
image: opea/retriever-redis:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: retriever-deploy
|
||||||
|
args: null
|
||||||
|
ports:
|
||||||
|
- containerPort: 7000
|
||||||
|
serviceAccountName: default
|
||||||
|
---
|
||||||
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: retriever-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: retriever-deploy
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 7000
|
||||||
|
targetPort: 7000
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: vector-db
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: vector-db
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: vector-db
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: vector-db
|
||||||
|
containers:
|
||||||
|
- name: vector-db
|
||||||
|
image: redis/redis-stack:7.2.0-v9
|
||||||
|
ports:
|
||||||
|
- containerPort: 6379
|
||||||
|
- containerPort: 8001
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: vector-db
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: vector-db
|
||||||
|
ports:
|
||||||
|
- name: vector-db-service
|
||||||
|
port: 6379
|
||||||
|
targetPort: 6379
|
||||||
|
- name: vector-db-insight
|
||||||
|
port: 8001
|
||||||
|
targetPort: 8001
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
@@ -0,0 +1,579 @@
|
|||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: v1
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: qna-config
|
||||||
|
namespace: default
|
||||||
|
data:
|
||||||
|
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||||
|
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||||
|
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||||
|
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||||
|
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||||
|
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||||
|
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||||
|
INDEX_NAME: rag-redis
|
||||||
|
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||||
|
EMBEDDING_SERVICE_HOST_IP: embedding-svc
|
||||||
|
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||||
|
RERANK_SERVICE_HOST_IP: reranking-svc
|
||||||
|
NODE_SELECTOR: chatqna-opea
|
||||||
|
LLM_SERVICE_HOST_IP: llm-svc
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: chatqna-backend-server-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
hostIPC: true
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/chatqna-without-rerank:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: chatqna-backend-server-deploy
|
||||||
|
args: null
|
||||||
|
ports:
|
||||||
|
- containerPort: 8888
|
||||||
|
serviceAccountName: default
|
||||||
|
---
|
||||||
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: chatqna-backend-server-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: NodePort
|
||||||
|
selector:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 8888
|
||||||
|
targetPort: 8888
|
||||||
|
nodePort: 30888
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: dataprep-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: dataprep-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: dataprep-deploy
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: dataprep-deploy
|
||||||
|
hostIPC: true
|
||||||
|
containers:
|
||||||
|
- env:
|
||||||
|
- name: REDIS_URL
|
||||||
|
valueFrom:
|
||||||
|
configMapKeyRef:
|
||||||
|
name: qna-config
|
||||||
|
key: REDIS_URL
|
||||||
|
- name: TEI_ENDPOINT
|
||||||
|
valueFrom:
|
||||||
|
configMapKeyRef:
|
||||||
|
name: qna-config
|
||||||
|
key: TEI_EMBEDDING_ENDPOINT
|
||||||
|
- name: INDEX_NAME
|
||||||
|
valueFrom:
|
||||||
|
configMapKeyRef:
|
||||||
|
name: qna-config
|
||||||
|
key: INDEX_NAME
|
||||||
|
image: opea/dataprep-redis:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: dataprep-deploy
|
||||||
|
args: null
|
||||||
|
ports:
|
||||||
|
- containerPort: 6007
|
||||||
|
- containerPort: 6008
|
||||||
|
- containerPort: 6009
|
||||||
|
serviceAccountName: default
|
||||||
|
---
|
||||||
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: dataprep-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: dataprep-deploy
|
||||||
|
ports:
|
||||||
|
- name: port1
|
||||||
|
port: 6007
|
||||||
|
targetPort: 6007
|
||||||
|
- name: port2
|
||||||
|
port: 6008
|
||||||
|
targetPort: 6008
|
||||||
|
- name: port3
|
||||||
|
port: 6009
|
||||||
|
targetPort: 6009
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: embedding-dependency-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: embedding-dependency-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: embedding-dependency-deploy
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||||
|
name: embedding-dependency-deploy
|
||||||
|
args:
|
||||||
|
- --model-id
|
||||||
|
- $(EMBEDDING_MODEL_ID)
|
||||||
|
- --auto-truncate
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /data
|
||||||
|
name: model-volume
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: shm
|
||||||
|
ports:
|
||||||
|
- containerPort: 80
|
||||||
|
serviceAccountName: default
|
||||||
|
volumes:
|
||||||
|
- name: model-volume
|
||||||
|
hostPath:
|
||||||
|
path: /mnt/models
|
||||||
|
type: Directory
|
||||||
|
- name: shm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: 1Gi
|
||||||
|
---
|
||||||
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: embedding-dependency-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: embedding-dependency-deploy
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 6006
|
||||||
|
targetPort: 80
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: embedding-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: embedding-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: embedding-deploy
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: embedding-deploy
|
||||||
|
hostIPC: true
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/embedding-tei:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: embedding-deploy
|
||||||
|
args: null
|
||||||
|
ports:
|
||||||
|
- containerPort: 6000
|
||||||
|
serviceAccountName: default
|
||||||
|
---
|
||||||
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: embedding-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: embedding-deploy
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 6000
|
||||||
|
targetPort: 6000
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: llm-dependency-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 8
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: llm-dependency-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: llm-dependency-deploy
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
hostIPC: true
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
|
||||||
|
name: llm-dependency-deploy-demo
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add:
|
||||||
|
- SYS_NICE
|
||||||
|
args:
|
||||||
|
- --model-id
|
||||||
|
- $(LLM_MODEL_ID)
|
||||||
|
- --max-input-length
|
||||||
|
- '2048'
|
||||||
|
- --max-total-tokens
|
||||||
|
- '4096'
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /data
|
||||||
|
name: model-volume
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: shm
|
||||||
|
ports:
|
||||||
|
- containerPort: 80
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
habana.ai/gaudi: 1
|
||||||
|
env:
|
||||||
|
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||||
|
value: none
|
||||||
|
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||||
|
value: 'true'
|
||||||
|
- name: runtime
|
||||||
|
value: habana
|
||||||
|
- name: HABANA_VISIBLE_DEVICES
|
||||||
|
value: all
|
||||||
|
- name: HF_TOKEN
|
||||||
|
value: ${HF_TOKEN}
|
||||||
|
serviceAccountName: default
|
||||||
|
volumes:
|
||||||
|
- name: model-volume
|
||||||
|
hostPath:
|
||||||
|
path: /mnt/models
|
||||||
|
type: Directory
|
||||||
|
- name: shm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: 1Gi
|
||||||
|
---
|
||||||
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: llm-dependency-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: llm-dependency-deploy
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 9009
|
||||||
|
targetPort: 80
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: llm-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: llm-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: llm-deploy
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: llm-deploy
|
||||||
|
hostIPC: true
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/llm-tgi:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: llm-deploy
|
||||||
|
args: null
|
||||||
|
ports:
|
||||||
|
- containerPort: 9000
|
||||||
|
serviceAccountName: default
|
||||||
|
---
|
||||||
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: llm-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: llm-deploy
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 9000
|
||||||
|
targetPort: 9000
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: retriever-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: retriever-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: retriever-deploy
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: retriever-deploy
|
||||||
|
hostIPC: true
|
||||||
|
containers:
|
||||||
|
- env:
|
||||||
|
- name: REDIS_URL
|
||||||
|
valueFrom:
|
||||||
|
configMapKeyRef:
|
||||||
|
name: qna-config
|
||||||
|
key: REDIS_URL
|
||||||
|
- name: TEI_EMBEDDING_ENDPOINT
|
||||||
|
valueFrom:
|
||||||
|
configMapKeyRef:
|
||||||
|
name: qna-config
|
||||||
|
key: TEI_EMBEDDING_ENDPOINT
|
||||||
|
- name: HUGGINGFACEHUB_API_TOKEN
|
||||||
|
valueFrom:
|
||||||
|
configMapKeyRef:
|
||||||
|
name: qna-config
|
||||||
|
key: HUGGINGFACEHUB_API_TOKEN
|
||||||
|
- name: INDEX_NAME
|
||||||
|
valueFrom:
|
||||||
|
configMapKeyRef:
|
||||||
|
name: qna-config
|
||||||
|
key: INDEX_NAME
|
||||||
|
image: opea/retriever-redis:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: retriever-deploy
|
||||||
|
args: null
|
||||||
|
ports:
|
||||||
|
- containerPort: 7000
|
||||||
|
serviceAccountName: default
|
||||||
|
---
|
||||||
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: retriever-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: retriever-deploy
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 7000
|
||||||
|
targetPort: 7000
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: vector-db
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: vector-db
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: vector-db
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: vector-db
|
||||||
|
containers:
|
||||||
|
- name: vector-db
|
||||||
|
image: redis/redis-stack:7.2.0-v9
|
||||||
|
ports:
|
||||||
|
- containerPort: 6379
|
||||||
|
- containerPort: 8001
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: vector-db
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: vector-db
|
||||||
|
ports:
|
||||||
|
- name: vector-db-service
|
||||||
|
port: 6379
|
||||||
|
targetPort: 6379
|
||||||
|
- name: vector-db-insight
|
||||||
|
port: 8001
|
||||||
|
targetPort: 8001
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
@@ -0,0 +1,579 @@
|
|||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: v1
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: qna-config
|
||||||
|
namespace: default
|
||||||
|
data:
|
||||||
|
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||||
|
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||||
|
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||||
|
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||||
|
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||||
|
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||||
|
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||||
|
INDEX_NAME: rag-redis
|
||||||
|
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||||
|
EMBEDDING_SERVICE_HOST_IP: embedding-svc
|
||||||
|
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||||
|
RERANK_SERVICE_HOST_IP: reranking-svc
|
||||||
|
NODE_SELECTOR: chatqna-opea
|
||||||
|
LLM_SERVICE_HOST_IP: llm-svc
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: chatqna-backend-server-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
hostIPC: true
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/chatqna-without-rerank:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: chatqna-backend-server-deploy
|
||||||
|
args: null
|
||||||
|
ports:
|
||||||
|
- containerPort: 8888
|
||||||
|
serviceAccountName: default
|
||||||
|
---
|
||||||
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: chatqna-backend-server-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: NodePort
|
||||||
|
selector:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 8888
|
||||||
|
targetPort: 8888
|
||||||
|
nodePort: 30888
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: dataprep-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: dataprep-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: dataprep-deploy
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: dataprep-deploy
|
||||||
|
hostIPC: true
|
||||||
|
containers:
|
||||||
|
- env:
|
||||||
|
- name: REDIS_URL
|
||||||
|
valueFrom:
|
||||||
|
configMapKeyRef:
|
||||||
|
name: qna-config
|
||||||
|
key: REDIS_URL
|
||||||
|
- name: TEI_ENDPOINT
|
||||||
|
valueFrom:
|
||||||
|
configMapKeyRef:
|
||||||
|
name: qna-config
|
||||||
|
key: TEI_EMBEDDING_ENDPOINT
|
||||||
|
- name: INDEX_NAME
|
||||||
|
valueFrom:
|
||||||
|
configMapKeyRef:
|
||||||
|
name: qna-config
|
||||||
|
key: INDEX_NAME
|
||||||
|
image: opea/dataprep-redis:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: dataprep-deploy
|
||||||
|
args: null
|
||||||
|
ports:
|
||||||
|
- containerPort: 6007
|
||||||
|
- containerPort: 6008
|
||||||
|
- containerPort: 6009
|
||||||
|
serviceAccountName: default
|
||||||
|
---
|
||||||
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: dataprep-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: dataprep-deploy
|
||||||
|
ports:
|
||||||
|
- name: port1
|
||||||
|
port: 6007
|
||||||
|
targetPort: 6007
|
||||||
|
- name: port2
|
||||||
|
port: 6008
|
||||||
|
targetPort: 6008
|
||||||
|
- name: port3
|
||||||
|
port: 6009
|
||||||
|
targetPort: 6009
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: embedding-dependency-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: embedding-dependency-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: embedding-dependency-deploy
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||||
|
name: embedding-dependency-deploy
|
||||||
|
args:
|
||||||
|
- --model-id
|
||||||
|
- $(EMBEDDING_MODEL_ID)
|
||||||
|
- --auto-truncate
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /data
|
||||||
|
name: model-volume
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: shm
|
||||||
|
ports:
|
||||||
|
- containerPort: 80
|
||||||
|
serviceAccountName: default
|
||||||
|
volumes:
|
||||||
|
- name: model-volume
|
||||||
|
hostPath:
|
||||||
|
path: /mnt/models
|
||||||
|
type: Directory
|
||||||
|
- name: shm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: 1Gi
|
||||||
|
---
|
||||||
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: embedding-dependency-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: embedding-dependency-deploy
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 6006
|
||||||
|
targetPort: 80
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: embedding-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: embedding-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: embedding-deploy
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: embedding-deploy
|
||||||
|
hostIPC: true
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/embedding-tei:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: embedding-deploy
|
||||||
|
args: null
|
||||||
|
ports:
|
||||||
|
- containerPort: 6000
|
||||||
|
serviceAccountName: default
|
||||||
|
---
|
||||||
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: embedding-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: embedding-deploy
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 6000
|
||||||
|
targetPort: 6000
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: llm-dependency-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 16
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: llm-dependency-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: llm-dependency-deploy
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
hostIPC: true
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
|
||||||
|
name: llm-dependency-deploy-demo
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add:
|
||||||
|
- SYS_NICE
|
||||||
|
args:
|
||||||
|
- --model-id
|
||||||
|
- $(LLM_MODEL_ID)
|
||||||
|
- --max-input-length
|
||||||
|
- '2048'
|
||||||
|
- --max-total-tokens
|
||||||
|
- '4096'
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /data
|
||||||
|
name: model-volume
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: shm
|
||||||
|
ports:
|
||||||
|
- containerPort: 80
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
habana.ai/gaudi: 1
|
||||||
|
env:
|
||||||
|
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||||
|
value: none
|
||||||
|
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||||
|
value: 'true'
|
||||||
|
- name: runtime
|
||||||
|
value: habana
|
||||||
|
- name: HABANA_VISIBLE_DEVICES
|
||||||
|
value: all
|
||||||
|
- name: HF_TOKEN
|
||||||
|
value: ${HF_TOKEN}
|
||||||
|
serviceAccountName: default
|
||||||
|
volumes:
|
||||||
|
- name: model-volume
|
||||||
|
hostPath:
|
||||||
|
path: /mnt/models
|
||||||
|
type: Directory
|
||||||
|
- name: shm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: 1Gi
|
||||||
|
---
|
||||||
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: llm-dependency-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: llm-dependency-deploy
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 9009
|
||||||
|
targetPort: 80
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: llm-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: llm-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: llm-deploy
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: llm-deploy
|
||||||
|
hostIPC: true
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/llm-tgi:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: llm-deploy
|
||||||
|
args: null
|
||||||
|
ports:
|
||||||
|
- containerPort: 9000
|
||||||
|
serviceAccountName: default
|
||||||
|
---
|
||||||
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: llm-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: llm-deploy
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 9000
|
||||||
|
targetPort: 9000
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: retriever-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: retriever-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: retriever-deploy
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: retriever-deploy
|
||||||
|
hostIPC: true
|
||||||
|
containers:
|
||||||
|
- env:
|
||||||
|
- name: REDIS_URL
|
||||||
|
valueFrom:
|
||||||
|
configMapKeyRef:
|
||||||
|
name: qna-config
|
||||||
|
key: REDIS_URL
|
||||||
|
- name: TEI_EMBEDDING_ENDPOINT
|
||||||
|
valueFrom:
|
||||||
|
configMapKeyRef:
|
||||||
|
name: qna-config
|
||||||
|
key: TEI_EMBEDDING_ENDPOINT
|
||||||
|
- name: HUGGINGFACEHUB_API_TOKEN
|
||||||
|
valueFrom:
|
||||||
|
configMapKeyRef:
|
||||||
|
name: qna-config
|
||||||
|
key: HUGGINGFACEHUB_API_TOKEN
|
||||||
|
- name: INDEX_NAME
|
||||||
|
valueFrom:
|
||||||
|
configMapKeyRef:
|
||||||
|
name: qna-config
|
||||||
|
key: INDEX_NAME
|
||||||
|
image: opea/retriever-redis:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: retriever-deploy
|
||||||
|
args: null
|
||||||
|
ports:
|
||||||
|
- containerPort: 7000
|
||||||
|
serviceAccountName: default
|
||||||
|
---
|
||||||
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: retriever-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: retriever-deploy
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 7000
|
||||||
|
targetPort: 7000
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: vector-db
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: vector-db
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: vector-db
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: vector-db
|
||||||
|
containers:
|
||||||
|
- name: vector-db
|
||||||
|
image: redis/redis-stack:7.2.0-v9
|
||||||
|
ports:
|
||||||
|
- containerPort: 6379
|
||||||
|
- containerPort: 8001
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: vector-db
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: vector-db
|
||||||
|
ports:
|
||||||
|
- name: vector-db-service
|
||||||
|
port: 6379
|
||||||
|
targetPort: 6379
|
||||||
|
- name: vector-db-insight
|
||||||
|
port: 8001
|
||||||
|
targetPort: 8001
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
@@ -255,7 +255,7 @@ spec:
|
|||||||
envFrom:
|
envFrom:
|
||||||
- configMapRef:
|
- configMapRef:
|
||||||
name: qna-config
|
name: qna-config
|
||||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||||
imagePullPolicy: IfNotPresent
|
imagePullPolicy: IfNotPresent
|
||||||
name: llm-dependency-deploy
|
name: llm-dependency-deploy
|
||||||
ports:
|
ports:
|
||||||
@@ -255,7 +255,7 @@ spec:
|
|||||||
envFrom:
|
envFrom:
|
||||||
- configMapRef:
|
- configMapRef:
|
||||||
name: qna-config
|
name: qna-config
|
||||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||||
imagePullPolicy: IfNotPresent
|
imagePullPolicy: IfNotPresent
|
||||||
name: llm-dependency-deploy
|
name: llm-dependency-deploy
|
||||||
ports:
|
ports:
|
||||||
@@ -255,7 +255,7 @@ spec:
|
|||||||
envFrom:
|
envFrom:
|
||||||
- configMapRef:
|
- configMapRef:
|
||||||
name: qna-config
|
name: qna-config
|
||||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||||
imagePullPolicy: IfNotPresent
|
imagePullPolicy: IfNotPresent
|
||||||
name: llm-dependency-deploy
|
name: llm-dependency-deploy
|
||||||
ports:
|
ports:
|
||||||
@@ -255,7 +255,7 @@ spec:
|
|||||||
envFrom:
|
envFrom:
|
||||||
- configMapRef:
|
- configMapRef:
|
||||||
name: qna-config
|
name: qna-config
|
||||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||||
imagePullPolicy: IfNotPresent
|
imagePullPolicy: IfNotPresent
|
||||||
name: llm-dependency-deploy
|
name: llm-dependency-deploy
|
||||||
ports:
|
ports:
|
||||||
@@ -255,7 +255,7 @@ spec:
|
|||||||
envFrom:
|
envFrom:
|
||||||
- configMapRef:
|
- configMapRef:
|
||||||
name: qna-config
|
name: qna-config
|
||||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||||
imagePullPolicy: IfNotPresent
|
imagePullPolicy: IfNotPresent
|
||||||
name: llm-dependency-deploy
|
name: llm-dependency-deploy
|
||||||
ports:
|
ports:
|
||||||
@@ -255,7 +255,7 @@ spec:
|
|||||||
envFrom:
|
envFrom:
|
||||||
- configMapRef:
|
- configMapRef:
|
||||||
name: qna-config
|
name: qna-config
|
||||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||||
imagePullPolicy: IfNotPresent
|
imagePullPolicy: IfNotPresent
|
||||||
name: llm-dependency-deploy
|
name: llm-dependency-deploy
|
||||||
ports:
|
ports:
|
||||||
@@ -255,7 +255,7 @@ spec:
|
|||||||
envFrom:
|
envFrom:
|
||||||
- configMapRef:
|
- configMapRef:
|
||||||
name: qna-config
|
name: qna-config
|
||||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||||
imagePullPolicy: IfNotPresent
|
imagePullPolicy: IfNotPresent
|
||||||
name: llm-dependency-deploy
|
name: llm-dependency-deploy
|
||||||
ports:
|
ports:
|
||||||
@@ -255,7 +255,7 @@ spec:
|
|||||||
envFrom:
|
envFrom:
|
||||||
- configMapRef:
|
- configMapRef:
|
||||||
name: qna-config
|
name: qna-config
|
||||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||||
imagePullPolicy: IfNotPresent
|
imagePullPolicy: IfNotPresent
|
||||||
name: llm-dependency-deploy
|
name: llm-dependency-deploy
|
||||||
ports:
|
ports:
|
||||||
@@ -0,0 +1,675 @@
|
|||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: v1
|
||||||
|
data:
|
||||||
|
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||||
|
EMBEDDING_SERVICE_HOST_IP: embedding-svc
|
||||||
|
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||||
|
INDEX_NAME: rag-redis
|
||||||
|
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||||
|
LLM_SERVICE_HOST_IP: llm-svc
|
||||||
|
NODE_SELECTOR: chatqna-opea
|
||||||
|
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||||
|
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||||
|
RERANK_SERVICE_HOST_IP: reranking-svc
|
||||||
|
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||||
|
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||||
|
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||||
|
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||||
|
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: qna-config
|
||||||
|
namespace: default
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: chatqna-backend-server-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 4
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/chatqna:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: chatqna-backend-server-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 8888
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: 8
|
||||||
|
memory: 8000Mi
|
||||||
|
requests:
|
||||||
|
cpu: 8
|
||||||
|
memory: 8000Mi
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: chatqna-backend-server-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
nodePort: 30888
|
||||||
|
port: 8888
|
||||||
|
targetPort: 8888
|
||||||
|
selector:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
type: NodePort
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: dataprep-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: dataprep-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: dataprep-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/dataprep-redis:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: dataprep-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 6007
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: dataprep-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: dataprep-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: port1
|
||||||
|
port: 6007
|
||||||
|
targetPort: 6007
|
||||||
|
selector:
|
||||||
|
app: dataprep-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: embedding-dependency-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 4
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: embedding-dependency-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: embedding-dependency-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- args:
|
||||||
|
- --model-id
|
||||||
|
- $(EMBEDDING_MODEL_ID)
|
||||||
|
- --auto-truncate
|
||||||
|
envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: embedding-dependency-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 80
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: 76
|
||||||
|
memory: 20000Mi
|
||||||
|
requests:
|
||||||
|
cpu: 76
|
||||||
|
memory: 20000Mi
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /data
|
||||||
|
name: model-volume
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: shm
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: embedding-dependency-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
volumes:
|
||||||
|
- hostPath:
|
||||||
|
path: /mnt/models
|
||||||
|
type: Directory
|
||||||
|
name: model-volume
|
||||||
|
- emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: 1Gi
|
||||||
|
name: shm
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: embedding-dependency-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 6006
|
||||||
|
targetPort: 80
|
||||||
|
selector:
|
||||||
|
app: embedding-dependency-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: embedding-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 4
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: embedding-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: embedding-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/embedding-tei:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: embedding-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 6000
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: 4
|
||||||
|
memory: 4000Mi
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: embedding-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: embedding-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 6000
|
||||||
|
targetPort: 6000
|
||||||
|
selector:
|
||||||
|
app: embedding-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: llm-dependency-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 31
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: llm-dependency-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: llm-dependency-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- args:
|
||||||
|
- --model-id
|
||||||
|
- $(LLM_MODEL_ID)
|
||||||
|
- --max-input-length
|
||||||
|
- '1024'
|
||||||
|
- --max-total-tokens
|
||||||
|
- '2048'
|
||||||
|
- --max-batch-total-tokens
|
||||||
|
- '65536'
|
||||||
|
- --max-batch-prefill-tokens
|
||||||
|
- '4096'
|
||||||
|
env:
|
||||||
|
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||||
|
value: none
|
||||||
|
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||||
|
value: 'true'
|
||||||
|
- name: runtime
|
||||||
|
value: habana
|
||||||
|
- name: HABANA_VISIBLE_DEVICES
|
||||||
|
value: all
|
||||||
|
- name: HF_TOKEN
|
||||||
|
value: ${HF_TOKEN}
|
||||||
|
envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: llm-dependency-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 80
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
habana.ai/gaudi: 1
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add:
|
||||||
|
- SYS_NICE
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /data
|
||||||
|
name: model-volume
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: shm
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: llm-dependency-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
volumes:
|
||||||
|
- hostPath:
|
||||||
|
path: /mnt/models
|
||||||
|
type: Directory
|
||||||
|
name: model-volume
|
||||||
|
- emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: 1Gi
|
||||||
|
name: shm
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: llm-dependency-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 9009
|
||||||
|
targetPort: 80
|
||||||
|
selector:
|
||||||
|
app: llm-dependency-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: llm-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 4
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: llm-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: llm-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/llm-tgi:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: llm-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 9000
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: 4
|
||||||
|
memory: 4000Mi
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: llm-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: llm-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 9000
|
||||||
|
targetPort: 9000
|
||||||
|
selector:
|
||||||
|
app: llm-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: reranking-dependency-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: reranking-dependency-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: reranking-dependency-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- args:
|
||||||
|
- --model-id
|
||||||
|
- $(RERANK_MODEL_ID)
|
||||||
|
- --auto-truncate
|
||||||
|
env:
|
||||||
|
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||||
|
value: none
|
||||||
|
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||||
|
value: 'true'
|
||||||
|
- name: runtime
|
||||||
|
value: habana
|
||||||
|
- name: HABANA_VISIBLE_DEVICES
|
||||||
|
value: all
|
||||||
|
- name: HF_TOKEN
|
||||||
|
value: ${HF_TOKEN}
|
||||||
|
- name: MAX_WARMUP_SEQUENCE_LENGTH
|
||||||
|
value: '512'
|
||||||
|
envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/tei-gaudi:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: reranking-dependency-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 80
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
habana.ai/gaudi: 1
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /data
|
||||||
|
name: model-volume
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: shm
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: reranking-dependency-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
volumes:
|
||||||
|
- hostPath:
|
||||||
|
path: /mnt/models
|
||||||
|
type: Directory
|
||||||
|
name: model-volume
|
||||||
|
- emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: 1Gi
|
||||||
|
name: shm
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: reranking-dependency-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 8808
|
||||||
|
targetPort: 80
|
||||||
|
selector:
|
||||||
|
app: reranking-dependency-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: reranking-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 4
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: reranking-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: reranking-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/reranking-tei:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: reranking-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 8000
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: 4
|
||||||
|
memory: 4000Mi
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: reranking-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: reranking-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 8000
|
||||||
|
targetPort: 8000
|
||||||
|
selector:
|
||||||
|
app: reranking-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: retriever-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 4
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: retriever-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: retriever-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/retriever-redis:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: retriever-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 7000
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: 4
|
||||||
|
memory: 4000Mi
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: retriever-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: retriever-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 7000
|
||||||
|
targetPort: 7000
|
||||||
|
selector:
|
||||||
|
app: retriever-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: vector-db
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: vector-db
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: vector-db
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: redis/redis-stack:7.2.0-v9
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: vector-db
|
||||||
|
ports:
|
||||||
|
- containerPort: 6379
|
||||||
|
- containerPort: 8001
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: vector-db
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: vector-db
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: vector-db-service
|
||||||
|
port: 6379
|
||||||
|
targetPort: 6379
|
||||||
|
- name: vector-db-insight
|
||||||
|
port: 8001
|
||||||
|
targetPort: 8001
|
||||||
|
selector:
|
||||||
|
app: vector-db
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
@@ -0,0 +1,675 @@
|
|||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: v1
|
||||||
|
data:
|
||||||
|
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||||
|
EMBEDDING_SERVICE_HOST_IP: embedding-svc
|
||||||
|
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||||
|
INDEX_NAME: rag-redis
|
||||||
|
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||||
|
LLM_SERVICE_HOST_IP: llm-svc
|
||||||
|
NODE_SELECTOR: chatqna-opea
|
||||||
|
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||||
|
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||||
|
RERANK_SERVICE_HOST_IP: reranking-svc
|
||||||
|
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||||
|
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||||
|
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||||
|
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||||
|
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: qna-config
|
||||||
|
namespace: default
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: chatqna-backend-server-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/chatqna:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: chatqna-backend-server-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 8888
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: 8
|
||||||
|
memory: 8000Mi
|
||||||
|
requests:
|
||||||
|
cpu: 8
|
||||||
|
memory: 8000Mi
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: chatqna-backend-server-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
nodePort: 30888
|
||||||
|
port: 8888
|
||||||
|
targetPort: 8888
|
||||||
|
selector:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
type: NodePort
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: dataprep-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: dataprep-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: dataprep-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/dataprep-redis:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: dataprep-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 6007
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: dataprep-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: dataprep-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: port1
|
||||||
|
port: 6007
|
||||||
|
targetPort: 6007
|
||||||
|
selector:
|
||||||
|
app: dataprep-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: embedding-dependency-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: embedding-dependency-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: embedding-dependency-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- args:
|
||||||
|
- --model-id
|
||||||
|
- $(EMBEDDING_MODEL_ID)
|
||||||
|
- --auto-truncate
|
||||||
|
envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: embedding-dependency-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 80
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: 76
|
||||||
|
memory: 20000Mi
|
||||||
|
requests:
|
||||||
|
cpu: 76
|
||||||
|
memory: 20000Mi
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /data
|
||||||
|
name: model-volume
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: shm
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: embedding-dependency-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
volumes:
|
||||||
|
- hostPath:
|
||||||
|
path: /mnt/models
|
||||||
|
type: Directory
|
||||||
|
name: model-volume
|
||||||
|
- emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: 1Gi
|
||||||
|
name: shm
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: embedding-dependency-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 6006
|
||||||
|
targetPort: 80
|
||||||
|
selector:
|
||||||
|
app: embedding-dependency-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: embedding-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: embedding-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: embedding-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/embedding-tei:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: embedding-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 6000
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: 4
|
||||||
|
memory: 4000Mi
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: embedding-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: embedding-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 6000
|
||||||
|
targetPort: 6000
|
||||||
|
selector:
|
||||||
|
app: embedding-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: llm-dependency-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 7
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: llm-dependency-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: llm-dependency-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- args:
|
||||||
|
- --model-id
|
||||||
|
- $(LLM_MODEL_ID)
|
||||||
|
- --max-input-length
|
||||||
|
- '1024'
|
||||||
|
- --max-total-tokens
|
||||||
|
- '2048'
|
||||||
|
- --max-batch-total-tokens
|
||||||
|
- '65536'
|
||||||
|
- --max-batch-prefill-tokens
|
||||||
|
- '4096'
|
||||||
|
env:
|
||||||
|
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||||
|
value: none
|
||||||
|
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||||
|
value: 'true'
|
||||||
|
- name: runtime
|
||||||
|
value: habana
|
||||||
|
- name: HABANA_VISIBLE_DEVICES
|
||||||
|
value: all
|
||||||
|
- name: HF_TOKEN
|
||||||
|
value: ${HF_TOKEN}
|
||||||
|
envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: llm-dependency-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 80
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
habana.ai/gaudi: 1
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add:
|
||||||
|
- SYS_NICE
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /data
|
||||||
|
name: model-volume
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: shm
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: llm-dependency-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
volumes:
|
||||||
|
- hostPath:
|
||||||
|
path: /mnt/models
|
||||||
|
type: Directory
|
||||||
|
name: model-volume
|
||||||
|
- emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: 1Gi
|
||||||
|
name: shm
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: llm-dependency-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 9009
|
||||||
|
targetPort: 80
|
||||||
|
selector:
|
||||||
|
app: llm-dependency-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: llm-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: llm-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: llm-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/llm-tgi:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: llm-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 9000
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: 4
|
||||||
|
memory: 4000Mi
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: llm-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: llm-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 9000
|
||||||
|
targetPort: 9000
|
||||||
|
selector:
|
||||||
|
app: llm-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: reranking-dependency-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: reranking-dependency-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: reranking-dependency-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- args:
|
||||||
|
- --model-id
|
||||||
|
- $(RERANK_MODEL_ID)
|
||||||
|
- --auto-truncate
|
||||||
|
env:
|
||||||
|
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||||
|
value: none
|
||||||
|
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||||
|
value: 'true'
|
||||||
|
- name: runtime
|
||||||
|
value: habana
|
||||||
|
- name: HABANA_VISIBLE_DEVICES
|
||||||
|
value: all
|
||||||
|
- name: HF_TOKEN
|
||||||
|
value: ${HF_TOKEN}
|
||||||
|
- name: MAX_WARMUP_SEQUENCE_LENGTH
|
||||||
|
value: '512'
|
||||||
|
envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/tei-gaudi:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: reranking-dependency-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 80
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
habana.ai/gaudi: 1
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /data
|
||||||
|
name: model-volume
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: shm
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: reranking-dependency-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
volumes:
|
||||||
|
- hostPath:
|
||||||
|
path: /mnt/models
|
||||||
|
type: Directory
|
||||||
|
name: model-volume
|
||||||
|
- emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: 1Gi
|
||||||
|
name: shm
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: reranking-dependency-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 8808
|
||||||
|
targetPort: 80
|
||||||
|
selector:
|
||||||
|
app: reranking-dependency-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: reranking-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: reranking-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: reranking-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/reranking-tei:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: reranking-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 8000
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: 4
|
||||||
|
memory: 4000Mi
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: reranking-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: reranking-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 8000
|
||||||
|
targetPort: 8000
|
||||||
|
selector:
|
||||||
|
app: reranking-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: retriever-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: retriever-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: retriever-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/retriever-redis:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: retriever-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 7000
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: 4
|
||||||
|
memory: 4000Mi
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: retriever-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: retriever-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 7000
|
||||||
|
targetPort: 7000
|
||||||
|
selector:
|
||||||
|
app: retriever-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: vector-db
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: vector-db
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: vector-db
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: redis/redis-stack:7.2.0-v9
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: vector-db
|
||||||
|
ports:
|
||||||
|
- containerPort: 6379
|
||||||
|
- containerPort: 8001
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: vector-db
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: vector-db
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: vector-db-service
|
||||||
|
port: 6379
|
||||||
|
targetPort: 6379
|
||||||
|
- name: vector-db-insight
|
||||||
|
port: 8001
|
||||||
|
targetPort: 8001
|
||||||
|
selector:
|
||||||
|
app: vector-db
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
@@ -0,0 +1,675 @@
|
|||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: v1
|
||||||
|
data:
|
||||||
|
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||||
|
EMBEDDING_SERVICE_HOST_IP: embedding-svc
|
||||||
|
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||||
|
INDEX_NAME: rag-redis
|
||||||
|
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||||
|
LLM_SERVICE_HOST_IP: llm-svc
|
||||||
|
NODE_SELECTOR: chatqna-opea
|
||||||
|
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||||
|
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||||
|
RERANK_SERVICE_HOST_IP: reranking-svc
|
||||||
|
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||||
|
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||||
|
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||||
|
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||||
|
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: qna-config
|
||||||
|
namespace: default
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: chatqna-backend-server-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 2
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/chatqna:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: chatqna-backend-server-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 8888
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: 8
|
||||||
|
memory: 8000Mi
|
||||||
|
requests:
|
||||||
|
cpu: 8
|
||||||
|
memory: 8000Mi
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: chatqna-backend-server-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
nodePort: 30888
|
||||||
|
port: 8888
|
||||||
|
targetPort: 8888
|
||||||
|
selector:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
type: NodePort
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: dataprep-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: dataprep-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: dataprep-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/dataprep-redis:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: dataprep-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 6007
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: dataprep-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: dataprep-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: port1
|
||||||
|
port: 6007
|
||||||
|
targetPort: 6007
|
||||||
|
selector:
|
||||||
|
app: dataprep-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: embedding-dependency-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 2
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: embedding-dependency-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: embedding-dependency-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- args:
|
||||||
|
- --model-id
|
||||||
|
- $(EMBEDDING_MODEL_ID)
|
||||||
|
- --auto-truncate
|
||||||
|
envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: embedding-dependency-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 80
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: 76
|
||||||
|
memory: 20000Mi
|
||||||
|
requests:
|
||||||
|
cpu: 76
|
||||||
|
memory: 20000Mi
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /data
|
||||||
|
name: model-volume
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: shm
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: embedding-dependency-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
volumes:
|
||||||
|
- hostPath:
|
||||||
|
path: /mnt/models
|
||||||
|
type: Directory
|
||||||
|
name: model-volume
|
||||||
|
- emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: 1Gi
|
||||||
|
name: shm
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: embedding-dependency-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 6006
|
||||||
|
targetPort: 80
|
||||||
|
selector:
|
||||||
|
app: embedding-dependency-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: embedding-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 2
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: embedding-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: embedding-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/embedding-tei:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: embedding-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 6000
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: 4
|
||||||
|
memory: 4000Mi
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: embedding-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: embedding-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 6000
|
||||||
|
targetPort: 6000
|
||||||
|
selector:
|
||||||
|
app: embedding-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: llm-dependency-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 15
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: llm-dependency-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: llm-dependency-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- args:
|
||||||
|
- --model-id
|
||||||
|
- $(LLM_MODEL_ID)
|
||||||
|
- --max-input-length
|
||||||
|
- '1024'
|
||||||
|
- --max-total-tokens
|
||||||
|
- '2048'
|
||||||
|
- --max-batch-total-tokens
|
||||||
|
- '65536'
|
||||||
|
- --max-batch-prefill-tokens
|
||||||
|
- '4096'
|
||||||
|
env:
|
||||||
|
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||||
|
value: none
|
||||||
|
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||||
|
value: 'true'
|
||||||
|
- name: runtime
|
||||||
|
value: habana
|
||||||
|
- name: HABANA_VISIBLE_DEVICES
|
||||||
|
value: all
|
||||||
|
- name: HF_TOKEN
|
||||||
|
value: ${HF_TOKEN}
|
||||||
|
envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: llm-dependency-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 80
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
habana.ai/gaudi: 1
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add:
|
||||||
|
- SYS_NICE
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /data
|
||||||
|
name: model-volume
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: shm
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: llm-dependency-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
volumes:
|
||||||
|
- hostPath:
|
||||||
|
path: /mnt/models
|
||||||
|
type: Directory
|
||||||
|
name: model-volume
|
||||||
|
- emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: 1Gi
|
||||||
|
name: shm
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: llm-dependency-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 9009
|
||||||
|
targetPort: 80
|
||||||
|
selector:
|
||||||
|
app: llm-dependency-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: llm-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 2
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: llm-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: llm-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/llm-tgi:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: llm-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 9000
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: 4
|
||||||
|
memory: 4000Mi
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: llm-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: llm-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 9000
|
||||||
|
targetPort: 9000
|
||||||
|
selector:
|
||||||
|
app: llm-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: reranking-dependency-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: reranking-dependency-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: reranking-dependency-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- args:
|
||||||
|
- --model-id
|
||||||
|
- $(RERANK_MODEL_ID)
|
||||||
|
- --auto-truncate
|
||||||
|
env:
|
||||||
|
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||||
|
value: none
|
||||||
|
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||||
|
value: 'true'
|
||||||
|
- name: runtime
|
||||||
|
value: habana
|
||||||
|
- name: HABANA_VISIBLE_DEVICES
|
||||||
|
value: all
|
||||||
|
- name: HF_TOKEN
|
||||||
|
value: ${HF_TOKEN}
|
||||||
|
- name: MAX_WARMUP_SEQUENCE_LENGTH
|
||||||
|
value: '512'
|
||||||
|
envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/tei-gaudi:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: reranking-dependency-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 80
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
habana.ai/gaudi: 1
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /data
|
||||||
|
name: model-volume
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: shm
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: reranking-dependency-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
volumes:
|
||||||
|
- hostPath:
|
||||||
|
path: /mnt/models
|
||||||
|
type: Directory
|
||||||
|
name: model-volume
|
||||||
|
- emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: 1Gi
|
||||||
|
name: shm
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: reranking-dependency-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 8808
|
||||||
|
targetPort: 80
|
||||||
|
selector:
|
||||||
|
app: reranking-dependency-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: reranking-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 2
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: reranking-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: reranking-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/reranking-tei:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: reranking-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 8000
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: 4
|
||||||
|
memory: 4000Mi
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: reranking-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: reranking-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 8000
|
||||||
|
targetPort: 8000
|
||||||
|
selector:
|
||||||
|
app: reranking-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: retriever-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 2
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: retriever-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: retriever-deploy
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/retriever-redis:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: retriever-deploy
|
||||||
|
ports:
|
||||||
|
- containerPort: 7000
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: 4
|
||||||
|
memory: 4000Mi
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: retriever-deploy
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: retriever-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 7000
|
||||||
|
targetPort: 7000
|
||||||
|
selector:
|
||||||
|
app: retriever-deploy
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: vector-db
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: vector-db
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: vector-db
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: redis/redis-stack:7.2.0-v9
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: vector-db
|
||||||
|
ports:
|
||||||
|
- containerPort: 6379
|
||||||
|
- containerPort: 8001
|
||||||
|
hostIPC: true
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
serviceAccountName: default
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: vector-db
|
||||||
|
maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: vector-db
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: vector-db-service
|
||||||
|
port: 6379
|
||||||
|
targetPort: 6379
|
||||||
|
- name: vector-db-insight
|
||||||
|
port: 8001
|
||||||
|
targetPort: 8001
|
||||||
|
selector:
|
||||||
|
app: vector-db
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
@@ -0,0 +1,614 @@
|
|||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: v1
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: qna-config
|
||||||
|
namespace: default
|
||||||
|
data:
|
||||||
|
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||||
|
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||||
|
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||||
|
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||||
|
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||||
|
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||||
|
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||||
|
INDEX_NAME: rag-redis
|
||||||
|
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||||
|
EMBEDDING_SERVICE_HOST_IP: embedding-svc
|
||||||
|
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||||
|
RERANK_SERVICE_HOST_IP: reranking-svc
|
||||||
|
NODE_SELECTOR: chatqna-opea
|
||||||
|
LLM_SERVICE_HOST_IP: llm-svc
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: chatqna-backend-server-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 4
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
hostIPC: true
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/chatqna-without-rerank:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: chatqna-backend-server-deploy
|
||||||
|
args: null
|
||||||
|
ports:
|
||||||
|
- containerPort: 8888
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: 8
|
||||||
|
memory: 4000Mi
|
||||||
|
requests:
|
||||||
|
cpu: 8
|
||||||
|
memory: 4000Mi
|
||||||
|
serviceAccountName: default
|
||||||
|
---
|
||||||
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: chatqna-backend-server-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: NodePort
|
||||||
|
selector:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 8888
|
||||||
|
targetPort: 8888
|
||||||
|
nodePort: 30888
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: dataprep-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: dataprep-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: dataprep-deploy
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: dataprep-deploy
|
||||||
|
hostIPC: true
|
||||||
|
containers:
|
||||||
|
- env:
|
||||||
|
- name: REDIS_URL
|
||||||
|
valueFrom:
|
||||||
|
configMapKeyRef:
|
||||||
|
name: qna-config
|
||||||
|
key: REDIS_URL
|
||||||
|
- name: TEI_ENDPOINT
|
||||||
|
valueFrom:
|
||||||
|
configMapKeyRef:
|
||||||
|
name: qna-config
|
||||||
|
key: TEI_EMBEDDING_ENDPOINT
|
||||||
|
- name: INDEX_NAME
|
||||||
|
valueFrom:
|
||||||
|
configMapKeyRef:
|
||||||
|
name: qna-config
|
||||||
|
key: INDEX_NAME
|
||||||
|
image: opea/dataprep-redis:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: dataprep-deploy
|
||||||
|
args: null
|
||||||
|
ports:
|
||||||
|
- containerPort: 6007
|
||||||
|
- containerPort: 6008
|
||||||
|
- containerPort: 6009
|
||||||
|
serviceAccountName: default
|
||||||
|
---
|
||||||
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: dataprep-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: dataprep-deploy
|
||||||
|
ports:
|
||||||
|
- name: port1
|
||||||
|
port: 6007
|
||||||
|
targetPort: 6007
|
||||||
|
- name: port2
|
||||||
|
port: 6008
|
||||||
|
targetPort: 6008
|
||||||
|
- name: port3
|
||||||
|
port: 6009
|
||||||
|
targetPort: 6009
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: embedding-dependency-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 4
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: embedding-dependency-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: embedding-dependency-deploy
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||||
|
name: embedding-dependency-deploy
|
||||||
|
args:
|
||||||
|
- --model-id
|
||||||
|
- $(EMBEDDING_MODEL_ID)
|
||||||
|
- --auto-truncate
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /data
|
||||||
|
name: model-volume
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: shm
|
||||||
|
ports:
|
||||||
|
- containerPort: 80
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: 76
|
||||||
|
memory: 20000Mi
|
||||||
|
requests:
|
||||||
|
cpu: 76
|
||||||
|
memory: 20000Mi
|
||||||
|
serviceAccountName: default
|
||||||
|
volumes:
|
||||||
|
- name: model-volume
|
||||||
|
hostPath:
|
||||||
|
path: /mnt/models
|
||||||
|
type: Directory
|
||||||
|
- name: shm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: 1Gi
|
||||||
|
---
|
||||||
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: embedding-dependency-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: embedding-dependency-deploy
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 6006
|
||||||
|
targetPort: 80
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: embedding-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 4
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: embedding-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: embedding-deploy
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: embedding-deploy
|
||||||
|
hostIPC: true
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/embedding-tei:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: embedding-deploy
|
||||||
|
args: null
|
||||||
|
ports:
|
||||||
|
- containerPort: 6000
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: 4
|
||||||
|
requests:
|
||||||
|
cpu: 4
|
||||||
|
serviceAccountName: default
|
||||||
|
---
|
||||||
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: embedding-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: embedding-deploy
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 6000
|
||||||
|
targetPort: 6000
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: llm-dependency-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 32
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: llm-dependency-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: llm-dependency-deploy
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
hostIPC: true
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
||||||
|
name: llm-dependency-deploy-demo
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add:
|
||||||
|
- SYS_NICE
|
||||||
|
args:
|
||||||
|
- --model-id
|
||||||
|
- $(LLM_MODEL_ID)
|
||||||
|
- --max-input-length
|
||||||
|
- '1024'
|
||||||
|
- --max-total-tokens
|
||||||
|
- '2048'
|
||||||
|
- --max-batch-total-tokens
|
||||||
|
- '65536'
|
||||||
|
- --max-batch-prefill-tokens
|
||||||
|
- '4096'
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /data
|
||||||
|
name: model-volume
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: shm
|
||||||
|
ports:
|
||||||
|
- containerPort: 80
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
habana.ai/gaudi: 1
|
||||||
|
env:
|
||||||
|
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||||
|
value: none
|
||||||
|
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||||
|
value: 'true'
|
||||||
|
- name: runtime
|
||||||
|
value: habana
|
||||||
|
- name: HABANA_VISIBLE_DEVICES
|
||||||
|
value: all
|
||||||
|
- name: HF_TOKEN
|
||||||
|
value: ${HF_TOKEN}
|
||||||
|
serviceAccountName: default
|
||||||
|
volumes:
|
||||||
|
- name: model-volume
|
||||||
|
hostPath:
|
||||||
|
path: /mnt/models
|
||||||
|
type: Directory
|
||||||
|
- name: shm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: 1Gi
|
||||||
|
---
|
||||||
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: llm-dependency-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: llm-dependency-deploy
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 9009
|
||||||
|
targetPort: 80
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: llm-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 4
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: llm-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: llm-deploy
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: llm-deploy
|
||||||
|
hostIPC: true
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/llm-tgi:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: llm-deploy
|
||||||
|
args: null
|
||||||
|
ports:
|
||||||
|
- containerPort: 9000
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: 4
|
||||||
|
requests:
|
||||||
|
cpu: 4
|
||||||
|
serviceAccountName: default
|
||||||
|
---
|
||||||
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: llm-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: llm-deploy
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 9000
|
||||||
|
targetPort: 9000
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: retriever-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 4
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: retriever-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: retriever-deploy
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: retriever-deploy
|
||||||
|
hostIPC: true
|
||||||
|
containers:
|
||||||
|
- env:
|
||||||
|
- name: REDIS_URL
|
||||||
|
valueFrom:
|
||||||
|
configMapKeyRef:
|
||||||
|
name: qna-config
|
||||||
|
key: REDIS_URL
|
||||||
|
- name: TEI_EMBEDDING_ENDPOINT
|
||||||
|
valueFrom:
|
||||||
|
configMapKeyRef:
|
||||||
|
name: qna-config
|
||||||
|
key: TEI_EMBEDDING_ENDPOINT
|
||||||
|
- name: HUGGINGFACEHUB_API_TOKEN
|
||||||
|
valueFrom:
|
||||||
|
configMapKeyRef:
|
||||||
|
name: qna-config
|
||||||
|
key: HUGGINGFACEHUB_API_TOKEN
|
||||||
|
- name: INDEX_NAME
|
||||||
|
valueFrom:
|
||||||
|
configMapKeyRef:
|
||||||
|
name: qna-config
|
||||||
|
key: INDEX_NAME
|
||||||
|
image: opea/retriever-redis:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: retriever-deploy
|
||||||
|
args: null
|
||||||
|
ports:
|
||||||
|
- containerPort: 7000
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: 8
|
||||||
|
memory: 2500Mi
|
||||||
|
requests:
|
||||||
|
cpu: 8
|
||||||
|
memory: 2500Mi
|
||||||
|
serviceAccountName: default
|
||||||
|
---
|
||||||
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: retriever-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: retriever-deploy
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 7000
|
||||||
|
targetPort: 7000
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: vector-db
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: vector-db
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: vector-db
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: vector-db
|
||||||
|
containers:
|
||||||
|
- name: vector-db
|
||||||
|
image: redis/redis-stack:7.2.0-v9
|
||||||
|
ports:
|
||||||
|
- containerPort: 6379
|
||||||
|
- containerPort: 8001
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: vector-db
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: vector-db
|
||||||
|
ports:
|
||||||
|
- name: vector-db-service
|
||||||
|
port: 6379
|
||||||
|
targetPort: 6379
|
||||||
|
- name: vector-db-insight
|
||||||
|
port: 8001
|
||||||
|
targetPort: 8001
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
@@ -0,0 +1,614 @@
|
|||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: v1
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: qna-config
|
||||||
|
namespace: default
|
||||||
|
data:
|
||||||
|
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||||
|
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||||
|
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||||
|
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||||
|
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||||
|
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||||
|
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||||
|
INDEX_NAME: rag-redis
|
||||||
|
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||||
|
EMBEDDING_SERVICE_HOST_IP: embedding-svc
|
||||||
|
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||||
|
RERANK_SERVICE_HOST_IP: reranking-svc
|
||||||
|
NODE_SELECTOR: chatqna-opea
|
||||||
|
LLM_SERVICE_HOST_IP: llm-svc
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: chatqna-backend-server-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
hostIPC: true
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/chatqna-without-rerank:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: chatqna-backend-server-deploy
|
||||||
|
args: null
|
||||||
|
ports:
|
||||||
|
- containerPort: 8888
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: 8
|
||||||
|
memory: 4000Mi
|
||||||
|
requests:
|
||||||
|
cpu: 8
|
||||||
|
memory: 4000Mi
|
||||||
|
serviceAccountName: default
|
||||||
|
---
|
||||||
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: chatqna-backend-server-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: NodePort
|
||||||
|
selector:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 8888
|
||||||
|
targetPort: 8888
|
||||||
|
nodePort: 30888
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: dataprep-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: dataprep-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: dataprep-deploy
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: dataprep-deploy
|
||||||
|
hostIPC: true
|
||||||
|
containers:
|
||||||
|
- env:
|
||||||
|
- name: REDIS_URL
|
||||||
|
valueFrom:
|
||||||
|
configMapKeyRef:
|
||||||
|
name: qna-config
|
||||||
|
key: REDIS_URL
|
||||||
|
- name: TEI_ENDPOINT
|
||||||
|
valueFrom:
|
||||||
|
configMapKeyRef:
|
||||||
|
name: qna-config
|
||||||
|
key: TEI_EMBEDDING_ENDPOINT
|
||||||
|
- name: INDEX_NAME
|
||||||
|
valueFrom:
|
||||||
|
configMapKeyRef:
|
||||||
|
name: qna-config
|
||||||
|
key: INDEX_NAME
|
||||||
|
image: opea/dataprep-redis:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: dataprep-deploy
|
||||||
|
args: null
|
||||||
|
ports:
|
||||||
|
- containerPort: 6007
|
||||||
|
- containerPort: 6008
|
||||||
|
- containerPort: 6009
|
||||||
|
serviceAccountName: default
|
||||||
|
---
|
||||||
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: dataprep-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: dataprep-deploy
|
||||||
|
ports:
|
||||||
|
- name: port1
|
||||||
|
port: 6007
|
||||||
|
targetPort: 6007
|
||||||
|
- name: port2
|
||||||
|
port: 6008
|
||||||
|
targetPort: 6008
|
||||||
|
- name: port3
|
||||||
|
port: 6009
|
||||||
|
targetPort: 6009
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: embedding-dependency-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: embedding-dependency-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: embedding-dependency-deploy
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||||
|
name: embedding-dependency-deploy
|
||||||
|
args:
|
||||||
|
- --model-id
|
||||||
|
- $(EMBEDDING_MODEL_ID)
|
||||||
|
- --auto-truncate
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /data
|
||||||
|
name: model-volume
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: shm
|
||||||
|
ports:
|
||||||
|
- containerPort: 80
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: 76
|
||||||
|
memory: 20000Mi
|
||||||
|
requests:
|
||||||
|
cpu: 76
|
||||||
|
memory: 20000Mi
|
||||||
|
serviceAccountName: default
|
||||||
|
volumes:
|
||||||
|
- name: model-volume
|
||||||
|
hostPath:
|
||||||
|
path: /mnt/models
|
||||||
|
type: Directory
|
||||||
|
- name: shm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: 1Gi
|
||||||
|
---
|
||||||
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: embedding-dependency-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: embedding-dependency-deploy
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 6006
|
||||||
|
targetPort: 80
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: embedding-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: embedding-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: embedding-deploy
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: embedding-deploy
|
||||||
|
hostIPC: true
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/embedding-tei:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: embedding-deploy
|
||||||
|
args: null
|
||||||
|
ports:
|
||||||
|
- containerPort: 6000
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: 4
|
||||||
|
requests:
|
||||||
|
cpu: 4
|
||||||
|
serviceAccountName: default
|
||||||
|
---
|
||||||
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: embedding-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: embedding-deploy
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 6000
|
||||||
|
targetPort: 6000
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: llm-dependency-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 8
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: llm-dependency-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: llm-dependency-deploy
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
hostIPC: true
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
||||||
|
name: llm-dependency-deploy-demo
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add:
|
||||||
|
- SYS_NICE
|
||||||
|
args:
|
||||||
|
- --model-id
|
||||||
|
- $(LLM_MODEL_ID)
|
||||||
|
- --max-input-length
|
||||||
|
- '1024'
|
||||||
|
- --max-total-tokens
|
||||||
|
- '2048'
|
||||||
|
- --max-batch-total-tokens
|
||||||
|
- '65536'
|
||||||
|
- --max-batch-prefill-tokens
|
||||||
|
- '4096'
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /data
|
||||||
|
name: model-volume
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: shm
|
||||||
|
ports:
|
||||||
|
- containerPort: 80
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
habana.ai/gaudi: 1
|
||||||
|
env:
|
||||||
|
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||||
|
value: none
|
||||||
|
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||||
|
value: 'true'
|
||||||
|
- name: runtime
|
||||||
|
value: habana
|
||||||
|
- name: HABANA_VISIBLE_DEVICES
|
||||||
|
value: all
|
||||||
|
- name: HF_TOKEN
|
||||||
|
value: ${HF_TOKEN}
|
||||||
|
serviceAccountName: default
|
||||||
|
volumes:
|
||||||
|
- name: model-volume
|
||||||
|
hostPath:
|
||||||
|
path: /mnt/models
|
||||||
|
type: Directory
|
||||||
|
- name: shm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: 1Gi
|
||||||
|
---
|
||||||
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: llm-dependency-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: llm-dependency-deploy
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 9009
|
||||||
|
targetPort: 80
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: llm-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: llm-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: llm-deploy
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: llm-deploy
|
||||||
|
hostIPC: true
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/llm-tgi:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: llm-deploy
|
||||||
|
args: null
|
||||||
|
ports:
|
||||||
|
- containerPort: 9000
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: 4
|
||||||
|
requests:
|
||||||
|
cpu: 4
|
||||||
|
serviceAccountName: default
|
||||||
|
---
|
||||||
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: llm-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: llm-deploy
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 9000
|
||||||
|
targetPort: 9000
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: retriever-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: retriever-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: retriever-deploy
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: retriever-deploy
|
||||||
|
hostIPC: true
|
||||||
|
containers:
|
||||||
|
- env:
|
||||||
|
- name: REDIS_URL
|
||||||
|
valueFrom:
|
||||||
|
configMapKeyRef:
|
||||||
|
name: qna-config
|
||||||
|
key: REDIS_URL
|
||||||
|
- name: TEI_EMBEDDING_ENDPOINT
|
||||||
|
valueFrom:
|
||||||
|
configMapKeyRef:
|
||||||
|
name: qna-config
|
||||||
|
key: TEI_EMBEDDING_ENDPOINT
|
||||||
|
- name: HUGGINGFACEHUB_API_TOKEN
|
||||||
|
valueFrom:
|
||||||
|
configMapKeyRef:
|
||||||
|
name: qna-config
|
||||||
|
key: HUGGINGFACEHUB_API_TOKEN
|
||||||
|
- name: INDEX_NAME
|
||||||
|
valueFrom:
|
||||||
|
configMapKeyRef:
|
||||||
|
name: qna-config
|
||||||
|
key: INDEX_NAME
|
||||||
|
image: opea/retriever-redis:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: retriever-deploy
|
||||||
|
args: null
|
||||||
|
ports:
|
||||||
|
- containerPort: 7000
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: 8
|
||||||
|
memory: 2500Mi
|
||||||
|
requests:
|
||||||
|
cpu: 8
|
||||||
|
memory: 2500Mi
|
||||||
|
serviceAccountName: default
|
||||||
|
---
|
||||||
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: retriever-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: retriever-deploy
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 7000
|
||||||
|
targetPort: 7000
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: vector-db
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: vector-db
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: vector-db
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: vector-db
|
||||||
|
containers:
|
||||||
|
- name: vector-db
|
||||||
|
image: redis/redis-stack:7.2.0-v9
|
||||||
|
ports:
|
||||||
|
- containerPort: 6379
|
||||||
|
- containerPort: 8001
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: vector-db
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: vector-db
|
||||||
|
ports:
|
||||||
|
- name: vector-db-service
|
||||||
|
port: 6379
|
||||||
|
targetPort: 6379
|
||||||
|
- name: vector-db-insight
|
||||||
|
port: 8001
|
||||||
|
targetPort: 8001
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
@@ -0,0 +1,614 @@
|
|||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: v1
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: qna-config
|
||||||
|
namespace: default
|
||||||
|
data:
|
||||||
|
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||||
|
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||||
|
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||||
|
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||||
|
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||||
|
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||||
|
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||||
|
INDEX_NAME: rag-redis
|
||||||
|
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||||
|
EMBEDDING_SERVICE_HOST_IP: embedding-svc
|
||||||
|
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||||
|
RERANK_SERVICE_HOST_IP: reranking-svc
|
||||||
|
NODE_SELECTOR: chatqna-opea
|
||||||
|
LLM_SERVICE_HOST_IP: llm-svc
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: chatqna-backend-server-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 2
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
hostIPC: true
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/chatqna-without-rerank:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: chatqna-backend-server-deploy
|
||||||
|
args: null
|
||||||
|
ports:
|
||||||
|
- containerPort: 8888
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: 8
|
||||||
|
memory: 4000Mi
|
||||||
|
requests:
|
||||||
|
cpu: 8
|
||||||
|
memory: 4000Mi
|
||||||
|
serviceAccountName: default
|
||||||
|
---
|
||||||
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: chatqna-backend-server-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: NodePort
|
||||||
|
selector:
|
||||||
|
app: chatqna-backend-server-deploy
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 8888
|
||||||
|
targetPort: 8888
|
||||||
|
nodePort: 30888
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: dataprep-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: dataprep-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: dataprep-deploy
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: dataprep-deploy
|
||||||
|
hostIPC: true
|
||||||
|
containers:
|
||||||
|
- env:
|
||||||
|
- name: REDIS_URL
|
||||||
|
valueFrom:
|
||||||
|
configMapKeyRef:
|
||||||
|
name: qna-config
|
||||||
|
key: REDIS_URL
|
||||||
|
- name: TEI_ENDPOINT
|
||||||
|
valueFrom:
|
||||||
|
configMapKeyRef:
|
||||||
|
name: qna-config
|
||||||
|
key: TEI_EMBEDDING_ENDPOINT
|
||||||
|
- name: INDEX_NAME
|
||||||
|
valueFrom:
|
||||||
|
configMapKeyRef:
|
||||||
|
name: qna-config
|
||||||
|
key: INDEX_NAME
|
||||||
|
image: opea/dataprep-redis:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: dataprep-deploy
|
||||||
|
args: null
|
||||||
|
ports:
|
||||||
|
- containerPort: 6007
|
||||||
|
- containerPort: 6008
|
||||||
|
- containerPort: 6009
|
||||||
|
serviceAccountName: default
|
||||||
|
---
|
||||||
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: dataprep-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: dataprep-deploy
|
||||||
|
ports:
|
||||||
|
- name: port1
|
||||||
|
port: 6007
|
||||||
|
targetPort: 6007
|
||||||
|
- name: port2
|
||||||
|
port: 6008
|
||||||
|
targetPort: 6008
|
||||||
|
- name: port3
|
||||||
|
port: 6009
|
||||||
|
targetPort: 6009
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: embedding-dependency-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 2
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: embedding-dependency-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: embedding-dependency-deploy
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||||
|
name: embedding-dependency-deploy
|
||||||
|
args:
|
||||||
|
- --model-id
|
||||||
|
- $(EMBEDDING_MODEL_ID)
|
||||||
|
- --auto-truncate
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /data
|
||||||
|
name: model-volume
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: shm
|
||||||
|
ports:
|
||||||
|
- containerPort: 80
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: 76
|
||||||
|
memory: 20000Mi
|
||||||
|
requests:
|
||||||
|
cpu: 76
|
||||||
|
memory: 20000Mi
|
||||||
|
serviceAccountName: default
|
||||||
|
volumes:
|
||||||
|
- name: model-volume
|
||||||
|
hostPath:
|
||||||
|
path: /mnt/models
|
||||||
|
type: Directory
|
||||||
|
- name: shm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: 1Gi
|
||||||
|
---
|
||||||
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: embedding-dependency-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: embedding-dependency-deploy
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 6006
|
||||||
|
targetPort: 80
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: embedding-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 2
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: embedding-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: embedding-deploy
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: embedding-deploy
|
||||||
|
hostIPC: true
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/embedding-tei:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: embedding-deploy
|
||||||
|
args: null
|
||||||
|
ports:
|
||||||
|
- containerPort: 6000
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: 4
|
||||||
|
requests:
|
||||||
|
cpu: 4
|
||||||
|
serviceAccountName: default
|
||||||
|
---
|
||||||
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: embedding-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: embedding-deploy
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 6000
|
||||||
|
targetPort: 6000
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: llm-dependency-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 16
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: llm-dependency-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: llm-dependency-deploy
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
hostIPC: true
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
||||||
|
name: llm-dependency-deploy-demo
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add:
|
||||||
|
- SYS_NICE
|
||||||
|
args:
|
||||||
|
- --model-id
|
||||||
|
- $(LLM_MODEL_ID)
|
||||||
|
- --max-input-length
|
||||||
|
- '1024'
|
||||||
|
- --max-total-tokens
|
||||||
|
- '2048'
|
||||||
|
- --max-batch-total-tokens
|
||||||
|
- '65536'
|
||||||
|
- --max-batch-prefill-tokens
|
||||||
|
- '4096'
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /data
|
||||||
|
name: model-volume
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: shm
|
||||||
|
ports:
|
||||||
|
- containerPort: 80
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
habana.ai/gaudi: 1
|
||||||
|
env:
|
||||||
|
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||||
|
value: none
|
||||||
|
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||||
|
value: 'true'
|
||||||
|
- name: runtime
|
||||||
|
value: habana
|
||||||
|
- name: HABANA_VISIBLE_DEVICES
|
||||||
|
value: all
|
||||||
|
- name: HF_TOKEN
|
||||||
|
value: ${HF_TOKEN}
|
||||||
|
serviceAccountName: default
|
||||||
|
volumes:
|
||||||
|
- name: model-volume
|
||||||
|
hostPath:
|
||||||
|
path: /mnt/models
|
||||||
|
type: Directory
|
||||||
|
- name: shm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: 1Gi
|
||||||
|
---
|
||||||
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: llm-dependency-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: llm-dependency-deploy
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 9009
|
||||||
|
targetPort: 80
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: llm-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 2
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: llm-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: llm-deploy
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: llm-deploy
|
||||||
|
hostIPC: true
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: qna-config
|
||||||
|
image: opea/llm-tgi:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: llm-deploy
|
||||||
|
args: null
|
||||||
|
ports:
|
||||||
|
- containerPort: 9000
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: 4
|
||||||
|
requests:
|
||||||
|
cpu: 4
|
||||||
|
serviceAccountName: default
|
||||||
|
---
|
||||||
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: llm-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: llm-deploy
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 9000
|
||||||
|
targetPort: 9000
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: retriever-deploy
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 2
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: retriever-deploy
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||||
|
labels:
|
||||||
|
app: retriever-deploy
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: retriever-deploy
|
||||||
|
hostIPC: true
|
||||||
|
containers:
|
||||||
|
- env:
|
||||||
|
- name: REDIS_URL
|
||||||
|
valueFrom:
|
||||||
|
configMapKeyRef:
|
||||||
|
name: qna-config
|
||||||
|
key: REDIS_URL
|
||||||
|
- name: TEI_EMBEDDING_ENDPOINT
|
||||||
|
valueFrom:
|
||||||
|
configMapKeyRef:
|
||||||
|
name: qna-config
|
||||||
|
key: TEI_EMBEDDING_ENDPOINT
|
||||||
|
- name: HUGGINGFACEHUB_API_TOKEN
|
||||||
|
valueFrom:
|
||||||
|
configMapKeyRef:
|
||||||
|
name: qna-config
|
||||||
|
key: HUGGINGFACEHUB_API_TOKEN
|
||||||
|
- name: INDEX_NAME
|
||||||
|
valueFrom:
|
||||||
|
configMapKeyRef:
|
||||||
|
name: qna-config
|
||||||
|
key: INDEX_NAME
|
||||||
|
image: opea/retriever-redis:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
name: retriever-deploy
|
||||||
|
args: null
|
||||||
|
ports:
|
||||||
|
- containerPort: 7000
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: 8
|
||||||
|
memory: 2500Mi
|
||||||
|
requests:
|
||||||
|
cpu: 8
|
||||||
|
memory: 2500Mi
|
||||||
|
serviceAccountName: default
|
||||||
|
---
|
||||||
|
kind: Service
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: retriever-svc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: retriever-deploy
|
||||||
|
ports:
|
||||||
|
- name: service
|
||||||
|
port: 7000
|
||||||
|
targetPort: 7000
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: vector-db
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: vector-db
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: vector-db
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-type: chatqna-opea
|
||||||
|
topologySpreadConstraints:
|
||||||
|
- maxSkew: 1
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
whenUnsatisfiable: ScheduleAnyway
|
||||||
|
labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: vector-db
|
||||||
|
containers:
|
||||||
|
- name: vector-db
|
||||||
|
image: redis/redis-stack:7.2.0-v9
|
||||||
|
ports:
|
||||||
|
- containerPort: 6379
|
||||||
|
- containerPort: 8001
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: vector-db
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: vector-db
|
||||||
|
ports:
|
||||||
|
- name: vector-db-service
|
||||||
|
port: 6379
|
||||||
|
targetPort: 6379
|
||||||
|
- name: vector-db-insight
|
||||||
|
port: 8001
|
||||||
|
targetPort: 8001
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
@@ -3,8 +3,7 @@
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
|
|
||||||
from comps import MicroService, ServiceOrchestrator, ServiceType
|
from comps import ChatQnAGateway, MicroService, ServiceOrchestrator, ServiceType
|
||||||
from gateway import ChatQnAGateway
|
|
||||||
|
|
||||||
MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "0.0.0.0")
|
MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "0.0.0.0")
|
||||||
MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))
|
MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ opea_micro_services:
|
|||||||
tei-embedding-service:
|
tei-embedding-service:
|
||||||
host: ${TEI_EMBEDDING_SERVICE_IP}
|
host: ${TEI_EMBEDDING_SERVICE_IP}
|
||||||
ports: ${TEI_EMBEDDING_SERVICE_PORT}
|
ports: ${TEI_EMBEDDING_SERVICE_PORT}
|
||||||
image: opea/tei-gaudi:latest
|
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||||
volumes:
|
volumes:
|
||||||
- "./data:/data"
|
- "./data:/data"
|
||||||
runtime: habana
|
runtime: habana
|
||||||
@@ -48,7 +48,7 @@ opea_micro_services:
|
|||||||
tgi-service:
|
tgi-service:
|
||||||
host: ${TGI_SERVICE_IP}
|
host: ${TGI_SERVICE_IP}
|
||||||
ports: ${TGI_SERVICE_PORT}
|
ports: ${TGI_SERVICE_PORT}
|
||||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
|
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||||
volumes:
|
volumes:
|
||||||
- "./data:/data"
|
- "./data:/data"
|
||||||
runtime: habana
|
runtime: habana
|
||||||
@@ -56,10 +56,13 @@ opea_micro_services:
|
|||||||
- SYS_NICE
|
- SYS_NICE
|
||||||
ipc: host
|
ipc: host
|
||||||
environment:
|
environment:
|
||||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||||
HABANA_VISIBLE_DEVICES: all
|
HABANA_VISIBLE_DEVICES: all
|
||||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
ENABLE_HPU_GRAPH: true
|
||||||
|
LIMIT_HPU_GRAPH: true
|
||||||
|
USE_FLASH_ATTENTION: true
|
||||||
|
FLASH_ATTENTION_RECOMPUTE: true
|
||||||
model-id: ${LLM_MODEL_ID}
|
model-id: ${LLM_MODEL_ID}
|
||||||
llm:
|
llm:
|
||||||
host: ${LLM_SERVICE_HOST_IP}
|
host: ${LLM_SERVICE_HOST_IP}
|
||||||
|
|||||||
@@ -69,10 +69,12 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **k
|
|||||||
next_inputs = {}
|
next_inputs = {}
|
||||||
next_inputs["model"] = "tgi" # specifically clarify the fake model to make the format unified
|
next_inputs["model"] = "tgi" # specifically clarify the fake model to make the format unified
|
||||||
next_inputs["messages"] = [{"role": "user", "content": inputs["inputs"]}]
|
next_inputs["messages"] = [{"role": "user", "content": inputs["inputs"]}]
|
||||||
next_inputs["max_tokens"] = llm_parameters_dict["max_new_tokens"]
|
next_inputs["max_tokens"] = llm_parameters_dict["max_tokens"]
|
||||||
next_inputs["top_p"] = llm_parameters_dict["top_p"]
|
next_inputs["top_p"] = llm_parameters_dict["top_p"]
|
||||||
next_inputs["stream"] = inputs["streaming"]
|
next_inputs["stream"] = inputs["streaming"]
|
||||||
next_inputs["frequency_penalty"] = inputs["repetition_penalty"]
|
next_inputs["frequency_penalty"] = inputs["frequency_penalty"]
|
||||||
|
next_inputs["presence_penalty"] = inputs["presence_penalty"]
|
||||||
|
next_inputs["repetition_penalty"] = inputs["repetition_penalty"]
|
||||||
next_inputs["temperature"] = inputs["temperature"]
|
next_inputs["temperature"] = inputs["temperature"]
|
||||||
inputs = next_inputs
|
inputs = next_inputs
|
||||||
|
|
||||||
|
|||||||
35
ChatQnA/docker_compose/install_docker.sh
Normal file
35
ChatQnA/docker_compose/install_docker.sh
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
# Update the package index
|
||||||
|
sudo apt-get -y update
|
||||||
|
|
||||||
|
# Install prerequisites
|
||||||
|
sudo apt-get -y install ca-certificates curl
|
||||||
|
|
||||||
|
# Create the directory for the Docker GPG key
|
||||||
|
sudo install -m 0755 -d /etc/apt/keyrings
|
||||||
|
|
||||||
|
# Add Docker's official GPG key
|
||||||
|
sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc
|
||||||
|
|
||||||
|
# Set permissions for the GPG key
|
||||||
|
sudo chmod a+r /etc/apt/keyrings/docker.asc
|
||||||
|
|
||||||
|
# Add Docker repository to the sources list
|
||||||
|
echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \
|
||||||
|
$(. /etc/os-release && echo "$VERSION_CODENAME") stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
|
||||||
|
|
||||||
|
# Update the package index with Docker packages
|
||||||
|
sudo apt-get -y update
|
||||||
|
|
||||||
|
# Install Docker packages
|
||||||
|
sudo apt-get -y install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
|
||||||
|
|
||||||
|
# Add the current user to the docker group
|
||||||
|
sudo usermod -aG docker $USER
|
||||||
|
|
||||||
|
# Optional: Verify that Docker is installed correctly
|
||||||
|
sudo docker --version
|
||||||
@@ -11,36 +11,84 @@ git clone https://github.com/opea-project/GenAIComps.git
|
|||||||
cd GenAIComps
|
cd GenAIComps
|
||||||
```
|
```
|
||||||
|
|
||||||
|
If you are in a proxy environment, set the proxy-related environment variables:
|
||||||
|
|
||||||
|
export http_proxy="Your_HTTP_Proxy"
|
||||||
|
export https_proxy="Your_HTTPs_Proxy"
|
||||||
|
|
||||||
### 1. Build Embedding Image
|
### 1. Build Embedding Image
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker build --no-cache -t opea/embedding-tei:latest -f comps/embeddings/tei/langchain/Dockerfile .
|
docker build --no-cache -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/tei/langchain/Dockerfile .
|
||||||
```
|
```
|
||||||
|
|
||||||
### 2. Build Retriever Image
|
### 2. Build Retriever Image
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker build --no-cache -t opea/retriever-redis:latest -f comps/retrievers/redis/langchain/Dockerfile .
|
docker build --no-cache -t opea/retriever-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/redis/langchain/Dockerfile .
|
||||||
```
|
```
|
||||||
|
|
||||||
### 3. Build Rerank Image
|
### 3. Build Rerank Image
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker build --no-cache -t opea/reranking-tei:latest -f comps/reranks/tei/Dockerfile .
|
docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/tei/Dockerfile .
|
||||||
```
|
```
|
||||||
|
|
||||||
### 4. Build LLM Image
|
### 4. Set up Ollama Service and Build LLM Image
|
||||||
|
|
||||||
We use [Ollama](https://ollama.com/) as our LLM service for AIPC. Please pre-download Ollama on your PC.
|
We use [Ollama](https://ollama.com/) as our LLM service for AIPC.
|
||||||
|
|
||||||
|
Please set up Ollama on your PC by following the instructions below. This will set the entrypoint needed for Ollama to suit the ChatQnA examples.
|
||||||
|
|
||||||
|
#### 4.1 Set Up Ollama LLM Service
|
||||||
|
|
||||||
|
Install Ollama service with one command
|
||||||
|
|
||||||
|
curl -fsSL https://ollama.com/install.sh | sh
|
||||||
|
|
||||||
|
##### Set Ollama Service Configuration
|
||||||
|
|
||||||
|
The Ollama service configuration file is `/etc/systemd/system/ollama.service`. Edit the file to set the OLLAMA_HOST environment variable (replace **${host_ip}** with your host IPv4 address).
|
||||||
|
|
||||||
|
```
|
||||||
|
Environment="OLLAMA_HOST=${host_ip}:11434"
|
||||||
|
```
|
||||||
|
|
||||||
|
##### Set https_proxy environment for Ollama
|
||||||
|
|
||||||
|
If your system accesses the network through a proxy, add https_proxy to the Ollama service configuration file:
|
||||||
|
|
||||||
|
```
|
||||||
|
Environment="https_proxy=Your_HTTPS_Proxy"
|
||||||
|
```
|
||||||
|
|
||||||
|
##### Restart Ollama services
|
||||||
|
|
||||||
|
```
|
||||||
|
$ sudo systemctl daemon-reload
|
||||||
|
$ sudo systemctl restart ollama.service
|
||||||
|
```
|
||||||
|
|
||||||
|
##### Pull LLM model
|
||||||
|
|
||||||
|
```
|
||||||
|
#export OLLAMA_HOST=http://${host_ip}:11434
|
||||||
|
#ollama pull llama3
|
||||||
|
#ollama list
|
||||||
|
NAME ID SIZE MODIFIED
|
||||||
|
llama3:latest 365c0bd3c000 4.7 GB 5 days ago
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 4.2 Build LLM Image
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker build --no-cache -t opea/llm-ollama:latest -f comps/llms/text-generation/ollama/langchain/Dockerfile .
|
docker build --no-cache -t opea/llm-ollama:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/ollama/langchain/Dockerfile .
|
||||||
```
|
```
|
||||||
|
|
||||||
### 5. Build Dataprep Image
|
### 5. Build Dataprep Image
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker build --no-cache -t opea/dataprep-redis:latest -f comps/dataprep/redis/langchain/Dockerfile .
|
docker build --no-cache -t opea/dataprep-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain/Dockerfile .
|
||||||
cd ..
|
cd ..
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -61,7 +109,7 @@ Build frontend Docker image via below command:
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
cd GenAIExamples/ChatQnA/ui
|
cd GenAIExamples/ChatQnA/ui
|
||||||
docker build --no-cache -t opea/chatqna-ui:latest -f ./docker/Dockerfile .
|
docker build --no-cache -t opea/chatqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile .
|
||||||
cd ../../../..
|
cd ../../../..
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -171,6 +219,9 @@ OLLAMA_HOST=${host_ip}:11434 ollama run $OLLAMA_MODEL
|
|||||||
|
|
||||||
### Validate Microservices
|
### Validate Microservices
|
||||||
|
|
||||||
|
Follow the instructions to validate MicroServices.
|
||||||
|
For details on how to verify the correctness of the response, refer to [how-to-validate_service](../../hpu/gaudi/how_to_validate_service.md).
|
||||||
|
|
||||||
1. TEI Embedding Service
|
1. TEI Embedding Service
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
@@ -229,7 +280,7 @@ OLLAMA_HOST=${host_ip}:11434 ollama run $OLLAMA_MODEL
|
|||||||
```bash
|
```bash
|
||||||
curl http://${host_ip}:9000/v1/chat/completions\
|
curl http://${host_ip}:9000/v1/chat/completions\
|
||||||
-X POST \
|
-X POST \
|
||||||
-d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
|
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
|
||||||
-H 'Content-Type: application/json'
|
-H 'Content-Type: application/json'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
@@ -2,6 +2,69 @@
|
|||||||
|
|
||||||
This document outlines the deployment process for a ChatQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on Intel Xeon server. The steps include Docker image creation, container deployment via Docker Compose, and service execution to integrate microservices such as `embedding`, `retriever`, `rerank`, and `llm`. We will publish the Docker images to Docker Hub soon, it will simplify the deployment process for this service.
|
This document outlines the deployment process for a ChatQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on Intel Xeon server. The steps include Docker image creation, container deployment via Docker Compose, and service execution to integrate microservices such as `embedding`, `retriever`, `rerank`, and `llm`. We will publish the Docker images to Docker Hub soon, it will simplify the deployment process for this service.
|
||||||
|
|
||||||
|
Quick Start:
|
||||||
|
|
||||||
|
1. Set up the environment variables.
|
||||||
|
2. Run Docker Compose.
|
||||||
|
3. Consume the ChatQnA Service.
|
||||||
|
|
||||||
|
## Quick Start: 1.Setup Environment Variable
|
||||||
|
|
||||||
|
To set up environment variables for deploying ChatQnA services, follow these steps:
|
||||||
|
|
||||||
|
1. Set the required environment variables:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Example: host_ip="192.168.1.1"
|
||||||
|
export host_ip="External_Public_IP"
|
||||||
|
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
|
||||||
|
export no_proxy="Your_No_Proxy"
|
||||||
|
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
|
||||||
|
```
|
||||||
|
|
||||||
|
2. If you are in a proxy environment, also set the proxy-related environment variables:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export http_proxy="Your_HTTP_Proxy"
|
||||||
|
export https_proxy="Your_HTTPs_Proxy"
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Set up other environment variables:
|
||||||
|
```bash
|
||||||
|
source ./set_env.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
## Quick Start: 2.Run Docker Compose
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose up -d
|
||||||
|
```
|
||||||
|
|
||||||
|
It will automatically download the Docker images from Docker Hub:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker pull opea/chatqna:latest
|
||||||
|
docker pull opea/chatqna-ui:latest
|
||||||
|
```
|
||||||
|
|
||||||
|
In the following cases, you can build the Docker images from source yourself.
|
||||||
|
|
||||||
|
- Failed to download the docker image.
|
||||||
|
|
||||||
|
- If you want to use a specific version of Docker image.
|
||||||
|
|
||||||
|
Please refer to the 'Build Docker Images' section below.
|
||||||
|
|
||||||
|
## Quick Start: 3.Consume the ChatQnA Service
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://${host_ip}:8888/v1/chatqna \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"messages": "What is the revenue of Nike in 2023?"
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
## 🚀 Apply Xeon Server on AWS
|
## 🚀 Apply Xeon Server on AWS
|
||||||
|
|
||||||
To apply a Xeon server on AWS, start by creating an AWS account if you don't have one already. Then, head to the [EC2 Console](https://console.aws.amazon.com/ec2/v2/home) to begin the process. Within the EC2 service, select the Amazon EC2 M7i or M7i-flex instance type to leverage 4th Generation Intel Xeon Scalable processors that are optimized for demanding workloads.
|
To apply a Xeon server on AWS, start by creating an AWS account if you don't have one already. Then, head to the [EC2 Console](https://console.aws.amazon.com/ec2/v2/home) to begin the process. Within the EC2 service, select the Amazon EC2 M7i or M7i-flex instance type to leverage 4th Generation Intel Xeon Scalable processors that are optimized for demanding workloads.
|
||||||
@@ -10,52 +73,25 @@ For detailed information about these instance types, you can refer to this [link
|
|||||||
|
|
||||||
After launching your instance, you can connect to it using SSH (for Linux instances) or Remote Desktop Protocol (RDP) (for Windows instances). From there, you'll have full access to your Xeon server, allowing you to install, configure, and manage your applications as needed.
|
After launching your instance, you can connect to it using SSH (for Linux instances) or Remote Desktop Protocol (RDP) (for Windows instances). From there, you'll have full access to your Xeon server, allowing you to install, configure, and manage your applications as needed.
|
||||||
|
|
||||||
**Certain ports in the EC2 instance need to opened up in the security group, for the microservices to work with the curl commands**
|
### Network Port & Security
|
||||||
|
|
||||||
> See one example below. Please open up these ports in the EC2 instance based on the IP addresses you want to allow
|
- Access the ChatQnA UI by web browser
|
||||||
|
|
||||||
```
|
The UI is accessible on port `80`. Please confirm that port `80` is open in the firewall of the EC2 instance.
|
||||||
redis-vector-db
|
|
||||||
===============
|
|
||||||
Port 6379 - Open to 0.0.0.0/0
|
|
||||||
Port 8001 - Open to 0.0.0.0/0
|
|
||||||
|
|
||||||
tei_embedding_service
|
- Access the microservice by tool or API
|
||||||
=====================
|
|
||||||
Port 6006 - Open to 0.0.0.0/0
|
|
||||||
|
|
||||||
embedding
|
1. Login to the EC2 instance and access by **local IP address** and port.
|
||||||
=========
|
|
||||||
Port 6000 - Open to 0.0.0.0/0
|
|
||||||
|
|
||||||
retriever
|
This is the recommended approach and requires no changes to the network port settings.
|
||||||
=========
|
|
||||||
Port 7000 - Open to 0.0.0.0/0
|
|
||||||
|
|
||||||
tei_xeon_service
|
2. Login to a remote client and access by **public IP address** and port.
|
||||||
================
|
|
||||||
Port 8808 - Open to 0.0.0.0/0
|
|
||||||
|
|
||||||
reranking
|
You need to open the microservice's port in the security group (firewall) settings of the EC2 instance.
|
||||||
=========
|
|
||||||
Port 8000 - Open to 0.0.0.0/0
|
|
||||||
|
|
||||||
tgi-service or vLLM_service
|
For detailed guide, please refer to [Validate Microservices](#validate-microservices).
|
||||||
===========
|
|
||||||
Port 9009 - Open to 0.0.0.0/0
|
|
||||||
|
|
||||||
llm
|
Note: this increases the security risk, so please confirm before doing it.
|
||||||
===
|
|
||||||
Port 9000 - Open to 0.0.0.0/0
|
|
||||||
|
|
||||||
chaqna-xeon-backend-server
|
|
||||||
==========================
|
|
||||||
Port 8888 - Open to 0.0.0.0/0
|
|
||||||
|
|
||||||
chaqna-xeon-ui-server
|
|
||||||
=====================
|
|
||||||
Port 5173 - Open to 0.0.0.0/0
|
|
||||||
```
|
|
||||||
|
|
||||||
## 🚀 Build Docker Images
|
## 🚀 Build Docker Images
|
||||||
|
|
||||||
@@ -157,7 +193,14 @@ cd GenAIExamples/ChatQnA/ui
|
|||||||
docker build --no-cache -t opea/chatqna-conversation-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.react .
|
docker build --no-cache -t opea/chatqna-conversation-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.react .
|
||||||
```
|
```
|
||||||
|
|
||||||
Then run the command `docker images`, you will have the following 7 Docker Images:
|
### 9. Build Nginx Docker Image
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd GenAIComps
|
||||||
|
docker build -t opea/nginx:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/nginx/Dockerfile .
|
||||||
|
```
|
||||||
|
|
||||||
|
Then run the command `docker images`, you will have the following 8 Docker Images:
|
||||||
|
|
||||||
1. `opea/dataprep-redis:latest`
|
1. `opea/dataprep-redis:latest`
|
||||||
2. `opea/embedding-tei:latest`
|
2. `opea/embedding-tei:latest`
|
||||||
@@ -166,6 +209,7 @@ Then run the command `docker images`, you will have the following 7 Docker Image
|
|||||||
5. `opea/llm-tgi:latest` or `opea/llm-vllm:latest`
|
5. `opea/llm-tgi:latest` or `opea/llm-vllm:latest`
|
||||||
6. `opea/chatqna:latest` or `opea/chatqna-without-rerank:latest`
|
6. `opea/chatqna:latest` or `opea/chatqna-without-rerank:latest`
|
||||||
7. `opea/chatqna-ui:latest`
|
7. `opea/chatqna-ui:latest`
|
||||||
|
8. `opea/nginx:latest`
|
||||||
|
|
||||||
## 🚀 Start Microservices
|
## 🚀 Start Microservices
|
||||||
|
|
||||||
@@ -189,7 +233,7 @@ For users in China who are unable to download models directly from Huggingface,
|
|||||||
export HF_TOKEN=${your_hf_token}
|
export HF_TOKEN=${your_hf_token}
|
||||||
export HF_ENDPOINT="https://hf-mirror.com"
|
export HF_ENDPOINT="https://hf-mirror.com"
|
||||||
model_name="Intel/neural-chat-7b-v3-3"
|
model_name="Intel/neural-chat-7b-v3-3"
|
||||||
docker run -p 8008:80 -v ./data:/data --name tgi-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.2.0 --model-id $model_name
|
docker run -p 8008:80 -v ./data:/data --name tgi-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --shm-size 1g ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu --model-id $model_name
|
||||||
```
|
```
|
||||||
|
|
||||||
2. Offline
|
2. Offline
|
||||||
@@ -203,62 +247,35 @@ For users in China who are unable to download models directly from Huggingface,
|
|||||||
```bash
|
```bash
|
||||||
export HF_TOKEN=${your_hf_token}
|
export HF_TOKEN=${your_hf_token}
|
||||||
export model_path="/path/to/model"
|
export model_path="/path/to/model"
|
||||||
docker run -p 8008:80 -v $model_path:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.2.0 --model-id /data
|
docker run -p 8008:80 -v $model_path:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu --model-id /data
|
||||||
```
|
```
|
||||||
|
|
||||||
### Setup Environment Variables
|
### Setup Environment Variables
|
||||||
|
|
||||||
Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below.
|
1. Set the required environment variables:
|
||||||
|
|
||||||
**Export the value of the public IP address of your Xeon server to the `host_ip` environment variable**
|
```bash
|
||||||
|
# Example: host_ip="192.168.1.1"
|
||||||
|
export host_ip="External_Public_IP"
|
||||||
|
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
|
||||||
|
export no_proxy="Your_No_Proxy"
|
||||||
|
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
|
||||||
|
# Example: NGINX_PORT=80
|
||||||
|
export NGINX_PORT=${your_nginx_port}
|
||||||
|
```
|
||||||
|
|
||||||
> Change the External_Public_IP below with the actual IPV4 value
|
2. If you are in a proxy environment, also set the proxy-related environment variables:
|
||||||
|
|
||||||
```
|
```bash
|
||||||
export host_ip="External_Public_IP"
|
export http_proxy="Your_HTTP_Proxy"
|
||||||
```
|
export https_proxy="Your_HTTPs_Proxy"
|
||||||
|
```
|
||||||
|
|
||||||
**Export the value of your Huggingface API token to the `your_hf_api_token` environment variable**
|
3. Set up other environment variables:
|
||||||
|
|
||||||
> Change the Your_Huggingface_API_Token below with tyour actual Huggingface API Token value
|
```bash
|
||||||
|
source ./set_env.sh
|
||||||
```
|
```
|
||||||
export your_hf_api_token="Your_Huggingface_API_Token"
|
|
||||||
```
|
|
||||||
|
|
||||||
**Append the value of the public IP address to the no_proxy list**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
export your_no_proxy=${your_no_proxy},"External_Public_IP"
|
|
||||||
```
|
|
||||||
|
|
||||||
```bash
|
|
||||||
export no_proxy=${your_no_proxy}
|
|
||||||
export http_proxy=${your_http_proxy}
|
|
||||||
export https_proxy=${your_http_proxy}
|
|
||||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
|
||||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
|
||||||
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
|
|
||||||
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006"
|
|
||||||
export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
|
|
||||||
export TGI_LLM_ENDPOINT="http://${host_ip}:9009"
|
|
||||||
export vLLM_LLM_ENDPOINT="http://${host_ip}:9009"
|
|
||||||
export LLM_SERVICE_PORT=9000
|
|
||||||
export REDIS_URL="redis://${host_ip}:6379"
|
|
||||||
export INDEX_NAME="rag-redis"
|
|
||||||
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
|
|
||||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
|
||||||
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
|
|
||||||
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
|
|
||||||
export RERANK_SERVICE_HOST_IP=${host_ip}
|
|
||||||
export LLM_SERVICE_HOST_IP=${host_ip}
|
|
||||||
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/chatqna"
|
|
||||||
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
|
|
||||||
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_file"
|
|
||||||
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete_file"
|
|
||||||
```
|
|
||||||
|
|
||||||
Note: Please replace with `host_ip` with you external IP address, do not use localhost.
|
|
||||||
|
|
||||||
### Start all the services Docker Containers
|
### Start all the services Docker Containers
|
||||||
|
|
||||||
@@ -285,6 +302,10 @@ docker compose -f compose_vllm.yaml up -d
|
|||||||
|
|
||||||
### Validate Microservices
|
### Validate Microservices
|
||||||
|
|
||||||
|
Note: when verifying the microservices by curl or API from a remote client, please make sure the **ports** of the microservices are open in the firewall of the cloud node.
|
||||||
|
Follow the instructions to validate MicroServices.
|
||||||
|
For details on how to verify the correctness of the response, refer to [how-to-validate_service](../../hpu/gaudi/how_to_validate_service.md).
|
||||||
|
|
||||||
1. TEI Embedding Service
|
1. TEI Embedding Service
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
@@ -379,102 +400,125 @@ docker compose -f compose_vllm.yaml up -d
|
|||||||
This service depends on above LLM backend service startup. It will be ready after long time, to wait for them being ready in first startup.
|
This service depends on above LLM backend service startup. It will be ready after long time, to wait for them being ready in first startup.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
# TGI service
|
||||||
curl http://${host_ip}:9000/v1/chat/completions\
|
curl http://${host_ip}:9000/v1/chat/completions\
|
||||||
-X POST \
|
-X POST \
|
||||||
-d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
|
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
|
||||||
-H 'Content-Type: application/json'
|
-H 'Content-Type: application/json'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
For parameters in TGI modes, please refer to [HuggingFace InferenceClient API](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.text_generation) (except we rename "max_new_tokens" to "max_tokens".)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# vLLM Service
|
||||||
|
curl http://${host_ip}:9000/v1/chat/completions \
|
||||||
|
-X POST \
|
||||||
|
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_p":1,"temperature":0.7,"frequency_penalty":0,"presence_penalty":0, "streaming":false}' \
|
||||||
|
-H 'Content-Type: application/json'
|
||||||
|
```
|
||||||
|
|
||||||
|
For parameters in vLLM mode, please refer to [LangChain VLLMOpenAI API](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.vllm.VLLMOpenAI.html)
|
||||||
|
|
||||||
8. MegaService
|
8. MegaService
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
curl http://${host_ip}:8888/v1/chatqna -H "Content-Type: application/json" -d '{
|
curl http://${host_ip}:8888/v1/chatqna -H "Content-Type: application/json" -d '{
|
||||||
"messages": "What is the revenue of Nike in 2023?"
|
"messages": "What is the revenue of Nike in 2023?"
|
||||||
}'
|
}'
|
||||||
```
|
```
|
||||||
|
|
||||||
9. Dataprep Microservice(Optional)
|
9. Nginx Service
|
||||||
|
|
||||||
If you want to update the default knowledge base, you can use the following commands:
|
|
||||||
|
|
||||||
Update Knowledge Base via Local File [nke-10k-2023.pdf](https://github.com/opea-project/GenAIComps/blob/main/comps/retrievers/redis/data/nke-10k-2023.pdf). Or
|
|
||||||
click [here](https://raw.githubusercontent.com/opea-project/GenAIComps/main/comps/retrievers/redis/data/nke-10k-2023.pdf) to download the file via any web browser.
|
|
||||||
Or run this command to get the file on a terminal.
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
wget https://raw.githubusercontent.com/opea-project/GenAIComps/main/comps/retrievers/redis/data/nke-10k-2023.pdf
|
curl http://${host_ip}:${NGINX_PORT}/v1/chatqna \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{"messages": "What is the revenue of Nike in 2023?"}'
|
||||||
```
|
```
|
||||||
|
|
||||||
Upload:
|
10. Dataprep Microservice(Optional)
|
||||||
|
|
||||||
```bash
|
If you want to update the default knowledge base, you can use the following commands:
|
||||||
curl -X POST "http://${host_ip}:6007/v1/dataprep" \
|
|
||||||
-H "Content-Type: multipart/form-data" \
|
|
||||||
-F "files=@./nke-10k-2023.pdf"
|
|
||||||
```
|
|
||||||
|
|
||||||
This command updates a knowledge base by uploading a local file for processing. Update the file path according to your environment.
|
Update Knowledge Base via Local File [nke-10k-2023.pdf](https://github.com/opea-project/GenAIComps/blob/main/comps/retrievers/redis/data/nke-10k-2023.pdf). Or
|
||||||
|
click [here](https://raw.githubusercontent.com/opea-project/GenAIComps/main/comps/retrievers/redis/data/nke-10k-2023.pdf) to download the file via any web browser.
|
||||||
|
Or run this command to get the file on a terminal.
|
||||||
|
|
||||||
Add Knowledge Base via HTTP Links:
|
```bash
|
||||||
|
wget https://raw.githubusercontent.com/opea-project/GenAIComps/main/comps/retrievers/redis/data/nke-10k-2023.pdf
|
||||||
|
|
||||||
```bash
|
```
|
||||||
curl -X POST "http://${host_ip}:6007/v1/dataprep" \
|
|
||||||
-H "Content-Type: multipart/form-data" \
|
|
||||||
-F 'link_list=["https://opea.dev"]'
|
|
||||||
```
|
|
||||||
|
|
||||||
This command updates a knowledge base by submitting a list of HTTP links for processing.
|
Upload:
|
||||||
|
|
||||||
Also, you are able to get the file list that you uploaded:
|
```bash
|
||||||
|
curl -X POST "http://${host_ip}:6007/v1/dataprep" \
|
||||||
|
-H "Content-Type: multipart/form-data" \
|
||||||
|
-F "files=@./nke-10k-2023.pdf"
|
||||||
|
```
|
||||||
|
|
||||||
```bash
|
This command updates a knowledge base by uploading a local file for processing. Update the file path according to your environment.
|
||||||
curl -X POST "http://${host_ip}:6007/v1/dataprep/get_file" \
|
|
||||||
-H "Content-Type: application/json"
|
|
||||||
```
|
|
||||||
|
|
||||||
Then you will get the response JSON like this. Notice that the returned `name`/`id` of the uploaded link is `https://xxx.txt`.
|
Add Knowledge Base via HTTP Links:
|
||||||
|
|
||||||
```json
|
```bash
|
||||||
[
|
curl -X POST "http://${host_ip}:6007/v1/dataprep" \
|
||||||
{
|
-H "Content-Type: multipart/form-data" \
|
||||||
"name": "nke-10k-2023.pdf",
|
-F 'link_list=["https://opea.dev"]'
|
||||||
"id": "nke-10k-2023.pdf",
|
```
|
||||||
"type": "File",
|
|
||||||
"parent": ""
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "https://opea.dev.txt",
|
|
||||||
"id": "https://opea.dev.txt",
|
|
||||||
"type": "File",
|
|
||||||
"parent": ""
|
|
||||||
}
|
|
||||||
]
|
|
||||||
```
|
|
||||||
|
|
||||||
To delete the file/link you uploaded:
|
This command updates a knowledge base by submitting a list of HTTP links for processing.
|
||||||
|
|
||||||
The `file_path` here should be the `id` get from `/v1/dataprep/get_file` API.
|
Also, you are able to get the file list that you uploaded:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# delete link
|
curl -X POST "http://${host_ip}:6007/v1/dataprep/get_file" \
|
||||||
curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \
|
-H "Content-Type: application/json"
|
||||||
-d '{"file_path": "https://opea.dev.txt"}' \
|
```
|
||||||
-H "Content-Type: application/json"
|
|
||||||
|
|
||||||
# delete file
|
Then you will get the response JSON like this. Notice that the returned `name`/`id` of the uploaded link is `https://xxx.txt`.
|
||||||
curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \
|
|
||||||
-d '{"file_path": "nke-10k-2023.pdf"}' \
|
|
||||||
-H "Content-Type: application/json"
|
|
||||||
|
|
||||||
# delete all uploaded files and links
|
```json
|
||||||
curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \
|
[
|
||||||
-d '{"file_path": "all"}' \
|
{
|
||||||
-H "Content-Type: application/json"
|
"name": "nke-10k-2023.pdf",
|
||||||
```
|
"id": "nke-10k-2023.pdf",
|
||||||
|
"type": "File",
|
||||||
|
"parent": ""
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "https://opea.dev.txt",
|
||||||
|
"id": "https://opea.dev.txt",
|
||||||
|
"type": "File",
|
||||||
|
"parent": ""
|
||||||
|
}
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
To delete the file/link you uploaded:
|
||||||
|
|
||||||
|
The `file_path` here should be the `id` get from `/v1/dataprep/get_file` API.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# delete link
|
||||||
|
curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \
|
||||||
|
-d '{"file_path": "https://opea.dev.txt"}' \
|
||||||
|
-H "Content-Type: application/json"
|
||||||
|
|
||||||
|
# delete file
|
||||||
|
curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \
|
||||||
|
-d '{"file_path": "nke-10k-2023.pdf"}' \
|
||||||
|
-H "Content-Type: application/json"
|
||||||
|
|
||||||
|
# delete all uploaded files and links
|
||||||
|
curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \
|
||||||
|
-d '{"file_path": "all"}' \
|
||||||
|
-H "Content-Type: application/json"
|
||||||
|
```
|
||||||
|
|
||||||
## 🚀 Launch the UI
|
## 🚀 Launch the UI
|
||||||
|
|
||||||
|
### Launch with origin port
|
||||||
|
|
||||||
To access the frontend, open the following URL in your browser: http://{host_ip}:5173. By default, the UI runs on port 5173 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `compose.yaml` file as shown below:
|
To access the frontend, open the following URL in your browser: http://{host_ip}:5173. By default, the UI runs on port 5173 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `compose.yaml` file as shown below:
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
@@ -485,6 +529,10 @@ To access the frontend, open the following URL in your browser: http://{host_ip}
|
|||||||
- "80:5173"
|
- "80:5173"
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Launch with Nginx
|
||||||
|
|
||||||
|
If you want to launch the UI using Nginx, open this URL: `http://${host_ip}:${NGINX_PORT}` in your browser to access the frontend.
|
||||||
|
|
||||||
## 🚀 Launch the Conversational UI (Optional)
|
## 🚀 Launch the Conversational UI (Optional)
|
||||||
|
|
||||||
To access the Conversational UI (react based) frontend, modify the UI service in the `compose.yaml` file. Replace `chaqna-xeon-ui-server` service with the `chatqna-xeon-conversation-ui-server` service as per the config below:
|
To access the Conversational UI (react based) frontend, modify the UI service in the `compose.yaml` file. Replace `chaqna-xeon-ui-server` service with the `chatqna-xeon-conversation-ui-server` service as per the config below:
|
||||||
|
|||||||
@@ -222,6 +222,9 @@ docker compose -f compose_qdrant.yaml up -d
|
|||||||
|
|
||||||
### Validate Microservices
|
### Validate Microservices
|
||||||
|
|
||||||
|
Follow the instructions to validate MicroServices.
|
||||||
|
For details on how to verify the correctness of the response, refer to [how-to-validate_service](../../hpu/gaudi/how_to_validate_service.md).
|
||||||
|
|
||||||
1. TEI Embedding Service
|
1. TEI Embedding Service
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
@@ -304,7 +307,7 @@ docker compose -f compose_qdrant.yaml up -d
|
|||||||
```bash
|
```bash
|
||||||
curl http://${host_ip}:6047/v1/chat/completions\
|
curl http://${host_ip}:6047/v1/chat/completions\
|
||||||
-X POST \
|
-X POST \
|
||||||
-d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
|
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
|
||||||
-H 'Content-Type: application/json'
|
-H 'Content-Type: application/json'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
@@ -178,6 +178,25 @@ services:
|
|||||||
- DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT}
|
- DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT}
|
||||||
ipc: host
|
ipc: host
|
||||||
restart: always
|
restart: always
|
||||||
|
chaqna-xeon-nginx-server:
|
||||||
|
image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
|
||||||
|
container_name: chaqna-xeon-nginx-server
|
||||||
|
depends_on:
|
||||||
|
- chaqna-xeon-backend-server
|
||||||
|
- chaqna-xeon-ui-server
|
||||||
|
ports:
|
||||||
|
- "${NGINX_PORT:-80}:80"
|
||||||
|
environment:
|
||||||
|
- no_proxy=${no_proxy}
|
||||||
|
- https_proxy=${https_proxy}
|
||||||
|
- http_proxy=${http_proxy}
|
||||||
|
- FRONTEND_SERVICE_IP=${FRONTEND_SERVICE_IP}
|
||||||
|
- FRONTEND_SERVICE_PORT=${FRONTEND_SERVICE_PORT}
|
||||||
|
- BACKEND_SERVICE_NAME=${BACKEND_SERVICE_NAME}
|
||||||
|
- BACKEND_SERVICE_IP=${BACKEND_SERVICE_IP}
|
||||||
|
- BACKEND_SERVICE_PORT=${BACKEND_SERVICE_PORT}
|
||||||
|
ipc: host
|
||||||
|
restart: always
|
||||||
|
|
||||||
networks:
|
networks:
|
||||||
default:
|
default:
|
||||||
|
|||||||
@@ -69,7 +69,7 @@ services:
|
|||||||
INDEX_NAME: ${INDEX_NAME}
|
INDEX_NAME: ${INDEX_NAME}
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
tei-reranking-service:
|
tei-reranking-service:
|
||||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
|
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||||
container_name: tei-reranking-server
|
container_name: tei-reranking-server
|
||||||
ports:
|
ports:
|
||||||
- "6041:80"
|
- "6041:80"
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ services:
|
|||||||
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||||
tei-embedding-service:
|
tei-embedding-service:
|
||||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
|
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||||
container_name: tei-embedding-server
|
container_name: tei-embedding-server
|
||||||
ports:
|
ports:
|
||||||
- "6006:80"
|
- "6006:80"
|
||||||
@@ -75,7 +75,7 @@ services:
|
|||||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
tei-reranking-service:
|
tei-reranking-service:
|
||||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
|
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||||
container_name: tei-reranking-server
|
container_name: tei-reranking-server
|
||||||
ports:
|
ports:
|
||||||
- "8808:80"
|
- "8808:80"
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
|
|||||||
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006"
|
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006"
|
||||||
export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
|
export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
|
||||||
export TGI_LLM_ENDPOINT="http://${host_ip}:9009"
|
export TGI_LLM_ENDPOINT="http://${host_ip}:9009"
|
||||||
|
export vLLM_LLM_ENDPOINT="http://${host_ip}:9009"
|
||||||
export REDIS_URL="redis://${host_ip}:6379"
|
export REDIS_URL="redis://${host_ip}:6379"
|
||||||
export INDEX_NAME="rag-redis"
|
export INDEX_NAME="rag-redis"
|
||||||
export REDIS_HOST=${host_ip}
|
export REDIS_HOST=${host_ip}
|
||||||
@@ -22,3 +23,8 @@ export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/chatqna"
|
|||||||
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
|
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
|
||||||
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_file"
|
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_file"
|
||||||
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete_file"
|
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete_file"
|
||||||
|
export FRONTEND_SERVICE_IP=${host_ip}
|
||||||
|
export FRONTEND_SERVICE_PORT=5173
|
||||||
|
export BACKEND_SERVICE_NAME=chatqna
|
||||||
|
export BACKEND_SERVICE_IP=${host_ip}
|
||||||
|
export BACKEND_SERVICE_PORT=8888
|
||||||
|
|||||||
@@ -2,6 +2,70 @@
|
|||||||
|
|
||||||
This document outlines the deployment process for a ChatQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on Intel Gaudi server. The steps include Docker image creation, container deployment via Docker Compose, and service execution to integrate microservices such as embedding, retriever, rerank, and llm. We will publish the Docker images to Docker Hub, it will simplify the deployment process for this service.
|
This document outlines the deployment process for a ChatQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on Intel Gaudi server. The steps include Docker image creation, container deployment via Docker Compose, and service execution to integrate microservices such as embedding, retriever, rerank, and llm. We will publish the Docker images to Docker Hub, it will simplify the deployment process for this service.
|
||||||
|
|
||||||
|
Quick Start:
|
||||||
|
|
||||||
|
1. Set up the environment variables.
|
||||||
|
2. Run Docker Compose.
|
||||||
|
3. Consume the ChatQnA Service.
|
||||||
|
|
||||||
|
## Quick Start: 1.Setup Environment Variable
|
||||||
|
|
||||||
|
To set up environment variables for deploying ChatQnA services, follow these steps:
|
||||||
|
|
||||||
|
1. Set the required environment variables:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Example: host_ip="192.168.1.1"
|
||||||
|
export host_ip="External_Public_IP"
|
||||||
|
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
|
||||||
|
export no_proxy="Your_No_Proxy"
|
||||||
|
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
|
||||||
|
```
|
||||||
|
|
||||||
|
2. If you are in a proxy environment, also set the proxy-related environment variables:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export http_proxy="Your_HTTP_Proxy"
|
||||||
|
export https_proxy="Your_HTTPs_Proxy"
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Set up other environment variables:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
source ./set_env.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
## Quick Start: 2.Run Docker Compose
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose up -d
|
||||||
|
```
|
||||||
|
|
||||||
|
Docker Compose will automatically pull the following Docker images from Docker Hub:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker pull opea/chatqna:latest
|
||||||
|
docker pull opea/chatqna-ui:latest
|
||||||
|
```
|
||||||
|
|
||||||
|
In the following cases, you can build the Docker images from source yourself:
|
||||||
|
|
||||||
|
- Failed to download the docker image.
|
||||||
|
|
||||||
|
- If you want to use a specific version of Docker image.
|
||||||
|
|
||||||
|
Please refer to the 'Build Docker Images' section below.
|
||||||
|
|
||||||
|
## Quick Start: 3.Consume the ChatQnA Service
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://${host_ip}:8888/v1/chatqna \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"messages": "What is the revenue of Nike in 2023?"
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
## 🚀 Build Docker Images
|
## 🚀 Build Docker Images
|
||||||
|
|
||||||
First of all, you need to build Docker Images locally. This step can be ignored after the Docker images published to Docker hub.
|
First of all, you need to build Docker Images locally. This step can be ignored after the Docker images published to Docker hub.
|
||||||
@@ -132,7 +196,14 @@ cd GenAIExamples/ChatQnA/ui
|
|||||||
docker build --no-cache -t opea/chatqna-conversation-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.react .
|
docker build --no-cache -t opea/chatqna-conversation-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.react .
|
||||||
```
|
```
|
||||||
|
|
||||||
Then run the command `docker images`, you will have the following 7 Docker Images:
|
### 10. Build Nginx Docker Image
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd GenAIComps
|
||||||
|
docker build -t opea/nginx:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/nginx/Dockerfile .
|
||||||
|
```
|
||||||
|
|
||||||
|
Then run the command `docker images`, you will have the following 8 Docker Images:
|
||||||
|
|
||||||
- `opea/embedding-tei:latest`
|
- `opea/embedding-tei:latest`
|
||||||
- `opea/retriever-redis:latest`
|
- `opea/retriever-redis:latest`
|
||||||
@@ -141,6 +212,7 @@ Then run the command `docker images`, you will have the following 7 Docker Image
|
|||||||
- `opea/dataprep-redis:latest`
|
- `opea/dataprep-redis:latest`
|
||||||
- `opea/chatqna:latest` or `opea/chatqna-guardrails:latest` or `opea/chatqna-without-rerank:latest`
|
- `opea/chatqna:latest` or `opea/chatqna-guardrails:latest` or `opea/chatqna-without-rerank:latest`
|
||||||
- `opea/chatqna-ui:latest`
|
- `opea/chatqna-ui:latest`
|
||||||
|
- `opea/nginx:latest`
|
||||||
|
|
||||||
If Conversation React UI is built, you will find one more image:
|
If Conversation React UI is built, you will find one more image:
|
||||||
|
|
||||||
@@ -191,51 +263,30 @@ For users in China who are unable to download models directly from Huggingface,
|
|||||||
|
|
||||||
### Setup Environment Variables
|
### Setup Environment Variables
|
||||||
|
|
||||||
Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below.
|
1. Set the required environment variables:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
export no_proxy=${your_no_proxy}
|
# Example: host_ip="192.168.1.1"
|
||||||
export http_proxy=${your_http_proxy}
|
export host_ip="External_Public_IP"
|
||||||
export https_proxy=${your_http_proxy}
|
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
|
||||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
export no_proxy="Your_No_Proxy"
|
||||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
|
||||||
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
|
# Example: NGINX_PORT=80
|
||||||
export LLM_MODEL_ID_NAME="neural-chat-7b-v3-3"
|
export NGINX_PORT=${your_nginx_port}
|
||||||
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:8090"
|
```
|
||||||
export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
|
|
||||||
export TGI_LLM_ENDPOINT="http://${host_ip}:8005"
|
|
||||||
export vLLM_LLM_ENDPOINT="http://${host_ip}:8007"
|
|
||||||
export vLLM_RAY_LLM_ENDPOINT="http://${host_ip}:8006"
|
|
||||||
export LLM_SERVICE_PORT=9000
|
|
||||||
export REDIS_URL="redis://${host_ip}:6379"
|
|
||||||
export INDEX_NAME="rag-redis"
|
|
||||||
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
|
|
||||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
|
||||||
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
|
|
||||||
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
|
|
||||||
export RERANK_SERVICE_HOST_IP=${host_ip}
|
|
||||||
export LLM_SERVICE_HOST_IP=${host_ip}
|
|
||||||
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/chatqna"
|
|
||||||
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
|
|
||||||
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_file"
|
|
||||||
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete_file"
|
|
||||||
|
|
||||||
export llm_service_devices=all
|
2. If you are in a proxy environment, also set the proxy-related environment variables:
|
||||||
export tei_embedding_devices=all
|
|
||||||
```
|
|
||||||
|
|
||||||
To specify the device ids, `llm_service_devices` and `tei_embedding_devices` can be set to a comma-separated list such as "0,1,2,3". More info in [gaudi docs](https://docs.habana.ai/en/latest/Orchestration/Multiple_Tenants_on_HPU/Multiple_Dockers_each_with_Single_Workload.html).
|
```bash
|
||||||
|
export http_proxy="Your_HTTP_Proxy"
|
||||||
|
export https_proxy="Your_HTTPs_Proxy"
|
||||||
|
```
|
||||||
|
|
||||||
If guardrails microservice is enabled in the pipeline, the below environment variables are necessary to be set.
|
3. Set up other environment variables:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
export GURADRAILS_MODEL_ID="meta-llama/Meta-Llama-Guard-2-8B"
|
source ./set_env.sh
|
||||||
export SAFETY_GUARD_MODEL_ID="meta-llama/Meta-Llama-Guard-2-8B"
|
```
|
||||||
export SAFETY_GUARD_ENDPOINT="http://${host_ip}:8088"
|
|
||||||
export GUARDRAIL_SERVICE_HOST_IP=${host_ip}
|
|
||||||
```
|
|
||||||
|
|
||||||
Note: Please replace `host_ip` with your external IP address, do **NOT** use localhost.
|
|
||||||
|
|
||||||
### Start all the services Docker Containers
|
### Start all the services Docker Containers
|
||||||
|
|
||||||
@@ -382,88 +433,119 @@ For validation details, please refer to [how-to-validate_service](./how_to_valid
|
|||||||
7. LLM Microservice
|
7. LLM Microservice
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
curl http://${host_ip}:9000/v1/chat/completions \
|
# TGI service
|
||||||
|
curl http://${host_ip}:9000/v1/chat/completions\
|
||||||
-X POST \
|
-X POST \
|
||||||
-d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
|
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
|
||||||
-H 'Content-Type: application/json'
|
-H 'Content-Type: application/json'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
For parameters in TGI mode, please refer to [HuggingFace InferenceClient API](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.text_generation) (except we rename "max_new_tokens" to "max_tokens".)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# vLLM Service
|
||||||
|
curl http://${host_ip}:9000/v1/chat/completions \
|
||||||
|
-X POST \
|
||||||
|
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_p":1,"temperature":0.7,"frequency_penalty":0,"presence_penalty":0, "streaming":false}' \
|
||||||
|
-H 'Content-Type: application/json'
|
||||||
|
```
|
||||||
|
|
||||||
|
For parameters in vLLM mode, please refer to [LangChain VLLMOpenAI API](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.vllm.VLLMOpenAI.html)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# vLLM-on-Ray Service
|
||||||
|
curl http://${host_ip}:9000/v1/chat/completions \
|
||||||
|
-X POST \
|
||||||
|
-d '{"query":"What is Deep Learning?","max_tokens":17,"presence_penalty":1.03,"streaming":false}' \
|
||||||
|
-H 'Content-Type: application/json'
|
||||||
|
```
|
||||||
|
|
||||||
|
For parameters in vLLM-on-Ray mode, please refer to [LangChain ChatOpenAI API](https://python.langchain.com/v0.2/api_reference/openai/chat_models/langchain_openai.chat_models.base.ChatOpenAI.html)
|
||||||
|
|
||||||
8. MegaService
|
8. MegaService
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
curl http://${host_ip}:8888/v1/chatqna -H "Content-Type: application/json" -d '{
|
curl http://${host_ip}:8888/v1/chatqna -H "Content-Type: application/json" -d '{
|
||||||
"messages": "What is the revenue of Nike in 2023?"
|
"messages": "What is the revenue of Nike in 2023?"
|
||||||
}'
|
}'
|
||||||
```
|
```
|
||||||
|
|
||||||
9. Dataprep Microservice(Optional)
|
9. Nginx Service
|
||||||
|
|
||||||
If you want to update the default knowledge base, you can use the following commands:
|
|
||||||
|
|
||||||
Update Knowledge Base via Local File Upload:
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
curl -X POST "http://${host_ip}:6007/v1/dataprep" \
|
curl http://${host_ip}:${NGINX_PORT}/v1/chatqna \
|
||||||
-H "Content-Type: multipart/form-data" \
|
-H "Content-Type: application/json" \
|
||||||
-F "files=@./nke-10k-2023.pdf"
|
-d '{"messages": "What is the revenue of Nike in 2023?"}'
|
||||||
```
|
```
|
||||||
|
|
||||||
This command updates a knowledge base by uploading a local file for processing. Update the file path according to your environment.
|
10. Dataprep Microservice(Optional)
|
||||||
|
|
||||||
Add Knowledge Base via HTTP Links:
|
If you want to update the default knowledge base, you can use the following commands:
|
||||||
|
|
||||||
```bash
|
Update Knowledge Base via Local File Upload:
|
||||||
curl -X POST "http://${host_ip}:6007/v1/dataprep" \
|
|
||||||
-H "Content-Type: multipart/form-data" \
|
|
||||||
-F 'link_list=["https://opea.dev"]'
|
|
||||||
```
|
|
||||||
|
|
||||||
This command updates a knowledge base by submitting a list of HTTP links for processing.
|
```bash
|
||||||
|
curl -X POST "http://${host_ip}:6007/v1/dataprep" \
|
||||||
|
-H "Content-Type: multipart/form-data" \
|
||||||
|
-F "files=@./nke-10k-2023.pdf"
|
||||||
|
```
|
||||||
|
|
||||||
Also, you are able to get the file/link list that you uploaded:
|
This command updates a knowledge base by uploading a local file for processing. Update the file path according to your environment.
|
||||||
|
|
||||||
```bash
|
Add Knowledge Base via HTTP Links:
|
||||||
curl -X POST "http://${host_ip}:6007/v1/dataprep/get_file" \
|
|
||||||
-H "Content-Type: application/json"
|
|
||||||
```
|
|
||||||
|
|
||||||
Then you will get the response JSON like this. Notice that the returned `name`/`id` of the uploaded link is `https://xxx.txt`.
|
```bash
|
||||||
|
curl -X POST "http://${host_ip}:6007/v1/dataprep" \
|
||||||
|
-H "Content-Type: multipart/form-data" \
|
||||||
|
-F 'link_list=["https://opea.dev"]'
|
||||||
|
```
|
||||||
|
|
||||||
```json
|
This command updates a knowledge base by submitting a list of HTTP links for processing.
|
||||||
[
|
|
||||||
{
|
|
||||||
"name": "nke-10k-2023.pdf",
|
|
||||||
"id": "nke-10k-2023.pdf",
|
|
||||||
"type": "File",
|
|
||||||
"parent": ""
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "https://opea.dev.txt",
|
|
||||||
"id": "https://opea.dev.txt",
|
|
||||||
"type": "File",
|
|
||||||
"parent": ""
|
|
||||||
}
|
|
||||||
]
|
|
||||||
```
|
|
||||||
|
|
||||||
To delete the file/link you uploaded:
|
Also, you are able to get the file/link list that you uploaded:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# delete link
|
curl -X POST "http://${host_ip}:6007/v1/dataprep/get_file" \
|
||||||
curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \
|
-H "Content-Type: application/json"
|
||||||
-d '{"file_path": "https://opea.dev.txt"}' \
|
```
|
||||||
-H "Content-Type: application/json"
|
|
||||||
|
|
||||||
# delete file
|
Then you will get the response JSON like this. Notice that the returned `name`/`id` of the uploaded link is `https://xxx.txt`.
|
||||||
curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \
|
|
||||||
-d '{"file_path": "nke-10k-2023.pdf"}' \
|
|
||||||
-H "Content-Type: application/json"
|
|
||||||
|
|
||||||
# delete all uploaded files and links
|
```json
|
||||||
curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \
|
[
|
||||||
-d '{"file_path": "all"}' \
|
{
|
||||||
-H "Content-Type: application/json"
|
"name": "nke-10k-2023.pdf",
|
||||||
```
|
"id": "nke-10k-2023.pdf",
|
||||||
|
"type": "File",
|
||||||
|
"parent": ""
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "https://opea.dev.txt",
|
||||||
|
"id": "https://opea.dev.txt",
|
||||||
|
"type": "File",
|
||||||
|
"parent": ""
|
||||||
|
}
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
To delete the file/link you uploaded:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# delete link
|
||||||
|
curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \
|
||||||
|
-d '{"file_path": "https://opea.dev.txt"}' \
|
||||||
|
-H "Content-Type: application/json"
|
||||||
|
|
||||||
|
# delete file
|
||||||
|
curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \
|
||||||
|
-d '{"file_path": "nke-10k-2023.pdf"}' \
|
||||||
|
-H "Content-Type: application/json"
|
||||||
|
|
||||||
|
# delete all uploaded files and links
|
||||||
|
curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \
|
||||||
|
-d '{"file_path": "all"}' \
|
||||||
|
-H "Content-Type: application/json"
|
||||||
|
```
|
||||||
|
|
||||||
10. Guardrails (Optional)
|
10. Guardrails (Optional)
|
||||||
|
|
||||||
@@ -476,6 +558,8 @@ curl http://${host_ip}:9090/v1/guardrails\
|
|||||||
|
|
||||||
## 🚀 Launch the UI
|
## 🚀 Launch the UI
|
||||||
|
|
||||||
|
### Launch with origin port
|
||||||
|
|
||||||
To access the frontend, open the following URL in your browser: http://{host_ip}:5173. By default, the UI runs on port 5173 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `compose.yaml` file as shown below:
|
To access the frontend, open the following URL in your browser: http://{host_ip}:5173. By default, the UI runs on port 5173 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `compose.yaml` file as shown below:
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
@@ -486,11 +570,9 @@ To access the frontend, open the following URL in your browser: http://{host_ip}
|
|||||||
- "80:5173"
|
- "80:5173"
|
||||||
```
|
```
|
||||||
|
|
||||||

|
### Launch with Nginx
|
||||||
|
|
||||||
Here is an example of running ChatQnA:
|
If you want to launch the UI using Nginx, open this URL: `http://${host_ip}:${NGINX_PORT}` in your browser to access the frontend.
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||
## 🚀 Launch the Conversational UI (Optional)
|
## 🚀 Launch the Conversational UI (Optional)
|
||||||
|
|
||||||
@@ -521,6 +603,12 @@ Once the services are up, open the following URL in your browser: http://{host_i
|
|||||||
- "80:80"
|
- "80:80"
|
||||||
```
|
```
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
Here is an example of running ChatQnA:
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
Here is an example of running ChatQnA with Conversational UI (React):
|
Here is an example of running ChatQnA with Conversational UI (React):
|
||||||
|
|
||||||

|

|
||||||
|
|||||||
@@ -25,7 +25,7 @@ services:
|
|||||||
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||||
tei-embedding-service:
|
tei-embedding-service:
|
||||||
image: ${REGISTRY:-opea}/tei-gaudi:${TAG:-latest}
|
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||||
container_name: tei-embedding-gaudi-server
|
container_name: tei-embedding-gaudi-server
|
||||||
ports:
|
ports:
|
||||||
- "8090:80"
|
- "8090:80"
|
||||||
@@ -108,7 +108,7 @@ services:
|
|||||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
tgi-service:
|
tgi-service:
|
||||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
|
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||||
container_name: tgi-gaudi-server
|
container_name: tgi-gaudi-server
|
||||||
ports:
|
ports:
|
||||||
- "8005:80"
|
- "8005:80"
|
||||||
@@ -118,11 +118,15 @@ services:
|
|||||||
no_proxy: ${no_proxy}
|
no_proxy: ${no_proxy}
|
||||||
http_proxy: ${http_proxy}
|
http_proxy: ${http_proxy}
|
||||||
https_proxy: ${https_proxy}
|
https_proxy: ${https_proxy}
|
||||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||||
HABANA_VISIBLE_DEVICES: ${llm_service_devices}
|
HABANA_VISIBLE_DEVICES: ${llm_service_devices}
|
||||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||||
|
ENABLE_HPU_GRAPH: true
|
||||||
|
LIMIT_HPU_GRAPH: true
|
||||||
|
USE_FLASH_ATTENTION: true
|
||||||
|
FLASH_ATTENTION_RECOMPUTE: true
|
||||||
runtime: habana
|
runtime: habana
|
||||||
cap_add:
|
cap_add:
|
||||||
- SYS_NICE
|
- SYS_NICE
|
||||||
@@ -187,6 +191,25 @@ services:
|
|||||||
- DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT}
|
- DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT}
|
||||||
ipc: host
|
ipc: host
|
||||||
restart: always
|
restart: always
|
||||||
|
chaqna-gaudi-nginx-server:
|
||||||
|
image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
|
||||||
|
container_name: chaqna-gaudi-nginx-server
|
||||||
|
depends_on:
|
||||||
|
- chaqna-gaudi-backend-server
|
||||||
|
- chaqna-gaudi-ui-server
|
||||||
|
ports:
|
||||||
|
- "${NGINX_PORT:-80}:80"
|
||||||
|
environment:
|
||||||
|
- no_proxy=${no_proxy}
|
||||||
|
- https_proxy=${https_proxy}
|
||||||
|
- http_proxy=${http_proxy}
|
||||||
|
- FRONTEND_SERVICE_IP=${FRONTEND_SERVICE_IP}
|
||||||
|
- FRONTEND_SERVICE_PORT=${FRONTEND_SERVICE_PORT}
|
||||||
|
- BACKEND_SERVICE_NAME=${BACKEND_SERVICE_NAME}
|
||||||
|
- BACKEND_SERVICE_IP=${BACKEND_SERVICE_IP}
|
||||||
|
- BACKEND_SERVICE_PORT=${BACKEND_SERVICE_PORT}
|
||||||
|
ipc: host
|
||||||
|
restart: always
|
||||||
|
|
||||||
networks:
|
networks:
|
||||||
default:
|
default:
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ services:
|
|||||||
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||||
tgi-guardrails-service:
|
tgi-guardrails-service:
|
||||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
|
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||||
container_name: tgi-guardrails-server
|
container_name: tgi-guardrails-server
|
||||||
ports:
|
ports:
|
||||||
- "8088:80"
|
- "8088:80"
|
||||||
@@ -35,11 +35,15 @@ services:
|
|||||||
no_proxy: ${no_proxy}
|
no_proxy: ${no_proxy}
|
||||||
http_proxy: ${http_proxy}
|
http_proxy: ${http_proxy}
|
||||||
https_proxy: ${https_proxy}
|
https_proxy: ${https_proxy}
|
||||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||||
HABANA_VISIBLE_DEVICES: all
|
HABANA_VISIBLE_DEVICES: all
|
||||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||||
|
ENABLE_HPU_GRAPH: true
|
||||||
|
LIMIT_HPU_GRAPH: true
|
||||||
|
USE_FLASH_ATTENTION: true
|
||||||
|
FLASH_ATTENTION_RECOMPUTE: true
|
||||||
runtime: habana
|
runtime: habana
|
||||||
cap_add:
|
cap_add:
|
||||||
- SYS_NICE
|
- SYS_NICE
|
||||||
@@ -60,7 +64,7 @@ services:
|
|||||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
tei-embedding-service:
|
tei-embedding-service:
|
||||||
image: ${REGISTRY:-opea}/tei-gaudi:${TAG:-latest}
|
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||||
container_name: tei-embedding-gaudi-server
|
container_name: tei-embedding-gaudi-server
|
||||||
ports:
|
ports:
|
||||||
- "8090:80"
|
- "8090:80"
|
||||||
@@ -141,7 +145,7 @@ services:
|
|||||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
tgi-service:
|
tgi-service:
|
||||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
|
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||||
container_name: tgi-gaudi-server
|
container_name: tgi-gaudi-server
|
||||||
ports:
|
ports:
|
||||||
- "8008:80"
|
- "8008:80"
|
||||||
@@ -151,11 +155,15 @@ services:
|
|||||||
no_proxy: ${no_proxy}
|
no_proxy: ${no_proxy}
|
||||||
http_proxy: ${http_proxy}
|
http_proxy: ${http_proxy}
|
||||||
https_proxy: ${https_proxy}
|
https_proxy: ${https_proxy}
|
||||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||||
HABANA_VISIBLE_DEVICES: all
|
HABANA_VISIBLE_DEVICES: all
|
||||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||||
|
ENABLE_HPU_GRAPH: true
|
||||||
|
LIMIT_HPU_GRAPH: true
|
||||||
|
USE_FLASH_ATTENTION: true
|
||||||
|
FLASH_ATTENTION_RECOMPUTE: true
|
||||||
runtime: habana
|
runtime: habana
|
||||||
cap_add:
|
cap_add:
|
||||||
- SYS_NICE
|
- SYS_NICE
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ services:
|
|||||||
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||||
tei-embedding-service:
|
tei-embedding-service:
|
||||||
image: ${REGISTRY:-opea}/tei-gaudi:${TAG:-latest}
|
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||||
container_name: tei-embedding-gaudi-server
|
container_name: tei-embedding-gaudi-server
|
||||||
ports:
|
ports:
|
||||||
- "8090:80"
|
- "8090:80"
|
||||||
@@ -108,7 +108,7 @@ services:
|
|||||||
# HF_HUB_ENABLE_HF_TRANSFER: 0
|
# HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||||
# restart: unless-stopped
|
# restart: unless-stopped
|
||||||
tgi-service:
|
tgi-service:
|
||||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
|
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||||
container_name: tgi-gaudi-server
|
container_name: tgi-gaudi-server
|
||||||
ports:
|
ports:
|
||||||
- "8005:80"
|
- "8005:80"
|
||||||
@@ -118,11 +118,15 @@ services:
|
|||||||
no_proxy: ${no_proxy}
|
no_proxy: ${no_proxy}
|
||||||
http_proxy: ${http_proxy}
|
http_proxy: ${http_proxy}
|
||||||
https_proxy: ${https_proxy}
|
https_proxy: ${https_proxy}
|
||||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||||
HABANA_VISIBLE_DEVICES: all
|
HABANA_VISIBLE_DEVICES: all
|
||||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||||
|
ENABLE_HPU_GRAPH: true
|
||||||
|
LIMIT_HPU_GRAPH: true
|
||||||
|
USE_FLASH_ATTENTION: true
|
||||||
|
FLASH_ATTENTION_RECOMPUTE: true
|
||||||
runtime: habana
|
runtime: habana
|
||||||
cap_add:
|
cap_add:
|
||||||
- SYS_NICE
|
- SYS_NICE
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ services:
|
|||||||
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||||
tei-embedding-service:
|
tei-embedding-service:
|
||||||
image: ${REGISTRY:-opea}/tei-gaudi:${TAG:-latest}
|
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||||
container_name: tei-embedding-gaudi-server
|
container_name: tei-embedding-gaudi-server
|
||||||
ports:
|
ports:
|
||||||
- "8090:80"
|
- "8090:80"
|
||||||
@@ -73,7 +73,7 @@ services:
|
|||||||
INDEX_NAME: ${INDEX_NAME}
|
INDEX_NAME: ${INDEX_NAME}
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
tei-reranking-service:
|
tei-reranking-service:
|
||||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
|
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||||
container_name: tei-reranking-gaudi-server
|
container_name: tei-reranking-gaudi-server
|
||||||
ports:
|
ports:
|
||||||
- "8808:80"
|
- "8808:80"
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ services:
|
|||||||
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||||
tei-embedding-service:
|
tei-embedding-service:
|
||||||
image: ${REGISTRY:-opea}/tei-gaudi:${TAG:-latest}
|
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||||
container_name: tei-embedding-gaudi-server
|
container_name: tei-embedding-gaudi-server
|
||||||
ports:
|
ports:
|
||||||
- "8090:80"
|
- "8090:80"
|
||||||
@@ -73,7 +73,7 @@ services:
|
|||||||
INDEX_NAME: ${INDEX_NAME}
|
INDEX_NAME: ${INDEX_NAME}
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
tei-reranking-service:
|
tei-reranking-service:
|
||||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
|
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||||
container_name: tei-reranking-gaudi-server
|
container_name: tei-reranking-gaudi-server
|
||||||
ports:
|
ports:
|
||||||
- "8808:80"
|
- "8808:80"
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ services:
|
|||||||
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||||
tei-embedding-service:
|
tei-embedding-service:
|
||||||
image: ${REGISTRY:-opea}/tei-gaudi:${TAG:-latest}
|
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||||
container_name: tei-embedding-gaudi-server
|
container_name: tei-embedding-gaudi-server
|
||||||
ports:
|
ports:
|
||||||
- "8090:80"
|
- "8090:80"
|
||||||
@@ -75,7 +75,7 @@ services:
|
|||||||
INDEX_NAME: ${INDEX_NAME}
|
INDEX_NAME: ${INDEX_NAME}
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
tgi-service:
|
tgi-service:
|
||||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
|
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||||
container_name: tgi-gaudi-server
|
container_name: tgi-gaudi-server
|
||||||
ports:
|
ports:
|
||||||
- "8005:80"
|
- "8005:80"
|
||||||
@@ -85,11 +85,15 @@ services:
|
|||||||
no_proxy: ${no_proxy}
|
no_proxy: ${no_proxy}
|
||||||
http_proxy: ${http_proxy}
|
http_proxy: ${http_proxy}
|
||||||
https_proxy: ${https_proxy}
|
https_proxy: ${https_proxy}
|
||||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||||
HABANA_VISIBLE_DEVICES: all
|
HABANA_VISIBLE_DEVICES: all
|
||||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||||
|
ENABLE_HPU_GRAPH: true
|
||||||
|
LIMIT_HPU_GRAPH: true
|
||||||
|
USE_FLASH_ATTENTION: true
|
||||||
|
FLASH_ATTENTION_RECOMPUTE: true
|
||||||
runtime: habana
|
runtime: habana
|
||||||
cap_add:
|
cap_add:
|
||||||
- SYS_NICE
|
- SYS_NICE
|
||||||
|
|||||||
@@ -56,16 +56,16 @@ f810f3b4d329 opea/embedding-tei:latest "python e
|
|||||||
2fa17d84605f opea/dataprep-redis:latest "python prepare_doc_…" 2 minutes ago Up 2 minutes 0.0.0.0:6007->6007/tcp, :::6007->6007/tcp dataprep-redis-server
|
2fa17d84605f opea/dataprep-redis:latest "python prepare_doc_…" 2 minutes ago Up 2 minutes 0.0.0.0:6007->6007/tcp, :::6007->6007/tcp dataprep-redis-server
|
||||||
69e1fb59e92c opea/retriever-redis:latest "/home/user/comps/re…" 2 minutes ago Up 2 minutes 0.0.0.0:7000->7000/tcp, :::7000->7000/tcp retriever-redis-server
|
69e1fb59e92c opea/retriever-redis:latest "/home/user/comps/re…" 2 minutes ago Up 2 minutes 0.0.0.0:7000->7000/tcp, :::7000->7000/tcp retriever-redis-server
|
||||||
313b9d14928a opea/reranking-tei:latest "python reranking_te…" 2 minutes ago Up 2 minutes 0.0.0.0:8000->8000/tcp, :::8000->8000/tcp reranking-tei-gaudi-server
|
313b9d14928a opea/reranking-tei:latest "python reranking_te…" 2 minutes ago Up 2 minutes 0.0.0.0:8000->8000/tcp, :::8000->8000/tcp reranking-tei-gaudi-server
|
||||||
05c40b636239 ghcr.io/huggingface/tgi-gaudi:1.2.1 "text-generation-lau…" 2 minutes ago Exited (1) About a minute ago tgi-gaudi-server
|
05c40b636239 ghcr.io/huggingface/tgi-gaudi:2.0.5 "text-generation-lau…" 2 minutes ago Exited (1) About a minute ago tgi-gaudi-server
|
||||||
174bd43fa6b5 opea/tei-gaudi:latest "text-embeddings-rou…" 2 minutes ago Up 2 minutes 0.0.0.0:8090->80/tcp, :::8090->80/tcp tei-embedding-gaudi-server
|
174bd43fa6b5 ghcr.io/huggingface/tei-gaudi:latest "text-embeddings-rou…" 2 minutes ago Up 2 minutes 0.0.0.0:8090->80/tcp, :::8090->80/tcp tei-embedding-gaudi-server
|
||||||
74084469aa33 redis/redis-stack:7.2.0-v9 "/entrypoint.sh" 2 minutes ago Up 2 minutes 0.0.0.0:6379->6379/tcp, :::6379->6379/tcp, 0.0.0.0:8001->8001/tcp, :::8001->8001/tcp redis-vector-db
|
74084469aa33 redis/redis-stack:7.2.0-v9 "/entrypoint.sh" 2 minutes ago Up 2 minutes 0.0.0.0:6379->6379/tcp, :::6379->6379/tcp, 0.0.0.0:8001->8001/tcp, :::8001->8001/tcp redis-vector-db
|
||||||
88399dbc9e43 ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 "text-embeddings-rou…" 2 minutes ago Up 2 minutes 0.0.0.0:8808->80/tcp, :::8808->80/tcp tei-reranking-gaudi-server
|
88399dbc9e43 ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 "text-embeddings-rou…" 2 minutes ago Up 2 minutes 0.0.0.0:8808->80/tcp, :::8808->80/tcp tei-reranking-gaudi-server
|
||||||
```
|
```
|
||||||
|
|
||||||
In this case, `ghcr.io/huggingface/tgi-gaudi:1.2.1` Existed.
|
In this case, the `ghcr.io/huggingface/tgi-gaudi:2.0.5` container has exited.
|
||||||
|
|
||||||
```
|
```
|
||||||
05c40b636239 ghcr.io/huggingface/tgi-gaudi:1.2.1 "text-generation-lau…" 2 minutes ago Exited (1) About a minute ago tgi-gaudi-server
|
05c40b636239 ghcr.io/huggingface/tgi-gaudi:2.0.5 "text-generation-lau…" 2 minutes ago Exited (1) About a minute ago tgi-gaudi-server
|
||||||
```
|
```
|
||||||
|
|
||||||
Next we can check the container logs to get to know what happened during the docker start.
|
Next we can check the container logs to get to know what happened during the docker start.
|
||||||
@@ -76,7 +76,7 @@ Check the log of container by:
|
|||||||
|
|
||||||
`docker logs <CONTAINER ID> -t`
|
`docker logs <CONTAINER ID> -t`
|
||||||
|
|
||||||
View the logs of `ghcr.io/huggingface/tgi-gaudi:1.2.1`
|
View the logs of `ghcr.io/huggingface/tgi-gaudi:2.0.5`
|
||||||
|
|
||||||
`docker logs 05c40b636239 -t`
|
`docker logs 05c40b636239 -t`
|
||||||
|
|
||||||
@@ -105,7 +105,7 @@ So just make sure the devices are available.
|
|||||||
Here is another failure example:
|
Here is another failure example:
|
||||||
|
|
||||||
```
|
```
|
||||||
f7a08f9867f9 ghcr.io/huggingface/tgi-gaudi:1.2.1 "text-generation-lau…" 16 seconds ago Exited (2) 14 seconds ago tgi-gaudi-server
|
f7a08f9867f9 ghcr.io/huggingface/tgi-gaudi:2.0.5 "text-generation-lau…" 16 seconds ago Exited (2) 14 seconds ago tgi-gaudi-server
|
||||||
```
|
```
|
||||||
|
|
||||||
Check the log by `docker logs f7a08f9867f9 -t`.
|
Check the log by `docker logs f7a08f9867f9 -t`.
|
||||||
@@ -122,7 +122,7 @@ View the docker input parameters in `./ChatQnA/docker_compose/intel/hpu/gaudi/co
|
|||||||
|
|
||||||
```
|
```
|
||||||
tgi-service:
|
tgi-service:
|
||||||
image: ghcr.io/huggingface/tgi-gaudi:1.2.1
|
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||||
container_name: tgi-gaudi-server
|
container_name: tgi-gaudi-server
|
||||||
ports:
|
ports:
|
||||||
- "8008:80"
|
- "8008:80"
|
||||||
@@ -131,9 +131,13 @@ View the docker input parameters in `./ChatQnA/docker_compose/intel/hpu/gaudi/co
|
|||||||
environment:
|
environment:
|
||||||
http_proxy: ${http_proxy}
|
http_proxy: ${http_proxy}
|
||||||
https_proxy: ${https_proxy}
|
https_proxy: ${https_proxy}
|
||||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||||
HABANA_VISIBLE_DEVICES: all
|
HABANA_VISIBLE_DEVICES: all
|
||||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||||
|
ENABLE_HPU_GRAPH: true
|
||||||
|
LIMIT_HPU_GRAPH: true
|
||||||
|
USE_FLASH_ATTENTION: true
|
||||||
|
FLASH_ATTENTION_RECOMPUTE: true
|
||||||
runtime: habana
|
runtime: habana
|
||||||
cap_add:
|
cap_add:
|
||||||
- SYS_NICE
|
- SYS_NICE
|
||||||
@@ -278,7 +282,7 @@ and the log shows model warm up, please wait for a while and try it later.
|
|||||||
```
|
```
|
||||||
curl http://${host_ip}:9000/v1/chat/completions\
|
curl http://${host_ip}:9000/v1/chat/completions\
|
||||||
-X POST \
|
-X POST \
|
||||||
-d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
|
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
|
||||||
-H 'Content-Type: application/json'
|
-H 'Content-Type: application/json'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
@@ -21,3 +21,8 @@ export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/chatqna"
|
|||||||
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
|
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
|
||||||
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_file"
|
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_file"
|
||||||
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete_file"
|
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete_file"
|
||||||
|
export FRONTEND_SERVICE_IP=${host_ip}
|
||||||
|
export FRONTEND_SERVICE_PORT=5173
|
||||||
|
export BACKEND_SERVICE_NAME=chatqna
|
||||||
|
export BACKEND_SERVICE_IP=${host_ip}
|
||||||
|
export BACKEND_SERVICE_PORT=8888
|
||||||
|
|||||||
@@ -2,6 +2,70 @@
|
|||||||
|
|
||||||
This document outlines the deployment process for a ChatQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on the NVIDIA GPU platform. The steps include Docker image creation, container deployment via Docker Compose, and service execution to integrate microservices such as embedding, retriever, rerank, and llm. We will publish the Docker images to Docker Hub, which will simplify the deployment process for this service.
|
This document outlines the deployment process for a ChatQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on the NVIDIA GPU platform. The steps include Docker image creation, container deployment via Docker Compose, and service execution to integrate microservices such as embedding, retriever, rerank, and llm. We will publish the Docker images to Docker Hub, which will simplify the deployment process for this service.
|
||||||
|
|
||||||
|
Quick Start Deployment Steps:
|
||||||
|
|
||||||
|
1. Set up the environment variables.
|
||||||
|
2. Run Docker Compose.
|
||||||
|
3. Consume the ChatQnA Service.
|
||||||
|
|
||||||
|
## Quick Start: 1.Setup Environment Variable
|
||||||
|
|
||||||
|
To set up environment variables for deploying ChatQnA services, follow these steps:
|
||||||
|
|
||||||
|
1. Set the required environment variables:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Example: host_ip="192.168.1.1"
|
||||||
|
export host_ip="External_Public_IP"
|
||||||
|
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
|
||||||
|
export no_proxy="Your_No_Proxy"
|
||||||
|
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
|
||||||
|
```
|
||||||
|
|
||||||
|
2. If you are in a proxy environment, also set the proxy-related environment variables:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export http_proxy="Your_HTTP_Proxy"
|
||||||
|
export https_proxy="Your_HTTPs_Proxy"
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Set up other environment variables:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
source ./set_env.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
## Quick Start: 2.Run Docker Compose
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose up -d
|
||||||
|
```
|
||||||
|
|
||||||
|
This automatically downloads the Docker images from Docker Hub:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker pull opea/chatqna:latest
|
||||||
|
docker pull opea/chatqna-ui:latest
|
||||||
|
```
|
||||||
|
|
||||||
|
In the following cases, you can build the Docker images from source yourself.
|
||||||
|
|
||||||
|
- Failed to download the docker image.
|
||||||
|
|
||||||
|
- If you want to use a specific version of the Docker image.
|
||||||
|
|
||||||
|
Please refer to the 'Build Docker Images' section below.
|
||||||
|
|
||||||
|
## Quick Start: 3.Consume the ChatQnA Service
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://${host_ip}:8888/v1/chatqna \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"messages": "What is the revenue of Nike in 2023?"
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
## 🚀 Build Docker Images
|
## 🚀 Build Docker Images
|
||||||
|
|
||||||
First of all, you need to build Docker Images locally. This step can be skipped once the Docker images are published to Docker Hub.
|
First of all, you need to build Docker Images locally. This step can be skipped once the Docker images are published to Docker Hub.
|
||||||
@@ -74,7 +138,14 @@ docker build --no-cache -t opea/chatqna-react-ui:latest --build-arg https_proxy=
|
|||||||
cd ../../../..
|
cd ../../../..
|
||||||
```
|
```
|
||||||
|
|
||||||
Then run the command `docker images`, you will have the following 7 Docker Images:
|
### 10. Build Nginx Docker Image
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd GenAIComps
|
||||||
|
docker build -t opea/nginx:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/nginx/Dockerfile .
|
||||||
|
```
|
||||||
|
|
||||||
|
Then run the command `docker images`, you will have the following 8 Docker Images:
|
||||||
|
|
||||||
1. `opea/embedding-tei:latest`
|
1. `opea/embedding-tei:latest`
|
||||||
2. `opea/retriever-redis:latest`
|
2. `opea/retriever-redis:latest`
|
||||||
@@ -82,8 +153,8 @@ Then run the command `docker images`, you will have the following 7 Docker Image
|
|||||||
4. `opea/llm-tgi:latest`
|
4. `opea/llm-tgi:latest`
|
||||||
5. `opea/dataprep-redis:latest`
|
5. `opea/dataprep-redis:latest`
|
||||||
6. `opea/chatqna:latest`
|
6. `opea/chatqna:latest`
|
||||||
7. `opea/chatqna-ui:latest`
|
7. `opea/chatqna-ui:latest` or `opea/chatqna-react-ui:latest`
|
||||||
8. `opea/chatqna-react-ui:latest`
|
8. `opea/nginx:latest`
|
||||||
|
|
||||||
## 🚀 Start MicroServices and MegaService
|
## 🚀 Start MicroServices and MegaService
|
||||||
|
|
||||||
@@ -101,33 +172,30 @@ Change the `xxx_MODEL_ID` below for your needs.
|
|||||||
|
|
||||||
### Setup Environment Variables
|
### Setup Environment Variables
|
||||||
|
|
||||||
Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below.
|
1. Set the required environment variables:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
export no_proxy=${your_no_proxy}
|
# Example: host_ip="192.168.1.1"
|
||||||
export http_proxy=${your_http_proxy}
|
export host_ip="External_Public_IP"
|
||||||
export https_proxy=${your_http_proxy}
|
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
|
||||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
export no_proxy="Your_No_Proxy"
|
||||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
|
||||||
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
|
# Example: NGINX_PORT=80
|
||||||
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:8090"
|
export NGINX_PORT=${your_nginx_port}
|
||||||
export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
|
```
|
||||||
export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
|
|
||||||
export REDIS_URL="redis://${host_ip}:6379"
|
|
||||||
export INDEX_NAME="rag-redis"
|
|
||||||
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
|
|
||||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
|
||||||
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
|
|
||||||
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
|
|
||||||
export RERANK_SERVICE_HOST_IP=${host_ip}
|
|
||||||
export LLM_SERVICE_HOST_IP=${host_ip}
|
|
||||||
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/chatqna"
|
|
||||||
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
|
|
||||||
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_file"
|
|
||||||
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete_file"
|
|
||||||
```
|
|
||||||
|
|
||||||
Note: Please replace with `host_ip` with you external IP address, do **NOT** use localhost.
|
2. If you are in a proxy environment, also set the proxy-related environment variables:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export http_proxy="Your_HTTP_Proxy"
|
||||||
|
export https_proxy="Your_HTTPs_Proxy"
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Set up other environment variables:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
source ./set_env.sh
|
||||||
|
```
|
||||||
|
|
||||||
### Start all the services Docker Containers
|
### Start all the services Docker Containers
|
||||||
|
|
||||||
@@ -220,7 +288,7 @@ docker compose up -d
|
|||||||
```bash
|
```bash
|
||||||
curl http://${host_ip}:9000/v1/chat/completions \
|
curl http://${host_ip}:9000/v1/chat/completions \
|
||||||
-X POST \
|
-X POST \
|
||||||
-d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
|
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
|
||||||
-H 'Content-Type: application/json'
|
-H 'Content-Type: application/json'
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -232,58 +300,68 @@ docker compose up -d
|
|||||||
}'
|
}'
|
||||||
```
|
```
|
||||||
|
|
||||||
9. Dataprep Microservice(Optional)
|
9. Nginx Service
|
||||||
|
|
||||||
If you want to update the default knowledge base, you can use the following commands:
|
|
||||||
|
|
||||||
Update Knowledge Base via Local File Upload:
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
curl -X POST "http://${host_ip}:6007/v1/dataprep" \
|
curl http://${host_ip}:${NGINX_PORT}/v1/chatqna \
|
||||||
-H "Content-Type: multipart/form-data" \
|
-H "Content-Type: application/json" \
|
||||||
-F "files=@./nke-10k-2023.pdf"
|
-d '{"messages": "What is the revenue of Nike in 2023?"}'
|
||||||
```
|
```
|
||||||
|
|
||||||
This command updates a knowledge base by uploading a local file for processing. Update the file path according to your environment.
|
10. Dataprep Microservice(Optional)
|
||||||
|
|
||||||
Add Knowledge Base via HTTP Links:
|
If you want to update the default knowledge base, you can use the following commands:
|
||||||
|
|
||||||
```bash
|
Update Knowledge Base via Local File Upload:
|
||||||
curl -X POST "http://${host_ip}:6007/v1/dataprep" \
|
|
||||||
-H "Content-Type: multipart/form-data" \
|
|
||||||
-F 'link_list=["https://opea.dev"]'
|
|
||||||
```
|
|
||||||
|
|
||||||
This command updates a knowledge base by submitting a list of HTTP links for processing.
|
```bash
|
||||||
|
curl -X POST "http://${host_ip}:6007/v1/dataprep" \
|
||||||
|
-H "Content-Type: multipart/form-data" \
|
||||||
|
-F "files=@./nke-10k-2023.pdf"
|
||||||
|
```
|
||||||
|
|
||||||
Also, you are able to get the file list that you uploaded:
|
This command updates a knowledge base by uploading a local file for processing. Update the file path according to your environment.
|
||||||
|
|
||||||
```bash
|
Add Knowledge Base via HTTP Links:
|
||||||
curl -X POST "http://${host_ip}:6007/v1/dataprep/get_file" \
|
|
||||||
-H "Content-Type: application/json"
|
|
||||||
```
|
|
||||||
|
|
||||||
To delete the file/link you uploaded:
|
```bash
|
||||||
|
curl -X POST "http://${host_ip}:6007/v1/dataprep" \
|
||||||
|
-H "Content-Type: multipart/form-data" \
|
||||||
|
-F 'link_list=["https://opea.dev"]'
|
||||||
|
```
|
||||||
|
|
||||||
```bash
|
This command updates a knowledge base by submitting a list of HTTP links for processing.
|
||||||
# delete link
|
|
||||||
curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \
|
|
||||||
-d '{"file_path": "https://opea.dev"}' \
|
|
||||||
-H "Content-Type: application/json"
|
|
||||||
|
|
||||||
# delete file
|
Also, you are able to get the file list that you uploaded:
|
||||||
curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \
|
|
||||||
-d '{"file_path": "nke-10k-2023.pdf"}' \
|
|
||||||
-H "Content-Type: application/json"
|
|
||||||
|
|
||||||
# delete all uploaded files and links
|
```bash
|
||||||
curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \
|
curl -X POST "http://${host_ip}:6007/v1/dataprep/get_file" \
|
||||||
-d '{"file_path": "all"}' \
|
-H "Content-Type: application/json"
|
||||||
-H "Content-Type: application/json"
|
```
|
||||||
```
|
|
||||||
|
To delete the file/link you uploaded:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# delete link
|
||||||
|
curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \
|
||||||
|
-d '{"file_path": "https://opea.dev"}' \
|
||||||
|
-H "Content-Type: application/json"
|
||||||
|
|
||||||
|
# delete file
|
||||||
|
curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \
|
||||||
|
-d '{"file_path": "nke-10k-2023.pdf"}' \
|
||||||
|
-H "Content-Type: application/json"
|
||||||
|
|
||||||
|
# delete all uploaded files and links
|
||||||
|
curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \
|
||||||
|
-d '{"file_path": "all"}' \
|
||||||
|
-H "Content-Type: application/json"
|
||||||
|
```
|
||||||
|
|
||||||
## 🚀 Launch the UI
|
## 🚀 Launch the UI
|
||||||
|
|
||||||
|
### Launch with origin port
|
||||||
|
|
||||||
To access the frontend, open the following URL in your browser: http://{host_ip}:5173. By default, the UI runs on port 5173 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `compose.yaml` file as shown below:
|
To access the frontend, open the following URL in your browser: http://{host_ip}:5173. By default, the UI runs on port 5173 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `compose.yaml` file as shown below:
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
@@ -294,6 +372,10 @@ To access the frontend, open the following URL in your browser: http://{host_ip}
|
|||||||
- "80:5173"
|
- "80:5173"
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Launch with Nginx
|
||||||
|
|
||||||
|
If you want to launch the UI using Nginx, open this URL: `http://${host_ip}:${NGINX_PORT}` in your browser to access the frontend.
|
||||||
|
|
||||||
## 🚀 Launch the Conversational UI (Optional)
|
## 🚀 Launch the Conversational UI (Optional)
|
||||||
|
|
||||||
To access the Conversational UI (react based) frontend, modify the UI service in the `compose.yaml` file. Replace `chaqna-ui-server` service with the `chatqna-react-ui-server` service as per the config below:
|
To access the Conversational UI (react based) frontend, modify the UI service in the `compose.yaml` file. Replace `chaqna-ui-server` service with the `chatqna-react-ui-server` service as per the config below:
|
||||||
@@ -324,3 +406,11 @@ Once the services are up, open the following URL in your browser: http://{host_i
|
|||||||
```
|
```
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
|
Here is an example of running ChatQnA:
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
Here is an example of running ChatQnA with Conversational UI (React):
|
||||||
|
|
||||||
|

|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user