Compare commits
98 Commits
llama3.2_s
...
helmcharts
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ebd2ab0222 | ||
|
|
2f1f80bbae | ||
|
|
5158b5e822 | ||
|
|
1c3f55602a | ||
|
|
bb4c1dbc44 | ||
|
|
16018085b0 | ||
|
|
93bbd5131f | ||
|
|
4f32f867ec | ||
|
|
4f183c2a0d | ||
|
|
1046aad26f | ||
|
|
2876677214 | ||
|
|
a9536321a0 | ||
|
|
24de14e58a | ||
|
|
065222f29b | ||
|
|
3f596d9747 | ||
|
|
9da0c09b18 | ||
|
|
b9c646a2b8 | ||
|
|
27e9832af4 | ||
|
|
f3cbcadfa2 | ||
|
|
e21ee76f24 | ||
|
|
8effe7a4eb | ||
|
|
0d3876d6fa | ||
|
|
bb46f5b355 | ||
|
|
bcaffd7db4 | ||
|
|
124143ea40 | ||
|
|
6dc4bb5c79 | ||
|
|
d290bd811f | ||
|
|
d68ce801e4 | ||
|
|
048b4e1df9 | ||
|
|
fdb8a33a6e | ||
|
|
4e1237d410 | ||
|
|
58ff7d9518 | ||
|
|
9ee1a7410b | ||
|
|
24166615d7 | ||
|
|
a0b2263fd3 | ||
|
|
5c2f3f0301 | ||
|
|
a70775d3d6 | ||
|
|
3dd5475773 | ||
|
|
d6b04b3405 | ||
|
|
184e9a43b8 | ||
|
|
658867fce4 | ||
|
|
620ef76d16 | ||
|
|
23b820e740 | ||
|
|
3c164f3aa2 | ||
|
|
7669c42085 | ||
|
|
256b58c07e | ||
|
|
3c3a5bed67 | ||
|
|
37c74b232c | ||
|
|
4a265abb73 | ||
|
|
b0487fe92b | ||
|
|
d486bbbe10 | ||
|
|
b0f7c9cfc2 | ||
|
|
eeced9b31c | ||
|
|
b377c2b8f8 | ||
|
|
5dae713793 | ||
|
|
c930bea172 | ||
|
|
0edff26ee5 | ||
|
|
778afb50ac | ||
|
|
40800b0848 | ||
|
|
f2f6c09a0f | ||
|
|
c6fc92d37c | ||
|
|
c0643b71e8 | ||
|
|
088ab98f31 | ||
|
|
441f8cc6ba | ||
|
|
b056ce6617 | ||
|
|
773c32b38b | ||
|
|
619d941047 | ||
|
|
b71a12d424 | ||
|
|
12469c92d8 | ||
|
|
fbde15b40d | ||
|
|
ae10712fe8 | ||
|
|
373fa88033 | ||
|
|
e2f9037344 | ||
|
|
afc39fa4c0 | ||
|
|
e1c476c185 | ||
|
|
77920613dc | ||
|
|
7dec00176e | ||
|
|
bf28c7f098 | ||
|
|
63bad29794 | ||
|
|
36d3ef2b17 | ||
|
|
0c6b044139 | ||
|
|
d23cd799e9 | ||
|
|
644c3a67ce | ||
|
|
ffecd182db | ||
|
|
d16c80e493 | ||
|
|
2de1bfc5bb | ||
|
|
75df2c9979 | ||
|
|
62e06a0aff | ||
|
|
bd32b03e3c | ||
|
|
9d0b49c2d6 | ||
|
|
75ce2a3ca6 | ||
|
|
99c10933b4 | ||
|
|
8bcd82e82d | ||
|
|
c1038d2193 | ||
|
|
33b9d4e421 | ||
|
|
c9553c6f9a | ||
|
|
3e796ba73d | ||
|
|
5ed776709d |
8
.github/CODEOWNERS
vendored
8
.github/CODEOWNERS
vendored
@@ -1,13 +1,17 @@
|
||||
/AgentQnA/ xuhui.ren@intel.com
|
||||
/AgentQnA/ kaokao.lv@intel.com
|
||||
/AudioQnA/ sihan.chen@intel.com
|
||||
/ChatQnA/ liang1.lv@intel.com
|
||||
/CodeGen/ liang1.lv@intel.com
|
||||
/CodeTrans/ sihan.chen@intel.com
|
||||
/DocSum/ letong.han@intel.com
|
||||
/DocIndexRetriever/ xuhui.ren@intel.com chendi.xue@intel.com
|
||||
/DocIndexRetriever/ kaokao.lv@intel.com chendi.xue@intel.com
|
||||
/InstructionTuning xinyu.ye@intel.com
|
||||
/RerankFinetuning xinyu.ye@intel.com
|
||||
/MultimodalQnA tiep.le@intel.com
|
||||
/FaqGen/ xinyao.wang@intel.com
|
||||
/SearchQnA/ sihan.chen@intel.com
|
||||
/Translation/ liang1.lv@intel.com
|
||||
/VisualQnA/ liang1.lv@intel.com
|
||||
/ProductivitySuite/ hoong.tee.yeoh@intel.com
|
||||
/VideoQnA huiling.bao@intel.com
|
||||
/*/ liang1.lv@intel.com
|
||||
|
||||
6
.github/workflows/_example-workflow.yml
vendored
6
.github/workflows/_example-workflow.yml
vendored
@@ -12,6 +12,10 @@ on:
|
||||
example:
|
||||
required: true
|
||||
type: string
|
||||
services:
|
||||
default: ""
|
||||
required: false
|
||||
type: string
|
||||
tag:
|
||||
default: "latest"
|
||||
required: false
|
||||
@@ -77,6 +81,7 @@ jobs:
|
||||
with:
|
||||
work_dir: ${{ github.workspace }}/${{ inputs.example }}/docker_image_build
|
||||
docker_compose_path: ${{ github.workspace }}/${{ inputs.example }}/docker_image_build/build.yaml
|
||||
service_list: ${{ inputs.services }}
|
||||
registry: ${OPEA_IMAGE_REPO}opea
|
||||
tag: ${{ inputs.tag }}
|
||||
|
||||
@@ -105,7 +110,6 @@ jobs:
|
||||
example: ${{ inputs.example }}
|
||||
hardware: ${{ inputs.node }}
|
||||
tag: ${{ inputs.tag }}
|
||||
context: "CD"
|
||||
secrets: inherit
|
||||
|
||||
####################################################################################################
|
||||
|
||||
8
.github/workflows/_manifest-e2e.yml
vendored
8
.github/workflows/_manifest-e2e.yml
vendored
@@ -20,11 +20,6 @@ on:
|
||||
description: "Tag to apply to images, default is latest"
|
||||
required: false
|
||||
type: string
|
||||
context:
|
||||
default: "CI"
|
||||
description: "CI or CD"
|
||||
required: false
|
||||
type: string
|
||||
|
||||
jobs:
|
||||
manifest-test:
|
||||
@@ -51,7 +46,7 @@ jobs:
|
||||
|
||||
- name: Set variables
|
||||
run: |
|
||||
echo "IMAGE_REPO=$OPEA_IMAGE_REPO" >> $GITHUB_ENV
|
||||
echo "IMAGE_REPO=${OPEA_IMAGE_REPO}opea" >> $GITHUB_ENV
|
||||
echo "IMAGE_TAG=${{ inputs.tag }}" >> $GITHUB_ENV
|
||||
lower_example=$(echo "${{ inputs.example }}" | tr '[:upper:]' '[:lower:]')
|
||||
echo "NAMESPACE=$lower_example-$(tr -dc a-z0-9 </dev/urandom | head -c 16)" >> $GITHUB_ENV
|
||||
@@ -60,7 +55,6 @@ jobs:
|
||||
echo "continue_test=true" >> $GITHUB_ENV
|
||||
echo "should_cleanup=false" >> $GITHUB_ENV
|
||||
echo "skip_validate=true" >> $GITHUB_ENV
|
||||
echo "CONTEXT=${{ inputs.context }}" >> $GITHUB_ENV
|
||||
echo "NAMESPACE=$NAMESPACE"
|
||||
|
||||
- name: Kubectl install
|
||||
|
||||
1
.github/workflows/_run-docker-compose.yml
vendored
1
.github/workflows/_run-docker-compose.yml
vendored
@@ -118,6 +118,7 @@ jobs:
|
||||
GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
|
||||
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
|
||||
PINECONE_KEY: ${{ secrets.PINECONE_KEY }}
|
||||
PINECONE_KEY_LANGCHAIN_TEST: ${{ secrets.PINECONE_KEY_LANGCHAIN_TEST }}
|
||||
IMAGE_REPO: ${{ inputs.registry }}
|
||||
IMAGE_TAG: ${{ inputs.tag }}
|
||||
example: ${{ inputs.example }}
|
||||
|
||||
16
.github/workflows/manual-docker-publish.yml
vendored
16
.github/workflows/manual-docker-publish.yml
vendored
@@ -11,23 +11,23 @@ on:
|
||||
required: true
|
||||
type: string
|
||||
examples:
|
||||
default: "Translation"
|
||||
description: 'List of examples to publish [AudioQnA,ChatQnA,CodeGen,CodeTrans,DocSum,FaqGen,SearchQnA,Translation]'
|
||||
default: ""
|
||||
description: 'List of examples to publish [AgentQnA,AudioQnA,ChatQnA,CodeGen,CodeTrans,DocIndexRetriever,DocSum,FaqGen,InstructionTuning,MultimodalQnA,ProductivitySuite,RerankFinetuning,SearchQnA,Translation,VideoQnA,VisualQnA]'
|
||||
required: false
|
||||
type: string
|
||||
images:
|
||||
default: "gmcmanager,gmcrouter"
|
||||
description: 'List of images to publish [gmcmanager,gmcrouter, ...]'
|
||||
default: ""
|
||||
description: 'List of images to publish [gmcmanager,gmcrouter]'
|
||||
required: false
|
||||
type: string
|
||||
tag:
|
||||
default: "v0.9"
|
||||
description: "Tag to publish"
|
||||
default: "rc"
|
||||
description: "Tag to publish, like [1.0rc]"
|
||||
required: true
|
||||
type: string
|
||||
publish_tags:
|
||||
default: "latest,v0.9"
|
||||
description: 'Tag list apply to publish images'
|
||||
default: "latest,1.x"
|
||||
description: "Tag list apply to publish images, like [latest,1.0]"
|
||||
required: false
|
||||
type: string
|
||||
|
||||
|
||||
8
.github/workflows/manual-docker-scan.yml
vendored
8
.github/workflows/manual-docker-scan.yml
vendored
@@ -11,13 +11,13 @@ on:
|
||||
required: true
|
||||
type: string
|
||||
examples:
|
||||
default: "ChatQnA"
|
||||
description: 'List of examples to scan [AudioQnA,ChatQnA,CodeGen,CodeTrans,DocSum,FaqGen,SearchQnA,Translation]'
|
||||
default: ""
|
||||
description: 'List of examples to publish "AgentQnA,AudioQnA,ChatQnA,CodeGen,CodeTrans,DocIndexRetriever,DocSum,FaqGen,InstructionTuning,MultimodalQnA,ProductivitySuite,RerankFinetuning,SearchQnA,Translation,VideoQnA,VisualQnA"'
|
||||
required: false
|
||||
type: string
|
||||
images:
|
||||
default: "gmcmanager,gmcrouter"
|
||||
description: 'List of images to scan [gmcmanager,gmcrouter, ...]'
|
||||
default: ""
|
||||
description: 'List of images to publish "gmcmanager,gmcrouter"'
|
||||
required: false
|
||||
type: string
|
||||
tag:
|
||||
|
||||
59
.github/workflows/manual-image-build.yml
vendored
Normal file
59
.github/workflows/manual-image-build.yml
vendored
Normal file
@@ -0,0 +1,59 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
name: Build specific images on manual event

# Manually triggered workflow: builds the images of one example on each
# requested hardware node by delegating to the reusable _example-workflow.yml.
on:
  workflow_dispatch:
    inputs:
      # Comma-separated list; each entry becomes one matrix job below.
      nodes:
        default: "gaudi,xeon"
        description: "Hardware to run test"
        required: true
        type: string
      example:
        default: "ChatQnA"
        description: 'Build images belong to which example?'
        required: true
        type: string
      # Forwarded unchanged to the reusable workflow's `services` input.
      services:
        default: "chatqna,chatqna-without-rerank"
        description: 'Service list to build'
        required: true
        type: string
      tag:
        default: "latest"
        description: "Tag to apply to images"
        required: true
        type: string
      opea_branch:
        default: "main"
        description: 'OPEA branch for image build'
        required: false
        type: string

jobs:
  # Converts the comma-separated `nodes` input into a sorted, de-duplicated
  # JSON array and publishes it as the `nodes` job output.
  get-test-matrix:
    runs-on: ubuntu-latest
    outputs:
      nodes: ${{ steps.get-matrix.outputs.nodes }}
    steps:
      - name: Create Matrix
        id: get-matrix
        env:
          # Pass the free-form input as an environment variable instead of
          # expanding it inside the script text, so its content is treated as
          # data and cannot be executed as shell code.
          NODES: ${{ inputs.nodes }}
        run: |
          nodes=($(echo "$NODES" | tr ',' ' '))
          nodes_json=$(printf '%s\n' "${nodes[@]}" | sort -u | jq -R '.' | jq -sc '.')
          echo "nodes=$nodes_json" >> "$GITHUB_OUTPUT"

  # One reusable-workflow call per node produced by get-test-matrix.
  image-build:
    needs: get-test-matrix
    strategy:
      matrix:
        node: ${{ fromJson(needs.get-test-matrix.outputs.nodes) }}
      # Let the remaining nodes finish even if one of them fails.
      fail-fast: false
    uses: ./.github/workflows/_example-workflow.yml
    with:
      node: ${{ matrix.node }}
      example: ${{ inputs.example }}
      services: ${{ inputs.services }}
      tag: ${{ inputs.tag }}
      opea_branch: ${{ inputs.opea_branch }}
    secrets: inherit
|
||||
50
.github/workflows/pr-bum_list_check.yml
vendored
50
.github/workflows/pr-bum_list_check.yml
vendored
@@ -1,50 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
name: Check Requirements
|
||||
|
||||
on: [pull_request]
|
||||
|
||||
jobs:
|
||||
check-requirements:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout PR branch
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Save PR requirements
|
||||
run: |
|
||||
find . -name "requirements.txt" -exec cat {} \; | \
|
||||
grep -v '^\s*#' | \
|
||||
grep -v '^\s*$' | \
|
||||
grep -v '^\s*-' | \
|
||||
sed 's/^\s*//' | \
|
||||
awk -F'[>=<]' '{print $1}' | \
|
||||
sort -u > pr-requirements.txt
|
||||
cat pr-requirements.txt
|
||||
|
||||
- name: Checkout main branch
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
ref: main
|
||||
path: main-branch
|
||||
|
||||
- name: Save main branch requirements
|
||||
run: |
|
||||
find ./main-branch -name "requirements.txt" -exec cat {} \; | \
|
||||
grep -v '^\s*#' | \
|
||||
grep -v '^\s*$' | \
|
||||
grep -v '^\s*-' | \
|
||||
sed 's/^\s*//' | \
|
||||
awk -F'[>=<]' '{print $1}' | \
|
||||
sort -u > main-requirements.txt
|
||||
cat main-requirements.txt
|
||||
|
||||
- name: Compare requirements
|
||||
run: |
|
||||
comm -23 pr-requirements.txt main-requirements.txt > added-packages.txt
|
||||
if [ -s added-packages.txt ]; then
|
||||
echo "New packages found in PR:" && cat added-packages.txt
|
||||
else
|
||||
echo "No new packages found😊."
|
||||
fi
|
||||
2
.github/workflows/pr-manifest-e2e.yml
vendored
2
.github/workflows/pr-manifest-e2e.yml
vendored
@@ -8,6 +8,8 @@ on:
|
||||
branches: ["main", "*rc"]
|
||||
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
|
||||
paths:
|
||||
- "**/Dockerfile**"
|
||||
- "**.py"
|
||||
- "**/kubernetes/**/manifests/**"
|
||||
- "**/tests/test_manifest**"
|
||||
- "!**.md"
|
||||
|
||||
54
.github/workflows/pr-manifest-validate.yml
vendored
54
.github/workflows/pr-manifest-validate.yml
vendored
@@ -1,54 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
name: Manifests Validate
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: [main]
|
||||
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
|
||||
paths:
|
||||
- "**/kubernetes/manifests/**"
|
||||
- .github/workflows/manifest-validate.yml
|
||||
workflow_dispatch:
|
||||
|
||||
# If there is a new commit, the previous jobs will be canceled
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
env:
|
||||
MANIFEST_DIR: "manifests"
|
||||
|
||||
jobs:
|
||||
manifests-validate:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout out Repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: changed files
|
||||
id: changed_files
|
||||
run: |
|
||||
set -xe
|
||||
changed_folder=$(git diff --name-only ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }} | \
|
||||
grep "kubernetes/manifests" | grep -vE '.github|README.md|*.txt|*.sh' | cut -d'/' -f1 | sort -u )
|
||||
echo "changed_folder: $changed_folder"
|
||||
if [ -z "$changed_folder" ]; then
|
||||
echo "No changes in manifests folder"
|
||||
echo "SKIP=true" >> $GITHUB_OUTPUT
|
||||
exit 0
|
||||
fi
|
||||
echo "SKIP=false" >> $GITHUB_OUTPUT
|
||||
for folder in $changed_folder; do
|
||||
folder_str="$folder_str $folder/kubernetes/manifests/"
|
||||
done
|
||||
echo "folder_str=$folder_str"
|
||||
echo "folder_str=$folder_str" >> $GITHUB_ENV
|
||||
|
||||
- uses: docker://ghcr.io/yannh/kubeconform:latest
|
||||
if: steps.changed_files.outputs.SKIP == 'false'
|
||||
with:
|
||||
args: "-summary -output json ${{env.folder_str}}"
|
||||
88
.github/workflows/pr-path-detection.yml
vendored
88
.github/workflows/pr-path-detection.yml
vendored
@@ -50,28 +50,40 @@ jobs:
|
||||
|
||||
- name: Checkout Repo GenAIExamples
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Check the Validity of Hyperlinks
|
||||
run: |
|
||||
cd ${{github.workspace}}
|
||||
fail="FALSE"
|
||||
url_lines=$(grep -Eo '\]\(http[s]?://[^)]+\)' --include='*.md' -r .)
|
||||
if [ -n "$url_lines" ]; then
|
||||
for url_line in $url_lines; do
|
||||
url=$(echo "$url_line"|cut -d '(' -f2 | cut -d ')' -f1|sed 's/\.git$//')
|
||||
path=$(echo "$url_line"|cut -d':' -f1 | cut -d'/' -f2-)
|
||||
response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url")
|
||||
if [ "$response" -ne 200 ]; then
|
||||
echo "**********Validation failed, try again**********"
|
||||
response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url")
|
||||
if [ "$response_retry" -eq 200 ]; then
|
||||
echo "*****Retry successfully*****"
|
||||
else
|
||||
echo "Invalid link from ${{github.workspace}}/$path: $url"
|
||||
fail="TRUE"
|
||||
fi
|
||||
merged_commit=$(git log -1 --format='%H')
|
||||
changed_files="$(git diff --name-status --diff-filter=ARM ${{ github.event.pull_request.base.sha }} ${merged_commit} | awk '/\.md$/ {print $NF}')"
|
||||
if [ -n "$changed_files" ]; then
|
||||
for changed_file in $changed_files; do
|
||||
echo $changed_file
|
||||
url_lines=$(grep -H -Eo '\]\(http[s]?://[^)]+\)' "$changed_file" | grep -Ev 'GenAIExamples/blob/main') || true
|
||||
if [ -n "$url_lines" ]; then
|
||||
for url_line in $url_lines; do
|
||||
echo $url_line
|
||||
url=$(echo "$url_line"|cut -d '(' -f2 | cut -d ')' -f1|sed 's/\.git$//')
|
||||
path=$(echo "$url_line"|cut -d':' -f1 | cut -d'/' -f2-)
|
||||
response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url")
|
||||
if [ "$response" -ne 200 ]; then
|
||||
echo "**********Validation failed, try again**********"
|
||||
response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url")
|
||||
if [ "$response_retry" -eq 200 ]; then
|
||||
echo "*****Retry successfully*****"
|
||||
else
|
||||
echo "Invalid link from ${{github.workspace}}/$path: $url"
|
||||
fail="TRUE"
|
||||
fi
|
||||
fi
|
||||
done
|
||||
fi
|
||||
done
|
||||
else
|
||||
echo "No changed .md file."
|
||||
fi
|
||||
|
||||
if [[ "$fail" == "TRUE" ]]; then
|
||||
@@ -89,6 +101,8 @@ jobs:
|
||||
|
||||
- name: Checkout Repo GenAIExamples
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Checking Relative Path Validity
|
||||
run: |
|
||||
@@ -102,33 +116,34 @@ jobs:
|
||||
branch="https://github.com/opea-project/GenAIExamples/blob/${{ github.event.pull_request.head.ref }}"
|
||||
fi
|
||||
link_head="https://github.com/opea-project/GenAIExamples/blob/main"
|
||||
|
||||
merged_commit=$(git log -1 --format='%H')
|
||||
changed_files="$(git diff --name-status --diff-filter=ARM ${{ github.event.pull_request.base.sha }} ${merged_commit} | awk '/\.md$/ {print $NF}')"
|
||||
png_lines=$(grep -Eo '\]\([^)]+\)' --include='*.md' -r .|grep -Ev 'http')
|
||||
if [ -n "$png_lines" ]; then
|
||||
for png_line in $png_lines; do
|
||||
refer_path=$(echo "$png_line"|cut -d':' -f1 | cut -d'/' -f2-)
|
||||
png_path=$(echo "$png_line"|cut -d '(' -f2 | cut -d ')' -f1)
|
||||
|
||||
if [[ "${png_path:0:1}" == "/" ]]; then
|
||||
check_path=${{github.workspace}}$png_path
|
||||
elif [[ "${png_path:0:1}" == "#" ]]; then
|
||||
check_path=${{github.workspace}}/$refer_path$png_path
|
||||
check_path=$png_path
|
||||
elif [[ "$png_path" == *#* ]]; then
|
||||
relative_path=$(echo "$png_path" | cut -d '#' -f1)
|
||||
if [ -n "$relative_path" ]; then
|
||||
check_path=$(dirname "$refer_path")/$relative_path
|
||||
png_path=$(echo "$png_path" | awk -F'#' '{print "#" $2}')
|
||||
else
|
||||
check_path=$refer_path
|
||||
fi
|
||||
else
|
||||
check_path=${{github.workspace}}/$(dirname "$refer_path")/$png_path
|
||||
check_path=$(dirname "$refer_path")/$png_path
|
||||
fi
|
||||
real_path=$(realpath $check_path)
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "Path $png_path in file ${{github.workspace}}/$refer_path does not exist"
|
||||
fail="TRUE"
|
||||
else
|
||||
url=$link_head$(echo "$real_path" | sed 's|.*/GenAIExamples||')
|
||||
response=$(curl -I -L -s -o /dev/null -w "%{http_code}" "$url")
|
||||
if [ "$response" -ne 200 ]; then
|
||||
echo "**********Validation failed, try again**********"
|
||||
response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url")
|
||||
if [ "$response_retry" -eq 200 ]; then
|
||||
echo "*****Retry successfully*****"
|
||||
else
|
||||
echo "Retry failed. Check branch ${{ github.event.pull_request.head.ref }}"
|
||||
url_dev=$branch$(echo "$real_path" | sed 's|.*/GenAIExamples||')
|
||||
|
||||
if [ -e "$check_path" ]; then
|
||||
real_path=$(realpath $check_path)
|
||||
if [[ "$png_line" == *#* ]]; then
|
||||
if [ -n "changed_files" ] && echo "$changed_files" | grep -q "^${refer_path}$"; then
|
||||
url_dev=$branch$(echo "$real_path" | sed 's|.*/GenAIExamples||')$png_path
|
||||
response=$(curl -I -L -s -o /dev/null -w "%{http_code}" "$url_dev")
|
||||
if [ "$response" -ne 200 ]; then
|
||||
echo "**********Validation failed, try again**********"
|
||||
@@ -140,10 +155,13 @@ jobs:
|
||||
fail="TRUE"
|
||||
fi
|
||||
else
|
||||
echo "Check branch ${{ github.event.pull_request.head.ref }} successfully."
|
||||
echo "Validation succeed $png_line"
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
else
|
||||
echo "${{github.workspace}}/$refer_path:$png_path does not exist"
|
||||
fail="TRUE"
|
||||
fi
|
||||
done
|
||||
fi
|
||||
|
||||
7
.github/workflows/push-image-build.yml
vendored
7
.github/workflows/push-image-build.yml
vendored
@@ -9,7 +9,6 @@ on:
|
||||
paths:
|
||||
- "**.py"
|
||||
- "**Dockerfile"
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}-on-push
|
||||
@@ -24,12 +23,10 @@ jobs:
|
||||
image-build:
|
||||
needs: job1
|
||||
strategy:
|
||||
matrix:
|
||||
example: ${{ fromJSON(needs.job1.outputs.run_matrix).include.*.example }}
|
||||
node: ["gaudi","xeon"]
|
||||
matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }}
|
||||
fail-fast: false
|
||||
uses: ./.github/workflows/_example-workflow.yml
|
||||
with:
|
||||
node: ${{ matrix.node }}
|
||||
node: ${{ matrix.hardware }}
|
||||
example: ${{ matrix.example }}
|
||||
secrets: inherit
|
||||
|
||||
@@ -18,8 +18,6 @@ repos:
|
||||
SearchQnA/ui/svelte/tsconfig.json|
|
||||
DocSum/ui/svelte/tsconfig.json
|
||||
)$
|
||||
- id: check-yaml
|
||||
args: [--allow-multiple-documents]
|
||||
- id: debug-statements
|
||||
- id: requirements-txt-fixer
|
||||
- id: trailing-whitespace
|
||||
@@ -81,7 +79,7 @@ repos:
|
||||
- id: isort
|
||||
|
||||
- repo: https://github.com/PyCQA/docformatter
|
||||
rev: v1.7.5
|
||||
rev: 06907d0
|
||||
hooks:
|
||||
- id: docformatter
|
||||
args: [
|
||||
|
||||
@@ -5,6 +5,73 @@
|
||||
This example showcases a hierarchical multi-agent system for question-answering applications. The architecture diagram is shown below. The supervisor agent interfaces with the user and dispatches tasks to the worker agent and other tools to gather information and come up with answers. The worker agent uses the retrieval tool to generate answers to the queries posted by the supervisor agent. Other tools used by the supervisor agent may include APIs to interface with knowledge graphs, SQL databases, external knowledge bases, etc.
|
||||

|
||||
|
||||
The AgentQnA example is implemented using the component-level microservices defined in [GenAIComps](https://github.com/opea-project/GenAIComps). The flow chart below shows the information flow between different microservices for this example.
|
||||
|
||||
```mermaid
|
||||
---
|
||||
config:
|
||||
flowchart:
|
||||
nodeSpacing: 400
|
||||
rankSpacing: 100
|
||||
curve: linear
|
||||
themeVariables:
|
||||
fontSize: 50px
|
||||
---
|
||||
flowchart LR
|
||||
%% Colors %%
|
||||
classDef blue fill:#ADD8E6,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
|
||||
classDef orange fill:#FBAA60,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
|
||||
classDef orchid fill:#C26DBC,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
|
||||
classDef invisible fill:transparent,stroke:transparent;
|
||||
|
||||
%% Subgraphs %%
|
||||
subgraph DocIndexRetriever-MegaService["DocIndexRetriever MegaService "]
|
||||
direction LR
|
||||
EM([Embedding MicroService]):::blue
|
||||
RET([Retrieval MicroService]):::blue
|
||||
RER([Rerank MicroService]):::blue
|
||||
end
|
||||
subgraph UserInput[" User Input "]
|
||||
direction LR
|
||||
a([User Input Query]):::orchid
|
||||
Ingest([Ingest data]):::orchid
|
||||
end
|
||||
AG_REACT([Agent MicroService - react]):::blue
|
||||
AG_RAG([Agent MicroService - rag]):::blue
|
||||
LLM_gen{{LLM Service <br>}}
|
||||
DP([Data Preparation MicroService]):::blue
|
||||
TEI_RER{{Reranking service<br>}}
|
||||
TEI_EM{{Embedding service <br>}}
|
||||
VDB{{Vector DB<br><br>}}
|
||||
R_RET{{Retriever service <br>}}
|
||||
|
||||
|
||||
|
||||
%% Questions interaction
|
||||
direction LR
|
||||
a[User Input Query] --> AG_REACT
|
||||
AG_REACT --> AG_RAG
|
||||
AG_RAG --> DocIndexRetriever-MegaService
|
||||
EM ==> RET
|
||||
RET ==> RER
|
||||
Ingest[Ingest data] --> DP
|
||||
|
||||
%% Embedding service flow
|
||||
direction LR
|
||||
AG_RAG <-.-> LLM_gen
|
||||
AG_REACT <-.-> LLM_gen
|
||||
EM <-.-> TEI_EM
|
||||
RET <-.-> R_RET
|
||||
RER <-.-> TEI_RER
|
||||
|
||||
direction TB
|
||||
%% Vector DB interaction
|
||||
R_RET <-.-> VDB
|
||||
DP <-.-> VDB
|
||||
|
||||
|
||||
```
|
||||
|
||||
### Why Agent for question answering?
|
||||
|
||||
1. Improve relevancy of retrieved context.
|
||||
@@ -103,4 +170,4 @@ curl http://${ip_address}:9090/v1/chat/completions -X POST -H "Content-Type: app
|
||||
|
||||
## How to register your own tools with agent
|
||||
|
||||
You can take a look at the tools yaml and python files in this example. For more details, please refer to the "Provide your own tools" section in the instructions [here](https://github.com/opea-project/GenAIComps/tree/main/comps/agent/langchain#5-customize-agent-strategy).
|
||||
You can take a look at the tools yaml and python files in this example. For more details, please refer to the "Provide your own tools" section in the instructions [here](https://github.com/opea-project/GenAIComps/tree/main/comps/agent/langchain/README.md#5-customize-agent-strategy).
|
||||
|
||||
@@ -8,7 +8,6 @@ FROM python:3.11-slim
|
||||
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
|
||||
libgl1-mesa-glx \
|
||||
libjemalloc-dev \
|
||||
vim \
|
||||
git
|
||||
|
||||
RUN useradd -m -s /bin/bash user && \
|
||||
@@ -20,10 +19,9 @@ RUN git clone https://github.com/opea-project/GenAIComps.git
|
||||
|
||||
WORKDIR /home/user/GenAIComps
|
||||
RUN pip install --no-cache-dir --upgrade pip && \
|
||||
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt && \
|
||||
pip install --no-cache-dir langchain_core
|
||||
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt
|
||||
|
||||
COPY ./chatqna_no_wrapper.py /home/user/chatqna_no_wrapper.py
|
||||
COPY ./audioqna_multilang.py /home/user/audioqna_multilang.py
|
||||
|
||||
ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps
|
||||
|
||||
@@ -31,4 +29,4 @@ USER user
|
||||
|
||||
WORKDIR /home/user
|
||||
|
||||
ENTRYPOINT ["python", "chatqna_no_wrapper.py"]
|
||||
ENTRYPOINT ["python", "audioqna_multilang.py"]
|
||||
@@ -2,6 +2,63 @@
|
||||
|
||||
AudioQnA is an example that demonstrates the integration of Generative AI (GenAI) models for performing question-answering (QnA) on audio files, with the added functionality of Text-to-Speech (TTS) for generating spoken responses. The example showcases how to convert audio input to text using Automatic Speech Recognition (ASR), generate answers to user queries using a language model, and then convert those answers back to speech using Text-to-Speech (TTS).
|
||||
|
||||
The AudioQnA example is implemented using the component-level microservices defined in [GenAIComps](https://github.com/opea-project/GenAIComps). The flow chart below shows the information flow between different microservices for this example.
|
||||
|
||||
```mermaid
|
||||
---
|
||||
config:
|
||||
flowchart:
|
||||
nodeSpacing: 400
|
||||
rankSpacing: 100
|
||||
curve: linear
|
||||
themeVariables:
|
||||
fontSize: 50px
|
||||
---
|
||||
flowchart LR
|
||||
%% Colors %%
|
||||
classDef blue fill:#ADD8E6,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
|
||||
classDef orange fill:#FBAA60,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
|
||||
classDef orchid fill:#C26DBC,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
|
||||
classDef invisible fill:transparent,stroke:transparent;
|
||||
style AudioQnA-MegaService stroke:#000000
|
||||
|
||||
%% Subgraphs %%
|
||||
subgraph AudioQnA-MegaService["AudioQnA MegaService "]
|
||||
direction LR
|
||||
ASR([ASR MicroService]):::blue
|
||||
LLM([LLM MicroService]):::blue
|
||||
TTS([TTS MicroService]):::blue
|
||||
end
|
||||
subgraph UserInterface[" User Interface "]
|
||||
direction LR
|
||||
a([User Input Query]):::orchid
|
||||
UI([UI server<br>]):::orchid
|
||||
end
|
||||
|
||||
|
||||
|
||||
WSP_SRV{{whisper service<br>}}
|
||||
SPC_SRV{{speecht5 service <br>}}
|
||||
LLM_gen{{LLM Service <br>}}
|
||||
GW([AudioQnA GateWay<br>]):::orange
|
||||
|
||||
|
||||
%% Questions interaction
|
||||
direction LR
|
||||
a[User Audio Query] --> UI
|
||||
UI --> GW
|
||||
GW <==> AudioQnA-MegaService
|
||||
ASR ==> LLM
|
||||
LLM ==> TTS
|
||||
|
||||
%% Embedding service flow
|
||||
direction LR
|
||||
ASR <-.-> WSP_SRV
|
||||
LLM <-.-> LLM_gen
|
||||
TTS <-.-> SPC_SRV
|
||||
|
||||
```
|
||||
|
||||
## Deploy AudioQnA Service
|
||||
|
||||
The AudioQnA service can be deployed on either Intel Gaudi2 or Intel Xeon Scalable Processor.
|
||||
|
||||
98
AudioQnA/audioqna_multilang.py
Normal file
98
AudioQnA/audioqna_multilang.py
Normal file
@@ -0,0 +1,98 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import os
|
||||
|
||||
from comps import AudioQnAGateway, MicroService, ServiceOrchestrator, ServiceType
|
||||
|
||||
# Host/port settings, each overridable through an environment variable of the
# same name; the second argument is the value used when the variable is unset.

# Settings passed to AudioQnAService in the __main__ block.
MEGA_SERVICE_HOST_IP = os.environ.get("MEGA_SERVICE_HOST_IP", "0.0.0.0")
MEGA_SERVICE_PORT = int(os.environ.get("MEGA_SERVICE_PORT", 8888))

# Backend for the "asr" stage.
WHISPER_SERVER_HOST_IP = os.environ.get("WHISPER_SERVER_HOST_IP", "0.0.0.0")
WHISPER_SERVER_PORT = int(os.environ.get("WHISPER_SERVER_PORT", 7066))

# Backend for the "tts" stage.
GPT_SOVITS_SERVER_HOST_IP = os.environ.get("GPT_SOVITS_SERVER_HOST_IP", "0.0.0.0")
GPT_SOVITS_SERVER_PORT = int(os.environ.get("GPT_SOVITS_SERVER_PORT", 9088))

# Backend for the "llm" stage.
# NOTE(review): the default port equals MEGA_SERVICE_PORT's default (8888) - confirm this is intended.
LLM_SERVER_HOST_IP = os.environ.get("LLM_SERVER_HOST_IP", "0.0.0.0")
LLM_SERVER_PORT = int(os.environ.get("LLM_SERVER_PORT", 8888))
|
||||
|
||||
|
||||
def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
    """Reshape ``inputs`` into the payload expected by the service at ``cur_node``.

    Installed on ``ServiceOrchestrator`` by ``AudioQnAService.__init__``, so
    ``self`` is the orchestrator and ``self.services[cur_node]`` is the stage
    about to be invoked.

    Args:
        inputs: Payload produced by the previous stage (or the initial request).
        cur_node: Key into ``self.services`` identifying the stage.
        runtime_graph: Unused here.
        llm_parameters_dict: Supplies ``max_tokens`` and ``top_p`` for the LLM stage.
        **kwargs: ``tts_text_language`` selects the TTS language; "zh" when absent.

    Returns:
        The payload for the stage: the same (mutated) dict for ASR, a new dict
        for LLM and TTS, and ``inputs`` untouched for any other service type.
    """
    print(inputs)
    stage_type = self.services[cur_node].service_type

    if stage_type == ServiceType.ASR:
        # Incoming shape: {'byte_str': 'UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA'}
        # The audio string is moved from "byte_str" to "audio", in place.
        inputs["audio"] = inputs.pop("byte_str")
        return inputs

    if stage_type == ServiceType.LLM:
        # Convert to the OpenAI-style /v1/chat/completions request body.
        return {
            "model": "tgi",  # placeholder model name, present only to keep the format unified
            "messages": [{"role": "user", "content": inputs["asr_result"]}],
            "max_tokens": llm_parameters_dict["max_tokens"],
            "top_p": llm_parameters_dict["top_p"],
            "stream": inputs["streaming"],
            "frequency_penalty": inputs["frequency_penalty"],
            # "presence_penalty": inputs["presence_penalty"],
            # "repetition_penalty": inputs["repetition_penalty"],
            "temperature": inputs["temperature"],
        }

    if stage_type == ServiceType.TTS:
        # Speak the first choice of the chat-completion response.
        return {
            "text": inputs["choices"][0]["message"]["content"],
            "text_language": kwargs.get("tts_text_language", "zh"),
        }

    return inputs
|
||||
|
||||
|
||||
def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_dict, **kwargs):
    """Post-process the response of the service at ``cur_node``.

    Only the TTS stage is transformed: its output is base64-encoded and wrapped
    as ``{"byte_str": <utf-8 base64 text>}``. Every other stage's ``data`` is
    returned unchanged. ``inputs``, ``runtime_graph``, ``llm_parameters_dict``
    and ``kwargs`` are not used here.
    """
    is_tts_stage = self.services[cur_node].service_type == ServiceType.TTS
    if not is_tts_stage:
        return data

    # b64encode requires a bytes-like object, so the TTS output is presumably raw audio bytes.
    encoded_audio = base64.b64encode(data).decode("utf-8")
    return {"byte_str": encoded_audio}
|
||||
|
||||
|
||||
class AudioQnAService:
    """Assembles the ASR -> LLM -> TTS megaservice and exposes it through a gateway."""

    def __init__(self, host="0.0.0.0", port=8000):
        """Store the listen address and create the orchestrator.

        The module-level ``align_inputs`` / ``align_outputs`` are assigned onto
        the ``ServiceOrchestrator`` class (not the instance), so they apply to
        every orchestrator created afterwards in this process.
        """
        self.host = host
        self.port = port
        ServiceOrchestrator.align_inputs = align_inputs
        ServiceOrchestrator.align_outputs = align_outputs
        self.megaservice = ServiceOrchestrator()

    def add_remote_service(self):
        """Register the three remote stages, chain them, and build the gateway."""
        speech_to_text = MicroService(
            name="asr",
            service_type=ServiceType.ASR,
            host=WHISPER_SERVER_HOST_IP,
            port=WHISPER_SERVER_PORT,
            # endpoint="/v1/audio/transcriptions",
            endpoint="/v1/asr",
            use_remote_service=True,
        )
        chat_model = MicroService(
            name="llm",
            service_type=ServiceType.LLM,
            host=LLM_SERVER_HOST_IP,
            port=LLM_SERVER_PORT,
            endpoint="/v1/chat/completions",
            use_remote_service=True,
        )
        text_to_speech = MicroService(
            name="tts",
            service_type=ServiceType.TTS,
            host=GPT_SOVITS_SERVER_HOST_IP,
            port=GPT_SOVITS_SERVER_PORT,
            # endpoint="/v1/audio/speech",
            endpoint="/",
            use_remote_service=True,
        )

        pipeline = self.megaservice
        pipeline.add(speech_to_text).add(chat_model).add(text_to_speech)
        # Data flows asr -> llm -> tts.
        pipeline.flow_to(speech_to_text, chat_model)
        pipeline.flow_to(chat_model, text_to_speech)

        # NOTE(review): the gateway host is fixed to "0.0.0.0"; self.host is stored
        # in __init__ but not used here - confirm this is intentional.
        self.gateway = AudioQnAGateway(megaservice=pipeline, host="0.0.0.0", port=self.port)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: build the service from the environment-derived
    # settings, then register the remote stages and create the gateway.
    service = AudioQnAService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT)
    service.add_remote_service()
|
||||
@@ -1,4 +1,4 @@
|
||||
# AudioQnA accuracy Evaluation
|
||||
# AudioQnA Accuracy
|
||||
|
||||
AudioQnA is an example that demonstrates the integration of Generative AI (GenAI) models for performing question-answering (QnA) on audio, which involves Automatic Speech Recognition (ASR) and Text-to-Speech (TTS). The following is the pipeline for evaluating the ASR accuracy.
|
||||
|
||||
@@ -14,7 +14,7 @@ We evaluate the WER (Word Error Rate) metric of the ASR microservice.
|
||||
|
||||
### Launch ASR microservice
|
||||
|
||||
Launch the ASR microservice with the following commands. For more details please refer to [doc](https://github.com/opea-project/GenAIComps/tree/main/comps/asr).
|
||||
Launch the ASR microservice with the following commands. For more details please refer to [doc](https://github.com/opea-project/GenAIComps/tree/main/comps/asr/whisper/README.md).
|
||||
|
||||
```bash
|
||||
git clone https://github.com/opea-project/GenAIComps
|
||||
|
||||
5
AudioQnA/benchmark/accuracy/run_acc.sh
Normal file
5
AudioQnA/benchmark/accuracy/run_acc.sh
Normal file
@@ -0,0 +1,5 @@
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
python online_evaluate.py
|
||||
@@ -127,9 +127,13 @@ curl http://${host_ip}:3002/v1/audio/speech \
|
||||
|
||||
## 🚀 Test MegaService
|
||||
|
||||
Test the AudioQnA megaservice by recording a .wav file, encoding the file into the base64 format, and then sending the
|
||||
base64 string to the megaservice endpoint. The megaservice will return a spoken response as a base64 string. To listen
|
||||
to the response, decode the base64 string and save it as a .wav file.
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:3008/v1/audioqna \
|
||||
-X POST \
|
||||
-d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64}' \
|
||||
-H 'Content-Type: application/json'
|
||||
-H 'Content-Type: application/json' | sed 's/^"//;s/"$//' | base64 -d > output.wav
|
||||
```
|
||||
|
||||
@@ -0,0 +1,64 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
services:
  # Speech-recognition server; the megaservice reaches it through
  # WHISPER_SERVER_HOST_IP / WHISPER_SERVER_PORT.
  whisper-service:
    container_name: whisper-service
    image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
    command: --language "zh"
    ports:
      - "7066:7066"
    ipc: host
    restart: unless-stopped
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}

  # Text-to-speech server; the megaservice reaches it through
  # GPT_SOVITS_SERVER_HOST_IP / GPT_SOVITS_SERVER_PORT.
  gpt-sovits-service:
    container_name: gpt-sovits-service
    image: ${REGISTRY:-opea}/gpt-sovits:${TAG:-latest}
    ports:
      - "9880:9880"
    ipc: host
    restart: unless-stopped
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}

  # Text Generation Inference server for the LLM stage.
  # Host port 3006 maps to container port 80; ./data is mounted at /data.
  tgi-service:
    container_name: tgi-service
    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
    command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
    ports:
      - "3006:80"
    volumes:
      - "./data:/data"
    shm_size: 1g
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}

  # AudioQnA megaservice backend. Host port 3008 maps to container port 8888.
  audioqna-xeon-backend-server:
    container_name: audioqna-xeon-backend-server
    image: ${REGISTRY:-opea}/audioqna-multilang:${TAG:-latest}
    ports:
      - "3008:8888"
    ipc: host
    restart: always
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
      - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
      - LLM_SERVER_HOST_IP=${LLM_SERVER_HOST_IP}
      - LLM_SERVER_PORT=${LLM_SERVER_PORT}
      - LLM_MODEL_ID=${LLM_MODEL_ID}
      - WHISPER_SERVER_HOST_IP=${WHISPER_SERVER_HOST_IP}
      - WHISPER_SERVER_PORT=${WHISPER_SERVER_PORT}
      - GPT_SOVITS_SERVER_HOST_IP=${GPT_SOVITS_SERVER_HOST_IP}
      - GPT_SOVITS_SERVER_PORT=${GPT_SOVITS_SERVER_PORT}

networks:
  default:
    driver: bridge
|
||||
@@ -79,6 +79,8 @@ export LLM_SERVICE_PORT=3007
|
||||
|
||||
## 🚀 Start the MegaService
|
||||
|
||||
> **_NOTE:_** Users will need at least three Gaudi cards for AudioQnA.
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/AudioQnA/docker_compose/intel/hpu/gaudi/
|
||||
docker compose up -d
|
||||
@@ -127,9 +129,13 @@ curl http://${host_ip}:3002/v1/audio/speech \
|
||||
|
||||
## 🚀 Test MegaService
|
||||
|
||||
Test the AudioQnA megaservice by recording a .wav file, encoding the file into the base64 format, and then sending the
|
||||
base64 string to the megaservice endpoint. The megaservice will return a spoken response as a base64 string. To listen
|
||||
to the response, decode the base64 string and save it as a .wav file.
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:3008/v1/audioqna \
|
||||
-X POST \
|
||||
-d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64}' \
|
||||
-H 'Content-Type: application/json'
|
||||
-H 'Content-Type: application/json' | sed 's/^"//;s/"$//' | base64 -d > output.wav
|
||||
```
|
||||
|
||||
@@ -53,3 +53,9 @@ services:
|
||||
dockerfile: comps/tts/speecht5/Dockerfile
|
||||
extends: audioqna
|
||||
image: ${REGISTRY:-opea}/tts:${TAG:-latest}
|
||||
gpt-sovits:
|
||||
build:
|
||||
context: GenAIComps
|
||||
dockerfile: comps/tts/gpt-sovits/Dockerfile
|
||||
extends: audioqna
|
||||
image: ${REGISTRY:-opea}/gpt-sovits:${TAG:-latest}
|
||||
|
||||
@@ -4,7 +4,7 @@ This document outlines the deployment process for a AudioQnA application utilizi
|
||||
|
||||
The AudioQnA Service leverages a Kubernetes operator called genai-microservices-connector(GMC). GMC supports connecting microservices to create pipelines based on the specification in the pipeline yaml file in addition to allowing the user to dynamically control which model is used in a service such as an LLM or embedder. The underlying pipeline language also supports using external services that may be running in public or private cloud elsewhere.
|
||||
|
||||
Install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in Section "Getting Started" at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector). Soon as we publish images to Docker Hub, at which point no builds will be required, simplifying install.
|
||||
Install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in Section "Getting Started" at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector/README.md). We will soon publish images to Docker Hub, at which point no builds will be required, simplifying installation.
|
||||
|
||||
|
||||
The AudioQnA application is defined as a Custom Resource (CR) file that the above GMC operator acts upon. It first checks if the microservices listed in the CR yaml file are running, if not starts them and then proceeds to connect them. When the AudioQnA pipeline is ready, the service endpoint details are returned, letting you use the application. Should you use "kubectl get pods" commands you will see all the component microservices, in particular `asr`, `tts`, and `llm`.
|
||||
|
||||
@@ -19,7 +19,8 @@ RUN git clone https://github.com/opea-project/GenAIComps.git
|
||||
|
||||
WORKDIR /home/user/GenAIComps
|
||||
RUN pip install --no-cache-dir --upgrade pip && \
|
||||
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt
|
||||
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt && \
|
||||
pip install --no-cache-dir langchain_core
|
||||
|
||||
COPY ./chatqna.py /home/user/chatqna.py
|
||||
|
||||
|
||||
@@ -19,9 +19,10 @@ RUN git clone https://github.com/opea-project/GenAIComps.git
|
||||
|
||||
WORKDIR /home/user/GenAIComps
|
||||
RUN pip install --no-cache-dir --upgrade pip && \
|
||||
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt
|
||||
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt && \
|
||||
pip install --no-cache-dir langchain_core
|
||||
|
||||
COPY ./chatqna_guardrails.py /home/user/chatqna_guardrails.py
|
||||
COPY ./chatqna.py /home/user/chatqna.py
|
||||
|
||||
ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps
|
||||
|
||||
@@ -31,4 +32,4 @@ WORKDIR /home/user
|
||||
|
||||
RUN echo 'ulimit -S -n 999999' >> ~/.bashrc
|
||||
|
||||
ENTRYPOINT ["python", "chatqna_guardrails.py"]
|
||||
ENTRYPOINT ["python", "chatqna.py", "--with-guardrails"]
|
||||
|
||||
@@ -1,34 +0,0 @@
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
FROM python:3.11-slim
|
||||
|
||||
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
|
||||
libgl1-mesa-glx \
|
||||
libjemalloc-dev \
|
||||
vim \
|
||||
git
|
||||
|
||||
RUN useradd -m -s /bin/bash user && \
|
||||
mkdir -p /home/user && \
|
||||
chown -R user /home/user/
|
||||
|
||||
WORKDIR /home/user/
|
||||
RUN git clone https://github.com/opea-project/GenAIComps.git
|
||||
|
||||
WORKDIR /home/user/GenAIComps
|
||||
RUN pip install --no-cache-dir --upgrade pip && \
|
||||
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt && \
|
||||
pip install --no-cache-dir langchain_core
|
||||
|
||||
COPY ./chatqna_no_wrapper.py /home/user/chatqna_no_wrapper.py
|
||||
|
||||
ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps
|
||||
|
||||
USER user
|
||||
|
||||
WORKDIR /home/user
|
||||
|
||||
ENTRYPOINT ["python", "chatqna_no_wrapper.py", "--without-rerank"]
|
||||
@@ -6,9 +6,9 @@
|
||||
FROM python:3.11-slim
|
||||
|
||||
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
|
||||
git \
|
||||
libgl1-mesa-glx \
|
||||
libjemalloc-dev \
|
||||
git
|
||||
libjemalloc-dev
|
||||
|
||||
RUN useradd -m -s /bin/bash user && \
|
||||
mkdir -p /home/user && \
|
||||
@@ -19,9 +19,10 @@ RUN git clone https://github.com/opea-project/GenAIComps.git
|
||||
|
||||
WORKDIR /home/user/GenAIComps
|
||||
RUN pip install --no-cache-dir --upgrade pip && \
|
||||
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt
|
||||
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt && \
|
||||
pip install --no-cache-dir langchain_core
|
||||
|
||||
COPY ./chatqna_without_rerank.py /home/user/chatqna_without_rerank.py
|
||||
COPY ./chatqna.py /home/user/chatqna.py
|
||||
|
||||
ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps
|
||||
|
||||
@@ -31,4 +32,4 @@ WORKDIR /home/user
|
||||
|
||||
RUN echo 'ulimit -S -n 999999' >> ~/.bashrc
|
||||
|
||||
ENTRYPOINT ["python", "chatqna_without_rerank.py"]
|
||||
ENTRYPOINT ["python", "chatqna.py", "--without-rerank"]
|
||||
|
||||
@@ -16,6 +16,8 @@ Quick Start Deployment Steps:
|
||||
2. Run Docker Compose.
|
||||
3. Consume the ChatQnA Service.
|
||||
|
||||
Note: If you do not have docker installed you can run this script to install docker : `bash docker_compose/install_docker.sh`
|
||||
|
||||
### Quick Start: 1.Setup Environment Variable
|
||||
|
||||
To set up environment variables for deploying ChatQnA services, follow these steps:
|
||||
@@ -240,7 +242,7 @@ Refer to the [Kubernetes Guide](./kubernetes/intel/README.md) for instructions o
|
||||
|
||||
Install Helm (version >= 3.15) first. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information.
|
||||
|
||||
Refer to the [ChatQnA helm chart](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts/chatqna) for instructions on deploying ChatQnA into Kubernetes on Xeon & Gaudi.
|
||||
Refer to the [ChatQnA helm chart](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts/chatqna/README.md) for instructions on deploying ChatQnA into Kubernetes on Xeon & Gaudi.
|
||||
|
||||
### Deploy ChatQnA on AI PC
|
||||
|
||||
@@ -306,7 +308,7 @@ Two ways of consuming ChatQnA Service:
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
1. If you get errors like "Access Denied", [validate micro service](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/docker_compose/intel/cpu/xeon#validate-microservices) first. A simple example:
|
||||
1. If you get errors like "Access Denied", [validate micro service](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/docker_compose/intel/cpu/xeon/README.md#validate-microservices) first. A simple example:
|
||||
|
||||
```bash
|
||||
http_proxy="" curl ${host_ip}:6006/embed -X POST -d '{"inputs":"What is Deep Learning?"}' -H 'Content-Type: application/json'
|
||||
|
||||
170
ChatQnA/benchmark/accuracy/README.md
Normal file
170
ChatQnA/benchmark/accuracy/README.md
Normal file
@@ -0,0 +1,170 @@
|
||||
# ChatQnA Accuracy
|
||||
|
||||
ChatQnA is a Retrieval-Augmented Generation (RAG) pipeline, which can enhance generative models through external information retrieval.
|
||||
|
||||
To evaluate the accuracy, we use 2 recently published datasets and 10+ popular, comprehensive metrics:
|
||||
|
||||
- Dataset
|
||||
- [MultiHop](https://arxiv.org/pdf/2401.15391) (English dataset)
|
||||
- [CRUD](https://arxiv.org/abs/2401.17043) (Chinese dataset)
|
||||
- metrics (measure accuracy of both the context retrieval and response generation)
|
||||
- evaluation for retrieval/reranking
|
||||
- MRR@10
|
||||
- MAP@10
|
||||
- Hits@10
|
||||
- Hits@4
|
||||
- LLM-as-a-Judge
|
||||
- evaluation for the generated response from the end-to-end pipeline
|
||||
- BLEU
|
||||
- ROUGE(L)
|
||||
- LLM-as-a-Judge
|
||||
|
||||
## Prerequisite
|
||||
|
||||
### Environment
|
||||
|
||||
```bash
|
||||
git clone https://github.com/opea-project/GenAIEval
|
||||
cd GenAIEval
|
||||
pip install -r requirements.txt
|
||||
pip install -e .
|
||||
```
|
||||
|
||||
## MultiHop (English dataset)
|
||||
|
||||
[MultiHop-RAG](https://arxiv.org/pdf/2401.15391): a QA dataset to evaluate retrieval and reasoning across documents with metadata in the RAG pipelines. It contains 2556 queries, with evidence for each query distributed across 2 to 4 documents. The queries also involve document metadata, reflecting complex scenarios commonly found in real-world RAG applications.
|
||||
|
||||
### Launch Service of RAG System
|
||||
|
||||
Please refer to this [guide](https://github.com/opea-project/GenAIExamples/blob/main/ChatQnA/README.md) to launch the service of `ChatQnA`.
|
||||
|
||||
### Launch Service of LLM-as-a-Judge
|
||||
|
||||
To set up an LLM model, we can use [tgi-gaudi](https://github.com/huggingface/tgi-gaudi) to launch a service. For example, the following command sets up the [mistralai/Mixtral-8x7B-Instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1) model on 2 Gaudi2 cards:
|
||||
|
||||
```
|
||||
# please set your llm_port and hf_token
|
||||
|
||||
docker run -p {your_llm_port}:80 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HF_TOKEN={your_hf_token} --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.1 --model-id mistralai/Mixtral-8x7B-Instruct-v0.1 --max-input-tokens 2048 --max-total-tokens 4096 --sharded true --num-shard 2
|
||||
|
||||
# for better performance, set `PREFILL_BATCH_BUCKET_SIZE`, `BATCH_BUCKET_SIZE`, `max-batch-total-tokens`, `max-batch-prefill-tokens`
|
||||
docker run -p {your_llm_port}:80 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HF_TOKEN={your_hf_token} -e PREFILL_BATCH_BUCKET_SIZE=1 -e BATCH_BUCKET_SIZE=8 --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.5 --model-id mistralai/Mixtral-8x7B-Instruct-v0.1 --max-input-tokens 2048 --max-total-tokens 4096 --sharded true --num-shard 2 --max-batch-total-tokens 65536 --max-batch-prefill-tokens 2048
|
||||
```
|
||||
|
||||
### Prepare Dataset
|
||||
|
||||
We use the evaluation dataset from [MultiHop-RAG](https://github.com/yixuantt/MultiHop-RAG) repo, use the below command to prepare the dataset.
|
||||
|
||||
```bash
|
||||
git clone https://github.com/yixuantt/MultiHop-RAG.git
|
||||
```
|
||||
|
||||
### Evaluation
|
||||
|
||||
Use the command below to run the evaluation. Note that for the first run, the `--ingest_docs` argument should be added to the command to ingest the documents into the vector database, while for subsequent runs this argument should be omitted. Set `--retrieval_metrics` to get retrieval-related metrics (MRR@10/MAP@10/Hits@10/Hits@4). Set `--ragas_metrics` and `--llm_endpoint` to get end-to-end RAG pipeline metrics (faithfulness/answer_relevancy/...), which are judged by LLMs. `--limits` defaults to 100, which means only 100 examples are evaluated by LLM-as-a-Judge, as this step is very time-consuming.
|
||||
|
||||
If you are using docker compose to deploy `ChatQnA` system, you can simply run the evaluation as following:
|
||||
|
||||
```bash
|
||||
python eval_multihop.py --docs_path MultiHop-RAG/dataset/corpus.json --dataset_path MultiHop-RAG/dataset/MultiHopRAG.json --ingest_docs --retrieval_metrics --ragas_metrics --llm_endpoint http://{llm_as_judge_ip}:{llm_as_judge_port}/generate
|
||||
```
|
||||
|
||||
If you are using Kubernetes manifest/helm to deploy `ChatQnA` system, you must specify more arguments as following:
|
||||
|
||||
```bash
|
||||
python eval_multihop.py --docs_path MultiHop-RAG/dataset/corpus.json --dataset_path MultiHop-RAG/dataset/MultiHopRAG.json --ingest_docs --retrieval_metrics --ragas_metrics --llm_endpoint http://{llm_as_judge_ip}:{llm_as_judge_port}/generate --database_endpoint http://{your_dataprep_ip}:{your_dataprep_port}/v1/dataprep --embedding_endpoint http://{your_embedding_ip}:{your_embedding_port}/v1/embeddings --tei_embedding_endpoint http://{your_tei_embedding_ip}:{your_tei_embedding_port} --retrieval_endpoint http://{your_retrieval_ip}:{your_retrieval_port}/v1/retrieval --service_url http://{your_chatqna_ip}:{your_chatqna_port}/v1/chatqna
|
||||
```
|
||||
|
||||
The default values for arguments are:
|
||||
|Argument|Default value|
|
||||
|--------|-------------|
|
||||
|service_url|http://localhost:8888/v1/chatqna|
|
||||
|database_endpoint|http://localhost:6007/v1/dataprep|
|
||||
|embedding_endpoint|http://localhost:6000/v1/embeddings|
|
||||
|tei_embedding_endpoint|http://localhost:8090|
|
||||
|retrieval_endpoint|http://localhost:7000/v1/retrieval|
|
||||
|reranking_endpoint|http://localhost:8000/v1/reranking|
|
||||
|output_dir|./output|
|
||||
|temperature|0.1|
|
||||
|max_new_tokens|1280|
|
||||
|chunk_size|256|
|
||||
|chunk_overlap|100|
|
||||
|search_type|similarity|
|
||||
|retrival_k|10|
|
||||
|fetch_k|20|
|
||||
|lambda_mult|0.5|
|
||||
|dataset_path|None|
|
||||
|docs_path|None|
|
||||
|limits|100|
|
||||
|
||||
You can check the argument details using the command below:
|
||||
|
||||
```bash
|
||||
python eval_multihop.py --help
|
||||
```
|
||||
|
||||
## CRUD (Chinese dataset)
|
||||
|
||||
[CRUD-RAG](https://arxiv.org/abs/2401.17043) is a Chinese benchmark for RAG (Retrieval-Augmented Generation) systems. This example utilizes CRUD-RAG for evaluating the RAG system.
|
||||
|
||||
### Prepare Dataset
|
||||
|
||||
We use the evaluation dataset from [CRUD-RAG](https://github.com/IAAR-Shanghai/CRUD_RAG) repo, use the below command to prepare the dataset.
|
||||
|
||||
```bash
|
||||
git clone https://github.com/IAAR-Shanghai/CRUD_RAG
|
||||
mkdir data/
|
||||
cp CRUD_RAG/data/crud_split/split_merged.json data/
|
||||
cp -r CRUD_RAG/data/80000_docs/ data/
|
||||
python process_crud_dataset.py
|
||||
```
|
||||
|
||||
### Launch Service of RAG System
|
||||
|
||||
Please refer to this [guide](https://github.com/opea-project/GenAIExamples/blob/main/ChatQnA/README.md) to launch the service of the `ChatQnA` system. For the Chinese dataset, you should replace the English embedding and LLM models with Chinese ones, for example, `EMBEDDING_MODEL_ID="BAAI/bge-base-zh-v1.5"` and `LLM_MODEL_ID=Qwen/Qwen2-7B-Instruct`.
|
||||
|
||||
### Evaluation
|
||||
|
||||
Use below command to run the evaluation, please note that for the first run, argument `--ingest_docs` should be added in the command to ingest the documents into the vector database, while for the subsequent run, this argument should be omitted.
|
||||
|
||||
If you are using docker compose to deploy `ChatQnA` system, you can simply run the evaluation as following:
|
||||
|
||||
```bash
|
||||
python eval_crud.py --dataset_path ./data/split_merged.json --docs_path ./data/80000_docs --ingest_docs
|
||||
|
||||
# if you want to get ragas metrics
|
||||
python eval_crud.py --dataset_path ./data/split_merged.json --docs_path ./data/80000_docs --contain_original_data --llm_endpoint "http://{llm_as_judge_ip}:{llm_as_judge_port}" --ragas_metrics
|
||||
```
|
||||
|
||||
If you are using Kubernetes manifest/helm to deploy `ChatQnA` system, you must specify more arguments as following:
|
||||
|
||||
```bash
|
||||
python eval_crud.py --dataset_path ./data/split_merged.json --docs_path ./data/80000_docs --ingest_docs --database_endpoint http://{your_dataprep_ip}:{your_dataprep_port}/v1/dataprep --embedding_endpoint http://{your_embedding_ip}:{your_embedding_port}/v1/embeddings --retrieval_endpoint http://{your_retrieval_ip}:{your_retrieval_port}/v1/retrieval --service_url http://{your_chatqna_ip}:{your_chatqna_port}/v1/chatqna
|
||||
```
|
||||
|
||||
The default values for arguments are:
|
||||
|Argument|Default value|
|
||||
|--------|-------------|
|
||||
|service_url|http://localhost:8888/v1/chatqna|
|
||||
|database_endpoint|http://localhost:6007/v1/dataprep|
|
||||
|embedding_endpoint|http://localhost:6000/v1/embeddings|
|
||||
|retrieval_endpoint|http://localhost:7000/v1/retrieval|
|
||||
|reranking_endpoint|http://localhost:8000/v1/reranking|
|
||||
|output_dir|./output|
|
||||
|temperature|0.1|
|
||||
|max_new_tokens|1280|
|
||||
|chunk_size|256|
|
||||
|chunk_overlap|100|
|
||||
|dataset_path|../data/split_merged.json|
|
||||
|docs_path|../data/80000_docs|
|
||||
|tasks|["question_answering"]|
|
||||
|
||||
You can check the argument details using the command below:
|
||||
|
||||
```bash
|
||||
python eval_crud.py --help
|
||||
```
|
||||
|
||||
## Acknowledgements
|
||||
|
||||
This example is mostly adapted from the [MultiHop-RAG](https://github.com/yixuantt/MultiHop-RAG) and [CRUD-RAG](https://github.com/IAAR-Shanghai/CRUD_RAG) repos. We thank the authors for their great work!
|
||||
210
ChatQnA/benchmark/accuracy/eval_crud.py
Normal file
210
ChatQnA/benchmark/accuracy/eval_crud.py
Normal file
@@ -0,0 +1,210 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
|
||||
from evals.evaluation.rag_eval import Evaluator
|
||||
from evals.evaluation.rag_eval.template import CRUDTemplate
|
||||
from evals.metrics.ragas import RagasMetric
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
class CRUD_Evaluator(Evaluator):
    """Evaluator hooks for the CRUD-RAG dataset.

    Every hook dispatches on ``self.task`` (presumably set by the ``Evaluator``
    base class from the ``task=`` constructor argument -- confirm there). The
    tables below map each task name to the dataset field, or to the
    ``CRUDTemplate`` getter, used for that task.
    """

    # Dataset field holding the reference answer for each task.
    _GROUND_TRUTH_FIELDS = {
        "summarization": "summary",
        "question_answering": "answers",
        "continuation": "continuing",
        "hallucinated_modified": "hallucinatedMod",
    }

    # Dataset field used as the query for each task.
    _QUERY_FIELDS = {
        "summarization": "text",
        "question_answering": "questions",
        "continuation": "beginning",
        "hallucinated_modified": "newsBeginning",
    }

    # Dataset field holding the source document for each task.
    _DOCUMENT_FIELDS = {
        "summarization": "text",
        "question_answering": "news1",
        "continuation": "beginning",
        "hallucinated_modified": "newsBeginning",
    }

    # Name of the CRUDTemplate getter for each task. There is no entry for
    # "hallucinated_modified": the original code had no template branch for it.
    _TEMPLATE_GETTERS = {
        "summarization": "get_summarization_template",
        "question_answering": "get_question_answering_template",
        "continuation": "get_continuation_template",
    }

    def _select_for_task(self, table):
        """Return ``table[self.task]``.

        Raises:
            NotImplementedError: if ``self.task`` is not a key of ``table``;
                the message lists exactly the tasks that ``table`` supports.
        """
        if self.task in table:
            return table[self.task]
        supported = list(table)
        raise NotImplementedError(
            f"Unknown task {self.task}, only support " f"{', '.join(supported[:-1])} and {supported[-1]}."
        )

    def get_ground_truth_text(self, data: dict):
        """Return the reference answer stored in ``data`` for the current task."""
        return data[self._select_for_task(self._GROUND_TRUTH_FIELDS)]

    def get_query(self, data: dict):
        """Return the query text stored in ``data`` for the current task."""
        return data[self._select_for_task(self._QUERY_FIELDS)]

    def get_document(self, data: dict):
        """Return the source document stored in ``data`` for the current task."""
        return data[self._select_for_task(self._DOCUMENT_FIELDS)]

    def get_template(self):
        """Return the ``CRUDTemplate`` prompt template for the current task.

        Raises:
            NotImplementedError: for any task without a template, which
                includes ``hallucinated_modified``.
        """
        getter_name = self._select_for_task(self._TEMPLATE_GETTERS)
        # Looked up by name so that only the getter for the active task is touched.
        return getattr(CRUDTemplate, getter_name)()

    def post_process(self, result):
        """Return the text between ``<response>`` and ``</response>``, stripped.

        If a tag is absent the corresponding split is a no-op, so the text
        before/after the other tag (or the whole string) is returned stripped.
        """
        return result.split("<response>")[-1].split("</response>")[0].strip()

    def get_ragas_metrics(self, results, arguments):
        """Compute ragas ``faithfulness`` and ``answer_relevancy`` over ``results``.

        Args:
            results: dict whose ``"results"`` list holds per-sample results.
                Each kept sample must carry an ``"original_data"`` dict with
                ``generated_text``, ``ground_truth_text`` and
                ``retrieved_documents`` entries.
            arguments: parsed CLI arguments; ``tei_embedding_endpoint`` and
                ``llm_endpoint`` are read.

        Returns:
            Whatever ``RagasMetric.measure`` returns for the collected inputs.

        Raises:
            KeyError: if a sample has no ``"original_data"`` entry.
        """
        from langchain_huggingface import HuggingFaceEndpointEmbeddings

        embeddings = HuggingFaceEndpointEmbeddings(model=arguments.tei_embedding_endpoint)
        metric = RagasMetric(
            threshold=0.5,
            model=arguments.llm_endpoint,
            embeddings=embeddings,
            metrics=["faithfulness", "answer_relevancy"],
        )

        ragas_inputs = {
            "question": [],
            "answer": [],
            "ground_truth": [],
            "contexts": [],
        }

        # remove_invalid is not defined in this class; presumably inherited
        # from Evaluator -- confirm its filtering rule there.
        valid_results = self.remove_invalid(results["results"])

        for result in tqdm(valid_results):
            if "original_data" not in result:
                # Same exception type as the bare lookup, but says how to fix it.
                raise KeyError(
                    "original_data is missing from the evaluation results; "
                    "rerun the evaluation with --contain_original_data to compute ragas metrics."
                )
            sample = result["original_data"]

            query = self.get_query(sample)
            generated_text = sample["generated_text"]
            ground_truth = sample["ground_truth_text"]
            retrieved_documents = sample["retrieved_documents"]

            ragas_inputs["question"].append(query)
            ragas_inputs["answer"].append(generated_text)
            ragas_inputs["ground_truth"].append(ground_truth)
            # Only the first three retrieved documents are passed as contexts.
            ragas_inputs["contexts"].append(retrieved_documents[:3])

        return metric.measure(ragas_inputs)
|
||||
|
||||
|
||||
def _str_to_bool(value):
    """Convert a command-line token such as ``"False"`` or ``"1"`` to a bool.

    ``type=bool`` cannot be used with argparse for this: ``bool("False")`` is
    ``True`` because any non-empty string is truthy.

    Raises:
        argparse.ArgumentTypeError: if ``value`` is not a recognised boolean word.
    """
    if isinstance(value, bool):
        return value
    token = str(value).strip().lower()
    if token in ("true", "t", "yes", "y", "1", "on"):
        return True
    if token in ("false", "f", "no", "n", "0", "off", ""):
        return False
    raise argparse.ArgumentTypeError(f"Expected a boolean value, got {value!r}.")


def args_parser():
    """Parse the command-line options of the CRUD evaluation script.

    Returns:
        argparse.Namespace: the parsed options (read from ``sys.argv``).
    """
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--service_url", type=str, default="http://localhost:8888/v1/chatqna", help="Service URL address."
    )
    parser.add_argument("--output_dir", type=str, default="./output", help="Directory to save evaluation results.")
    parser.add_argument(
        "--temperature", type=float, default=0.1, help="Controls the randomness of the model's text generation"
    )
    parser.add_argument(
        "--max_new_tokens", type=int, default=1280, help="Maximum number of new tokens to be generated by the model"
    )
    parser.add_argument(
        "--chunk_size", type=int, default=256, help="the maximum number of characters that a chunk can contain"
    )
    parser.add_argument(
        "--chunk_overlap",
        type=int,
        default=100,
        help="the number of characters that should overlap between two adjacent chunks",
    )
    parser.add_argument("--dataset_path", default="../data/split_merged.json", help="Path to the dataset")
    parser.add_argument("--docs_path", default="../data/80000_docs", help="Path to the retrieval documents")

    # Retriever related options
    parser.add_argument("--tasks", default=["question_answering"], nargs="+", help="Task to perform")
    parser.add_argument("--ingest_docs", action="store_true", help="Whether to ingest documents to vector database")
    parser.add_argument(
        "--database_endpoint", type=str, default="http://localhost:6007/v1/dataprep", help="Service URL address."
    )
    parser.add_argument(
        "--embedding_endpoint", type=str, default="http://localhost:6000/v1/embeddings", help="Service URL address."
    )
    parser.add_argument(
        "--retrieval_endpoint", type=str, default="http://localhost:7000/v1/retrieval", help="Service URL address."
    )
    parser.add_argument(
        "--tei_embedding_endpoint",
        type=str,
        default="http://localhost:8090",
        help="Service URL address of tei embedding.",
    )
    parser.add_argument("--ragas_metrics", action="store_true", help="Whether to compute ragas metrics.")
    parser.add_argument("--llm_endpoint", type=str, default=None, help="Service URL address.")
    # Parsed with _str_to_bool so that "--show_progress_bar False" really disables the bar.
    parser.add_argument(
        "--show_progress_bar", action="store", default=True, type=_str_to_bool, help="Whether to show a progress bar"
    )
    parser.add_argument("--contain_original_data", action="store_true", help="Whether to contain original data")

    args = parser.parse_args()
    return args
|
||||
|
||||
|
||||
def main():
    """Run the CRUD evaluation for every task given on the command line.

    For each task: pick the matching dataset split, optionally ingest the
    retrieval documents (at most once per run), run the evaluator, print the
    overall scores and, when ``--ragas_metrics`` is set, the ragas metrics.

    Raises:
        FileNotFoundError: if ``--dataset_path`` is not an existing file.
        NotImplementedError: if a task has no dataset split mapped here.
    """
    args = args_parser()

    if not os.path.isfile(args.dataset_path):
        raise FileNotFoundError(f"Evaluation dataset file {args.dataset_path} not exist.")
    # Explicit UTF-8: the dataset is Chinese text and the platform default
    # encoding is not guaranteed to be UTF-8.
    with open(args.dataset_path, encoding="utf-8") as f:
        all_datasets = json.load(f)

    os.makedirs(args.output_dir, exist_ok=True)

    # get_ragas_metrics reads each result's "original_data", so keep it
    # whenever ragas metrics were requested, even without --contain_original_data.
    keep_original_data = args.contain_original_data or args.ragas_metrics

    # Ingest only once per run; repeating it for every task would push the
    # same documents to the dataprep endpoint again.
    docs_ingested = False

    for task in args.tasks:
        if task == "question_answering":
            dataset = all_datasets["questanswer_1doc"]
        elif task == "summarization":
            dataset = all_datasets["event_summary"]
        else:
            # Only the two tasks above have a dataset split mapped in this script.
            raise NotImplementedError(
                f"Unknown task {task}, only support " "summarization and question_answering."
            )

        output_save_path = os.path.join(args.output_dir, f"{task}.json")
        evaluator = CRUD_Evaluator(dataset=dataset, output_path=output_save_path, task=task)

        if args.ingest_docs and not docs_ingested:
            CRUD_Evaluator.ingest_docs(args.docs_path, args.database_endpoint, args.chunk_size, args.chunk_overlap)
            docs_ingested = True

        results = evaluator.evaluate(
            args, show_progress_bar=args.show_progress_bar, contain_original_data=keep_original_data
        )
        print(results["overall"])

        if args.ragas_metrics:
            ragas_metrics = evaluator.get_ragas_metrics(results, args)
            print(ragas_metrics)

        print(f"Evaluation results of task {task} saved to {output_save_path}.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
279
ChatQnA/benchmark/accuracy/eval_multihop.py
Normal file
279
ChatQnA/benchmark/accuracy/eval_multihop.py
Normal file
@@ -0,0 +1,279 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
|
||||
import requests
|
||||
from evals.evaluation.rag_eval import Evaluator
|
||||
from evals.metrics.ragas import RagasMetric
|
||||
from evals.metrics.retrieval import RetrievalBaseMetric
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
class MultiHop_Evaluator(Evaluator):
    """Evaluator for MultiHop-RAG style query sets.

    Every query record is a dict that is read through the keys ``query``,
    ``answer``, ``question_type`` and ``evidence_list``. Records whose
    ``question_type`` is ``"null_query"`` are skipped by all metrics below.
    Service URLs and retrieval settings are taken from the ``arguments``
    namespace produced by the command-line parser.
    """

    # Per-query scores read from the result of RetrievalBaseMetric.measure().
    _RETRIEVAL_METRIC_NAMES = ("Hits@10", "Hits@4", "MAP@10", "MRR@10")

    def get_ground_truth_text(self, data: dict):
        """Return the reference answer stored under ``data["answer"]``."""
        return data["answer"]

    def get_query(self, data: dict):
        """Return the question text stored under ``data["query"]``."""
        return data["query"]

    def get_template(self):
        """Return ``None``: this evaluator defines no prompt template."""
        return None

    def get_reranked_documents(self, query, docs, arguments):
        """Send ``docs`` to the reranking service and return its answer.

        Args:
            query: The question text.
            docs: Retrieved document texts to rerank.
            arguments: Namespace providing ``reranking_endpoint``.

        Returns:
            The ``"documents"`` field of the JSON response, or ``[]`` when the
            request fails.
        """
        payload = {
            "initial_query": query,
            "retrieved_docs": [{"text": doc} for doc in docs],
            "top_n": 10,
        }
        headers = {"Content-Type": "application/json"}

        response = requests.post(arguments.reranking_endpoint, data=json.dumps(payload), headers=headers)
        if response.ok:
            return response.json()["documents"]
        # The original message said "retrieval", which hid which service failed.
        print(f"Request for reranking failed due to {response.text}.")
        return []

    def get_retrieved_documents(self, query, arguments):
        """Embed ``query`` and fetch matching documents from the retriever.

        Args:
            query: The question text.
            arguments: Namespace providing ``embedding_endpoint``,
                ``retrieval_endpoint``, ``search_type``, ``retrival_k``,
                ``fetch_k`` and ``lambda_mult``.

        Returns:
            A list with the ``"text"`` of every retrieved document, or ``[]``
            when either HTTP request fails.
        """
        headers = {"Content-Type": "application/json"}

        # Step 1: turn the query into an embedding vector.
        response = requests.post(arguments.embedding_endpoint, data=json.dumps({"text": query}), headers=headers)
        if not response.ok:
            print(f"Request for embedding failed due to {response.text}.")
            return []
        embedding = response.json()["embedding"]

        # Step 2: search the vector store with that embedding.
        payload = {
            "text": query,
            "embedding": embedding,
            "search_type": arguments.search_type,
            "k": arguments.retrival_k,
            "fetch_k": arguments.fetch_k,
            "lambda_mult": arguments.lambda_mult,
        }
        response = requests.post(arguments.retrieval_endpoint, data=json.dumps(payload), headers=headers)
        if not response.ok:
            print(f"Request for retrieval failed due to {response.text}.")
            return []
        return [doc["text"] for doc in response.json()["retrieved_docs"]]

    def get_retrieval_metrics(self, all_queries, arguments):
        """Average the retrieval scores over all non-null queries.

        Args:
            all_queries: Iterable of query records.
            arguments: Namespace forwarded to the retrieval/rerank helpers;
                ``arguments.rerank`` switches reranking on.

        Returns:
            Dict with the mean ``Hits@10``, ``Hits@4``, ``MAP@10`` and
            ``MRR@10``. All values are ``0.0`` when no query was evaluated
            (previously this case raised ``ZeroDivisionError``).
        """
        print("start to retrieve...")
        metric = RetrievalBaseMetric()
        sums = dict.fromkeys(self._RETRIEVAL_METRIC_NAMES, 0)
        total = 0
        for data in tqdm(all_queries):
            if data["question_type"] == "null_query":
                continue
            query = data["query"]
            retrieved_documents = self.get_retrieved_documents(query, arguments)
            if arguments.rerank:
                retrieved_documents = self.get_reranked_documents(query, retrieved_documents, arguments)
            test_case = {
                "input": query,
                "golden_context": [each["fact"] for each in data["evidence_list"]],
                "retrieval_context": retrieved_documents,
            }
            results = metric.measure(test_case)
            for name in self._RETRIEVAL_METRIC_NAMES:
                sums[name] += results[name]
            total += 1

        if total == 0:
            print("No non-null queries were evaluated; retrieval metrics default to 0.")
            return {name: 0.0 for name in self._RETRIEVAL_METRIC_NAMES}

        # Calculate average metrics over all queries
        return {name: sums[name] / total for name in self._RETRIEVAL_METRIC_NAMES}

    def evaluate(self, all_queries, arguments):
        """Generate an answer per query and compute overall scores.

        ``send_request``, ``scoring``, ``remove_invalid`` and
        ``compute_overall`` are not defined in this class (presumably
        inherited from ``Evaluator`` -- confirm). Each record is mutated in
        place: the generated answer is stored under ``"generated_text"``.

        Returns:
            The dict from ``compute_overall`` (empty if nothing is valid or it
            raises) extended with ``"accuracy"``, the fraction of evaluated
            queries whose reference answer occurs verbatim in the generated
            text. Accuracy is ``0.0`` when no query was evaluated (previously
            this case raised ``ZeroDivisionError``).
        """
        results = []
        accuracy = 0
        for data in tqdm(all_queries):
            if data["question_type"] == "null_query":
                continue

            generated_text = self.send_request(data, arguments)
            data["generated_text"] = generated_text

            # same method with paper: https://github.com/yixuantt/MultiHop-RAG/issues/8
            if data["answer"] in generated_text:
                accuracy += 1
            # Ids are consecutive over the evaluated (non-null) queries.
            results.append({"id": len(results), **self.scoring(data)})

        valid_results = self.remove_invalid(results)

        try:
            overall = self.compute_overall(valid_results) if len(valid_results) > 0 else {}
        except Exception as e:
            print(repr(e))
            overall = dict()

        overall.update({"accuracy": accuracy / len(results) if results else 0.0})
        return overall

    def get_ragas_metrics(self, all_queries, arguments):
        """Compute RAGAS metrics on at most ``arguments.limits`` queries.

        Args:
            all_queries: Iterable of query records; each evaluated record
                gets its generated answer stored under ``"generated_text"``.
            arguments: Namespace providing ``tei_embedding_endpoint``,
                ``llm_endpoint``, ``limits`` and the retrieval settings.

        Returns:
            Whatever ``RagasMetric.measure`` returns for the collected
            questions, answers, ground truths and contexts.
        """
        # Imported lazily so the dependency is only needed for this metric.
        from langchain_huggingface import HuggingFaceEndpointEmbeddings

        embeddings = HuggingFaceEndpointEmbeddings(model=arguments.tei_embedding_endpoint)

        metric = RagasMetric(threshold=0.5, model=arguments.llm_endpoint, embeddings=embeddings)
        ragas_inputs = {
            "question": [],
            "answer": [],
            "ground_truth": [],
            "contexts": [],
        }

        for data in tqdm(all_queries):
            if data["question_type"] == "null_query":
                continue
            retrieved_documents = self.get_retrieved_documents(data["query"], arguments)
            generated_text = self.send_request(data, arguments)
            data["generated_text"] = generated_text

            ragas_inputs["question"].append(data["query"])
            ragas_inputs["answer"].append(generated_text)
            ragas_inputs["ground_truth"].append(data["answer"])
            # Only the first three retrieved documents are passed as context.
            ragas_inputs["contexts"].append(retrieved_documents[:3])

            if len(ragas_inputs["question"]) >= arguments.limits:
                break

        return metric.measure(ragas_inputs)
|
||||
|
||||
|
||||
def _str_to_bool(value):
    """Parse a command-line boolean such as ``true``/``False``/``1``/``no``.

    ``type=bool`` treats every non-empty string (including ``"False"``) as
    ``True``, so an explicit converter is required. The empty string maps to
    ``False``, matching what ``bool("")`` returned before.
    """
    if isinstance(value, bool):
        return value
    lowered = value.strip().lower()
    if lowered in ("true", "t", "yes", "y", "1"):
        return True
    if lowered in ("false", "f", "no", "n", "0", ""):
        return False
    raise argparse.ArgumentTypeError(f"Expected a boolean value, got {value!r}.")


def args_parser():
    """Build the command-line parser and return the parsed arguments.

    Returns:
        argparse.Namespace with the service endpoints, chunking and retrieval
        settings, and the flags selecting which metrics to compute.
    """
    parser = argparse.ArgumentParser()

    # Generation service and output location
    parser.add_argument(
        "--service_url", type=str, default="http://localhost:8888/v1/chatqna", help="Service URL address."
    )
    parser.add_argument("--output_dir", type=str, default="./output", help="Directory to save evaluation results.")
    parser.add_argument(
        "--temperature", type=float, default=0.1, help="Controls the randomness of the model's text generation"
    )
    parser.add_argument(
        "--max_new_tokens", type=int, default=1280, help="Maximum number of new tokens to be generated by the model"
    )

    # Document chunking
    parser.add_argument(
        "--chunk_size", type=int, default=256, help="the maximum number of characters that a chunk can contain"
    )
    parser.add_argument(
        "--chunk_overlap",
        type=int,
        default=100,
        help="the number of characters that should overlap between two adjacent chunks",
    )

    # Retrieval search parameters ("retrival_k" spelling is kept: callers read args.retrival_k)
    parser.add_argument("--search_type", type=str, default="similarity", help="similarity type")
    parser.add_argument("--retrival_k", type=int, default=10, help="Number of Documents to return.")
    parser.add_argument(
        "--fetch_k", type=int, default=20, help="Number of Documents to fetch to pass to MMR algorithm."
    )
    parser.add_argument(
        "--lambda_mult",
        type=float,
        default=0.5,
        help="Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.",
    )
    parser.add_argument("--dataset_path", default=None, help="Path to the dataset")
    parser.add_argument("--docs_path", default=None, help="Path to the retrieval documents")

    # Retriever related options
    parser.add_argument("--ingest_docs", action="store_true", help="Whether to ingest documents to vector database")
    parser.add_argument("--retrieval_metrics", action="store_true", help="Whether to compute retrieval metrics.")
    parser.add_argument("--ragas_metrics", action="store_true", help="Whether to compute ragas metrics.")
    parser.add_argument("--limits", type=int, default=100, help="Number of examples to be evaluated by llm-as-judge")
    parser.add_argument(
        "--database_endpoint", type=str, default="http://localhost:6007/v1/dataprep", help="Service URL address."
    )
    parser.add_argument(
        "--embedding_endpoint", type=str, default="http://localhost:6000/v1/embeddings", help="Service URL address."
    )
    parser.add_argument(
        "--tei_embedding_endpoint",
        type=str,
        default="http://localhost:8090",
        help="Service URL address of tei embedding.",
    )
    parser.add_argument(
        "--retrieval_endpoint", type=str, default="http://localhost:7000/v1/retrieval", help="Service URL address."
    )
    parser.add_argument("--rerank", action="store_true", help="Whether to use rerank microservice.")
    parser.add_argument(
        "--reranking_endpoint", type=str, default="http://localhost:8000/v1/reranking", help="Service URL address."
    )
    parser.add_argument("--llm_endpoint", type=str, default=None, help="Service URL address.")
    # type=bool would turn "--show_progress_bar False" into True; use a real converter.
    parser.add_argument(
        "--show_progress_bar", action="store", default=True, type=_str_to_bool, help="Whether to show a progress bar"
    )
    parser.add_argument("--contain_original_data", action="store_true", help="Whether to contain original data")

    args = parser.parse_args()
    return args
|
||||
|
||||
|
||||
def main():
    """Run the MultiHop evaluation selected on the command line.

    Optionally ingests the corpus into the vector database, then computes the
    retrieval and/or RAGAS metrics and prints them.

    Raises:
        ValueError: if ``--dataset_path`` is missing, or ``--docs_path`` is
            missing while ``--ingest_docs`` is set.
    """
    args = args_parser()

    evaluator = MultiHop_Evaluator()

    if args.ingest_docs:
        # The corpus is only needed for ingestion, so it is no longer read
        # (and the temporary file no longer written) on metric-only runs.
        if args.docs_path is None:
            raise ValueError("--docs_path is required when --ingest_docs is set.")
        with open(args.docs_path, "r", encoding="utf-8") as file:
            doc_data = json.load(file)

        # save docs to a tmp file, one document body per line
        tmp_corpus_file = "tmp_corpus.txt"
        with open(tmp_corpus_file, "w", encoding="utf-8") as f:
            for doc in doc_data:
                f.write(doc["body"] + "\n")

        evaluator.ingest_docs(tmp_corpus_file, args.database_endpoint, args.chunk_size, args.chunk_overlap)

    if args.dataset_path is None:
        raise ValueError("--dataset_path is required.")
    with open(args.dataset_path, "r", encoding="utf-8") as file:
        all_queries = json.load(file)

    # get retrieval quality
    if args.retrieval_metrics:
        retrieval_metrics = evaluator.get_retrieval_metrics(all_queries, args)
        print(retrieval_metrics)

    # get rag quality
    if args.ragas_metrics:
        ragas_metrics = evaluator.get_ragas_metrics(all_queries, args)
        print(ragas_metrics)


if __name__ == "__main__":
    main()
|
||||
9
ChatQnA/benchmark/accuracy/process_crud_dataset.py
Normal file
9
ChatQnA/benchmark/accuracy/process_crud_dataset.py
Normal file
@@ -0,0 +1,9 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import os
|
||||
|
||||
# Append ".txt" to every corpus file in ./data/80000_docs (relative to this script).
path = os.path.join(os.path.dirname(__file__), "./data/80000_docs")
for file in os.listdir(path):
    src_file = os.path.join(path, file)
    # Skip sub-directories and files already renamed by an earlier run, so that
    # running the script twice does not produce "*.txt.txt".
    if not os.path.isfile(src_file) or file.endswith(".txt"):
        continue
    os.rename(src_file, src_file + ".txt")
|
||||
64
ChatQnA/benchmark/accuracy/run_acc.sh
Normal file
64
ChatQnA/benchmark/accuracy/run_acc.sh
Normal file
@@ -0,0 +1,64 @@
|
||||
#!/bin/bash
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
set -x
|
||||
|
||||
# Entry point: parse the arguments, then run the benchmark for the chosen dataset.
function main {

    init_params "$@"
    # run_benchmark
    echo "${dataset}"
    case "${dataset}" in
        MultiHop)
            run_multihop
            ;;
        crud)
            run_crud
            ;;
        *)
            # Previously an unknown or missing dataset silently did nothing.
            echo "Error: unsupported or missing --dataset '${dataset}' (expected MultiHop or crud)" >&2
            exit 1
            ;;
    esac

}
|
||||
|
||||
# init params
|
||||
# Parse "--key=value" arguments; sets the global variable `dataset`.
# Exits with status 1 on any unrecognised argument.
function init_params {
    for var in "$@"
    do
        case "${var}" in
            --dataset=*)
                # Strip everything up to the first "=" so values that contain
                # "=" themselves are kept intact (cut -f2 truncated them).
                dataset="${var#*=}"
            ;;
            *)
                echo "Error: No such parameter: ${var}"
                exit 1
            ;;
        esac
    done
}
|
||||
|
||||
# run_multihop
|
||||
# Fetch the MultiHop-RAG dataset (once) and compute retrieval metrics on it.
function run_multihop {
    # Re-running must not fail because the checkout already exists, and the
    # evaluation is pointless if the dataset could not be fetched.
    if [[ ! -d MultiHop-RAG ]]; then
        git clone https://github.com/yixuantt/MultiHop-RAG.git || return 1
    fi

    python eval_multihop.py \
        --docs_path MultiHop-RAG/dataset/corpus.json \
        --dataset_path MultiHop-RAG/dataset/MultiHopRAG.json \
        --ingest_docs \
        --retrieval_metrics

}
|
||||
|
||||
# run_crud
|
||||
# Fetch the CRUD_RAG dataset (once), prepare ./data and run the CRUD evaluation.
function run_crud {

    if [[ ! -d CRUD_RAG ]]; then
        git clone https://github.com/IAAR-Shanghai/CRUD_RAG || return 1
    fi
    # -p: do not fail when ./data is left over from a previous run.
    mkdir -p data/
    cp CRUD_RAG/data/crud_split/split_merged.json data/
    # Copy and rename the corpus only once; repeating it would place the
    # original files next to the already renamed "*.txt" copies.
    if [[ ! -d data/80000_docs ]]; then
        cp -r CRUD_RAG/data/80000_docs/ data/
        python process_crud_dataset.py
    fi

    python eval_crud.py \
        --dataset_path ./data/split_merged.json \
        --docs_path ./data/80000_docs \
        --ingest_docs
}
|
||||
|
||||
|
||||
main "$@"
|
||||
@@ -1,653 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# Shared settings injected via envFrom into the containers of this manifest.
apiVersion: v1
kind: ConfigMap
metadata:
  name: qna-config
  namespace: default
data:
  # Model identifiers
  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
  RERANK_MODEL_ID: BAAI/bge-reranker-base
  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
  # ${HF_TOKEN} is a placeholder to be substituted before applying. It is
  # quoted so that an empty expansion stays an empty string instead of null.
  # NOTE(review): a token in a ConfigMap is stored in plain text; consider a Secret.
  HUGGINGFACEHUB_API_TOKEN: '${HF_TOKEN}'
  # Service names of the microservices
  EMBEDDING_SERVICE_HOST_IP: embedding-svc
  RERANK_SERVICE_HOST_IP: reranking-svc
  RETRIEVER_SERVICE_HOST_IP: retriever-svc
  LLM_SERVICE_HOST_IP: llm-svc
  # Vector store
  INDEX_NAME: rag-redis
  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
  # Model-serving endpoints
  TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
  TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
  TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
  NODE_SELECTOR: chatqna-opea
|
||||
---
|
||||
# Deployment of the opea/chatqna image, listening on container port 8888.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: chatqna-backend-server-deploy
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: chatqna-backend-server-deploy
  template:
    metadata:
      labels:
        app: chatqna-backend-server-deploy
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
    spec:
      serviceAccountName: default
      hostIPC: true
      # Schedule only on nodes labelled node-type=chatqna-opea.
      nodeSelector:
        node-type: chatqna-opea
      # Prefer spreading replicas across hosts without blocking scheduling.
      topologySpreadConstraints:
        - maxSkew: 1
          topologyKey: kubernetes.io/hostname
          whenUnsatisfiable: ScheduleAnyway
          labelSelector:
            matchLabels:
              app: chatqna-backend-server-deploy
      containers:
        - name: chatqna-backend-server-deploy
          image: opea/chatqna:latest
          imagePullPolicy: IfNotPresent
          # All settings come from the shared qna-config ConfigMap.
          envFrom:
            - configMapRef:
                name: qna-config
          ports:
            - containerPort: 8888
|
||||
---
|
||||
# NodePort Service exposing the chatqna backend pods on node port 30888.
apiVersion: v1
kind: Service
metadata:
  name: chatqna-backend-server-svc
  namespace: default
spec:
  type: NodePort
  selector:
    app: chatqna-backend-server-deploy
  ports:
    - name: service
      port: 8888
      targetPort: 8888
      nodePort: 30888
|
||||
---
|
||||
# Deployment of the opea/dataprep-redis image, listening on container port 6007.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: dataprep-deploy
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: dataprep-deploy
  template:
    metadata:
      labels:
        app: dataprep-deploy
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
    spec:
      serviceAccountName: default
      hostIPC: true
      # Schedule only on nodes labelled node-type=chatqna-opea.
      nodeSelector:
        node-type: chatqna-opea
      # Prefer spreading replicas across hosts without blocking scheduling.
      topologySpreadConstraints:
        - maxSkew: 1
          topologyKey: kubernetes.io/hostname
          whenUnsatisfiable: ScheduleAnyway
          labelSelector:
            matchLabels:
              app: dataprep-deploy
      containers:
        - name: dataprep-deploy
          image: opea/dataprep-redis:latest
          imagePullPolicy: IfNotPresent
          # All settings come from the shared qna-config ConfigMap.
          envFrom:
            - configMapRef:
                name: qna-config
          ports:
            - containerPort: 6007
|
||||
---
|
||||
# Cluster-internal Service for the dataprep pods (port 6007).
apiVersion: v1
kind: Service
metadata:
  name: dataprep-svc
  namespace: default
spec:
  type: ClusterIP
  selector:
    app: dataprep-deploy
  ports:
    - name: port1
      port: 6007
      targetPort: 6007
|
||||
---
|
||||
# Deployment of the text-embeddings-inference (CPU) server for $(EMBEDDING_MODEL_ID).
apiVersion: apps/v1
kind: Deployment
metadata:
  name: embedding-dependency-deploy
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: embedding-dependency-deploy
  template:
    metadata:
      labels:
        app: embedding-dependency-deploy
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
    spec:
      serviceAccountName: default
      hostIPC: true
      # Schedule only on nodes labelled node-type=chatqna-opea.
      nodeSelector:
        node-type: chatqna-opea
      # Prefer spreading replicas across hosts without blocking scheduling.
      topologySpreadConstraints:
        - maxSkew: 1
          topologyKey: kubernetes.io/hostname
          whenUnsatisfiable: ScheduleAnyway
          labelSelector:
            matchLabels:
              app: embedding-dependency-deploy
      containers:
        - name: embedding-dependency-deploy
          image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
          imagePullPolicy: IfNotPresent
          # $(EMBEDDING_MODEL_ID) is expanded from the env provided by qna-config.
          args:
            - --model-id
            - $(EMBEDDING_MODEL_ID)
            - --auto-truncate
          envFrom:
            - configMapRef:
                name: qna-config
          ports:
            - containerPort: 80
          volumeMounts:
            - name: model-volume
              mountPath: /data
            - name: shm
              mountPath: /dev/shm
      volumes:
        # Host directory mounted at /data; it must already exist on the node.
        - name: model-volume
          hostPath:
            path: /mnt/models
            type: Directory
        # Memory-backed /dev/shm, capped at 1Gi.
        - name: shm
          emptyDir:
            medium: Memory
            sizeLimit: 1Gi
|
||||
---
|
||||
# Cluster-internal Service mapping port 6006 to container port 80 of the embedding server.
apiVersion: v1
kind: Service
metadata:
  name: embedding-dependency-svc
  namespace: default
spec:
  type: ClusterIP
  selector:
    app: embedding-dependency-deploy
  ports:
    - name: service
      port: 6006
      targetPort: 80
|
||||
---
|
||||
# Deployment of the opea/embedding-tei image, listening on container port 6000.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: embedding-deploy
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: embedding-deploy
  template:
    metadata:
      labels:
        app: embedding-deploy
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
    spec:
      serviceAccountName: default
      hostIPC: true
      # Schedule only on nodes labelled node-type=chatqna-opea.
      nodeSelector:
        node-type: chatqna-opea
      # Prefer spreading replicas across hosts without blocking scheduling.
      topologySpreadConstraints:
        - maxSkew: 1
          topologyKey: kubernetes.io/hostname
          whenUnsatisfiable: ScheduleAnyway
          labelSelector:
            matchLabels:
              app: embedding-deploy
      containers:
        - name: embedding-deploy
          image: opea/embedding-tei:latest
          imagePullPolicy: IfNotPresent
          # All settings come from the shared qna-config ConfigMap.
          envFrom:
            - configMapRef:
                name: qna-config
          ports:
            - containerPort: 6000
|
||||
---
|
||||
# Cluster-internal Service for the embedding microservice pods (port 6000).
apiVersion: v1
kind: Service
metadata:
  name: embedding-svc
  namespace: default
spec:
  type: ClusterIP
  selector:
    app: embedding-deploy
  ports:
    - name: service
      port: 6000
      targetPort: 6000
|
||||
---
|
||||
# Deployment of the tgi-gaudi text-generation server for $(LLM_MODEL_ID).
# Each replica requests one habana.ai/gaudi device.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: llm-dependency-deploy
  namespace: default
spec:
  replicas: 31
  selector:
    matchLabels:
      app: llm-dependency-deploy
  template:
    metadata:
      labels:
        app: llm-dependency-deploy
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
    spec:
      serviceAccountName: default
      hostIPC: true
      # Schedule only on nodes labelled node-type=chatqna-opea.
      nodeSelector:
        node-type: chatqna-opea
      # Prefer spreading replicas across hosts without blocking scheduling.
      topologySpreadConstraints:
        - maxSkew: 1
          topologyKey: kubernetes.io/hostname
          whenUnsatisfiable: ScheduleAnyway
          labelSelector:
            matchLabels:
              app: llm-dependency-deploy
      containers:
        - name: llm-dependency-deploy
          image: ghcr.io/huggingface/tgi-gaudi:2.0.5
          imagePullPolicy: IfNotPresent
          # Numeric flag values are quoted because container args must be strings.
          args:
            - --model-id
            - $(LLM_MODEL_ID)
            - --max-input-length
            - '2048'
            - --max-total-tokens
            - '4096'
            - --max-batch-total-tokens
            - '65536'
            - --max-batch-prefill-tokens
            - '4096'
          env:
            - name: OMPI_MCA_btl_vader_single_copy_mechanism
              value: none
            - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
              value: 'true'
            - name: runtime
              value: habana
            - name: HABANA_VISIBLE_DEVICES
              value: all
            # ${HF_TOKEN} is a placeholder to be substituted before applying;
            # quoted so an empty expansion stays a string instead of null.
            - name: HUGGING_FACE_HUB_TOKEN
              value: '${HF_TOKEN}'
            - name: ENABLE_HPU_GRAPH
              value: 'true'
            - name: LIMIT_HPU_GRAPH
              value: 'true'
            - name: USE_FLASH_ATTENTION
              value: 'true'
            - name: FLASH_ATTENTION_RECOMPUTE
              value: 'true'
          envFrom:
            - configMapRef:
                name: qna-config
          ports:
            - containerPort: 80
          resources:
            limits:
              habana.ai/gaudi: 1
          securityContext:
            capabilities:
              add:
                - SYS_NICE
          volumeMounts:
            - name: model-volume
              mountPath: /data
            - name: shm
              mountPath: /dev/shm
      volumes:
        # Host directory mounted at /data; it must already exist on the node.
        - name: model-volume
          hostPath:
            path: /mnt/models
            type: Directory
        # Memory-backed /dev/shm, capped at 1Gi.
        - name: shm
          emptyDir:
            medium: Memory
            sizeLimit: 1Gi
|
||||
---
|
||||
# Cluster-internal Service mapping port 9009 to container port 80 of the LLM server.
apiVersion: v1
kind: Service
metadata:
  name: llm-dependency-svc
  namespace: default
spec:
  type: ClusterIP
  selector:
    app: llm-dependency-deploy
  ports:
    - name: service
      port: 9009
      targetPort: 80
|
||||
---
|
||||
# Deployment of the opea/llm-tgi image, listening on container port 9000.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: llm-deploy
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: llm-deploy
  template:
    metadata:
      labels:
        app: llm-deploy
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
    spec:
      serviceAccountName: default
      hostIPC: true
      # Schedule only on nodes labelled node-type=chatqna-opea.
      nodeSelector:
        node-type: chatqna-opea
      # Prefer spreading replicas across hosts without blocking scheduling.
      topologySpreadConstraints:
        - maxSkew: 1
          topologyKey: kubernetes.io/hostname
          whenUnsatisfiable: ScheduleAnyway
          labelSelector:
            matchLabels:
              app: llm-deploy
      containers:
        - name: llm-deploy
          image: opea/llm-tgi:latest
          imagePullPolicy: IfNotPresent
          # All settings come from the shared qna-config ConfigMap.
          envFrom:
            - configMapRef:
                name: qna-config
          ports:
            - containerPort: 9000
|
||||
---
|
||||
# Cluster-internal Service for the LLM microservice pods (port 9000).
apiVersion: v1
kind: Service
metadata:
  name: llm-svc
  namespace: default
spec:
  type: ClusterIP
  selector:
    app: llm-deploy
  ports:
    - name: service
      port: 9000
      targetPort: 9000
|
||||
---
|
||||
# Deployment of the opea/tei-gaudi server for $(RERANK_MODEL_ID).
# The container requests one habana.ai/gaudi device.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: reranking-dependency-deploy
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: reranking-dependency-deploy
  template:
    metadata:
      labels:
        app: reranking-dependency-deploy
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
    spec:
      serviceAccountName: default
      hostIPC: true
      # Schedule only on nodes labelled node-type=chatqna-opea.
      nodeSelector:
        node-type: chatqna-opea
      # Prefer spreading replicas across hosts without blocking scheduling.
      topologySpreadConstraints:
        - maxSkew: 1
          topologyKey: kubernetes.io/hostname
          whenUnsatisfiable: ScheduleAnyway
          labelSelector:
            matchLabels:
              app: reranking-dependency-deploy
      containers:
        - name: reranking-dependency-deploy
          image: opea/tei-gaudi:latest
          imagePullPolicy: IfNotPresent
          # $(RERANK_MODEL_ID) is expanded from the env provided by qna-config.
          args:
            - --model-id
            - $(RERANK_MODEL_ID)
            - --auto-truncate
          env:
            - name: OMPI_MCA_btl_vader_single_copy_mechanism
              value: none
            - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
              value: 'true'
            - name: runtime
              value: habana
            - name: HABANA_VISIBLE_DEVICES
              value: all
            # ${HF_TOKEN} is a placeholder to be substituted before applying;
            # quoted so an empty expansion stays a string instead of null.
            - name: HF_TOKEN
              value: '${HF_TOKEN}'
            - name: MAX_WARMUP_SEQUENCE_LENGTH
              value: '512'
          envFrom:
            - configMapRef:
                name: qna-config
          ports:
            - containerPort: 80
          resources:
            limits:
              habana.ai/gaudi: 1
          volumeMounts:
            - name: model-volume
              mountPath: /data
            - name: shm
              mountPath: /dev/shm
      volumes:
        # Host directory mounted at /data; it must already exist on the node.
        - name: model-volume
          hostPath:
            path: /mnt/models
            type: Directory
        # Memory-backed /dev/shm, capped at 1Gi.
        - name: shm
          emptyDir:
            medium: Memory
            sizeLimit: 1Gi
|
||||
---
|
||||
# Cluster-internal Service mapping port 8808 to container port 80 of the reranking server.
apiVersion: v1
kind: Service
metadata:
  name: reranking-dependency-svc
  namespace: default
spec:
  type: ClusterIP
  selector:
    app: reranking-dependency-deploy
  ports:
    - name: service
      port: 8808
      targetPort: 80
|
||||
---
|
||||
# Deployment of the opea/reranking-tei image, listening on container port 8000.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: reranking-deploy
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: reranking-deploy
  template:
    metadata:
      labels:
        app: reranking-deploy
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
    spec:
      serviceAccountName: default
      hostIPC: true
      # Schedule only on nodes labelled node-type=chatqna-opea.
      nodeSelector:
        node-type: chatqna-opea
      # Prefer spreading replicas across hosts without blocking scheduling.
      topologySpreadConstraints:
        - maxSkew: 1
          topologyKey: kubernetes.io/hostname
          whenUnsatisfiable: ScheduleAnyway
          labelSelector:
            matchLabels:
              app: reranking-deploy
      containers:
        - name: reranking-deploy
          image: opea/reranking-tei:latest
          imagePullPolicy: IfNotPresent
          # All settings come from the shared qna-config ConfigMap.
          envFrom:
            - configMapRef:
                name: qna-config
          ports:
            - containerPort: 8000
|
||||
---
|
||||
# Cluster-internal Service for the reranking microservice pods (port 8000).
apiVersion: v1
kind: Service
metadata:
  name: reranking-svc
  namespace: default
spec:
  type: ClusterIP
  selector:
    app: reranking-deploy
  ports:
    - name: service
      port: 8000
      targetPort: 8000
|
||||
---
|
||||
# Deployment of the opea/retriever-redis image, listening on container port 7000.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: retriever-deploy
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: retriever-deploy
  template:
    metadata:
      labels:
        app: retriever-deploy
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
    spec:
      serviceAccountName: default
      hostIPC: true
      # Schedule only on nodes labelled node-type=chatqna-opea.
      nodeSelector:
        node-type: chatqna-opea
      # Prefer spreading replicas across hosts without blocking scheduling.
      topologySpreadConstraints:
        - maxSkew: 1
          topologyKey: kubernetes.io/hostname
          whenUnsatisfiable: ScheduleAnyway
          labelSelector:
            matchLabels:
              app: retriever-deploy
      containers:
        - name: retriever-deploy
          image: opea/retriever-redis:latest
          imagePullPolicy: IfNotPresent
          # All settings come from the shared qna-config ConfigMap.
          envFrom:
            - configMapRef:
                name: qna-config
          ports:
            - containerPort: 7000
|
||||
---
|
||||
# Cluster-internal Service for the retriever microservice pods (port 7000).
apiVersion: v1
kind: Service
metadata:
  name: retriever-svc
  namespace: default
spec:
  type: ClusterIP
  selector:
    app: retriever-deploy
  ports:
    - name: service
      port: 7000
      targetPort: 7000
|
||||
---
|
||||
# Deployment of the redis-stack image, exposing container ports 6379 and 8001.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: vector-db
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: vector-db
  template:
    metadata:
      labels:
        app: vector-db
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
    spec:
      serviceAccountName: default
      hostIPC: true
      # Schedule only on nodes labelled node-type=chatqna-opea.
      nodeSelector:
        node-type: chatqna-opea
      # Prefer spreading replicas across hosts without blocking scheduling.
      topologySpreadConstraints:
        - maxSkew: 1
          topologyKey: kubernetes.io/hostname
          whenUnsatisfiable: ScheduleAnyway
          labelSelector:
            matchLabels:
              app: vector-db
      containers:
        - name: vector-db
          image: redis/redis-stack:7.2.0-v9
          imagePullPolicy: IfNotPresent
          envFrom:
            - configMapRef:
                name: qna-config
          ports:
            - containerPort: 6379
            - containerPort: 8001
|
||||
---
|
||||
# Cluster-internal Service for the vector-db pods (ports 6379 and 8001).
apiVersion: v1
kind: Service
metadata:
  name: vector-db
  namespace: default
spec:
  type: ClusterIP
  selector:
    app: vector-db
  ports:
    - name: vector-db-service
      port: 6379
      targetPort: 6379
    - name: vector-db-insight
      port: 8001
      targetPort: 8001
|
||||
---
|
||||
@@ -1,653 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# Shared settings injected via envFrom into the containers of this manifest.
apiVersion: v1
kind: ConfigMap
metadata:
  name: qna-config
  namespace: default
data:
  # Model identifiers
  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
  RERANK_MODEL_ID: BAAI/bge-reranker-base
  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
  # ${HF_TOKEN} is a placeholder to be substituted before applying. It is
  # quoted so that an empty expansion stays an empty string instead of null.
  # NOTE(review): a token in a ConfigMap is stored in plain text; consider a Secret.
  HUGGINGFACEHUB_API_TOKEN: '${HF_TOKEN}'
  # Service names of the microservices
  EMBEDDING_SERVICE_HOST_IP: embedding-svc
  RERANK_SERVICE_HOST_IP: reranking-svc
  RETRIEVER_SERVICE_HOST_IP: retriever-svc
  LLM_SERVICE_HOST_IP: llm-svc
  # Vector store
  INDEX_NAME: rag-redis
  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
  # Model-serving endpoints
  TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
  TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
  TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
  NODE_SELECTOR: chatqna-opea
|
||||
---
|
||||
# Deployment of the opea/chatqna image, listening on container port 8888.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: chatqna-backend-server-deploy
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: chatqna-backend-server-deploy
  template:
    metadata:
      labels:
        app: chatqna-backend-server-deploy
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
    spec:
      serviceAccountName: default
      hostIPC: true
      # Schedule only on nodes labelled node-type=chatqna-opea.
      nodeSelector:
        node-type: chatqna-opea
      # Prefer spreading replicas across hosts without blocking scheduling.
      topologySpreadConstraints:
        - maxSkew: 1
          topologyKey: kubernetes.io/hostname
          whenUnsatisfiable: ScheduleAnyway
          labelSelector:
            matchLabels:
              app: chatqna-backend-server-deploy
      containers:
        - name: chatqna-backend-server-deploy
          image: opea/chatqna:latest
          imagePullPolicy: IfNotPresent
          # All settings come from the shared qna-config ConfigMap.
          envFrom:
            - configMapRef:
                name: qna-config
          ports:
            - containerPort: 8888
|
||||
---
|
||||
# NodePort Service exposing the chatqna backend pods on node port 30888.
apiVersion: v1
kind: Service
metadata:
  name: chatqna-backend-server-svc
  namespace: default
spec:
  type: NodePort
  selector:
    app: chatqna-backend-server-deploy
  ports:
    - name: service
      port: 8888
      targetPort: 8888
      nodePort: 30888
|
||||
---
|
||||
# Deployment of the opea/dataprep-redis image, listening on container port 6007.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: dataprep-deploy
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: dataprep-deploy
  template:
    metadata:
      labels:
        app: dataprep-deploy
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
    spec:
      serviceAccountName: default
      hostIPC: true
      # Schedule only on nodes labelled node-type=chatqna-opea.
      nodeSelector:
        node-type: chatqna-opea
      # Prefer spreading replicas across hosts without blocking scheduling.
      topologySpreadConstraints:
        - maxSkew: 1
          topologyKey: kubernetes.io/hostname
          whenUnsatisfiable: ScheduleAnyway
          labelSelector:
            matchLabels:
              app: dataprep-deploy
      containers:
        - name: dataprep-deploy
          image: opea/dataprep-redis:latest
          imagePullPolicy: IfNotPresent
          # All settings come from the shared qna-config ConfigMap.
          envFrom:
            - configMapRef:
                name: qna-config
          ports:
            - containerPort: 6007
|
||||
---
|
||||
# Cluster-internal Service for the dataprep pods (port 6007).
apiVersion: v1
kind: Service
metadata:
  name: dataprep-svc
  namespace: default
spec:
  type: ClusterIP
  selector:
    app: dataprep-deploy
  ports:
    - name: port1
      port: 6007
      targetPort: 6007
|
||||
---
|
||||
# Deployment of the text-embeddings-inference (CPU) server for $(EMBEDDING_MODEL_ID).
apiVersion: apps/v1
kind: Deployment
metadata:
  name: embedding-dependency-deploy
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: embedding-dependency-deploy
  template:
    metadata:
      labels:
        app: embedding-dependency-deploy
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
    spec:
      serviceAccountName: default
      hostIPC: true
      # Schedule only on nodes labelled node-type=chatqna-opea.
      nodeSelector:
        node-type: chatqna-opea
      # Prefer spreading replicas across hosts without blocking scheduling.
      topologySpreadConstraints:
        - maxSkew: 1
          topologyKey: kubernetes.io/hostname
          whenUnsatisfiable: ScheduleAnyway
          labelSelector:
            matchLabels:
              app: embedding-dependency-deploy
      containers:
        - name: embedding-dependency-deploy
          image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
          imagePullPolicy: IfNotPresent
          # $(EMBEDDING_MODEL_ID) is expanded from the env provided by qna-config.
          args:
            - --model-id
            - $(EMBEDDING_MODEL_ID)
            - --auto-truncate
          envFrom:
            - configMapRef:
                name: qna-config
          ports:
            - containerPort: 80
          volumeMounts:
            - name: model-volume
              mountPath: /data
            - name: shm
              mountPath: /dev/shm
      volumes:
        # Host directory mounted at /data; it must already exist on the node.
        - name: model-volume
          hostPath:
            path: /mnt/models
            type: Directory
        # Memory-backed /dev/shm, capped at 1Gi.
        - name: shm
          emptyDir:
            medium: Memory
            sizeLimit: 1Gi
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: embedding-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 6006
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: embedding-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/embedding-tei:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-deploy
|
||||
ports:
|
||||
- containerPort: 6000
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: embedding-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 6000
|
||||
targetPort: 6000
|
||||
selector:
|
||||
app: embedding-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 7
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(LLM_MODEL_ID)
|
||||
- --max-input-length
|
||||
- '2048'
|
||||
- --max-total-tokens
|
||||
- '4096'
|
||||
- --max-batch-total-tokens
|
||||
- '65536'
|
||||
- --max-batch-prefill-tokens
|
||||
- '4096'
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
- name: ENABLE_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: LIMIT_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: USE_FLASH_ATTENTION
|
||||
value: 'true'
|
||||
- name: FLASH_ATTENTION_RECOMPUTE
|
||||
value: 'true'
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
securityContext:
|
||||
capabilities:
|
||||
add:
|
||||
- SYS_NICE
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: llm-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 9009
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/llm-tgi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-deploy
|
||||
ports:
|
||||
- containerPort: 9000
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: llm-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 9000
|
||||
targetPort: 9000
|
||||
selector:
|
||||
app: llm-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: reranking-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: reranking-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: reranking-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(RERANK_MODEL_ID)
|
||||
- --auto-truncate
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HF_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
- name: MAX_WARMUP_SEQUENCE_LENGTH
|
||||
value: '512'
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/tei-gaudi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: reranking-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: reranking-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 8808
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: reranking-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: reranking-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: reranking-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: reranking-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/reranking-tei:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-deploy
|
||||
ports:
|
||||
- containerPort: 8000
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: reranking-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: reranking-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 8000
|
||||
targetPort: 8000
|
||||
selector:
|
||||
app: reranking-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: retriever-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: retriever-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/retriever-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: retriever-deploy
|
||||
ports:
|
||||
- containerPort: 7000
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: retriever-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 7000
|
||||
targetPort: 7000
|
||||
selector:
|
||||
app: retriever-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: vector-db
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: redis/redis-stack:7.2.0-v9
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: vector-db
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
- containerPort: 8001
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: vector-db-service
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
- name: vector-db-insight
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
selector:
|
||||
app: vector-db
|
||||
type: ClusterIP
|
||||
---
|
||||
@@ -1,653 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
data:
|
||||
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||
EMBEDDING_SERVICE_HOST_IP: embedding-svc
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
INDEX_NAME: rag-redis
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
LLM_SERVICE_HOST_IP: llm-svc
|
||||
NODE_SELECTOR: chatqna-opea
|
||||
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||
RERANK_SERVICE_HOST_IP: reranking-svc
|
||||
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: qna-config
|
||||
namespace: default
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-backend-server-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: chatqna-backend-server-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
- containerPort: 8888
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: chatqna-backend-server-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
nodePort: 30888
|
||||
port: 8888
|
||||
targetPort: 8888
|
||||
selector:
|
||||
app: chatqna-backend-server-deploy
|
||||
type: NodePort
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: dataprep-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: dataprep-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/dataprep-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: dataprep-deploy
|
||||
ports:
|
||||
- containerPort: 6007
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: dataprep-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: port1
|
||||
port: 6007
|
||||
targetPort: 6007
|
||||
selector:
|
||||
app: dataprep-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(EMBEDDING_MODEL_ID)
|
||||
- --auto-truncate
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: embedding-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 6006
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: embedding-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/embedding-tei:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-deploy
|
||||
ports:
|
||||
- containerPort: 6000
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: embedding-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 6000
|
||||
targetPort: 6000
|
||||
selector:
|
||||
app: embedding-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 15
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(LLM_MODEL_ID)
|
||||
- --max-input-length
|
||||
- '2048'
|
||||
- --max-total-tokens
|
||||
- '4096'
|
||||
- --max-batch-total-tokens
|
||||
- '65536'
|
||||
- --max-batch-prefill-tokens
|
||||
- '4096'
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
- name: ENABLE_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: LIMIT_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: USE_FLASH_ATTENTION
|
||||
value: 'true'
|
||||
- name: FLASH_ATTENTION_RECOMPUTE
|
||||
value: 'true'
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
securityContext:
|
||||
capabilities:
|
||||
add:
|
||||
- SYS_NICE
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: llm-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 9009
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/llm-tgi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-deploy
|
||||
ports:
|
||||
- containerPort: 9000
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: llm-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 9000
|
||||
targetPort: 9000
|
||||
selector:
|
||||
app: llm-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: reranking-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: reranking-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: reranking-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(RERANK_MODEL_ID)
|
||||
- --auto-truncate
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HF_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
- name: MAX_WARMUP_SEQUENCE_LENGTH
|
||||
value: '512'
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/tei-gaudi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: reranking-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: reranking-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 8808
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: reranking-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: reranking-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: reranking-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: reranking-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/reranking-tei:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-deploy
|
||||
ports:
|
||||
- containerPort: 8000
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: reranking-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: reranking-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 8000
|
||||
targetPort: 8000
|
||||
selector:
|
||||
app: reranking-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: retriever-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: retriever-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/retriever-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: retriever-deploy
|
||||
ports:
|
||||
- containerPort: 7000
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: retriever-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 7000
|
||||
targetPort: 7000
|
||||
selector:
|
||||
app: retriever-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: vector-db
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: redis/redis-stack:7.2.0-v9
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: vector-db
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
- containerPort: 8001
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: vector-db-service
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
- name: vector-db-insight
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
selector:
|
||||
app: vector-db
|
||||
type: ClusterIP
|
||||
---
|
||||
@@ -1,742 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: qna-config
|
||||
namespace: default
|
||||
data:
|
||||
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||
INDEX_NAME: rag-redis
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
EMBEDDING_SERVICE_HOST_IP: embedding-svc
|
||||
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||
RERANK_SERVICE_HOST_IP: reranking-svc
|
||||
NODE_SELECTOR: chatqna-opea
|
||||
LLM_SERVICE_HOST_IP: llm-svc
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-backend-server-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: chatqna-backend-server-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna-without-rerank:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 8888
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: chatqna-backend-server-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: NodePort
|
||||
selector:
|
||||
app: chatqna-backend-server-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 8888
|
||||
targetPort: 8888
|
||||
nodePort: 30888
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: dataprep-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: dataprep-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- env:
|
||||
- name: REDIS_URL
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: REDIS_URL
|
||||
- name: TEI_ENDPOINT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: TEI_EMBEDDING_ENDPOINT
|
||||
- name: INDEX_NAME
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: INDEX_NAME
|
||||
image: opea/dataprep-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: dataprep-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 6007
|
||||
- containerPort: 6008
|
||||
- containerPort: 6009
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: dataprep-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: dataprep-deploy
|
||||
ports:
|
||||
- name: port1
|
||||
port: 6007
|
||||
targetPort: 6007
|
||||
- name: port2
|
||||
port: 6008
|
||||
targetPort: 6008
|
||||
- name: port3
|
||||
port: 6009
|
||||
targetPort: 6009
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-dependency-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
name: embedding-dependency-deploy
|
||||
args:
|
||||
- --model-id
|
||||
- $(EMBEDDING_MODEL_ID)
|
||||
- --auto-truncate
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
ports:
|
||||
- containerPort: 80
|
||||
serviceAccountName: default
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
- name: shm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: embedding-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: embedding-dependency-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 6006
|
||||
targetPort: 80
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/embedding-tei:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 6000
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: embedding-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: embedding-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 6000
|
||||
targetPort: 6000
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 32
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-dependency-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
name: llm-dependency-deploy-demo
|
||||
securityContext:
|
||||
capabilities:
|
||||
add:
|
||||
- SYS_NICE
|
||||
args:
|
||||
- --model-id
|
||||
- $(LLM_MODEL_ID)
|
||||
- --max-input-length
|
||||
- '2048'
|
||||
- --max-total-tokens
|
||||
- '4096'
|
||||
- --max-batch-total-tokens
|
||||
- '65536'
|
||||
- --max-batch-prefill-tokens
|
||||
- '4096'
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
- name: ENABLE_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: LIMIT_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: USE_FLASH_ATTENTION
|
||||
value: 'true'
|
||||
- name: FLASH_ATTENTION_RECOMPUTE
|
||||
value: 'true'
|
||||
serviceAccountName: default
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
- name: shm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: llm-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 9009
|
||||
targetPort: 80
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/llm-tgi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 9000
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: llm-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: llm-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 9000
|
||||
targetPort: 9000
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: reranking-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: reranking-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: reranking-dependency-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: reranking-dependency-deploy
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||
name: reranking-dependency-deploy
|
||||
args:
|
||||
- --model-id
|
||||
- $(RERANK_MODEL_ID)
|
||||
- --auto-truncate
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HF_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
- name: MAX_WARMUP_SEQUENCE_LENGTH
|
||||
value: '512'
|
||||
serviceAccountName: default
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
- name: shm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: reranking-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: reranking-dependency-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 8808
|
||||
targetPort: 80
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: reranking-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: reranking-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: reranking-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: reranking-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/reranking-tei:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 8000
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: reranking-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: reranking-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 8000
|
||||
targetPort: 8000
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: retriever-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: retriever-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- env:
|
||||
- name: REDIS_URL
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: REDIS_URL
|
||||
- name: TEI_EMBEDDING_ENDPOINT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: TEI_EMBEDDING_ENDPOINT
|
||||
- name: HUGGINGFACEHUB_API_TOKEN
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: HUGGINGFACEHUB_API_TOKEN
|
||||
- name: INDEX_NAME
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: INDEX_NAME
|
||||
image: opea/retriever-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: retriever-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 7000
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: retriever-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: retriever-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 7000
|
||||
targetPort: 7000
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: vector-db
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
containers:
|
||||
- name: vector-db
|
||||
image: redis/redis-stack:7.2.0-v9
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
- containerPort: 8001
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: vector-db
|
||||
ports:
|
||||
- name: vector-db-service
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
- name: vector-db-insight
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
|
||||
|
||||
---
|
||||
@@ -1,591 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: qna-config
|
||||
namespace: default
|
||||
data:
|
||||
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||
INDEX_NAME: rag-redis
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
EMBEDDING_SERVICE_HOST_IP: embedding-svc
|
||||
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||
RERANK_SERVICE_HOST_IP: reranking-svc
|
||||
NODE_SELECTOR: chatqna-opea
|
||||
LLM_SERVICE_HOST_IP: llm-svc
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-backend-server-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: chatqna-backend-server-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna-without-rerank:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 8888
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: chatqna-backend-server-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: NodePort
|
||||
selector:
|
||||
app: chatqna-backend-server-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 8888
|
||||
targetPort: 8888
|
||||
nodePort: 30888
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: dataprep-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: dataprep-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- env:
|
||||
- name: REDIS_URL
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: REDIS_URL
|
||||
- name: TEI_ENDPOINT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: TEI_EMBEDDING_ENDPOINT
|
||||
- name: INDEX_NAME
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: INDEX_NAME
|
||||
image: opea/dataprep-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: dataprep-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 6007
|
||||
- containerPort: 6008
|
||||
- containerPort: 6009
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: dataprep-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: dataprep-deploy
|
||||
ports:
|
||||
- name: port1
|
||||
port: 6007
|
||||
targetPort: 6007
|
||||
- name: port2
|
||||
port: 6008
|
||||
targetPort: 6008
|
||||
- name: port3
|
||||
port: 6009
|
||||
targetPort: 6009
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-dependency-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
name: embedding-dependency-deploy
|
||||
args:
|
||||
- --model-id
|
||||
- $(EMBEDDING_MODEL_ID)
|
||||
- --auto-truncate
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
ports:
|
||||
- containerPort: 80
|
||||
serviceAccountName: default
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
- name: shm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: embedding-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: embedding-dependency-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 6006
|
||||
targetPort: 80
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/embedding-tei:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 6000
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: embedding-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: embedding-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 6000
|
||||
targetPort: 6000
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 8
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-dependency-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
name: llm-dependency-deploy-demo
|
||||
securityContext:
|
||||
capabilities:
|
||||
add:
|
||||
- SYS_NICE
|
||||
args:
|
||||
- --model-id
|
||||
- $(LLM_MODEL_ID)
|
||||
- --max-input-length
|
||||
- '2048'
|
||||
- --max-total-tokens
|
||||
- '4096'
|
||||
- --max-batch-total-tokens
|
||||
- '65536'
|
||||
- --max-batch-prefill-tokens
|
||||
- '4096'
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
- name: ENABLE_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: LIMIT_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: USE_FLASH_ATTENTION
|
||||
value: 'true'
|
||||
- name: FLASH_ATTENTION_RECOMPUTE
|
||||
value: 'true'
|
||||
serviceAccountName: default
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
- name: shm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: llm-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 9009
|
||||
targetPort: 80
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/llm-tgi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 9000
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: llm-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: llm-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 9000
|
||||
targetPort: 9000
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: retriever-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: retriever-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- env:
|
||||
- name: REDIS_URL
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: REDIS_URL
|
||||
- name: TEI_EMBEDDING_ENDPOINT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: TEI_EMBEDDING_ENDPOINT
|
||||
- name: HUGGINGFACEHUB_API_TOKEN
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: HUGGINGFACEHUB_API_TOKEN
|
||||
- name: INDEX_NAME
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: INDEX_NAME
|
||||
image: opea/retriever-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: retriever-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 7000
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: retriever-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: retriever-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 7000
|
||||
targetPort: 7000
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: vector-db
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
containers:
|
||||
- name: vector-db
|
||||
image: redis/redis-stack:7.2.0-v9
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
- containerPort: 8001
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: vector-db
|
||||
ports:
|
||||
- name: vector-db-service
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
- name: vector-db-insight
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
|
||||
|
||||
---
|
||||
@@ -1,591 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: qna-config
|
||||
namespace: default
|
||||
data:
|
||||
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||
INDEX_NAME: rag-redis
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
EMBEDDING_SERVICE_HOST_IP: embedding-svc
|
||||
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||
RERANK_SERVICE_HOST_IP: reranking-svc
|
||||
NODE_SELECTOR: chatqna-opea
|
||||
LLM_SERVICE_HOST_IP: llm-svc
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-backend-server-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: chatqna-backend-server-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna-without-rerank:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 8888
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: chatqna-backend-server-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: NodePort
|
||||
selector:
|
||||
app: chatqna-backend-server-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 8888
|
||||
targetPort: 8888
|
||||
nodePort: 30888
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: dataprep-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: dataprep-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- env:
|
||||
- name: REDIS_URL
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: REDIS_URL
|
||||
- name: TEI_ENDPOINT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: TEI_EMBEDDING_ENDPOINT
|
||||
- name: INDEX_NAME
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: INDEX_NAME
|
||||
image: opea/dataprep-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: dataprep-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 6007
|
||||
- containerPort: 6008
|
||||
- containerPort: 6009
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: dataprep-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: dataprep-deploy
|
||||
ports:
|
||||
- name: port1
|
||||
port: 6007
|
||||
targetPort: 6007
|
||||
- name: port2
|
||||
port: 6008
|
||||
targetPort: 6008
|
||||
- name: port3
|
||||
port: 6009
|
||||
targetPort: 6009
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-dependency-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
name: embedding-dependency-deploy
|
||||
args:
|
||||
- --model-id
|
||||
- $(EMBEDDING_MODEL_ID)
|
||||
- --auto-truncate
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
ports:
|
||||
- containerPort: 80
|
||||
serviceAccountName: default
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
- name: shm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: embedding-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: embedding-dependency-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 6006
|
||||
targetPort: 80
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/embedding-tei:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 6000
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: embedding-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: embedding-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 6000
|
||||
targetPort: 6000
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 16
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-dependency-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
name: llm-dependency-deploy-demo
|
||||
securityContext:
|
||||
capabilities:
|
||||
add:
|
||||
- SYS_NICE
|
||||
args:
|
||||
- --model-id
|
||||
- $(LLM_MODEL_ID)
|
||||
- --max-input-length
|
||||
- '2048'
|
||||
- --max-total-tokens
|
||||
- '4096'
|
||||
- --max-batch-total-tokens
|
||||
- '65536'
|
||||
- --max-batch-prefill-tokens
|
||||
- '4096'
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
- name: ENABLE_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: LIMIT_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: USE_FLASH_ATTENTION
|
||||
value: 'true'
|
||||
- name: FLASH_ATTENTION_RECOMPUTE
|
||||
value: 'true'
|
||||
serviceAccountName: default
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
- name: shm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: llm-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 9009
|
||||
targetPort: 80
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/llm-tgi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 9000
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: llm-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: llm-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 9000
|
||||
targetPort: 9000
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: retriever-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: retriever-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- env:
|
||||
- name: REDIS_URL
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: REDIS_URL
|
||||
- name: TEI_EMBEDDING_ENDPOINT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: TEI_EMBEDDING_ENDPOINT
|
||||
- name: HUGGINGFACEHUB_API_TOKEN
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: HUGGINGFACEHUB_API_TOKEN
|
||||
- name: INDEX_NAME
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: INDEX_NAME
|
||||
image: opea/retriever-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: retriever-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 7000
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: retriever-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: retriever-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 7000
|
||||
targetPort: 7000
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: vector-db
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
containers:
|
||||
- name: vector-db
|
||||
image: redis/redis-stack:7.2.0-v9
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
- containerPort: 8001
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: vector-db
|
||||
ports:
|
||||
- name: vector-db-service
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
- name: vector-db-insight
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
|
||||
|
||||
---
|
||||
@@ -88,22 +88,9 @@ find . -name '*.yaml' -type f -exec sed -i "s#\$(EMBEDDING_MODEL_ID)#${EMBEDDING
|
||||
find . -name '*.yaml' -type f -exec sed -i "s#\$(RERANK_MODEL_ID)#${RERANK_MODEL_ID}#g" {} \;
|
||||
```
|
||||
|
||||
### Benchmark tool preparation
|
||||
|
||||
The test uses the [benchmark tool](https://github.com/opea-project/GenAIEval/tree/main/evals/benchmark) to run performance tests. The benchmark tool must be set up on the Kubernetes master node, which is k8s-master.
|
||||
|
||||
```bash
|
||||
# on k8s-master node
|
||||
git clone https://github.com/opea-project/GenAIEval.git
|
||||
cd GenAIEval
|
||||
python3 -m venv stress_venv
|
||||
source stress_venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
### Test Configurations
|
||||
|
||||
Workload configuration:
|
||||
By default, the workload and benchmark configuration is as below:
|
||||
|
||||
| Key | Value |
|
||||
| -------- | ------- |
|
||||
@@ -189,24 +176,21 @@ curl -X POST "http://${cluster_ip}:6007/v1/dataprep" \
|
||||
|
||||
###### 3.2 Run Benchmark Test
|
||||
|
||||
We copy the configuration file [benchmark.yaml](./benchmark.yaml) to `GenAIEval/evals/benchmark/benchmark.yaml` and config `test_suite_config.deployment_type`, `test_suite_config.service_ip`, `test_suite_config.service_port`, `test_suite_config.user_queries` and `test_suite_config.test_output_dir`.
|
||||
Before the benchmark, we can configure the number of test queries and test output directory by:
|
||||
|
||||
```bash
|
||||
export DEPLOYMENT_TYPE="k8s"
|
||||
export SERVICE_IP = None
|
||||
export SERVICE_PORT = None
|
||||
export USER_QUERIES="[640, 640, 640, 640]"
|
||||
export TEST_OUTPUT_DIR="/home/sdp/benchmark_output/node_1"
|
||||
envsubst < ./benchmark.yaml > GenAIEval/evals/benchmark/benchmark.yaml
|
||||
```
|
||||
|
||||
And then run the benchmark tool by:
|
||||
And then run the benchmark by:
|
||||
|
||||
```bash
|
||||
cd GenAIEval/evals/benchmark
|
||||
python benchmark.py
|
||||
bash benchmark.sh -n 1
|
||||
```
|
||||
|
||||
The argument `-n` refers to the number of test nodes. Note that necessary dependencies will be automatically installed when running benchmark for the first time.
|
||||
|
||||
##### 4. Data collection
|
||||
|
||||
All the test results will come to this folder `/home/sdp/benchmark_output/node_1` configured by the environment variable `TEST_OUTPUT_DIR` in previous steps.
|
||||
@@ -242,22 +226,20 @@ kubectl apply -f .
|
||||
|
||||
##### 3. Run tests
|
||||
|
||||
We copy the configuration file [benchmark.yaml](./benchmark.yaml) to `GenAIEval/evals/benchmark/benchmark.yaml` and config `test_suite_config.deployment_type`, `test_suite_config.service_ip`, `test_suite_config.service_port`, `test_suite_config.user_queries` and `test_suite_config.test_output_dir`.
|
||||
|
||||
````bash
|
||||
export DEPLOYMENT_TYPE="k8s"
|
||||
export SERVICE_IP = None
|
||||
export SERVICE_PORT = None
|
||||
export USER_QUERIES="[1280, 1280, 1280, 1280]"
|
||||
export TEST_OUTPUT_DIR="/home/sdp/benchmark_output/node_2"
|
||||
envsubst < ./benchmark.yaml > GenAIEval/evals/benchmark/benchmark.yaml
|
||||
|
||||
And then run the benchmark tool by:
|
||||
Before the benchmark, we can configure the number of test queries and test output directory by:
|
||||
|
||||
```bash
|
||||
cd GenAIEval/evals/benchmark
|
||||
python benchmark.py
|
||||
````
|
||||
export USER_QUERIES="[1280, 1280, 1280, 1280]"
|
||||
export TEST_OUTPUT_DIR="/home/sdp/benchmark_output/node_2"
|
||||
```
|
||||
|
||||
And then run the benchmark by:
|
||||
|
||||
```bash
|
||||
bash benchmark.sh -n 2
|
||||
```
|
||||
|
||||
The argument `-n` refers to the number of test nodes. Note that necessary dependencies will be automatically installed when running benchmark for the first time.
|
||||
|
||||
##### 4. Data collection
|
||||
|
||||
@@ -293,24 +275,21 @@ kubectl apply -f .
|
||||
|
||||
##### 3. Run tests
|
||||
|
||||
We copy the configuration file [benchmark.yaml](./benchmark.yaml) to `GenAIEval/evals/benchmark/benchmark.yaml` and config `test_suite_config.deployment_type`, `test_suite_config.service_ip`, `test_suite_config.service_port`, `test_suite_config.user_queries` and `test_suite_config.test_output_dir`.
|
||||
Before the benchmark, we can configure the number of test queries and test output directory by:
|
||||
|
||||
```bash
|
||||
export DEPLOYMENT_TYPE="k8s"
|
||||
export SERVICE_IP = None
|
||||
export SERVICE_PORT = None
|
||||
export USER_QUERIES="[2560, 2560, 2560, 2560]"
|
||||
export TEST_OUTPUT_DIR="/home/sdp/benchmark_output/node_4"
|
||||
envsubst < ./benchmark.yaml > GenAIEval/evals/benchmark/benchmark.yaml
|
||||
```
|
||||
|
||||
And then run the benchmark tool by:
|
||||
And then run the benchmark by:
|
||||
|
||||
```bash
|
||||
cd GenAIEval/evals/benchmark
|
||||
python benchmark.py
|
||||
bash benchmark.sh -n 4
|
||||
```
|
||||
|
||||
The argument `-n` refers to the number of test nodes. Note that necessary dependencies will be automatically installed when running benchmark for the first time.
|
||||
|
||||
##### 4. Data collection
|
||||
|
||||
All the test results will come to this folder `/home/sdp/benchmark_output/node_4` configured by the environment variable `TEST_OUTPUT_DIR` in previous steps.
|
||||
@@ -369,24 +348,21 @@ Refer to the [NVIDIA GPU Guide](../../docker_compose/nvidia/gpu/README.md) for m
|
||||
|
||||
### Run tests
|
||||
|
||||
We copy the configuration file [benchmark.yaml](./benchmark.yaml) to `GenAIEval/evals/benchmark/benchmark.yaml` and config `test_suite_config.deployment_type`, `test_suite_config.service_ip`, `test_suite_config.service_port`, `test_suite_config.user_queries` and `test_suite_config.test_output_dir`.
|
||||
Before the benchmark, we can configure the number of test queries and test output directory by:
|
||||
|
||||
```bash
|
||||
export DEPLOYMENT_TYPE="docker"
|
||||
export SERVICE_IP = "ChatQnA Service IP"
|
||||
export SERVICE_PORT = "ChatQnA Service Port"
|
||||
export USER_QUERIES="[640, 640, 640, 640]"
|
||||
export TEST_OUTPUT_DIR="/home/sdp/benchmark_output/docker"
|
||||
envsubst < ./benchmark.yaml > GenAIEval/evals/benchmark/benchmark.yaml
|
||||
```
|
||||
|
||||
And then run the benchmark tool by:
|
||||
And then run the benchmark by:
|
||||
|
||||
```bash
|
||||
cd GenAIEval/evals/benchmark
|
||||
python benchmark.py
|
||||
bash benchmark.sh -d docker -i <service-ip> -p <service-port>
|
||||
```
|
||||
|
||||
The argument `-i` and `-p` refer to the deployed ChatQnA service IP and port, respectively. Note that necessary dependencies will be automatically installed when running benchmark for the first time.
|
||||
|
||||
### Data collection
|
||||
|
||||
All the test results will come to this folder `/home/sdp/benchmark_output/docker` configured by the environment variable `TEST_OUTPUT_DIR` in previous steps.
|
||||
|
||||
99
ChatQnA/benchmark/performance/benchmark.sh
Executable file
99
ChatQnA/benchmark/performance/benchmark.sh
Executable file
@@ -0,0 +1,99 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
deployment_type="k8s"
|
||||
node_number=1
|
||||
service_port=8888
|
||||
query_per_node=640
|
||||
|
||||
benchmark_tool_path="$(pwd)/GenAIEval"
|
||||
|
||||
usage() {
|
||||
echo "Usage: $0 [-d deployment_type] [-n node_number] [-i service_ip] [-p service_port]"
|
||||
echo " -d deployment_type ChatQnA deployment type, select between k8s and docker (default: k8s)"
|
||||
echo " -n node_number Test node number, required only for k8s deployment_type, (default: 1)"
|
||||
echo " -i service_ip chatqna service ip, required only for docker deployment_type"
|
||||
echo " -p service_port chatqna service port, required only for docker deployment_type, (default: 8888)"
|
||||
exit 1
|
||||
}
|
||||
|
||||
while getopts ":d:n:i:p:" opt; do
|
||||
case ${opt} in
|
||||
d )
|
||||
deployment_type=$OPTARG
|
||||
;;
|
||||
n )
|
||||
node_number=$OPTARG
|
||||
;;
|
||||
i )
|
||||
service_ip=$OPTARG
|
||||
;;
|
||||
p )
|
||||
service_port=$OPTARG
|
||||
;;
|
||||
\? )
|
||||
echo "Invalid option: -$OPTARG" 1>&2
|
||||
usage
|
||||
;;
|
||||
: )
|
||||
echo "Invalid option: -$OPTARG requires an argument" 1>&2
|
||||
usage
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
if [[ "$deployment_type" == "docker" && -z "$service_ip" ]]; then
|
||||
echo "Error: service_ip is required for docker deployment_type" 1>&2
|
||||
usage
|
||||
fi
|
||||
|
||||
if [[ "$deployment_type" == "k8s" && ( -n "$service_ip" || -n "$service_port" ) ]]; then
|
||||
echo "Warning: service_ip and service_port are ignored for k8s deployment_type" 1>&2
|
||||
fi
|
||||
|
||||
function main() {
|
||||
if [[ ! -d ${benchmark_tool_path} ]]; then
|
||||
echo "Benchmark tool not found, setting up..."
|
||||
setup_env
|
||||
fi
|
||||
run_benchmark
|
||||
}
|
||||
|
||||
function setup_env() {
|
||||
git clone https://github.com/opea-project/GenAIEval.git
|
||||
pushd ${benchmark_tool_path}
|
||||
python3 -m venv stress_venv
|
||||
source stress_venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
popd
|
||||
}
|
||||
|
||||
function run_benchmark() {
|
||||
source ${benchmark_tool_path}/stress_venv/bin/activate
|
||||
export DEPLOYMENT_TYPE=${deployment_type}
|
||||
export SERVICE_IP=${service_ip:-"None"}
|
||||
export SERVICE_PORT=${service_port:-"None"}
|
||||
if [[ -z $USER_QUERIES ]]; then
|
||||
user_query=$((query_per_node*node_number))
|
||||
export USER_QUERIES="[${user_query}, ${user_query}, ${user_query}, ${user_query}]"
|
||||
echo "USER_QUERIES not configured, setting to: ${USER_QUERIES}."
|
||||
fi
|
||||
export WARMUP=$(echo $USER_QUERIES | sed -e 's/[][]//g' -e 's/,.*//')
|
||||
if [[ -z $WARMUP ]]; then export WARMUP=0; fi
|
||||
if [[ -z $TEST_OUTPUT_DIR ]]; then
|
||||
if [[ $DEPLOYMENT_TYPE == "k8s" ]]; then
|
||||
export TEST_OUTPUT_DIR="${benchmark_tool_path}/evals/benchmark/benchmark_output/node_${node_number}"
|
||||
else
|
||||
export TEST_OUTPUT_DIR="${benchmark_tool_path}/evals/benchmark/benchmark_output/docker"
|
||||
fi
|
||||
echo "TEST_OUTPUT_DIR not configured, setting to: ${TEST_OUTPUT_DIR}."
|
||||
fi
|
||||
|
||||
envsubst < ./benchmark.yaml > ${benchmark_tool_path}/evals/benchmark/benchmark.yaml
|
||||
cd ${benchmark_tool_path}/evals/benchmark
|
||||
python benchmark.py
|
||||
}
|
||||
|
||||
# Script entry point: set up the benchmark tool if it is missing, then run the benchmark.
main
|
||||
@@ -6,14 +6,24 @@ test_suite_config: # Overall configuration settings for the test suite
|
||||
deployment_type: ${DEPLOYMENT_TYPE} # Default is "k8s", can also be "docker"
|
||||
service_ip: ${SERVICE_IP} # Leave as None for k8s, specify for Docker
|
||||
service_port: ${SERVICE_PORT} # Leave as None for k8s, specify for Docker
|
||||
concurrent_level: 5 # The concurrency level, adjustable based on requirements
|
||||
user_queries: ${USER_QUERIES} # Number of test requests at each concurrency level
|
||||
random_prompt: false # Use random prompts if true, fixed prompts if false
|
||||
warm_ups: ${WARMUP} # Number of test requests for warm-up
|
||||
run_time: 60m # The max total run time for the test suite
|
||||
seed: # The seed for all RNGs
|
||||
user_queries: ${USER_QUERIES} # Number of test requests at each concurrency level
|
||||
query_timeout: 120 # Number of seconds to wait for a simulated user to complete any executing task before exiting. 120 sec by default.
|
||||
random_prompt: false # Use random prompts if true, fixed prompts if false
|
||||
collect_service_metric: false # Collect service metrics if true, do not collect service metrics if false
|
||||
data_visualization: false # Generate data visualization if true, do not generate data visualization if false
|
||||
llm_model: "Intel/neural-chat-7b-v3-3" # The LLM model used for the test
|
||||
test_output_dir: "${TEST_OUTPUT_DIR}" # The directory to store the test output
|
||||
load_shape: # Tenant concurrency pattern
|
||||
name: constant # poisson or constant(locust default load shape)
|
||||
params: # Loadshape-specific parameters
|
||||
constant: # Constant load shape specific parameters, activate only if load_shape.name is constant
|
||||
concurrent_level: 5 # If user_queries is specified, concurrent_level is target number of requests per user. If not, it is the number of simulated users
|
||||
# arrival_rate: 1.0 # Request arrival rate. If set, concurrent_level will be overridden, constant load will be generated based on arrival-rate
|
||||
poisson: # Poisson load shape specific parameters, activate only if load_shape.name is poisson
|
||||
arrival_rate: 1.0 # Request arrival rate
|
||||
|
||||
test_cases:
|
||||
chatqna:
|
||||
|
||||
23
ChatQnA/benchmark/performance/helm_charts/.helmignore
Normal file
23
ChatQnA/benchmark/performance/helm_charts/.helmignore
Normal file
@@ -0,0 +1,23 @@
|
||||
# Patterns to ignore when building packages.
|
||||
# This supports shell glob matching, relative path matching, and
|
||||
# negation (prefixed with !). Only one pattern per line.
|
||||
.DS_Store
|
||||
# Common VCS dirs
|
||||
.git/
|
||||
.gitignore
|
||||
.bzr/
|
||||
.bzrignore
|
||||
.hg/
|
||||
.hgignore
|
||||
.svn/
|
||||
# Common backup files
|
||||
*.swp
|
||||
*.bak
|
||||
*.tmp
|
||||
*.orig
|
||||
*~
|
||||
# Various IDEs
|
||||
.project
|
||||
.idea/
|
||||
*.tmproj
|
||||
.vscode/
|
||||
27
ChatQnA/benchmark/performance/helm_charts/Chart.yaml
Normal file
27
ChatQnA/benchmark/performance/helm_charts/Chart.yaml
Normal file
@@ -0,0 +1,27 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v2
name: chatqna-charts
description: A Helm chart for Kubernetes

# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
type: application

# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
# Quoted and written as MAJOR.MINOR.PATCH so it is read as a string, not as the float 1.0.
version: "1.0.0"

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "1.16.0"
|
||||
26
ChatQnA/benchmark/performance/helm_charts/README.md
Normal file
26
ChatQnA/benchmark/performance/helm_charts/README.md
Normal file
@@ -0,0 +1,26 @@
|
||||
# Benchmarking Deployment
|
||||
|
||||
This document guides you through deploying the example pipelines using Helm charts. Helm charts simplify managing Kubernetes applications by packaging configuration and resources.
|
||||
|
||||
## Getting Started
|
||||
|
||||
### Preparation
|
||||
|
||||
```bash
|
||||
# on k8s-master node
|
||||
cd GenAIExamples/{example_name}/benchmark/performance/helm_charts
|
||||
|
||||
# Replace the value of HUGGINGFACEHUB_API_TOKEN with your actual Hugging Face token:
|
||||
# vim hpu_with_rerank.yaml or hpu_without_rerank.yaml
|
||||
HUGGINGFACEHUB_API_TOKEN: hf_xxxxx
|
||||
```
|
||||
|
||||
### Deployment
|
||||
|
||||
```bash
|
||||
# Options:
|
||||
# --num_nodes choices=[1, 2, 4, 8]
|
||||
# --mode choices=["tuned", "oob"]
|
||||
# --workflow choices=["with_rerank", "without_rerank"]
|
||||
python deployment.py --workflow=with_rerank --mode=tuned --num_nodes=1
|
||||
```
|
||||
48
ChatQnA/benchmark/performance/helm_charts/customize.yaml
Normal file
48
ChatQnA/benchmark/performance/helm_charts/customize.yaml
Normal file
@@ -0,0 +1,48 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
podSpecs:
|
||||
- name: chatqna-backend-server-deploy
|
||||
replicas: 2
|
||||
resources:
|
||||
limits:
|
||||
cpu: "8"
|
||||
memory: "8000Mi"
|
||||
requests:
|
||||
cpu: "8"
|
||||
memory: "8000Mi"
|
||||
|
||||
- name: embedding-dependency-deploy
|
||||
replicas: 1
|
||||
resources:
|
||||
limits:
|
||||
cpu: "80"
|
||||
memory: "20000Mi"
|
||||
requests:
|
||||
cpu: "80"
|
||||
memory: "20000Mi"
|
||||
|
||||
- name: reranking-dependency-deploy
|
||||
replicas: 1
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
|
||||
- name: llm-dependency-deploy
|
||||
replicas: 7
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
|
||||
- name: dataprep-deploy
|
||||
replicas: 1
|
||||
|
||||
- name: vector-db
|
||||
replicas: 1
|
||||
|
||||
- name: retriever-deploy
|
||||
replicas: 2
|
||||
resources:
|
||||
requests:
|
||||
cpu: "4"
|
||||
memory: "4000Mi"
|
||||
168
ChatQnA/benchmark/performance/helm_charts/deployment.py
Normal file
168
ChatQnA/benchmark/performance/helm_charts/deployment.py
Normal file
@@ -0,0 +1,168 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
import yaml
|
||||
|
||||
|
||||
def generate_yaml(num_nodes, mode="oob", with_rerank=True):
    """Generate the per-pod Helm values override file for a deployment.

    Builds a ``podSpecs`` list with one entry per pod. Every entry carries the
    pod's replica count; in ``"tuned"`` mode it additionally carries the pod's
    resource settings and, for the LLM serving pod, its command-line arguments.
    The YAML is printed and written to
    ``{mode}_{num_nodes}_gaudi_with_rerank.yaml`` (or ``..._without_rerank.yaml``)
    in the current working directory.

    Args:
        num_nodes: Number of nodes to size the replica counts for.
        mode: ``"tuned"`` adds resources and args; any other value (the
            default is ``"oob"``) emits replica counts only.
        with_rerank: Truthy to include the reranking pod in the pipeline.

    Returns:
        The path of the generated file, joined onto the current working
        directory.
    """
    backend = "chatqna-backend-server-deploy"
    embedding = "embedding-dependency-deploy"
    reranking = "reranking-dependency-deploy"
    llm = "llm-dependency-deploy"
    dataprep = "dataprep-deploy"
    vector_db = "vector-db"
    retriever = "retriever-deploy"

    # The order of this list is the order of the entries in the output file.
    pods_list = [backend, embedding, dataprep, vector_db, retriever]
    if with_rerank:
        pods_list.append(reranking)
    pods_list.append(llm)

    # Backend and retriever get two replicas on a single node, otherwise one
    # replica per node.
    scaled_replicas = 2 if num_nodes == 1 else num_nodes
    replicas_dict = {
        backend: scaled_replicas,
        embedding: num_nodes,
        # One LLM replica fewer when reranking is enabled -- presumably because
        # the reranking pod claims one Gaudi device itself; confirm.
        llm: 8 * num_nodes - 1 if with_rerank else 8 * num_nodes,
        dataprep: 1,
        vector_db: 1,
        retriever: scaled_replicas,
    }

    resources_dict = {
        backend: {"limits": {"cpu": "16", "memory": "8000Mi"}, "requests": {"cpu": "16", "memory": "8000Mi"}},
        embedding: {"limits": {"cpu": "80", "memory": "20000Mi"}, "requests": {"cpu": "80", "memory": "20000Mi"}},
        llm: {"limits": {"habana.ai/gaudi": 1}},
        retriever: {"requests": {"cpu": "8", "memory": "8000Mi"}},
    }

    if with_rerank:
        replicas_dict[reranking] = 1
        resources_dict[reranking] = {"limits": {"habana.ai/gaudi": 1}}

    tgi_params_dict = {
        llm: [
            {"name": "--model-id", "value": "$(LLM_MODEL_ID)"},
            {"name": "--max-input-length", "value": 1280},
            {"name": "--max-total-tokens", "value": 2048},
            {"name": "--max-batch-total-tokens", "value": 65536},
            {"name": "--max-batch-prefill-tokens", "value": 4096},
        ],
    }

    # (output key, per-pod lookup) pairs; resources and args only when tuned.
    sections = [("replicas", replicas_dict)]
    if mode == "tuned":
        sections.extend([("resources", resources_dict), ("args", tgi_params_dict)])

    merged_specs = {"podSpecs": []}
    for pod in pods_list:
        pod_spec = {"name": pod}
        for key, lookup in sections:
            if pod in lookup:
                pod_spec[key] = lookup[pod]

        # Skip pods for which nothing besides the name would be emitted.
        if len(pod_spec) > 1:
            merged_specs["podSpecs"].append(pod_spec)

    yaml_data = yaml.dump(merged_specs, default_flow_style=False)

    print(yaml_data)

    if with_rerank:
        filename = f"{mode}_{num_nodes}_gaudi_with_rerank.yaml"
    else:
        filename = f"{mode}_{num_nodes}_gaudi_without_rerank.yaml"
    with open(filename, "w") as file:
        file.write(yaml_data)

    current_dir = os.getcwd()
    filepath = os.path.join(current_dir, filename)
    print(f"YAML file {filepath} has been generated.")

    return filepath
|
||||
|
||||
|
||||
def main():
    """Parse the command line, generate the override values file, and run Helm.

    Command-line options:
        --name       Release name passed to ``helm install`` (default "chatqna").
        --folder     Path of the helm charts folder (default ".").
        --num_nodes  Number of nodes to deploy; one of 1, 2, 4, 8 (default 1).
        --mode       "oob" or "tuned"; forwarded to ``generate_yaml``.
        --workflow   "with_rerank" or "without_rerank"; selects the base
                     values file and is forwarded to ``generate_yaml``.
        --template   Run ``helm template`` instead of ``helm install``.

    Raises:
        subprocess.CalledProcessError: if the helm command exits non-zero
            (``check=True``).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--name", help="The name of example pipelines", default="chatqna")
    parser.add_argument("--folder", help="The path of helmcharts folder", default=".")
    # The original declared both default=1 and required=True, which made the
    # default unreachable. Dropping `required` lets the documented default
    # apply; every invocation that already passed --num_nodes is unaffected.
    parser.add_argument("--num_nodes", help="Number of nodes to deploy", type=int, choices=[1, 2, 4, 8], default=1)
    parser.add_argument(
        "--mode", help="set up your chatqna in the specified mode", type=str, choices=["oob", "tuned"], default="oob"
    )
    parser.add_argument(
        "--workflow",
        help="with rerank in the pipeline",
        type=str,
        choices=["with_rerank", "without_rerank"],
        default="with_rerank",
    )
    parser.add_argument("--template", help="helm template", action="store_true")
    args = parser.parse_args()

    with_rerank = args.workflow == "with_rerank"
    # NOTE(review): these paths are relative to the current working directory,
    # not to --folder; confirm that is the intended lookup location.
    workflow_file = "./hpu_with_rerank.yaml" if with_rerank else "./hpu_without_rerank.yaml"

    customize_filepath = generate_yaml(args.num_nodes, mode=args.mode, with_rerank=with_rerank)

    # Both helm sub-commands take the same values files; only the verb and the
    # release name differ, so build the command once.
    if args.template:
        helm_cmd = ["helm", "template", args.folder]
    else:
        helm_cmd = ["helm", "install", args.name, args.folder]
    # The generated file comes last so its settings override the base values.
    helm_cmd += ["-f", workflow_file, "-f", customize_filepath]

    subprocess.run(helm_cmd, check=True, text=True, capture_output=False)


if __name__ == "__main__":
    main()
|
||||
223
ChatQnA/benchmark/performance/helm_charts/hpu_with_rerank.yaml
Normal file
223
ChatQnA/benchmark/performance/helm_charts/hpu_with_rerank.yaml
Normal file
@@ -0,0 +1,223 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
namespace: default

# Chart-wide settings. The chart's ConfigMap template reads CONFIG_MAP_NAME,
# NODE_SELECTOR, the three *_MODEL_ID keys and HUGGINGFACEHUB_API_TOKEN.
config:
  CONFIG_MAP_NAME: chatqna-config
  NODE_SELECTOR: opea
  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
  RERANK_MODEL_ID: BAAI/bge-reranker-base
  # NOTE(review): Helm does not expand shell variables in values files, so
  # this is a literal placeholder; presumably it is substituted before use.
  HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}

  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
  RERANK_SERVER_HOST_IP: reranking-dependency-svc
  RETRIEVER_SERVICE_HOST_IP: retriever-svc
  LLM_SERVER_HOST_IP: llm-dependency-svc
  INDEX_NAME: rag-redis
  EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
  TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
  # Quoted so they stay strings: the ConfigMap template in this chart writes
  # the same three keys as quoted strings.
  LLM_SERVER_PORT: "9009"
  RERANK_SERVER_PORT: "8808"
  EMBEDDING_SERVER_PORT: "6006"
|
||||
|
||||
# One entry per Deployment rendered by the chart's deployment template.
# `args` items are name/value pairs; an item with only `name` is a bare flag.
microservices:
  - name: chatqna-backend-server-deploy
    image: opea/chatqna:latest
    replicas: 1
    ports:
      - containerPort: 8888

  - name: dataprep-deploy
    image: opea/dataprep-redis:latest
    replicas: 1
    ports:
      - containerPort: 6007

  - name: vector-db
    image: redis/redis-stack:7.2.0-v9
    replicas: 1
    ports:
      - containerPort: 6379
      - containerPort: 8001

  - name: retriever-deploy
    image: opea/retriever-redis:latest
    replicas: 1
    ports:
      - containerPort: 7000

  - name: embedding-dependency-deploy
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
    replicas: 1
    ports:
      - containerPort: 80
    args:
      - name: "--model-id"
        value: $(EMBEDDING_MODEL_ID)
      - name: "--auto-truncate"
    volumeMounts:
      - mountPath: /data
        name: model-volume
      - mountPath: /dev/shm
        name: shm
    volumes:
      - hostPath:
          path: /mnt/models
          type: Directory
        name: model-volume
      - emptyDir:
          medium: Memory
          sizeLimit: 1Gi
        name: shm

  # Requests one habana.ai/gaudi device.
  - name: reranking-dependency-deploy
    image: opea/tei-gaudi:latest
    replicas: 1
    resources:
      limits:
        habana.ai/gaudi: 1
    args:
      # The value belongs to the same item as its flag, matching the
      # embedding and LLM entries (it was previously a separate list item).
      - name: "--model-id"
        value: $(RERANK_MODEL_ID)
      - name: "--auto-truncate"
    env:
      - name: OMPI_MCA_btl_vader_single_copy_mechanism
        value: none
      - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
        value: "true"
      - name: runtime
        value: habana
      - name: HABANA_VISIBLE_DEVICES
        value: all
      - name: MAX_WARMUP_SEQUENCE_LENGTH
        value: "512"
    volumeMounts:
      - mountPath: /data
        name: model-volume
      - mountPath: /dev/shm
        name: shm
    volumes:
      - hostPath:
          path: /mnt/models
          type: Directory
        name: model-volume
      - emptyDir:
          medium: Memory
          sizeLimit: 1Gi
        name: shm

  # Requests one habana.ai/gaudi device.
  - name: llm-dependency-deploy
    image: ghcr.io/huggingface/tgi-gaudi:2.0.4
    replicas: 1
    ports:
      - containerPort: 80
    resources:
      limits:
        habana.ai/gaudi: 1
    args:
      - name: "--model-id"
        value: $(LLM_MODEL_ID)
      - name: "--max-input-length"
        value: "2048"
      - name: "--max-total-tokens"
        value: "4096"
    env:
      - name: OMPI_MCA_btl_vader_single_copy_mechanism
        value: none
      - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
        value: "true"
      - name: runtime
        value: habana
      - name: HABANA_VISIBLE_DEVICES
        value: all
    volumeMounts:
      - mountPath: /data
        name: model-volume
      - mountPath: /dev/shm
        name: shm
    volumes:
      - hostPath:
          path: /mnt/models
          type: Directory
        name: model-volume
      - emptyDir:
          medium: Memory
          sizeLimit: 1Gi
        name: shm
|
||||
|
||||
# One entry per Service rendered by the chart's service template. Each
# selector targets the Deployment of the same workload via its `app` label.
services:
  # The only NodePort service (node port 30888).
  - name: chatqna-backend-server-svc
    spec:
      type: NodePort
      selector:
        app: chatqna-backend-server-deploy
      ports:
        - name: service
          port: 8888
          targetPort: 8888
          nodePort: 30888

  - name: dataprep-svc
    spec:
      type: ClusterIP
      selector:
        app: dataprep-deploy
      ports:
        - name: port1
          port: 6007
          targetPort: 6007

  # Service port 6006 maps to container port 80.
  - name: embedding-dependency-svc
    spec:
      type: ClusterIP
      selector:
        app: embedding-dependency-deploy
      ports:
        - name: service
          port: 6006
          targetPort: 80

  # Service port 9009 maps to container port 80.
  - name: llm-dependency-svc
    spec:
      type: ClusterIP
      selector:
        app: llm-dependency-deploy
      ports:
        - name: service
          port: 9009
          targetPort: 80

  # Service port 8808 maps to container port 80.
  - name: reranking-dependency-svc
    spec:
      type: ClusterIP
      selector:
        app: reranking-dependency-deploy
      ports:
        - name: service
          port: 8808
          targetPort: 80

  - name: retriever-svc
    spec:
      type: ClusterIP
      selector:
        app: retriever-deploy
      ports:
        - name: service
          port: 7000
          targetPort: 7000

  - name: vector-db
    spec:
      type: ClusterIP
      selector:
        app: vector-db
      ports:
        - name: vector-db-service
          port: 6379
          targetPort: 6379
        - name: vector-db-insight
          port: 8001
          targetPort: 8001
|
||||
@@ -0,0 +1,166 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
namespace: default

# Chart-wide settings for the pipeline without a reranking stage.
config:
  # Name of the ConfigMap that every container loads through envFrom.
  CONFIG_MAP_NAME: chatqna-config
  # Rendered as the `node-type` nodeSelector value of each Deployment.
  NODE_SELECTOR: opea

  # Model identifiers.
  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
  # Still defined here because the ConfigMap template references it.
  RERANK_MODEL_ID: BAAI/bge-reranker-base

  # NOTE(review): Helm does not expand shell variables in values files, so
  # this is a literal placeholder; presumably it is substituted before use.
  HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
|
||||
# Workloads of the pipeline without reranking: backend, data preparation,
# vector store, retriever, embedding server and LLM server.
microservices:
  - name: chatqna-backend-server-deploy
    replicas: 1
    image: opea/chatqna-without-rerank:latest
    ports:
      - containerPort: 8888

  - name: dataprep-deploy
    replicas: 1
    image: opea/dataprep-redis:latest
    ports:
      - containerPort: 6007

  - name: vector-db
    replicas: 1
    image: redis/redis-stack:7.2.0-v9
    ports:
      - containerPort: 6379
      - containerPort: 8001

  - name: retriever-deploy
    replicas: 1
    image: opea/retriever-redis:latest
    ports:
      - containerPort: 7000

  - name: embedding-dependency-deploy
    replicas: 1
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
    ports:
      - containerPort: 80
    args:
      - name: "--model-id"
        value: $(EMBEDDING_MODEL_ID)
      # Bare flag: no value.
      - name: "--auto-truncate"
    volumes:
      - name: model-volume
        hostPath:
          path: /mnt/models
          type: Directory
      - name: shm
        emptyDir:
          medium: Memory
          sizeLimit: 1Gi
    volumeMounts:
      - name: model-volume
        mountPath: /data
      - name: shm
        mountPath: /dev/shm

  # Requests one habana.ai/gaudi device.
  - name: llm-dependency-deploy
    replicas: 1
    image: ghcr.io/huggingface/tgi-gaudi:2.0.4
    ports:
      - containerPort: 80
    resources:
      limits:
        habana.ai/gaudi: 1
    args:
      - name: "--model-id"
        value: $(LLM_MODEL_ID)
      - name: "--max-input-length"
        value: "2048"
      - name: "--max-total-tokens"
        value: "4096"
    env:
      - name: OMPI_MCA_btl_vader_single_copy_mechanism
        value: none
      - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
        value: "true"
      - name: runtime
        value: habana
      - name: HABANA_VISIBLE_DEVICES
        value: all
    volumes:
      - name: model-volume
        hostPath:
          path: /mnt/models
          type: Directory
      - name: shm
        emptyDir:
          medium: Memory
          sizeLimit: 1Gi
    volumeMounts:
      - name: model-volume
        mountPath: /data
      - name: shm
        mountPath: /dev/shm
|
||||
|
||||
# Services for the pipeline without reranking. Each selector targets the
# Deployment of the same workload via its `app` label.
services:
  # The only NodePort service (node port 30888).
  - name: chatqna-backend-server-svc
    spec:
      type: NodePort
      selector:
        app: chatqna-backend-server-deploy
      ports:
        - name: service
          port: 8888
          targetPort: 8888
          nodePort: 30888

  - name: dataprep-svc
    spec:
      type: ClusterIP
      selector:
        app: dataprep-deploy
      ports:
        - name: port1
          port: 6007
          targetPort: 6007

  # Service port 6006 maps to container port 80.
  - name: embedding-dependency-svc
    spec:
      type: ClusterIP
      selector:
        app: embedding-dependency-deploy
      ports:
        - name: service
          port: 6006
          targetPort: 80

  # Service port 9009 maps to container port 80.
  - name: llm-dependency-svc
    spec:
      type: ClusterIP
      selector:
        app: llm-dependency-deploy
      ports:
        - name: service
          port: 9009
          targetPort: 80

  - name: retriever-svc
    spec:
      type: ClusterIP
      selector:
        app: retriever-deploy
      ports:
        - name: service
          port: 7000
          targetPort: 7000

  - name: vector-db
    spec:
      type: ClusterIP
      selector:
        app: vector-db
      ports:
        - name: vector-db-service
          port: 6379
          targetPort: 6379
        - name: vector-db-insight
          port: 8001
          targetPort: 8001
|
||||
@@ -0,0 +1,25 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# ConfigMap consumed by every container of the chart through envFrom.
# Templated values are piped through `quote` so the rendered YAML always
# carries them as strings, even when a value looks like a number or boolean.
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ .Values.config.CONFIG_MAP_NAME | quote }}
  namespace: default
data:
  # Values taken from .Values.config.
  EMBEDDING_MODEL_ID: {{ .Values.config.EMBEDDING_MODEL_ID | quote }}
  HUGGINGFACEHUB_API_TOKEN: {{ .Values.config.HUGGINGFACEHUB_API_TOKEN | quote }}
  NODE_SELECTOR: {{ .Values.config.NODE_SELECTOR | quote }}
  RERANK_MODEL_ID: {{ .Values.config.RERANK_MODEL_ID | quote }}
  LLM_MODEL_ID: {{ .Values.config.LLM_MODEL_ID | quote }}
  # Fixed service endpoints; these are not read from the values file.
  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
  RERANK_SERVER_HOST_IP: reranking-dependency-svc
  RETRIEVER_SERVICE_HOST_IP: retriever-svc
  LLM_SERVER_HOST_IP: llm-dependency-svc
  INDEX_NAME: rag-redis
  EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
  TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
  LLM_SERVER_PORT: "9009"
  RERANK_SERVER_PORT: "8808"
  EMBEDDING_SERVER_PORT: "6006"
---
|
||||
@@ -0,0 +1,139 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
{{- /*
  Renders one Deployment per entry of .Values.microservices.
  An entry of .Values.podSpecs whose `name` equals the microservice name may
  override that microservice's replicas, args, image and resources; any field
  the podSpec does not set falls back to the microservice's own value.
*/}}
{{- $global := .Values }}
{{- range $microservice := .Values.microservices }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ $microservice.name }}
  namespace: default
spec:
  {{- /* Replica count: podSpec override, else the microservice's value. */}}
  {{- $replicas := $microservice.replicas }}
  {{- range $podSpec := $global.podSpecs }}
    {{- if eq $podSpec.name $microservice.name }}
      {{- $replicas = $podSpec.replicas | default $microservice.replicas }}
    {{- end }}
  {{- end }}
  replicas: {{ $replicas }}

  selector:
    matchLabels:
      app: {{ $microservice.name }}
  template:
    metadata:
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
      labels:
        app: {{ $microservice.name }}
    spec:
      containers:
      - envFrom:
        - configMapRef:
            name: {{ $global.config.CONFIG_MAP_NAME }}

        {{- /* Container args: podSpec override, else the microservice's args. */}}
        {{- $args := $microservice.args }}
        {{- range $podSpec := $global.podSpecs }}
          {{- if eq $podSpec.name $microservice.name }}
            {{- $args = $podSpec.args | default $microservice.args }}
          {{- end }}
        {{- end }}

        {{- /*
          Gate on the resolved $args, not on $microservice.args: otherwise
          args supplied only through a podSpec would never be rendered.
        */}}
        {{- if $args }}
        args:
          {{- /* Each item yields its name and, when set, its value as separate args. */}}
          {{- range $arg := $args }}
          {{- if $arg.name }}
          - {{ $arg.name }}
          {{- end }}
          {{- if $arg.value }}
          - "{{ $arg.value }}"
          {{- end }}
          {{- end }}
        {{- end }}

        {{- if $microservice.env }}
        env:
          {{- range $env := $microservice.env }}
          - name: {{ $env.name }}
            value: "{{ $env.value }}"
          {{- end }}
        {{- end }}

        {{- /* Image: podSpec override, else the microservice's image. */}}
        {{- $image := $microservice.image }}
        {{- range $podSpec := $global.podSpecs }}
          {{- if eq $podSpec.name $microservice.name }}
            {{- $image = $podSpec.image | default $microservice.image }}
          {{- end }}
        {{- end }}
        image: {{ $image }}

        imagePullPolicy: IfNotPresent
        name: {{ $microservice.name }}

        {{- if $microservice.ports }}
        ports:
          {{- range $port := $microservice.ports }}
            {{- range $port_name, $port_id := $port }}
          - {{ $port_name }}: {{ $port_id }}
            {{- end }}
          {{- end }}
        {{- end }}

        {{- /* Resources: a podSpec's resources replace the microservice's as a whole. */}}
        {{- $resources := $microservice.resources }}
        {{- range $podSpec := $global.podSpecs }}
          {{- if eq $podSpec.name $microservice.name }}
            {{- if $podSpec.resources }}
              {{- $resources = $podSpec.resources }}
            {{- end }}
          {{- end }}
        {{- end }}

        {{- if $resources }}
        resources:
          {{- range $resourceType, $resource := $resources }}
          {{ $resourceType }}:
            {{- range $limitType, $limit := $resource }}
            {{ $limitType }}: {{ $limit }}
            {{- end }}
          {{- end }}
        {{- end }}

        {{- if $microservice.volumeMounts }}
        volumeMounts:
          {{- range $volumeMount := $microservice.volumeMounts }}
          - mountPath: {{ $volumeMount.mountPath }}
            name: {{ $volumeMount.name }}
          {{- end }}
        {{- end }}

      hostIPC: true
      nodeSelector:
        node-type: {{ $global.config.NODE_SELECTOR }}
      serviceAccountName: default
      topologySpreadConstraints:
      - labelSelector:
          matchLabels:
            app: {{ $microservice.name }}
        maxSkew: 1
        topologyKey: kubernetes.io/hostname
        whenUnsatisfiable: ScheduleAnyway

      {{- /* Only hostPath and emptyDir volume sources are rendered. */}}
      {{- if $microservice.volumes }}
      volumes:
      {{- range $index, $volume := $microservice.volumes }}
      - name: {{ $volume.name }}
      {{- if $volume.hostPath }}
        hostPath:
          path: {{ $volume.hostPath.path }}
          type: {{ $volume.hostPath.type }}
      {{- else if $volume.emptyDir }}
        emptyDir:
          medium: {{ $volume.emptyDir.medium }}
          sizeLimit: {{ $volume.emptyDir.sizeLimit }}
      {{- end }}
      {{- end }}
      {{- end }}

---
{{- end }}
|
||||
@@ -0,0 +1,24 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
{{- /*
  Renders one Service per entry of .Values.services. For each port the
  `name` field is written first; every other field of the port entry is
  then copied through as-is.
*/}}
{{- range $svc := .Values.services }}
apiVersion: v1
kind: Service
metadata:
  name: {{ $svc.name }}
  namespace: default
spec:
  ports:
  {{- range $portSpec := $svc.spec.ports }}
    - name: {{ $portSpec.name }}
    {{- range $field, $fieldValue := $portSpec }}
    {{- if not (eq $field "name") }}
      {{ $field }}: {{ $fieldValue }}
    {{- end }}
    {{- end }}
  {{- end }}
  selector:
    app: {{ $svc.spec.selector.app }}
  type: {{ $svc.spec.type }}
---
{{- end }}
|
||||
@@ -44,7 +44,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna-no-wrapper:latest
|
||||
image: opea/chatqna:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
@@ -237,7 +237,7 @@ spec:
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-dependency-deploy
|
||||
ports:
|
||||
@@ -327,7 +327,7 @@ spec:
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/tei-gaudi:latest
|
||||
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-dependency-deploy
|
||||
ports:
|
||||
@@ -29,7 +29,7 @@ metadata:
|
||||
name: chatqna-backend-server-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
replicas: 4
|
||||
selector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
@@ -44,7 +44,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna-no-wrapper:latest
|
||||
image: opea/chatqna:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
@@ -237,7 +237,7 @@ spec:
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-dependency-deploy
|
||||
ports:
|
||||
@@ -327,7 +327,7 @@ spec:
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/tei-gaudi:latest
|
||||
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-dependency-deploy
|
||||
ports:
|
||||
@@ -381,7 +381,7 @@ metadata:
|
||||
name: retriever-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
replicas: 4
|
||||
selector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
@@ -44,7 +44,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna-no-wrapper:latest
|
||||
image: opea/chatqna:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
@@ -237,7 +237,7 @@ spec:
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-dependency-deploy
|
||||
ports:
|
||||
@@ -327,7 +327,7 @@ spec:
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/tei-gaudi:latest
|
||||
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-dependency-deploy
|
||||
ports:
|
||||
@@ -44,7 +44,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna-no-wrapper:latest
|
||||
image: opea/chatqna
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
@@ -237,7 +237,7 @@ spec:
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-dependency-deploy
|
||||
ports:
|
||||
@@ -327,7 +327,7 @@ spec:
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/tei-gaudi:latest
|
||||
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-dependency-deploy
|
||||
ports:
|
||||
@@ -44,7 +44,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna-no-wrapper-without-rerank:latest
|
||||
image: opea/chatqna-without-rerank:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
@@ -237,7 +237,7 @@ spec:
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-dependency-deploy
|
||||
ports:
|
||||
@@ -29,7 +29,7 @@ metadata:
|
||||
name: chatqna-backend-server-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
replicas: 4
|
||||
selector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
@@ -44,7 +44,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna-no-wrapper-without-rerank:latest
|
||||
image: opea/chatqna-without-rerank:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
@@ -237,7 +237,7 @@ spec:
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-dependency-deploy
|
||||
ports:
|
||||
@@ -295,7 +295,7 @@ metadata:
|
||||
name: retriever-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
replicas: 4
|
||||
selector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
@@ -44,7 +44,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna-no-wrapper-without-rerank:latest
|
||||
image: opea/chatqna-without-rerank:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
@@ -237,7 +237,7 @@ spec:
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-dependency-deploy
|
||||
ports:
|
||||
@@ -44,7 +44,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna-no-wrapper-without-rerank:latest
|
||||
image: opea/chatqna-without-rerank:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
@@ -237,7 +237,7 @@ spec:
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-dependency-deploy
|
||||
ports:
|
||||
@@ -44,7 +44,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna-no-wrapper:latest
|
||||
image: opea/chatqna:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
@@ -167,10 +167,10 @@ spec:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
cpu: 76
|
||||
cpu: 80
|
||||
memory: 20000Mi
|
||||
requests:
|
||||
cpu: 76
|
||||
cpu: 80
|
||||
memory: 20000Mi
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
@@ -255,7 +255,7 @@ spec:
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-dependency-deploy
|
||||
ports:
|
||||
@@ -345,7 +345,7 @@ spec:
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/tei-gaudi:latest
|
||||
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-dependency-deploy
|
||||
ports:
|
||||
@@ -44,7 +44,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna-no-wrapper:latest
|
||||
image: opea/chatqna:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
@@ -167,10 +167,10 @@ spec:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
cpu: 76
|
||||
cpu: 80
|
||||
memory: 20000Mi
|
||||
requests:
|
||||
cpu: 76
|
||||
cpu: 80
|
||||
memory: 20000Mi
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
@@ -255,7 +255,7 @@ spec:
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-dependency-deploy
|
||||
ports:
|
||||
@@ -345,7 +345,7 @@ spec:
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/tei-gaudi:latest
|
||||
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-dependency-deploy
|
||||
ports:
|
||||
@@ -44,7 +44,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna-no-wrapper:latest
|
||||
image: opea/chatqna:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
@@ -167,10 +167,10 @@ spec:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
cpu: 76
|
||||
cpu: 80
|
||||
memory: 20000Mi
|
||||
requests:
|
||||
cpu: 76
|
||||
cpu: 80
|
||||
memory: 20000Mi
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
@@ -255,7 +255,7 @@ spec:
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-dependency-deploy
|
||||
ports:
|
||||
@@ -345,7 +345,7 @@ spec:
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/tei-gaudi:latest
|
||||
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-dependency-deploy
|
||||
ports:
|
||||
@@ -44,7 +44,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna-no-wrapper:latest
|
||||
image: opea/chatqna:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
@@ -167,10 +167,10 @@ spec:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
cpu: 76
|
||||
cpu: 80
|
||||
memory: 20000Mi
|
||||
requests:
|
||||
cpu: 76
|
||||
cpu: 80
|
||||
memory: 20000Mi
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
@@ -255,7 +255,7 @@ spec:
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-dependency-deploy
|
||||
ports:
|
||||
@@ -345,7 +345,7 @@ spec:
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/tei-gaudi:latest
|
||||
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-dependency-deploy
|
||||
ports:
|
||||
@@ -44,7 +44,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna-no-wrapper-without-rerank:latest
|
||||
image: opea/chatqna-without-rerank:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
@@ -167,10 +167,10 @@ spec:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
cpu: 76
|
||||
cpu: 80
|
||||
memory: 20000Mi
|
||||
requests:
|
||||
cpu: 76
|
||||
cpu: 80
|
||||
memory: 20000Mi
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
@@ -255,7 +255,7 @@ spec:
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-dependency-deploy
|
||||
ports:
|
||||
@@ -44,7 +44,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna-no-wrapper-without-rerank:latest
|
||||
image: opea/chatqna-without-rerank:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
@@ -167,10 +167,10 @@ spec:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
cpu: 76
|
||||
cpu: 80
|
||||
memory: 20000Mi
|
||||
requests:
|
||||
cpu: 76
|
||||
cpu: 80
|
||||
memory: 20000Mi
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
@@ -255,7 +255,7 @@ spec:
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-dependency-deploy
|
||||
ports:
|
||||
@@ -44,7 +44,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna-no-wrapper-without-rerank:latest
|
||||
image: opea/chatqna-without-rerank:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
@@ -167,10 +167,10 @@ spec:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
cpu: 76
|
||||
cpu: 80
|
||||
memory: 20000Mi
|
||||
requests:
|
||||
cpu: 76
|
||||
cpu: 80
|
||||
memory: 20000Mi
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
@@ -255,7 +255,7 @@ spec:
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-dependency-deploy
|
||||
ports:
|
||||
@@ -44,7 +44,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna-no-wrapper-without-rerank:latest
|
||||
image: opea/chatqna-without-rerank:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
@@ -167,10 +167,10 @@ spec:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
cpu: 76
|
||||
cpu: 80
|
||||
memory: 20000Mi
|
||||
requests:
|
||||
cpu: 76
|
||||
cpu: 80
|
||||
memory: 20000Mi
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
@@ -255,7 +255,7 @@ spec:
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-dependency-deploy
|
||||
ports:
|
||||
@@ -0,0 +1,507 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
data:
|
||||
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
INDEX_NAME: rag-redis
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
LLM_SERVER_HOST_IP: llm-dependency-svc
|
||||
NODE_SELECTOR: chatqna-opea
|
||||
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||
RERANK_SERVER_HOST_IP: reranking-dependency-svc
|
||||
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: qna-config
|
||||
namespace: default
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-backend-server-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 8
|
||||
selector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: chatqna-backend-server-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
- containerPort: 8888
|
||||
resources:
|
||||
limits:
|
||||
cpu: 8
|
||||
memory: 8000Mi
|
||||
requests:
|
||||
cpu: 8
|
||||
memory: 8000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: chatqna-backend-server-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
nodePort: 30888
|
||||
port: 8888
|
||||
targetPort: 8888
|
||||
selector:
|
||||
app: chatqna-backend-server-deploy
|
||||
type: NodePort
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: dataprep-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: dataprep-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/dataprep-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: dataprep-deploy
|
||||
ports:
|
||||
- containerPort: 6007
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: dataprep-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: port1
|
||||
port: 6007
|
||||
targetPort: 6007
|
||||
selector:
|
||||
app: dataprep-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 8
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(EMBEDDING_MODEL_ID)
|
||||
- --auto-truncate
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
cpu: 80
|
||||
memory: 20000Mi
|
||||
requests:
|
||||
cpu: 80
|
||||
memory: 20000Mi
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: embedding-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 6006
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: embedding-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 63
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(LLM_MODEL_ID)
|
||||
- --max-input-length
|
||||
- '1280'
|
||||
- --max-total-tokens
|
||||
- '2048'
|
||||
- --max-batch-total-tokens
|
||||
- '65536'
|
||||
- --max-batch-prefill-tokens
|
||||
- '4096'
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HF_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
securityContext:
|
||||
capabilities:
|
||||
add:
|
||||
- SYS_NICE
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: llm-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 9009
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: reranking-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: reranking-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: reranking-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(RERANK_MODEL_ID)
|
||||
- --auto-truncate
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HF_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
- name: MAX_WARMUP_SEQUENCE_LENGTH
|
||||
value: '512'
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: reranking-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: reranking-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 8808
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: reranking-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: retriever-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 8
|
||||
selector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: retriever-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/retriever-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: retriever-deploy
|
||||
ports:
|
||||
- containerPort: 7000
|
||||
resources:
|
||||
requests:
|
||||
cpu: 4
|
||||
memory: 4000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: retriever-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 7000
|
||||
targetPort: 7000
|
||||
selector:
|
||||
app: retriever-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: vector-db
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: redis/redis-stack:7.2.0-v9
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: vector-db
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
- containerPort: 8001
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: vector-db-service
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
- name: vector-db-insight
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
selector:
|
||||
app: vector-db
|
||||
type: ClusterIP
|
||||
---
|
||||
@@ -0,0 +1,507 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
data:
|
||||
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
INDEX_NAME: rag-redis
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
LLM_SERVER_HOST_IP: llm-dependency-svc
|
||||
NODE_SELECTOR: chatqna-opea
|
||||
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||
RERANK_SERVER_HOST_IP: reranking-dependency-svc
|
||||
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: qna-config
|
||||
namespace: default
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-backend-server-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 4
|
||||
selector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: chatqna-backend-server-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
- containerPort: 8888
|
||||
resources:
|
||||
limits:
|
||||
cpu: 8
|
||||
memory: 8000Mi
|
||||
requests:
|
||||
cpu: 8
|
||||
memory: 8000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: chatqna-backend-server-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
nodePort: 30888
|
||||
port: 8888
|
||||
targetPort: 8888
|
||||
selector:
|
||||
app: chatqna-backend-server-deploy
|
||||
type: NodePort
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: dataprep-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: dataprep-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/dataprep-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: dataprep-deploy
|
||||
ports:
|
||||
- containerPort: 6007
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: dataprep-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: port1
|
||||
port: 6007
|
||||
targetPort: 6007
|
||||
selector:
|
||||
app: dataprep-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 4
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(EMBEDDING_MODEL_ID)
|
||||
- --auto-truncate
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
cpu: 80
|
||||
memory: 20000Mi
|
||||
requests:
|
||||
cpu: 80
|
||||
memory: 20000Mi
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: embedding-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 6006
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: embedding-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 31
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(LLM_MODEL_ID)
|
||||
- --max-input-length
|
||||
- '1280'
|
||||
- --max-total-tokens
|
||||
- '2048'
|
||||
- --max-batch-total-tokens
|
||||
- '65536'
|
||||
- --max-batch-prefill-tokens
|
||||
- '4096'
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HF_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
securityContext:
|
||||
capabilities:
|
||||
add:
|
||||
- SYS_NICE
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: llm-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 9009
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: reranking-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: reranking-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: reranking-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(RERANK_MODEL_ID)
|
||||
- --auto-truncate
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HF_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
- name: MAX_WARMUP_SEQUENCE_LENGTH
|
||||
value: '512'
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: reranking-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: reranking-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 8808
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: reranking-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: retriever-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 4
|
||||
selector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: retriever-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/retriever-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: retriever-deploy
|
||||
ports:
|
||||
- containerPort: 7000
|
||||
resources:
|
||||
requests:
|
||||
cpu: 4
|
||||
memory: 4000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: retriever-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 7000
|
||||
targetPort: 7000
|
||||
selector:
|
||||
app: retriever-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: vector-db
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: redis/redis-stack:7.2.0-v9
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: vector-db
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
- containerPort: 8001
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: vector-db-service
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
- name: vector-db-insight
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
selector:
|
||||
app: vector-db
|
||||
type: ClusterIP
|
||||
---
|
||||
@@ -0,0 +1,514 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: chatqna-config
|
||||
namespace: default
|
||||
data:
|
||||
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
INDEX_NAME: rag-redis
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
|
||||
LLM_SERVER_HOST_IP: llm-dependency-svc
|
||||
NODE_SELECTOR: opea
|
||||
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||
RERANK_SERVER_HOST_IP: reranking-dependency-svc
|
||||
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||
|
||||
LLM_SERVER_PORT: '9009'
|
||||
RERANK_SERVER_PORT: '8808'
|
||||
EMBEDDING_SERVER_PORT: '6006'
|
||||
|
||||
---
|
||||
# Source: chatqna-charts/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: chatqna-backend-server-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
nodePort: 30888
|
||||
port: 8888
|
||||
targetPort: 8888
|
||||
selector:
|
||||
app: chatqna-backend-server-deploy
|
||||
type: NodePort
|
||||
---
|
||||
# Source: chatqna-charts/templates/service.yaml
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: dataprep-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: port1
|
||||
port: 6007
|
||||
targetPort: 6007
|
||||
selector:
|
||||
app: dataprep-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
# Source: chatqna-charts/templates/service.yaml
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: embedding-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 6006
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: embedding-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
# Source: chatqna-charts/templates/service.yaml
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: llm-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 9009
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
# Source: chatqna-charts/templates/service.yaml
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: reranking-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 8808
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: reranking-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
# Source: chatqna-charts/templates/service.yaml
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: retriever-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 7000
|
||||
targetPort: 7000
|
||||
selector:
|
||||
app: retriever-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
# Source: chatqna-charts/templates/service.yaml
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: vector-db-service
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
- name: vector-db-insight
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
selector:
|
||||
app: vector-db
|
||||
type: ClusterIP
|
||||
---
|
||||
# Source: chatqna-charts/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-backend-server-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
|
||||
selector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: chatqna-backend-server-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: chatqna-config
|
||||
image: opea/chatqna-model-fixed-root:latest
|
||||
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
- containerPort: 8888
|
||||
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
# Source: chatqna-charts/templates/deployment.yaml
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: dataprep-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
|
||||
selector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: dataprep-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: chatqna-config
|
||||
image: opea/dataprep-redis:latest
|
||||
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: dataprep-deploy
|
||||
ports:
|
||||
- containerPort: 6007
|
||||
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
# Source: chatqna-charts/templates/deployment.yaml
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
|
||||
selector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: vector-db
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: chatqna-config
|
||||
image: redis/redis-stack:7.2.0-v9
|
||||
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: vector-db
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
- containerPort: 8001
|
||||
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
# Source: chatqna-charts/templates/deployment.yaml
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: retriever-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 2
|
||||
|
||||
selector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: retriever-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: chatqna-config
|
||||
image: opea/retriever-redis:latest
|
||||
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: retriever-deploy
|
||||
ports:
|
||||
- containerPort: 7000
|
||||
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
# Source: chatqna-charts/templates/deployment.yaml
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: chatqna-config
|
||||
args:
|
||||
- --model-id
|
||||
- "$(EMBEDDING_MODEL_ID)"
|
||||
- --auto-truncate
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
- name: shm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
---
|
||||
# Source: chatqna-charts/templates/deployment.yaml
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: reranking-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
|
||||
selector:
|
||||
matchLabels:
|
||||
app: reranking-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: reranking-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: chatqna-config
|
||||
args:
|
||||
- --model-id
|
||||
- "$(RERANK_MODEL_ID)"
|
||||
- --auto-truncate
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: "none"
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: "true"
|
||||
- name: runtime
|
||||
value: "habana"
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: "all"
|
||||
- name: MAX_WARMUP_SEQUENCE_LENGTH
|
||||
value: "512"
|
||||
image: opea/tei-gaudi:latest
|
||||
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-dependency-deploy
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: reranking-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
- name: shm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
---
|
||||
# Source: chatqna-charts/templates/deployment.yaml
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: chatqna-config
|
||||
command: ["/bin/bash", "-c"]
|
||||
args: ["python3 -m vllm.entrypoints.openai.api_server --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048"]
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: "none"
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: "true"
|
||||
- name: runtime
|
||||
value: "habana"
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: "all"
|
||||
image: opea/llm-vllm-hpu:latest
|
||||
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
- name: shm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
@@ -0,0 +1,507 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
data:
|
||||
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
INDEX_NAME: rag-redis
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
LLM_SERVER_HOST_IP: llm-dependency-svc
|
||||
NODE_SELECTOR: chatqna-opea
|
||||
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||
RERANK_SERVER_HOST_IP: reranking-dependency-svc
|
||||
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: qna-config
|
||||
namespace: default
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-backend-server-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: chatqna-backend-server-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
- containerPort: 8888
|
||||
resources:
|
||||
limits:
|
||||
cpu: 8
|
||||
memory: 8000Mi
|
||||
requests:
|
||||
cpu: 8
|
||||
memory: 8000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: chatqna-backend-server-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
nodePort: 30888
|
||||
port: 8888
|
||||
targetPort: 8888
|
||||
selector:
|
||||
app: chatqna-backend-server-deploy
|
||||
type: NodePort
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: dataprep-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: dataprep-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/dataprep-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: dataprep-deploy
|
||||
ports:
|
||||
- containerPort: 6007
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: dataprep-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: port1
|
||||
port: 6007
|
||||
targetPort: 6007
|
||||
selector:
|
||||
app: dataprep-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(EMBEDDING_MODEL_ID)
|
||||
- --auto-truncate
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
cpu: 80
|
||||
memory: 20000Mi
|
||||
requests:
|
||||
cpu: 80
|
||||
memory: 20000Mi
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: embedding-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 6006
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: embedding-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 15
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(LLM_MODEL_ID)
|
||||
- --max-input-length
|
||||
- '1280'
|
||||
- --max-total-tokens
|
||||
- '2048'
|
||||
- --max-batch-total-tokens
|
||||
- '65536'
|
||||
- --max-batch-prefill-tokens
|
||||
- '4096'
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HF_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
securityContext:
|
||||
capabilities:
|
||||
add:
|
||||
- SYS_NICE
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: llm-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 9009
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: reranking-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: reranking-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: reranking-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(RERANK_MODEL_ID)
|
||||
- --auto-truncate
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HF_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
- name: MAX_WARMUP_SEQUENCE_LENGTH
|
||||
value: '512'
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: reranking-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: reranking-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 8808
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: reranking-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: retriever-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: retriever-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/retriever-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: retriever-deploy
|
||||
ports:
|
||||
- containerPort: 7000
|
||||
resources:
|
||||
requests:
|
||||
cpu: 4
|
||||
memory: 4000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: retriever-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 7000
|
||||
targetPort: 7000
|
||||
selector:
|
||||
app: retriever-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: vector-db
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: redis/redis-stack:7.2.0-v9
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: vector-db
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
- containerPort: 8001
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: vector-db-service
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
- name: vector-db-insight
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
selector:
|
||||
app: vector-db
|
||||
type: ClusterIP
|
||||
---
|
||||
@@ -0,0 +1,421 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
data:
|
||||
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
INDEX_NAME: rag-redis
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
LLM_SERVER_HOST_IP: llm-dependency-svc
|
||||
NODE_SELECTOR: chatqna-opea
|
||||
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||
RERANK_SERVER_HOST_IP: reranking-dependency-svc
|
||||
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: qna-config
|
||||
namespace: default
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-backend-server-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 8
|
||||
selector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: chatqna-backend-server-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna-without-rerank:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
- containerPort: 8888
|
||||
resources:
|
||||
limits:
|
||||
cpu: 8
|
||||
memory: 8000Mi
|
||||
requests:
|
||||
cpu: 8
|
||||
memory: 8000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: chatqna-backend-server-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
nodePort: 30888
|
||||
port: 8888
|
||||
targetPort: 8888
|
||||
selector:
|
||||
app: chatqna-backend-server-deploy
|
||||
type: NodePort
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: dataprep-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: dataprep-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/dataprep-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: dataprep-deploy
|
||||
ports:
|
||||
- containerPort: 6007
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: dataprep-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: port1
|
||||
port: 6007
|
||||
targetPort: 6007
|
||||
selector:
|
||||
app: dataprep-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 8
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(EMBEDDING_MODEL_ID)
|
||||
- --auto-truncate
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
cpu: 80
|
||||
memory: 20000Mi
|
||||
requests:
|
||||
cpu: 80
|
||||
memory: 20000Mi
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: embedding-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 6006
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: embedding-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 64
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(LLM_MODEL_ID)
|
||||
- --max-input-length
|
||||
- '1280'
|
||||
- --max-total-tokens
|
||||
- '2048'
|
||||
- --max-batch-total-tokens
|
||||
- '65536'
|
||||
- --max-batch-prefill-tokens
|
||||
- '4096'
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HF_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
securityContext:
|
||||
capabilities:
|
||||
add:
|
||||
- SYS_NICE
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: llm-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 9009
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: retriever-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 8
|
||||
selector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: retriever-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/retriever-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: retriever-deploy
|
||||
ports:
|
||||
- containerPort: 7000
|
||||
resources:
|
||||
requests:
|
||||
cpu: 4
|
||||
memory: 4000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: retriever-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 7000
|
||||
targetPort: 7000
|
||||
selector:
|
||||
app: retriever-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: vector-db
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: redis/redis-stack:7.2.0-v9
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: vector-db
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
- containerPort: 8001
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: vector-db-service
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
- name: vector-db-insight
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
selector:
|
||||
app: vector-db
|
||||
type: ClusterIP
|
||||
---
|
||||
@@ -0,0 +1,421 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
data:
|
||||
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
INDEX_NAME: rag-redis
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
LLM_SERVER_HOST_IP: llm-dependency-svc
|
||||
NODE_SELECTOR: chatqna-opea
|
||||
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||
RERANK_SERVER_HOST_IP: reranking-dependency-svc
|
||||
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: qna-config
|
||||
namespace: default
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-backend-server-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 4
|
||||
selector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: chatqna-backend-server-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna-without-rerank:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
- containerPort: 8888
|
||||
resources:
|
||||
limits:
|
||||
cpu: 8
|
||||
memory: 8000Mi
|
||||
requests:
|
||||
cpu: 8
|
||||
memory: 8000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: chatqna-backend-server-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
nodePort: 30888
|
||||
port: 8888
|
||||
targetPort: 8888
|
||||
selector:
|
||||
app: chatqna-backend-server-deploy
|
||||
type: NodePort
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: dataprep-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: dataprep-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/dataprep-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: dataprep-deploy
|
||||
ports:
|
||||
- containerPort: 6007
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: dataprep-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: port1
|
||||
port: 6007
|
||||
targetPort: 6007
|
||||
selector:
|
||||
app: dataprep-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 4
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(EMBEDDING_MODEL_ID)
|
||||
- --auto-truncate
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
cpu: 80
|
||||
memory: 20000Mi
|
||||
requests:
|
||||
cpu: 80
|
||||
memory: 20000Mi
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: embedding-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 6006
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: embedding-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 32
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(LLM_MODEL_ID)
|
||||
- --max-input-length
|
||||
- '1280'
|
||||
- --max-total-tokens
|
||||
- '2048'
|
||||
- --max-batch-total-tokens
|
||||
- '65536'
|
||||
- --max-batch-prefill-tokens
|
||||
- '4096'
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HF_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
securityContext:
|
||||
capabilities:
|
||||
add:
|
||||
- SYS_NICE
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: llm-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 9009
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: retriever-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 4
|
||||
selector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: retriever-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/retriever-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: retriever-deploy
|
||||
ports:
|
||||
- containerPort: 7000
|
||||
resources:
|
||||
requests:
|
||||
cpu: 4
|
||||
memory: 4000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: retriever-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 7000
|
||||
targetPort: 7000
|
||||
selector:
|
||||
app: retriever-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: vector-db
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: redis/redis-stack:7.2.0-v9
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: vector-db
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
- containerPort: 8001
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: vector-db-service
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
- name: vector-db-insight
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
selector:
|
||||
app: vector-db
|
||||
type: ClusterIP
|
||||
---
|
||||
@@ -0,0 +1,421 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
data:
|
||||
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
INDEX_NAME: rag-redis
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
LLM_SERVER_HOST_IP: llm-dependency-svc
|
||||
NODE_SELECTOR: chatqna-opea
|
||||
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||
RERANK_SERVER_HOST_IP: reranking-dependency-svc
|
||||
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: qna-config
|
||||
namespace: default
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-backend-server-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: chatqna-backend-server-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna-without-rerank:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
- containerPort: 8888
|
||||
resources:
|
||||
limits:
|
||||
cpu: 8
|
||||
memory: 8000Mi
|
||||
requests:
|
||||
cpu: 8
|
||||
memory: 8000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: chatqna-backend-server-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
nodePort: 30888
|
||||
port: 8888
|
||||
targetPort: 8888
|
||||
selector:
|
||||
app: chatqna-backend-server-deploy
|
||||
type: NodePort
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: dataprep-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: dataprep-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/dataprep-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: dataprep-deploy
|
||||
ports:
|
||||
- containerPort: 6007
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: dataprep-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: port1
|
||||
port: 6007
|
||||
targetPort: 6007
|
||||
selector:
|
||||
app: dataprep-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(EMBEDDING_MODEL_ID)
|
||||
- --auto-truncate
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
cpu: 80
|
||||
memory: 20000Mi
|
||||
requests:
|
||||
cpu: 80
|
||||
memory: 20000Mi
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: embedding-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 6006
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: embedding-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 8
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(LLM_MODEL_ID)
|
||||
- --max-input-length
|
||||
- '1280'
|
||||
- --max-total-tokens
|
||||
- '2048'
|
||||
- --max-batch-total-tokens
|
||||
- '65536'
|
||||
- --max-batch-prefill-tokens
|
||||
- '4096'
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HF_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
securityContext:
|
||||
capabilities:
|
||||
add:
|
||||
- SYS_NICE
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: llm-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 9009
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: retriever-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: retriever-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/retriever-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: retriever-deploy
|
||||
ports:
|
||||
- containerPort: 7000
|
||||
resources:
|
||||
requests:
|
||||
cpu: 4
|
||||
memory: 4000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: retriever-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 7000
|
||||
targetPort: 7000
|
||||
selector:
|
||||
app: retriever-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: vector-db
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: redis/redis-stack:7.2.0-v9
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: vector-db
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
- containerPort: 8001
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: vector-db-service
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
- name: vector-db-insight
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
selector:
|
||||
app: vector-db
|
||||
type: ClusterIP
|
||||
---
|
||||
@@ -0,0 +1,421 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
data:
|
||||
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
INDEX_NAME: rag-redis
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
LLM_SERVER_HOST_IP: llm-dependency-svc
|
||||
NODE_SELECTOR: chatqna-opea
|
||||
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||
RERANK_SERVER_HOST_IP: reranking-dependency-svc
|
||||
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: qna-config
|
||||
namespace: default
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-backend-server-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: chatqna-backend-server-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna-without-rerank:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
- containerPort: 8888
|
||||
resources:
|
||||
limits:
|
||||
cpu: 8
|
||||
memory: 8000Mi
|
||||
requests:
|
||||
cpu: 8
|
||||
memory: 8000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: chatqna-backend-server-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
nodePort: 30888
|
||||
port: 8888
|
||||
targetPort: 8888
|
||||
selector:
|
||||
app: chatqna-backend-server-deploy
|
||||
type: NodePort
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: dataprep-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: dataprep-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/dataprep-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: dataprep-deploy
|
||||
ports:
|
||||
- containerPort: 6007
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: dataprep-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: port1
|
||||
port: 6007
|
||||
targetPort: 6007
|
||||
selector:
|
||||
app: dataprep-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(EMBEDDING_MODEL_ID)
|
||||
- --auto-truncate
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
cpu: 80
|
||||
memory: 20000Mi
|
||||
requests:
|
||||
cpu: 80
|
||||
memory: 20000Mi
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: embedding-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 6006
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: embedding-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 16
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(LLM_MODEL_ID)
|
||||
- --max-input-length
|
||||
- '1280'
|
||||
- --max-total-tokens
|
||||
- '2048'
|
||||
- --max-batch-total-tokens
|
||||
- '65536'
|
||||
- --max-batch-prefill-tokens
|
||||
- '4096'
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HF_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
securityContext:
|
||||
capabilities:
|
||||
add:
|
||||
- SYS_NICE
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: llm-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 9009
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: retriever-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: retriever-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/retriever-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: retriever-deploy
|
||||
ports:
|
||||
- containerPort: 7000
|
||||
resources:
|
||||
requests:
|
||||
cpu: 4
|
||||
memory: 4000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: retriever-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 7000
|
||||
targetPort: 7000
|
||||
selector:
|
||||
app: retriever-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: vector-db
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: redis/redis-stack:7.2.0-v9
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: vector-db
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
- containerPort: 8001
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: vector-db-service
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
- name: vector-db-insight
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
selector:
|
||||
app: vector-db
|
||||
type: ClusterIP
|
||||
---
|
||||
@@ -1,683 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
data:
|
||||
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||
EMBEDDING_SERVICE_HOST_IP: embedding-svc
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
INDEX_NAME: rag-redis
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
LLM_SERVICE_HOST_IP: llm-svc
|
||||
NODE_SELECTOR: chatqna-opea
|
||||
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||
RERANK_SERVICE_HOST_IP: reranking-svc
|
||||
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: qna-config
|
||||
namespace: default
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-backend-server-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 4
|
||||
selector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: chatqna-backend-server-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
- containerPort: 8888
|
||||
resources:
|
||||
limits:
|
||||
cpu: 8
|
||||
memory: 8000Mi
|
||||
requests:
|
||||
cpu: 8
|
||||
memory: 8000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: chatqna-backend-server-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
nodePort: 30888
|
||||
port: 8888
|
||||
targetPort: 8888
|
||||
selector:
|
||||
app: chatqna-backend-server-deploy
|
||||
type: NodePort
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: dataprep-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: dataprep-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/dataprep-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: dataprep-deploy
|
||||
ports:
|
||||
- containerPort: 6007
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: dataprep-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: port1
|
||||
port: 6007
|
||||
targetPort: 6007
|
||||
selector:
|
||||
app: dataprep-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 4
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(EMBEDDING_MODEL_ID)
|
||||
- --auto-truncate
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
cpu: 76
|
||||
memory: 20000Mi
|
||||
requests:
|
||||
cpu: 76
|
||||
memory: 20000Mi
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: embedding-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 6006
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: embedding-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 4
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/embedding-tei:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-deploy
|
||||
ports:
|
||||
- containerPort: 6000
|
||||
resources:
|
||||
requests:
|
||||
cpu: 4
|
||||
memory: 4000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: embedding-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 6000
|
||||
targetPort: 6000
|
||||
selector:
|
||||
app: embedding-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 31
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(LLM_MODEL_ID)
|
||||
- --max-input-length
|
||||
- '1024'
|
||||
- --max-total-tokens
|
||||
- '2048'
|
||||
- --max-batch-total-tokens
|
||||
- '65536'
|
||||
- --max-batch-prefill-tokens
|
||||
- '4096'
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
- name: ENABLE_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: LIMIT_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: USE_FLASH_ATTENTION
|
||||
value: 'true'
|
||||
- name: FLASH_ATTENTION_RECOMPUTE
|
||||
value: 'true'
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
securityContext:
|
||||
capabilities:
|
||||
add:
|
||||
- SYS_NICE
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: llm-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 9009
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 4
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/llm-tgi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-deploy
|
||||
ports:
|
||||
- containerPort: 9000
|
||||
resources:
|
||||
requests:
|
||||
cpu: 4
|
||||
memory: 4000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: llm-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 9000
|
||||
targetPort: 9000
|
||||
selector:
|
||||
app: llm-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: reranking-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: reranking-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: reranking-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(RERANK_MODEL_ID)
|
||||
- --auto-truncate
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HF_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
- name: MAX_WARMUP_SEQUENCE_LENGTH
|
||||
value: '512'
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/tei-gaudi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: reranking-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: reranking-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 8808
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: reranking-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: reranking-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 4
|
||||
selector:
|
||||
matchLabels:
|
||||
app: reranking-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: reranking-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/reranking-tei:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-deploy
|
||||
ports:
|
||||
- containerPort: 8000
|
||||
resources:
|
||||
requests:
|
||||
cpu: 4
|
||||
memory: 4000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: reranking-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: reranking-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 8000
|
||||
targetPort: 8000
|
||||
selector:
|
||||
app: reranking-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: retriever-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 4
|
||||
selector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: retriever-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/retriever-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: retriever-deploy
|
||||
ports:
|
||||
- containerPort: 7000
|
||||
resources:
|
||||
requests:
|
||||
cpu: 4
|
||||
memory: 4000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: retriever-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 7000
|
||||
targetPort: 7000
|
||||
selector:
|
||||
app: retriever-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: vector-db
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: redis/redis-stack:7.2.0-v9
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: vector-db
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
- containerPort: 8001
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: vector-db-service
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
- name: vector-db-insight
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
selector:
|
||||
app: vector-db
|
||||
type: ClusterIP
|
||||
---
|
||||
@@ -1,683 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
data:
|
||||
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||
EMBEDDING_SERVICE_HOST_IP: embedding-svc
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
INDEX_NAME: rag-redis
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
LLM_SERVICE_HOST_IP: llm-svc
|
||||
NODE_SELECTOR: chatqna-opea
|
||||
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||
RERANK_SERVICE_HOST_IP: reranking-svc
|
||||
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: qna-config
|
||||
namespace: default
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-backend-server-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: chatqna-backend-server-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
- containerPort: 8888
|
||||
resources:
|
||||
limits:
|
||||
cpu: 8
|
||||
memory: 8000Mi
|
||||
requests:
|
||||
cpu: 8
|
||||
memory: 8000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: chatqna-backend-server-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
nodePort: 30888
|
||||
port: 8888
|
||||
targetPort: 8888
|
||||
selector:
|
||||
app: chatqna-backend-server-deploy
|
||||
type: NodePort
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: dataprep-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: dataprep-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/dataprep-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: dataprep-deploy
|
||||
ports:
|
||||
- containerPort: 6007
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: dataprep-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: port1
|
||||
port: 6007
|
||||
targetPort: 6007
|
||||
selector:
|
||||
app: dataprep-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(EMBEDDING_MODEL_ID)
|
||||
- --auto-truncate
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
cpu: 76
|
||||
memory: 20000Mi
|
||||
requests:
|
||||
cpu: 76
|
||||
memory: 20000Mi
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: embedding-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 6006
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: embedding-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/embedding-tei:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-deploy
|
||||
ports:
|
||||
- containerPort: 6000
|
||||
resources:
|
||||
requests:
|
||||
cpu: 4
|
||||
memory: 4000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: embedding-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 6000
|
||||
targetPort: 6000
|
||||
selector:
|
||||
app: embedding-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 7
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(LLM_MODEL_ID)
|
||||
- --max-input-length
|
||||
- '1024'
|
||||
- --max-total-tokens
|
||||
- '2048'
|
||||
- --max-batch-total-tokens
|
||||
- '65536'
|
||||
- --max-batch-prefill-tokens
|
||||
- '4096'
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
- name: ENABLE_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: LIMIT_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: USE_FLASH_ATTENTION
|
||||
value: 'true'
|
||||
- name: FLASH_ATTENTION_RECOMPUTE
|
||||
value: 'true'
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
securityContext:
|
||||
capabilities:
|
||||
add:
|
||||
- SYS_NICE
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: llm-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 9009
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/llm-tgi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-deploy
|
||||
ports:
|
||||
- containerPort: 9000
|
||||
resources:
|
||||
requests:
|
||||
cpu: 4
|
||||
memory: 4000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: llm-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 9000
|
||||
targetPort: 9000
|
||||
selector:
|
||||
app: llm-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: reranking-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: reranking-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: reranking-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(RERANK_MODEL_ID)
|
||||
- --auto-truncate
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HF_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
- name: MAX_WARMUP_SEQUENCE_LENGTH
|
||||
value: '512'
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/tei-gaudi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: reranking-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: reranking-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 8808
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: reranking-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: reranking-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: reranking-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: reranking-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/reranking-tei:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-deploy
|
||||
ports:
|
||||
- containerPort: 8000
|
||||
resources:
|
||||
requests:
|
||||
cpu: 4
|
||||
memory: 4000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: reranking-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: reranking-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 8000
|
||||
targetPort: 8000
|
||||
selector:
|
||||
app: reranking-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: retriever-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: retriever-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/retriever-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: retriever-deploy
|
||||
ports:
|
||||
- containerPort: 7000
|
||||
resources:
|
||||
requests:
|
||||
cpu: 4
|
||||
memory: 4000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: retriever-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 7000
|
||||
targetPort: 7000
|
||||
selector:
|
||||
app: retriever-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: vector-db
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: redis/redis-stack:7.2.0-v9
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: vector-db
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
- containerPort: 8001
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: vector-db-service
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
- name: vector-db-insight
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
selector:
|
||||
app: vector-db
|
||||
type: ClusterIP
|
||||
---
|
||||
@@ -1,683 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
data:
|
||||
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||
EMBEDDING_SERVICE_HOST_IP: embedding-svc
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
INDEX_NAME: rag-redis
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
LLM_SERVICE_HOST_IP: llm-svc
|
||||
NODE_SELECTOR: chatqna-opea
|
||||
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||
RERANK_SERVICE_HOST_IP: reranking-svc
|
||||
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: qna-config
|
||||
namespace: default
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-backend-server-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: chatqna-backend-server-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
- containerPort: 8888
|
||||
resources:
|
||||
limits:
|
||||
cpu: 8
|
||||
memory: 8000Mi
|
||||
requests:
|
||||
cpu: 8
|
||||
memory: 8000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: chatqna-backend-server-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
nodePort: 30888
|
||||
port: 8888
|
||||
targetPort: 8888
|
||||
selector:
|
||||
app: chatqna-backend-server-deploy
|
||||
type: NodePort
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: dataprep-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: dataprep-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/dataprep-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: dataprep-deploy
|
||||
ports:
|
||||
- containerPort: 6007
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: dataprep-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: port1
|
||||
port: 6007
|
||||
targetPort: 6007
|
||||
selector:
|
||||
app: dataprep-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(EMBEDDING_MODEL_ID)
|
||||
- --auto-truncate
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
cpu: 76
|
||||
memory: 20000Mi
|
||||
requests:
|
||||
cpu: 76
|
||||
memory: 20000Mi
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: embedding-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 6006
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: embedding-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/embedding-tei:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-deploy
|
||||
ports:
|
||||
- containerPort: 6000
|
||||
resources:
|
||||
requests:
|
||||
cpu: 4
|
||||
memory: 4000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: embedding-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 6000
|
||||
targetPort: 6000
|
||||
selector:
|
||||
app: embedding-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 15
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(LLM_MODEL_ID)
|
||||
- --max-input-length
|
||||
- '1024'
|
||||
- --max-total-tokens
|
||||
- '2048'
|
||||
- --max-batch-total-tokens
|
||||
- '65536'
|
||||
- --max-batch-prefill-tokens
|
||||
- '4096'
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
- name: ENABLE_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: LIMIT_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: USE_FLASH_ATTENTION
|
||||
value: 'true'
|
||||
- name: FLASH_ATTENTION_RECOMPUTE
|
||||
value: 'true'
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
securityContext:
|
||||
capabilities:
|
||||
add:
|
||||
- SYS_NICE
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: llm-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 9009
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/llm-tgi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-deploy
|
||||
ports:
|
||||
- containerPort: 9000
|
||||
resources:
|
||||
requests:
|
||||
cpu: 4
|
||||
memory: 4000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: llm-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 9000
|
||||
targetPort: 9000
|
||||
selector:
|
||||
app: llm-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: reranking-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: reranking-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: reranking-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(RERANK_MODEL_ID)
|
||||
- --auto-truncate
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HF_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
- name: MAX_WARMUP_SEQUENCE_LENGTH
|
||||
value: '512'
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/tei-gaudi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: reranking-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: reranking-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 8808
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: reranking-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: reranking-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: reranking-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: reranking-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/reranking-tei:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-deploy
|
||||
ports:
|
||||
- containerPort: 8000
|
||||
resources:
|
||||
requests:
|
||||
cpu: 4
|
||||
memory: 4000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: reranking-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: reranking-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 8000
|
||||
targetPort: 8000
|
||||
selector:
|
||||
app: reranking-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: retriever-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: retriever-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/retriever-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: retriever-deploy
|
||||
ports:
|
||||
- containerPort: 7000
|
||||
resources:
|
||||
requests:
|
||||
cpu: 4
|
||||
memory: 4000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: retriever-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 7000
|
||||
targetPort: 7000
|
||||
selector:
|
||||
app: retriever-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: vector-db
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: redis/redis-stack:7.2.0-v9
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: vector-db
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
- containerPort: 8001
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: vector-db-service
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
- name: vector-db-insight
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
selector:
|
||||
app: vector-db
|
||||
type: ClusterIP
|
||||
---
|
||||
@@ -1,622 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: qna-config
|
||||
namespace: default
|
||||
data:
|
||||
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||
INDEX_NAME: rag-redis
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
EMBEDDING_SERVICE_HOST_IP: embedding-svc
|
||||
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||
RERANK_SERVICE_HOST_IP: reranking-svc
|
||||
NODE_SELECTOR: chatqna-opea
|
||||
LLM_SERVICE_HOST_IP: llm-svc
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-backend-server-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 4
|
||||
selector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: chatqna-backend-server-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna-without-rerank:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 8888
|
||||
resources:
|
||||
limits:
|
||||
cpu: 8
|
||||
memory: 4000Mi
|
||||
requests:
|
||||
cpu: 8
|
||||
memory: 4000Mi
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: chatqna-backend-server-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: NodePort
|
||||
selector:
|
||||
app: chatqna-backend-server-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 8888
|
||||
targetPort: 8888
|
||||
nodePort: 30888
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: dataprep-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: dataprep-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- env:
|
||||
- name: REDIS_URL
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: REDIS_URL
|
||||
- name: TEI_ENDPOINT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: TEI_EMBEDDING_ENDPOINT
|
||||
- name: INDEX_NAME
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: INDEX_NAME
|
||||
image: opea/dataprep-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: dataprep-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 6007
|
||||
- containerPort: 6008
|
||||
- containerPort: 6009
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: dataprep-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: dataprep-deploy
|
||||
ports:
|
||||
- name: port1
|
||||
port: 6007
|
||||
targetPort: 6007
|
||||
- name: port2
|
||||
port: 6008
|
||||
targetPort: 6008
|
||||
- name: port3
|
||||
port: 6009
|
||||
targetPort: 6009
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 4
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-dependency-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
name: embedding-dependency-deploy
|
||||
args:
|
||||
- --model-id
|
||||
- $(EMBEDDING_MODEL_ID)
|
||||
- --auto-truncate
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
cpu: 76
|
||||
memory: 20000Mi
|
||||
requests:
|
||||
cpu: 76
|
||||
memory: 20000Mi
|
||||
serviceAccountName: default
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
- name: shm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: embedding-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: embedding-dependency-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 6006
|
||||
targetPort: 80
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 4
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/embedding-tei:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 6000
|
||||
resources:
|
||||
limits:
|
||||
cpu: 4
|
||||
requests:
|
||||
cpu: 4
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: embedding-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: embedding-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 6000
|
||||
targetPort: 6000
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 32
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-dependency-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
name: llm-dependency-deploy-demo
|
||||
securityContext:
|
||||
capabilities:
|
||||
add:
|
||||
- SYS_NICE
|
||||
args:
|
||||
- --model-id
|
||||
- $(LLM_MODEL_ID)
|
||||
- --max-input-length
|
||||
- '1024'
|
||||
- --max-total-tokens
|
||||
- '2048'
|
||||
- --max-batch-total-tokens
|
||||
- '65536'
|
||||
- --max-batch-prefill-tokens
|
||||
- '4096'
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
- name: ENABLE_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: LIMIT_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: USE_FLASH_ATTENTION
|
||||
value: 'true'
|
||||
- name: FLASH_ATTENTION_RECOMPUTE
|
||||
value: 'true'
|
||||
serviceAccountName: default
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
- name: shm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: llm-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 9009
|
||||
targetPort: 80
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 4
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/llm-tgi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 9000
|
||||
resources:
|
||||
limits:
|
||||
cpu: 4
|
||||
requests:
|
||||
cpu: 4
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: llm-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: llm-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 9000
|
||||
targetPort: 9000
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: retriever-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 4
|
||||
selector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: retriever-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- env:
|
||||
- name: REDIS_URL
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: REDIS_URL
|
||||
- name: TEI_EMBEDDING_ENDPOINT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: TEI_EMBEDDING_ENDPOINT
|
||||
- name: HUGGINGFACEHUB_API_TOKEN
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: HUGGINGFACEHUB_API_TOKEN
|
||||
- name: INDEX_NAME
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: INDEX_NAME
|
||||
image: opea/retriever-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: retriever-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 7000
|
||||
resources:
|
||||
limits:
|
||||
cpu: 8
|
||||
memory: 2500Mi
|
||||
requests:
|
||||
cpu: 8
|
||||
memory: 2500Mi
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: retriever-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: retriever-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 7000
|
||||
targetPort: 7000
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: vector-db
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
containers:
|
||||
- name: vector-db
|
||||
image: redis/redis-stack:7.2.0-v9
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
- containerPort: 8001
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: vector-db
|
||||
ports:
|
||||
- name: vector-db-service
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
- name: vector-db-insight
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
|
||||
|
||||
---
|
||||
@@ -1,622 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: qna-config
|
||||
namespace: default
|
||||
data:
|
||||
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||
INDEX_NAME: rag-redis
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
EMBEDDING_SERVICE_HOST_IP: embedding-svc
|
||||
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||
RERANK_SERVICE_HOST_IP: reranking-svc
|
||||
NODE_SELECTOR: chatqna-opea
|
||||
LLM_SERVICE_HOST_IP: llm-svc
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-backend-server-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: chatqna-backend-server-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna-without-rerank:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 8888
|
||||
resources:
|
||||
limits:
|
||||
cpu: 8
|
||||
memory: 4000Mi
|
||||
requests:
|
||||
cpu: 8
|
||||
memory: 4000Mi
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: chatqna-backend-server-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: NodePort
|
||||
selector:
|
||||
app: chatqna-backend-server-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 8888
|
||||
targetPort: 8888
|
||||
nodePort: 30888
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: dataprep-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: dataprep-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- env:
|
||||
- name: REDIS_URL
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: REDIS_URL
|
||||
- name: TEI_ENDPOINT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: TEI_EMBEDDING_ENDPOINT
|
||||
- name: INDEX_NAME
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: INDEX_NAME
|
||||
image: opea/dataprep-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: dataprep-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 6007
|
||||
- containerPort: 6008
|
||||
- containerPort: 6009
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: dataprep-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: dataprep-deploy
|
||||
ports:
|
||||
- name: port1
|
||||
port: 6007
|
||||
targetPort: 6007
|
||||
- name: port2
|
||||
port: 6008
|
||||
targetPort: 6008
|
||||
- name: port3
|
||||
port: 6009
|
||||
targetPort: 6009
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-dependency-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
name: embedding-dependency-deploy
|
||||
args:
|
||||
- --model-id
|
||||
- $(EMBEDDING_MODEL_ID)
|
||||
- --auto-truncate
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
cpu: 76
|
||||
memory: 20000Mi
|
||||
requests:
|
||||
cpu: 76
|
||||
memory: 20000Mi
|
||||
serviceAccountName: default
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
- name: shm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: embedding-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: embedding-dependency-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 6006
|
||||
targetPort: 80
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/embedding-tei:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 6000
|
||||
resources:
|
||||
limits:
|
||||
cpu: 4
|
||||
requests:
|
||||
cpu: 4
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: embedding-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: embedding-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 6000
|
||||
targetPort: 6000
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 8
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-dependency-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
name: llm-dependency-deploy-demo
|
||||
securityContext:
|
||||
capabilities:
|
||||
add:
|
||||
- SYS_NICE
|
||||
args:
|
||||
- --model-id
|
||||
- $(LLM_MODEL_ID)
|
||||
- --max-input-length
|
||||
- '1024'
|
||||
- --max-total-tokens
|
||||
- '2048'
|
||||
- --max-batch-total-tokens
|
||||
- '65536'
|
||||
- --max-batch-prefill-tokens
|
||||
- '4096'
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
- name: ENABLE_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: LIMIT_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: USE_FLASH_ATTENTION
|
||||
value: 'true'
|
||||
- name: FLASH_ATTENTION_RECOMPUTE
|
||||
value: 'true'
|
||||
serviceAccountName: default
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
- name: shm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: llm-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 9009
|
||||
targetPort: 80
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/llm-tgi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 9000
|
||||
resources:
|
||||
limits:
|
||||
cpu: 4
|
||||
requests:
|
||||
cpu: 4
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: llm-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: llm-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 9000
|
||||
targetPort: 9000
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: retriever-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: retriever-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- env:
|
||||
- name: REDIS_URL
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: REDIS_URL
|
||||
- name: TEI_EMBEDDING_ENDPOINT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: TEI_EMBEDDING_ENDPOINT
|
||||
- name: HUGGINGFACEHUB_API_TOKEN
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: HUGGINGFACEHUB_API_TOKEN
|
||||
- name: INDEX_NAME
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: INDEX_NAME
|
||||
image: opea/retriever-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: retriever-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 7000
|
||||
resources:
|
||||
limits:
|
||||
cpu: 8
|
||||
memory: 2500Mi
|
||||
requests:
|
||||
cpu: 8
|
||||
memory: 2500Mi
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: retriever-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: retriever-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 7000
|
||||
targetPort: 7000
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: vector-db
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
containers:
|
||||
- name: vector-db
|
||||
image: redis/redis-stack:7.2.0-v9
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
- containerPort: 8001
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: vector-db
|
||||
ports:
|
||||
- name: vector-db-service
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
- name: vector-db-insight
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
|
||||
|
||||
---
|
||||
@@ -1,622 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: qna-config
|
||||
namespace: default
|
||||
data:
|
||||
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||
INDEX_NAME: rag-redis
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
EMBEDDING_SERVICE_HOST_IP: embedding-svc
|
||||
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||
RERANK_SERVICE_HOST_IP: reranking-svc
|
||||
NODE_SELECTOR: chatqna-opea
|
||||
LLM_SERVICE_HOST_IP: llm-svc
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-backend-server-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: chatqna-backend-server-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna-without-rerank:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 8888
|
||||
resources:
|
||||
limits:
|
||||
cpu: 8
|
||||
memory: 4000Mi
|
||||
requests:
|
||||
cpu: 8
|
||||
memory: 4000Mi
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: chatqna-backend-server-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: NodePort
|
||||
selector:
|
||||
app: chatqna-backend-server-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 8888
|
||||
targetPort: 8888
|
||||
nodePort: 30888
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: dataprep-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: dataprep-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- env:
|
||||
- name: REDIS_URL
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: REDIS_URL
|
||||
- name: TEI_ENDPOINT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: TEI_EMBEDDING_ENDPOINT
|
||||
- name: INDEX_NAME
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: INDEX_NAME
|
||||
image: opea/dataprep-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: dataprep-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 6007
|
||||
- containerPort: 6008
|
||||
- containerPort: 6009
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: dataprep-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: dataprep-deploy
|
||||
ports:
|
||||
- name: port1
|
||||
port: 6007
|
||||
targetPort: 6007
|
||||
- name: port2
|
||||
port: 6008
|
||||
targetPort: 6008
|
||||
- name: port3
|
||||
port: 6009
|
||||
targetPort: 6009
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-dependency-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
name: embedding-dependency-deploy
|
||||
args:
|
||||
- --model-id
|
||||
- $(EMBEDDING_MODEL_ID)
|
||||
- --auto-truncate
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
cpu: 76
|
||||
memory: 20000Mi
|
||||
requests:
|
||||
cpu: 76
|
||||
memory: 20000Mi
|
||||
serviceAccountName: default
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
- name: shm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: embedding-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: embedding-dependency-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 6006
|
||||
targetPort: 80
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/embedding-tei:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 6000
|
||||
resources:
|
||||
limits:
|
||||
cpu: 4
|
||||
requests:
|
||||
cpu: 4
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: embedding-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: embedding-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 6000
|
||||
targetPort: 6000
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 16
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-dependency-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
name: llm-dependency-deploy-demo
|
||||
securityContext:
|
||||
capabilities:
|
||||
add:
|
||||
- SYS_NICE
|
||||
args:
|
||||
- --model-id
|
||||
- $(LLM_MODEL_ID)
|
||||
- --max-input-length
|
||||
- '1024'
|
||||
- --max-total-tokens
|
||||
- '2048'
|
||||
- --max-batch-total-tokens
|
||||
- '65536'
|
||||
- --max-batch-prefill-tokens
|
||||
- '4096'
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
- name: ENABLE_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: LIMIT_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: USE_FLASH_ATTENTION
|
||||
value: 'true'
|
||||
- name: FLASH_ATTENTION_RECOMPUTE
|
||||
value: 'true'
|
||||
serviceAccountName: default
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
- name: shm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: llm-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 9009
|
||||
targetPort: 80
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/llm-tgi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 9000
|
||||
resources:
|
||||
limits:
|
||||
cpu: 4
|
||||
requests:
|
||||
cpu: 4
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: llm-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: llm-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 9000
|
||||
targetPort: 9000
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: retriever-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: retriever-deploy
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- env:
|
||||
- name: REDIS_URL
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: REDIS_URL
|
||||
- name: TEI_EMBEDDING_ENDPOINT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: TEI_EMBEDDING_ENDPOINT
|
||||
- name: HUGGINGFACEHUB_API_TOKEN
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: HUGGINGFACEHUB_API_TOKEN
|
||||
- name: INDEX_NAME
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: INDEX_NAME
|
||||
image: opea/retriever-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: retriever-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 7000
|
||||
resources:
|
||||
limits:
|
||||
cpu: 8
|
||||
memory: 2500Mi
|
||||
requests:
|
||||
cpu: 8
|
||||
memory: 2500Mi
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: retriever-svc
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: retriever-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 7000
|
||||
targetPort: 7000
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: vector-db
|
||||
spec:
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
containers:
|
||||
- name: vector-db
|
||||
image: redis/redis-stack:7.2.0-v9
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
- containerPort: 8001
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: vector-db
|
||||
ports:
|
||||
- name: vector-db-service
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
- name: vector-db-insight
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
|
||||
|
||||
---
|
||||
@@ -1,34 +1,276 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
|
||||
from comps import ChatQnAGateway, MicroService, ServiceOrchestrator, ServiceType
|
||||
from langchain_core.prompts import PromptTemplate
|
||||
|
||||
|
||||
class ChatTemplate:
    """Builds the retrieval-augmented prompt text that is handed to the LLM."""

    @staticmethod
    def generate_rag_prompt(question, documents):
        """Render the RAG prompt for ``question`` with ``documents`` as context.

        Args:
            question: Text substituted for ``{question}`` in the template.
            documents: Iterable of strings; they are joined with newlines and
                substituted for ``{context}``.

        Returns:
            The formatted prompt string. The Chinese template is selected when
            at least 30% of the characters of the joined context fall in the
            range U+4E00-U+9FFF; in every other case (including an empty
            context) the English template is used.
        """
        context_str = "\n".join(documents)
        # The truthiness check comes first so an empty context short-circuits
        # before the ratio is computed (no division by zero).
        if context_str and len(re.findall("[\u4E00-\u9FFF]", context_str)) / len(context_str) >= 0.3:
            # chinese context
            template = """
### 你将扮演一个乐于助人、尊重他人并诚实的助手,你的目标是帮助用户解答问题。有效地利用来自本地知识库的搜索结果。确保你的回答中只包含相关信息。如果你不确定问题的答案,请避免分享不准确的信息。
### 搜索结果:{context}
### 问题:{question}
### 回答:
"""
        else:
            # A trailing backslash joins the next physical line into the same
            # prompt line; the explicit "\n" escapes add blank lines.
            template = """
### You are a helpful, respectful and honest assistant to help the user with questions. \
Please refer to the search results obtained from the local knowledge base. \
But be careful to not incorporate the information that you think is not relevant to the question. \
If you don't know the answer to a question, please don't share false information. \n
### Search results: {context} \n
### Question: {question} \n
### Answer:
"""
        return template.format(context=context_str, question=question)
|
||||
|
||||
|
||||
# Endpoint configuration, read once from the environment at import time.
# Every host falls back to "0.0.0.0" and every port to the literal shown; port
# values are converted with int().

# Address of the mega-service itself.
MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "0.0.0.0")
MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))

# *_SERVICE_* endpoints.
EMBEDDING_SERVICE_HOST_IP = os.getenv("EMBEDDING_SERVICE_HOST_IP", "0.0.0.0")
EMBEDDING_SERVICE_PORT = int(os.getenv("EMBEDDING_SERVICE_PORT", 6000))
GUARDRAIL_SERVICE_HOST_IP = os.getenv("GUARDRAIL_SERVICE_HOST_IP", "0.0.0.0")
GUARDRAIL_SERVICE_PORT = int(os.getenv("GUARDRAIL_SERVICE_PORT", 80))
RETRIEVER_SERVICE_HOST_IP = os.getenv("RETRIEVER_SERVICE_HOST_IP", "0.0.0.0")
RETRIEVER_SERVICE_PORT = int(os.getenv("RETRIEVER_SERVICE_PORT", 7000))
RERANK_SERVICE_HOST_IP = os.getenv("RERANK_SERVICE_HOST_IP", "0.0.0.0")
RERANK_SERVICE_PORT = int(os.getenv("RERANK_SERVICE_PORT", 8000))
LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0")
LLM_SERVICE_PORT = int(os.getenv("LLM_SERVICE_PORT", 9000))

# *_SERVER_* endpoints (all default to port 80).
# NOTE(review): presumably these address the model-serving backends directly
# rather than the wrapper microservices above -- confirm against the callers.
EMBEDDING_SERVER_HOST_IP = os.getenv("EMBEDDING_SERVER_HOST_IP", "0.0.0.0")
EMBEDDING_SERVER_PORT = int(os.getenv("EMBEDDING_SERVER_PORT", 80))
RERANK_SERVER_HOST_IP = os.getenv("RERANK_SERVER_HOST_IP", "0.0.0.0")
RERANK_SERVER_PORT = int(os.getenv("RERANK_SERVER_PORT", 80))
LLM_SERVER_HOST_IP = os.getenv("LLM_SERVER_HOST_IP", "0.0.0.0")
LLM_SERVER_PORT = int(os.getenv("LLM_SERVER_PORT", 80))
|
||||
|
||||
|
||||
def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
|
||||
if self.services[cur_node].service_type == ServiceType.EMBEDDING:
|
||||
inputs["inputs"] = inputs["text"]
|
||||
del inputs["text"]
|
||||
elif self.services[cur_node].service_type == ServiceType.RETRIEVER:
|
||||
# prepare the retriever params
|
||||
retriever_parameters = kwargs.get("retriever_parameters", None)
|
||||
if retriever_parameters:
|
||||
inputs.update(retriever_parameters.dict())
|
||||
elif self.services[cur_node].service_type == ServiceType.LLM:
|
||||
# convert TGI/vLLM to unified OpenAI /v1/chat/completions format
|
||||
next_inputs = {}
|
||||
next_inputs["model"] = "tgi" # specifically clarify the fake model to make the format unified
|
||||
next_inputs["messages"] = [{"role": "user", "content": inputs["inputs"]}]
|
||||
next_inputs["max_tokens"] = llm_parameters_dict["max_tokens"]
|
||||
next_inputs["top_p"] = llm_parameters_dict["top_p"]
|
||||
next_inputs["stream"] = inputs["streaming"]
|
||||
next_inputs["frequency_penalty"] = inputs["frequency_penalty"]
|
||||
# next_inputs["presence_penalty"] = inputs["presence_penalty"]
|
||||
# next_inputs["repetition_penalty"] = inputs["repetition_penalty"]
|
||||
next_inputs["temperature"] = inputs["temperature"]
|
||||
inputs = next_inputs
|
||||
return inputs
|
||||
|
||||
|
||||
def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_dict, **kwargs):
|
||||
next_data = {}
|
||||
if self.services[cur_node].service_type == ServiceType.EMBEDDING:
|
||||
assert isinstance(data, list)
|
||||
next_data = {"text": inputs["inputs"], "embedding": data[0]}
|
||||
elif self.services[cur_node].service_type == ServiceType.RETRIEVER:
|
||||
|
||||
docs = [doc["text"] for doc in data["retrieved_docs"]]
|
||||
|
||||
with_rerank = runtime_graph.downstream(cur_node)[0].startswith("rerank")
|
||||
if with_rerank and docs:
|
||||
# forward to rerank
|
||||
# prepare inputs for rerank
|
||||
next_data["query"] = data["initial_query"]
|
||||
next_data["texts"] = [doc["text"] for doc in data["retrieved_docs"]]
|
||||
else:
|
||||
# forward to llm
|
||||
if not docs and with_rerank:
|
||||
# delete the rerank from retriever -> rerank -> llm
|
||||
for ds in reversed(runtime_graph.downstream(cur_node)):
|
||||
for nds in runtime_graph.downstream(ds):
|
||||
runtime_graph.add_edge(cur_node, nds)
|
||||
runtime_graph.delete_node_if_exists(ds)
|
||||
|
||||
# handle template
|
||||
# if user provides template, then format the prompt with it
|
||||
# otherwise, use the default template
|
||||
prompt = data["initial_query"]
|
||||
chat_template = llm_parameters_dict["chat_template"]
|
||||
if chat_template:
|
||||
prompt_template = PromptTemplate.from_template(chat_template)
|
||||
input_variables = prompt_template.input_variables
|
||||
if sorted(input_variables) == ["context", "question"]:
|
||||
prompt = prompt_template.format(question=data["initial_query"], context="\n".join(docs))
|
||||
elif input_variables == ["question"]:
|
||||
prompt = prompt_template.format(question=data["initial_query"])
|
||||
else:
|
||||
print(f"{prompt_template} not used, we only support 2 input variables ['question', 'context']")
|
||||
prompt = ChatTemplate.generate_rag_prompt(data["initial_query"], docs)
|
||||
else:
|
||||
prompt = ChatTemplate.generate_rag_prompt(data["initial_query"], docs)
|
||||
|
||||
next_data["inputs"] = prompt
|
||||
|
||||
elif self.services[cur_node].service_type == ServiceType.RERANK:
|
||||
# rerank the inputs with the scores
|
||||
reranker_parameters = kwargs.get("reranker_parameters", None)
|
||||
top_n = reranker_parameters.top_n if reranker_parameters else 1
|
||||
docs = inputs["texts"]
|
||||
reranked_docs = []
|
||||
for best_response in data[:top_n]:
|
||||
reranked_docs.append(docs[best_response["index"]])
|
||||
|
||||
# handle template
|
||||
# if user provides template, then format the prompt with it
|
||||
# otherwise, use the default template
|
||||
prompt = inputs["query"]
|
||||
chat_template = llm_parameters_dict["chat_template"]
|
||||
if chat_template:
|
||||
prompt_template = PromptTemplate.from_template(chat_template)
|
||||
input_variables = prompt_template.input_variables
|
||||
if sorted(input_variables) == ["context", "question"]:
|
||||
prompt = prompt_template.format(question=prompt, context="\n".join(reranked_docs))
|
||||
elif input_variables == ["question"]:
|
||||
prompt = prompt_template.format(question=prompt)
|
||||
else:
|
||||
print(f"{prompt_template} not used, we only support 2 input variables ['question', 'context']")
|
||||
prompt = ChatTemplate.generate_rag_prompt(prompt, reranked_docs)
|
||||
else:
|
||||
prompt = ChatTemplate.generate_rag_prompt(prompt, reranked_docs)
|
||||
|
||||
next_data["inputs"] = prompt
|
||||
|
||||
else:
|
||||
next_data = data
|
||||
|
||||
return next_data
|
||||
|
||||
|
||||
def align_generator(self, gen, **kwargs):
|
||||
# OpenAI response format
|
||||
# b'data:{"id":"","object":"text_completion","created":1725530204,"model":"meta-llama/Meta-Llama-3-8B-Instruct","system_fingerprint":"2.0.1-native","choices":[{"index":0,"delta":{"role":"assistant","content":"?"},"logprobs":null,"finish_reason":null}]}\n\n'
|
||||
for line in gen:
|
||||
line = line.decode("utf-8")
|
||||
start = line.find("{")
|
||||
end = line.rfind("}") + 1
|
||||
|
||||
json_str = line[start:end]
|
||||
try:
|
||||
# sometimes yield empty chunk, do a fallback here
|
||||
json_data = json.loads(json_str)
|
||||
if json_data["choices"][0]["finish_reason"] != "eos_token":
|
||||
yield f"data: {repr(json_data['choices'][0]['delta']['content'].encode('utf-8'))}\n\n"
|
||||
except Exception as e:
|
||||
yield f"data: {repr(json_str.encode('utf-8'))}\n\n"
|
||||
yield "data: [DONE]\n\n"
|
||||
|
||||
|
||||
class ChatQnAService:
|
||||
def __init__(self, host="0.0.0.0", port=8000):
|
||||
self.host = host
|
||||
self.port = port
|
||||
ServiceOrchestrator.align_inputs = align_inputs
|
||||
ServiceOrchestrator.align_outputs = align_outputs
|
||||
ServiceOrchestrator.align_generator = align_generator
|
||||
self.megaservice = ServiceOrchestrator()
|
||||
|
||||
def add_remote_service(self):
|
||||
|
||||
embedding = MicroService(
|
||||
name="embedding",
|
||||
host=EMBEDDING_SERVICE_HOST_IP,
|
||||
port=EMBEDDING_SERVICE_PORT,
|
||||
endpoint="/v1/embeddings",
|
||||
host=EMBEDDING_SERVER_HOST_IP,
|
||||
port=EMBEDDING_SERVER_PORT,
|
||||
endpoint="/embed",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.EMBEDDING,
|
||||
)
|
||||
|
||||
retriever = MicroService(
|
||||
name="retriever",
|
||||
host=RETRIEVER_SERVICE_HOST_IP,
|
||||
port=RETRIEVER_SERVICE_PORT,
|
||||
endpoint="/v1/retrieval",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.RETRIEVER,
|
||||
)
|
||||
|
||||
rerank = MicroService(
|
||||
name="rerank",
|
||||
host=RERANK_SERVER_HOST_IP,
|
||||
port=RERANK_SERVER_PORT,
|
||||
endpoint="/rerank",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.RERANK,
|
||||
)
|
||||
|
||||
llm = MicroService(
|
||||
name="llm",
|
||||
host=LLM_SERVER_HOST_IP,
|
||||
port=LLM_SERVER_PORT,
|
||||
endpoint="/v1/chat/completions",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.LLM,
|
||||
)
|
||||
self.megaservice.add(embedding).add(retriever).add(rerank).add(llm)
|
||||
self.megaservice.flow_to(embedding, retriever)
|
||||
self.megaservice.flow_to(retriever, rerank)
|
||||
self.megaservice.flow_to(rerank, llm)
|
||||
self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)
|
||||
|
||||
def add_remote_service_without_rerank(self):
|
||||
|
||||
embedding = MicroService(
|
||||
name="embedding",
|
||||
host=EMBEDDING_SERVER_HOST_IP,
|
||||
port=EMBEDDING_SERVER_PORT,
|
||||
endpoint="/embed",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.EMBEDDING,
|
||||
)
|
||||
|
||||
retriever = MicroService(
|
||||
name="retriever",
|
||||
host=RETRIEVER_SERVICE_HOST_IP,
|
||||
port=RETRIEVER_SERVICE_PORT,
|
||||
endpoint="/v1/retrieval",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.RETRIEVER,
|
||||
)
|
||||
|
||||
llm = MicroService(
|
||||
name="llm",
|
||||
host=LLM_SERVER_HOST_IP,
|
||||
port=LLM_SERVER_PORT,
|
||||
endpoint="/v1/chat/completions",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.LLM,
|
||||
)
|
||||
self.megaservice.add(embedding).add(retriever).add(llm)
|
||||
self.megaservice.flow_to(embedding, retriever)
|
||||
self.megaservice.flow_to(retriever, llm)
|
||||
self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)
|
||||
|
||||
def add_remote_service_with_guardrails(self):
|
||||
guardrail_in = MicroService(
|
||||
name="guardrail_in",
|
||||
host=GUARDRAIL_SERVICE_HOST_IP,
|
||||
port=GUARDRAIL_SERVICE_PORT,
|
||||
endpoint="/v1/guardrails",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.GUARDRAIL,
|
||||
)
|
||||
embedding = MicroService(
|
||||
name="embedding",
|
||||
host=EMBEDDING_SERVER_HOST_IP,
|
||||
port=EMBEDDING_SERVER_PORT,
|
||||
endpoint="/embed",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.EMBEDDING,
|
||||
)
|
||||
@@ -42,27 +284,49 @@ class ChatQnAService:
|
||||
)
|
||||
rerank = MicroService(
|
||||
name="rerank",
|
||||
host=RERANK_SERVICE_HOST_IP,
|
||||
port=RERANK_SERVICE_PORT,
|
||||
endpoint="/v1/reranking",
|
||||
host=RERANK_SERVER_HOST_IP,
|
||||
port=RERANK_SERVER_PORT,
|
||||
endpoint="/rerank",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.RERANK,
|
||||
)
|
||||
llm = MicroService(
|
||||
name="llm",
|
||||
host=LLM_SERVICE_HOST_IP,
|
||||
port=LLM_SERVICE_PORT,
|
||||
host=LLM_SERVER_HOST_IP,
|
||||
port=LLM_SERVER_PORT,
|
||||
endpoint="/v1/chat/completions",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.LLM,
|
||||
)
|
||||
self.megaservice.add(embedding).add(retriever).add(rerank).add(llm)
|
||||
# guardrail_out = MicroService(
|
||||
# name="guardrail_out",
|
||||
# host=GUARDRAIL_SERVICE_HOST_IP,
|
||||
# port=GUARDRAIL_SERVICE_PORT,
|
||||
# endpoint="/v1/guardrails",
|
||||
# use_remote_service=True,
|
||||
# service_type=ServiceType.GUARDRAIL,
|
||||
# )
|
||||
# self.megaservice.add(guardrail_in).add(embedding).add(retriever).add(rerank).add(llm).add(guardrail_out)
|
||||
self.megaservice.add(guardrail_in).add(embedding).add(retriever).add(rerank).add(llm)
|
||||
self.megaservice.flow_to(guardrail_in, embedding)
|
||||
self.megaservice.flow_to(embedding, retriever)
|
||||
self.megaservice.flow_to(retriever, rerank)
|
||||
self.megaservice.flow_to(rerank, llm)
|
||||
# self.megaservice.flow_to(llm, guardrail_out)
|
||||
self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--without-rerank", action="store_true")
|
||||
parser.add_argument("--with-guardrails", action="store_true")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
chatqna = ChatQnAService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT)
|
||||
chatqna.add_remote_service()
|
||||
if args.without_rerank:
|
||||
chatqna.add_remote_service_without_rerank()
|
||||
elif args.with_guardrails:
|
||||
chatqna.add_remote_service_with_guardrails()
|
||||
else:
|
||||
chatqna.add_remote_service()
|
||||
|
||||
@@ -30,21 +30,11 @@ opea_micro_services:
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
model-id: ${EMBEDDING_MODEL_ID}
|
||||
embedding:
|
||||
host: ${EMBEDDING_SERVICE_HOST_IP}
|
||||
ports: ${EMBEDDING_SERVICE_PORT}
|
||||
image: opea/embedding-tei:latest
|
||||
endpoint: /v1/embeddings
|
||||
retrieval:
|
||||
host: ${RETRIEVER_SERVICE_HOST_IP}
|
||||
ports: ${RETRIEVER_SERVICE_PORT}
|
||||
image: opea/retriever-redis:latest
|
||||
endpoint: /v1/retrieval
|
||||
reranking:
|
||||
host: ${RERANK_SERVICE_HOST_IP}
|
||||
ports: ${RERANK_SERVICE_PORT}
|
||||
image: opea/reranking-tei:latest
|
||||
endpoint: /v1/reranking
|
||||
tgi-service:
|
||||
host: ${TGI_SERVICE_IP}
|
||||
ports: ${TGI_SERVICE_PORT}
|
||||
@@ -64,11 +54,6 @@ opea_micro_services:
|
||||
USE_FLASH_ATTENTION: true
|
||||
FLASH_ATTENTION_RECOMPUTE: true
|
||||
model-id: ${LLM_MODEL_ID}
|
||||
llm:
|
||||
host: ${LLM_SERVICE_HOST_IP}
|
||||
ports: ${LLM_SERVICE_PORT}
|
||||
image: opea/llm-tgi:latest
|
||||
endpoint: /v1/chat/completions
|
||||
ui:
|
||||
host: ${UI_SERVICE_HOST_IP}
|
||||
ports:
|
||||
|
||||
@@ -1,89 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import os
|
||||
|
||||
from comps import ChatQnAGateway, MicroService, ServiceOrchestrator, ServiceType
|
||||
|
||||
MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "0.0.0.0")
|
||||
MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))
|
||||
GUARDRAIL_SERVICE_HOST_IP = os.getenv("GUARDRAIL_SERVICE_HOST_IP", "0.0.0.0")
|
||||
GUARDRAIL_SERVICE_PORT = int(os.getenv("GUARDRAIL_SERVICE_PORT", 9090))
|
||||
EMBEDDING_SERVICE_HOST_IP = os.getenv("EMBEDDING_SERVICE_HOST_IP", "0.0.0.0")
|
||||
EMBEDDING_SERVICE_PORT = int(os.getenv("EMBEDDING_SERVICE_PORT", 6000))
|
||||
RETRIEVER_SERVICE_HOST_IP = os.getenv("RETRIEVER_SERVICE_HOST_IP", "0.0.0.0")
|
||||
RETRIEVER_SERVICE_PORT = int(os.getenv("RETRIEVER_SERVICE_PORT", 7000))
|
||||
RERANK_SERVICE_HOST_IP = os.getenv("RERANK_SERVICE_HOST_IP", "0.0.0.0")
|
||||
RERANK_SERVICE_PORT = int(os.getenv("RERANK_SERVICE_PORT", 8000))
|
||||
LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0")
|
||||
LLM_SERVICE_PORT = int(os.getenv("LLM_SERVICE_PORT", 9000))
|
||||
|
||||
|
||||
class ChatQnAService:
|
||||
def __init__(self, host="0.0.0.0", port=8000):
|
||||
self.host = host
|
||||
self.port = port
|
||||
self.megaservice = ServiceOrchestrator()
|
||||
|
||||
def add_remote_service(self):
|
||||
guardrail_in = MicroService(
|
||||
name="guardrail_in",
|
||||
host=GUARDRAIL_SERVICE_HOST_IP,
|
||||
port=GUARDRAIL_SERVICE_PORT,
|
||||
endpoint="/v1/guardrails",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.GUARDRAIL,
|
||||
)
|
||||
embedding = MicroService(
|
||||
name="embedding",
|
||||
host=EMBEDDING_SERVICE_HOST_IP,
|
||||
port=EMBEDDING_SERVICE_PORT,
|
||||
endpoint="/v1/embeddings",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.EMBEDDING,
|
||||
)
|
||||
retriever = MicroService(
|
||||
name="retriever",
|
||||
host=RETRIEVER_SERVICE_HOST_IP,
|
||||
port=RETRIEVER_SERVICE_PORT,
|
||||
endpoint="/v1/retrieval",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.RETRIEVER,
|
||||
)
|
||||
rerank = MicroService(
|
||||
name="rerank",
|
||||
host=RERANK_SERVICE_HOST_IP,
|
||||
port=RERANK_SERVICE_PORT,
|
||||
endpoint="/v1/reranking",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.RERANK,
|
||||
)
|
||||
llm = MicroService(
|
||||
name="llm",
|
||||
host=LLM_SERVICE_HOST_IP,
|
||||
port=LLM_SERVICE_PORT,
|
||||
endpoint="/v1/chat/completions",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.LLM,
|
||||
)
|
||||
# guardrail_out = MicroService(
|
||||
# name="guardrail_out",
|
||||
# host=GUARDRAIL_SERVICE_HOST_IP,
|
||||
# port=GUARDRAIL_SERVICE_PORT,
|
||||
# endpoint="/v1/guardrails",
|
||||
# use_remote_service=True,
|
||||
# service_type=ServiceType.GUARDRAIL,
|
||||
# )
|
||||
# self.megaservice.add(guardrail_in).add(embedding).add(retriever).add(rerank).add(llm).add(guardrail_out)
|
||||
self.megaservice.add(guardrail_in).add(embedding).add(retriever).add(rerank).add(llm)
|
||||
self.megaservice.flow_to(guardrail_in, embedding)
|
||||
self.megaservice.flow_to(embedding, retriever)
|
||||
self.megaservice.flow_to(retriever, rerank)
|
||||
self.megaservice.flow_to(rerank, llm)
|
||||
# self.megaservice.flow_to(llm, guardrail_out)
|
||||
self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
chatqna = ChatQnAService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT)
|
||||
chatqna.add_remote_service()
|
||||
@@ -1,275 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
|
||||
from comps import ChatQnAGateway, MicroService, ServiceOrchestrator, ServiceType
|
||||
from langchain_core.prompts import PromptTemplate
|
||||
|
||||
|
||||
class ChatTemplate:
|
||||
@staticmethod
|
||||
def generate_rag_prompt(question, documents):
|
||||
context_str = "\n".join(documents)
|
||||
if context_str and len(re.findall("[\u4E00-\u9FFF]", context_str)) / len(context_str) >= 0.3:
|
||||
# chinese context
|
||||
template = """
|
||||
### 你将扮演一个乐于助人、尊重他人并诚实的助手,你的目标是帮助用户解答问题。有效地利用来自本地知识库的搜索结果。确保你的回答中只包含相关信息。如果你不确定问题的答案,请避免分享不准确的信息。
|
||||
### 搜索结果:{context}
|
||||
### 问题:{question}
|
||||
### 回答:
|
||||
"""
|
||||
else:
|
||||
template = """
|
||||
### You are a helpful, respectful and honest assistant to help the user with questions. \
|
||||
Please refer to the search results obtained from the local knowledge base. \
|
||||
But be careful to not incorporate the information that you think is not relevant to the question. \
|
||||
If you don't know the answer to a question, please don't share false information. \n
|
||||
### Search results: {context} \n
|
||||
### Question: {question} \n
|
||||
### Answer:
|
||||
"""
|
||||
return template.format(context=context_str, question=question)
|
||||
|
||||
|
||||
MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "0.0.0.0")
|
||||
MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))
|
||||
# EMBEDDING_SERVICE_HOST_IP = os.getenv("EMBEDDING_SERVICE_HOST_IP", "0.0.0.0")
|
||||
# EMBEDDING_SERVICE_PORT = int(os.getenv("EMBEDDING_SERVICE_PORT", 6000))
|
||||
# RETRIEVER_SERVICE_HOST_IP = os.getenv("RETRIEVER_SERVICE_HOST_IP", "0.0.0.0")
|
||||
# RETRIEVER_SERVICE_PORT = int(os.getenv("RETRIEVER_SERVICE_PORT", 7000))
|
||||
# RERANK_SERVICE_HOST_IP = os.getenv("RERANK_SERVICE_HOST_IP", "0.0.0.0")
|
||||
# RERANK_SERVICE_PORT = int(os.getenv("RERANK_SERVICE_PORT", 8000))
|
||||
# LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0")
|
||||
# LLM_SERVICE_PORT = int(os.getenv("LLM_SERVICE_PORT", 9000))
|
||||
EMBEDDING_SERVER_HOST_IP = os.getenv("EMBEDDING_SERVER_HOST_IP", "0.0.0.0")
|
||||
EMBEDDING_SERVER_PORT = int(os.getenv("EMBEDDING_SERVER_PORT", 6006))
|
||||
RETRIEVER_SERVICE_HOST_IP = os.getenv("RETRIEVER_SERVICE_HOST_IP", "0.0.0.0")
|
||||
RETRIEVER_SERVICE_PORT = int(os.getenv("RETRIEVER_SERVICE_PORT", 7000))
|
||||
RERANK_SERVER_HOST_IP = os.getenv("RERANK_SERVER_HOST_IP", "0.0.0.0")
|
||||
RERANK_SERVER_PORT = int(os.getenv("RERANK_SERVER_PORT", 8808))
|
||||
LLM_SERVER_HOST_IP = os.getenv("LLM_SERVER_HOST_IP", "0.0.0.0")
|
||||
LLM_SERVER_PORT = int(os.getenv("LLM_SERVER_PORT", 9009))
|
||||
|
||||
|
||||
def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
|
||||
if self.services[cur_node].service_type == ServiceType.EMBEDDING:
|
||||
inputs["inputs"] = inputs["text"]
|
||||
del inputs["text"]
|
||||
elif self.services[cur_node].service_type == ServiceType.RETRIEVER:
|
||||
# prepare the retriever params
|
||||
retriever_parameters = kwargs.get("retriever_parameters", None)
|
||||
if retriever_parameters:
|
||||
inputs.update(retriever_parameters.dict())
|
||||
elif self.services[cur_node].service_type == ServiceType.LLM:
|
||||
# convert TGI/vLLM to unified OpenAI /v1/chat/completions format
|
||||
next_inputs = {}
|
||||
next_inputs["model"] = "tgi" # specifically clarify the fake model to make the format unified
|
||||
next_inputs["messages"] = [{"role": "user", "content": inputs["inputs"]}]
|
||||
next_inputs["max_tokens"] = llm_parameters_dict["max_tokens"]
|
||||
next_inputs["top_p"] = llm_parameters_dict["top_p"]
|
||||
next_inputs["stream"] = inputs["streaming"]
|
||||
next_inputs["frequency_penalty"] = inputs["frequency_penalty"]
|
||||
next_inputs["presence_penalty"] = inputs["presence_penalty"]
|
||||
next_inputs["repetition_penalty"] = inputs["repetition_penalty"]
|
||||
next_inputs["temperature"] = inputs["temperature"]
|
||||
inputs = next_inputs
|
||||
|
||||
return inputs
|
||||
|
||||
|
||||
def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_dict, **kwargs):
|
||||
next_data = {}
|
||||
if self.services[cur_node].service_type == ServiceType.EMBEDDING:
|
||||
assert isinstance(data, list)
|
||||
next_data = {"text": inputs["inputs"], "embedding": data[0]}
|
||||
elif self.services[cur_node].service_type == ServiceType.RETRIEVER:
|
||||
|
||||
docs = [doc["text"] for doc in data["retrieved_docs"]]
|
||||
|
||||
with_rerank = runtime_graph.downstream(cur_node)[0].startswith("rerank")
|
||||
if with_rerank and docs:
|
||||
# forward to rerank
|
||||
# prepare inputs for rerank
|
||||
next_data["query"] = data["initial_query"]
|
||||
next_data["texts"] = [doc["text"] for doc in data["retrieved_docs"]]
|
||||
else:
|
||||
# forward to llm
|
||||
if not docs and with_rerank:
|
||||
# delete the rerank from retriever -> rerank -> llm
|
||||
for ds in reversed(runtime_graph.downstream(cur_node)):
|
||||
for nds in runtime_graph.downstream(ds):
|
||||
runtime_graph.add_edge(cur_node, nds)
|
||||
runtime_graph.delete_node_if_exists(ds)
|
||||
|
||||
# handle template
|
||||
# if user provides template, then format the prompt with it
|
||||
# otherwise, use the default template
|
||||
prompt = data["initial_query"]
|
||||
chat_template = llm_parameters_dict["chat_template"]
|
||||
if chat_template:
|
||||
prompt_template = PromptTemplate.from_template(chat_template)
|
||||
input_variables = prompt_template.input_variables
|
||||
if sorted(input_variables) == ["context", "question"]:
|
||||
prompt = prompt_template.format(question=data["initial_query"], context="\n".join(docs))
|
||||
elif input_variables == ["question"]:
|
||||
prompt = prompt_template.format(question=data["initial_query"])
|
||||
else:
|
||||
print(f"{prompt_template} not used, we only support 2 input variables ['question', 'context']")
|
||||
prompt = ChatTemplate.generate_rag_prompt(data["initial_query"], docs)
|
||||
else:
|
||||
prompt = ChatTemplate.generate_rag_prompt(data["initial_query"], docs)
|
||||
|
||||
next_data["inputs"] = prompt
|
||||
|
||||
elif self.services[cur_node].service_type == ServiceType.RERANK:
|
||||
# rerank the inputs with the scores
|
||||
reranker_parameters = kwargs.get("reranker_parameters", None)
|
||||
top_n = reranker_parameters.top_n if reranker_parameters else 1
|
||||
docs = inputs["texts"]
|
||||
reranked_docs = []
|
||||
for best_response in data[:top_n]:
|
||||
reranked_docs.append(docs[best_response["index"]])
|
||||
|
||||
# handle template
|
||||
# if user provides template, then format the prompt with it
|
||||
# otherwise, use the default template
|
||||
prompt = inputs["query"]
|
||||
chat_template = llm_parameters_dict["chat_template"]
|
||||
if chat_template:
|
||||
prompt_template = PromptTemplate.from_template(chat_template)
|
||||
input_variables = prompt_template.input_variables
|
||||
if sorted(input_variables) == ["context", "question"]:
|
||||
prompt = prompt_template.format(question=prompt, context="\n".join(docs))
|
||||
elif input_variables == ["question"]:
|
||||
prompt = prompt_template.format(question=prompt)
|
||||
else:
|
||||
print(f"{prompt_template} not used, we only support 2 input variables ['question', 'context']")
|
||||
prompt = ChatTemplate.generate_rag_prompt(prompt, docs)
|
||||
else:
|
||||
prompt = ChatTemplate.generate_rag_prompt(prompt, docs)
|
||||
|
||||
next_data["inputs"] = prompt
|
||||
|
||||
return next_data
|
||||
|
||||
|
||||
def align_generator(self, gen, **kwargs):
|
||||
# OpenAI response format
|
||||
# b'data:{"id":"","object":"text_completion","created":1725530204,"model":"meta-llama/Meta-Llama-3-8B-Instruct","system_fingerprint":"2.0.1-native","choices":[{"index":0,"delta":{"role":"assistant","content":"?"},"logprobs":null,"finish_reason":null}]}\n\n'
|
||||
for line in gen:
|
||||
line = line.decode("utf-8")
|
||||
start = line.find("{")
|
||||
end = line.rfind("}") + 1
|
||||
|
||||
json_str = line[start:end]
|
||||
try:
|
||||
# sometimes yield empty chunk, do a fallback here
|
||||
json_data = json.loads(json_str)
|
||||
if json_data["choices"][0]["finish_reason"] != "eos_token":
|
||||
yield f"data: {repr(json_data['choices'][0]['delta']['content'].encode('utf-8'))}\n\n"
|
||||
except Exception as e:
|
||||
yield f"data: {repr(json_str.encode('utf-8'))}\n\n"
|
||||
yield "data: [DONE]\n\n"
|
||||
|
||||
|
||||
class ChatQnAService:
|
||||
def __init__(self, host="0.0.0.0", port=8000):
|
||||
self.host = host
|
||||
self.port = port
|
||||
ServiceOrchestrator.align_inputs = align_inputs
|
||||
ServiceOrchestrator.align_outputs = align_outputs
|
||||
ServiceOrchestrator.align_generator = align_generator
|
||||
self.megaservice = ServiceOrchestrator()
|
||||
|
||||
def add_remote_service(self):
|
||||
|
||||
embedding = MicroService(
|
||||
name="embedding",
|
||||
host=EMBEDDING_SERVER_HOST_IP,
|
||||
port=EMBEDDING_SERVER_PORT,
|
||||
endpoint="/embed",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.EMBEDDING,
|
||||
)
|
||||
|
||||
retriever = MicroService(
|
||||
name="retriever",
|
||||
host=RETRIEVER_SERVICE_HOST_IP,
|
||||
port=RETRIEVER_SERVICE_PORT,
|
||||
endpoint="/v1/retrieval",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.RETRIEVER,
|
||||
)
|
||||
|
||||
rerank = MicroService(
|
||||
name="rerank",
|
||||
host=RERANK_SERVER_HOST_IP,
|
||||
port=RERANK_SERVER_PORT,
|
||||
endpoint="/rerank",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.RERANK,
|
||||
)
|
||||
|
||||
llm = MicroService(
|
||||
name="llm",
|
||||
host=LLM_SERVER_HOST_IP,
|
||||
port=LLM_SERVER_PORT,
|
||||
endpoint="/v1/chat/completions",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.LLM,
|
||||
)
|
||||
self.megaservice.add(embedding).add(retriever).add(rerank).add(llm)
|
||||
self.megaservice.flow_to(embedding, retriever)
|
||||
self.megaservice.flow_to(retriever, rerank)
|
||||
self.megaservice.flow_to(rerank, llm)
|
||||
self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)
|
||||
|
||||
def add_remote_service_without_rerank(self):
|
||||
|
||||
embedding = MicroService(
|
||||
name="embedding",
|
||||
host=EMBEDDING_SERVER_HOST_IP,
|
||||
port=EMBEDDING_SERVER_PORT,
|
||||
endpoint="/embed",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.EMBEDDING,
|
||||
)
|
||||
|
||||
retriever = MicroService(
|
||||
name="retriever",
|
||||
host=RETRIEVER_SERVICE_HOST_IP,
|
||||
port=RETRIEVER_SERVICE_PORT,
|
||||
endpoint="/v1/retrieval",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.RETRIEVER,
|
||||
)
|
||||
|
||||
llm = MicroService(
|
||||
name="llm",
|
||||
host=LLM_SERVER_HOST_IP,
|
||||
port=LLM_SERVER_PORT,
|
||||
endpoint="/v1/chat/completions",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.LLM,
|
||||
)
|
||||
self.megaservice.add(embedding).add(retriever).add(llm)
|
||||
self.megaservice.flow_to(embedding, retriever)
|
||||
self.megaservice.flow_to(retriever, llm)
|
||||
self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--without-rerank", action="store_true")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
chatqna = ChatQnAService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT)
|
||||
if args.without_rerank:
|
||||
chatqna.add_remote_service_without_rerank()
|
||||
else:
|
||||
chatqna.add_remote_service()
|
||||
@@ -1,57 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import os
|
||||
|
||||
from comps import ChatQnAGateway, MicroService, ServiceOrchestrator, ServiceType
|
||||
|
||||
MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "0.0.0.0")
|
||||
MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))
|
||||
EMBEDDING_SERVICE_HOST_IP = os.getenv("EMBEDDING_SERVICE_HOST_IP", "0.0.0.0")
|
||||
EMBEDDING_SERVICE_PORT = int(os.getenv("EMBEDDING_SERVICE_PORT", 6000))
|
||||
RETRIEVER_SERVICE_HOST_IP = os.getenv("RETRIEVER_SERVICE_HOST_IP", "0.0.0.0")
|
||||
RETRIEVER_SERVICE_PORT = int(os.getenv("RETRIEVER_SERVICE_PORT", 7000))
|
||||
LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0")
|
||||
LLM_SERVICE_PORT = int(os.getenv("LLM_SERVICE_PORT", 9000))
|
||||
|
||||
|
||||
class ChatQnAService:
|
||||
def __init__(self, host="0.0.0.0", port=8000):
|
||||
self.host = host
|
||||
self.port = port
|
||||
self.megaservice = ServiceOrchestrator()
|
||||
|
||||
def add_remote_service(self):
|
||||
embedding = MicroService(
|
||||
name="embedding",
|
||||
host=EMBEDDING_SERVICE_HOST_IP,
|
||||
port=EMBEDDING_SERVICE_PORT,
|
||||
endpoint="/v1/embeddings",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.EMBEDDING,
|
||||
)
|
||||
retriever = MicroService(
|
||||
name="retriever",
|
||||
host=RETRIEVER_SERVICE_HOST_IP,
|
||||
port=RETRIEVER_SERVICE_PORT,
|
||||
endpoint="/v1/retrieval",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.RETRIEVER,
|
||||
)
|
||||
llm = MicroService(
|
||||
name="llm",
|
||||
host=LLM_SERVICE_HOST_IP,
|
||||
port=LLM_SERVICE_PORT,
|
||||
endpoint="/v1/chat/completions",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.LLM,
|
||||
)
|
||||
self.megaservice.add(embedding).add(retriever).add(llm)
|
||||
self.megaservice.flow_to(embedding, retriever)
|
||||
self.megaservice.flow_to(retriever, llm)
|
||||
self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
chatqna = ChatQnAService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT)
|
||||
chatqna.add_remote_service()
|
||||
35
ChatQnA/docker_compose/install_docker.sh
Normal file
35
ChatQnA/docker_compose/install_docker.sh
Normal file
@@ -0,0 +1,35 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# Update the package index
|
||||
sudo apt-get -y update
|
||||
|
||||
# Install prerequisites
|
||||
sudo apt-get -y install ca-certificates curl
|
||||
|
||||
# Create the directory for the Docker GPG key
|
||||
sudo install -m 0755 -d /etc/apt/keyrings
|
||||
|
||||
# Add Docker's official GPG key
|
||||
sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc
|
||||
|
||||
# Set permissions for the GPG key
|
||||
sudo chmod a+r /etc/apt/keyrings/docker.asc
|
||||
|
||||
# Add Docker repository to the sources list
|
||||
echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \
|
||||
$(. /etc/os-release && echo "$VERSION_CODENAME") stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
|
||||
|
||||
# Update the package index with Docker packages
|
||||
sudo apt-get -y update
|
||||
|
||||
# Install Docker packages
|
||||
sudo apt-get -y install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
|
||||
|
||||
# Add the current user to the docker group (takes effect after logging out and back in)
|
||||
sudo usermod -aG docker $USER
|
||||
|
||||
# Optional: Verify that Docker is installed correctly
|
||||
sudo docker --version
|
||||
@@ -2,78 +2,174 @@
|
||||
|
||||
This document outlines the deployment process for a ChatQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on AIPC. The steps include Docker image creation, container deployment via Docker Compose, and service execution to integrate microservices such as `embedding`, `retriever`, `rerank`, and `llm`.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
We use [Ollama](https://ollama.com/) as our LLM service for AIPC.
|
||||
|
||||
Please follow the instructions to set up Ollama on your PC. This will set the entrypoint needed for the Ollama to suit the ChatQnA examples.
|
||||
|
||||
### Set Up Ollama LLM Service
|
||||
|
||||
#### Install Ollama Service
|
||||
|
||||
Install Ollama service with one command:
|
||||
|
||||
```
|
||||
curl -fsSL https://ollama.com/install.sh | sh
|
||||
```
|
||||
|
||||
#### Set Ollama Service Configuration
|
||||
|
||||
The Ollama service configuration file is `/etc/systemd/system/ollama.service`. Edit the file to set the `OLLAMA_HOST` environment variable.
|
||||
Replace **<host_ip>** with your host IPv4 address (please use the external public IP). For example, if the host_ip is 10.132.x.y, then set `Environment="OLLAMA_HOST=10.132.x.y:11434"`.
|
||||
|
||||
```
|
||||
Environment="OLLAMA_HOST=host_ip:11434"
|
||||
```
|
||||
|
||||
#### Set https_proxy environment for Ollama
|
||||
|
||||
If your system accesses the network through a proxy, add https_proxy to the Ollama service configuration file:
|
||||
|
||||
```
|
||||
Environment="https_proxy=Your_HTTPS_Proxy"
|
||||
```
|
||||
|
||||
#### Restart Ollama services
|
||||
|
||||
```
|
||||
$ sudo systemctl daemon-reload
|
||||
$ sudo systemctl restart ollama.service
|
||||
```
|
||||
|
||||
#### Check the service started
|
||||
|
||||
```
|
||||
netstat -tuln | grep 11434
|
||||
```
|
||||
|
||||
The output is:
|
||||
|
||||
```
|
||||
tcp 0 0 10.132.x.y:11434 0.0.0.0:* LISTEN
|
||||
```
|
||||
|
||||
#### Pull Ollama LLM model
|
||||
|
||||
Run the following commands to download the LLM model. The <host_ip> is the one set in [Ollama Service Configuration](#set-ollama-service-configuration).
|
||||
|
||||
```
|
||||
export host_ip=<host_ip>
|
||||
export OLLAMA_HOST=http://${host_ip}:11434
|
||||
ollama pull llama3.2
|
||||
```
|
||||
|
||||
After downloading the models, you can list them with `ollama list`.
|
||||
|
||||
The output should be similar to the following:
|
||||
|
||||
```
|
||||
NAME ID SIZE MODIFIED
|
||||
llama3.2:latest a80c4f17acd5 2.0 GB 2 minutes ago
|
||||
```
|
||||
|
||||
### Consume Ollama LLM Service
|
||||
|
||||
Access ollama service to verify that the ollama is functioning correctly.
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:11434/api/generate -d '{"model": "llama3.2", "prompt":"What is Deep Learning?"}'
|
||||
```
|
||||
|
||||
The outputs are similar to these:
|
||||
|
||||
```
|
||||
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.098813868Z","response":"Deep","done":false}
|
||||
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.124514468Z","response":" learning","done":false}
|
||||
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.149754216Z","response":" is","done":false}
|
||||
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.180420784Z","response":" a","done":false}
|
||||
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.229185873Z","response":" subset","done":false}
|
||||
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.263956118Z","response":" of","done":false}
|
||||
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.289097354Z","response":" machine","done":false}
|
||||
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.316838918Z","response":" learning","done":false}
|
||||
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.342309506Z","response":" that","done":false}
|
||||
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.367221264Z","response":" involves","done":false}
|
||||
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.39205893Z","response":" the","done":false}
|
||||
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.417933974Z","response":" use","done":false}
|
||||
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.443110388Z","response":" of","done":false}
|
||||
...
|
||||
```
|
||||
|
||||
## 🚀 Build Docker Images
|
||||
|
||||
First of all, you need to build Docker Images locally and install the python package of it.
|
||||
|
||||
```bash
|
||||
mkdir ~/OPEA -p
|
||||
cd ~/OPEA
|
||||
git clone https://github.com/opea-project/GenAIComps.git
|
||||
cd GenAIComps
|
||||
```
|
||||
|
||||
### 1. Build Embedding Image
|
||||
If you are in a proxy environment, set the proxy-related environment variables:
|
||||
|
||||
export http_proxy="Your_HTTP_Proxy"
|
||||
export https_proxy="Your_HTTPs_Proxy"
|
||||
|
||||
### 1. Build Retriever Image
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/embedding-tei:latest -f comps/embeddings/tei/langchain/Dockerfile .
|
||||
docker build --no-cache -t opea/retriever-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/redis/langchain/Dockerfile .
|
||||
```
|
||||
|
||||
### 2. Build Retriever Image
|
||||
### 2. Build LLM Image
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/retriever-redis:latest -f comps/retrievers/redis/langchain/Dockerfile .
|
||||
docker build --no-cache -t opea/llm-ollama:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/ollama/langchain/Dockerfile .
|
||||
```
|
||||
|
||||
### 3. Build Rerank Image
|
||||
### 3. Build Dataprep Image
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/reranking-tei:latest -f comps/reranks/tei/Dockerfile .
|
||||
```
|
||||
|
||||
### 4. Build LLM Image
|
||||
|
||||
We use [Ollama](https://ollama.com/) as our LLM service for AIPC. Please pre-download Ollama on your PC.
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/llm-ollama:latest -f comps/llms/text-generation/ollama/langchain/Dockerfile .
|
||||
```
|
||||
|
||||
### 5. Build Dataprep Image
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/dataprep-redis:latest -f comps/dataprep/redis/langchain/Dockerfile .
|
||||
docker build --no-cache -t opea/dataprep-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain/Dockerfile .
|
||||
cd ..
|
||||
```
|
||||
|
||||
### 6. Build MegaService Docker Image
|
||||
### 4. Build MegaService Docker Image
|
||||
|
||||
To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `chatqna.py` Python script. Build MegaService Docker image via below command:
|
||||
|
||||
```bash
|
||||
cd ~/OPEA
|
||||
git clone https://github.com/opea-project/GenAIExamples.git
|
||||
cd GenAIExamples/ChatQnA
|
||||
docker build --no-cache -t opea/chatqna:latest -f Dockerfile .
|
||||
cd ../../..
|
||||
docker build --no-cache -t opea/chatqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
|
||||
```
|
||||
|
||||
### 7. Build UI Docker Image
|
||||
### 5. Build UI Docker Image
|
||||
|
||||
Build frontend Docker image via below command:
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/ChatQnA/ui
|
||||
docker build --no-cache -t opea/chatqna-ui:latest -f ./docker/Dockerfile .
|
||||
cd ../../../..
|
||||
cd ~/OPEA/GenAIExamples/ChatQnA/ui
|
||||
docker build --no-cache -t opea/chatqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile .
|
||||
```
|
||||
|
||||
Then run the command `docker images`, you will have the following 7 Docker Images:
|
||||
### 6. Build Nginx Docker Image
|
||||
|
||||
```bash
|
||||
cd ~/OPEA/GenAIComps
|
||||
docker build -t opea/nginx:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/nginx/Dockerfile .
|
||||
```
|
||||
|
||||
Then run the command `docker images`, you will have the following 6 Docker Images:
|
||||
|
||||
1. `opea/dataprep-redis:latest`
|
||||
2. `opea/embedding-tei:latest`
|
||||
3. `opea/retriever-redis:latest`
|
||||
4. `opea/reranking-tei:latest`
|
||||
5. `opea/llm-ollama:latest`
|
||||
6. `opea/chatqna:latest`
|
||||
7. `opea/chatqna-ui:latest`
|
||||
2. `opea/retriever-redis:latest`
|
||||
3. `opea/llm-ollama:latest`
|
||||
4. `opea/chatqna:latest`
|
||||
5. `opea/chatqna-ui:latest`
|
||||
6. `opea/nginx:latest`
|
||||
|
||||
## 🚀 Start Microservices
|
||||
|
||||
@@ -113,21 +209,10 @@ export http_proxy=${your_http_proxy}
|
||||
export https_proxy=${your_http_proxy}
|
||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006"
|
||||
export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
|
||||
export REDIS_URL="redis://${host_ip}:6379"
|
||||
export INDEX_NAME="rag-redis"
|
||||
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
|
||||
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
|
||||
export RERANK_SERVICE_HOST_IP=${host_ip}
|
||||
export LLM_SERVICE_HOST_IP=${host_ip}
|
||||
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/chatqna"
|
||||
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
|
||||
|
||||
export OLLAMA_ENDPOINT=http://${host_ip}:11434
|
||||
export OLLAMA_MODEL="llama3"
|
||||
export OLLAMA_MODEL="llama3.2"
|
||||
```
|
||||
|
||||
- Windows PC
|
||||
@@ -135,21 +220,10 @@ export OLLAMA_MODEL="llama3"
|
||||
```bash
|
||||
set EMBEDDING_MODEL_ID=BAAI/bge-base-en-v1.5
|
||||
set RERANK_MODEL_ID=BAAI/bge-reranker-base
|
||||
set TEI_EMBEDDING_ENDPOINT=http://%host_ip%:6006
|
||||
set TEI_RERANKING_ENDPOINT=http://%host_ip%:8808
|
||||
set REDIS_URL=redis://%host_ip%:6379
|
||||
set INDEX_NAME=rag-redis
|
||||
set HUGGINGFACEHUB_API_TOKEN=%your_hf_api_token%
|
||||
set MEGA_SERVICE_HOST_IP=%host_ip%
|
||||
set EMBEDDING_SERVICE_HOST_IP=%host_ip%
|
||||
set RETRIEVER_SERVICE_HOST_IP=%host_ip%
|
||||
set RERANK_SERVICE_HOST_IP=%host_ip%
|
||||
set LLM_SERVICE_HOST_IP=%host_ip%
|
||||
set BACKEND_SERVICE_ENDPOINT=http://%host_ip%:8888/v1/chatqna
|
||||
set DATAPREP_SERVICE_ENDPOINT=http://%host_ip%:6007/v1/dataprep
|
||||
|
||||
set OLLAMA_ENDPOINT=http://host.docker.internal:11434
|
||||
set OLLAMA_MODEL="llama3"
|
||||
set OLLAMA_MODEL=llama3.2
|
||||
```
|
||||
|
||||
Note: Please replace `host_ip` with your external IP address; do not use localhost.
|
||||
@@ -159,14 +233,8 @@ Note: Please replace with `host_ip` with you external IP address, do not use loc
|
||||
> Before running the docker compose command, you need to be in the folder that has the docker compose yaml file
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/ChatQnA/docker_compose/intel/cpu/aipc/
|
||||
cd ~/OPEA/GenAIExamples/ChatQnA/docker_compose/intel/cpu/aipc/
|
||||
docker compose up -d
|
||||
|
||||
# let ollama service runs
|
||||
# e.g. ollama run llama3
|
||||
OLLAMA_HOST=${host_ip}:11434 ollama run $OLLAMA_MODEL
|
||||
# for windows
|
||||
# ollama run %OLLAMA_MODEL%
|
||||
```
|
||||
|
||||
### Validate Microservices
|
||||
@@ -183,16 +251,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
2. Embedding Microservice
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:6000/v1/embeddings\
|
||||
-X POST \
|
||||
-d '{"text":"hello"}' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
3. Retriever Microservice
|
||||
2. Retriever Microservice
|
||||
To validate the retriever microservice, you need to generate a mock embedding vector of length 768 in Python script:
|
||||
|
||||
```bash
|
||||
@@ -203,7 +262,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
4. TEI Reranking Service
|
||||
3. TEI Reranking Service
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:8808/rerank \
|
||||
@@ -212,22 +271,13 @@ For details on how to verify the correctness of the response, refer to [how-to-v
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
5. Reranking Microservice
|
||||
4. Ollama Service
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:8000/v1/reranking\
|
||||
-X POST \
|
||||
-d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
|
||||
-H 'Content-Type: application/json'
|
||||
curl http://${host_ip}:11434/api/generate -d '{"model": "llama3.2", "prompt":"What is Deep Learning?"}'
|
||||
```
|
||||
|
||||
6. Ollama Service
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:11434/api/generate -d '{"model": "llama3", "prompt":"What is Deep Learning?"}'
|
||||
```
|
||||
|
||||
7. LLM Microservice
|
||||
5. LLM Microservice
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:9000/v1/chat/completions\
|
||||
@@ -236,37 +286,51 @@ For details on how to verify the correctness of the response, refer to [how-to-v
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
8. MegaService
|
||||
6. MegaService
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:8888/v1/chatqna -H "Content-Type: application/json" -d '{
|
||||
"messages": "What is the revenue of Nike in 2023?", "model": "'"${OLLAMA_MODEL}"'"
|
||||
"messages": "What is the revenue of Nike in 2023?"
|
||||
}'
|
||||
```
|
||||
|
||||
9. Dataprep Microservice(Optional)
|
||||
7. Upload RAG Files through Dataprep Microservice (Optional)
|
||||
|
||||
If you want to update the default knowledge base, you can use the following commands:
|
||||
To chat with retrieved information, you need to upload a file using Dataprep service.
|
||||
|
||||
Update Knowledge Base via Local File Upload:
|
||||
Here is an example of Nike 2023 pdf file.
|
||||
|
||||
```bash
|
||||
curl -X POST "http://${host_ip}:6007/v1/dataprep" \
|
||||
-H "Content-Type: multipart/form-data" \
|
||||
-F "files=@./nke-10k-2023.pdf"
|
||||
```
|
||||
```bash
|
||||
# download pdf file
|
||||
wget https://raw.githubusercontent.com/opea-project/GenAIComps/main/comps/retrievers/redis/data/nke-10k-2023.pdf
|
||||
|
||||
This command updates a knowledge base by uploading a local file for processing. Update the file path according to your environment.
|
||||
# upload pdf file with dataprep
|
||||
curl -X POST "http://${host_ip}:6007/v1/dataprep" \
|
||||
-H "Content-Type: multipart/form-data" \
|
||||
-F "files=@./nke-10k-2023.pdf"
|
||||
```
|
||||
|
||||
Add Knowledge Base via HTTP Links:
|
||||
This command updates a knowledge base by uploading a local file for processing. Update the file path according to your environment.
|
||||
|
||||
```bash
|
||||
curl -X POST "http://${host_ip}:6007/v1/dataprep" \
|
||||
-H "Content-Type: multipart/form-data" \
|
||||
-F 'link_list=["https://opea.dev"]'
|
||||
```
|
||||
Alternatively, you can add knowledge base via HTTP Links:
|
||||
|
||||
This command updates a knowledge base by submitting a list of HTTP links for processing.
|
||||
```bash
|
||||
curl -X POST "http://${host_ip}:6007/v1/dataprep" \
|
||||
-H "Content-Type: multipart/form-data" \
|
||||
-F 'link_list=["https://opea.dev"]'
|
||||
```
|
||||
|
||||
This command updates a knowledge base by submitting a list of HTTP links for processing.
|
||||
|
||||
To check the uploaded files, you can retrieve the list of files that have been uploaded:
|
||||
|
||||
```bash
|
||||
curl -X POST "http://${host_ip}:6007/v1/dataprep/get_file" \
|
||||
-H "Content-Type: application/json"
|
||||
```
|
||||
|
||||
The output is:
|
||||
`[{"name":"nke-10k-2023.pdf","id":"nke-10k-2023.pdf","type":"File","parent":""}]`
|
||||
|
||||
## 🚀 Launch the UI
|
||||
|
||||
|
||||
@@ -13,15 +13,17 @@ services:
|
||||
container_name: dataprep-redis-server
|
||||
depends_on:
|
||||
- redis-vector-db
|
||||
- tei-embedding-service
|
||||
ports:
|
||||
- "6007:6007"
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
REDIS_URL: ${REDIS_URL}
|
||||
REDIS_URL: redis://redis-vector-db:6379
|
||||
REDIS_HOST: redis-vector-db
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
TEI_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
tei-embedding-service:
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
@@ -36,20 +38,6 @@ services:
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
|
||||
embedding:
|
||||
image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
|
||||
container_name: embedding-tei-server
|
||||
depends_on:
|
||||
- tei-embedding-service
|
||||
ports:
|
||||
- "6000:6000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
restart: unless-stopped
|
||||
retriever:
|
||||
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
|
||||
container_name: retriever-redis-server
|
||||
@@ -62,9 +50,11 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
REDIS_URL: ${REDIS_URL}
|
||||
REDIS_URL: redis://redis-vector-db:6379
|
||||
REDIS_HOST: redis-vector-db
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
restart: unless-stopped
|
||||
tei-reranking-service:
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
@@ -82,23 +72,6 @@ services:
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
|
||||
reranking:
|
||||
image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
|
||||
container_name: reranking-tei-aipc-server
|
||||
depends_on:
|
||||
- tei-reranking-service
|
||||
ports:
|
||||
- "8000:8000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
restart: unless-stopped
|
||||
llm:
|
||||
image: ${REGISTRY:-opea}/llm-ollama
|
||||
container_name: llm-ollama
|
||||
@@ -109,7 +82,6 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
@@ -120,11 +92,10 @@ services:
|
||||
container_name: chatqna-aipc-backend-server
|
||||
depends_on:
|
||||
- redis-vector-db
|
||||
- dataprep-redis-service
|
||||
- tei-embedding-service
|
||||
- embedding
|
||||
- retriever
|
||||
- tei-reranking-service
|
||||
- reranking
|
||||
- llm
|
||||
ports:
|
||||
- "8888:8888"
|
||||
@@ -132,11 +103,15 @@ services:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
|
||||
- EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
|
||||
- RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
|
||||
- RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP}
|
||||
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
|
||||
- MEGA_SERVICE_HOST_IP=chaqna-aipc-backend-server
|
||||
- EMBEDDING_SERVER_HOST_IP=tei-embedding-service
|
||||
- EMBEDDING_SERVER_PORT=80
|
||||
- RETRIEVER_SERVICE_HOST_IP=retriever
|
||||
- RERANK_SERVER_HOST_IP=tei-reranking-service
|
||||
- RERANK_SERVER_PORT=80
|
||||
- LLM_SERVER_HOST_IP=llm
|
||||
- LLM_SERVER_PORT=9000
|
||||
- LOGFLAG=${LOGFLAG}
|
||||
ipc: host
|
||||
restart: always
|
||||
chaqna-aipc-ui-server:
|
||||
@@ -150,8 +125,27 @@ services:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
|
||||
- UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT}
|
||||
ipc: host
|
||||
restart: always
|
||||
# Nginx reverse proxy placed in front of the UI, the ChatQnA backend and the
# dataprep service.
chaqna-aipc-nginx-server:
  image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
  container_name: chaqna-aipc-nginx-server
  depends_on:
    - chaqna-aipc-backend-server
    - chaqna-aipc-ui-server
  ports:
    # Host port is configurable through NGINX_PORT and defaults to 80.
    - "${NGINX_PORT:-80}:80"
  environment:
    - no_proxy=${no_proxy}
    - https_proxy=${https_proxy}
    - http_proxy=${http_proxy}
    # Upstream hosts must be names that exist in this compose file: these are
    # the same service names listed under depends_on above (the original
    # values referenced chatqna-xeon-* hosts, which are not defined here).
    - FRONTEND_SERVICE_IP=chaqna-aipc-ui-server
    - FRONTEND_SERVICE_PORT=5173
    - BACKEND_SERVICE_NAME=chatqna
    - BACKEND_SERVICE_IP=chaqna-aipc-backend-server
    - BACKEND_SERVICE_PORT=8888
    - DATAPREP_SERVICE_IP=dataprep-redis-service
    - DATAPREP_SERVICE_PORT=6007
  ipc: host
  restart: always
|
||||
|
||||
|
||||
20
ChatQnA/docker_compose/intel/cpu/aipc/set_env.sh
Normal file
20
ChatQnA/docker_compose/intel/cpu/aipc/set_env.sh
Normal file
@@ -0,0 +1,20 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
|
||||
# Report missing input variables. These checks only print a message; the
# script keeps going and the exports below still run.
[ -n "${your_hf_api_token}" ] || echo "Error: HUGGINGFACEHUB_API_TOKEN is not set. Please set your_hf_api_token."
[ -n "${host_ip}" ] || echo "Error: host_ip is not set. Please set host_ip first."

# Hugging Face token, taken from the caller-provided your_hf_api_token.
HUGGINGFACEHUB_API_TOKEN="${your_hf_api_token}"

# Model identifiers and Redis index name.
EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
RERANK_MODEL_ID="BAAI/bge-reranker-base"
INDEX_NAME="rag-redis"

# Ollama endpoint (built from host_ip, port 11434) and the model name.
OLLAMA_ENDPOINT="http://${host_ip}:11434"
OLLAMA_MODEL="llama3.2"

# Export everything in one go so the values reach child processes.
export HUGGINGFACEHUB_API_TOKEN \
       EMBEDDING_MODEL_ID \
       RERANK_MODEL_ID \
       INDEX_NAME \
       OLLAMA_ENDPOINT \
       OLLAMA_MODEL
|
||||
@@ -97,61 +97,20 @@ After launching your instance, you can connect to it using SSH (for Linux instan
|
||||
|
||||
First of all, you need to build Docker Images locally and install the python package of it.
|
||||
|
||||
### 1. Build Embedding Image
|
||||
|
||||
```bash
|
||||
git clone https://github.com/opea-project/GenAIComps.git
|
||||
cd GenAIComps
|
||||
docker build --no-cache -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/tei/langchain/Dockerfile .
|
||||
```
|
||||
|
||||
### 2. Build Retriever Image
|
||||
### 1. Build Retriever Image
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/retriever-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/redis/langchain/Dockerfile .
|
||||
```
|
||||
|
||||
### 3. Build Rerank Image
|
||||
|
||||
> Skip for ChatQnA without Rerank pipeline
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/tei/Dockerfile .
|
||||
```
|
||||
|
||||
### 4. Build LLM Image
|
||||
|
||||
#### Use TGI as backend
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile .
|
||||
```
|
||||
|
||||
#### Use vLLM as backend
|
||||
|
||||
Build vLLM docker.
|
||||
|
||||
```bash
|
||||
git clone https://github.com/vllm-project/vllm.git
|
||||
cd ./vllm/
|
||||
docker build --no-cache -t opea/vllm:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile.cpu .
|
||||
cd ..
|
||||
```
|
||||
|
||||
Build microservice.
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/llm-vllm:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/vllm/langchain/Dockerfile .
|
||||
```
|
||||
|
||||
### 5. Build Dataprep Image
|
||||
### 2. Build Dataprep Image
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/dataprep-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain/Dockerfile .
|
||||
cd ..
|
||||
```
|
||||
|
||||
### 6. Build MegaService Docker Image
|
||||
### 3. Build MegaService Docker Image
|
||||
|
||||
1. MegaService with Rerank
|
||||
|
||||
@@ -173,7 +132,7 @@ cd ..
|
||||
docker build --no-cache -t opea/chatqna-without-rerank:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile.without_rerank .
|
||||
```
|
||||
|
||||
### 7. Build UI Docker Image
|
||||
### 4. Build UI Docker Image
|
||||
|
||||
Build frontend Docker image via below command:
|
||||
|
||||
@@ -182,7 +141,7 @@ cd GenAIExamples/ChatQnA/ui
|
||||
docker build --no-cache -t opea/chatqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile .
|
||||
```
|
||||
|
||||
### 8. Build Conversational React UI Docker Image (Optional)
|
||||
### 5. Build Conversational React UI Docker Image (Optional)
|
||||
|
||||
Build frontend Docker image that enables Conversational experience with ChatQnA megaservice via below command:
|
||||
|
||||
@@ -193,23 +152,20 @@ cd GenAIExamples/ChatQnA/ui
|
||||
docker build --no-cache -t opea/chatqna-conversation-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.react .
|
||||
```
|
||||
|
||||
### 9. Build Nginx Docker Image
|
||||
### 6. Build Nginx Docker Image
|
||||
|
||||
```bash
|
||||
cd GenAIComps
|
||||
docker build -t opea/nginx:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/nginx/Dockerfile .
|
||||
```
|
||||
|
||||
Then run the command `docker images`, you will have the following 8 Docker Images:
|
||||
Then run the command `docker images`, you will have the following 5 Docker Images:
|
||||
|
||||
1. `opea/dataprep-redis:latest`
|
||||
2. `opea/embedding-tei:latest`
|
||||
3. `opea/retriever-redis:latest`
|
||||
4. `opea/reranking-tei:latest`
|
||||
5. `opea/llm-tgi:latest` or `opea/llm-vllm:latest`
|
||||
6. `opea/chatqna:latest` or `opea/chatqna-without-rerank:latest`
|
||||
7. `opea/chatqna-ui:latest`
|
||||
8. `opea/nginx:latest`
|
||||
2. `opea/retriever-redis:latest`
|
||||
3. `opea/chatqna:latest` or `opea/chatqna-without-rerank:latest`
|
||||
4. `opea/chatqna-ui:latest`
|
||||
5. `opea/nginx:latest`
|
||||
|
||||
## 🚀 Start Microservices
|
||||
|
||||
@@ -315,16 +271,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
2. Embedding Microservice
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:6000/v1/embeddings\
|
||||
-X POST \
|
||||
-d '{"text":"hello"}' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
3. Retriever Microservice
|
||||
2. Retriever Microservice
|
||||
|
||||
To consume the retriever microservice, you need to generate a mock embedding vector by Python script. The length of embedding vector
|
||||
is determined by the embedding model.
|
||||
@@ -340,7 +287,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
4. TEI Reranking Service
|
||||
3. TEI Reranking Service
|
||||
|
||||
> Skip for ChatQnA without Rerank pipeline
|
||||
|
||||
@@ -351,18 +298,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
5. Reranking Microservice
|
||||
|
||||
> Skip for ChatQnA without Rerank pipeline
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:8000/v1/reranking\
|
||||
-X POST \
|
||||
-d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
6. LLM backend Service
|
||||
4. LLM backend Service
|
||||
|
||||
In first startup, this service will take more time to download the model files. After it's finished, the service will be ready.
|
||||
|
||||
@@ -395,31 +331,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
|
||||
-d '{"model": "Intel/neural-chat-7b-v3-3", "prompt": "What is Deep Learning?", "max_tokens": 32, "temperature": 0}'
|
||||
```
|
||||
|
||||
7. LLM Microservice
|
||||
|
||||
This service depends on above LLM backend service startup. It will be ready after long time, to wait for them being ready in first startup.
|
||||
|
||||
```bash
|
||||
# TGI service
|
||||
curl http://${host_ip}:9000/v1/chat/completions\
|
||||
-X POST \
|
||||
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
For parameters in TGI modes, please refer to [HuggingFace InferenceClient API](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.text_generation) (except we rename "max_new_tokens" to "max_tokens".)
|
||||
|
||||
```bash
|
||||
# vLLM Service
|
||||
curl http://${host_ip}:9000/v1/chat/completions \
|
||||
-X POST \
|
||||
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_p":1,"temperature":0.7,"frequency_penalty":0,"presence_penalty":0, "streaming":false}' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
For parameters in vLLM modes, can refer to [LangChain VLLMOpenAI API](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.vllm.VLLMOpenAI.html)
|
||||
|
||||
8. MegaService
|
||||
5. MegaService
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:8888/v1/chatqna -H "Content-Type: application/json" -d '{
|
||||
@@ -427,7 +339,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
|
||||
}'
|
||||
```
|
||||
|
||||
9. Nginx Service
|
||||
6. Nginx Service
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:${NGINX_PORT}/v1/chatqna \
|
||||
@@ -435,7 +347,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
|
||||
-d '{"messages": "What is the revenue of Nike in 2023?"}'
|
||||
```
|
||||
|
||||
10. Dataprep Microservice(Optional)
|
||||
7. Dataprep Microservice(Optional)
|
||||
|
||||
If you want to update the default knowledge base, you can use the following commands:
|
||||
|
||||
|
||||
@@ -70,38 +70,20 @@ git clone https://github.com/opea-project/GenAIComps.git
|
||||
cd GenAIComps
|
||||
```
|
||||
|
||||
### 1. Build Embedding Image
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/tei/langchain/Dockerfile .
|
||||
```
|
||||
|
||||
### 2. Build Retriever Image
|
||||
### 1. Build Retriever Image
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/retriever-qdrant:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/qdrant/haystack/Dockerfile .
|
||||
```
|
||||
|
||||
### 3. Build Rerank Image
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/tei/Dockerfile .`
|
||||
```
|
||||
|
||||
### 4. Build LLM Image
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile .
|
||||
```
|
||||
|
||||
### 5. Build Dataprep Image
|
||||
### 2. Build Dataprep Image
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/dataprep-qdrant:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/qdrant/langchain/Dockerfile .
|
||||
cd ..
|
||||
```
|
||||
|
||||
### 6. Build MegaService Docker Image
|
||||
### 3. Build MegaService Docker Image
|
||||
|
||||
To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `chatqna.py` Python script. Build MegaService Docker image via below command:
|
||||
|
||||
@@ -112,7 +94,7 @@ docker build --no-cache -t opea/chatqna:latest --build-arg https_proxy=$https_pr
|
||||
cd ../../..
|
||||
```
|
||||
|
||||
### 7. Build UI Docker Image
|
||||
### 4. Build UI Docker Image
|
||||
|
||||
Build the frontend Docker image with the command below:
|
||||
|
||||
@@ -122,7 +104,7 @@ docker build --no-cache -t opea/chatqna-ui:latest --build-arg https_proxy=$https
|
||||
cd ../../../..
|
||||
```
|
||||
|
||||
### 8. Build Conversational React UI Docker Image (Optional)
|
||||
### 5. Build Conversational React UI Docker Image (Optional)
|
||||
|
||||
Build the frontend Docker image that enables a conversational experience with the ChatQnA megaservice with the command below:
|
||||
|
||||
@@ -136,15 +118,20 @@ docker build --no-cache -t opea/chatqna-conversation-ui:latest --build-arg https
|
||||
cd ../../../..
|
||||
```
|
||||
|
||||
Then run the command `docker images`, you will have the following 7 Docker Images:
|
||||
### 6. Build Nginx Docker Image
|
||||
|
||||
```bash
|
||||
cd GenAIComps
|
||||
docker build -t opea/nginx:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/nginx/Dockerfile .
|
||||
```
|
||||
|
||||
Then run the command `docker images`; you should see the following 5 Docker images:
|
||||
|
||||
1. `opea/dataprep-qdrant:latest`
|
||||
2. `opea/embedding-tei:latest`
|
||||
3. `opea/retriever-qdrant:latest`
|
||||
4. `opea/reranking-tei:latest`
|
||||
5. `opea/llm-tgi:latest`
|
||||
6. `opea/chatqna:latest`
|
||||
7. `opea/chatqna-ui:latest`
|
||||
2. `opea/retriever-qdrant:latest`
|
||||
3. `opea/chatqna:latest`
|
||||
4. `opea/chatqna-ui:latest`
|
||||
5. `opea/nginx:latest`
|
||||
|
||||
## 🚀 Start Microservices
|
||||
|
||||
@@ -193,20 +180,7 @@ export https_proxy=${your_http_proxy}
|
||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
|
||||
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6040"
|
||||
export TEI_RERANKING_ENDPOINT="http://${host_ip}:6041"
|
||||
export TGI_LLM_ENDPOINT="http://${host_ip}:6042"
|
||||
export QDRANT_HOST=${host_ip}
|
||||
export QDRANT_PORT=6333
|
||||
export INDEX_NAME="rag-qdrant"
|
||||
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
|
||||
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
|
||||
export RERANK_SERVICE_HOST_IP=${host_ip}
|
||||
export LLM_SERVICE_HOST_IP=${host_ip}
|
||||
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8912/v1/chatqna"
|
||||
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6043/v1/dataprep"
|
||||
```
|
||||
|
||||
Note: Please replace `host_ip` with your external IP address; do not use localhost.
|
||||
@@ -234,16 +208,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
2. Embedding Microservice
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:6044/v1/embeddings\
|
||||
-X POST \
|
||||
-d '{"text":"hello"}' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
3. Retriever Microservice
|
||||
2. Retriever Microservice
|
||||
|
||||
To consume the retriever microservice, you need to generate a mock embedding vector with a Python script. The length of the embedding vector
|
||||
is determined by the embedding model.
|
||||
@@ -259,7 +224,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
4. TEI Reranking Service
|
||||
3. TEI Reranking Service
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:6041/rerank \
|
||||
@@ -268,16 +233,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
5. Reranking Microservice
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:6046/v1/reranking\
|
||||
-X POST \
|
||||
-d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
6. TGI Service
|
||||
4. TGI Service
|
||||
|
||||
On first startup, this service takes longer because it has to download the model files. Once the download has finished, the service will be ready.
|
||||
|
||||
@@ -302,16 +258,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
7. LLM Microservice
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:6047/v1/chat/completions\
|
||||
-X POST \
|
||||
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
8. MegaService
|
||||
5. MegaService
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:8912/v1/chatqna -H "Content-Type: application/json" -d '{
|
||||
@@ -319,7 +266,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
|
||||
}'
|
||||
```
|
||||
|
||||
9. Dataprep Microservice(Optional)
|
||||
6. Dataprep Microservice(Optional)
|
||||
|
||||
If you want to update the default knowledge base, you can use the following commands:
|
||||
|
||||
|
||||
@@ -20,10 +20,10 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
REDIS_URL: ${REDIS_URL}
|
||||
REDIS_HOST: ${REDIS_HOST}
|
||||
REDIS_URL: redis://redis-vector-db:6379
|
||||
REDIS_HOST: redis-vector-db
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
TEI_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
tei-embedding-service:
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
@@ -38,20 +38,6 @@ services:
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
|
||||
embedding:
|
||||
image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
|
||||
container_name: embedding-tei-server
|
||||
depends_on:
|
||||
- tei-embedding-service
|
||||
ports:
|
||||
- "6000:6000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
restart: unless-stopped
|
||||
retriever:
|
||||
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
|
||||
container_name: retriever-redis-server
|
||||
@@ -64,9 +50,10 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
REDIS_URL: ${REDIS_URL}
|
||||
REDIS_URL: redis://redis-vector-db:6379
|
||||
REDIS_HOST: redis-vector-db
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
restart: unless-stopped
|
||||
tei-reranking-service:
|
||||
@@ -85,23 +72,6 @@ services:
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
|
||||
reranking:
|
||||
image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
|
||||
container_name: reranking-tei-xeon-server
|
||||
depends_on:
|
||||
- tei-reranking-service
|
||||
ports:
|
||||
- "8000:8000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
restart: unless-stopped
|
||||
tgi-service:
|
||||
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
|
||||
container_name: tgi-service
|
||||
@@ -118,83 +88,65 @@ services:
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
|
||||
llm:
|
||||
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
|
||||
container_name: llm-tgi-server
|
||||
depends_on:
|
||||
- tgi-service
|
||||
ports:
|
||||
- "9000:9000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
restart: unless-stopped
|
||||
chaqna-xeon-backend-server:
|
||||
chatqna-xeon-backend-server:
|
||||
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
|
||||
container_name: chatqna-xeon-backend-server
|
||||
depends_on:
|
||||
- redis-vector-db
|
||||
- tei-embedding-service
|
||||
- embedding
|
||||
- dataprep-redis-service
|
||||
- retriever
|
||||
- tei-reranking-service
|
||||
- reranking
|
||||
- tgi-service
|
||||
- llm
|
||||
ports:
|
||||
- "8888:8888"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
|
||||
- EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
|
||||
- RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
|
||||
- RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP}
|
||||
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
|
||||
- MEGA_SERVICE_HOST_IP=chatqna-xeon-backend-server
|
||||
- EMBEDDING_SERVER_HOST_IP=tei-embedding-service
|
||||
- EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-80}
|
||||
- RETRIEVER_SERVICE_HOST_IP=retriever
|
||||
- RERANK_SERVER_HOST_IP=tei-reranking-service
|
||||
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
|
||||
- LLM_SERVER_HOST_IP=tgi-service
|
||||
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
|
||||
- LOGFLAG=${LOGFLAG}
|
||||
ipc: host
|
||||
restart: always
|
||||
chaqna-xeon-ui-server:
|
||||
chatqna-xeon-ui-server:
|
||||
image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest}
|
||||
container_name: chatqna-xeon-ui-server
|
||||
depends_on:
|
||||
- chaqna-xeon-backend-server
|
||||
- chatqna-xeon-backend-server
|
||||
ports:
|
||||
- "5173:5173"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
|
||||
- UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT}
|
||||
- GET_FILE=${DATAPREP_GET_FILE_ENDPOINT}
|
||||
- DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT}
|
||||
ipc: host
|
||||
restart: always
|
||||
chaqna-xeon-nginx-server:
|
||||
chatqna-xeon-nginx-server:
|
||||
image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
|
||||
container_name: chaqna-xeon-nginx-server
|
||||
container_name: chatqna-xeon-nginx-server
|
||||
depends_on:
|
||||
- chaqna-xeon-backend-server
|
||||
- chaqna-xeon-ui-server
|
||||
- chatqna-xeon-backend-server
|
||||
- chatqna-xeon-ui-server
|
||||
ports:
|
||||
- "${NGINX_PORT:-80}:80"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- FRONTEND_SERVICE_IP=${FRONTEND_SERVICE_IP}
|
||||
- FRONTEND_SERVICE_PORT=${FRONTEND_SERVICE_PORT}
|
||||
- BACKEND_SERVICE_NAME=${BACKEND_SERVICE_NAME}
|
||||
- BACKEND_SERVICE_IP=${BACKEND_SERVICE_IP}
|
||||
- BACKEND_SERVICE_PORT=${BACKEND_SERVICE_PORT}
|
||||
- FRONTEND_SERVICE_IP=chatqna-xeon-ui-server
|
||||
- FRONTEND_SERVICE_PORT=5173
|
||||
- BACKEND_SERVICE_NAME=chatqna
|
||||
- BACKEND_SERVICE_IP=chatqna-xeon-backend-server
|
||||
- BACKEND_SERVICE_PORT=8888
|
||||
- DATAPREP_SERVICE_IP=dataprep-redis-service
|
||||
- DATAPREP_SERVICE_PORT=6007
|
||||
ipc: host
|
||||
restart: always
|
||||
|
||||
|
||||
@@ -1,184 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
services:
|
||||
redis-vector-db:
|
||||
image: redis/redis-stack:7.2.0-v9
|
||||
container_name: redis-vector-db
|
||||
ports:
|
||||
- "6379:6379"
|
||||
- "8001:8001"
|
||||
dataprep-redis-service:
|
||||
image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest}
|
||||
container_name: dataprep-redis-server
|
||||
depends_on:
|
||||
- redis-vector-db
|
||||
- tei-embedding-service
|
||||
ports:
|
||||
- "6007:6007"
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
REDIS_URL: ${REDIS_URL}
|
||||
REDIS_HOST: ${REDIS_HOST}
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
tei-embedding-service:
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
container_name: tei-embedding-server
|
||||
ports:
|
||||
- "6006:80"
|
||||
volumes:
|
||||
- "./data:/data"
|
||||
shm_size: 1g
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
|
||||
# embedding:
|
||||
# image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
|
||||
# container_name: embedding-tei-server
|
||||
# depends_on:
|
||||
# - tei-embedding-service
|
||||
# ports:
|
||||
# - "6000:6000"
|
||||
# ipc: host
|
||||
# environment:
|
||||
# no_proxy: ${no_proxy}
|
||||
# http_proxy: ${http_proxy}
|
||||
# https_proxy: ${https_proxy}
|
||||
# TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
# restart: unless-stopped
|
||||
retriever:
|
||||
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
|
||||
container_name: retriever-redis-server
|
||||
depends_on:
|
||||
- redis-vector-db
|
||||
ports:
|
||||
- "7000:7000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
REDIS_URL: ${REDIS_URL}
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
restart: unless-stopped
|
||||
tei-reranking-service:
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
container_name: tei-reranking-server
|
||||
ports:
|
||||
- "8808:80"
|
||||
volumes:
|
||||
- "./data:/data"
|
||||
shm_size: 1g
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
|
||||
# reranking:
|
||||
# image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
|
||||
# container_name: reranking-tei-xeon-server
|
||||
# depends_on:
|
||||
# - tei-reranking-service
|
||||
# ports:
|
||||
# - "8000:8000"
|
||||
# ipc: host
|
||||
# environment:
|
||||
# no_proxy: ${no_proxy}
|
||||
# http_proxy: ${http_proxy}
|
||||
# https_proxy: ${https_proxy}
|
||||
# TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
|
||||
# HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
# HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
# HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
# restart: unless-stopped
|
||||
tgi-service:
|
||||
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
|
||||
container_name: tgi-service
|
||||
ports:
|
||||
- "9009:80"
|
||||
volumes:
|
||||
- "./data:/data"
|
||||
shm_size: 1g
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
|
||||
# llm:
|
||||
# image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
|
||||
# container_name: llm-tgi-server
|
||||
# depends_on:
|
||||
# - tgi-service
|
||||
# ports:
|
||||
# - "9000:9000"
|
||||
# ipc: host
|
||||
# environment:
|
||||
# no_proxy: ${no_proxy}
|
||||
# http_proxy: ${http_proxy}
|
||||
# https_proxy: ${https_proxy}
|
||||
# TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
|
||||
# HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
# HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
# HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
# restart: unless-stopped
|
||||
chaqna-xeon-backend-server:
|
||||
image: ${REGISTRY:-opea}/chatqna-no-wrapper:${TAG:-latest}
|
||||
container_name: chatqna-xeon-backend-server
|
||||
depends_on:
|
||||
- redis-vector-db
|
||||
- tei-embedding-service
|
||||
# - embedding
|
||||
- dataprep-redis-service
|
||||
- retriever
|
||||
- tei-reranking-service
|
||||
# - reranking
|
||||
- tgi-service
|
||||
# - llm
|
||||
ports:
|
||||
- "8888:8888"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
|
||||
- EMBEDDING_SERVER_HOST_IP=${EMBEDDING_SERVER_HOST_IP}
|
||||
- RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
|
||||
- RERANK_SERVER_HOST_IP=${RERANK_SERVER_HOST_IP}
|
||||
- LLM_SERVER_HOST_IP=${LLM_SERVER_HOST_IP}
|
||||
ipc: host
|
||||
restart: always
|
||||
chaqna-xeon-ui-server:
|
||||
image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest}
|
||||
container_name: chatqna-xeon-ui-server
|
||||
depends_on:
|
||||
- chaqna-xeon-backend-server
|
||||
ports:
|
||||
- "5173:5173"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
|
||||
- UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT}
|
||||
- GET_FILE=${DATAPREP_GET_FILE_ENDPOINT}
|
||||
- DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT}
|
||||
ipc: host
|
||||
restart: always
|
||||
|
||||
networks:
|
||||
default:
|
||||
driver: bridge
|
||||
@@ -20,10 +20,10 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
QDRANT_HOST: ${QDRANT_HOST}
|
||||
QDRANT_HOST: qdrant-vector-db
|
||||
QDRANT_PORT: 6333
|
||||
COLLECTION_NAME: ${INDEX_NAME}
|
||||
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
TEI_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
tei-embedding-service:
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
@@ -38,20 +38,6 @@ services:
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
|
||||
embedding:
|
||||
image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
|
||||
container_name: embedding-tei-server
|
||||
depends_on:
|
||||
- tei-embedding-service
|
||||
ports:
|
||||
- "6044:6000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
restart: unless-stopped
|
||||
retriever:
|
||||
image: ${REGISTRY:-opea}/retriever-qdrant:${TAG:-latest}
|
||||
container_name: retriever-qdrant-server
|
||||
@@ -64,9 +50,10 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
QDRANT_HOST: ${QDRANT_HOST}
|
||||
QDRANT_HOST: qdrant-vector-db
|
||||
QDRANT_PORT: 6333
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
restart: unless-stopped
|
||||
tei-reranking-service:
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
@@ -84,23 +71,6 @@ services:
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
|
||||
reranking:
|
||||
image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
|
||||
container_name: reranking-tei-xeon-server
|
||||
depends_on:
|
||||
- tei-reranking-service
|
||||
ports:
|
||||
- "6046:8000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
restart: unless-stopped
|
||||
tgi-service:
|
||||
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
|
||||
container_name: tgi-service
|
||||
@@ -117,67 +87,65 @@ services:
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
|
||||
llm:
|
||||
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
|
||||
container_name: llm-tgi-server
|
||||
depends_on:
|
||||
- tgi-service
|
||||
ports:
|
||||
- "6047:9000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
restart: unless-stopped
|
||||
chaqna-xeon-backend-server:
|
||||
chatqna-xeon-backend-server:
|
||||
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
|
||||
container_name: chatqna-xeon-backend-server
|
||||
depends_on:
|
||||
- qdrant-vector-db
|
||||
- tei-embedding-service
|
||||
- embedding
|
||||
- retriever
|
||||
- tei-reranking-service
|
||||
- reranking
|
||||
- tgi-service
|
||||
- llm
|
||||
ports:
|
||||
- "8912:8888"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
|
||||
- EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
|
||||
- EMBEDDING_SERVICE_PORT=${EMBEDDING_SERVICE_PORT}
|
||||
- RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
|
||||
- RETRIEVER_SERVICE_PORT=${RETRIEVER_SERVICE_PORT}
|
||||
- RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP}
|
||||
- RERANK_SERVICE_PORT=${RERANK_SERVICE_PORT}
|
||||
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
|
||||
- LLM_SERVICE_PORT=${LLM_SERVICE_PORT}
|
||||
- MEGA_SERVICE_HOST_IP=chatqna-xeon-backend-server
|
||||
- EMBEDDING_SERVER_HOST_IP=tei-embedding-service
|
||||
- EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-80}
|
||||
- RETRIEVER_SERVICE_HOST_IP=retriever
|
||||
- RETRIEVER_SERVICE_PORT=${RETRIEVER_SERVICE_PORT:-7000}
|
||||
- RERANK_SERVER_HOST_IP=tei-reranking-service
|
||||
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
|
||||
- LLM_SERVER_HOST_IP=tgi-service
|
||||
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
|
||||
- LOGFLAG=${LOGFLAG}
|
||||
ipc: host
|
||||
restart: always
|
||||
chaqna-xeon-ui-server:
|
||||
chatqna-xeon-ui-server:
|
||||
image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest}
|
||||
container_name: chatqna-xeon-ui-server
|
||||
depends_on:
|
||||
- chaqna-xeon-backend-server
|
||||
- chatqna-xeon-backend-server
|
||||
ports:
|
||||
- "5173:5173"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
|
||||
- UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT}
|
||||
- GET_FILE=${DATAPREP_GET_FILE_ENDPOINT}
|
||||
- DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT}
|
||||
ipc: host
|
||||
restart: always
|
||||
chatqna-xeon-nginx-server:
|
||||
image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
|
||||
container_name: chatqna-xeon-nginx-server
|
||||
depends_on:
|
||||
- chatqna-xeon-backend-server
|
||||
- chatqna-xeon-ui-server
|
||||
ports:
|
||||
- "${NGINX_PORT:-80}:80"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- FRONTEND_SERVICE_IP=chatqna-xeon-ui-server
|
||||
- FRONTEND_SERVICE_PORT=5173
|
||||
- BACKEND_SERVICE_NAME=chatqna
|
||||
- BACKEND_SERVICE_IP=chatqna-xeon-backend-server
|
||||
- BACKEND_SERVICE_PORT=8888
|
||||
- DATAPREP_SERVICE_IP=dataprep-qdrant-service
|
||||
- DATAPREP_SERVICE_PORT=6007
|
||||
ipc: host
|
||||
restart: always
|
||||
|
||||
|
||||
@@ -20,9 +20,10 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
REDIS_URL: ${REDIS_URL}
|
||||
REDIS_URL: redis://redis-vector-db:6379
|
||||
REDIS_HOST: redis-vector-db
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
TEI_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
tei-embedding-service:
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
@@ -37,23 +38,6 @@ services:
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
|
||||
embedding:
|
||||
image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
|
||||
container_name: embedding-tei-server
|
||||
depends_on:
|
||||
- tei-embedding-service
|
||||
ports:
|
||||
- "6000:6000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
|
||||
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
|
||||
LANGCHAIN_PROJECT: "opea-embedding-service"
|
||||
restart: unless-stopped
|
||||
retriever:
|
||||
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
|
||||
container_name: retriever-redis-server
|
||||
@@ -66,12 +50,10 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
REDIS_URL: ${REDIS_URL}
|
||||
REDIS_URL: redis://redis-vector-db:6379
|
||||
REDIS_HOST: redis-vector-db
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
|
||||
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
|
||||
LANGCHAIN_PROJECT: "opea-retriever-service"
|
||||
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
restart: unless-stopped
|
||||
tei-reranking-service:
|
||||
@@ -90,26 +72,6 @@ services:
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
|
||||
reranking:
|
||||
image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
|
||||
container_name: reranking-tei-xeon-server
|
||||
depends_on:
|
||||
- tei-reranking-service
|
||||
ports:
|
||||
- "8000:8000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
|
||||
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
|
||||
LANGCHAIN_PROJECT: "opea-reranking-service"
|
||||
restart: unless-stopped
|
||||
vllm_service:
|
||||
image: ${REGISTRY:-opea}/vllm:${TAG:-latest}
|
||||
container_name: vllm-service
|
||||
@@ -125,67 +87,64 @@ services:
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
LLM_MODEL_ID: ${LLM_MODEL_ID}
|
||||
command: --model $LLM_MODEL_ID --host 0.0.0.0 --port 80
|
||||
llm:
|
||||
image: ${REGISTRY:-opea}/llm-vllm:${TAG:-latest}
|
||||
container_name: llm-vllm-server
|
||||
depends_on:
|
||||
- vllm_service
|
||||
ports:
|
||||
- "9000:9000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
vLLM_ENDPOINT: ${vLLM_LLM_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
LLM_MODEL: ${LLM_MODEL_ID}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
|
||||
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
|
||||
LANGCHAIN_PROJECT: "opea-llm-service"
|
||||
restart: unless-stopped
|
||||
chaqna-xeon-backend-server:
|
||||
chatqna-xeon-backend-server:
|
||||
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
|
||||
container_name: chatqna-xeon-backend-server
|
||||
depends_on:
|
||||
- redis-vector-db
|
||||
- tei-embedding-service
|
||||
- embedding
|
||||
- retriever
|
||||
- tei-reranking-service
|
||||
- reranking
|
||||
- vllm_service
|
||||
- llm
|
||||
ports:
|
||||
- "8888:8888"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
|
||||
- EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
|
||||
- RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
|
||||
- RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP}
|
||||
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
|
||||
- MEGA_SERVICE_HOST_IP=chatqna-xeon-backend-server
|
||||
- EMBEDDING_SERVER_HOST_IP=tei-embedding-service
|
||||
- EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-80}
|
||||
- RETRIEVER_SERVICE_HOST_IP=retriever
|
||||
- RERANK_SERVER_HOST_IP=tei-reranking-service
|
||||
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
|
||||
- LLM_SERVER_HOST_IP=vllm_service
|
||||
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
|
||||
- LOGFLAG=${LOGFLAG}
|
||||
ipc: host
|
||||
restart: always
|
||||
chaqna-xeon-ui-server:
|
||||
chatqna-xeon-ui-server:
|
||||
image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest}
|
||||
container_name: chatqna-xeon-ui-server
|
||||
depends_on:
|
||||
- chaqna-xeon-backend-server
|
||||
- chatqna-xeon-backend-server
|
||||
ports:
|
||||
- "5173:5173"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
|
||||
- UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT}
|
||||
- GET_FILE=${DATAPREP_GET_FILE_ENDPOINT}
|
||||
- DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT}
|
||||
ipc: host
|
||||
restart: always
|
||||
chatqna-xeon-nginx-server:
|
||||
image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
|
||||
container_name: chatqna-xeon-nginx-server
|
||||
depends_on:
|
||||
- chatqna-xeon-backend-server
|
||||
- chatqna-xeon-ui-server
|
||||
ports:
|
||||
- "${NGINX_PORT:-80}:80"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- FRONTEND_SERVICE_IP=chatqna-xeon-ui-server
|
||||
- FRONTEND_SERVICE_PORT=5173
|
||||
- BACKEND_SERVICE_NAME=chatqna
|
||||
- BACKEND_SERVICE_IP=chatqna-xeon-backend-server
|
||||
- BACKEND_SERVICE_PORT=8888
|
||||
- DATAPREP_SERVICE_IP=dataprep-redis-service
|
||||
- DATAPREP_SERVICE_PORT=6007
|
||||
ipc: host
|
||||
restart: always
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user