Compare commits
1 Commits
helmcharts
...
AISEprofil
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
09fa201b30 |
8
.github/CODEOWNERS
vendored
8
.github/CODEOWNERS
vendored
@@ -1,17 +1,13 @@
|
||||
/AgentQnA/ kaokao.lv@intel.com
|
||||
/AgentQnA/ xuhui.ren@intel.com
|
||||
/AudioQnA/ sihan.chen@intel.com
|
||||
/ChatQnA/ liang1.lv@intel.com
|
||||
/CodeGen/ liang1.lv@intel.com
|
||||
/CodeTrans/ sihan.chen@intel.com
|
||||
/DocSum/ letong.han@intel.com
|
||||
/DocIndexRetriever/ kaokao.lv@intel.com chendi.xue@intel.com
|
||||
/InstructionTuning xinyu.ye@intel.com
|
||||
/RerankFinetuning xinyu.ye@intel.com
|
||||
/MultimodalQnA tiep.le@intel.com
|
||||
/DocIndexRetriever/ xuhui.ren@intel.com chendi.xue@intel.com
|
||||
/FaqGen/ xinyao.wang@intel.com
|
||||
/SearchQnA/ sihan.chen@intel.com
|
||||
/Translation/ liang1.lv@intel.com
|
||||
/VisualQnA/ liang1.lv@intel.com
|
||||
/ProductivitySuite/ hoong.tee.yeoh@intel.com
|
||||
/VideoQnA huiling.bao@intel.com
|
||||
/*/ liang1.lv@intel.com
|
||||
|
||||
6
.github/workflows/_example-workflow.yml
vendored
6
.github/workflows/_example-workflow.yml
vendored
@@ -12,10 +12,6 @@ on:
|
||||
example:
|
||||
required: true
|
||||
type: string
|
||||
services:
|
||||
default: ""
|
||||
required: false
|
||||
type: string
|
||||
tag:
|
||||
default: "latest"
|
||||
required: false
|
||||
@@ -81,7 +77,6 @@ jobs:
|
||||
with:
|
||||
work_dir: ${{ github.workspace }}/${{ inputs.example }}/docker_image_build
|
||||
docker_compose_path: ${{ github.workspace }}/${{ inputs.example }}/docker_image_build/build.yaml
|
||||
service_list: ${{ inputs.services }}
|
||||
registry: ${OPEA_IMAGE_REPO}opea
|
||||
tag: ${{ inputs.tag }}
|
||||
|
||||
@@ -110,6 +105,7 @@ jobs:
|
||||
example: ${{ inputs.example }}
|
||||
hardware: ${{ inputs.node }}
|
||||
tag: ${{ inputs.tag }}
|
||||
context: "CD"
|
||||
secrets: inherit
|
||||
|
||||
####################################################################################################
|
||||
|
||||
8
.github/workflows/_manifest-e2e.yml
vendored
8
.github/workflows/_manifest-e2e.yml
vendored
@@ -20,6 +20,11 @@ on:
|
||||
description: "Tag to apply to images, default is latest"
|
||||
required: false
|
||||
type: string
|
||||
context:
|
||||
default: "CI"
|
||||
description: "CI or CD"
|
||||
required: false
|
||||
type: string
|
||||
|
||||
jobs:
|
||||
manifest-test:
|
||||
@@ -46,7 +51,7 @@ jobs:
|
||||
|
||||
- name: Set variables
|
||||
run: |
|
||||
echo "IMAGE_REPO=${OPEA_IMAGE_REPO}opea" >> $GITHUB_ENV
|
||||
echo "IMAGE_REPO=$OPEA_IMAGE_REPO" >> $GITHUB_ENV
|
||||
echo "IMAGE_TAG=${{ inputs.tag }}" >> $GITHUB_ENV
|
||||
lower_example=$(echo "${{ inputs.example }}" | tr '[:upper:]' '[:lower:]')
|
||||
echo "NAMESPACE=$lower_example-$(tr -dc a-z0-9 </dev/urandom | head -c 16)" >> $GITHUB_ENV
|
||||
@@ -55,6 +60,7 @@ jobs:
|
||||
echo "continue_test=true" >> $GITHUB_ENV
|
||||
echo "should_cleanup=false" >> $GITHUB_ENV
|
||||
echo "skip_validate=true" >> $GITHUB_ENV
|
||||
echo "CONTEXT=${{ inputs.context }}" >> $GITHUB_ENV
|
||||
echo "NAMESPACE=$NAMESPACE"
|
||||
|
||||
- name: Kubectl install
|
||||
|
||||
1
.github/workflows/_run-docker-compose.yml
vendored
1
.github/workflows/_run-docker-compose.yml
vendored
@@ -118,7 +118,6 @@ jobs:
|
||||
GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
|
||||
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
|
||||
PINECONE_KEY: ${{ secrets.PINECONE_KEY }}
|
||||
PINECONE_KEY_LANGCHAIN_TEST: ${{ secrets.PINECONE_KEY_LANGCHAIN_TEST }}
|
||||
IMAGE_REPO: ${{ inputs.registry }}
|
||||
IMAGE_TAG: ${{ inputs.tag }}
|
||||
example: ${{ inputs.example }}
|
||||
|
||||
59
.github/workflows/manual-image-build.yml
vendored
59
.github/workflows/manual-image-build.yml
vendored
@@ -1,59 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
name: Build specific images on manual event
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
nodes:
|
||||
default: "gaudi,xeon"
|
||||
description: "Hardware to run test"
|
||||
required: true
|
||||
type: string
|
||||
example:
|
||||
default: "ChatQnA"
|
||||
description: 'Build images belong to which example?'
|
||||
required: true
|
||||
type: string
|
||||
services:
|
||||
default: "chatqna,chatqna-without-rerank"
|
||||
description: 'Service list to build'
|
||||
required: true
|
||||
type: string
|
||||
tag:
|
||||
default: "latest"
|
||||
description: "Tag to apply to images"
|
||||
required: true
|
||||
type: string
|
||||
opea_branch:
|
||||
default: "main"
|
||||
description: 'OPEA branch for image build'
|
||||
required: false
|
||||
type: string
|
||||
jobs:
|
||||
get-test-matrix:
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
nodes: ${{ steps.get-matrix.outputs.nodes }}
|
||||
steps:
|
||||
- name: Create Matrix
|
||||
id: get-matrix
|
||||
run: |
|
||||
nodes=($(echo ${{ inputs.nodes }} | tr ',' ' '))
|
||||
nodes_json=$(printf '%s\n' "${nodes[@]}" | sort -u | jq -R '.' | jq -sc '.')
|
||||
echo "nodes=$nodes_json" >> $GITHUB_OUTPUT
|
||||
|
||||
image-build:
|
||||
needs: get-test-matrix
|
||||
strategy:
|
||||
matrix:
|
||||
node: ${{ fromJson(needs.get-test-matrix.outputs.nodes) }}
|
||||
fail-fast: false
|
||||
uses: ./.github/workflows/_example-workflow.yml
|
||||
with:
|
||||
node: ${{ matrix.node }}
|
||||
example: ${{ inputs.example }}
|
||||
services: ${{ inputs.services }}
|
||||
tag: ${{ inputs.tag }}
|
||||
opea_branch: ${{ inputs.opea_branch }}
|
||||
secrets: inherit
|
||||
50
.github/workflows/pr-bum_list_check.yml
vendored
Normal file
50
.github/workflows/pr-bum_list_check.yml
vendored
Normal file
@@ -0,0 +1,50 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
name: Check Requirements
|
||||
|
||||
on: [pull_request]
|
||||
|
||||
jobs:
|
||||
check-requirements:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout PR branch
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Save PR requirements
|
||||
run: |
|
||||
find . -name "requirements.txt" -exec cat {} \; | \
|
||||
grep -v '^\s*#' | \
|
||||
grep -v '^\s*$' | \
|
||||
grep -v '^\s*-' | \
|
||||
sed 's/^\s*//' | \
|
||||
awk -F'[>=<]' '{print $1}' | \
|
||||
sort -u > pr-requirements.txt
|
||||
cat pr-requirements.txt
|
||||
|
||||
- name: Checkout main branch
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
ref: main
|
||||
path: main-branch
|
||||
|
||||
- name: Save main branch requirements
|
||||
run: |
|
||||
find ./main-branch -name "requirements.txt" -exec cat {} \; | \
|
||||
grep -v '^\s*#' | \
|
||||
grep -v '^\s*$' | \
|
||||
grep -v '^\s*-' | \
|
||||
sed 's/^\s*//' | \
|
||||
awk -F'[>=<]' '{print $1}' | \
|
||||
sort -u > main-requirements.txt
|
||||
cat main-requirements.txt
|
||||
|
||||
- name: Compare requirements
|
||||
run: |
|
||||
comm -23 pr-requirements.txt main-requirements.txt > added-packages.txt
|
||||
if [ -s added-packages.txt ]; then
|
||||
echo "New packages found in PR:" && cat added-packages.txt
|
||||
else
|
||||
echo "No new packages found😊."
|
||||
fi
|
||||
2
.github/workflows/pr-manifest-e2e.yml
vendored
2
.github/workflows/pr-manifest-e2e.yml
vendored
@@ -8,8 +8,6 @@ on:
|
||||
branches: ["main", "*rc"]
|
||||
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
|
||||
paths:
|
||||
- "**/Dockerfile**"
|
||||
- "**.py"
|
||||
- "**/kubernetes/**/manifests/**"
|
||||
- "**/tests/test_manifest**"
|
||||
- "!**.md"
|
||||
|
||||
88
.github/workflows/pr-path-detection.yml
vendored
88
.github/workflows/pr-path-detection.yml
vendored
@@ -50,40 +50,28 @@ jobs:
|
||||
|
||||
- name: Checkout Repo GenAIExamples
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Check the Validity of Hyperlinks
|
||||
run: |
|
||||
cd ${{github.workspace}}
|
||||
fail="FALSE"
|
||||
merged_commit=$(git log -1 --format='%H')
|
||||
changed_files="$(git diff --name-status --diff-filter=ARM ${{ github.event.pull_request.base.sha }} ${merged_commit} | awk '/\.md$/ {print $NF}')"
|
||||
if [ -n "$changed_files" ]; then
|
||||
for changed_file in $changed_files; do
|
||||
echo $changed_file
|
||||
url_lines=$(grep -H -Eo '\]\(http[s]?://[^)]+\)' "$changed_file" | grep -Ev 'GenAIExamples/blob/main') || true
|
||||
if [ -n "$url_lines" ]; then
|
||||
for url_line in $url_lines; do
|
||||
echo $url_line
|
||||
url=$(echo "$url_line"|cut -d '(' -f2 | cut -d ')' -f1|sed 's/\.git$//')
|
||||
path=$(echo "$url_line"|cut -d':' -f1 | cut -d'/' -f2-)
|
||||
response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url")
|
||||
if [ "$response" -ne 200 ]; then
|
||||
echo "**********Validation failed, try again**********"
|
||||
response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url")
|
||||
if [ "$response_retry" -eq 200 ]; then
|
||||
echo "*****Retry successfully*****"
|
||||
else
|
||||
echo "Invalid link from ${{github.workspace}}/$path: $url"
|
||||
fail="TRUE"
|
||||
fi
|
||||
fi
|
||||
done
|
||||
url_lines=$(grep -Eo '\]\(http[s]?://[^)]+\)' --include='*.md' -r .)
|
||||
if [ -n "$url_lines" ]; then
|
||||
for url_line in $url_lines; do
|
||||
url=$(echo "$url_line"|cut -d '(' -f2 | cut -d ')' -f1|sed 's/\.git$//')
|
||||
path=$(echo "$url_line"|cut -d':' -f1 | cut -d'/' -f2-)
|
||||
response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url")
|
||||
if [ "$response" -ne 200 ]; then
|
||||
echo "**********Validation failed, try again**********"
|
||||
response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url")
|
||||
if [ "$response_retry" -eq 200 ]; then
|
||||
echo "*****Retry successfully*****"
|
||||
else
|
||||
echo "Invalid link from ${{github.workspace}}/$path: $url"
|
||||
fail="TRUE"
|
||||
fi
|
||||
fi
|
||||
done
|
||||
else
|
||||
echo "No changed .md file."
|
||||
fi
|
||||
|
||||
if [[ "$fail" == "TRUE" ]]; then
|
||||
@@ -101,8 +89,6 @@ jobs:
|
||||
|
||||
- name: Checkout Repo GenAIExamples
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Checking Relative Path Validity
|
||||
run: |
|
||||
@@ -116,34 +102,33 @@ jobs:
|
||||
branch="https://github.com/opea-project/GenAIExamples/blob/${{ github.event.pull_request.head.ref }}"
|
||||
fi
|
||||
link_head="https://github.com/opea-project/GenAIExamples/blob/main"
|
||||
|
||||
merged_commit=$(git log -1 --format='%H')
|
||||
changed_files="$(git diff --name-status --diff-filter=ARM ${{ github.event.pull_request.base.sha }} ${merged_commit} | awk '/\.md$/ {print $NF}')"
|
||||
png_lines=$(grep -Eo '\]\([^)]+\)' --include='*.md' -r .|grep -Ev 'http')
|
||||
if [ -n "$png_lines" ]; then
|
||||
for png_line in $png_lines; do
|
||||
refer_path=$(echo "$png_line"|cut -d':' -f1 | cut -d'/' -f2-)
|
||||
png_path=$(echo "$png_line"|cut -d '(' -f2 | cut -d ')' -f1)
|
||||
|
||||
if [[ "${png_path:0:1}" == "/" ]]; then
|
||||
check_path=$png_path
|
||||
elif [[ "$png_path" == *#* ]]; then
|
||||
relative_path=$(echo "$png_path" | cut -d '#' -f1)
|
||||
if [ -n "$relative_path" ]; then
|
||||
check_path=$(dirname "$refer_path")/$relative_path
|
||||
png_path=$(echo "$png_path" | awk -F'#' '{print "#" $2}')
|
||||
else
|
||||
check_path=$refer_path
|
||||
fi
|
||||
check_path=${{github.workspace}}$png_path
|
||||
elif [[ "${png_path:0:1}" == "#" ]]; then
|
||||
check_path=${{github.workspace}}/$refer_path$png_path
|
||||
else
|
||||
check_path=$(dirname "$refer_path")/$png_path
|
||||
check_path=${{github.workspace}}/$(dirname "$refer_path")/$png_path
|
||||
fi
|
||||
|
||||
if [ -e "$check_path" ]; then
|
||||
real_path=$(realpath $check_path)
|
||||
if [[ "$png_line" == *#* ]]; then
|
||||
if [ -n "changed_files" ] && echo "$changed_files" | grep -q "^${refer_path}$"; then
|
||||
url_dev=$branch$(echo "$real_path" | sed 's|.*/GenAIExamples||')$png_path
|
||||
real_path=$(realpath $check_path)
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "Path $png_path in file ${{github.workspace}}/$refer_path does not exist"
|
||||
fail="TRUE"
|
||||
else
|
||||
url=$link_head$(echo "$real_path" | sed 's|.*/GenAIExamples||')
|
||||
response=$(curl -I -L -s -o /dev/null -w "%{http_code}" "$url")
|
||||
if [ "$response" -ne 200 ]; then
|
||||
echo "**********Validation failed, try again**********"
|
||||
response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url")
|
||||
if [ "$response_retry" -eq 200 ]; then
|
||||
echo "*****Retry successfully*****"
|
||||
else
|
||||
echo "Retry failed. Check branch ${{ github.event.pull_request.head.ref }}"
|
||||
url_dev=$branch$(echo "$real_path" | sed 's|.*/GenAIExamples||')
|
||||
response=$(curl -I -L -s -o /dev/null -w "%{http_code}" "$url_dev")
|
||||
if [ "$response" -ne 200 ]; then
|
||||
echo "**********Validation failed, try again**********"
|
||||
@@ -155,13 +140,10 @@ jobs:
|
||||
fail="TRUE"
|
||||
fi
|
||||
else
|
||||
echo "Validation succeed $png_line"
|
||||
echo "Check branch ${{ github.event.pull_request.head.ref }} successfully."
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
else
|
||||
echo "${{github.workspace}}/$refer_path:$png_path does not exist"
|
||||
fail="TRUE"
|
||||
fi
|
||||
done
|
||||
fi
|
||||
|
||||
6
.github/workflows/push-image-build.yml
vendored
6
.github/workflows/push-image-build.yml
vendored
@@ -23,10 +23,12 @@ jobs:
|
||||
image-build:
|
||||
needs: job1
|
||||
strategy:
|
||||
matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }}
|
||||
matrix:
|
||||
example: ${{ fromJSON(needs.job1.outputs.run_matrix).include.*.example }}
|
||||
node: ["gaudi","xeon"]
|
||||
fail-fast: false
|
||||
uses: ./.github/workflows/_example-workflow.yml
|
||||
with:
|
||||
node: ${{ matrix.hardware }}
|
||||
node: ${{ matrix.node }}
|
||||
example: ${{ matrix.example }}
|
||||
secrets: inherit
|
||||
|
||||
@@ -79,7 +79,7 @@ repos:
|
||||
- id: isort
|
||||
|
||||
- repo: https://github.com/PyCQA/docformatter
|
||||
rev: 06907d0
|
||||
rev: v1.7.5
|
||||
hooks:
|
||||
- id: docformatter
|
||||
args: [
|
||||
|
||||
@@ -5,73 +5,6 @@
|
||||
This example showcases a hierarchical multi-agent system for question-answering applications. The architecture diagram is shown below. The supervisor agent interfaces with the user and dispatches tasks to the worker agent and other tools to gather information and come up with answers. The worker agent uses the retrieval tool to generate answers to the queries posed by the supervisor agent. Other tools used by the supervisor agent may include APIs to interface with knowledge graphs, SQL databases, external knowledge bases, etc.
|
||||

|
||||
|
||||
The AgentQnA example is implemented using the component-level microservices defined in [GenAIComps](https://github.com/opea-project/GenAIComps). The flow chart below shows the information flow between different microservices for this example.
|
||||
|
||||
```mermaid
|
||||
---
|
||||
config:
|
||||
flowchart:
|
||||
nodeSpacing: 400
|
||||
rankSpacing: 100
|
||||
curve: linear
|
||||
themeVariables:
|
||||
fontSize: 50px
|
||||
---
|
||||
flowchart LR
|
||||
%% Colors %%
|
||||
classDef blue fill:#ADD8E6,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
|
||||
classDef orange fill:#FBAA60,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
|
||||
classDef orchid fill:#C26DBC,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
|
||||
classDef invisible fill:transparent,stroke:transparent;
|
||||
|
||||
%% Subgraphs %%
|
||||
subgraph DocIndexRetriever-MegaService["DocIndexRetriever MegaService "]
|
||||
direction LR
|
||||
EM([Embedding MicroService]):::blue
|
||||
RET([Retrieval MicroService]):::blue
|
||||
RER([Rerank MicroService]):::blue
|
||||
end
|
||||
subgraph UserInput[" User Input "]
|
||||
direction LR
|
||||
a([User Input Query]):::orchid
|
||||
Ingest([Ingest data]):::orchid
|
||||
end
|
||||
AG_REACT([Agent MicroService - react]):::blue
|
||||
AG_RAG([Agent MicroService - rag]):::blue
|
||||
LLM_gen{{LLM Service <br>}}
|
||||
DP([Data Preparation MicroService]):::blue
|
||||
TEI_RER{{Reranking service<br>}}
|
||||
TEI_EM{{Embedding service <br>}}
|
||||
VDB{{Vector DB<br><br>}}
|
||||
R_RET{{Retriever service <br>}}
|
||||
|
||||
|
||||
|
||||
%% Questions interaction
|
||||
direction LR
|
||||
a[User Input Query] --> AG_REACT
|
||||
AG_REACT --> AG_RAG
|
||||
AG_RAG --> DocIndexRetriever-MegaService
|
||||
EM ==> RET
|
||||
RET ==> RER
|
||||
Ingest[Ingest data] --> DP
|
||||
|
||||
%% Embedding service flow
|
||||
direction LR
|
||||
AG_RAG <-.-> LLM_gen
|
||||
AG_REACT <-.-> LLM_gen
|
||||
EM <-.-> TEI_EM
|
||||
RET <-.-> R_RET
|
||||
RER <-.-> TEI_RER
|
||||
|
||||
direction TB
|
||||
%% Vector DB interaction
|
||||
R_RET <-.-> VDB
|
||||
DP <-.-> VDB
|
||||
|
||||
|
||||
```
|
||||
|
||||
### Why Agent for question answering?
|
||||
|
||||
1. Improve relevancy of retrieved context.
|
||||
|
||||
@@ -2,63 +2,6 @@
|
||||
|
||||
AudioQnA is an example that demonstrates the integration of Generative AI (GenAI) models for performing question-answering (QnA) on audio files, with the added functionality of Text-to-Speech (TTS) for generating spoken responses. The example showcases how to convert audio input to text using Automatic Speech Recognition (ASR), generate answers to user queries using a language model, and then convert those answers back to speech using Text-to-Speech (TTS).
|
||||
|
||||
The AudioQnA example is implemented using the component-level microservices defined in [GenAIComps](https://github.com/opea-project/GenAIComps). The flow chart below shows the information flow between different microservices for this example.
|
||||
|
||||
```mermaid
|
||||
---
|
||||
config:
|
||||
flowchart:
|
||||
nodeSpacing: 400
|
||||
rankSpacing: 100
|
||||
curve: linear
|
||||
themeVariables:
|
||||
fontSize: 50px
|
||||
---
|
||||
flowchart LR
|
||||
%% Colors %%
|
||||
classDef blue fill:#ADD8E6,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
|
||||
classDef orange fill:#FBAA60,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
|
||||
classDef orchid fill:#C26DBC,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
|
||||
classDef invisible fill:transparent,stroke:transparent;
|
||||
style AudioQnA-MegaService stroke:#000000
|
||||
|
||||
%% Subgraphs %%
|
||||
subgraph AudioQnA-MegaService["AudioQnA MegaService "]
|
||||
direction LR
|
||||
ASR([ASR MicroService]):::blue
|
||||
LLM([LLM MicroService]):::blue
|
||||
TTS([TTS MicroService]):::blue
|
||||
end
|
||||
subgraph UserInterface[" User Interface "]
|
||||
direction LR
|
||||
a([User Input Query]):::orchid
|
||||
UI([UI server<br>]):::orchid
|
||||
end
|
||||
|
||||
|
||||
|
||||
WSP_SRV{{whisper service<br>}}
|
||||
SPC_SRV{{speecht5 service <br>}}
|
||||
LLM_gen{{LLM Service <br>}}
|
||||
GW([AudioQnA GateWay<br>]):::orange
|
||||
|
||||
|
||||
%% Questions interaction
|
||||
direction LR
|
||||
a[User Audio Query] --> UI
|
||||
UI --> GW
|
||||
GW <==> AudioQnA-MegaService
|
||||
ASR ==> LLM
|
||||
LLM ==> TTS
|
||||
|
||||
%% Embedding service flow
|
||||
direction LR
|
||||
ASR <-.-> WSP_SRV
|
||||
LLM <-.-> LLM_gen
|
||||
TTS <-.-> SPC_SRV
|
||||
|
||||
```
|
||||
|
||||
## Deploy AudioQnA Service
|
||||
|
||||
The AudioQnA service can be deployed on either Intel Gaudi2 or Intel Xeon Scalable Processor.
|
||||
|
||||
@@ -1,98 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import os
|
||||
|
||||
from comps import AudioQnAGateway, MicroService, ServiceOrchestrator, ServiceType
|
||||
|
||||
MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "0.0.0.0")
|
||||
MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))
|
||||
|
||||
WHISPER_SERVER_HOST_IP = os.getenv("WHISPER_SERVER_HOST_IP", "0.0.0.0")
|
||||
WHISPER_SERVER_PORT = int(os.getenv("WHISPER_SERVER_PORT", 7066))
|
||||
GPT_SOVITS_SERVER_HOST_IP = os.getenv("GPT_SOVITS_SERVER_HOST_IP", "0.0.0.0")
|
||||
GPT_SOVITS_SERVER_PORT = int(os.getenv("GPT_SOVITS_SERVER_PORT", 9088))
|
||||
LLM_SERVER_HOST_IP = os.getenv("LLM_SERVER_HOST_IP", "0.0.0.0")
|
||||
LLM_SERVER_PORT = int(os.getenv("LLM_SERVER_PORT", 8888))
|
||||
|
||||
|
||||
def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
|
||||
print(inputs)
|
||||
if self.services[cur_node].service_type == ServiceType.ASR:
|
||||
# {'byte_str': 'UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA'}
|
||||
inputs["audio"] = inputs["byte_str"]
|
||||
del inputs["byte_str"]
|
||||
elif self.services[cur_node].service_type == ServiceType.LLM:
|
||||
# convert TGI/vLLM to unified OpenAI /v1/chat/completions format
|
||||
next_inputs = {}
|
||||
next_inputs["model"] = "tgi" # specifically clarify the fake model to make the format unified
|
||||
next_inputs["messages"] = [{"role": "user", "content": inputs["asr_result"]}]
|
||||
next_inputs["max_tokens"] = llm_parameters_dict["max_tokens"]
|
||||
next_inputs["top_p"] = llm_parameters_dict["top_p"]
|
||||
next_inputs["stream"] = inputs["streaming"] # False as default
|
||||
next_inputs["frequency_penalty"] = inputs["frequency_penalty"]
|
||||
# next_inputs["presence_penalty"] = inputs["presence_penalty"]
|
||||
# next_inputs["repetition_penalty"] = inputs["repetition_penalty"]
|
||||
next_inputs["temperature"] = inputs["temperature"]
|
||||
inputs = next_inputs
|
||||
elif self.services[cur_node].service_type == ServiceType.TTS:
|
||||
next_inputs = {}
|
||||
next_inputs["text"] = inputs["choices"][0]["message"]["content"]
|
||||
next_inputs["text_language"] = kwargs["tts_text_language"] if "tts_text_language" in kwargs else "zh"
|
||||
inputs = next_inputs
|
||||
return inputs
|
||||
|
||||
|
||||
def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_dict, **kwargs):
|
||||
if self.services[cur_node].service_type == ServiceType.TTS:
|
||||
audio_base64 = base64.b64encode(data).decode("utf-8")
|
||||
return {"byte_str": audio_base64}
|
||||
return data
|
||||
|
||||
|
||||
class AudioQnAService:
|
||||
def __init__(self, host="0.0.0.0", port=8000):
|
||||
self.host = host
|
||||
self.port = port
|
||||
ServiceOrchestrator.align_inputs = align_inputs
|
||||
ServiceOrchestrator.align_outputs = align_outputs
|
||||
self.megaservice = ServiceOrchestrator()
|
||||
|
||||
def add_remote_service(self):
|
||||
asr = MicroService(
|
||||
name="asr",
|
||||
host=WHISPER_SERVER_HOST_IP,
|
||||
port=WHISPER_SERVER_PORT,
|
||||
# endpoint="/v1/audio/transcriptions",
|
||||
endpoint="/v1/asr",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.ASR,
|
||||
)
|
||||
llm = MicroService(
|
||||
name="llm",
|
||||
host=LLM_SERVER_HOST_IP,
|
||||
port=LLM_SERVER_PORT,
|
||||
endpoint="/v1/chat/completions",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.LLM,
|
||||
)
|
||||
tts = MicroService(
|
||||
name="tts",
|
||||
host=GPT_SOVITS_SERVER_HOST_IP,
|
||||
port=GPT_SOVITS_SERVER_PORT,
|
||||
# endpoint="/v1/audio/speech",
|
||||
endpoint="/",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.TTS,
|
||||
)
|
||||
self.megaservice.add(asr).add(llm).add(tts)
|
||||
self.megaservice.flow_to(asr, llm)
|
||||
self.megaservice.flow_to(llm, tts)
|
||||
self.gateway = AudioQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
audioqna = AudioQnAService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT)
|
||||
audioqna.add_remote_service()
|
||||
@@ -1,4 +1,4 @@
|
||||
# AudioQnA Accuracy
|
||||
# AudioQnA accuracy Evaluation
|
||||
|
||||
AudioQnA is an example that demonstrates the integration of Generative AI (GenAI) models for performing question-answering (QnA) on audio, combining Automatic Speech Recognition (ASR) and Text-to-Speech (TTS). The following is the pipeline for evaluating the ASR accuracy.
|
||||
|
||||
|
||||
@@ -1,5 +0,0 @@
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
python online_evaluate.py
|
||||
@@ -127,13 +127,9 @@ curl http://${host_ip}:3002/v1/audio/speech \
|
||||
|
||||
## 🚀 Test MegaService
|
||||
|
||||
Test the AudioQnA megaservice by recording a .wav file, encoding the file into the base64 format, and then sending the
|
||||
base64 string to the megaservice endpoint. The megaservice will return a spoken response as a base64 string. To listen
|
||||
to the response, decode the base64 string and save it as a .wav file.
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:3008/v1/audioqna \
|
||||
-X POST \
|
||||
-d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64}' \
|
||||
-H 'Content-Type: application/json' | sed 's/^"//;s/"$//' | base64 -d > output.wav
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
@@ -1,64 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
services:
|
||||
whisper-service:
|
||||
image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
|
||||
container_name: whisper-service
|
||||
ports:
|
||||
- "7066:7066"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
restart: unless-stopped
|
||||
command: --language "zh"
|
||||
gpt-sovits-service:
|
||||
image: ${REGISTRY:-opea}/gpt-sovits:${TAG:-latest}
|
||||
container_name: gpt-sovits-service
|
||||
ports:
|
||||
- "9880:9880"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
restart: unless-stopped
|
||||
tgi-service:
|
||||
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
|
||||
container_name: tgi-service
|
||||
ports:
|
||||
- "3006:80"
|
||||
volumes:
|
||||
- "./data:/data"
|
||||
shm_size: 1g
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
|
||||
audioqna-xeon-backend-server:
|
||||
image: ${REGISTRY:-opea}/audioqna-multilang:${TAG:-latest}
|
||||
container_name: audioqna-xeon-backend-server
|
||||
ports:
|
||||
- "3008:8888"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
|
||||
- LLM_SERVER_HOST_IP=${LLM_SERVER_HOST_IP}
|
||||
- LLM_SERVER_PORT=${LLM_SERVER_PORT}
|
||||
- LLM_MODEL_ID=${LLM_MODEL_ID}
|
||||
- WHISPER_SERVER_HOST_IP=${WHISPER_SERVER_HOST_IP}
|
||||
- WHISPER_SERVER_PORT=${WHISPER_SERVER_PORT}
|
||||
- GPT_SOVITS_SERVER_HOST_IP=${GPT_SOVITS_SERVER_HOST_IP}
|
||||
- GPT_SOVITS_SERVER_PORT=${GPT_SOVITS_SERVER_PORT}
|
||||
ipc: host
|
||||
restart: always
|
||||
|
||||
networks:
|
||||
default:
|
||||
driver: bridge
|
||||
@@ -79,8 +79,6 @@ export LLM_SERVICE_PORT=3007
|
||||
|
||||
## 🚀 Start the MegaService
|
||||
|
||||
> **_NOTE:_** Users will need at least three Gaudi cards for AudioQnA.
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/AudioQnA/docker_compose/intel/hpu/gaudi/
|
||||
docker compose up -d
|
||||
@@ -129,13 +127,9 @@ curl http://${host_ip}:3002/v1/audio/speech \
|
||||
|
||||
## 🚀 Test MegaService
|
||||
|
||||
Test the AudioQnA megaservice by recording a .wav file, encoding the file into the base64 format, and then sending the
|
||||
base64 string to the megaservice endpoint. The megaservice will return a spoken response as a base64 string. To listen
|
||||
to the response, decode the base64 string and save it as a .wav file.
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:3008/v1/audioqna \
|
||||
-X POST \
|
||||
-d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64}' \
|
||||
-H 'Content-Type: application/json' | sed 's/^"//;s/"$//' | base64 -d > output.wav
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
@@ -53,9 +53,3 @@ services:
|
||||
dockerfile: comps/tts/speecht5/Dockerfile
|
||||
extends: audioqna
|
||||
image: ${REGISTRY:-opea}/tts:${TAG:-latest}
|
||||
gpt-sovits:
|
||||
build:
|
||||
context: GenAIComps
|
||||
dockerfile: comps/tts/gpt-sovits/Dockerfile
|
||||
extends: audioqna
|
||||
image: ${REGISTRY:-opea}/gpt-sovits:${TAG:-latest}
|
||||
|
||||
@@ -19,8 +19,7 @@ RUN git clone https://github.com/opea-project/GenAIComps.git
|
||||
|
||||
WORKDIR /home/user/GenAIComps
|
||||
RUN pip install --no-cache-dir --upgrade pip && \
|
||||
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt && \
|
||||
pip install --no-cache-dir langchain_core
|
||||
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt
|
||||
|
||||
COPY ./chatqna.py /home/user/chatqna.py
|
||||
|
||||
|
||||
@@ -19,10 +19,9 @@ RUN git clone https://github.com/opea-project/GenAIComps.git
|
||||
|
||||
WORKDIR /home/user/GenAIComps
|
||||
RUN pip install --no-cache-dir --upgrade pip && \
|
||||
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt && \
|
||||
pip install --no-cache-dir langchain_core
|
||||
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt
|
||||
|
||||
COPY ./chatqna.py /home/user/chatqna.py
|
||||
COPY ./chatqna_guardrails.py /home/user/chatqna_guardrails.py
|
||||
|
||||
ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps
|
||||
|
||||
@@ -32,4 +31,4 @@ WORKDIR /home/user
|
||||
|
||||
RUN echo 'ulimit -S -n 999999' >> ~/.bashrc
|
||||
|
||||
ENTRYPOINT ["python", "chatqna.py", "--with-guardrails"]
|
||||
ENTRYPOINT ["python", "chatqna_guardrails.py"]
|
||||
|
||||
@@ -8,6 +8,7 @@ FROM python:3.11-slim
|
||||
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
|
||||
libgl1-mesa-glx \
|
||||
libjemalloc-dev \
|
||||
vim \
|
||||
git
|
||||
|
||||
RUN useradd -m -s /bin/bash user && \
|
||||
@@ -19,9 +20,10 @@ RUN git clone https://github.com/opea-project/GenAIComps.git
|
||||
|
||||
WORKDIR /home/user/GenAIComps
|
||||
RUN pip install --no-cache-dir --upgrade pip && \
|
||||
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt
|
||||
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt && \
|
||||
pip install --no-cache-dir langchain_core
|
||||
|
||||
COPY ./audioqna_multilang.py /home/user/audioqna_multilang.py
|
||||
COPY ./chatqna_no_wrapper.py /home/user/chatqna_no_wrapper.py
|
||||
|
||||
ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps
|
||||
|
||||
@@ -29,4 +31,4 @@ USER user
|
||||
|
||||
WORKDIR /home/user
|
||||
|
||||
ENTRYPOINT ["python", "audioqna_multilang.py"]
|
||||
ENTRYPOINT ["python", "chatqna_no_wrapper.py"]
|
||||
34
ChatQnA/Dockerfile.no_wrapper_without_rerank
Normal file
34
ChatQnA/Dockerfile.no_wrapper_without_rerank
Normal file
@@ -0,0 +1,34 @@
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
FROM python:3.11-slim
|
||||
|
||||
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
|
||||
libgl1-mesa-glx \
|
||||
libjemalloc-dev \
|
||||
vim \
|
||||
git
|
||||
|
||||
RUN useradd -m -s /bin/bash user && \
|
||||
mkdir -p /home/user && \
|
||||
chown -R user /home/user/
|
||||
|
||||
WORKDIR /home/user/
|
||||
RUN git clone https://github.com/opea-project/GenAIComps.git
|
||||
|
||||
WORKDIR /home/user/GenAIComps
|
||||
RUN pip install --no-cache-dir --upgrade pip && \
|
||||
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt && \
|
||||
pip install --no-cache-dir langchain_core
|
||||
|
||||
COPY ./chatqna_no_wrapper.py /home/user/chatqna_no_wrapper.py
|
||||
|
||||
ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps
|
||||
|
||||
USER user
|
||||
|
||||
WORKDIR /home/user
|
||||
|
||||
ENTRYPOINT ["python", "chatqna_no_wrapper.py", "--without-rerank"]
|
||||
@@ -6,9 +6,9 @@
|
||||
FROM python:3.11-slim
|
||||
|
||||
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
|
||||
git \
|
||||
libgl1-mesa-glx \
|
||||
libjemalloc-dev
|
||||
libjemalloc-dev \
|
||||
git
|
||||
|
||||
RUN useradd -m -s /bin/bash user && \
|
||||
mkdir -p /home/user && \
|
||||
@@ -19,10 +19,9 @@ RUN git clone https://github.com/opea-project/GenAIComps.git
|
||||
|
||||
WORKDIR /home/user/GenAIComps
|
||||
RUN pip install --no-cache-dir --upgrade pip && \
|
||||
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt && \
|
||||
pip install --no-cache-dir langchain_core
|
||||
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt
|
||||
|
||||
COPY ./chatqna.py /home/user/chatqna.py
|
||||
COPY ./chatqna_without_rerank.py /home/user/chatqna_without_rerank.py
|
||||
|
||||
ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps
|
||||
|
||||
@@ -32,4 +31,4 @@ WORKDIR /home/user
|
||||
|
||||
RUN echo 'ulimit -S -n 999999' >> ~/.bashrc
|
||||
|
||||
ENTRYPOINT ["python", "chatqna.py", "--without-rerank"]
|
||||
ENTRYPOINT ["python", "chatqna_without_rerank.py"]
|
||||
|
||||
@@ -1,170 +0,0 @@
|
||||
# ChatQnA Accuracy
|
||||
|
||||
ChatQnA is a Retrieval-Augmented Generation (RAG) pipeline, which can enhance generative models through external information retrieval.
|
||||
|
||||
To evaluate accuracy, we use two recently published datasets and 10+ popular, comprehensive metrics:
|
||||
|
||||
- Dataset
|
||||
- [MultiHop](https://arxiv.org/pdf/2401.15391) (English dataset)
|
||||
- [CRUD](https://arxiv.org/abs/2401.17043) (Chinese dataset)
|
||||
- metrics (measure accuracy of both the context retrieval and response generation)
|
||||
- evaluation for retrieval/reranking
|
||||
- MRR@10
|
||||
- MAP@10
|
||||
- Hits@10
|
||||
- Hits@4
|
||||
- LLM-as-a-Judge
|
||||
- evaluation for the generated response from the end-to-end pipeline
|
||||
- BLEU
|
||||
- ROUGE(L)
|
||||
- LLM-as-a-Judge
|
||||
|
||||
## Prerequisite
|
||||
|
||||
### Environment
|
||||
|
||||
```bash
|
||||
git clone https://github.com/opea-project/GenAIEval
|
||||
cd GenAIEval
|
||||
pip install -r requirements.txt
|
||||
pip install -e .
|
||||
```
|
||||
|
||||
## MultiHop (English dataset)
|
||||
|
||||
[MultiHop-RAG](https://arxiv.org/pdf/2401.15391): a QA dataset to evaluate retrieval and reasoning across documents with metadata in the RAG pipelines. It contains 2556 queries, with evidence for each query distributed across 2 to 4 documents. The queries also involve document metadata, reflecting complex scenarios commonly found in real-world RAG applications.
|
||||
|
||||
### Launch Service of RAG System
|
||||
|
||||
Please refer to this [guide](https://github.com/opea-project/GenAIExamples/blob/main/ChatQnA/README.md) to launch the service of `ChatQnA`.
|
||||
|
||||
### Launch Service of LLM-as-a-Judge
|
||||
|
||||
To set up an LLM model, we can use [tgi-gaudi](https://github.com/huggingface/tgi-gaudi) to launch a service. For example, the following command sets up the [mistralai/Mixtral-8x7B-Instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1) model on 2 Gaudi2 cards:
|
||||
|
||||
```
|
||||
# please set your llm_port and hf_token
|
||||
|
||||
docker run -p {your_llm_port}:80 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HF_TOKEN={your_hf_token} --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.1 --model-id mistralai/Mixtral-8x7B-Instruct-v0.1 --max-input-tokens 2048 --max-total-tokens 4096 --sharded true --num-shard 2
|
||||
|
||||
# for better performance, set `PREFILL_BATCH_BUCKET_SIZE`, `BATCH_BUCKET_SIZE`, `max-batch-total-tokens`, `max-batch-prefill-tokens`
|
||||
docker run -p {your_llm_port}:80 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HF_TOKEN={your_hf_token} -e PREFILL_BATCH_BUCKET_SIZE=1 -e BATCH_BUCKET_SIZE=8 --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.5 --model-id mistralai/Mixtral-8x7B-Instruct-v0.1 --max-input-tokens 2048 --max-total-tokens 4096 --sharded true --num-shard 2 --max-batch-total-tokens 65536 --max-batch-prefill-tokens 2048
|
||||
```
|
||||
|
||||
### Prepare Dataset
|
||||
|
||||
We use the evaluation dataset from the [MultiHop-RAG](https://github.com/yixuantt/MultiHop-RAG) repo. Use the command below to prepare the dataset.
|
||||
|
||||
```bash
|
||||
git clone https://github.com/yixuantt/MultiHop-RAG.git
|
||||
```
|
||||
|
||||
### Evaluation
|
||||
|
||||
Use the command below to run the evaluation. Note that for the first run, the argument `--ingest_docs` should be added to the command to ingest the documents into the vector database, while for subsequent runs this argument should be omitted. Set `--retrieval_metrics` to get retrieval-related metrics (MRR@10/MAP@10/Hits@10/Hits@4). Set `--ragas_metrics` and `--llm_endpoint` to get end-to-end RAG pipeline metrics (faithfulness/answer_relevancy/...), which are judged by LLMs. `--limits` defaults to 100, which means only 100 examples are evaluated by LLM-as-a-judge, because that step is very time-consuming.
|
||||
|
||||
If you are using docker compose to deploy the `ChatQnA` system, you can simply run the evaluation as follows:
|
||||
|
||||
```bash
|
||||
python eval_multihop.py --docs_path MultiHop-RAG/dataset/corpus.json --dataset_path MultiHop-RAG/dataset/MultiHopRAG.json --ingest_docs --retrieval_metrics --ragas_metrics --llm_endpoint http://{llm_as_judge_ip}:{llm_as_judge_port}/generate
|
||||
```
|
||||
|
||||
If you are using Kubernetes manifest/helm to deploy the `ChatQnA` system, you must specify more arguments as follows:
|
||||
|
||||
```bash
|
||||
python eval_multihop.py --docs_path MultiHop-RAG/dataset/corpus.json --dataset_path MultiHop-RAG/dataset/MultiHopRAG.json --ingest_docs --retrieval_metrics --ragas_metrics --llm_endpoint http://{llm_as_judge_ip}:{llm_as_judge_port}/generate --database_endpoint http://{your_dataprep_ip}:{your_dataprep_port}/v1/dataprep --embedding_endpoint http://{your_embedding_ip}:{your_embedding_port}/v1/embeddings --tei_embedding_endpoint http://{your_tei_embedding_ip}:{your_tei_embedding_port} --retrieval_endpoint http://{your_retrieval_ip}:{your_retrieval_port}/v1/retrieval --service_url http://{your_chatqna_ip}:{your_chatqna_port}/v1/chatqna
|
||||
```
|
||||
|
||||
The default values for arguments are:
|
||||
|Argument|Default value|
|
||||
|--------|-------------|
|
||||
|service_url|http://localhost:8888/v1/chatqna|
|
||||
|database_endpoint|http://localhost:6007/v1/dataprep|
|
||||
|embedding_endpoint|http://localhost:6000/v1/embeddings|
|
||||
|tei_embedding_endpoint|http://localhost:8090|
|
||||
|retrieval_endpoint|http://localhost:7000/v1/retrieval|
|
||||
|reranking_endpoint|http://localhost:8000/v1/reranking|
|
||||
|output_dir|./output|
|
||||
|temperature|0.1|
|
||||
|max_new_tokens|1280|
|
||||
|chunk_size|256|
|
||||
|chunk_overlap|100|
|
||||
|search_type|similarity|
|
||||
|retrival_k|10|
|
||||
|fetch_k|20|
|
||||
|lambda_mult|0.5|
|
||||
|dataset_path|None|
|
||||
|docs_path|None|
|
||||
|limits|100|
|
||||
|
||||
You can check the details of the arguments using the command below:
|
||||
|
||||
```bash
|
||||
python eval_multihop.py --help
|
||||
```
|
||||
|
||||
## CRUD (Chinese dataset)
|
||||
|
||||
[CRUD-RAG](https://arxiv.org/abs/2401.17043) is a Chinese benchmark for RAG (Retrieval-Augmented Generation) systems. This example utilizes CRUD-RAG for evaluating the RAG system.
|
||||
|
||||
### Prepare Dataset
|
||||
|
||||
We use the evaluation dataset from the [CRUD-RAG](https://github.com/IAAR-Shanghai/CRUD_RAG) repo. Use the command below to prepare the dataset.
|
||||
|
||||
```bash
|
||||
git clone https://github.com/IAAR-Shanghai/CRUD_RAG
|
||||
mkdir data/
|
||||
cp CRUD_RAG/data/crud_split/split_merged.json data/
|
||||
cp -r CRUD_RAG/data/80000_docs/ data/
|
||||
python process_crud_dataset.py
|
||||
```
|
||||
|
||||
### Launch Service of RAG System
|
||||
|
||||
Please refer to this [guide](https://github.com/opea-project/GenAIExamples/blob/main/ChatQnA/README.md) to launch the service of the `ChatQnA` system. For the Chinese dataset, you should replace the English embedding and LLM models with Chinese ones, for example, `EMBEDDING_MODEL_ID="BAAI/bge-base-zh-v1.5"` and `LLM_MODEL_ID=Qwen/Qwen2-7B-Instruct`.
|
||||
|
||||
### Evaluation
|
||||
|
||||
Use below command to run the evaluation, please note that for the first run, argument `--ingest_docs` should be added in the command to ingest the documents into the vector database, while for the subsequent run, this argument should be omitted.
|
||||
|
||||
If you are using docker compose to deploy the `ChatQnA` system, you can simply run the evaluation as follows:
|
||||
|
||||
```bash
|
||||
python eval_crud.py --dataset_path ./data/split_merged.json --docs_path ./data/80000_docs --ingest_docs
|
||||
|
||||
# if you want to get ragas metrics
|
||||
python eval_crud.py --dataset_path ./data/split_merged.json --docs_path ./data/80000_docs --contain_original_data --llm_endpoint "http://{llm_as_judge_ip}:{llm_as_judge_port}" --ragas_metrics
|
||||
```
|
||||
|
||||
If you are using Kubernetes manifest/helm to deploy the `ChatQnA` system, you must specify more arguments as follows:
|
||||
|
||||
```bash
|
||||
python eval_crud.py --dataset_path ./data/split_merged.json --docs_path ./data/80000_docs --ingest_docs --database_endpoint http://{your_dataprep_ip}:{your_dataprep_port}/v1/dataprep --embedding_endpoint http://{your_embedding_ip}:{your_embedding_port}/v1/embeddings --retrieval_endpoint http://{your_retrieval_ip}:{your_retrieval_port}/v1/retrieval --service_url http://{your_chatqna_ip}:{your_chatqna_port}/v1/chatqna
|
||||
```
|
||||
|
||||
The default values for arguments are:
|
||||
|Argument|Default value|
|
||||
|--------|-------------|
|
||||
|service_url|http://localhost:8888/v1/chatqna|
|
||||
|database_endpoint|http://localhost:6007/v1/dataprep|
|
||||
|embedding_endpoint|http://localhost:6000/v1/embeddings|
|
||||
|retrieval_endpoint|http://localhost:7000/v1/retrieval|
|
||||
|reranking_endpoint|http://localhost:8000/v1/reranking|
|
||||
|output_dir|./output|
|
||||
|temperature|0.1|
|
||||
|max_new_tokens|1280|
|
||||
|chunk_size|256|
|
||||
|chunk_overlap|100|
|
||||
|dataset_path|../data/split_merged.json|
|
||||
|docs_path|../data/80000_docs|
|
||||
|tasks|["question_answering"]|
|
||||
|
||||
You can check the details of the arguments using the command below:
|
||||
|
||||
```bash
|
||||
python eval_crud.py --help
|
||||
```
|
||||
|
||||
## Acknowledgements
|
||||
|
||||
This example is mostly adapted from the [MultiHop-RAG](https://github.com/yixuantt/MultiHop-RAG) and [CRUD-RAG](https://github.com/IAAR-Shanghai/CRUD_RAG) repos. We thank the authors for their great work!
|
||||
@@ -1,210 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
|
||||
from evals.evaluation.rag_eval import Evaluator
|
||||
from evals.evaluation.rag_eval.template import CRUDTemplate
|
||||
from evals.metrics.ragas import RagasMetric
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
class CRUD_Evaluator(Evaluator):
    """Evaluator for the CRUD-RAG benchmark tasks.

    The task name held in ``self.task`` selects which fields of a dataset
    record are used as query, reference document and ground truth, and which
    prompt template is requested from ``CRUDTemplate``.
    """

    # Record field that holds the reference answer, per task.
    _GROUND_TRUTH_FIELDS = {
        "summarization": "summary",
        "question_answering": "answers",
        "continuation": "continuing",
        "hallucinated_modified": "hallucinatedMod",
    }

    # Record field that holds the query sent to the pipeline, per task.
    _QUERY_FIELDS = {
        "summarization": "text",
        "question_answering": "questions",
        "continuation": "beginning",
        "hallucinated_modified": "newsBeginning",
    }

    # Record field that holds the source document, per task.
    _DOCUMENT_FIELDS = {
        "summarization": "text",
        "question_answering": "news1",
        "continuation": "beginning",
        "hallucinated_modified": "newsBeginning",
    }

    # Name of the CRUDTemplate factory method, per task.
    # NOTE(review): there is no entry for "hallucinated_modified", so
    # get_template() rejects that task even though the error text lists it as
    # supported. This mirrors the original behaviour; confirm whether a
    # template for that task exists before adding one.
    _TEMPLATE_GETTERS = {
        "summarization": "get_summarization_template",
        "question_answering": "get_question_answering_template",
        "continuation": "get_continuation_template",
    }

    def _lookup_for_task(self, table: dict):
        """Return ``table[self.task]``.

        Raises:
            NotImplementedError: if ``self.task`` has no entry in ``table``.
        """
        if self.task not in table:
            raise NotImplementedError(
                f"Unknown task {self.task}, only support "
                "summarization, question_answering, continuation and hallucinated_modified."
            )
        return table[self.task]

    def get_ground_truth_text(self, data: dict):
        """Return the reference answer stored in ``data`` for the current task."""
        return data[self._lookup_for_task(self._GROUND_TRUTH_FIELDS)]

    def get_query(self, data: dict):
        """Return the query text stored in ``data`` for the current task."""
        return data[self._lookup_for_task(self._QUERY_FIELDS)]

    def get_document(self, data: dict):
        """Return the source document stored in ``data`` for the current task."""
        return data[self._lookup_for_task(self._DOCUMENT_FIELDS)]

    def get_template(self):
        """Return the prompt template that ``CRUDTemplate`` provides for the current task."""
        getter_name = self._lookup_for_task(self._TEMPLATE_GETTERS)
        # Resolved lazily so only the factory for the active task is touched.
        return getattr(CRUDTemplate, getter_name)()

    def post_process(self, result):
        """Extract the text between ``<response>`` and ``</response>``.

        When a tag is missing, the corresponding split is a no-op, so the text
        is returned stripped but otherwise unchanged on that side.
        """
        return result.split("<response>")[-1].split("</response>")[0].strip()

    def get_ragas_metrics(self, results, arguments):
        """Compute ragas faithfulness and answer-relevancy for ``results``.

        Args:
            results: mapping with a ``"results"`` list; each valid entry must
                carry an ``"original_data"`` record with ``generated_text``,
                ``ground_truth_text`` and ``retrieved_documents``.
            arguments: parsed CLI arguments; ``tei_embedding_endpoint`` and
                ``llm_endpoint`` are read.

        Returns:
            Whatever ``RagasMetric.measure`` returns for the collected inputs.
        """
        from langchain_huggingface import HuggingFaceEndpointEmbeddings

        embeddings = HuggingFaceEndpointEmbeddings(model=arguments.tei_embedding_endpoint)

        metric = RagasMetric(
            threshold=0.5,
            model=arguments.llm_endpoint,
            embeddings=embeddings,
            metrics=["faithfulness", "answer_relevancy"],
        )

        ragas_inputs = {
            "question": [],
            "answer": [],
            "ground_truth": [],
            "contexts": [],
        }

        valid_results = self.remove_invalid(results["results"])

        for entry in tqdm(valid_results):
            record = entry["original_data"]

            ragas_inputs["question"].append(self.get_query(record))
            ragas_inputs["answer"].append(record["generated_text"])
            ragas_inputs["ground_truth"].append(record["ground_truth_text"])
            # Only the first three retrieved documents are passed as context.
            ragas_inputs["contexts"].append(record["retrieved_documents"][:3])

        return metric.measure(ragas_inputs)
|
||||
|
||||
|
||||
def _str_to_bool(value):
    """Convert a command-line string such as "true"/"false" into a bool.

    ``type=bool`` must not be used with argparse: ``bool("False")`` is True
    because every non-empty string is truthy.

    Raises:
        argparse.ArgumentTypeError: if the text is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    text = value.strip().lower()
    if text in ("true", "t", "yes", "y", "1", "on"):
        return True
    # The empty string stays False, as it was with ``bool("")``.
    if text in ("false", "f", "no", "n", "0", "off", ""):
        return False
    raise argparse.ArgumentTypeError(f"Expected a boolean value, got {value!r}.")


def args_parser():
    """Parse the command-line arguments of the CRUD evaluation script.

    Returns:
        argparse.Namespace: the parsed arguments, with defaults applied.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--service_url", type=str, default="http://localhost:8888/v1/chatqna", help="Service URL address."
    )
    parser.add_argument("--output_dir", type=str, default="./output", help="Directory to save evaluation results.")
    parser.add_argument(
        "--temperature", type=float, default=0.1, help="Controls the randomness of the model's text generation"
    )
    parser.add_argument(
        "--max_new_tokens", type=int, default=1280, help="Maximum number of new tokens to be generated by the model"
    )
    parser.add_argument(
        "--chunk_size", type=int, default=256, help="the maximum number of characters that a chunk can contain"
    )
    parser.add_argument(
        "--chunk_overlap",
        type=int,
        default=100,
        help="the number of characters that should overlap between two adjacent chunks",
    )
    parser.add_argument("--dataset_path", default="../data/split_merged.json", help="Path to the dataset")
    parser.add_argument("--docs_path", default="../data/80000_docs", help="Path to the retrieval documents")

    # Retriever related options
    parser.add_argument("--tasks", default=["question_answering"], nargs="+", help="Task to perform")
    parser.add_argument("--ingest_docs", action="store_true", help="Whether to ingest documents to vector database")
    parser.add_argument(
        "--database_endpoint", type=str, default="http://localhost:6007/v1/dataprep", help="Service URL address."
    )
    parser.add_argument(
        "--embedding_endpoint", type=str, default="http://localhost:6000/v1/embeddings", help="Service URL address."
    )
    parser.add_argument(
        "--retrieval_endpoint", type=str, default="http://localhost:7000/v1/retrieval", help="Service URL address."
    )
    parser.add_argument(
        "--tei_embedding_endpoint",
        type=str,
        default="http://localhost:8090",
        help="Service URL address of tei embedding.",
    )
    parser.add_argument("--ragas_metrics", action="store_true", help="Whether to compute ragas metrics.")
    parser.add_argument("--llm_endpoint", type=str, default=None, help="Service URL address.")
    parser.add_argument(
        "--show_progress_bar",
        action="store",
        default=True,
        type=_str_to_bool,  # was type=bool, which turned "False" into True
        help="Whether to show a progress bar",
    )
    parser.add_argument("--contain_original_data", action="store_true", help="Whether to contain original data")

    args = parser.parse_args()
    return args
|
||||
|
||||
|
||||
def main():
    """Run the CRUD evaluation for every task requested on the command line.

    Loads the dataset file, then for each task builds a ``CRUD_Evaluator``,
    optionally ingests the documents, evaluates, and prints the overall
    result (plus ragas metrics when requested).

    Raises:
        FileNotFoundError: if ``--dataset_path`` is not an existing file.
        NotImplementedError: if a requested task has no dataset section here.
    """
    args = args_parser()

    # Guard clause: fail before doing anything else when the dataset is missing.
    if not os.path.isfile(args.dataset_path):
        raise FileNotFoundError(f"Evaluation dataset file {args.dataset_path} not exist.")
    with open(args.dataset_path) as f:
        all_datasets = json.load(f)

    os.makedirs(args.output_dir, exist_ok=True)

    # Section of the dataset file used for each task handled by this script.
    dataset_sections = {
        "question_answering": "questanswer_1doc",
        "summarization": "event_summary",
    }

    for task in args.tasks:
        section = dataset_sections.get(task)
        if section is None:
            raise NotImplementedError(
                f"Unknown task {task}, only support "
                "summarization, question_answering, continuation and hallucinated_modified."
            )
        dataset = all_datasets[section]

        output_save_path = os.path.join(args.output_dir, f"{task}.json")
        evaluator = CRUD_Evaluator(dataset=dataset, output_path=output_save_path, task=task)

        if args.ingest_docs:
            CRUD_Evaluator.ingest_docs(args.docs_path, args.database_endpoint, args.chunk_size, args.chunk_overlap)

        results = evaluator.evaluate(
            args, show_progress_bar=args.show_progress_bar, contain_original_data=args.contain_original_data
        )
        print(results["overall"])

        if args.ragas_metrics:
            ragas_metrics = evaluator.get_ragas_metrics(results, args)
            print(ragas_metrics)

        print(f"Evaluation results of task {task} saved to {output_save_path}.")


if __name__ == "__main__":
    main()
|
||||
@@ -1,279 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
|
||||
import requests
|
||||
from evals.evaluation.rag_eval import Evaluator
|
||||
from evals.metrics.ragas import RagasMetric
|
||||
from evals.metrics.retrieval import RetrievalBaseMetric
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
class MultiHop_Evaluator(Evaluator):
    """Evaluator for the MultiHop-RAG benchmark.

    Talks to the embedding, retrieval and (optionally) reranking services over
    HTTP and computes retrieval metrics, answer accuracy and ragas metrics.
    Records whose ``question_type`` is ``"null_query"`` are skipped everywhere.
    """

    def get_ground_truth_text(self, data: dict):
        """Return the reference answer of a dataset record."""
        return data["answer"]

    def get_query(self, data: dict):
        """Return the query text of a dataset record."""
        return data["query"]

    def get_template(self):
        """Return ``None``; this evaluator supplies no prompt template."""
        return None

    def get_reranked_documents(self, query, docs, arguments):
        """Send ``docs`` to the reranking service and return its ``documents`` field.

        Args:
            query: the query text.
            docs: list of document texts to rerank.
            arguments: parsed CLI arguments; ``reranking_endpoint`` is read.

        Returns:
            The ``"documents"`` value of the JSON response, or ``[]`` when the
            request fails.
        """
        data = {
            "initial_query": query,
            "retrieved_docs": [{"text": doc} for doc in docs],
            "top_n": 10,
        }
        headers = {"Content-Type": "application/json"}

        response = requests.post(arguments.reranking_endpoint, data=json.dumps(data), headers=headers)
        if response.ok:
            return response.json()["documents"]
        # The original message said "retrieval", which pointed at the wrong service.
        print(f"Request for reranking failed due to {response.text}.")
        return []

    def get_retrieved_documents(self, query, arguments):
        """Embed ``query`` and fetch matching document texts from the retrieval service.

        Args:
            query: the query text.
            arguments: parsed CLI arguments; ``embedding_endpoint``,
                ``retrieval_endpoint``, ``search_type``, ``retrival_k``,
                ``fetch_k`` and ``lambda_mult`` are read.

        Returns:
            List of the ``text`` of each retrieved document, or ``[]`` when
            either request fails.
        """
        headers = {"Content-Type": "application/json"}

        response = requests.post(arguments.embedding_endpoint, data=json.dumps({"text": query}), headers=headers)
        if not response.ok:
            print(f"Request for embedding failed due to {response.text}.")
            return []
        embedding = response.json()["embedding"]

        data = {
            "text": query,
            "embedding": embedding,
            "search_type": arguments.search_type,
            "k": arguments.retrival_k,
            "fetch_k": arguments.fetch_k,
            "lambda_mult": arguments.lambda_mult,
        }
        response = requests.post(arguments.retrieval_endpoint, data=json.dumps(data), headers=headers)
        if not response.ok:
            print(f"Request for retrieval failed due to {response.text}.")
            return []
        return [doc["text"] for doc in response.json()["retrieved_docs"]]

    def get_retrieval_metrics(self, all_queries, arguments):
        """Average Hits@10, Hits@4, MAP@10 and MRR@10 over all non-null queries.

        Args:
            all_queries: list of dataset records with ``question_type``,
                ``query`` and ``evidence_list``.
            arguments: parsed CLI arguments; ``rerank`` toggles the reranking
                step.

        Returns:
            dict mapping each metric name to its mean value. All values are
            ``0.0`` when no query was scored (previously a ZeroDivisionError).
        """
        print("start to retrieve...")
        metric = RetrievalBaseMetric()
        metric_names = ("Hits@10", "Hits@4", "MAP@10", "MRR@10")
        sums = dict.fromkeys(metric_names, 0)
        total = 0

        for data in tqdm(all_queries):
            if data["question_type"] == "null_query":
                continue
            query = data["query"]
            retrieved_documents = self.get_retrieved_documents(query, arguments)
            if arguments.rerank:
                retrieved_documents = self.get_reranked_documents(query, retrieved_documents, arguments)
            golden_context = [each["fact"] for each in data["evidence_list"]]
            test_case = {
                "input": query,
                "golden_context": golden_context,
                "retrieval_context": retrieved_documents,
            }
            results = metric.measure(test_case)
            for name in metric_names:
                sums[name] += results[name]
            total += 1

        if total == 0:
            # Nothing was scored (empty dataset or only null queries).
            return dict.fromkeys(metric_names, 0.0)

        # Calculate average metrics over all queries
        return {name: sums[name] / total for name in metric_names}

    def evaluate(self, all_queries, arguments):
        """Query the pipeline for every non-null record and score the answers.

        A response counts as accurate when the reference answer appears
        verbatim in the generated text.

        Returns:
            dict with the base-class overall scores (empty when they cannot be
            computed) plus ``"accuracy"``; accuracy is ``0.0`` when no record
            was evaluated (previously a ZeroDivisionError).
        """
        results = []
        accuracy = 0
        index = 0
        for data in tqdm(all_queries):
            if data["question_type"] == "null_query":
                continue

            generated_text = self.send_request(data, arguments)
            data["generated_text"] = generated_text

            # same method with paper: https://github.com/yixuantt/MultiHop-RAG/issues/8
            if data["answer"] in generated_text:
                accuracy += 1
            result = {"id": index, **self.scoring(data)}
            results.append(result)
            index += 1

        valid_results = self.remove_invalid(results)

        try:
            overall = self.compute_overall(valid_results) if len(valid_results) > 0 else {}
        except Exception as e:
            print(repr(e))
            overall = dict()

        overall.update({"accuracy": accuracy / len(results) if results else 0.0})
        return overall

    def get_ragas_metrics(self, all_queries, arguments):
        """Compute ragas metrics on at most ``arguments.limits`` non-null queries.

        Args:
            all_queries: list of dataset records with ``question_type``,
                ``query`` and ``answer``.
            arguments: parsed CLI arguments; ``tei_embedding_endpoint``,
                ``llm_endpoint`` and ``limits`` are read here.

        Returns:
            Whatever ``RagasMetric.measure`` returns for the collected inputs.
        """
        from langchain_huggingface import HuggingFaceEndpointEmbeddings

        embeddings = HuggingFaceEndpointEmbeddings(model=arguments.tei_embedding_endpoint)

        metric = RagasMetric(threshold=0.5, model=arguments.llm_endpoint, embeddings=embeddings)
        ragas_inputs = {
            "question": [],
            "answer": [],
            "ground_truth": [],
            "contexts": [],
        }

        for data in tqdm(all_queries):
            if data["question_type"] == "null_query":
                continue
            retrieved_documents = self.get_retrieved_documents(data["query"], arguments)
            generated_text = self.send_request(data, arguments)
            data["generated_text"] = generated_text

            ragas_inputs["question"].append(data["query"])
            ragas_inputs["answer"].append(generated_text)
            ragas_inputs["ground_truth"].append(data["answer"])
            # Only the first three retrieved documents are passed as context.
            ragas_inputs["contexts"].append(retrieved_documents[:3])

            # LLM-as-judge is slow, so stop once the requested sample size is reached.
            if len(ragas_inputs["question"]) >= arguments.limits:
                break

        return metric.measure(ragas_inputs)
|
||||
|
||||
|
||||
def _str_to_bool(value):
    """Convert a command-line string such as "true"/"false" into a bool.

    ``type=bool`` must not be used with argparse: ``bool("False")`` is True
    because every non-empty string is truthy.

    Raises:
        argparse.ArgumentTypeError: if the text is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    text = value.strip().lower()
    if text in ("true", "t", "yes", "y", "1", "on"):
        return True
    # The empty string stays False, as it was with ``bool("")``.
    if text in ("false", "f", "no", "n", "0", "off", ""):
        return False
    raise argparse.ArgumentTypeError(f"Expected a boolean value, got {value!r}.")


def args_parser():
    """Parse the command-line arguments of the MultiHop evaluation script.

    Returns:
        argparse.Namespace: the parsed arguments, with defaults applied.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--service_url", type=str, default="http://localhost:8888/v1/chatqna", help="Service URL address."
    )
    parser.add_argument("--output_dir", type=str, default="./output", help="Directory to save evaluation results.")
    parser.add_argument(
        "--temperature", type=float, default=0.1, help="Controls the randomness of the model's text generation"
    )
    parser.add_argument(
        "--max_new_tokens", type=int, default=1280, help="Maximum number of new tokens to be generated by the model"
    )
    parser.add_argument(
        "--chunk_size", type=int, default=256, help="the maximum number of characters that a chunk can contain"
    )
    parser.add_argument(
        "--chunk_overlap",
        type=int,
        default=100,
        help="the number of characters that should overlap between two adjacent chunks",
    )
    parser.add_argument("--search_type", type=str, default="similarity", help="similarity type")
    parser.add_argument("--retrival_k", type=int, default=10, help="Number of Documents to return.")
    parser.add_argument(
        "--fetch_k", type=int, default=20, help="Number of Documents to fetch to pass to MMR algorithm."
    )
    parser.add_argument(
        "--lambda_mult",
        type=float,
        default=0.5,
        help="Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.",
    )
    parser.add_argument("--dataset_path", default=None, help="Path to the dataset")
    parser.add_argument("--docs_path", default=None, help="Path to the retrieval documents")

    # Retriever related options
    parser.add_argument("--ingest_docs", action="store_true", help="Whether to ingest documents to vector database")
    parser.add_argument("--retrieval_metrics", action="store_true", help="Whether to compute retrieval metrics.")
    parser.add_argument("--ragas_metrics", action="store_true", help="Whether to compute ragas metrics.")
    parser.add_argument("--limits", type=int, default=100, help="Number of examples to be evaluated by llm-as-judge")
    parser.add_argument(
        "--database_endpoint", type=str, default="http://localhost:6007/v1/dataprep", help="Service URL address."
    )
    parser.add_argument(
        "--embedding_endpoint", type=str, default="http://localhost:6000/v1/embeddings", help="Service URL address."
    )
    parser.add_argument(
        "--tei_embedding_endpoint",
        type=str,
        default="http://localhost:8090",
        help="Service URL address of tei embedding.",
    )
    parser.add_argument(
        "--retrieval_endpoint", type=str, default="http://localhost:7000/v1/retrieval", help="Service URL address."
    )
    parser.add_argument("--rerank", action="store_true", help="Whether to use rerank microservice.")
    parser.add_argument(
        "--reranking_endpoint", type=str, default="http://localhost:8000/v1/reranking", help="Service URL address."
    )
    parser.add_argument("--llm_endpoint", type=str, default=None, help="Service URL address.")
    parser.add_argument(
        "--show_progress_bar",
        action="store",
        default=True,
        type=_str_to_bool,  # was type=bool, which turned "False" into True
        help="Whether to show a progress bar",
    )
    parser.add_argument("--contain_original_data", action="store_true", help="Whether to contain original data")

    args = parser.parse_args()
    return args
|
||||
|
||||
|
||||
def main():
    """Run the MultiHop-RAG evaluation selected on the command line.

    With ``--ingest_docs`` the corpus JSON at ``--docs_path`` is flattened to a
    temporary text file (one document body per line) and sent to the dataprep
    service. The dataset at ``--dataset_path`` is then loaded, and retrieval
    and/or ragas metrics are computed and printed as requested.

    Raises:
        ValueError: if ``--dataset_path`` is missing, or ``--docs_path`` is
            missing while ``--ingest_docs`` is set.
    """
    args = args_parser()

    # Both paths default to None; fail with a clear message instead of the
    # TypeError that open(None) would raise.
    if args.dataset_path is None:
        raise ValueError("--dataset_path is required.")
    if args.ingest_docs and args.docs_path is None:
        raise ValueError("--docs_path is required when --ingest_docs is set.")

    evaluator = MultiHop_Evaluator()

    if args.ingest_docs:
        # The corpus is only needed for ingestion, so it is read only here.
        with open(args.docs_path, "r") as file:
            doc_data = json.load(file)

        # save docs to a tmp file
        tmp_corpus_file = "tmp_corpus.txt"
        with open(tmp_corpus_file, "w") as f:
            for doc in doc_data:
                f.write(doc["body"] + "\n")

        evaluator.ingest_docs(tmp_corpus_file, args.database_endpoint, args.chunk_size, args.chunk_overlap)

    with open(args.dataset_path, "r") as file:
        all_queries = json.load(file)

    # get retrieval quality
    if args.retrieval_metrics:
        retrieval_metrics = evaluator.get_retrieval_metrics(all_queries, args)
        print(retrieval_metrics)

    # get rag quality
    if args.ragas_metrics:
        ragas_metrics = evaluator.get_ragas_metrics(all_queries, args)
        print(ragas_metrics)


if __name__ == "__main__":
    main()
|
||||
@@ -1,9 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import os
|
||||
|
||||
# Append ".txt" to every file in ./data/80000_docs (relative to this script).
path = os.path.join(os.path.dirname(__file__), "./data/80000_docs")
for file in os.listdir(path):
    # Skip files renamed by an earlier run so re-running does not yield "*.txt.txt".
    if file.endswith(".txt"):
        continue
    src_file = os.path.join(path, file)
    os.rename(src_file, src_file + ".txt")
|
||||
@@ -1,64 +0,0 @@
|
||||
#!/bin/bash
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
set -x
|
||||
|
||||
# Entry point: parse the arguments, then run the evaluation for the chosen dataset.
# Exits with status 1 when --dataset is missing or not one of MultiHop / crud
# (the script previously did nothing and reported success in that case).
function main {

    init_params "$@"
    # run_benchmark
    echo "${dataset}"
    case "${dataset}" in
        MultiHop)
            run_multihop
            ;;
        crud)
            run_crud
            ;;
        *)
            echo "Error: unsupported or missing dataset '${dataset}', use --dataset=MultiHop or --dataset=crud" >&2
            exit 1
            ;;
    esac

}
|
||||
|
||||
# init params
|
||||
# Parse "--dataset=<name>" into the global variable `dataset`.
# Any other argument prints an error and exits with status 1.
function init_params {
    for var in "$@"
    do
        case $var in
            --dataset=*)
                # Strip everything up to the first "=". Unlike the unquoted
                # `echo $var | cut`, this does no word splitting or globbing
                # and keeps values that themselves contain "=".
                dataset="${var#*=}"
            ;;
            *)
                echo "Error: No such parameter: ${var}"
                exit 1
            ;;
        esac
    done
}
|
||||
|
||||
# run_multihop
|
||||
# Fetch the MultiHop-RAG dataset (once) and run the retrieval evaluation on it.
function run_multihop {
    # Skip the clone when a previous run already fetched the repository;
    # `git clone` fails if the target directory exists.
    if [[ ! -d MultiHop-RAG ]]; then
        git clone https://github.com/yixuantt/MultiHop-RAG.git
    fi

    python eval_multihop.py \
        --docs_path MultiHop-RAG/dataset/corpus.json \
        --dataset_path MultiHop-RAG/dataset/MultiHopRAG.json \
        --ingest_docs \
        --retrieval_metrics

}
|
||||
|
||||
# run_crud
|
||||
# Fetch the CRUD-RAG dataset (once), stage it under ./data and run the evaluation.
function run_crud {

    # Skip the clone when a previous run already fetched the repository;
    # `git clone` fails if the target directory exists.
    if [[ ! -d CRUD_RAG ]]; then
        git clone https://github.com/IAAR-Shanghai/CRUD_RAG
    fi
    # -p: do not fail when data/ is left over from an earlier run.
    mkdir -p data/
    cp CRUD_RAG/data/crud_split/split_merged.json data/
    cp -r CRUD_RAG/data/80000_docs/ data/
    python process_crud_dataset.py

    python eval_crud.py \
        --dataset_path ./data/split_merged.json \
        --docs_path ./data/80000_docs \
        --ingest_docs
}
|
||||
|
||||
|
||||
main "$@"
|
||||
@@ -88,9 +88,22 @@ find . -name '*.yaml' -type f -exec sed -i "s#\$(EMBEDDING_MODEL_ID)#${EMBEDDING
|
||||
find . -name '*.yaml' -type f -exec sed -i "s#\$(RERANK_MODEL_ID)#${RERANK_MODEL_ID}#g" {} \;
|
||||
```
|
||||
|
||||
### Benchmark tool preparation
|
||||
|
||||
The test uses the [benchmark tool](https://github.com/opea-project/GenAIEval/tree/main/evals/benchmark/README.md) to run performance tests. We need to set up the benchmark tool on the Kubernetes master node (k8s-master).
|
||||
|
||||
```bash
|
||||
# on k8s-master node
|
||||
git clone https://github.com/opea-project/GenAIEval.git
|
||||
cd GenAIEval
|
||||
python3 -m venv stress_venv
|
||||
source stress_venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
### Test Configurations
|
||||
|
||||
By default, the workload and benchmark configuration is as below:
|
||||
Workload configuration:
|
||||
|
||||
| Key | Value |
|
||||
| -------- | ------- |
|
||||
@@ -176,21 +189,24 @@ curl -X POST "http://${cluster_ip}:6007/v1/dataprep" \
|
||||
|
||||
###### 3.2 Run Benchmark Test
|
||||
|
||||
Before the benchmark, we can configure the number of test queries and test output directory by:
|
||||
We copy the configuration file [benchmark.yaml](./benchmark.yaml) to `GenAIEval/evals/benchmark/benchmark.yaml` and configure `test_suite_config.deployment_type`, `test_suite_config.service_ip`, `test_suite_config.service_port`, `test_suite_config.user_queries` and `test_suite_config.test_output_dir`.
|
||||
|
||||
```bash
|
||||
export DEPLOYMENT_TYPE="k8s"
|
||||
export SERVICE_IP=None
|
||||
export SERVICE_PORT=None
|
||||
export USER_QUERIES="[640, 640, 640, 640]"
|
||||
export TEST_OUTPUT_DIR="/home/sdp/benchmark_output/node_1"
|
||||
envsubst < ./benchmark.yaml > GenAIEval/evals/benchmark/benchmark.yaml
|
||||
```
|
||||
|
||||
And then run the benchmark by:
|
||||
And then run the benchmark tool by:
|
||||
|
||||
```bash
|
||||
bash benchmark.sh -n 1
|
||||
cd GenAIEval/evals/benchmark
|
||||
python benchmark.py
|
||||
```
|
||||
|
||||
The argument `-n` refers to the number of test nodes. Note that necessary dependencies will be automatically installed when running benchmark for the first time.
|
||||
|
||||
##### 4. Data collection
|
||||
|
||||
All the test results will come to this folder `/home/sdp/benchmark_output/node_1` configured by the environment variable `TEST_OUTPUT_DIR` in previous steps.
|
||||
@@ -226,20 +242,22 @@ kubectl apply -f .
|
||||
|
||||
##### 3. Run tests
|
||||
|
||||
Before the benchmark, we can configure the number of test queries and test output directory by:
|
||||
We copy the configuration file [benchmark.yaml](./benchmark.yaml) to `GenAIEval/evals/benchmark/benchmark.yaml` and configure `test_suite_config.deployment_type`, `test_suite_config.service_ip`, `test_suite_config.service_port`, `test_suite_config.user_queries` and `test_suite_config.test_output_dir`.
|
||||
|
||||
```bash
|
||||
````bash
|
||||
export DEPLOYMENT_TYPE="k8s"
|
||||
export SERVICE_IP=None
|
||||
export SERVICE_PORT=None
|
||||
export USER_QUERIES="[1280, 1280, 1280, 1280]"
|
||||
export TEST_OUTPUT_DIR="/home/sdp/benchmark_output/node_2"
|
||||
```
|
||||
envsubst < ./benchmark.yaml > GenAIEval/evals/benchmark/benchmark.yaml
|
||||
|
||||
And then run the benchmark by:
|
||||
And then run the benchmark tool by:
|
||||
|
||||
```bash
|
||||
bash benchmark.sh -n 2
|
||||
```
|
||||
|
||||
The argument `-n` refers to the number of test nodes. Note that necessary dependencies will be automatically installed when running benchmark for the first time.
|
||||
cd GenAIEval/evals/benchmark
|
||||
python benchmark.py
|
||||
````
|
||||
|
||||
##### 4. Data collection
|
||||
|
||||
@@ -275,21 +293,24 @@ kubectl apply -f .
|
||||
|
||||
##### 3. Run tests
|
||||
|
||||
Before the benchmark, we can configure the number of test queries and test output directory by:
|
||||
We copy the configuration file [benchmark.yaml](./benchmark.yaml) to `GenAIEval/evals/benchmark/benchmark.yaml` and configure `test_suite_config.deployment_type`, `test_suite_config.service_ip`, `test_suite_config.service_port`, `test_suite_config.user_queries` and `test_suite_config.test_output_dir`.
|
||||
|
||||
```bash
|
||||
export DEPLOYMENT_TYPE="k8s"
|
||||
export SERVICE_IP=None
|
||||
export SERVICE_PORT=None
|
||||
export USER_QUERIES="[2560, 2560, 2560, 2560]"
|
||||
export TEST_OUTPUT_DIR="/home/sdp/benchmark_output/node_4"
|
||||
envsubst < ./benchmark.yaml > GenAIEval/evals/benchmark/benchmark.yaml
|
||||
```
|
||||
|
||||
And then run the benchmark by:
|
||||
And then run the benchmark tool by:
|
||||
|
||||
```bash
|
||||
bash benchmark.sh -n 4
|
||||
cd GenAIEval/evals/benchmark
|
||||
python benchmark.py
|
||||
```
|
||||
|
||||
The argument `-n` refers to the number of test nodes. Note that necessary dependencies will be automatically installed when running benchmark for the first time.
|
||||
|
||||
##### 4. Data collection
|
||||
|
||||
All the test results will come to this folder `/home/sdp/benchmark_output/node_4` configured by the environment variable `TEST_OUTPUT_DIR` in previous steps.
|
||||
@@ -348,21 +369,24 @@ Refer to the [NVIDIA GPU Guide](../../docker_compose/nvidia/gpu/README.md) for m
|
||||
|
||||
### Run tests
|
||||
|
||||
Before the benchmark, we can configure the number of test queries and test output directory by:
|
||||
We copy the configuration file [benchmark.yaml](./benchmark.yaml) to `GenAIEval/evals/benchmark/benchmark.yaml` and configure `test_suite_config.deployment_type`, `test_suite_config.service_ip`, `test_suite_config.service_port`, `test_suite_config.user_queries` and `test_suite_config.test_output_dir`.
|
||||
|
||||
```bash
|
||||
export DEPLOYMENT_TYPE="docker"
|
||||
export SERVICE_IP="ChatQnA Service IP"
|
||||
export SERVICE_PORT="ChatQnA Service Port"
|
||||
export USER_QUERIES="[640, 640, 640, 640]"
|
||||
export TEST_OUTPUT_DIR="/home/sdp/benchmark_output/docker"
|
||||
envsubst < ./benchmark.yaml > GenAIEval/evals/benchmark/benchmark.yaml
|
||||
```
|
||||
|
||||
And then run the benchmark by:
|
||||
And then run the benchmark tool by:
|
||||
|
||||
```bash
|
||||
bash benchmark.sh -d docker -i <service-ip> -p <service-port>
|
||||
cd GenAIEval/evals/benchmark
|
||||
python benchmark.py
|
||||
```
|
||||
|
||||
The arguments `-i` and `-p` refer to the deployed ChatQnA service IP and port, respectively. Note that necessary dependencies will be automatically installed when running the benchmark for the first time.
|
||||
|
||||
### Data collection
|
||||
|
||||
All the test results will come to this folder `/home/sdp/benchmark_output/docker` configured by the environment variable `TEST_OUTPUT_DIR` in previous steps.
|
||||
|
||||
@@ -1,99 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
deployment_type="k8s"
|
||||
node_number=1
|
||||
service_port=8888
|
||||
query_per_node=640
|
||||
|
||||
benchmark_tool_path="$(pwd)/GenAIEval"
|
||||
|
||||
# Print the command-line synopsis and option descriptions, then terminate the
# script with status 1.
usage() {
    printf '%s\n' \
        "Usage: $0 [-d deployment_type] [-n node_number] [-i service_ip] [-p service_port]" \
        " -d deployment_type ChatQnA deployment type, select between k8s and docker (default: k8s)" \
        " -n node_number Test node number, required only for k8s deployment_type, (default: 1)" \
        " -i service_ip chatqna service ip, required only for docker deployment_type" \
        " -p service_port chatqna service port, required only for docker deployment_type, (default: 8888)"
    exit 1
}
|
||||
|
||||
# Parse the command-line options, then validate the combination supplied.
#
# service_port is pre-seeded with a default before this point, so testing it for
# non-emptiness cannot tell "user passed -p" from "default in effect". Track the
# explicit use of -p separately so the k8s warning below fires only when it applies.
service_port_given=false

while getopts ":d:n:i:p:" opt; do
    case ${opt} in
        d )
            deployment_type=$OPTARG
            ;;
        n )
            node_number=$OPTARG
            ;;
        i )
            service_ip=$OPTARG
            ;;
        p )
            service_port=$OPTARG
            service_port_given=true
            ;;
        \? )
            # Unknown option letter.
            echo "Invalid option: -$OPTARG" 1>&2
            usage
            ;;
        : )
            # Known option given without its required argument.
            echo "Invalid option: -$OPTARG requires an argument" 1>&2
            usage
            ;;
    esac
done

# A docker deployment cannot be reached without an explicit service address.
if [[ "$deployment_type" == "docker" && -z "$service_ip" ]]; then
    echo "Error: service_ip is required for docker deployment_type" 1>&2
    usage
fi

# Warn only when -i or -p was actually passed for a k8s deployment.
if [[ "$deployment_type" == "k8s" && ( -n "$service_ip" || "$service_port_given" == true ) ]]; then
    echo "Warning: service_ip and service_port are ignored for k8s deployment_type" 1>&2
fi
|
||||
|
||||
# Entry point: set up the benchmark tool when its directory is absent, then
# launch the benchmark.
function main() {
    # No directory at benchmark_tool_path means the tool has not been set up here yet.
    [[ -d ${benchmark_tool_path} ]] || {
        echo "Benchmark tool not found, setting up..."
        setup_env
    }
    run_benchmark
}
|
||||
|
||||
# Clone GenAIEval into benchmark_tool_path, create the "stress_venv" virtual
# environment inside it, and install the tool's requirements into that venv.
# Returns 1 if the clone or the directory change fails.
function setup_env() {
    # Clone to the explicit target so the checkout always lands where the rest
    # of the script looks for it, regardless of the current directory.
    git clone https://github.com/opea-project/GenAIEval.git "${benchmark_tool_path}" || return 1
    # Bail out rather than creating the venv / installing packages in the wrong directory.
    pushd "${benchmark_tool_path}" || return 1
    python3 -m venv stress_venv
    source stress_venv/bin/activate
    pip install -r requirements.txt
    popd
}
|
||||
|
||||
# Export the settings consumed by benchmark.yaml, render that template into the
# benchmark tool's directory with envsubst, and run benchmark.py from there.
#
# Reads:   deployment_type, service_ip, service_port, query_per_node, node_number,
#          benchmark_tool_path, and optionally USER_QUERIES / TEST_OUTPUT_DIR.
# Exports: DEPLOYMENT_TYPE, SERVICE_IP, SERVICE_PORT, USER_QUERIES, WARMUP,
#          TEST_OUTPUT_DIR.
function run_benchmark() {
    source "${benchmark_tool_path}/stress_venv/bin/activate"
    export DEPLOYMENT_TYPE=${deployment_type}
    export SERVICE_IP=${service_ip:-"None"}
    export SERVICE_PORT=${service_port:-"None"}
    if [[ -z $USER_QUERIES ]]; then
        user_query=$((query_per_node*node_number))
        export USER_QUERIES="[${user_query}, ${user_query}, ${user_query}, ${user_query}]"
        echo "USER_QUERIES not configured, setting to: ${USER_QUERIES}."
    fi
    # WARMUP is the first element of USER_QUERIES: drop the brackets, then cut
    # everything from the first comma on. Quote the expansion so a value such
    # as "[640]" is never treated as a glob pattern by the shell.
    export WARMUP=$(echo "$USER_QUERIES" | sed -e 's/[][]//g' -e 's/,.*//')
    if [[ -z $WARMUP ]]; then export WARMUP=0; fi
    if [[ -z $TEST_OUTPUT_DIR ]]; then
        if [[ $DEPLOYMENT_TYPE == "k8s" ]]; then
            export TEST_OUTPUT_DIR="${benchmark_tool_path}/evals/benchmark/benchmark_output/node_${node_number}"
        else
            export TEST_OUTPUT_DIR="${benchmark_tool_path}/evals/benchmark/benchmark_output/docker"
        fi
        echo "TEST_OUTPUT_DIR not configured, setting to: ${TEST_OUTPUT_DIR}."
    fi

    # Paths are quoted so a working directory containing spaces does not break
    # the redirection or the cd.
    envsubst < ./benchmark.yaml > "${benchmark_tool_path}/evals/benchmark/benchmark.yaml"
    # Do not run benchmark.py from the wrong directory if the cd fails.
    cd "${benchmark_tool_path}/evals/benchmark" || return 1
    python benchmark.py
}
|
||||
|
||||
main
|
||||
@@ -6,24 +6,14 @@ test_suite_config: # Overall configuration settings for the test suite
|
||||
deployment_type: ${DEPLOYMENT_TYPE} # Default is "k8s", can also be "docker"
|
||||
service_ip: ${SERVICE_IP} # Leave as None for k8s, specify for Docker
|
||||
service_port: ${SERVICE_PORT} # Leave as None for k8s, specify for Docker
|
||||
warm_ups: ${WARMUP} # Number of test requests for warm-up
|
||||
run_time: 60m # The max total run time for the test suite
|
||||
seed: # The seed for all RNGs
|
||||
concurrent_level: 5 # The concurrency level, adjustable based on requirements
|
||||
user_queries: ${USER_QUERIES} # Number of test requests at each concurrency level
|
||||
query_timeout: 120 # Number of seconds to wait for a simulated user to complete any executing task before exiting. 120 sec by default.
|
||||
random_prompt: false # Use random prompts if true, fixed prompts if false
|
||||
run_time: 60m # The max total run time for the test suite
|
||||
collect_service_metric: false # Collect service metrics if true, do not collect service metrics if false
|
||||
data_visualization: false # Generate data visualization if true, do not generate data visualization if false
|
||||
llm_model: "Intel/neural-chat-7b-v3-3" # The LLM model used for the test
|
||||
test_output_dir: "${TEST_OUTPUT_DIR}" # The directory to store the test output
|
||||
load_shape: # Tenant concurrency pattern
|
||||
name: constant # poisson or constant(locust default load shape)
|
||||
params: # Loadshape-specific parameters
|
||||
constant: # Constant load shape specific parameters, activate only if load_shape.name is constant
|
||||
concurrent_level: 5 # If user_queries is specified, concurrent_level is target number of requests per user. If not, it is the number of simulated users
|
||||
# arrival_rate: 1.0 # Request arrival rate. If set, concurrent_level will be overridden, constant load will be generated based on arrival-rate
|
||||
poisson: # Poisson load shape specific parameters, activate only if load_shape.name is poisson
|
||||
arrival_rate: 1.0 # Request arrival rate
|
||||
|
||||
test_cases:
|
||||
chatqna:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# Benchmarking Deployment
|
||||
# ChatQnA Deployment
|
||||
|
||||
This document guides you through deploying these example pipelines using Helm charts. Helm charts simplify managing Kubernetes applications by packaging configuration and resources.
|
||||
This document guides you through deploying ChatQnA pipelines using Helm charts. Helm charts simplify managing Kubernetes applications by packaging configuration and resources.
|
||||
|
||||
## Getting Started
|
||||
|
||||
@@ -8,19 +8,31 @@ This document guides you through deploying this example pipelines using Helm cha
|
||||
|
||||
```bash
|
||||
# on k8s-master node
|
||||
cd GenAIExamples/{example_name}/benchmark/performance/helm_charts
|
||||
cd GenAIExamples/ChatQnA/benchmark/performance/helm_charts
|
||||
|
||||
# Replace <your token> with your actual Hugging Face token and run the following command:
|
||||
HUGGINGFACE_TOKEN=<your token>
|
||||
find . -name '*.yaml' -type f -exec sed -i "s#\${HF_TOKEN}#${HUGGINGFACE_TOKEN}#g" {} \;
|
||||
|
||||
# Replace the following placeholders with the desired model IDs:
|
||||
LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
|
||||
EMBEDDING_MODEL_ID=BAAI/bge-base-en-v1.5
|
||||
RERANK_MODEL_ID=BAAI/bge-reranker-base
|
||||
find . -name '*.yaml' -type f -exec sed -i "s#\$(LLM_MODEL_ID)#${LLM_MODEL_ID}#g" {} \;
|
||||
find . -name '*.yaml' -type f -exec sed -i "s#\$(EMBEDDING_MODEL_ID)#${EMBEDDING_MODEL_ID}#g" {} \;
|
||||
find . -name '*.yaml' -type f -exec sed -i "s#\$(RERANK_MODEL_ID)#${RERANK_MODEL_ID}#g" {} \;
|
||||
|
||||
# Replace the key of HUGGINGFACEHUB_API_TOKEN with your actual Hugging Face token:
|
||||
# vim hpu_with_rerank.yaml or hpu_without_rerank.yaml
|
||||
HUGGINGFACEHUB_API_TOKEN: hf_xxxxx
|
||||
```
|
||||
|
||||
### Deployment
|
||||
### ChatQnA Installation
|
||||
|
||||
```bash
|
||||
# Options:
|
||||
# --num_nodes choices=[1, 2, 4, 8]
|
||||
# --mode choices=["tuned", "oob"]
|
||||
# --workflow choices=["with_rerank", "without_rerank"]
|
||||
python deployment.py --workflow=with_rerank --mode=tuned --num_nodes=1
|
||||
# Deploy a ChatQnA pipeline using the specified YAML configuration.
|
||||
# To deploy with different configurations, simply provide a different YAML file.
|
||||
helm install chatqna helm_charts/ -f helm_charts/oob_single_node.yaml
|
||||
|
||||
# Tips: To display rendered manifests according to the given yaml.
|
||||
helm template chatqna helm_charts/ -f helm_charts/oob_single_node.yaml
|
||||
```
|
||||
|
||||
Notes: The provided [BKC manifests](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/benchmark) for single, two, and four node Kubernetes clusters are generated using this tool.
|
||||
|
||||
@@ -1,48 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
podSpecs:
|
||||
- name: chatqna-backend-server-deploy
|
||||
replicas: 2
|
||||
resources:
|
||||
limits:
|
||||
cpu: "8"
|
||||
memory: "8000Mi"
|
||||
requests:
|
||||
cpu: "8"
|
||||
memory: "8000Mi"
|
||||
|
||||
- name: embedding-dependency-deploy
|
||||
replicas: 1
|
||||
resources:
|
||||
limits:
|
||||
cpu: "80"
|
||||
memory: "20000Mi"
|
||||
requests:
|
||||
cpu: "80"
|
||||
memory: "20000Mi"
|
||||
|
||||
- name: reranking-dependency-deploy
|
||||
replicas: 1
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
|
||||
- name: llm-dependency-deploy
|
||||
replicas: 7
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
|
||||
- name: dataprep-deploy
|
||||
replicas: 1
|
||||
|
||||
- name: vector-db
|
||||
replicas: 1
|
||||
|
||||
- name: retriever-deploy
|
||||
replicas: 2
|
||||
resources:
|
||||
requests:
|
||||
cpu: "4"
|
||||
memory: "4000Mi"
|
||||
@@ -1,168 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
import yaml
|
||||
|
||||
|
||||
def generate_yaml(num_nodes, mode="oob", with_rerank=True):
    """Write a ``podSpecs`` YAML override file for the pipeline and return its path.

    Args:
        num_nodes: Node count used to scale the replica numbers. ``1`` uses a
            dedicated layout (two backend and two retriever replicas).
        mode: With ``"tuned"`` each pod spec also carries resource
            requests/limits and the LLM server launch arguments; any other
            value emits replica counts only.
        with_rerank: Truthy to include the reranking pod. When it is present
            the LLM pod gets one replica fewer.

    Returns:
        Path of the generated file, joined onto the current working directory.
    """
    # Order matters: it is the order of the entries in the emitted podSpecs list.
    pods_list = [
        "chatqna-backend-server-deploy",
        "embedding-dependency-deploy",
        "dataprep-deploy",
        "vector-db",
        "retriever-deploy",
    ]
    if with_rerank:
        pods_list.append("reranking-dependency-deploy")
    pods_list.append("llm-dependency-deploy")

    # Backend and retriever run two replicas on a single node, otherwise one per node.
    scaled_replicas = 2 if num_nodes == 1 else 1 * num_nodes
    replicas_dict = {
        "chatqna-backend-server-deploy": scaled_replicas,
        "embedding-dependency-deploy": 1 if num_nodes == 1 else 1 * num_nodes,
        "llm-dependency-deploy": (8 * num_nodes) - 1 if with_rerank else 8 * num_nodes,
        "dataprep-deploy": 1,
        "vector-db": 1,
        "retriever-deploy": scaled_replicas,
    }

    resources_dict = {
        "chatqna-backend-server-deploy": {
            "limits": {"cpu": "16", "memory": "8000Mi"},
            "requests": {"cpu": "16", "memory": "8000Mi"},
        },
        "embedding-dependency-deploy": {
            "limits": {"cpu": "80", "memory": "20000Mi"},
            "requests": {"cpu": "80", "memory": "20000Mi"},
        },
        "llm-dependency-deploy": {"limits": {"habana.ai/gaudi": 1}},
        "retriever-deploy": {"requests": {"cpu": "8", "memory": "8000Mi"}},
    }

    # Reranking entries are added only when requested, instead of building
    # None placeholders and filtering them out afterwards.
    if with_rerank:
        replicas_dict["reranking-dependency-deploy"] = 1
        resources_dict["reranking-dependency-deploy"] = {"limits": {"habana.ai/gaudi": 1}}

    tgi_params_dict = {
        "llm-dependency-deploy": [
            {"name": "--model-id", "value": "$(LLM_MODEL_ID)"},
            {"name": "--max-input-length", "value": 1280},
            {"name": "--max-total-tokens", "value": 2048},
            {"name": "--max-batch-total-tokens", "value": 65536},
            {"name": "--max-batch-prefill-tokens", "value": 4096},
        ],
    }

    # (pod-spec key, per-pod values) pairs that are merged into each pod spec.
    sections = [("replicas", replicas_dict)]
    if mode == "tuned":
        sections.extend([("resources", resources_dict), ("args", tgi_params_dict)])

    merged_specs = {"podSpecs": []}
    for pod in pods_list:
        pod_spec = {"name": pod}
        for key, values in sections:
            if pod in values:
                pod_spec[key] = values[pod]
        # Skip pods for which nothing beyond the name would be emitted.
        if len(pod_spec) > 1:
            merged_specs["podSpecs"].append(pod_spec)

    yaml_data = yaml.dump(merged_specs, default_flow_style=False)

    print(yaml_data)

    if with_rerank:
        filename = f"{mode}_{num_nodes}_gaudi_with_rerank.yaml"
    else:
        filename = f"{mode}_{num_nodes}_gaudi_without_rerank.yaml"
    with open(filename, "w") as file:
        file.write(yaml_data)

    current_dir = os.getcwd()
    filepath = os.path.join(current_dir, filename)
    print(f"YAML file {filepath} has been generated.")

    return filepath
|
||||
|
||||
|
||||
def main():
    """Parse the command line, generate the override file and invoke Helm.

    Runs ``helm template`` when ``--template`` is given, otherwise
    ``helm install``. Both receive the workflow values file and the generated
    override file, in that order. A non-zero Helm exit status raises
    ``subprocess.CalledProcessError`` (``check=True``).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--name", help="The name of example pipelines", default="chatqna")
    parser.add_argument("--folder", help="The path of helmcharts folder", default=".")
    # The option used to be declared with both default=1 and required=True, which
    # made the default unreachable. It is now optional, so the documented
    # default applies; every invocation that passes --num_nodes works as before.
    parser.add_argument(
        "--num_nodes", help="Number of nodes to deploy", type=int, choices=[1, 2, 4, 8], default=1
    )
    parser.add_argument(
        "--mode", help="set up your chatqna in the specified mode", type=str, choices=["oob", "tuned"], default="oob"
    )
    parser.add_argument(
        "--workflow",
        help="with rerank in the pipeline",
        type=str,
        choices=["with_rerank", "without_rerank"],
        default="with_rerank",
    )

    parser.add_argument("--template", help="helm template", action="store_true")
    args = parser.parse_args()

    with_rerank = args.workflow == "with_rerank"
    workflow_file = "./hpu_with_rerank.yaml" if with_rerank else "./hpu_without_rerank.yaml"

    customize_filepath = generate_yaml(args.num_nodes, mode=args.mode, with_rerank=with_rerank)

    # Only the sub-command (and the release name for install) differs between the two modes.
    if args.template:
        helm_command = ["helm", "template", args.folder]
    else:
        helm_command = ["helm", "install", args.name, args.folder]

    subprocess.run(
        helm_command + ["-f", workflow_file, "-f", customize_filepath],
        check=True,
        text=True,
        capture_output=False,
    )
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,223 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
namespace: default
|
||||
|
||||
config:
|
||||
CONFIG_MAP_NAME: chatqna-config
|
||||
NODE_SELECTOR: opea
|
||||
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
|
||||
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||
RERANK_SERVER_HOST_IP: reranking-dependency-svc
|
||||
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||
LLM_SERVER_HOST_IP: llm-dependency-svc
|
||||
INDEX_NAME: rag-redis
|
||||
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
|
||||
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
LLM_SERVER_PORT: 9009
|
||||
RERANK_SERVER_PORT: 8808
|
||||
EMBEDDING_SERVER_PORT: 6006
|
||||
|
||||
microservices:
|
||||
- name: chatqna-backend-server-deploy
|
||||
image: opea/chatqna:latest
|
||||
replicas: 1
|
||||
ports:
|
||||
- containerPort: 8888
|
||||
|
||||
- name: dataprep-deploy
|
||||
image: opea/dataprep-redis:latest
|
||||
replicas: 1
|
||||
ports:
|
||||
- containerPort: 6007
|
||||
|
||||
- name: vector-db
|
||||
image: redis/redis-stack:7.2.0-v9
|
||||
replicas: 1
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
- containerPort: 8001
|
||||
|
||||
- name: retriever-deploy
|
||||
image: opea/retriever-redis:latest
|
||||
replicas: 1
|
||||
ports:
|
||||
- containerPort: 7000
|
||||
|
||||
- name: embedding-dependency-deploy
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
replicas: 1
|
||||
ports:
|
||||
- containerPort: 80
|
||||
args:
|
||||
- name: "--model-id"
|
||||
value: $(EMBEDDING_MODEL_ID)
|
||||
- name: "--auto-truncate"
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
|
||||
- name: reranking-dependency-deploy
|
||||
image: opea/tei-gaudi:latest
|
||||
replicas: 1
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
# Each list item is one flag. "value" belongs to the same item as its "name";
# a leading "-" on the value line would split it into a separate, nameless entry.
args:
  - name: "--model-id"
    value: $(RERANK_MODEL_ID)
  - name: "--auto-truncate"
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: "true"
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: MAX_WARMUP_SEQUENCE_LENGTH
|
||||
value: "512"
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
|
||||
- name: llm-dependency-deploy
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
||||
replicas: 1
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
args:
|
||||
- name: "--model-id"
|
||||
value: $(LLM_MODEL_ID)
|
||||
- name: "--max-input-length"
|
||||
value: "2048"
|
||||
- name: "--max-total-tokens"
|
||||
value: "4096"
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: "true"
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
|
||||
services:
|
||||
- name: chatqna-backend-server-svc
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
nodePort: 30888
|
||||
port: 8888
|
||||
targetPort: 8888
|
||||
selector:
|
||||
app: chatqna-backend-server-deploy
|
||||
type: NodePort
|
||||
|
||||
- name: dataprep-svc
|
||||
spec:
|
||||
ports:
|
||||
- name: port1
|
||||
port: 6007
|
||||
targetPort: 6007
|
||||
selector:
|
||||
app: dataprep-deploy
|
||||
type: ClusterIP
|
||||
|
||||
- name: embedding-dependency-svc
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 6006
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: embedding-dependency-deploy
|
||||
type: ClusterIP
|
||||
|
||||
- name: llm-dependency-svc
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 9009
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
type: ClusterIP
|
||||
|
||||
- name: reranking-dependency-svc
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 8808
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: reranking-dependency-deploy
|
||||
type: ClusterIP
|
||||
|
||||
- name: retriever-svc
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 7000
|
||||
targetPort: 7000
|
||||
selector:
|
||||
app: retriever-deploy
|
||||
type: ClusterIP
|
||||
|
||||
- name: vector-db
|
||||
spec:
|
||||
ports:
|
||||
- name: vector-db-service
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
- name: vector-db-insight
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
selector:
|
||||
app: vector-db
|
||||
type: ClusterIP
|
||||
@@ -1,166 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
namespace: default
|
||||
|
||||
config:
|
||||
CONFIG_MAP_NAME: chatqna-config
|
||||
NODE_SELECTOR: opea
|
||||
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
|
||||
microservices:
|
||||
- name: chatqna-backend-server-deploy
|
||||
image: opea/chatqna-without-rerank:latest
|
||||
replicas: 1
|
||||
ports:
|
||||
- containerPort: 8888
|
||||
|
||||
- name: dataprep-deploy
|
||||
image: opea/dataprep-redis:latest
|
||||
replicas: 1
|
||||
ports:
|
||||
- containerPort: 6007
|
||||
|
||||
- name: vector-db
|
||||
image: redis/redis-stack:7.2.0-v9
|
||||
replicas: 1
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
- containerPort: 8001
|
||||
|
||||
- name: retriever-deploy
|
||||
image: opea/retriever-redis:latest
|
||||
replicas: 1
|
||||
ports:
|
||||
- containerPort: 7000
|
||||
|
||||
- name: embedding-dependency-deploy
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
replicas: 1
|
||||
ports:
|
||||
- containerPort: 80
|
||||
args:
|
||||
- name: "--model-id"
|
||||
value: $(EMBEDDING_MODEL_ID)
|
||||
- name: "--auto-truncate"
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
|
||||
- name: llm-dependency-deploy
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
||||
replicas: 1
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
args:
|
||||
- name: "--model-id"
|
||||
value: $(LLM_MODEL_ID)
|
||||
- name: "--max-input-length"
|
||||
value: "2048"
|
||||
- name: "--max-total-tokens"
|
||||
value: "4096"
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: "true"
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
|
||||
services:
|
||||
- name: chatqna-backend-server-svc
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
nodePort: 30888
|
||||
port: 8888
|
||||
targetPort: 8888
|
||||
selector:
|
||||
app: chatqna-backend-server-deploy
|
||||
type: NodePort
|
||||
|
||||
- name: dataprep-svc
|
||||
spec:
|
||||
ports:
|
||||
- name: port1
|
||||
port: 6007
|
||||
targetPort: 6007
|
||||
selector:
|
||||
app: dataprep-deploy
|
||||
type: ClusterIP
|
||||
|
||||
- name: embedding-dependency-svc
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 6006
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: embedding-dependency-deploy
|
||||
type: ClusterIP
|
||||
|
||||
- name: llm-dependency-svc
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 9009
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
type: ClusterIP
|
||||
|
||||
- name: retriever-svc
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 7000
|
||||
targetPort: 7000
|
||||
selector:
|
||||
app: retriever-deploy
|
||||
type: ClusterIP
|
||||
|
||||
- name: vector-db
|
||||
spec:
|
||||
ports:
|
||||
- name: vector-db-service
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
- name: vector-db-insight
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
selector:
|
||||
app: vector-db
|
||||
type: ClusterIP
|
||||
237
ChatQnA/benchmark/performance/helm_charts/oob_single_node.yaml
Normal file
237
ChatQnA/benchmark/performance/helm_charts/oob_single_node.yaml
Normal file
@@ -0,0 +1,237 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
config:
|
||||
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
INDEX_NAME: rag-redis
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
LLM_SERVER_HOST_IP: llm-dependency-svc
|
||||
NODE_SELECTOR: chatqna-opea
|
||||
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||
RERANK_SERVER_HOST_IP: reranking-dependency-svc
|
||||
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||
|
||||
deployments:
|
||||
- name: chatqna-backend-server-deploy
|
||||
spec:
|
||||
image_name: opea/chatqna-no-wrapper
|
||||
image_tag: latest
|
||||
replicas: 1
|
||||
ports:
|
||||
- containerPort: 8888
|
||||
|
||||
- name: dataprep-deploy
|
||||
spec:
|
||||
image_name: opea/dataprep-redis
|
||||
image_tag: latest
|
||||
replicas: 1
|
||||
ports:
|
||||
- containerPort: 6007
|
||||
|
||||
- name: vector-db
|
||||
spec:
|
||||
image_name: redis/redis-stack
|
||||
image_tag: 7.2.0-v9
|
||||
replicas: 1
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
- containerPort: 8001
|
||||
|
||||
- name: retriever-deploy
|
||||
spec:
|
||||
image_name: opea/retriever-redis
|
||||
image_tag: latest
|
||||
replicas: 1
|
||||
ports:
|
||||
- containerPort: 7000
|
||||
|
||||
- name: embedding-dependency-deploy
|
||||
spec:
|
||||
image_name: ghcr.io/huggingface/text-embeddings-inference
|
||||
image_tag: cpu-1.5
|
||||
replicas: 1
|
||||
ports:
|
||||
- containerPort: 80
|
||||
args:
|
||||
- name: "--model-id"
|
||||
value: $(EMBEDDING_MODEL_ID)
|
||||
- name: "--auto-truncate"
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
|
||||
- name: reranking-dependency-deploy
|
||||
spec:
|
||||
image_name: opea/tei-gaudi
|
||||
image_tag: latest
|
||||
replicas: 1
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
args:
|
||||
- name: "--model-id"
|
||||
- value: $(RERANK_MODEL_ID)
|
||||
- name: "--auto-truncate"
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: "true"
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HF_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
- name: MAX_WARMUP_SEQUENCE_LENGTH
|
||||
value: "512"
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
|
||||
- name: llm-dependency-deploy
|
||||
spec:
|
||||
image_name: ghcr.io/huggingface/tgi-gaudi
|
||||
image_tag: 2.0.4
|
||||
replicas: 7
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
args:
|
||||
- name: "--model-id"
|
||||
value: $(LLM_MODEL_ID)
|
||||
- name: "--max-input-length"
|
||||
value: "2048"
|
||||
- name: "--max-total-tokens"
|
||||
value: "4096"
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: "true"
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HF_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
|
||||
services:
|
||||
- name: chatqna-backend-server-svc
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
nodePort: 30888
|
||||
port: 8888
|
||||
targetPort: 8888
|
||||
selector:
|
||||
app: chatqna-backend-server-deploy
|
||||
type: NodePort
|
||||
|
||||
- name: dataprep-svc
|
||||
spec:
|
||||
ports:
|
||||
- name: port1
|
||||
port: 6007
|
||||
targetPort: 6007
|
||||
selector:
|
||||
app: dataprep-deploy
|
||||
type: ClusterIP
|
||||
|
||||
- name: embedding-dependency-svc
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 6006
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: embedding-dependency-deploy
|
||||
type: ClusterIP
|
||||
|
||||
- name: llm-dependency-svc
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 9009
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
type: ClusterIP
|
||||
|
||||
- name: reranking-dependency-svc
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 8808
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: reranking-dependency-deploy
|
||||
type: ClusterIP
|
||||
|
||||
- name: retriever-svc
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 7000
|
||||
targetPort: 7000
|
||||
selector:
|
||||
app: retriever-deploy
|
||||
type: ClusterIP
|
||||
|
||||
- name: vector-db
|
||||
spec:
|
||||
ports:
|
||||
- name: vector-db-service
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
- name: vector-db-insight
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
selector:
|
||||
app: vector-db
|
||||
type: ClusterIP
|
||||
@@ -4,22 +4,22 @@
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: {{ .Values.config.CONFIG_MAP_NAME }}
|
||||
name: qna-config
|
||||
namespace: default
|
||||
data:
|
||||
EMBEDDING_MODEL_ID: {{ .Values.config.EMBEDDING_MODEL_ID }}
|
||||
EMBEDDING_SERVER_HOST_IP: {{ .Values.config.EMBEDDING_SERVER_HOST_IP }}
|
||||
HUGGINGFACEHUB_API_TOKEN: {{ .Values.config.HUGGINGFACEHUB_API_TOKEN }}
|
||||
NODE_SELECTOR: {{ .Values.config.NODE_SELECTOR }}
|
||||
RERANK_MODEL_ID: {{ .Values.config.RERANK_MODEL_ID }}
|
||||
INDEX_NAME: {{ .Values.config.INDEX_NAME }}
|
||||
LLM_MODEL_ID: {{ .Values.config.LLM_MODEL_ID }}
|
||||
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||
RERANK_SERVER_HOST_IP: reranking-dependency-svc
|
||||
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||
LLM_SERVER_HOST_IP: llm-dependency-svc
|
||||
INDEX_NAME: rag-redis
|
||||
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
|
||||
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
LLM_SERVER_PORT: "9009"
|
||||
RERANK_SERVER_PORT: "8808"
|
||||
EMBEDDING_SERVER_PORT: "6006"
|
||||
LLM_SERVER_HOST_IP: {{ .Values.config.LLM_SERVER_HOST_IP }}
|
||||
NODE_SELECTOR: {{ .Values.config.NODE_SELECTOR }}
|
||||
REDIS_URL: {{ .Values.config.REDIS_URL }}
|
||||
RERANK_MODEL_ID: {{ .Values.config.RERANK_MODEL_ID }}
|
||||
RERANK_SERVER_HOST_IP: {{ .Values.config.RERANK_SERVER_HOST_IP }}
|
||||
RETRIEVER_SERVICE_HOST_IP: {{ .Values.config.RETRIEVER_SERVICE_HOST_IP }}
|
||||
TEI_EMBEDDING_ENDPOINT: {{ .Values.config.TEI_EMBEDDING_ENDPOINT }}
|
||||
TEI_ENDPOINT: {{ .Values.config.TEI_ENDPOINT }}
|
||||
TEI_RERANKING_ENDPOINT: {{ .Values.config.TEI_RERANKING_ENDPOINT }}
|
||||
TGI_LLM_ENDPOINT: {{ .Values.config.TGI_LLM_ENDPOINT }}
|
||||
---
|
||||
|
||||
@@ -1,47 +1,31 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
{{- $global := .Values }}
|
||||
{{- range $microservice := .Values.microservices }}
|
||||
{{- range $deployment := .Values.deployments }}
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: {{ $microservice.name }}
|
||||
name: {{ $deployment.name }}
|
||||
namespace: default
|
||||
spec:
|
||||
{{- $replicas := $microservice.replicas }}
|
||||
{{- range $podSpec := $global.podSpecs }}
|
||||
{{- if eq $podSpec.name $microservice.name }}
|
||||
{{- $replicas = $podSpec.replicas | default $microservice.replicas }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
replicas: {{ $replicas }}
|
||||
|
||||
replicas: {{ $deployment.spec.replicas }}
|
||||
selector:
|
||||
matchLabels:
|
||||
app: {{ $microservice.name }}
|
||||
app: {{ $deployment.name }}
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: {{ $microservice.name }}
|
||||
app: {{ $deployment.name }}
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: {{ $global.config.CONFIG_MAP_NAME }}
|
||||
|
||||
{{- $args := $microservice.args }}
|
||||
{{- range $podSpec := $global.podSpecs }}
|
||||
{{- if eq $podSpec.name $microservice.name }}
|
||||
{{- $args = $podSpec.args | default $microservice.args }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{- if $microservice.args }}
|
||||
name: qna-config
|
||||
{{- if $deployment.spec.args }}
|
||||
args:
|
||||
{{- range $arg := $args }}
|
||||
{{- range $arg := $deployment.spec.args }}
|
||||
{{- if $arg.name }}
|
||||
- {{ $arg.name }}
|
||||
{{- end }}
|
||||
@@ -51,46 +35,30 @@ spec:
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{- if $microservice.env }}
|
||||
{{- if $deployment.spec.env }}
|
||||
env:
|
||||
{{- range $env := $microservice.env }}
|
||||
{{- range $env := $deployment.spec.env }}
|
||||
- name: {{ $env.name }}
|
||||
value: "{{ $env.value }}"
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{- $image := $microservice.image }}
|
||||
{{- range $podSpec := $global.podSpecs }}
|
||||
{{- if eq $podSpec.name $microservice.name }}
|
||||
{{- $image = $podSpec.image | default $microservice.image }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
image: {{ $image }}
|
||||
|
||||
image: {{ $deployment.spec.image_name }}:{{ $deployment.spec.image_tag }}
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: {{ $microservice.name }}
|
||||
name: {{ $deployment.name }}
|
||||
|
||||
{{- if $microservice.ports }}
|
||||
{{- if $deployment.spec.ports }}
|
||||
ports:
|
||||
{{- range $port := $microservice.ports }}
|
||||
{{- range $port := $deployment.spec.ports }}
|
||||
{{- range $port_name, $port_id := $port }}
|
||||
- {{ $port_name }}: {{ $port_id }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{- $resources := $microservice.resources }}
|
||||
{{- range $podSpec := $global.podSpecs }}
|
||||
{{- if eq $podSpec.name $microservice.name }}
|
||||
{{- if $podSpec.resources }}
|
||||
{{- $resources = $podSpec.resources }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{- if $resources }}
|
||||
{{- if $deployment.spec.resources }}
|
||||
resources:
|
||||
{{- range $resourceType, $resource := $resources }}
|
||||
{{- range $resourceType, $resource := $deployment.spec.resources }}
|
||||
{{ $resourceType }}:
|
||||
{{- range $limitType, $limit := $resource }}
|
||||
{{ $limitType }}: {{ $limit }}
|
||||
@@ -98,9 +66,9 @@ spec:
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{- if $microservice.volumeMounts }}
|
||||
{{- if $deployment.spec.volumeMounts }}
|
||||
volumeMounts:
|
||||
{{- range $volumeMount := $microservice.volumeMounts }}
|
||||
{{- range $volumeMount := $deployment.spec.volumeMounts }}
|
||||
- mountPath: {{ $volumeMount.mountPath }}
|
||||
name: {{ $volumeMount.name }}
|
||||
{{- end }}
|
||||
@@ -108,20 +76,20 @@ spec:
|
||||
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: {{ $global.config.NODE_SELECTOR }}
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: {{ $microservice.name }}
|
||||
app: {{ $deployment.name }}
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
|
||||
|
||||
{{- if $microservice.volumes }}
|
||||
{{- if $deployment.spec.volumes }}
|
||||
volumes:
|
||||
{{- range $index, $volume := $microservice.volumes }}
|
||||
{{- range $index, $volume := $deployment.spec.volumes }}
|
||||
- name: {{ $volume.name }}
|
||||
{{- if $volume.hostPath }}
|
||||
hostPath:
|
||||
@@ -135,5 +103,6 @@ spec:
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
|
||||
---
|
||||
{{- end }}
|
||||
|
||||
259
ChatQnA/benchmark/performance/helm_charts/tuned_single_node.yaml
Normal file
259
ChatQnA/benchmark/performance/helm_charts/tuned_single_node.yaml
Normal file
@@ -0,0 +1,259 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
config:
|
||||
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
INDEX_NAME: rag-redis
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
LLM_SERVER_HOST_IP: llm-dependency-svc
|
||||
NODE_SELECTOR: chatqna-opea
|
||||
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||
RERANK_SERVER_HOST_IP: reranking-dependency-svc
|
||||
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||
|
||||
deployments:
|
||||
- name: chatqna-backend-server-deploy
|
||||
spec:
|
||||
image_name: opea/chatqna-no-wrapper
|
||||
image_tag: latest
|
||||
replicas: 2
|
||||
ports:
|
||||
- containerPort: 8888
|
||||
resources:
|
||||
limits:
|
||||
cpu: "8"
|
||||
memory: "8000Mi"
|
||||
requests:
|
||||
cpu: "8"
|
||||
memory: "8000Mi"
|
||||
|
||||
- name: dataprep-deploy
|
||||
spec:
|
||||
image_name: opea/dataprep-redis
|
||||
image_tag: latest
|
||||
replicas: 1
|
||||
ports:
|
||||
- containerPort: 6007
|
||||
|
||||
- name: vector-db
|
||||
spec:
|
||||
image_name: redis/redis-stack
|
||||
image_tag: 7.2.0-v9
|
||||
replicas: 1
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
- containerPort: 8001
|
||||
|
||||
- name: retriever-deploy
|
||||
spec:
|
||||
image_name: opea/retriever-redis
|
||||
image_tag: latest
|
||||
replicas: 2
|
||||
ports:
|
||||
- containerPort: 7000
|
||||
resources:
|
||||
requests:
|
||||
cpu: "4"
|
||||
memory: "4000Mi"
|
||||
|
||||
- name: embedding-dependency-deploy
|
||||
spec:
|
||||
image_name: ghcr.io/huggingface/text-embeddings-inference
|
||||
image_tag: cpu-1.5
|
||||
replicas: 1
|
||||
ports:
|
||||
- containerPort: 80
|
||||
args:
|
||||
- name: "--model-id"
|
||||
value: $(EMBEDDING_MODEL_ID)
|
||||
- name: "--auto-truncate"
|
||||
resources:
|
||||
limits:
|
||||
cpu: "80"
|
||||
memory: "20000Mi"
|
||||
requests:
|
||||
cpu: "80"
|
||||
memory: "20000Mi"
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
|
||||
- name: reranking-dependency-deploy
|
||||
spec:
|
||||
image_name: opea/tei-gaudi
|
||||
image_tag: latest
|
||||
replicas: 1
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
args:
|
||||
- name: "--model-id"
|
||||
- value: $(RERANK_MODEL_ID)
|
||||
- name: "--auto-truncate"
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: "true"
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HF_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
- name: MAX_WARMUP_SEQUENCE_LENGTH
|
||||
value: "512"
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
|
||||
- name: llm-dependency-deploy
|
||||
spec:
|
||||
image_name: ghcr.io/huggingface/tgi-gaudi
|
||||
image_tag: 2.0.4
|
||||
replicas: 7
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
args:
|
||||
- name: "--model-id"
|
||||
value: $(LLM_MODEL_ID)
|
||||
- name: "--max-input-length"
|
||||
value: "1280"
|
||||
- name: "--max-total-tokens"
|
||||
value: "2048"
|
||||
- name: "--max-batch-total-tokens"
|
||||
value: "65536"
|
||||
- name: "--max-batch-prefill-tokens"
|
||||
value: "4096"
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: "true"
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HF_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
|
||||
services:
|
||||
- name: chatqna-backend-server-svc
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
nodePort: 30888
|
||||
port: 8888
|
||||
targetPort: 8888
|
||||
selector:
|
||||
app: chatqna-backend-server-deploy
|
||||
type: NodePort
|
||||
|
||||
- name: dataprep-svc
|
||||
spec:
|
||||
ports:
|
||||
- name: port1
|
||||
port: 6007
|
||||
targetPort: 6007
|
||||
selector:
|
||||
app: dataprep-deploy
|
||||
type: ClusterIP
|
||||
|
||||
- name: embedding-dependency-svc
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 6006
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: embedding-dependency-deploy
|
||||
type: ClusterIP
|
||||
|
||||
- name: llm-dependency-svc
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 9009
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
type: ClusterIP
|
||||
|
||||
- name: reranking-dependency-svc
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 8808
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: reranking-dependency-deploy
|
||||
type: ClusterIP
|
||||
|
||||
- name: retriever-svc
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 7000
|
||||
targetPort: 7000
|
||||
selector:
|
||||
app: retriever-deploy
|
||||
type: ClusterIP
|
||||
|
||||
- name: vector-db
|
||||
spec:
|
||||
ports:
|
||||
- name: vector-db-service
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
- name: vector-db-insight
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
selector:
|
||||
app: vector-db
|
||||
type: ClusterIP
|
||||
237
ChatQnA/benchmark/performance/helm_charts/values.yaml
Normal file
237
ChatQnA/benchmark/performance/helm_charts/values.yaml
Normal file
@@ -0,0 +1,237 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
config:
|
||||
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
INDEX_NAME: rag-redis
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
LLM_SERVER_HOST_IP: llm-dependency-svc
|
||||
NODE_SELECTOR: chatqna-opea
|
||||
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||
RERANK_SERVER_HOST_IP: reranking-dependency-svc
|
||||
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||
|
||||
deployments:
|
||||
- name: chatqna-backend-server-deploy
|
||||
spec:
|
||||
image_name: opea/chatqna-no-wrapper
|
||||
image_tag: latest
|
||||
replicas: 1
|
||||
ports:
|
||||
- containerPort: 8888
|
||||
|
||||
- name: dataprep-deploy
|
||||
spec:
|
||||
image_name: opea/dataprep-redis
|
||||
image_tag: latest
|
||||
replicas: 1
|
||||
ports:
|
||||
- containerPort: 6007
|
||||
|
||||
- name: vector-db
|
||||
spec:
|
||||
image_name: redis/redis-stack
|
||||
image_tag: 7.2.0-v9
|
||||
replicas: 1
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
- containerPort: 8001
|
||||
|
||||
- name: retriever-deploy
|
||||
spec:
|
||||
image_name: opea/retriever-redis
|
||||
image_tag: latest
|
||||
replicas: 1
|
||||
ports:
|
||||
- containerPort: 7000
|
||||
|
||||
- name: embedding-dependency-deploy
|
||||
spec:
|
||||
image_name: ghcr.io/huggingface/text-embeddings-inference
|
||||
image_tag: cpu-1.5
|
||||
replicas: 1
|
||||
ports:
|
||||
- containerPort: 80
|
||||
args:
|
||||
- name: "--model-id"
|
||||
value: $(EMBEDDING_MODEL_ID)
|
||||
- name: "--auto-truncate"
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
|
||||
- name: reranking-dependency-deploy
|
||||
spec:
|
||||
image_name: opea/tei-gaudi
|
||||
image_tag: latest
|
||||
replicas: 1
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
args:
|
||||
- name: "--model-id"
|
||||
- value: $(RERANK_MODEL_ID)
|
||||
- name: "--auto-truncate"
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: "true"
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HF_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
- name: MAX_WARMUP_SEQUENCE_LENGTH
|
||||
value: "512"
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
|
||||
- name: llm-dependency-deploy
|
||||
spec:
|
||||
image_name: ghcr.io/huggingface/tgi-gaudi
|
||||
image_tag: 2.0.4
|
||||
replicas: 7
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
args:
|
||||
- name: "--model-id"
|
||||
value: $(LLM_MODEL_ID)
|
||||
- name: "--max-input-length"
|
||||
value: "2048"
|
||||
- name: "--max-total-tokens"
|
||||
value: "4096"
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: "true"
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HF_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
|
||||
services:
|
||||
- name: chatqna-backend-server-svc
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
nodePort: 30888
|
||||
port: 8888
|
||||
targetPort: 8888
|
||||
selector:
|
||||
app: chatqna-backend-server-deploy
|
||||
type: NodePort
|
||||
|
||||
- name: dataprep-svc
|
||||
spec:
|
||||
ports:
|
||||
- name: port1
|
||||
port: 6007
|
||||
targetPort: 6007
|
||||
selector:
|
||||
app: dataprep-deploy
|
||||
type: ClusterIP
|
||||
|
||||
- name: embedding-dependency-svc
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 6006
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: embedding-dependency-deploy
|
||||
type: ClusterIP
|
||||
|
||||
- name: llm-dependency-svc
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 9009
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
type: ClusterIP
|
||||
|
||||
- name: reranking-dependency-svc
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 8808
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: reranking-dependency-deploy
|
||||
type: ClusterIP
|
||||
|
||||
- name: retriever-svc
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 7000
|
||||
targetPort: 7000
|
||||
selector:
|
||||
app: retriever-deploy
|
||||
type: ClusterIP
|
||||
|
||||
- name: vector-db
|
||||
spec:
|
||||
ports:
|
||||
- name: vector-db-service
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
- name: vector-db-insight
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
selector:
|
||||
app: vector-db
|
||||
type: ClusterIP
|
||||
@@ -44,7 +44,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna:latest
|
||||
image: opea/chatqna-no-wrapper:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
@@ -327,7 +327,7 @@ spec:
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||
image: opea/tei-gaudi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-dependency-deploy
|
||||
ports:
|
||||
@@ -29,7 +29,7 @@ metadata:
|
||||
name: chatqna-backend-server-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 4
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
@@ -44,7 +44,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna:latest
|
||||
image: opea/chatqna-no-wrapper:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
@@ -327,7 +327,7 @@ spec:
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||
image: opea/tei-gaudi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-dependency-deploy
|
||||
ports:
|
||||
@@ -381,7 +381,7 @@ metadata:
|
||||
name: retriever-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 4
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
@@ -44,7 +44,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna:latest
|
||||
image: opea/chatqna-no-wrapper:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
@@ -327,7 +327,7 @@ spec:
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||
image: opea/tei-gaudi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-dependency-deploy
|
||||
ports:
|
||||
@@ -44,7 +44,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna
|
||||
image: opea/chatqna-no-wrapper:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
@@ -327,7 +327,7 @@ spec:
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||
image: opea/tei-gaudi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-dependency-deploy
|
||||
ports:
|
||||
@@ -44,7 +44,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna-without-rerank:latest
|
||||
image: opea/chatqna-no-wrapper-without-rerank:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
@@ -29,7 +29,7 @@ metadata:
|
||||
name: chatqna-backend-server-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 4
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
@@ -44,7 +44,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna-without-rerank:latest
|
||||
image: opea/chatqna-no-wrapper-without-rerank:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
@@ -295,7 +295,7 @@ metadata:
|
||||
name: retriever-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 4
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
@@ -44,7 +44,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna-without-rerank:latest
|
||||
image: opea/chatqna-no-wrapper-without-rerank:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
@@ -44,7 +44,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna-without-rerank:latest
|
||||
image: opea/chatqna-no-wrapper-without-rerank:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
@@ -1,507 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
data:
|
||||
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
INDEX_NAME: rag-redis
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
LLM_SERVER_HOST_IP: llm-dependency-svc
|
||||
NODE_SELECTOR: chatqna-opea
|
||||
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||
RERANK_SERVER_HOST_IP: reranking-dependency-svc
|
||||
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: qna-config
|
||||
namespace: default
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-backend-server-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 8
|
||||
selector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: chatqna-backend-server-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
- containerPort: 8888
|
||||
resources:
|
||||
limits:
|
||||
cpu: 8
|
||||
memory: 8000Mi
|
||||
requests:
|
||||
cpu: 8
|
||||
memory: 8000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: chatqna-backend-server-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
nodePort: 30888
|
||||
port: 8888
|
||||
targetPort: 8888
|
||||
selector:
|
||||
app: chatqna-backend-server-deploy
|
||||
type: NodePort
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: dataprep-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: dataprep-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/dataprep-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: dataprep-deploy
|
||||
ports:
|
||||
- containerPort: 6007
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: dataprep-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: port1
|
||||
port: 6007
|
||||
targetPort: 6007
|
||||
selector:
|
||||
app: dataprep-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 8
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(EMBEDDING_MODEL_ID)
|
||||
- --auto-truncate
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
cpu: 80
|
||||
memory: 20000Mi
|
||||
requests:
|
||||
cpu: 80
|
||||
memory: 20000Mi
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: embedding-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 6006
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: embedding-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 63
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(LLM_MODEL_ID)
|
||||
- --max-input-length
|
||||
- '1280'
|
||||
- --max-total-tokens
|
||||
- '2048'
|
||||
- --max-batch-total-tokens
|
||||
- '65536'
|
||||
- --max-batch-prefill-tokens
|
||||
- '4096'
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HF_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
securityContext:
|
||||
capabilities:
|
||||
add:
|
||||
- SYS_NICE
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: llm-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 9009
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: reranking-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: reranking-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: reranking-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(RERANK_MODEL_ID)
|
||||
- --auto-truncate
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HF_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
- name: MAX_WARMUP_SEQUENCE_LENGTH
|
||||
value: '512'
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: reranking-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: reranking-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 8808
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: reranking-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: retriever-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 8
|
||||
selector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: retriever-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/retriever-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: retriever-deploy
|
||||
ports:
|
||||
- containerPort: 7000
|
||||
resources:
|
||||
requests:
|
||||
cpu: 4
|
||||
memory: 4000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: retriever-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 7000
|
||||
targetPort: 7000
|
||||
selector:
|
||||
app: retriever-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: vector-db
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: redis/redis-stack:7.2.0-v9
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: vector-db
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
- containerPort: 8001
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: vector-db-service
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
- name: vector-db-insight
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
selector:
|
||||
app: vector-db
|
||||
type: ClusterIP
|
||||
---
|
||||
@@ -44,7 +44,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna:latest
|
||||
image: opea/chatqna-no-wrapper:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
@@ -345,7 +345,7 @@ spec:
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||
image: opea/tei-gaudi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-dependency-deploy
|
||||
ports:
|
||||
@@ -44,7 +44,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna:latest
|
||||
image: opea/chatqna-no-wrapper:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
@@ -345,7 +345,7 @@ spec:
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||
image: opea/tei-gaudi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-dependency-deploy
|
||||
ports:
|
||||
@@ -1,507 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
data:
|
||||
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
INDEX_NAME: rag-redis
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
LLM_SERVER_HOST_IP: llm-dependency-svc
|
||||
NODE_SELECTOR: chatqna-opea
|
||||
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||
RERANK_SERVER_HOST_IP: reranking-dependency-svc
|
||||
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: qna-config
|
||||
namespace: default
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-backend-server-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 4
|
||||
selector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: chatqna-backend-server-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
- containerPort: 8888
|
||||
resources:
|
||||
limits:
|
||||
cpu: 8
|
||||
memory: 8000Mi
|
||||
requests:
|
||||
cpu: 8
|
||||
memory: 8000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: chatqna-backend-server-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
nodePort: 30888
|
||||
port: 8888
|
||||
targetPort: 8888
|
||||
selector:
|
||||
app: chatqna-backend-server-deploy
|
||||
type: NodePort
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: dataprep-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: dataprep-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/dataprep-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: dataprep-deploy
|
||||
ports:
|
||||
- containerPort: 6007
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: dataprep-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: port1
|
||||
port: 6007
|
||||
targetPort: 6007
|
||||
selector:
|
||||
app: dataprep-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 4
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(EMBEDDING_MODEL_ID)
|
||||
- --auto-truncate
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
cpu: 80
|
||||
memory: 20000Mi
|
||||
requests:
|
||||
cpu: 80
|
||||
memory: 20000Mi
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: embedding-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 6006
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: embedding-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 31
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(LLM_MODEL_ID)
|
||||
- --max-input-length
|
||||
- '1280'
|
||||
- --max-total-tokens
|
||||
- '2048'
|
||||
- --max-batch-total-tokens
|
||||
- '65536'
|
||||
- --max-batch-prefill-tokens
|
||||
- '4096'
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HF_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
securityContext:
|
||||
capabilities:
|
||||
add:
|
||||
- SYS_NICE
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: llm-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 9009
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: reranking-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: reranking-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: reranking-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(RERANK_MODEL_ID)
|
||||
- --auto-truncate
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HF_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
- name: MAX_WARMUP_SEQUENCE_LENGTH
|
||||
value: '512'
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: reranking-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: reranking-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 8808
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: reranking-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: retriever-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 4
|
||||
selector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: retriever-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/retriever-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: retriever-deploy
|
||||
ports:
|
||||
- containerPort: 7000
|
||||
resources:
|
||||
requests:
|
||||
cpu: 4
|
||||
memory: 4000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: retriever-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 7000
|
||||
targetPort: 7000
|
||||
selector:
|
||||
app: retriever-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: vector-db
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: redis/redis-stack:7.2.0-v9
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: vector-db
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
- containerPort: 8001
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: vector-db-service
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
- name: vector-db-insight
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
selector:
|
||||
app: vector-db
|
||||
type: ClusterIP
|
||||
---
|
||||
@@ -44,7 +44,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna:latest
|
||||
image: opea/chatqna-no-wrapper:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
@@ -345,7 +345,7 @@ spec:
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||
image: opea/tei-gaudi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-dependency-deploy
|
||||
ports:
|
||||
@@ -44,7 +44,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna:latest
|
||||
image: opea/chatqna-no-wrapper:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
@@ -345,7 +345,7 @@ spec:
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||
image: opea/tei-gaudi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-dependency-deploy
|
||||
ports:
|
||||
@@ -1,507 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
data:
|
||||
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
INDEX_NAME: rag-redis
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
LLM_SERVER_HOST_IP: llm-dependency-svc
|
||||
NODE_SELECTOR: chatqna-opea
|
||||
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||
RERANK_SERVER_HOST_IP: reranking-dependency-svc
|
||||
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: qna-config
|
||||
namespace: default
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-backend-server-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: chatqna-backend-server-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
- containerPort: 8888
|
||||
resources:
|
||||
limits:
|
||||
cpu: 8
|
||||
memory: 8000Mi
|
||||
requests:
|
||||
cpu: 8
|
||||
memory: 8000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: chatqna-backend-server-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
nodePort: 30888
|
||||
port: 8888
|
||||
targetPort: 8888
|
||||
selector:
|
||||
app: chatqna-backend-server-deploy
|
||||
type: NodePort
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: dataprep-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: dataprep-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/dataprep-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: dataprep-deploy
|
||||
ports:
|
||||
- containerPort: 6007
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: dataprep-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: port1
|
||||
port: 6007
|
||||
targetPort: 6007
|
||||
selector:
|
||||
app: dataprep-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(EMBEDDING_MODEL_ID)
|
||||
- --auto-truncate
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
cpu: 80
|
||||
memory: 20000Mi
|
||||
requests:
|
||||
cpu: 80
|
||||
memory: 20000Mi
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: embedding-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 6006
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: embedding-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 15
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(LLM_MODEL_ID)
|
||||
- --max-input-length
|
||||
- '1280'
|
||||
- --max-total-tokens
|
||||
- '2048'
|
||||
- --max-batch-total-tokens
|
||||
- '65536'
|
||||
- --max-batch-prefill-tokens
|
||||
- '4096'
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HF_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
securityContext:
|
||||
capabilities:
|
||||
add:
|
||||
- SYS_NICE
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: llm-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 9009
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: reranking-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: reranking-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: reranking-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(RERANK_MODEL_ID)
|
||||
- --auto-truncate
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HF_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
- name: MAX_WARMUP_SEQUENCE_LENGTH
|
||||
value: '512'
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: reranking-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: reranking-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 8808
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: reranking-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: retriever-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: retriever-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/retriever-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: retriever-deploy
|
||||
ports:
|
||||
- containerPort: 7000
|
||||
resources:
|
||||
requests:
|
||||
cpu: 4
|
||||
memory: 4000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: retriever-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 7000
|
||||
targetPort: 7000
|
||||
selector:
|
||||
app: retriever-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: vector-db
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: redis/redis-stack:7.2.0-v9
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: vector-db
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
- containerPort: 8001
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: vector-db-service
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
- name: vector-db-insight
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
selector:
|
||||
app: vector-db
|
||||
type: ClusterIP
|
||||
---
|
||||
@@ -44,7 +44,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna-without-rerank:latest
|
||||
image: opea/chatqna-no-wrapper-without-rerank:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
@@ -1,421 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
data:
|
||||
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
INDEX_NAME: rag-redis
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
LLM_SERVER_HOST_IP: llm-dependency-svc
|
||||
NODE_SELECTOR: chatqna-opea
|
||||
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||
RERANK_SERVER_HOST_IP: reranking-dependency-svc
|
||||
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: qna-config
|
||||
namespace: default
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-backend-server-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 8
|
||||
selector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: chatqna-backend-server-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna-without-rerank:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
- containerPort: 8888
|
||||
resources:
|
||||
limits:
|
||||
cpu: 8
|
||||
memory: 8000Mi
|
||||
requests:
|
||||
cpu: 8
|
||||
memory: 8000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: chatqna-backend-server-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
nodePort: 30888
|
||||
port: 8888
|
||||
targetPort: 8888
|
||||
selector:
|
||||
app: chatqna-backend-server-deploy
|
||||
type: NodePort
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: dataprep-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: dataprep-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/dataprep-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: dataprep-deploy
|
||||
ports:
|
||||
- containerPort: 6007
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: dataprep-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: port1
|
||||
port: 6007
|
||||
targetPort: 6007
|
||||
selector:
|
||||
app: dataprep-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 8
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(EMBEDDING_MODEL_ID)
|
||||
- --auto-truncate
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
cpu: 80
|
||||
memory: 20000Mi
|
||||
requests:
|
||||
cpu: 80
|
||||
memory: 20000Mi
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: embedding-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 6006
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: embedding-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 64
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(LLM_MODEL_ID)
|
||||
- --max-input-length
|
||||
- '1280'
|
||||
- --max-total-tokens
|
||||
- '2048'
|
||||
- --max-batch-total-tokens
|
||||
- '65536'
|
||||
- --max-batch-prefill-tokens
|
||||
- '4096'
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HF_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
securityContext:
|
||||
capabilities:
|
||||
add:
|
||||
- SYS_NICE
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: llm-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 9009
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: retriever-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 8
|
||||
selector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: retriever-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/retriever-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: retriever-deploy
|
||||
ports:
|
||||
- containerPort: 7000
|
||||
resources:
|
||||
requests:
|
||||
cpu: 4
|
||||
memory: 4000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: retriever-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 7000
|
||||
targetPort: 7000
|
||||
selector:
|
||||
app: retriever-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: vector-db
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: redis/redis-stack:7.2.0-v9
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: vector-db
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
- containerPort: 8001
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: vector-db-service
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
- name: vector-db-insight
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
selector:
|
||||
app: vector-db
|
||||
type: ClusterIP
|
||||
---
|
||||
@@ -44,7 +44,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna-without-rerank:latest
|
||||
image: opea/chatqna-no-wrapper-without-rerank:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
@@ -1,421 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
data:
|
||||
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
INDEX_NAME: rag-redis
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
LLM_SERVER_HOST_IP: llm-dependency-svc
|
||||
NODE_SELECTOR: chatqna-opea
|
||||
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||
RERANK_SERVER_HOST_IP: reranking-dependency-svc
|
||||
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: qna-config
|
||||
namespace: default
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-backend-server-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 4
|
||||
selector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: chatqna-backend-server-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna-without-rerank:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
- containerPort: 8888
|
||||
resources:
|
||||
limits:
|
||||
cpu: 8
|
||||
memory: 8000Mi
|
||||
requests:
|
||||
cpu: 8
|
||||
memory: 8000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: chatqna-backend-server-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
nodePort: 30888
|
||||
port: 8888
|
||||
targetPort: 8888
|
||||
selector:
|
||||
app: chatqna-backend-server-deploy
|
||||
type: NodePort
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: dataprep-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: dataprep-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/dataprep-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: dataprep-deploy
|
||||
ports:
|
||||
- containerPort: 6007
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: dataprep-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: port1
|
||||
port: 6007
|
||||
targetPort: 6007
|
||||
selector:
|
||||
app: dataprep-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 4
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(EMBEDDING_MODEL_ID)
|
||||
- --auto-truncate
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
cpu: 80
|
||||
memory: 20000Mi
|
||||
requests:
|
||||
cpu: 80
|
||||
memory: 20000Mi
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: embedding-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 6006
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: embedding-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 32
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(LLM_MODEL_ID)
|
||||
- --max-input-length
|
||||
- '1280'
|
||||
- --max-total-tokens
|
||||
- '2048'
|
||||
- --max-batch-total-tokens
|
||||
- '65536'
|
||||
- --max-batch-prefill-tokens
|
||||
- '4096'
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HF_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
securityContext:
|
||||
capabilities:
|
||||
add:
|
||||
- SYS_NICE
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: llm-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 9009
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: retriever-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 4
|
||||
selector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: retriever-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/retriever-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: retriever-deploy
|
||||
ports:
|
||||
- containerPort: 7000
|
||||
resources:
|
||||
requests:
|
||||
cpu: 4
|
||||
memory: 4000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: retriever-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 7000
|
||||
targetPort: 7000
|
||||
selector:
|
||||
app: retriever-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: vector-db
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: redis/redis-stack:7.2.0-v9
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: vector-db
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
- containerPort: 8001
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: vector-db-service
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
- name: vector-db-insight
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
selector:
|
||||
app: vector-db
|
||||
type: ClusterIP
|
||||
---
|
||||
@@ -44,7 +44,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna-without-rerank:latest
|
||||
image: opea/chatqna-no-wrapper-without-rerank:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
@@ -1,421 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
data:
|
||||
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
INDEX_NAME: rag-redis
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
LLM_SERVER_HOST_IP: llm-dependency-svc
|
||||
NODE_SELECTOR: chatqna-opea
|
||||
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||
RERANK_SERVER_HOST_IP: reranking-dependency-svc
|
||||
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: qna-config
|
||||
namespace: default
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-backend-server-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: chatqna-backend-server-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna-without-rerank:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
- containerPort: 8888
|
||||
resources:
|
||||
limits:
|
||||
cpu: 8
|
||||
memory: 8000Mi
|
||||
requests:
|
||||
cpu: 8
|
||||
memory: 8000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: chatqna-backend-server-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
nodePort: 30888
|
||||
port: 8888
|
||||
targetPort: 8888
|
||||
selector:
|
||||
app: chatqna-backend-server-deploy
|
||||
type: NodePort
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: dataprep-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: dataprep-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/dataprep-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: dataprep-deploy
|
||||
ports:
|
||||
- containerPort: 6007
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: dataprep-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: port1
|
||||
port: 6007
|
||||
targetPort: 6007
|
||||
selector:
|
||||
app: dataprep-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(EMBEDDING_MODEL_ID)
|
||||
- --auto-truncate
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
cpu: 80
|
||||
memory: 20000Mi
|
||||
requests:
|
||||
cpu: 80
|
||||
memory: 20000Mi
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: embedding-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 6006
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: embedding-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 8
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(LLM_MODEL_ID)
|
||||
- --max-input-length
|
||||
- '1280'
|
||||
- --max-total-tokens
|
||||
- '2048'
|
||||
- --max-batch-total-tokens
|
||||
- '65536'
|
||||
- --max-batch-prefill-tokens
|
||||
- '4096'
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HF_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
securityContext:
|
||||
capabilities:
|
||||
add:
|
||||
- SYS_NICE
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: llm-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 9009
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: retriever-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: retriever-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/retriever-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: retriever-deploy
|
||||
ports:
|
||||
- containerPort: 7000
|
||||
resources:
|
||||
requests:
|
||||
cpu: 4
|
||||
memory: 4000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: retriever-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 7000
|
||||
targetPort: 7000
|
||||
selector:
|
||||
app: retriever-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: vector-db
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: redis/redis-stack:7.2.0-v9
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: vector-db
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
- containerPort: 8001
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: vector-db-service
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
- name: vector-db-insight
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
selector:
|
||||
app: vector-db
|
||||
type: ClusterIP
|
||||
---
|
||||
@@ -44,7 +44,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna-without-rerank:latest
|
||||
image: opea/chatqna-no-wrapper-without-rerank:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
@@ -1,514 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: chatqna-config
|
||||
namespace: default
|
||||
data:
|
||||
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
INDEX_NAME: rag-redis
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
|
||||
LLM_SERVER_HOST_IP: llm-dependency-svc
|
||||
NODE_SELECTOR: opea
|
||||
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||
RERANK_SERVER_HOST_IP: reranking-dependency-svc
|
||||
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||
|
||||
LLM_SERVER_PORT: '9009'
|
||||
RERANK_SERVER_PORT: '8808'
|
||||
EMBEDDING_SERVER_PORT: '6006'
|
||||
|
||||
---
|
||||
# Source: chatqna-charts/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: chatqna-backend-server-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
nodePort: 30888
|
||||
port: 8888
|
||||
targetPort: 8888
|
||||
selector:
|
||||
app: chatqna-backend-server-deploy
|
||||
type: NodePort
|
||||
---
|
||||
# Source: chatqna-charts/templates/service.yaml
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: dataprep-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: port1
|
||||
port: 6007
|
||||
targetPort: 6007
|
||||
selector:
|
||||
app: dataprep-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
# Source: chatqna-charts/templates/service.yaml
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: embedding-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 6006
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: embedding-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
# Source: chatqna-charts/templates/service.yaml
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: llm-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 9009
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
# Source: chatqna-charts/templates/service.yaml
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: reranking-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 8808
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: reranking-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
# Source: chatqna-charts/templates/service.yaml
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: retriever-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 7000
|
||||
targetPort: 7000
|
||||
selector:
|
||||
app: retriever-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
# Source: chatqna-charts/templates/service.yaml
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: vector-db-service
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
- name: vector-db-insight
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
selector:
|
||||
app: vector-db
|
||||
type: ClusterIP
|
||||
---
|
||||
# Source: chatqna-charts/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-backend-server-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
|
||||
selector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: chatqna-backend-server-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: chatqna-config
|
||||
image: opea/chatqna-model-fixed-root:latest
|
||||
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
- containerPort: 8888
|
||||
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
# Source: chatqna-charts/templates/deployment.yaml
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: dataprep-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
|
||||
selector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: dataprep-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: chatqna-config
|
||||
image: opea/dataprep-redis:latest
|
||||
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: dataprep-deploy
|
||||
ports:
|
||||
- containerPort: 6007
|
||||
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
# Source: chatqna-charts/templates/deployment.yaml
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
|
||||
selector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: vector-db
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: chatqna-config
|
||||
image: redis/redis-stack:7.2.0-v9
|
||||
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: vector-db
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
- containerPort: 8001
|
||||
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
# Source: chatqna-charts/templates/deployment.yaml
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: retriever-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 2
|
||||
|
||||
selector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: retriever-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: chatqna-config
|
||||
image: opea/retriever-redis:latest
|
||||
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: retriever-deploy
|
||||
ports:
|
||||
- containerPort: 7000
|
||||
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
# Source: chatqna-charts/templates/deployment.yaml
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: chatqna-config
|
||||
args:
|
||||
- --model-id
|
||||
- "$(EMBEDDING_MODEL_ID)"
|
||||
- --auto-truncate
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
- name: shm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
---
|
||||
# Source: chatqna-charts/templates/deployment.yaml
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: reranking-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
|
||||
selector:
|
||||
matchLabels:
|
||||
app: reranking-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: reranking-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: chatqna-config
|
||||
args:
|
||||
- --model-id
|
||||
- "$(RERANK_MODEL_ID)"
|
||||
- --auto-truncate
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: "none"
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: "true"
|
||||
- name: runtime
|
||||
value: "habana"
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: "all"
|
||||
- name: MAX_WARMUP_SEQUENCE_LENGTH
|
||||
value: "512"
|
||||
image: opea/tei-gaudi:latest
|
||||
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-dependency-deploy
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: reranking-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
- name: shm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
---
|
||||
# Source: chatqna-charts/templates/deployment.yaml
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: chatqna-config
|
||||
command: ["/bin/bash", "-c"]
|
||||
args: ["python3 -m vllm.entrypoints.openai.api_server --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048"]
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: "none"
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: "true"
|
||||
- name: runtime
|
||||
value: "habana"
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: "all"
|
||||
image: opea/llm-vllm-hpu:latest
|
||||
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
- name: shm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
@@ -1,421 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
data:
|
||||
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
|
||||
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
|
||||
INDEX_NAME: rag-redis
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
LLM_SERVER_HOST_IP: llm-dependency-svc
|
||||
NODE_SELECTOR: chatqna-opea
|
||||
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
|
||||
RERANK_MODEL_ID: BAAI/bge-reranker-base
|
||||
RERANK_SERVER_HOST_IP: reranking-dependency-svc
|
||||
RETRIEVER_SERVICE_HOST_IP: retriever-svc
|
||||
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
|
||||
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
|
||||
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: qna-config
|
||||
namespace: default
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-backend-server-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: chatqna-backend-server-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna-without-rerank:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
ports:
|
||||
- containerPort: 8888
|
||||
resources:
|
||||
limits:
|
||||
cpu: 8
|
||||
memory: 8000Mi
|
||||
requests:
|
||||
cpu: 8
|
||||
memory: 8000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: chatqna-backend-server-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: chatqna-backend-server-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
nodePort: 30888
|
||||
port: 8888
|
||||
targetPort: 8888
|
||||
selector:
|
||||
app: chatqna-backend-server-deploy
|
||||
type: NodePort
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: dataprep-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: dataprep-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/dataprep-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: dataprep-deploy
|
||||
ports:
|
||||
- containerPort: 6007
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: dataprep-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: dataprep-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: port1
|
||||
port: 6007
|
||||
targetPort: 6007
|
||||
selector:
|
||||
app: dataprep-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: embedding-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: embedding-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(EMBEDDING_MODEL_ID)
|
||||
- --auto-truncate
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
cpu: 80
|
||||
memory: 20000Mi
|
||||
requests:
|
||||
cpu: 80
|
||||
memory: 20000Mi
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: embedding-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: embedding-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 6006
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: embedding-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-dependency-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 16
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-dependency-deploy
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --model-id
|
||||
- $(LLM_MODEL_ID)
|
||||
- --max-input-length
|
||||
- '1280'
|
||||
- --max-total-tokens
|
||||
- '2048'
|
||||
- --max-batch-total-tokens
|
||||
- '65536'
|
||||
- --max-batch-prefill-tokens
|
||||
- '4096'
|
||||
env:
|
||||
- name: OMPI_MCA_btl_vader_single_copy_mechanism
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
value: all
|
||||
- name: HF_TOKEN
|
||||
value: ${HF_TOKEN}
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-dependency-deploy
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
securityContext:
|
||||
capabilities:
|
||||
add:
|
||||
- SYS_NICE
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /dev/shm
|
||||
name: shm
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: llm-dependency-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
name: model-volume
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 1Gi
|
||||
name: shm
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: llm-dependency-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 9009
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: llm-dependency-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: retriever-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: retriever-deploy
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/retriever-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: retriever-deploy
|
||||
ports:
|
||||
- containerPort: 7000
|
||||
resources:
|
||||
requests:
|
||||
cpu: 4
|
||||
memory: 4000Mi
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: retriever-deploy
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: retriever-svc
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: service
|
||||
port: 7000
|
||||
targetPort: 7000
|
||||
selector:
|
||||
app: retriever-deploy
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: vector-db
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: redis/redis-stack:7.2.0-v9
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: vector-db
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
- containerPort: 8001
|
||||
hostIPC: true
|
||||
nodeSelector:
|
||||
node-type: chatqna-opea
|
||||
serviceAccountName: default
|
||||
topologySpreadConstraints:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: vector-db
|
||||
maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: vector-db
|
||||
namespace: default
|
||||
spec:
|
||||
ports:
|
||||
- name: vector-db-service
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
- name: vector-db-insight
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
selector:
|
||||
app: vector-db
|
||||
type: ClusterIP
|
||||
---
|
||||
@@ -1,197 +1,37 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
|
||||
from comps import ChatQnAGateway, MicroService, ServiceOrchestrator, ServiceType
|
||||
from langchain_core.prompts import PromptTemplate
|
||||
|
||||
|
||||
class ChatTemplate:
|
||||
@staticmethod
|
||||
def generate_rag_prompt(question, documents):
|
||||
context_str = "\n".join(documents)
|
||||
if context_str and len(re.findall("[\u4E00-\u9FFF]", context_str)) / len(context_str) >= 0.3:
|
||||
# chinese context
|
||||
template = """
|
||||
### 你将扮演一个乐于助人、尊重他人并诚实的助手,你的目标是帮助用户解答问题。有效地利用来自本地知识库的搜索结果。确保你的回答中只包含相关信息。如果你不确定问题的答案,请避免分享不准确的信息。
|
||||
### 搜索结果:{context}
|
||||
### 问题:{question}
|
||||
### 回答:
|
||||
"""
|
||||
else:
|
||||
template = """
|
||||
### You are a helpful, respectful and honest assistant to help the user with questions. \
|
||||
Please refer to the search results obtained from the local knowledge base. \
|
||||
But be careful to not incorporate the information that you think is not relevant to the question. \
|
||||
If you don't know the answer to a question, please don't share false information. \n
|
||||
### Search results: {context} \n
|
||||
### Question: {question} \n
|
||||
### Answer:
|
||||
"""
|
||||
return template.format(context=context_str, question=question)
|
||||
|
||||
|
||||
MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "0.0.0.0")
|
||||
MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))
|
||||
GUARDRAIL_SERVICE_HOST_IP = os.getenv("GUARDRAIL_SERVICE_HOST_IP", "0.0.0.0")
|
||||
GUARDRAIL_SERVICE_PORT = int(os.getenv("GUARDRAIL_SERVICE_PORT", 80))
|
||||
EMBEDDING_SERVER_HOST_IP = os.getenv("EMBEDDING_SERVER_HOST_IP", "0.0.0.0")
|
||||
EMBEDDING_SERVER_PORT = int(os.getenv("EMBEDDING_SERVER_PORT", 80))
|
||||
EMBEDDING_SERVICE_HOST_IP = os.getenv("EMBEDDING_SERVICE_HOST_IP", "0.0.0.0")
|
||||
EMBEDDING_SERVICE_PORT = int(os.getenv("EMBEDDING_SERVICE_PORT", 6000))
|
||||
RETRIEVER_SERVICE_HOST_IP = os.getenv("RETRIEVER_SERVICE_HOST_IP", "0.0.0.0")
|
||||
RETRIEVER_SERVICE_PORT = int(os.getenv("RETRIEVER_SERVICE_PORT", 7000))
|
||||
RERANK_SERVER_HOST_IP = os.getenv("RERANK_SERVER_HOST_IP", "0.0.0.0")
|
||||
RERANK_SERVER_PORT = int(os.getenv("RERANK_SERVER_PORT", 80))
|
||||
LLM_SERVER_HOST_IP = os.getenv("LLM_SERVER_HOST_IP", "0.0.0.0")
|
||||
LLM_SERVER_PORT = int(os.getenv("LLM_SERVER_PORT", 80))
|
||||
|
||||
|
||||
def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
|
||||
if self.services[cur_node].service_type == ServiceType.EMBEDDING:
|
||||
inputs["inputs"] = inputs["text"]
|
||||
del inputs["text"]
|
||||
elif self.services[cur_node].service_type == ServiceType.RETRIEVER:
|
||||
# prepare the retriever params
|
||||
retriever_parameters = kwargs.get("retriever_parameters", None)
|
||||
if retriever_parameters:
|
||||
inputs.update(retriever_parameters.dict())
|
||||
elif self.services[cur_node].service_type == ServiceType.LLM:
|
||||
# convert TGI/vLLM to unified OpenAI /v1/chat/completions format
|
||||
next_inputs = {}
|
||||
next_inputs["model"] = "tgi" # specifically clarify the fake model to make the format unified
|
||||
next_inputs["messages"] = [{"role": "user", "content": inputs["inputs"]}]
|
||||
next_inputs["max_tokens"] = llm_parameters_dict["max_tokens"]
|
||||
next_inputs["top_p"] = llm_parameters_dict["top_p"]
|
||||
next_inputs["stream"] = inputs["streaming"]
|
||||
next_inputs["frequency_penalty"] = inputs["frequency_penalty"]
|
||||
# next_inputs["presence_penalty"] = inputs["presence_penalty"]
|
||||
# next_inputs["repetition_penalty"] = inputs["repetition_penalty"]
|
||||
next_inputs["temperature"] = inputs["temperature"]
|
||||
inputs = next_inputs
|
||||
return inputs
|
||||
|
||||
|
||||
def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_dict, **kwargs):
|
||||
next_data = {}
|
||||
if self.services[cur_node].service_type == ServiceType.EMBEDDING:
|
||||
assert isinstance(data, list)
|
||||
next_data = {"text": inputs["inputs"], "embedding": data[0]}
|
||||
elif self.services[cur_node].service_type == ServiceType.RETRIEVER:
|
||||
|
||||
docs = [doc["text"] for doc in data["retrieved_docs"]]
|
||||
|
||||
with_rerank = runtime_graph.downstream(cur_node)[0].startswith("rerank")
|
||||
if with_rerank and docs:
|
||||
# forward to rerank
|
||||
# prepare inputs for rerank
|
||||
next_data["query"] = data["initial_query"]
|
||||
next_data["texts"] = [doc["text"] for doc in data["retrieved_docs"]]
|
||||
else:
|
||||
# forward to llm
|
||||
if not docs and with_rerank:
|
||||
# delete the rerank from retriever -> rerank -> llm
|
||||
for ds in reversed(runtime_graph.downstream(cur_node)):
|
||||
for nds in runtime_graph.downstream(ds):
|
||||
runtime_graph.add_edge(cur_node, nds)
|
||||
runtime_graph.delete_node_if_exists(ds)
|
||||
|
||||
# handle template
|
||||
# if user provides template, then format the prompt with it
|
||||
# otherwise, use the default template
|
||||
prompt = data["initial_query"]
|
||||
chat_template = llm_parameters_dict["chat_template"]
|
||||
if chat_template:
|
||||
prompt_template = PromptTemplate.from_template(chat_template)
|
||||
input_variables = prompt_template.input_variables
|
||||
if sorted(input_variables) == ["context", "question"]:
|
||||
prompt = prompt_template.format(question=data["initial_query"], context="\n".join(docs))
|
||||
elif input_variables == ["question"]:
|
||||
prompt = prompt_template.format(question=data["initial_query"])
|
||||
else:
|
||||
print(f"{prompt_template} not used, we only support 2 input variables ['question', 'context']")
|
||||
prompt = ChatTemplate.generate_rag_prompt(data["initial_query"], docs)
|
||||
else:
|
||||
prompt = ChatTemplate.generate_rag_prompt(data["initial_query"], docs)
|
||||
|
||||
next_data["inputs"] = prompt
|
||||
|
||||
elif self.services[cur_node].service_type == ServiceType.RERANK:
|
||||
# rerank the inputs with the scores
|
||||
reranker_parameters = kwargs.get("reranker_parameters", None)
|
||||
top_n = reranker_parameters.top_n if reranker_parameters else 1
|
||||
docs = inputs["texts"]
|
||||
reranked_docs = []
|
||||
for best_response in data[:top_n]:
|
||||
reranked_docs.append(docs[best_response["index"]])
|
||||
|
||||
# handle template
|
||||
# if user provides template, then format the prompt with it
|
||||
# otherwise, use the default template
|
||||
prompt = inputs["query"]
|
||||
chat_template = llm_parameters_dict["chat_template"]
|
||||
if chat_template:
|
||||
prompt_template = PromptTemplate.from_template(chat_template)
|
||||
input_variables = prompt_template.input_variables
|
||||
if sorted(input_variables) == ["context", "question"]:
|
||||
prompt = prompt_template.format(question=prompt, context="\n".join(reranked_docs))
|
||||
elif input_variables == ["question"]:
|
||||
prompt = prompt_template.format(question=prompt)
|
||||
else:
|
||||
print(f"{prompt_template} not used, we only support 2 input variables ['question', 'context']")
|
||||
prompt = ChatTemplate.generate_rag_prompt(prompt, reranked_docs)
|
||||
else:
|
||||
prompt = ChatTemplate.generate_rag_prompt(prompt, reranked_docs)
|
||||
|
||||
next_data["inputs"] = prompt
|
||||
|
||||
else:
|
||||
next_data = data
|
||||
|
||||
return next_data
|
||||
|
||||
|
||||
def align_generator(self, gen, **kwargs):
|
||||
# openai reaponse format
|
||||
# b'data:{"id":"","object":"text_completion","created":1725530204,"model":"meta-llama/Meta-Llama-3-8B-Instruct","system_fingerprint":"2.0.1-native","choices":[{"index":0,"delta":{"role":"assistant","content":"?"},"logprobs":null,"finish_reason":null}]}\n\n'
|
||||
for line in gen:
|
||||
line = line.decode("utf-8")
|
||||
start = line.find("{")
|
||||
end = line.rfind("}") + 1
|
||||
|
||||
json_str = line[start:end]
|
||||
try:
|
||||
# sometimes yield empty chunk, do a fallback here
|
||||
json_data = json.loads(json_str)
|
||||
if json_data["choices"][0]["finish_reason"] != "eos_token":
|
||||
yield f"data: {repr(json_data['choices'][0]['delta']['content'].encode('utf-8'))}\n\n"
|
||||
except Exception as e:
|
||||
yield f"data: {repr(json_str.encode('utf-8'))}\n\n"
|
||||
yield "data: [DONE]\n\n"
|
||||
RERANK_SERVICE_HOST_IP = os.getenv("RERANK_SERVICE_HOST_IP", "0.0.0.0")
|
||||
RERANK_SERVICE_PORT = int(os.getenv("RERANK_SERVICE_PORT", 8000))
|
||||
LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0")
|
||||
LLM_SERVICE_PORT = int(os.getenv("LLM_SERVICE_PORT", 9000))
|
||||
|
||||
|
||||
class ChatQnAService:
|
||||
def __init__(self, host="0.0.0.0", port=8000):
|
||||
self.host = host
|
||||
self.port = port
|
||||
ServiceOrchestrator.align_inputs = align_inputs
|
||||
ServiceOrchestrator.align_outputs = align_outputs
|
||||
ServiceOrchestrator.align_generator = align_generator
|
||||
self.megaservice = ServiceOrchestrator()
|
||||
|
||||
def add_remote_service(self):
|
||||
|
||||
embedding = MicroService(
|
||||
name="embedding",
|
||||
host=EMBEDDING_SERVER_HOST_IP,
|
||||
port=EMBEDDING_SERVER_PORT,
|
||||
endpoint="/embed",
|
||||
host=EMBEDDING_SERVICE_HOST_IP,
|
||||
port=EMBEDDING_SERVICE_PORT,
|
||||
endpoint="/v1/embeddings",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.EMBEDDING,
|
||||
)
|
||||
|
||||
retriever = MicroService(
|
||||
name="retriever",
|
||||
host=RETRIEVER_SERVICE_HOST_IP,
|
||||
@@ -200,20 +40,18 @@ class ChatQnAService:
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.RETRIEVER,
|
||||
)
|
||||
|
||||
rerank = MicroService(
|
||||
name="rerank",
|
||||
host=RERANK_SERVER_HOST_IP,
|
||||
port=RERANK_SERVER_PORT,
|
||||
endpoint="/rerank",
|
||||
host=RERANK_SERVICE_HOST_IP,
|
||||
port=RERANK_SERVICE_PORT,
|
||||
endpoint="/v1/reranking",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.RERANK,
|
||||
)
|
||||
|
||||
llm = MicroService(
|
||||
name="llm",
|
||||
host=LLM_SERVER_HOST_IP,
|
||||
port=LLM_SERVER_PORT,
|
||||
host=LLM_SERVICE_HOST_IP,
|
||||
port=LLM_SERVICE_PORT,
|
||||
endpoint="/v1/chat/completions",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.LLM,
|
||||
@@ -224,109 +62,7 @@ class ChatQnAService:
|
||||
self.megaservice.flow_to(rerank, llm)
|
||||
self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)
|
||||
|
||||
def add_remote_service_without_rerank(self):
|
||||
|
||||
embedding = MicroService(
|
||||
name="embedding",
|
||||
host=EMBEDDING_SERVER_HOST_IP,
|
||||
port=EMBEDDING_SERVER_PORT,
|
||||
endpoint="/embed",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.EMBEDDING,
|
||||
)
|
||||
|
||||
retriever = MicroService(
|
||||
name="retriever",
|
||||
host=RETRIEVER_SERVICE_HOST_IP,
|
||||
port=RETRIEVER_SERVICE_PORT,
|
||||
endpoint="/v1/retrieval",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.RETRIEVER,
|
||||
)
|
||||
|
||||
llm = MicroService(
|
||||
name="llm",
|
||||
host=LLM_SERVER_HOST_IP,
|
||||
port=LLM_SERVER_PORT,
|
||||
endpoint="/v1/chat/completions",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.LLM,
|
||||
)
|
||||
self.megaservice.add(embedding).add(retriever).add(llm)
|
||||
self.megaservice.flow_to(embedding, retriever)
|
||||
self.megaservice.flow_to(retriever, llm)
|
||||
self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)
|
||||
|
||||
def add_remote_service_with_guardrails(self):
|
||||
guardrail_in = MicroService(
|
||||
name="guardrail_in",
|
||||
host=GUARDRAIL_SERVICE_HOST_IP,
|
||||
port=GUARDRAIL_SERVICE_PORT,
|
||||
endpoint="/v1/guardrails",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.GUARDRAIL,
|
||||
)
|
||||
embedding = MicroService(
|
||||
name="embedding",
|
||||
host=EMBEDDING_SERVER_HOST_IP,
|
||||
port=EMBEDDING_SERVER_PORT,
|
||||
endpoint="/embed",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.EMBEDDING,
|
||||
)
|
||||
retriever = MicroService(
|
||||
name="retriever",
|
||||
host=RETRIEVER_SERVICE_HOST_IP,
|
||||
port=RETRIEVER_SERVICE_PORT,
|
||||
endpoint="/v1/retrieval",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.RETRIEVER,
|
||||
)
|
||||
rerank = MicroService(
|
||||
name="rerank",
|
||||
host=RERANK_SERVER_HOST_IP,
|
||||
port=RERANK_SERVER_PORT,
|
||||
endpoint="/rerank",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.RERANK,
|
||||
)
|
||||
llm = MicroService(
|
||||
name="llm",
|
||||
host=LLM_SERVER_HOST_IP,
|
||||
port=LLM_SERVER_PORT,
|
||||
endpoint="/v1/chat/completions",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.LLM,
|
||||
)
|
||||
# guardrail_out = MicroService(
|
||||
# name="guardrail_out",
|
||||
# host=GUARDRAIL_SERVICE_HOST_IP,
|
||||
# port=GUARDRAIL_SERVICE_PORT,
|
||||
# endpoint="/v1/guardrails",
|
||||
# use_remote_service=True,
|
||||
# service_type=ServiceType.GUARDRAIL,
|
||||
# )
|
||||
# self.megaservice.add(guardrail_in).add(embedding).add(retriever).add(rerank).add(llm).add(guardrail_out)
|
||||
self.megaservice.add(guardrail_in).add(embedding).add(retriever).add(rerank).add(llm)
|
||||
self.megaservice.flow_to(guardrail_in, embedding)
|
||||
self.megaservice.flow_to(embedding, retriever)
|
||||
self.megaservice.flow_to(retriever, rerank)
|
||||
self.megaservice.flow_to(rerank, llm)
|
||||
# self.megaservice.flow_to(llm, guardrail_out)
|
||||
self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--without-rerank", action="store_true")
|
||||
parser.add_argument("--with-guardrails", action="store_true")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
chatqna = ChatQnAService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT)
|
||||
if args.without_rerank:
|
||||
chatqna.add_remote_service_without_rerank()
|
||||
elif args.with_guardrails:
|
||||
chatqna.add_remote_service_with_guardrails()
|
||||
else:
|
||||
chatqna.add_remote_service()
|
||||
chatqna.add_remote_service()
|
||||
|
||||
@@ -30,11 +30,21 @@ opea_micro_services:
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
model-id: ${EMBEDDING_MODEL_ID}
|
||||
embedding:
|
||||
host: ${EMBEDDING_SERVICE_HOST_IP}
|
||||
ports: ${EMBEDDING_SERVICE_PORT}
|
||||
image: opea/embedding-tei:latest
|
||||
endpoint: /v1/embeddings
|
||||
retrieval:
|
||||
host: ${RETRIEVER_SERVICE_HOST_IP}
|
||||
ports: ${RETRIEVER_SERVICE_PORT}
|
||||
image: opea/retriever-redis:latest
|
||||
endpoint: /v1/retrieval
|
||||
reranking:
|
||||
host: ${RERANK_SERVICE_HOST_IP}
|
||||
ports: ${RERANK_SERVICE_PORT}
|
||||
image: opea/reranking-tei:latest
|
||||
endpoint: /v1/reranking
|
||||
tgi-service:
|
||||
host: ${TGI_SERVICE_IP}
|
||||
ports: ${TGI_SERVICE_PORT}
|
||||
@@ -54,6 +64,11 @@ opea_micro_services:
|
||||
USE_FLASH_ATTENTION: true
|
||||
FLASH_ATTENTION_RECOMPUTE: true
|
||||
model-id: ${LLM_MODEL_ID}
|
||||
llm:
|
||||
host: ${LLM_SERVICE_HOST_IP}
|
||||
ports: ${LLM_SERVICE_PORT}
|
||||
image: opea/llm-tgi:latest
|
||||
endpoint: /v1/chat/completions
|
||||
ui:
|
||||
host: ${UI_SERVICE_HOST_IP}
|
||||
ports:
|
||||
|
||||
89
ChatQnA/chatqna_guardrails.py
Normal file
89
ChatQnA/chatqna_guardrails.py
Normal file
@@ -0,0 +1,89 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import os
|
||||
|
||||
from comps import ChatQnAGateway, MicroService, ServiceOrchestrator, ServiceType
|
||||
|
||||
# Connection settings, each read from the environment with a built-in fallback.
# Ports are converted to int whether they come from the environment or the default.

# Megaservice.
MEGA_SERVICE_HOST_IP = os.environ.get("MEGA_SERVICE_HOST_IP", "0.0.0.0")
MEGA_SERVICE_PORT = int(os.environ.get("MEGA_SERVICE_PORT", 8888))

# Guardrail microservice.
GUARDRAIL_SERVICE_HOST_IP = os.environ.get("GUARDRAIL_SERVICE_HOST_IP", "0.0.0.0")
GUARDRAIL_SERVICE_PORT = int(os.environ.get("GUARDRAIL_SERVICE_PORT", 9090))

# Embedding microservice.
EMBEDDING_SERVICE_HOST_IP = os.environ.get("EMBEDDING_SERVICE_HOST_IP", "0.0.0.0")
EMBEDDING_SERVICE_PORT = int(os.environ.get("EMBEDDING_SERVICE_PORT", 6000))

# Retriever microservice.
RETRIEVER_SERVICE_HOST_IP = os.environ.get("RETRIEVER_SERVICE_HOST_IP", "0.0.0.0")
RETRIEVER_SERVICE_PORT = int(os.environ.get("RETRIEVER_SERVICE_PORT", 7000))

# Rerank microservice.
RERANK_SERVICE_HOST_IP = os.environ.get("RERANK_SERVICE_HOST_IP", "0.0.0.0")
RERANK_SERVICE_PORT = int(os.environ.get("RERANK_SERVICE_PORT", 8000))

# LLM microservice.
LLM_SERVICE_HOST_IP = os.environ.get("LLM_SERVICE_HOST_IP", "0.0.0.0")
LLM_SERVICE_PORT = int(os.environ.get("LLM_SERVICE_PORT", 9000))
|
||||
|
||||
|
||||
class ChatQnAService:
    """ChatQnA megaservice whose pipeline begins with an input guardrail.

    ``add_remote_service`` builds the chain
    guardrail_in -> embedding -> retriever -> rerank -> llm.
    """

    def __init__(self, host="0.0.0.0", port=8000):
        """Remember the host/port settings and create an empty orchestrator.

        Args:
            host: Stored on the instance as ``self.host``.
            port: Stored as ``self.port``; later passed to the gateway.
        """
        self.host, self.port = host, port
        self.megaservice = ServiceOrchestrator()

    def add_remote_service(self):
        """Register the remote microservices, link them in order, and create the gateway."""
        # One row per stage, in pipeline order: (name, host, port, endpoint, service type).
        stage_specs = [
            (
                "guardrail_in",
                GUARDRAIL_SERVICE_HOST_IP,
                GUARDRAIL_SERVICE_PORT,
                "/v1/guardrails",
                ServiceType.GUARDRAIL,
            ),
            (
                "embedding",
                EMBEDDING_SERVICE_HOST_IP,
                EMBEDDING_SERVICE_PORT,
                "/v1/embeddings",
                ServiceType.EMBEDDING,
            ),
            (
                "retriever",
                RETRIEVER_SERVICE_HOST_IP,
                RETRIEVER_SERVICE_PORT,
                "/v1/retrieval",
                ServiceType.RETRIEVER,
            ),
            (
                "rerank",
                RERANK_SERVICE_HOST_IP,
                RERANK_SERVICE_PORT,
                "/v1/reranking",
                ServiceType.RERANK,
            ),
            (
                "llm",
                LLM_SERVICE_HOST_IP,
                LLM_SERVICE_PORT,
                "/v1/chat/completions",
                ServiceType.LLM,
            ),
        ]
        stages = [
            MicroService(
                name=name,
                host=host,
                port=port,
                endpoint=endpoint,
                use_remote_service=True,
                service_type=kind,
            )
            for name, host, port, endpoint, kind in stage_specs
        ]

        # Only the input side is guarded: no guardrail stage is attached after the LLM.
        registry = self.megaservice
        for stage in stages:
            # Re-bind to the return value so this behaves like .add(a).add(b)...
            registry = registry.add(stage)

        # Link every stage to the one that follows it.
        for upstream, downstream in zip(stages, stages[1:]):
            self.megaservice.flow_to(upstream, downstream)

        # The gateway host is the literal "0.0.0.0"; self.host is not consulted here.
        self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Build the service from the environment-derived settings, then wire the
    # guardrail pipeline and create its gateway.
    chatqna = ChatQnAService(port=MEGA_SERVICE_PORT, host=MEGA_SERVICE_HOST_IP)
    chatqna.add_remote_service()
|
||||
275
ChatQnA/chatqna_no_wrapper.py
Normal file
275
ChatQnA/chatqna_no_wrapper.py
Normal file
@@ -0,0 +1,275 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
|
||||
from comps import ChatQnAGateway, MicroService, ServiceOrchestrator, ServiceType
|
||||
from langchain_core.prompts import PromptTemplate
|
||||
|
||||
|
||||
class ChatTemplate:
    """Built-in RAG prompt templates, used when the caller provides no usable chat template."""

    @staticmethod
    def generate_rag_prompt(question, documents):
        """Build the default RAG prompt for ``question`` from the given documents.

        The documents are joined with newlines to form the context. When at
        least 30% of the context characters fall in the U+4E00-U+9FFF range,
        the Chinese template is used; otherwise (including an empty context)
        the English one is used.

        Args:
            question: The user question inserted into the prompt.
            documents: Iterable of document strings used as search results.

        Returns:
            The formatted prompt string.
        """
        context_str = "\n".join(documents)
        cjk_chars = re.findall("[\u4E00-\u9FFF]", context_str)
        # bool(...) guards the division for an empty context.
        mostly_chinese = bool(context_str) and len(cjk_chars) / len(context_str) >= 0.3

        if mostly_chinese:
            return """
### 你将扮演一个乐于助人、尊重他人并诚实的助手,你的目标是帮助用户解答问题。有效地利用来自本地知识库的搜索结果。确保你的回答中只包含相关信息。如果你不确定问题的答案,请避免分享不准确的信息。
### 搜索结果:{context}
### 问题:{question}
### 回答:
""".format(
                context=context_str, question=question
            )

        return """
### You are a helpful, respectful and honest assistant to help the user with questions. \
Please refer to the search results obtained from the local knowledge base. \
But be careful to not incorporate the information that you think is not relevant to the question. \
If you don't know the answer to a question, please don't share false information. \n
### Search results: {context} \n
### Question: {question} \n
### Answer:
""".format(
            context=context_str, question=question
        )
|
||||
|
||||
|
||||
# Connection settings, each read from the environment with a built-in fallback.
# Ports are converted to int whether they come from the environment or the default.
#
# The EMBEDDING_SERVICE_*, RETRIEVER_SERVICE_*, RERANK_SERVICE_* and LLM_SERVICE_*
# lookups that used to sit here are disabled; embedding, rerank and LLM are read
# from the *_SERVER_* variables below instead.

# Megaservice.
MEGA_SERVICE_HOST_IP = os.environ.get("MEGA_SERVICE_HOST_IP", "0.0.0.0")
MEGA_SERVICE_PORT = int(os.environ.get("MEGA_SERVICE_PORT", 8888))

# Embedding server.
EMBEDDING_SERVER_HOST_IP = os.environ.get("EMBEDDING_SERVER_HOST_IP", "0.0.0.0")
EMBEDDING_SERVER_PORT = int(os.environ.get("EMBEDDING_SERVER_PORT", 6006))

# Retriever microservice.
RETRIEVER_SERVICE_HOST_IP = os.environ.get("RETRIEVER_SERVICE_HOST_IP", "0.0.0.0")
RETRIEVER_SERVICE_PORT = int(os.environ.get("RETRIEVER_SERVICE_PORT", 7000))

# Rerank server.
RERANK_SERVER_HOST_IP = os.environ.get("RERANK_SERVER_HOST_IP", "0.0.0.0")
RERANK_SERVER_PORT = int(os.environ.get("RERANK_SERVER_PORT", 8808))

# LLM server.
LLM_SERVER_HOST_IP = os.environ.get("LLM_SERVER_HOST_IP", "0.0.0.0")
LLM_SERVER_PORT = int(os.environ.get("LLM_SERVER_PORT", 9009))
|
||||
|
||||
|
||||
def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
    """Reshape ``inputs`` into the request body expected by the service at ``cur_node``.

    Installed on ``ServiceOrchestrator`` as a method, hence the ``self`` parameter.

    * EMBEDDING: the ``"text"`` entry is moved to ``"inputs"`` (in place).
    * RETRIEVER: if ``kwargs`` carries ``retriever_parameters``, its ``.dict()``
      is merged into ``inputs`` (in place).
    * LLM: a new OpenAI-style ``/v1/chat/completions`` payload is built from
      ``inputs`` and ``llm_parameters_dict``.
    * Any other service type: ``inputs`` is returned unchanged.

    Args:
        inputs: Mutable mapping holding the data destined for the node.
        cur_node: Key into ``self.services`` identifying the node.
        runtime_graph: Unused here.
        llm_parameters_dict: Supplies ``max_tokens`` and ``top_p`` for the LLM payload.
        **kwargs: May contain ``retriever_parameters``.

    Returns:
        The mapping to send to the service.
    """
    service_type = self.services[cur_node].service_type

    if service_type == ServiceType.EMBEDDING:
        inputs["inputs"] = inputs.pop("text")
        return inputs

    if service_type == ServiceType.RETRIEVER:
        retriever_parameters = kwargs.get("retriever_parameters", None)
        if retriever_parameters:
            inputs.update(retriever_parameters.dict())
        return inputs

    if service_type == ServiceType.LLM:
        # Convert the TGI/vLLM request into the unified OpenAI chat-completions format.
        return {
            # Placeholder model name so the payload has a uniform shape.
            "model": "tgi",
            "messages": [{"role": "user", "content": inputs["inputs"]}],
            "max_tokens": llm_parameters_dict["max_tokens"],
            "top_p": llm_parameters_dict["top_p"],
            "stream": inputs["streaming"],
            "frequency_penalty": inputs["frequency_penalty"],
            "presence_penalty": inputs["presence_penalty"],
            "repetition_penalty": inputs["repetition_penalty"],
            "temperature": inputs["temperature"],
        }

    return inputs
|
||||
|
||||
|
||||
def _build_llm_prompt(query, docs, chat_template):
    """Render the prompt sent to the LLM for ``query`` with ``docs`` as context.

    A user-supplied ``chat_template`` is honoured when its variables are exactly
    ``{question, context}`` or exactly ``question``. Any other template is
    reported and ignored, and the default RAG template is used, as it is when
    no template is given.
    """
    if chat_template:
        prompt_template = PromptTemplate.from_template(chat_template)
        input_variables = prompt_template.input_variables
        if sorted(input_variables) == ["context", "question"]:
            return prompt_template.format(question=query, context="\n".join(docs))
        if input_variables == ["question"]:
            return prompt_template.format(question=query)
        print(f"{prompt_template} not used, we only support 2 input variables ['question', 'context']")
    return ChatTemplate.generate_rag_prompt(query, docs)


def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_dict, **kwargs):
    """Convert the response of the service at ``cur_node`` into input for the next node.

    Installed on ``ServiceOrchestrator`` as a method, hence the ``self`` parameter.

    * EMBEDDING: returns the original text with the first embedding in ``data``.
    * RETRIEVER: when the first downstream node's name starts with ``"rerank"``
      and documents were retrieved, returns the rerank request (``query`` and
      ``texts``). Otherwise returns the LLM prompt; if a rerank node is
      downstream but nothing was retrieved, that node is first removed from
      ``runtime_graph`` and the current node is linked to its successors.
    * RERANK: keeps the ``top_n`` best documents (default 1) and builds the LLM
      prompt from those reranked documents.
    * Any other service type: ``data`` is passed through unchanged.

    Args:
        data: Response from the service at ``cur_node``.
        cur_node: Key into ``self.services`` identifying the node.
        inputs: The request that was sent to the node.
        runtime_graph: Execution graph; may be mutated in the RETRIEVER case.
        llm_parameters_dict: Supplies ``chat_template``.
        **kwargs: May contain ``reranker_parameters`` (read for ``top_n``).

    Returns:
        The data to hand to the next node.
    """
    next_data = {}
    service_type = self.services[cur_node].service_type

    if service_type == ServiceType.EMBEDDING:
        assert isinstance(data, list)
        next_data = {"text": inputs["inputs"], "embedding": data[0]}

    elif service_type == ServiceType.RETRIEVER:
        docs = [doc["text"] for doc in data["retrieved_docs"]]
        with_rerank = runtime_graph.downstream(cur_node)[0].startswith("rerank")

        if with_rerank and docs:
            # Forward to rerank: it needs the query and the candidate texts.
            next_data["query"] = data["initial_query"]
            next_data["texts"] = docs
        else:
            # Forward to the LLM.
            if not docs and with_rerank:
                # Nothing to rerank: turn retriever -> rerank -> llm into retriever -> llm.
                for ds in reversed(runtime_graph.downstream(cur_node)):
                    for nds in runtime_graph.downstream(ds):
                        runtime_graph.add_edge(cur_node, nds)
                    runtime_graph.delete_node_if_exists(ds)

            next_data["inputs"] = _build_llm_prompt(
                data["initial_query"], docs, llm_parameters_dict["chat_template"]
            )

    elif service_type == ServiceType.RERANK:
        # Pick the best-scoring documents using the indices returned by the reranker.
        reranker_parameters = kwargs.get("reranker_parameters", None)
        top_n = reranker_parameters.top_n if reranker_parameters else 1
        docs = inputs["texts"]
        reranked_docs = [docs[best_response["index"]] for best_response in data[:top_n]]

        # Use the reranked selection as context. Passing the full unranked list
        # here would discard the reranker's result.
        next_data["inputs"] = _build_llm_prompt(
            inputs["query"], reranked_docs, llm_parameters_dict["chat_template"]
        )

    else:
        # Other services (e.g. the LLM itself) need no reshaping; keep their output.
        next_data = data

    return next_data
|
||||
|
||||
|
||||
def align_generator(self, gen, **kwargs):
    """Re-emit an OpenAI-style streaming response as ``data: ...`` events.

    Each item of ``gen`` is a bytes chunk such as
    ``b'data:{"choices":[{"delta":{"content":"?"},"finish_reason":null}]}\\n\\n'``.
    The JSON object between the first ``{`` and the last ``}`` is parsed and the
    delta content is yielded as ``data: <repr of UTF-8 bytes>``. Chunks whose
    ``finish_reason`` is ``"eos_token"`` yield nothing. If a chunk cannot be
    parsed or lacks the expected fields, the raw extracted text is yielded in
    the same form. A final ``data: [DONE]`` event is always yielded.

    Args:
        gen: Iterable of bytes chunks.
        **kwargs: Ignored.

    Yields:
        Event strings, each terminated by a blank line.
    """
    for raw_chunk in gen:
        text = raw_chunk.decode("utf-8")
        payload = text[text.find("{") : text.rfind("}") + 1]
        try:
            # Empty or partial chunks do occur; the except branch is the fallback.
            choice = json.loads(payload)["choices"][0]
            if choice["finish_reason"] != "eos_token":
                content_bytes = choice["delta"]["content"].encode("utf-8")
                yield f"data: {content_bytes!r}\n\n"
        except Exception:
            yield f"data: {payload.encode('utf-8')!r}\n\n"
    yield "data: [DONE]\n\n"
|
||||
|
||||
|
||||
class ChatQnAService:
    """ChatQnA megaservice that talks to the embedding, rerank and LLM servers directly.

    Creating an instance installs this module's ``align_inputs``,
    ``align_outputs`` and ``align_generator`` on the ``ServiceOrchestrator``
    class, which adapts requests and responses between pipeline stages.
    """

    def __init__(self, host="0.0.0.0", port=8000):
        """Store the host/port settings, install the align hooks, and create the orchestrator.

        Args:
            host: Stored on the instance as ``self.host``.
            port: Stored as ``self.port``; later passed to the gateway.
        """
        self.host = host
        self.port = port
        # The hooks are set on the class, so they apply to every ServiceOrchestrator.
        ServiceOrchestrator.align_inputs = align_inputs
        ServiceOrchestrator.align_outputs = align_outputs
        ServiceOrchestrator.align_generator = align_generator
        self.megaservice = ServiceOrchestrator()

    @staticmethod
    def _remote(name, host, port, endpoint, service_type):
        """Describe one remote microservice stage."""
        return MicroService(
            name=name,
            host=host,
            port=port,
            endpoint=endpoint,
            use_remote_service=True,
            service_type=service_type,
        )

    def _assemble(self, *stages):
        """Register ``stages``, link each to the next, and create the gateway."""
        registry = self.megaservice
        for stage in stages:
            # Re-bind to the return value so this behaves like .add(a).add(b)...
            registry = registry.add(stage)
        for upstream, downstream in zip(stages, stages[1:]):
            self.megaservice.flow_to(upstream, downstream)
        # The gateway host is the literal "0.0.0.0"; self.host is not consulted here.
        self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)

    def add_remote_service(self):
        """Build the full pipeline: embedding -> retriever -> rerank -> llm."""
        self._assemble(
            self._remote(
                "embedding", EMBEDDING_SERVER_HOST_IP, EMBEDDING_SERVER_PORT, "/embed", ServiceType.EMBEDDING
            ),
            self._remote(
                "retriever",
                RETRIEVER_SERVICE_HOST_IP,
                RETRIEVER_SERVICE_PORT,
                "/v1/retrieval",
                ServiceType.RETRIEVER,
            ),
            self._remote("rerank", RERANK_SERVER_HOST_IP, RERANK_SERVER_PORT, "/rerank", ServiceType.RERANK),
            self._remote("llm", LLM_SERVER_HOST_IP, LLM_SERVER_PORT, "/v1/chat/completions", ServiceType.LLM),
        )

    def add_remote_service_without_rerank(self):
        """Build the shorter pipeline: embedding -> retriever -> llm."""
        self._assemble(
            self._remote(
                "embedding", EMBEDDING_SERVER_HOST_IP, EMBEDDING_SERVER_PORT, "/embed", ServiceType.EMBEDDING
            ),
            self._remote(
                "retriever",
                RETRIEVER_SERVICE_HOST_IP,
                RETRIEVER_SERVICE_PORT,
                "/v1/retrieval",
                ServiceType.RETRIEVER,
            ),
            self._remote("llm", LLM_SERVER_HOST_IP, LLM_SERVER_PORT, "/v1/chat/completions", ServiceType.LLM),
        )
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # --without-rerank selects the embedding -> retriever -> llm pipeline;
    # the default pipeline also includes the rerank stage.
    cli = argparse.ArgumentParser()
    cli.add_argument("--without-rerank", action="store_true")
    options = cli.parse_args()

    chatqna = ChatQnAService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT)
    build_pipeline = (
        chatqna.add_remote_service_without_rerank if options.without_rerank else chatqna.add_remote_service
    )
    build_pipeline()
|
||||
57
ChatQnA/chatqna_without_rerank.py
Normal file
57
ChatQnA/chatqna_without_rerank.py
Normal file
@@ -0,0 +1,57 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import os
|
||||
|
||||
from comps import ChatQnAGateway, MicroService, ServiceOrchestrator, ServiceType
|
||||
|
||||
# Connection settings, each read from the environment with a built-in fallback.
# Ports are converted to int whether they come from the environment or the default.

# Megaservice.
MEGA_SERVICE_HOST_IP = os.environ.get("MEGA_SERVICE_HOST_IP", "0.0.0.0")
MEGA_SERVICE_PORT = int(os.environ.get("MEGA_SERVICE_PORT", 8888))

# Embedding microservice.
EMBEDDING_SERVICE_HOST_IP = os.environ.get("EMBEDDING_SERVICE_HOST_IP", "0.0.0.0")
EMBEDDING_SERVICE_PORT = int(os.environ.get("EMBEDDING_SERVICE_PORT", 6000))

# Retriever microservice.
RETRIEVER_SERVICE_HOST_IP = os.environ.get("RETRIEVER_SERVICE_HOST_IP", "0.0.0.0")
RETRIEVER_SERVICE_PORT = int(os.environ.get("RETRIEVER_SERVICE_PORT", 7000))

# LLM microservice.
LLM_SERVICE_HOST_IP = os.environ.get("LLM_SERVICE_HOST_IP", "0.0.0.0")
LLM_SERVICE_PORT = int(os.environ.get("LLM_SERVICE_PORT", 9000))
|
||||
|
||||
|
||||
class ChatQnAService:
    """ChatQnA megaservice without a rerank stage: embedding -> retriever -> llm."""

    def __init__(self, host="0.0.0.0", port=8000):
        """Remember the host/port settings and create an empty orchestrator.

        Args:
            host: Stored on the instance as ``self.host``.
            port: Stored as ``self.port``; later passed to the gateway.
        """
        self.host, self.port = host, port
        self.megaservice = ServiceOrchestrator()

    def add_remote_service(self):
        """Register the three remote microservices, link them, and create the gateway."""

        def remote(name, host, port, endpoint, service_type):
            # Every stage is a remote service; only these five fields vary.
            return MicroService(
                name=name,
                host=host,
                port=port,
                endpoint=endpoint,
                use_remote_service=True,
                service_type=service_type,
            )

        embedding = remote(
            "embedding",
            EMBEDDING_SERVICE_HOST_IP,
            EMBEDDING_SERVICE_PORT,
            "/v1/embeddings",
            ServiceType.EMBEDDING,
        )
        retriever = remote(
            "retriever",
            RETRIEVER_SERVICE_HOST_IP,
            RETRIEVER_SERVICE_PORT,
            "/v1/retrieval",
            ServiceType.RETRIEVER,
        )
        llm = remote("llm", LLM_SERVICE_HOST_IP, LLM_SERVICE_PORT, "/v1/chat/completions", ServiceType.LLM)

        orchestrator = self.megaservice
        orchestrator.add(embedding).add(retriever).add(llm)
        orchestrator.flow_to(embedding, retriever)
        orchestrator.flow_to(retriever, llm)

        # The gateway host is the literal "0.0.0.0"; self.host is not consulted here.
        self.gateway = ChatQnAGateway(megaservice=orchestrator, host="0.0.0.0", port=self.port)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Build the service from the environment-derived settings, then wire the
    # rerank-free pipeline and create its gateway.
    chatqna = ChatQnAService(port=MEGA_SERVICE_PORT, host=MEGA_SERVICE_HOST_IP)
    chatqna.add_remote_service()
|
||||
@@ -2,111 +2,11 @@
|
||||
|
||||
This document outlines the deployment process for a ChatQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on AIPC. The steps include Docker image creation, container deployment via Docker Compose, and service execution to integrate microservices such as `embedding`, `retriever`, `rerank`, and `llm`.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
We use [Ollama](https://ollama.com/) as our LLM service for AIPC.
|
||||
|
||||
Please follow the instructions below to set up Ollama on your PC. This sets the entrypoint that Ollama needs to work with the ChatQnA examples.
|
||||
|
||||
### Set Up Ollama LLM Service
|
||||
|
||||
#### Install Ollama Service
|
||||
|
||||
Install Ollama service with one command:
|
||||
|
||||
```
|
||||
curl -fsSL https://ollama.com/install.sh | sh
|
||||
```
|
||||
|
||||
#### Set Ollama Service Configuration
|
||||
|
||||
The Ollama service configuration file is /etc/systemd/system/ollama.service. Edit the file to set the OLLAMA_HOST environment variable.
|
||||
Replace **<host_ip>** with your host IPv4 address (please use the external public IP). For example, if the host_ip is 10.132.x.y, then use `Environment="OLLAMA_HOST=10.132.x.y:11434"`.
|
||||
|
||||
```
|
||||
Environment="OLLAMA_HOST=host_ip:11434"
|
||||
```
|
||||
|
||||
#### Set https_proxy environment for Ollama
|
||||
|
||||
If your system accesses the network through a proxy, add https_proxy to the Ollama service configuration file:
|
||||
|
||||
```
|
||||
Environment="https_proxy=Your_HTTPS_Proxy"
|
||||
```
|
||||
|
||||
#### Restart Ollama services
|
||||
|
||||
```
|
||||
$ sudo systemctl daemon-reload
|
||||
$ sudo systemctl restart ollama.service
|
||||
```
|
||||
|
||||
#### Check the service started
|
||||
|
||||
```
|
||||
netstat -tuln | grep 11434
|
||||
```
|
||||
|
||||
The output is:
|
||||
|
||||
```
|
||||
tcp 0 0 10.132.x.y:11434 0.0.0.0:* LISTEN
|
||||
```
|
||||
|
||||
#### Pull Ollama LLM model
|
||||
|
||||
Run the command to download LLM models. The <host_ip> is the one set in [Ollama Service Configuration](#Set-Ollama-Service-Configuration)
|
||||
|
||||
```
|
||||
export host_ip=<host_ip>
|
||||
export OLLAMA_HOST=http://${host_ip}:11434
|
||||
ollama pull llama3.2
|
||||
```
|
||||
|
||||
After downloading the models, you can list them with `ollama list`.
|
||||
|
||||
The output should be similar to the following:
|
||||
|
||||
```
|
||||
NAME ID SIZE MODIFIED
|
||||
llama3.2:latest a80c4f17acd5 2.0 GB 2 minutes ago
|
||||
```
|
||||
|
||||
### Consume Ollama LLM Service
|
||||
|
||||
Access the Ollama service to verify that it is functioning correctly.
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:11434/api/generate -d '{"model": "llama3.2", "prompt":"What is Deep Learning?"}'
|
||||
```
|
||||
|
||||
The outputs are similar to these:
|
||||
|
||||
```
|
||||
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.098813868Z","response":"Deep","done":false}
|
||||
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.124514468Z","response":" learning","done":false}
|
||||
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.149754216Z","response":" is","done":false}
|
||||
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.180420784Z","response":" a","done":false}
|
||||
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.229185873Z","response":" subset","done":false}
|
||||
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.263956118Z","response":" of","done":false}
|
||||
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.289097354Z","response":" machine","done":false}
|
||||
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.316838918Z","response":" learning","done":false}
|
||||
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.342309506Z","response":" that","done":false}
|
||||
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.367221264Z","response":" involves","done":false}
|
||||
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.39205893Z","response":" the","done":false}
|
||||
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.417933974Z","response":" use","done":false}
|
||||
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.443110388Z","response":" of","done":false}
|
||||
...
|
||||
```
|
||||
|
||||
## 🚀 Build Docker Images
|
||||
|
||||
First of all, you need to build Docker Images locally and install the python package of it.
|
||||
|
||||
```bash
|
||||
mkdir ~/OPEA -p
|
||||
cd ~/OPEA
|
||||
git clone https://github.com/opea-project/GenAIComps.git
|
||||
cd GenAIComps
|
||||
```
|
||||
@@ -116,60 +16,112 @@ If you are in a proxy environment, set the proxy-related environment variables:
|
||||
export http_proxy="Your_HTTP_Proxy"
|
||||
export https_proxy="Your_HTTPs_Proxy"
|
||||
|
||||
### 1. Build Retriever Image
|
||||
### 1. Build Embedding Image
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/tei/langchain/Dockerfile .
|
||||
```
|
||||
|
||||
### 2. Build Retriever Image
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/retriever-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/redis/langchain/Dockerfile .
|
||||
```
|
||||
|
||||
### 2 Build LLM Image
|
||||
### 3. Build Rerank Image
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/tei/Dockerfile .
|
||||
```
|
||||
|
||||
### 4. Set up Ollama Service and Build LLM Image
|
||||
|
||||
We use [Ollama](https://ollama.com/) as our LLM service for AIPC.
|
||||
|
||||
Please follow the instructions below to set up Ollama on your PC. This sets the entrypoint that Ollama needs to work with the ChatQnA examples.
|
||||
|
||||
#### 4.1 Set Up Ollama LLM Service
|
||||
|
||||
Install Ollama service with one command
|
||||
|
||||
curl -fsSL https://ollama.com/install.sh | sh
|
||||
|
||||
##### Set Ollama Service Configuration
|
||||
|
||||
The Ollama service configuration file is /etc/systemd/system/ollama.service. Edit the file to set the OLLAMA_HOST environment variable (replace **${host_ip}** with your host IPv4 address).
|
||||
|
||||
```
|
||||
Environment="OLLAMA_HOST=${host_ip}:11434"
|
||||
```
|
||||
|
||||
##### Set https_proxy environment for Ollama
|
||||
|
||||
If your system accesses the network through a proxy, add https_proxy to the Ollama service configuration file:
|
||||
|
||||
```
|
||||
Environment="https_proxy=Your_HTTPS_Proxy"
|
||||
```
|
||||
|
||||
##### Restart Ollama services
|
||||
|
||||
```
|
||||
$ sudo systemctl daemon-reload
|
||||
$ sudo systemctl restart ollama.service
|
||||
```
|
||||
|
||||
##### Pull LLM model
|
||||
|
||||
```
|
||||
#export OLLAMA_HOST=http://${host_ip}:11434
|
||||
#ollama pull llama3
|
||||
#ollama list
|
||||
NAME ID SIZE MODIFIED
|
||||
llama3:latest 365c0bd3c000 4.7 GB 5 days ago
|
||||
```
|
||||
|
||||
#### 4.2 Build LLM Image
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/llm-ollama:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/ollama/langchain/Dockerfile .
|
||||
```
|
||||
|
||||
### 3. Build Dataprep Image
|
||||
### 5. Build Dataprep Image
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/dataprep-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain/Dockerfile .
|
||||
cd ..
|
||||
```
|
||||
|
||||
### 4. Build MegaService Docker Image
|
||||
### 6. Build MegaService Docker Image
|
||||
|
||||
To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `chatqna.py` Python script. Build MegaService Docker image via below command:
|
||||
|
||||
```bash
|
||||
cd ~/OPEA
|
||||
git clone https://github.com/opea-project/GenAIExamples.git
|
||||
cd GenAIExamples/ChatQnA
|
||||
docker build --no-cache -t opea/chatqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
|
||||
docker build --no-cache -t opea/chatqna:latest -f Dockerfile .
|
||||
cd ../../..
|
||||
```
|
||||
|
||||
### 5. Build UI Docker Image
|
||||
### 7. Build UI Docker Image
|
||||
|
||||
Build frontend Docker image via below command:
|
||||
|
||||
```bash
|
||||
cd ~/OPEA/GenAIExamples/ChatQnA/ui
|
||||
cd GenAIExamples/ChatQnA/ui
|
||||
docker build --no-cache -t opea/chatqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile .
|
||||
cd ../../../..
|
||||
```
|
||||
|
||||
### 6. Build Nginx Docker Image
|
||||
|
||||
```bash
|
||||
cd GenAIComps
|
||||
docker build -t opea/nginx:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/nginx/Dockerfile .
|
||||
```
|
||||
|
||||
Then run the command `docker images`, you will have the following 6 Docker Images:
|
||||
Then run the command `docker images`, you will have the following 7 Docker Images:
|
||||
|
||||
1. `opea/dataprep-redis:latest`
|
||||
2. `opea/retriever-redis:latest`
|
||||
3. `opea/llm-ollama:latest`
|
||||
4. `opea/chatqna:latest`
|
||||
5. `opea/chatqna-ui:latest`
|
||||
6. `opea/nginx:latest`
|
||||
2. `opea/embedding-tei:latest`
|
||||
3. `opea/retriever-redis:latest`
|
||||
4. `opea/reranking-tei:latest`
|
||||
5. `opea/llm-ollama:latest`
|
||||
6. `opea/chatqna:latest`
|
||||
7. `opea/chatqna-ui:latest`
|
||||
|
||||
## 🚀 Start Microservices
|
||||
|
||||
@@ -209,10 +161,21 @@ export http_proxy=${your_http_proxy}
|
||||
export https_proxy=${your_http_proxy}
|
||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006"
|
||||
export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
|
||||
export REDIS_URL="redis://${host_ip}:6379"
|
||||
export INDEX_NAME="rag-redis"
|
||||
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
|
||||
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
|
||||
export RERANK_SERVICE_HOST_IP=${host_ip}
|
||||
export LLM_SERVICE_HOST_IP=${host_ip}
|
||||
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/chatqna"
|
||||
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
|
||||
|
||||
export OLLAMA_ENDPOINT=http://${host_ip}:11434
|
||||
export OLLAMA_MODEL="llama3.2"
|
||||
export OLLAMA_MODEL="llama3"
|
||||
```
|
||||
|
||||
- Windows PC
|
||||
@@ -220,10 +183,21 @@ export OLLAMA_MODEL="llama3.2"
|
||||
```bash
|
||||
set EMBEDDING_MODEL_ID=BAAI/bge-base-en-v1.5
|
||||
set RERANK_MODEL_ID=BAAI/bge-reranker-base
|
||||
set TEI_EMBEDDING_ENDPOINT=http://%host_ip%:6006
|
||||
set TEI_RERANKING_ENDPOINT=http://%host_ip%:8808
|
||||
set REDIS_URL=redis://%host_ip%:6379
|
||||
set INDEX_NAME=rag-redis
|
||||
set HUGGINGFACEHUB_API_TOKEN=%your_hf_api_token%
|
||||
set MEGA_SERVICE_HOST_IP=%host_ip%
|
||||
set EMBEDDING_SERVICE_HOST_IP=%host_ip%
|
||||
set RETRIEVER_SERVICE_HOST_IP=%host_ip%
|
||||
set RERANK_SERVICE_HOST_IP=%host_ip%
|
||||
set LLM_SERVICE_HOST_IP=%host_ip%
|
||||
set BACKEND_SERVICE_ENDPOINT=http://%host_ip%:8888/v1/chatqna
|
||||
set DATAPREP_SERVICE_ENDPOINT=http://%host_ip%:6007/v1/dataprep
|
||||
|
||||
set OLLAMA_ENDPOINT=http://host.docker.internal:11434
|
||||
set OLLAMA_MODEL="llama3.2"
|
||||
set OLLAMA_MODEL="llama3"
|
||||
```
|
||||
|
||||
Note: Please replace `host_ip` with your external IP address; do not use localhost.
|
||||
@@ -233,8 +207,14 @@ Note: Please replace with `host_ip` with you external IP address, do not use loc
|
||||
> Before running the docker compose command, you need to be in the folder that has the docker compose yaml file
|
||||
|
||||
```bash
|
||||
cd ~/OPEA/GenAIExamples/ChatQnA/docker_compose/intel/cpu/aipc/
|
||||
cd GenAIExamples/ChatQnA/docker_compose/intel/cpu/aipc/
|
||||
docker compose up -d
|
||||
|
||||
# make sure the ollama service is running the model
|
||||
# e.g. ollama run llama3
|
||||
OLLAMA_HOST=${host_ip}:11434 ollama run $OLLAMA_MODEL
|
||||
# for windows
|
||||
# ollama run %OLLAMA_MODEL%
|
||||
```
|
||||
|
||||
### Validate Microservices
|
||||
@@ -251,7 +231,16 @@ For details on how to verify the correctness of the response, refer to [how-to-v
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
2. Retriever Microservice
|
||||
2. Embedding Microservice
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:6000/v1/embeddings\
|
||||
-X POST \
|
||||
-d '{"text":"hello"}' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
3. Retriever Microservice
|
||||
To validate the retriever microservice, you need to generate a mock embedding vector of length 768 in Python script:
|
||||
|
||||
```bash
|
||||
@@ -262,7 +251,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
3. TEI Reranking Service
|
||||
4. TEI Reranking Service
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:8808/rerank \
|
||||
@@ -271,13 +260,22 @@ For details on how to verify the correctness of the response, refer to [how-to-v
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
4. Ollama Service
|
||||
5. Reranking Microservice
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:11434/api/generate -d '{"model": "llama3.2", "prompt":"What is Deep Learning?"}'
|
||||
curl http://${host_ip}:8000/v1/reranking\
|
||||
-X POST \
|
||||
-d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
5. LLM Microservice
|
||||
6. Ollama Service
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:11434/api/generate -d '{"model": "llama3", "prompt":"What is Deep Learning?"}'
|
||||
```
|
||||
|
||||
7. LLM Microservice
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:9000/v1/chat/completions\
|
||||
@@ -286,51 +284,37 @@ For details on how to verify the correctness of the response, refer to [how-to-v
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
6. MegaService
|
||||
8. MegaService
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:8888/v1/chatqna -H "Content-Type: application/json" -d '{
|
||||
"messages": "What is the revenue of Nike in 2023?"
|
||||
"messages": "What is the revenue of Nike in 2023?", "model": "'"${OLLAMA_MODEL}"'"
|
||||
}'
|
||||
```
|
||||
|
||||
7. Upload RAG Files through Dataprep Microservice (Optional)
|
||||
9. Dataprep Microservice(Optional)
|
||||
|
||||
To chat with retrieved information, you need to upload a file using Dataprep service.
|
||||
If you want to update the default knowledge base, you can use the following commands:
|
||||
|
||||
Here is an example of Nike 2023 pdf file.
|
||||
Update Knowledge Base via Local File Upload:
|
||||
|
||||
```bash
|
||||
# download pdf file
|
||||
wget https://raw.githubusercontent.com/opea-project/GenAIComps/main/comps/retrievers/redis/data/nke-10k-2023.pdf
|
||||
```bash
|
||||
curl -X POST "http://${host_ip}:6007/v1/dataprep" \
|
||||
-H "Content-Type: multipart/form-data" \
|
||||
-F "files=@./nke-10k-2023.pdf"
|
||||
```
|
||||
|
||||
# upload pdf file with dataprep
|
||||
curl -X POST "http://${host_ip}:6007/v1/dataprep" \
|
||||
-H "Content-Type: multipart/form-data" \
|
||||
-F "files=@./nke-10k-2023.pdf"
|
||||
```
|
||||
This command updates a knowledge base by uploading a local file for processing. Update the file path according to your environment.
|
||||
|
||||
This command updates a knowledge base by uploading a local file for processing. Update the file path according to your environment.
|
||||
Add Knowledge Base via HTTP Links:
|
||||
|
||||
Alternatively, you can add knowledge base via HTTP Links:
|
||||
```bash
|
||||
curl -X POST "http://${host_ip}:6007/v1/dataprep" \
|
||||
-H "Content-Type: multipart/form-data" \
|
||||
-F 'link_list=["https://opea.dev"]'
|
||||
```
|
||||
|
||||
```bash
|
||||
curl -X POST "http://${host_ip}:6007/v1/dataprep" \
|
||||
-H "Content-Type: multipart/form-data" \
|
||||
-F 'link_list=["https://opea.dev"]'
|
||||
```
|
||||
|
||||
This command updates a knowledge base by submitting a list of HTTP links for processing.
|
||||
|
||||
To check the uploaded files, you can retrieve the list of files that have been uploaded:
|
||||
|
||||
```bash
|
||||
curl -X POST "http://${host_ip}:6007/v1/dataprep/get_file" \
|
||||
-H "Content-Type: application/json"
|
||||
```
|
||||
|
||||
The output is:
|
||||
`[{"name":"nke-10k-2023.pdf","id":"nke-10k-2023.pdf","type":"File","parent":""}]`
|
||||
This command updates a knowledge base by submitting a list of HTTP links for processing.
|
||||
|
||||
## 🚀 Launch the UI
|
||||
|
||||
|
||||
@@ -13,17 +13,15 @@ services:
|
||||
container_name: dataprep-redis-server
|
||||
depends_on:
|
||||
- redis-vector-db
|
||||
- tei-embedding-service
|
||||
ports:
|
||||
- "6007:6007"
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
REDIS_URL: redis://redis-vector-db:6379
|
||||
REDIS_HOST: redis-vector-db
|
||||
REDIS_URL: ${REDIS_URL}
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_ENDPOINT: http://tei-embedding-service:80
|
||||
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
tei-embedding-service:
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
@@ -38,6 +36,20 @@ services:
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
|
||||
embedding:
|
||||
image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
|
||||
container_name: embedding-tei-server
|
||||
depends_on:
|
||||
- tei-embedding-service
|
||||
ports:
|
||||
- "6000:6000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
restart: unless-stopped
|
||||
retriever:
|
||||
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
|
||||
container_name: retriever-redis-server
|
||||
@@ -50,11 +62,9 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
REDIS_URL: redis://redis-vector-db:6379
|
||||
REDIS_HOST: redis-vector-db
|
||||
REDIS_URL: ${REDIS_URL}
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
restart: unless-stopped
|
||||
tei-reranking-service:
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
@@ -72,6 +82,23 @@ services:
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
|
||||
reranking:
|
||||
image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
|
||||
container_name: reranking-tei-aipc-server
|
||||
depends_on:
|
||||
- tei-reranking-service
|
||||
ports:
|
||||
- "8000:8000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
restart: unless-stopped
|
||||
llm:
|
||||
image: ${REGISTRY:-opea}/llm-ollama
|
||||
container_name: llm-ollama
|
||||
@@ -82,6 +109,7 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
@@ -92,10 +120,11 @@ services:
|
||||
container_name: chatqna-aipc-backend-server
|
||||
depends_on:
|
||||
- redis-vector-db
|
||||
- dataprep-redis-service
|
||||
- tei-embedding-service
|
||||
- embedding
|
||||
- retriever
|
||||
- tei-reranking-service
|
||||
- reranking
|
||||
- llm
|
||||
ports:
|
||||
- "8888:8888"
|
||||
@@ -103,15 +132,11 @@ services:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- MEGA_SERVICE_HOST_IP=chaqna-aipc-backend-server
|
||||
- EMBEDDING_SERVER_HOST_IP=tei-embedding-service
|
||||
- EMBEDDING_SERVER_PORT=80
|
||||
- RETRIEVER_SERVICE_HOST_IP=retriever
|
||||
- RERANK_SERVER_HOST_IP=tei-reranking-service
|
||||
- RERANK_SERVER_PORT=80
|
||||
- LLM_SERVER_HOST_IP=llm
|
||||
- LLM_SERVER_PORT=9000
|
||||
- LOGFLAG=${LOGFLAG}
|
||||
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
|
||||
- EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
|
||||
- RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
|
||||
- RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP}
|
||||
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
|
||||
ipc: host
|
||||
restart: always
|
||||
chaqna-aipc-ui-server:
|
||||
@@ -125,27 +150,8 @@ services:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
ipc: host
|
||||
restart: always
|
||||
chaqna-aipc-nginx-server:
|
||||
image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
|
||||
container_name: chaqna-aipc-nginx-server
|
||||
depends_on:
|
||||
- chaqna-aipc-backend-server
|
||||
- chaqna-aipc-ui-server
|
||||
ports:
|
||||
- "${NGINX_PORT:-80}:80"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- FRONTEND_SERVICE_IP=chatqna-xeon-ui-server
|
||||
- FRONTEND_SERVICE_PORT=5173
|
||||
- BACKEND_SERVICE_NAME=chatqna
|
||||
- BACKEND_SERVICE_IP=chatqna-xeon-backend-server
|
||||
- BACKEND_SERVICE_PORT=8888
|
||||
- DATAPREP_SERVICE_IP=dataprep-redis-service
|
||||
- DATAPREP_SERVICE_PORT=6007
|
||||
- CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
|
||||
- UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT}
|
||||
ipc: host
|
||||
restart: always
|
||||
|
||||
|
||||
@@ -1,20 +0,0 @@
|
||||
#!/usr/bin/env bash

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Exports the environment variables used by the ChatQnA AIPC deployment.
# Inputs read from the caller's environment:
#   your_hf_api_token - copied into HUGGINGFACEHUB_API_TOKEN
#   host_ip           - used to build OLLAMA_ENDPOINT

# Report missing inputs. These checks only print a message; the exports
# below still run afterwards.
test -n "${your_hf_api_token}" || echo "Error: HUGGINGFACEHUB_API_TOKEN is not set. Please set your_hf_api_token."
test -n "${host_ip}" || echo "Error: host_ip is not set. Please set host_ip first."

# Hugging Face credentials and model selection.
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} \
       EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" \
       RERANK_MODEL_ID="BAAI/bge-reranker-base"

# Vector index name.
export INDEX_NAME="rag-redis"

# Ollama endpoint and the model served through it.
export OLLAMA_ENDPOINT=http://${host_ip}:11434 \
       OLLAMA_MODEL="llama3.2"
|
||||
@@ -97,20 +97,61 @@ After launching your instance, you can connect to it using SSH (for Linux instan
|
||||
|
||||
First of all, you need to build Docker Images locally and install the python package of it.
|
||||
|
||||
### 1. Build Retriever Image
|
||||
### 1. Build Embedding Image
|
||||
|
||||
```bash
|
||||
git clone https://github.com/opea-project/GenAIComps.git
|
||||
cd GenAIComps
|
||||
docker build --no-cache -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/tei/langchain/Dockerfile .
|
||||
```
|
||||
|
||||
### 2. Build Retriever Image
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/retriever-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/redis/langchain/Dockerfile .
|
||||
```
|
||||
|
||||
### 2. Build Dataprep Image
|
||||
### 3. Build Rerank Image
|
||||
|
||||
> Skip for ChatQnA without Rerank pipeline
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/tei/Dockerfile .
|
||||
```
|
||||
|
||||
### 4. Build LLM Image
|
||||
|
||||
#### Use TGI as backend
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile .
|
||||
```
|
||||
|
||||
#### Use vLLM as backend
|
||||
|
||||
Build vLLM docker.
|
||||
|
||||
```bash
|
||||
git clone https://github.com/vllm-project/vllm.git
|
||||
cd ./vllm/
|
||||
docker build --no-cache -t opea/vllm:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile.cpu .
|
||||
cd ..
|
||||
```
|
||||
|
||||
Build microservice.
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/llm-vllm:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/vllm/langchain/Dockerfile .
|
||||
```
|
||||
|
||||
### 5. Build Dataprep Image
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/dataprep-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain/Dockerfile .
|
||||
cd ..
|
||||
```
|
||||
|
||||
### 3. Build MegaService Docker Image
|
||||
### 6. Build MegaService Docker Image
|
||||
|
||||
1. MegaService with Rerank
|
||||
|
||||
@@ -132,7 +173,7 @@ cd ..
|
||||
docker build --no-cache -t opea/chatqna-without-rerank:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile.without_rerank .
|
||||
```
|
||||
|
||||
### 4. Build UI Docker Image
|
||||
### 7. Build UI Docker Image
|
||||
|
||||
Build frontend Docker image via below command:
|
||||
|
||||
@@ -141,7 +182,7 @@ cd GenAIExamples/ChatQnA/ui
|
||||
docker build --no-cache -t opea/chatqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile .
|
||||
```
|
||||
|
||||
### 5. Build Conversational React UI Docker Image (Optional)
|
||||
### 8. Build Conversational React UI Docker Image (Optional)
|
||||
|
||||
Build frontend Docker image that enables Conversational experience with ChatQnA megaservice via below command:
|
||||
|
||||
@@ -152,20 +193,23 @@ cd GenAIExamples/ChatQnA/ui
|
||||
docker build --no-cache -t opea/chatqna-conversation-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.react .
|
||||
```
|
||||
|
||||
### 6. Build Nginx Docker Image
|
||||
### 9. Build Nginx Docker Image
|
||||
|
||||
```bash
|
||||
cd GenAIComps
|
||||
docker build -t opea/nginx:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/nginx/Dockerfile .
|
||||
```
|
||||
|
||||
Then run the command `docker images`, you will have the following 5 Docker Images:
|
||||
Then run the command `docker images`, you will have the following 8 Docker Images:
|
||||
|
||||
1. `opea/dataprep-redis:latest`
|
||||
2. `opea/retriever-redis:latest`
|
||||
3. `opea/chatqna:latest` or `opea/chatqna-without-rerank:latest`
|
||||
4. `opea/chatqna-ui:latest`
|
||||
5. `opea/nginx:latest`
|
||||
2. `opea/embedding-tei:latest`
|
||||
3. `opea/retriever-redis:latest`
|
||||
4. `opea/reranking-tei:latest`
|
||||
5. `opea/llm-tgi:latest` or `opea/llm-vllm:latest`
|
||||
6. `opea/chatqna:latest` or `opea/chatqna-without-rerank:latest`
|
||||
7. `opea/chatqna-ui:latest`
|
||||
8. `opea/nginx:latest`
|
||||
|
||||
## 🚀 Start Microservices
|
||||
|
||||
@@ -271,7 +315,16 @@ For details on how to verify the correctness of the response, refer to [how-to-v
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
2. Retriever Microservice
|
||||
2. Embedding Microservice
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:6000/v1/embeddings\
|
||||
-X POST \
|
||||
-d '{"text":"hello"}' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
3. Retriever Microservice
|
||||
|
||||
To consume the retriever microservice, you need to generate a mock embedding vector by Python script. The length of embedding vector
|
||||
is determined by the embedding model.
|
||||
@@ -287,7 +340,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
3. TEI Reranking Service
|
||||
4. TEI Reranking Service
|
||||
|
||||
> Skip for ChatQnA without Rerank pipeline
|
||||
|
||||
@@ -298,7 +351,18 @@ For details on how to verify the correctness of the response, refer to [how-to-v
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
4. LLM backend Service
|
||||
5. Reranking Microservice
|
||||
|
||||
> Skip for ChatQnA without Rerank pipeline
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:8000/v1/reranking\
|
||||
-X POST \
|
||||
-d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
6. LLM backend Service
|
||||
|
||||
On first startup, this service will take more time because it has to download the model files. After the download finishes, the service will be ready.
|
||||
|
||||
@@ -331,7 +395,31 @@ For details on how to verify the correctness of the response, refer to [how-to-v
|
||||
-d '{"model": "Intel/neural-chat-7b-v3-3", "prompt": "What is Deep Learning?", "max_tokens": 32, "temperature": 0}'
|
||||
```
|
||||
|
||||
5. MegaService
|
||||
7. LLM Microservice
|
||||
|
||||
This service depends on the LLM backend service above. On first startup it can take a long time to become ready, because it has to wait for the backend service to finish starting.
|
||||
|
||||
```bash
|
||||
# TGI service
|
||||
curl http://${host_ip}:9000/v1/chat/completions\
|
||||
-X POST \
|
||||
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
For parameters in TGI modes, please refer to [HuggingFace InferenceClient API](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.text_generation) (except we rename "max_new_tokens" to "max_tokens".)
|
||||
|
||||
```bash
|
||||
# vLLM Service
|
||||
curl http://${host_ip}:9000/v1/chat/completions \
|
||||
-X POST \
|
||||
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_p":1,"temperature":0.7,"frequency_penalty":0,"presence_penalty":0, "streaming":false}' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
For parameters in vLLM mode, please refer to [LangChain VLLMOpenAI API](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.vllm.VLLMOpenAI.html).
|
||||
|
||||
8. MegaService
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:8888/v1/chatqna -H "Content-Type: application/json" -d '{
|
||||
@@ -339,7 +427,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
|
||||
}'
|
||||
```
|
||||
|
||||
6. Nginx Service
|
||||
9. Nginx Service
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:${NGINX_PORT}/v1/chatqna \
|
||||
@@ -347,7 +435,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
|
||||
-d '{"messages": "What is the revenue of Nike in 2023?"}'
|
||||
```
|
||||
|
||||
7. Dataprep Microservice(Optional)
|
||||
10. Dataprep Microservice(Optional)
|
||||
|
||||
If you want to update the default knowledge base, you can use the following commands:
|
||||
|
||||
|
||||
@@ -70,20 +70,38 @@ git clone https://github.com/opea-project/GenAIComps.git
|
||||
cd GenAIComps
|
||||
```
|
||||
|
||||
### 1. Build Retriever Image
|
||||
### 1. Build Embedding Image
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/tei/langchain/Dockerfile .
|
||||
```
|
||||
|
||||
### 2. Build Retriever Image
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/retriever-qdrant:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/qdrant/haystack/Dockerfile .
|
||||
```
|
||||
|
||||
### 2. Build Dataprep Image
|
||||
### 3. Build Rerank Image
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/tei/Dockerfile .
|
||||
```
|
||||
|
||||
### 4. Build LLM Image
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile .
|
||||
```
|
||||
|
||||
### 5. Build Dataprep Image
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/dataprep-qdrant:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/qdrant/langchain/Dockerfile .
|
||||
cd ..
|
||||
```
|
||||
|
||||
### 3. Build MegaService Docker Image
|
||||
### 6. Build MegaService Docker Image
|
||||
|
||||
To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `chatqna.py` Python script. Build MegaService Docker image via below command:
|
||||
|
||||
@@ -94,7 +112,7 @@ docker build --no-cache -t opea/chatqna:latest --build-arg https_proxy=$https_pr
|
||||
cd ../../..
|
||||
```
|
||||
|
||||
### 4. Build UI Docker Image
|
||||
### 7. Build UI Docker Image
|
||||
|
||||
Build frontend Docker image via below command:
|
||||
|
||||
@@ -104,7 +122,7 @@ docker build --no-cache -t opea/chatqna-ui:latest --build-arg https_proxy=$https
|
||||
cd ../../../..
|
||||
```
|
||||
|
||||
### 5. Build Conversational React UI Docker Image (Optional)
|
||||
### 8. Build Conversational React UI Docker Image (Optional)
|
||||
|
||||
Build frontend Docker image that enables Conversational experience with ChatQnA megaservice via below command:
|
||||
|
||||
@@ -118,20 +136,15 @@ docker build --no-cache -t opea/chatqna-conversation-ui:latest --build-arg https
|
||||
cd ../../../..
|
||||
```
|
||||
|
||||
### 6. Build Nginx Docker Image
|
||||
|
||||
```bash
|
||||
cd GenAIComps
|
||||
docker build -t opea/nginx:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/nginx/Dockerfile .
|
||||
```
|
||||
|
||||
Then run the command `docker images`, you will have the following 5 Docker Images:
|
||||
Then run the command `docker images`, you will have the following 7 Docker Images:
|
||||
|
||||
1. `opea/dataprep-qdrant:latest`
|
||||
2. `opea/retriever-qdrant:latest`
|
||||
3. `opea/chatqna:latest`
|
||||
4. `opea/chatqna-ui:latest`
|
||||
5. `opea/nginx:latest`
|
||||
2. `opea/embedding-tei:latest`
|
||||
3. `opea/retriever-qdrant:latest`
|
||||
4. `opea/reranking-tei:latest`
|
||||
5. `opea/llm-tgi:latest`
|
||||
6. `opea/chatqna:latest`
|
||||
7. `opea/chatqna-ui:latest`
|
||||
|
||||
## 🚀 Start Microservices
|
||||
|
||||
@@ -180,7 +193,20 @@ export https_proxy=${your_http_proxy}
|
||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
|
||||
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6040"
|
||||
export TEI_RERANKING_ENDPOINT="http://${host_ip}:6041"
|
||||
export TGI_LLM_ENDPOINT="http://${host_ip}:6042"
|
||||
export QDRANT_HOST=${host_ip}
|
||||
export QDRANT_PORT=6333
|
||||
export INDEX_NAME="rag-qdrant"
|
||||
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
|
||||
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
|
||||
export RERANK_SERVICE_HOST_IP=${host_ip}
|
||||
export LLM_SERVICE_HOST_IP=${host_ip}
|
||||
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8912/v1/chatqna"
|
||||
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6043/v1/dataprep"
|
||||
```
|
||||
|
||||
Note: Please replace `host_ip` with your external IP address; do not use localhost.
|
||||
@@ -208,7 +234,16 @@ For details on how to verify the correctness of the response, refer to [how-to-v
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
2. Retriever Microservice
|
||||
2. Embedding Microservice
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:6044/v1/embeddings \
|
||||
-X POST \
|
||||
-d '{"text":"hello"}' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
3. Retriever Microservice
|
||||
|
||||
To consume the retriever microservice, you need to generate a mock embedding vector with a Python script. The length of the embedding vector
|
||||
is determined by the embedding model.
|
||||
@@ -224,7 +259,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
3. TEI Reranking Service
|
||||
4. TEI Reranking Service
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:6041/rerank \
|
||||
@@ -233,7 +268,16 @@ For details on how to verify the correctness of the response, refer to [how-to-v
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
4. TGI Service
|
||||
5. Reranking Microservice
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:6046/v1/reranking \
|
||||
-X POST \
|
||||
-d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
6. TGI Service
|
||||
|
||||
On first startup, this service will take more time because it has to download the model files. Once the download has finished, the service will be ready.
|
||||
|
||||
@@ -258,7 +302,16 @@ For details on how to verify the correctness of the response, refer to [how-to-v
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
5. MegaService
|
||||
7. LLM Microservice
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:6047/v1/chat/completions \
|
||||
-X POST \
|
||||
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
8. MegaService
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:8912/v1/chatqna -H "Content-Type: application/json" -d '{
|
||||
@@ -266,7 +319,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
|
||||
}'
|
||||
```
|
||||
|
||||
6. Dataprep Microservice(Optional)
|
||||
9. Dataprep Microservice(Optional)
|
||||
|
||||
If you want to update the default knowledge base, you can use the following commands:
|
||||
|
||||
|
||||
@@ -20,10 +20,10 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
REDIS_URL: redis://redis-vector-db:6379
|
||||
REDIS_HOST: redis-vector-db
|
||||
REDIS_URL: ${REDIS_URL}
|
||||
REDIS_HOST: ${REDIS_HOST}
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_ENDPOINT: http://tei-embedding-service:80
|
||||
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
tei-embedding-service:
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
@@ -38,6 +38,20 @@ services:
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
|
||||
embedding:
|
||||
image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
|
||||
container_name: embedding-tei-server
|
||||
depends_on:
|
||||
- tei-embedding-service
|
||||
ports:
|
||||
- "6000:6000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
restart: unless-stopped
|
||||
retriever:
|
||||
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
|
||||
container_name: retriever-redis-server
|
||||
@@ -50,10 +64,9 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
REDIS_URL: redis://redis-vector-db:6379
|
||||
REDIS_HOST: redis-vector-db
|
||||
REDIS_URL: ${REDIS_URL}
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
|
||||
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
restart: unless-stopped
|
||||
tei-reranking-service:
|
||||
@@ -72,6 +85,23 @@ services:
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
|
||||
reranking:
|
||||
image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
|
||||
container_name: reranking-tei-xeon-server
|
||||
depends_on:
|
||||
- tei-reranking-service
|
||||
ports:
|
||||
- "8000:8000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
restart: unless-stopped
|
||||
tgi-service:
|
||||
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
|
||||
container_name: tgi-service
|
||||
@@ -88,65 +118,83 @@ services:
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
|
||||
chatqna-xeon-backend-server:
|
||||
llm:
|
||||
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
|
||||
container_name: llm-tgi-server
|
||||
depends_on:
|
||||
- tgi-service
|
||||
ports:
|
||||
- "9000:9000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
restart: unless-stopped
|
||||
chaqna-xeon-backend-server:
|
||||
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
|
||||
container_name: chatqna-xeon-backend-server
|
||||
depends_on:
|
||||
- redis-vector-db
|
||||
- tei-embedding-service
|
||||
- embedding
|
||||
- dataprep-redis-service
|
||||
- retriever
|
||||
- tei-reranking-service
|
||||
- reranking
|
||||
- tgi-service
|
||||
- llm
|
||||
ports:
|
||||
- "8888:8888"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- MEGA_SERVICE_HOST_IP=chatqna-xeon-backend-server
|
||||
- EMBEDDING_SERVER_HOST_IP=tei-embedding-service
|
||||
- EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-80}
|
||||
- RETRIEVER_SERVICE_HOST_IP=retriever
|
||||
- RERANK_SERVER_HOST_IP=tei-reranking-service
|
||||
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
|
||||
- LLM_SERVER_HOST_IP=tgi-service
|
||||
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
|
||||
- LOGFLAG=${LOGFLAG}
|
||||
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
|
||||
- EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
|
||||
- RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
|
||||
- RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP}
|
||||
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
|
||||
ipc: host
|
||||
restart: always
|
||||
chatqna-xeon-ui-server:
|
||||
chaqna-xeon-ui-server:
|
||||
image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest}
|
||||
container_name: chatqna-xeon-ui-server
|
||||
depends_on:
|
||||
- chatqna-xeon-backend-server
|
||||
- chaqna-xeon-backend-server
|
||||
ports:
|
||||
- "5173:5173"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
|
||||
- UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT}
|
||||
- GET_FILE=${DATAPREP_GET_FILE_ENDPOINT}
|
||||
- DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT}
|
||||
ipc: host
|
||||
restart: always
|
||||
chatqna-xeon-nginx-server:
|
||||
chaqna-xeon-nginx-server:
|
||||
image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
|
||||
container_name: chatqna-xeon-nginx-server
|
||||
container_name: chaqna-xeon-nginx-server
|
||||
depends_on:
|
||||
- chatqna-xeon-backend-server
|
||||
- chatqna-xeon-ui-server
|
||||
- chaqna-xeon-backend-server
|
||||
- chaqna-xeon-ui-server
|
||||
ports:
|
||||
- "${NGINX_PORT:-80}:80"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- FRONTEND_SERVICE_IP=chatqna-xeon-ui-server
|
||||
- FRONTEND_SERVICE_PORT=5173
|
||||
- BACKEND_SERVICE_NAME=chatqna
|
||||
- BACKEND_SERVICE_IP=chatqna-xeon-backend-server
|
||||
- BACKEND_SERVICE_PORT=8888
|
||||
- DATAPREP_SERVICE_IP=dataprep-redis-service
|
||||
- DATAPREP_SERVICE_PORT=6007
|
||||
- FRONTEND_SERVICE_IP=${FRONTEND_SERVICE_IP}
|
||||
- FRONTEND_SERVICE_PORT=${FRONTEND_SERVICE_PORT}
|
||||
- BACKEND_SERVICE_NAME=${BACKEND_SERVICE_NAME}
|
||||
- BACKEND_SERVICE_IP=${BACKEND_SERVICE_IP}
|
||||
- BACKEND_SERVICE_PORT=${BACKEND_SERVICE_PORT}
|
||||
ipc: host
|
||||
restart: always
|
||||
|
||||
|
||||
184
ChatQnA/docker_compose/intel/cpu/xeon/compose_no_wrapper.yaml
Normal file
184
ChatQnA/docker_compose/intel/cpu/xeon/compose_no_wrapper.yaml
Normal file
@@ -0,0 +1,184 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
services:
|
||||
redis-vector-db:
|
||||
image: redis/redis-stack:7.2.0-v9
|
||||
container_name: redis-vector-db
|
||||
ports:
|
||||
- "6379:6379"
|
||||
- "8001:8001"
|
||||
dataprep-redis-service:
|
||||
image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest}
|
||||
container_name: dataprep-redis-server
|
||||
depends_on:
|
||||
- redis-vector-db
|
||||
- tei-embedding-service
|
||||
ports:
|
||||
- "6007:6007"
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
REDIS_URL: ${REDIS_URL}
|
||||
REDIS_HOST: ${REDIS_HOST}
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
tei-embedding-service:
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
container_name: tei-embedding-server
|
||||
ports:
|
||||
- "6006:80"
|
||||
volumes:
|
||||
- "./data:/data"
|
||||
shm_size: 1g
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
|
||||
# embedding:
|
||||
# image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
|
||||
# container_name: embedding-tei-server
|
||||
# depends_on:
|
||||
# - tei-embedding-service
|
||||
# ports:
|
||||
# - "6000:6000"
|
||||
# ipc: host
|
||||
# environment:
|
||||
# no_proxy: ${no_proxy}
|
||||
# http_proxy: ${http_proxy}
|
||||
# https_proxy: ${https_proxy}
|
||||
# TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
# restart: unless-stopped
|
||||
retriever:
|
||||
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
|
||||
container_name: retriever-redis-server
|
||||
depends_on:
|
||||
- redis-vector-db
|
||||
ports:
|
||||
- "7000:7000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
REDIS_URL: ${REDIS_URL}
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
restart: unless-stopped
|
||||
tei-reranking-service:
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
container_name: tei-reranking-server
|
||||
ports:
|
||||
- "8808:80"
|
||||
volumes:
|
||||
- "./data:/data"
|
||||
shm_size: 1g
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
|
||||
# reranking:
|
||||
# image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
|
||||
# container_name: reranking-tei-xeon-server
|
||||
# depends_on:
|
||||
# - tei-reranking-service
|
||||
# ports:
|
||||
# - "8000:8000"
|
||||
# ipc: host
|
||||
# environment:
|
||||
# no_proxy: ${no_proxy}
|
||||
# http_proxy: ${http_proxy}
|
||||
# https_proxy: ${https_proxy}
|
||||
# TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
|
||||
# HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
# HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
# HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
# restart: unless-stopped
|
||||
tgi-service:
|
||||
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
|
||||
container_name: tgi-service
|
||||
ports:
|
||||
- "9009:80"
|
||||
volumes:
|
||||
- "./data:/data"
|
||||
shm_size: 1g
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
|
||||
# llm:
|
||||
# image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
|
||||
# container_name: llm-tgi-server
|
||||
# depends_on:
|
||||
# - tgi-service
|
||||
# ports:
|
||||
# - "9000:9000"
|
||||
# ipc: host
|
||||
# environment:
|
||||
# no_proxy: ${no_proxy}
|
||||
# http_proxy: ${http_proxy}
|
||||
# https_proxy: ${https_proxy}
|
||||
# TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
|
||||
# HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
# HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
# HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
# restart: unless-stopped
|
||||
chaqna-xeon-backend-server:
|
||||
image: ${REGISTRY:-opea}/chatqna-no-wrapper:${TAG:-latest}
|
||||
container_name: chatqna-xeon-backend-server
|
||||
depends_on:
|
||||
- redis-vector-db
|
||||
- tei-embedding-service
|
||||
# - embedding
|
||||
- dataprep-redis-service
|
||||
- retriever
|
||||
- tei-reranking-service
|
||||
# - reranking
|
||||
- tgi-service
|
||||
# - llm
|
||||
ports:
|
||||
- "8888:8888"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
|
||||
- EMBEDDING_SERVER_HOST_IP=${EMBEDDING_SERVER_HOST_IP}
|
||||
- RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
|
||||
- RERANK_SERVER_HOST_IP=${RERANK_SERVER_HOST_IP}
|
||||
- LLM_SERVER_HOST_IP=${LLM_SERVER_HOST_IP}
|
||||
ipc: host
|
||||
restart: always
|
||||
chaqna-xeon-ui-server:
|
||||
image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest}
|
||||
container_name: chatqna-xeon-ui-server
|
||||
depends_on:
|
||||
- chaqna-xeon-backend-server
|
||||
ports:
|
||||
- "5173:5173"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
|
||||
- UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT}
|
||||
- GET_FILE=${DATAPREP_GET_FILE_ENDPOINT}
|
||||
- DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT}
|
||||
ipc: host
|
||||
restart: always
|
||||
|
||||
networks:
|
||||
default:
|
||||
driver: bridge
|
||||
@@ -20,10 +20,10 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
QDRANT_HOST: qdrant-vector-db
|
||||
QDRANT_HOST: ${QDRANT_HOST}
|
||||
QDRANT_PORT: 6333
|
||||
COLLECTION_NAME: ${INDEX_NAME}
|
||||
TEI_ENDPOINT: http://tei-embedding-service:80
|
||||
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
tei-embedding-service:
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
@@ -38,6 +38,20 @@ services:
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
|
||||
embedding:
|
||||
image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
|
||||
container_name: embedding-tei-server
|
||||
depends_on:
|
||||
- tei-embedding-service
|
||||
ports:
|
||||
- "6044:6000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
restart: unless-stopped
|
||||
retriever:
|
||||
image: ${REGISTRY:-opea}/retriever-qdrant:${TAG:-latest}
|
||||
container_name: retriever-qdrant-server
|
||||
@@ -50,10 +64,9 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
QDRANT_HOST: qdrant-vector-db
|
||||
QDRANT_HOST: ${QDRANT_HOST}
|
||||
QDRANT_PORT: 6333
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
restart: unless-stopped
|
||||
tei-reranking-service:
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
@@ -71,6 +84,23 @@ services:
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
|
||||
reranking:
|
||||
image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
|
||||
container_name: reranking-tei-xeon-server
|
||||
depends_on:
|
||||
- tei-reranking-service
|
||||
ports:
|
||||
- "6046:8000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
restart: unless-stopped
|
||||
tgi-service:
|
||||
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
|
||||
container_name: tgi-service
|
||||
@@ -87,65 +117,67 @@ services:
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
|
||||
chatqna-xeon-backend-server:
|
||||
llm:
|
||||
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
|
||||
container_name: llm-tgi-server
|
||||
depends_on:
|
||||
- tgi-service
|
||||
ports:
|
||||
- "6047:9000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
restart: unless-stopped
|
||||
chaqna-xeon-backend-server:
|
||||
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
|
||||
container_name: chatqna-xeon-backend-server
|
||||
depends_on:
|
||||
- qdrant-vector-db
|
||||
- tei-embedding-service
|
||||
- embedding
|
||||
- retriever
|
||||
- tei-reranking-service
|
||||
- reranking
|
||||
- tgi-service
|
||||
- llm
|
||||
ports:
|
||||
- "8912:8888"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- MEGA_SERVICE_HOST_IP=chatqna-xeon-backend-server
|
||||
- EMBEDDING_SERVER_HOST_IP=tei-embedding-service
|
||||
- EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-80}
|
||||
- RETRIEVER_SERVICE_HOST_IP=retriever
|
||||
- RETRIEVER_SERVICE_PORT=${RETRIEVER_SERVICE_PORT:-7000}
|
||||
- RERANK_SERVER_HOST_IP=tei-reranking-service
|
||||
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
|
||||
- LLM_SERVER_HOST_IP=tgi-service
|
||||
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
|
||||
- LOGFLAG=${LOGFLAG}
|
||||
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
|
||||
- EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
|
||||
- EMBEDDING_SERVICE_PORT=${EMBEDDING_SERVICE_PORT}
|
||||
- RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
|
||||
- RETRIEVER_SERVICE_PORT=${RETRIEVER_SERVICE_PORT}
|
||||
- RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP}
|
||||
- RERANK_SERVICE_PORT=${RERANK_SERVICE_PORT}
|
||||
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
|
||||
- LLM_SERVICE_PORT=${LLM_SERVICE_PORT}
|
||||
ipc: host
|
||||
restart: always
|
||||
chatqna-xeon-ui-server:
|
||||
chaqna-xeon-ui-server:
|
||||
image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest}
|
||||
container_name: chatqna-xeon-ui-server
|
||||
depends_on:
|
||||
- chatqna-xeon-backend-server
|
||||
- chaqna-xeon-backend-server
|
||||
ports:
|
||||
- "5173:5173"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
ipc: host
|
||||
restart: always
|
||||
chatqna-xeon-nginx-server:
|
||||
image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
|
||||
container_name: chatqna-xeon-nginx-server
|
||||
depends_on:
|
||||
- chatqna-xeon-backend-server
|
||||
- chatqna-xeon-ui-server
|
||||
ports:
|
||||
- "${NGINX_PORT:-80}:80"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- FRONTEND_SERVICE_IP=chatqna-xeon-ui-server
|
||||
- FRONTEND_SERVICE_PORT=5173
|
||||
- BACKEND_SERVICE_NAME=chatqna
|
||||
- BACKEND_SERVICE_IP=chatqna-xeon-backend-server
|
||||
- BACKEND_SERVICE_PORT=8888
|
||||
- DATAPREP_SERVICE_IP=dataprep-qdrant-service
|
||||
- DATAPREP_SERVICE_PORT=6007
|
||||
- CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
|
||||
- UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT}
|
||||
- GET_FILE=${DATAPREP_GET_FILE_ENDPOINT}
|
||||
- DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT}
|
||||
ipc: host
|
||||
restart: always
|
||||
|
||||
|
||||
@@ -20,10 +20,9 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
REDIS_URL: redis://redis-vector-db:6379
|
||||
REDIS_HOST: redis-vector-db
|
||||
REDIS_URL: ${REDIS_URL}
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_ENDPOINT: http://tei-embedding-service:80
|
||||
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
tei-embedding-service:
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
@@ -38,6 +37,23 @@ services:
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
|
||||
embedding:
|
||||
image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
|
||||
container_name: embedding-tei-server
|
||||
depends_on:
|
||||
- tei-embedding-service
|
||||
ports:
|
||||
- "6000:6000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
|
||||
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
|
||||
LANGCHAIN_PROJECT: "opea-embedding-service"
|
||||
restart: unless-stopped
|
||||
retriever:
|
||||
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
|
||||
container_name: retriever-redis-server
|
||||
@@ -50,10 +66,12 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
REDIS_URL: redis://redis-vector-db:6379
|
||||
REDIS_HOST: redis-vector-db
|
||||
REDIS_URL: ${REDIS_URL}
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
|
||||
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
|
||||
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
|
||||
LANGCHAIN_PROJECT: "opea-retriever-service"
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
restart: unless-stopped
|
||||
tei-reranking-service:
|
||||
@@ -72,6 +90,26 @@ services:
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
|
||||
reranking:
|
||||
image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
|
||||
container_name: reranking-tei-xeon-server
|
||||
depends_on:
|
||||
- tei-reranking-service
|
||||
ports:
|
||||
- "8000:8000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
|
||||
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
|
||||
LANGCHAIN_PROJECT: "opea-reranking-service"
|
||||
restart: unless-stopped
|
||||
vllm_service:
|
||||
image: ${REGISTRY:-opea}/vllm:${TAG:-latest}
|
||||
container_name: vllm-service
|
||||
@@ -87,64 +125,67 @@ services:
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
LLM_MODEL_ID: ${LLM_MODEL_ID}
|
||||
command: --model $LLM_MODEL_ID --host 0.0.0.0 --port 80
|
||||
chatqna-xeon-backend-server:
|
||||
llm:
|
||||
image: ${REGISTRY:-opea}/llm-vllm:${TAG:-latest}
|
||||
container_name: llm-vllm-server
|
||||
depends_on:
|
||||
- vllm_service
|
||||
ports:
|
||||
- "9000:9000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
vLLM_ENDPOINT: ${vLLM_LLM_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
LLM_MODEL: ${LLM_MODEL_ID}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
|
||||
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
|
||||
LANGCHAIN_PROJECT: "opea-llm-service"
|
||||
restart: unless-stopped
|
||||
chaqna-xeon-backend-server:
|
||||
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
|
||||
container_name: chatqna-xeon-backend-server
|
||||
depends_on:
|
||||
- redis-vector-db
|
||||
- tei-embedding-service
|
||||
- embedding
|
||||
- retriever
|
||||
- tei-reranking-service
|
||||
- reranking
|
||||
- vllm_service
|
||||
- llm
|
||||
ports:
|
||||
- "8888:8888"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- MEGA_SERVICE_HOST_IP=chatqna-xeon-backend-server
|
||||
- EMBEDDING_SERVER_HOST_IP=tei-embedding-service
|
||||
- EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-80}
|
||||
- RETRIEVER_SERVICE_HOST_IP=retriever
|
||||
- RERANK_SERVER_HOST_IP=tei-reranking-service
|
||||
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
|
||||
- LLM_SERVER_HOST_IP=vllm_service
|
||||
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
|
||||
- LOGFLAG=${LOGFLAG}
|
||||
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
|
||||
- EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
|
||||
- RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
|
||||
- RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP}
|
||||
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
|
||||
ipc: host
|
||||
restart: always
|
||||
chatqna-xeon-ui-server:
|
||||
chaqna-xeon-ui-server:
|
||||
image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest}
|
||||
container_name: chatqna-xeon-ui-server
|
||||
depends_on:
|
||||
- chatqna-xeon-backend-server
|
||||
- chaqna-xeon-backend-server
|
||||
ports:
|
||||
- "5173:5173"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
ipc: host
|
||||
restart: always
|
||||
chatqna-xeon-nginx-server:
|
||||
image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
|
||||
container_name: chatqna-xeon-nginx-server
|
||||
depends_on:
|
||||
- chatqna-xeon-backend-server
|
||||
- chatqna-xeon-ui-server
|
||||
ports:
|
||||
- "${NGINX_PORT:-80}:80"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- FRONTEND_SERVICE_IP=chatqna-xeon-ui-server
|
||||
- FRONTEND_SERVICE_PORT=5173
|
||||
- BACKEND_SERVICE_NAME=chatqna
|
||||
- BACKEND_SERVICE_IP=chatqna-xeon-backend-server
|
||||
- BACKEND_SERVICE_PORT=8888
|
||||
- DATAPREP_SERVICE_IP=dataprep-redis-service
|
||||
- DATAPREP_SERVICE_PORT=6007
|
||||
- CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
|
||||
- UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT}
|
||||
- GET_FILE=${DATAPREP_GET_FILE_ENDPOINT}
|
||||
- DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT}
|
||||
ipc: host
|
||||
restart: always
|
||||
|
||||
|
||||
@@ -20,10 +20,10 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
REDIS_URL: redis://redis-vector-db:6379
|
||||
REDIS_HOST: redis-vector-db
|
||||
REDIS_URL: ${REDIS_URL}
|
||||
REDIS_HOST: ${REDIS_HOST}
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_ENDPOINT: http://tei-embedding-service:80
|
||||
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
tei-embedding-service:
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
@@ -38,6 +38,20 @@ services:
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
|
||||
embedding:
|
||||
image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
|
||||
container_name: embedding-tei-server
|
||||
depends_on:
|
||||
- tei-embedding-service
|
||||
ports:
|
||||
- "6000:6000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
restart: unless-stopped
|
||||
retriever:
|
||||
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
|
||||
container_name: retriever-redis-server
|
||||
@@ -50,10 +64,9 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
REDIS_URL: redis://redis-vector-db:6379
|
||||
REDIS_HOST: redis-vector-db
|
||||
REDIS_URL: ${REDIS_URL}
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
|
||||
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
restart: unless-stopped
|
||||
tgi-service:
|
||||
@@ -72,62 +85,61 @@ services:
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
|
||||
chatqna-xeon-backend-server:
|
||||
llm:
|
||||
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
|
||||
container_name: llm-tgi-server
|
||||
depends_on:
|
||||
- tgi-service
|
||||
ports:
|
||||
- "9000:9000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
restart: unless-stopped
|
||||
chaqna-xeon-backend-server:
|
||||
image: ${REGISTRY:-opea}/chatqna-without-rerank:${TAG:-latest}
|
||||
container_name: chatqna-xeon-backend-server
|
||||
depends_on:
|
||||
- redis-vector-db
|
||||
- tei-embedding-service
|
||||
- embedding
|
||||
- dataprep-redis-service
|
||||
- retriever
|
||||
- tgi-service
|
||||
- llm
|
||||
ports:
|
||||
- "8888:8888"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- MEGA_SERVICE_HOST_IP=chatqna-xeon-backend-server
|
||||
- EMBEDDING_SERVER_HOST_IP=tei-embedding-service
|
||||
- EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-80}
|
||||
- RETRIEVER_SERVICE_HOST_IP=retriever
|
||||
- LLM_SERVER_HOST_IP=tgi-service
|
||||
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
|
||||
- LOGFLAG=${LOGFLAG}
|
||||
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
|
||||
- EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
|
||||
- RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
|
||||
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
|
||||
ipc: host
|
||||
restart: always
|
||||
chatqna-xeon-ui-server:
|
||||
chaqna-xeon-ui-server:
|
||||
image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest}
|
||||
container_name: chatqna-xeon-ui-server
|
||||
depends_on:
|
||||
- chatqna-xeon-backend-server
|
||||
- chaqna-xeon-backend-server
|
||||
ports:
|
||||
- "5173:5173"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
ipc: host
|
||||
restart: always
|
||||
chatqna-xeon-nginx-server:
|
||||
image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
|
||||
container_name: chatqna-xeon-nginx-server
|
||||
depends_on:
|
||||
- chatqna-xeon-backend-server
|
||||
- chatqna-xeon-ui-server
|
||||
ports:
|
||||
- "${NGINX_PORT:-80}:80"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- FRONTEND_SERVICE_IP=chatqna-xeon-ui-server
|
||||
- FRONTEND_SERVICE_PORT=5173
|
||||
- BACKEND_SERVICE_NAME=chatqna
|
||||
- BACKEND_SERVICE_IP=chatqna-xeon-backend-server
|
||||
- BACKEND_SERVICE_PORT=8888
|
||||
- DATAPREP_SERVICE_IP=dataprep-redis-service
|
||||
- DATAPREP_SERVICE_PORT=6007
|
||||
- CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
|
||||
- UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT}
|
||||
- GET_FILE=${DATAPREP_GET_FILE_ENDPOINT}
|
||||
- DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT}
|
||||
ipc: host
|
||||
restart: always
|
||||
|
||||
|
||||
@@ -7,4 +7,24 @@
|
||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
|
||||
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006"
|
||||
export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
|
||||
export TGI_LLM_ENDPOINT="http://${host_ip}:9009"
|
||||
export vLLM_LLM_ENDPOINT="http://${host_ip}:9009"
|
||||
export REDIS_URL="redis://${host_ip}:6379"
|
||||
export INDEX_NAME="rag-redis"
|
||||
export REDIS_HOST=${host_ip}
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
|
||||
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
|
||||
export RERANK_SERVICE_HOST_IP=${host_ip}
|
||||
export LLM_SERVICE_HOST_IP=${host_ip}
|
||||
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/chatqna"
|
||||
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
|
||||
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_file"
|
||||
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete_file"
|
||||
export FRONTEND_SERVICE_IP=${host_ip}
|
||||
export FRONTEND_SERVICE_PORT=5173
|
||||
export BACKEND_SERVICE_NAME=chatqna
|
||||
export BACKEND_SERVICE_IP=${host_ip}
|
||||
export BACKEND_SERVICE_PORT=8888
|
||||
|
||||
@@ -70,19 +70,73 @@ curl http://${host_ip}:8888/v1/chatqna \
|
||||
|
||||
First, you need to build the Docker images locally. This step can be skipped once the Docker images are published to Docker Hub.
|
||||
|
||||
### 1. Build Retriever Image
|
||||
### 1. Build Embedding Image
|
||||
|
||||
```bash
|
||||
git clone https://github.com/opea-project/GenAIComps.git
|
||||
cd GenAIComps
|
||||
docker build --no-cache -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/tei/langchain/Dockerfile .
|
||||
```
|
||||
|
||||
### 2. Build Retriever Image
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/retriever-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/redis/langchain/Dockerfile .
|
||||
```
|
||||
|
||||
### 2. Build Dataprep Image
|
||||
### 3. Build Rerank Image
|
||||
|
||||
> Skip for ChatQnA without Rerank pipeline
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/tei/Dockerfile .
|
||||
```
|
||||
|
||||
### 4. Build LLM Image
|
||||
|
||||
You can use different LLM serving solutions; choose one of the following three options.
|
||||
|
||||
#### 4.1 Use TGI
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile .
|
||||
```
|
||||
|
||||
#### 4.2 Use VLLM
|
||||
|
||||
Build vllm docker.
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/llm-vllm-hpu:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/vllm/langchain/dependency/Dockerfile.intel_hpu .
|
||||
```
|
||||
|
||||
Build microservice docker.
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/llm-vllm:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/vllm/langchain/Dockerfile .
|
||||
```
|
||||
|
||||
#### 4.3 Use VLLM-on-Ray
|
||||
|
||||
Build vllm-on-ray docker.
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/llm-vllm-ray-hpu:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/vllm/ray/dependency/Dockerfile .
|
||||
```
|
||||
|
||||
Build microservice docker.
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/llm-vllm-ray:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/vllm/ray/Dockerfile .
|
||||
```
|
||||
|
||||
### 5. Build Dataprep Image
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/dataprep-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain/Dockerfile .
|
||||
```
|
||||
|
||||
### 3. Build Guardrails Docker Image (Optional)
|
||||
### 6. Build Guardrails Docker Image (Optional)
|
||||
|
||||
To fortify AI initiatives in production, Guardrails microservice can secure model inputs and outputs, building Trustworthy, Safe, and Secure LLM-based Applications.
|
||||
|
||||
@@ -90,7 +144,7 @@ To fortify AI initiatives in production, Guardrails microservice can secure mode
|
||||
docker build -t opea/guardrails-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/guardrails/llama_guard/langchain/Dockerfile .
|
||||
```
|
||||
|
||||
### 4. Build MegaService Docker Image
|
||||
### 7. Build MegaService Docker Image
|
||||
|
||||
1. MegaService with Rerank
|
||||
|
||||
@@ -122,7 +176,7 @@ docker build -t opea/guardrails-tgi:latest --build-arg https_proxy=$https_proxy
|
||||
docker build --no-cache -t opea/chatqna-without-rerank:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile.without_rerank .
|
||||
```
|
||||
|
||||
### 5. Build UI Docker Image
|
||||
### 8. Build UI Docker Image
|
||||
|
||||
Construct the frontend Docker image using the command below:
|
||||
|
||||
@@ -131,7 +185,7 @@ cd GenAIExamples/ChatQnA/ui
|
||||
docker build --no-cache -t opea/chatqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile .
|
||||
```
|
||||
|
||||
### 6. Build Conversational React UI Docker Image (Optional)
|
||||
### 9. Build Conversational React UI Docker Image (Optional)
|
||||
|
||||
Build the frontend Docker image that enables a conversational experience with the ChatQnA megaservice using the command below:
|
||||
|
||||
@@ -142,18 +196,21 @@ cd GenAIExamples/ChatQnA/ui
|
||||
docker build --no-cache -t opea/chatqna-conversation-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.react .
|
||||
```
|
||||
|
||||
### 7. Build Nginx Docker Image
|
||||
### 10. Build Nginx Docker Image
|
||||
|
||||
```bash
|
||||
cd GenAIComps
|
||||
docker build -t opea/nginx:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/nginx/Dockerfile .
|
||||
```
|
||||
|
||||
Then run the command `docker images`, you will have the following 5 Docker Images:
|
||||
Then run the command `docker images`, you will have the following 8 Docker Images:
|
||||
|
||||
- `opea/embedding-tei:latest`
|
||||
- `opea/retriever-redis:latest`
|
||||
- `opea/reranking-tei:latest`
|
||||
- `opea/llm-tgi:latest` or `opea/llm-vllm:latest` or `opea/llm-vllm-ray:latest`
|
||||
- `opea/dataprep-redis:latest`
|
||||
- `opea/chatqna:latest`
|
||||
- `opea/chatqna:latest` or `opea/chatqna-guardrails:latest` or `opea/chatqna-without-rerank:latest`
|
||||
- `opea/chatqna-ui:latest`
|
||||
- `opea/nginx:latest`
|
||||
|
||||
@@ -281,7 +338,16 @@ For validation details, please refer to [how-to-validate_service](./how_to_valid
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
2. Retriever Microservice
|
||||
2. Embedding Microservice
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:6000/v1/embeddings \
|
||||
-X POST \
|
||||
-d '{"text":"hello"}' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
3. Retriever Microservice
|
||||
|
||||
To consume the retriever microservice, you need to generate a mock embedding vector with a Python script. The length of the embedding vector
|
||||
is determined by the embedding model.
|
||||
@@ -297,7 +363,7 @@ For validation details, please refer to [how-to-validate_service](./how_to_valid
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
3. TEI Reranking Service
|
||||
4. TEI Reranking Service
|
||||
|
||||
> Skip for ChatQnA without Rerank pipeline
|
||||
|
||||
@@ -308,7 +374,18 @@ For validation details, please refer to [how-to-validate_service](./how_to_valid
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
4. LLM backend Service
|
||||
5. Reranking Microservice
|
||||
|
||||
> Skip for ChatQnA without Rerank pipeline
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:8000/v1/reranking \
|
||||
-X POST \
|
||||
-d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
6. LLM backend Service
|
||||
|
||||
On first startup, this service will take more time to download the model files. After it's finished, the service will be ready.
|
||||
|
||||
@@ -353,7 +430,39 @@ For validation details, please refer to [how-to-validate_service](./how_to_valid
|
||||
-d '{"model": "${LLM_MODEL_ID}", "messages": [{"role": "user", "content": "What is Deep Learning?"}]}'
|
||||
```
|
||||
|
||||
5. MegaService
|
||||
7. LLM Microservice
|
||||
|
||||
```bash
|
||||
# TGI service
|
||||
curl http://${host_ip}:9000/v1/chat/completions \
|
||||
-X POST \
|
||||
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
For parameters in TGI mode, please refer to [HuggingFace InferenceClient API](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.text_generation) (except we rename "max_new_tokens" to "max_tokens".)
|
||||
|
||||
```bash
|
||||
# vLLM Service
|
||||
curl http://${host_ip}:9000/v1/chat/completions \
|
||||
-X POST \
|
||||
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_p":1,"temperature":0.7,"frequency_penalty":0,"presence_penalty":0, "streaming":false}' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
For parameters in vLLM mode, please refer to [LangChain VLLMOpenAI API](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.vllm.VLLMOpenAI.html)
|
||||
|
||||
```bash
|
||||
# vLLM-on-Ray Service
|
||||
curl http://${host_ip}:9000/v1/chat/completions \
|
||||
-X POST \
|
||||
-d '{"query":"What is Deep Learning?","max_tokens":17,"presence_penalty":1.03,"streaming":false}' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
For parameters in vLLM-on-Ray mode, please refer to [LangChain ChatOpenAI API](https://python.langchain.com/v0.2/api_reference/openai/chat_models/langchain_openai.chat_models.base.ChatOpenAI.html)
|
||||
|
||||
8. MegaService
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:8888/v1/chatqna -H "Content-Type: application/json" -d '{
|
||||
@@ -361,7 +470,7 @@ For validation details, please refer to [how-to-validate_service](./how_to_valid
|
||||
}'
|
||||
```
|
||||
|
||||
6. Nginx Service
|
||||
9. Nginx Service
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:${NGINX_PORT}/v1/chatqna \
|
||||
@@ -369,7 +478,7 @@ For validation details, please refer to [how-to-validate_service](./how_to_valid
|
||||
-d '{"messages": "What is the revenue of Nike in 2023?"}'
|
||||
```
|
||||
|
||||
7. Dataprep Microservice(Optional)
|
||||
10. Dataprep Microservice(Optional)
|
||||
|
||||
If you want to update the default knowledge base, you can use the following commands:
|
||||
|
||||
@@ -438,7 +547,7 @@ curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \
|
||||
-H "Content-Type: application/json"
|
||||
```
|
||||
|
||||
8. Guardrails (Optional)
|
||||
11. Guardrails (Optional)
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:9090/v1/guardrails\
|
||||
@@ -454,7 +563,7 @@ curl http://${host_ip}:9090/v1/guardrails\
|
||||
To access the frontend, open the following URL in your browser: http://{host_ip}:5173. By default, the UI runs on port 5173 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `compose.yaml` file as shown below:
|
||||
|
||||
```yaml
|
||||
chatqna-gaudi-ui-server:
|
||||
chaqna-gaudi-ui-server:
|
||||
image: opea/chatqna-ui:latest
|
||||
...
|
||||
ports:
|
||||
@@ -467,10 +576,10 @@ If you want to launch the UI using Nginx, open this URL: `http://${host_ip}:${NG
|
||||
|
||||
## 🚀 Launch the Conversational UI (Optional)
|
||||
|
||||
To access the Conversational UI (react based) frontend, modify the UI service in the `compose.yaml` file. Replace `chatqna-gaudi-ui-server` service with the `chatqna-gaudi-conversation-ui-server` service as per the config below:
|
||||
To access the Conversational UI (react based) frontend, modify the UI service in the `compose.yaml` file. Replace `chaqna-gaudi-ui-server` service with the `chatqna-gaudi-conversation-ui-server` service as per the config below:
|
||||
|
||||
```yaml
|
||||
chatqna-gaudi-conversation-ui-server:
|
||||
chaqna-gaudi-conversation-ui-server:
|
||||
image: opea/chatqna-conversation-ui:latest
|
||||
container_name: chatqna-gaudi-conversation-ui-server
|
||||
environment:
|
||||
@@ -479,7 +588,7 @@ chatqna-gaudi-conversation-ui-server:
|
||||
ports:
|
||||
- "5174:80"
|
||||
depends_on:
|
||||
- chatqna-gaudi-backend-server
|
||||
- chaqna-gaudi-backend-server
|
||||
ipc: host
|
||||
restart: always
|
||||
```
|
||||
@@ -487,7 +596,7 @@ chatqna-gaudi-conversation-ui-server:
|
||||
Once the services are up, open the following URL in your browser: http://{host_ip}:5174. By default, the UI runs on port 80 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `compose.yaml` file as shown below:
|
||||
|
||||
```yaml
|
||||
chatqna-gaudi-conversation-ui-server:
|
||||
chaqna-gaudi-conversation-ui-server:
|
||||
image: opea/chatqna-conversation-ui:latest
|
||||
...
|
||||
ports:
|
||||
|
||||
@@ -20,10 +20,9 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
REDIS_URL: redis://redis-vector-db:6379
|
||||
REDIS_HOST: redis-vector-db
|
||||
REDIS_URL: ${REDIS_URL}
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_ENDPOINT: http://tei-embedding-service:80
|
||||
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
tei-embedding-service:
|
||||
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||
@@ -40,12 +39,26 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
HABANA_VISIBLE_DEVICES: ${tei_embedding_devices}
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
MAX_WARMUP_SEQUENCE_LENGTH: 512
|
||||
INIT_HCCL_ON_ACQUIRE: 0
|
||||
ENABLE_EXPERIMENTAL_FLAGS: true
|
||||
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
|
||||
embedding:
|
||||
image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
|
||||
container_name: embedding-tei-server
|
||||
depends_on:
|
||||
- tei-embedding-service
|
||||
ports:
|
||||
- "6000:6000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
restart: unless-stopped
|
||||
retriever:
|
||||
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
|
||||
container_name: retriever-redis-server
|
||||
@@ -58,33 +71,42 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
REDIS_URL: redis://redis-vector-db:6379
|
||||
REDIS_HOST: redis-vector-db
|
||||
REDIS_URL: ${REDIS_URL}
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
restart: unless-stopped
|
||||
tei-reranking-service:
|
||||
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
container_name: tei-reranking-gaudi-server
|
||||
ports:
|
||||
- "8808:80"
|
||||
volumes:
|
||||
- "./data:/data"
|
||||
runtime: habana
|
||||
cap_add:
|
||||
- SYS_NICE
|
||||
shm_size: 1g
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
|
||||
reranking:
|
||||
image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
|
||||
container_name: reranking-tei-gaudi-server
|
||||
depends_on:
|
||||
- tei-reranking-service
|
||||
ports:
|
||||
- "8000:8000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
MAX_WARMUP_SEQUENCE_LENGTH: 512
|
||||
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
|
||||
restart: unless-stopped
|
||||
tgi-service:
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
container_name: tgi-gaudi-server
|
||||
@@ -99,7 +121,7 @@ services:
|
||||
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
HABANA_VISIBLE_DEVICES: ${llm_service_devices}
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
ENABLE_HPU_GRAPH: true
|
||||
LIMIT_HPU_GRAPH: true
|
||||
@@ -109,65 +131,83 @@ services:
|
||||
cap_add:
|
||||
- SYS_NICE
|
||||
ipc: host
|
||||
command: --model-id ${LLM_MODEL_ID} --max-input-length 2048 --max-total-tokens 4096
|
||||
chatqna-gaudi-backend-server:
|
||||
command: --model-id ${LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048
|
||||
llm:
|
||||
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
|
||||
container_name: llm-tgi-gaudi-server
|
||||
depends_on:
|
||||
- tgi-service
|
||||
ports:
|
||||
- "9000:9000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
restart: unless-stopped
|
||||
chaqna-gaudi-backend-server:
|
||||
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
|
||||
container_name: chatqna-gaudi-backend-server
|
||||
depends_on:
|
||||
- redis-vector-db
|
||||
- tei-embedding-service
|
||||
- embedding
|
||||
- retriever
|
||||
- tei-reranking-service
|
||||
- reranking
|
||||
- tgi-service
|
||||
- llm
|
||||
ports:
|
||||
- "8888:8888"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- MEGA_SERVICE_HOST_IP=chatqna-gaudi-backend-server
|
||||
- EMBEDDING_SERVER_HOST_IP=tei-embedding-service
|
||||
- EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-80}
|
||||
- RETRIEVER_SERVICE_HOST_IP=retriever
|
||||
- RERANK_SERVER_HOST_IP=tei-reranking-service
|
||||
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
|
||||
- LLM_SERVER_HOST_IP=tgi-service
|
||||
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
|
||||
- LOGFLAG=${LOGFLAG}
|
||||
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
|
||||
- EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
|
||||
- RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
|
||||
- RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP}
|
||||
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
|
||||
ipc: host
|
||||
restart: always
|
||||
chatqna-gaudi-ui-server:
|
||||
chaqna-gaudi-ui-server:
|
||||
image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest}
|
||||
container_name: chatqna-gaudi-ui-server
|
||||
depends_on:
|
||||
- chatqna-gaudi-backend-server
|
||||
- chaqna-gaudi-backend-server
|
||||
ports:
|
||||
- "5173:5173"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
|
||||
- UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT}
|
||||
- GET_FILE=${DATAPREP_GET_FILE_ENDPOINT}
|
||||
- DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT}
|
||||
ipc: host
|
||||
restart: always
|
||||
chatqna-gaudi-nginx-server:
|
||||
chaqna-gaudi-nginx-server:
|
||||
image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
|
||||
container_name: chatqna-gaudi-nginx-server
|
||||
container_name: chaqna-gaudi-nginx-server
|
||||
depends_on:
|
||||
- chatqna-gaudi-backend-server
|
||||
- chatqna-gaudi-ui-server
|
||||
- chaqna-gaudi-backend-server
|
||||
- chaqna-gaudi-ui-server
|
||||
ports:
|
||||
- "${NGINX_PORT:-80}:80"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- FRONTEND_SERVICE_IP=chatqna-gaudi-ui-server
|
||||
- FRONTEND_SERVICE_PORT=5173
|
||||
- BACKEND_SERVICE_NAME=chatqna
|
||||
- BACKEND_SERVICE_IP=chatqna-gaudi-backend-server
|
||||
- BACKEND_SERVICE_PORT=8888
|
||||
- DATAPREP_SERVICE_IP=dataprep-redis-service
|
||||
- DATAPREP_SERVICE_PORT=6007
|
||||
- FRONTEND_SERVICE_IP=${FRONTEND_SERVICE_IP}
|
||||
- FRONTEND_SERVICE_PORT=${FRONTEND_SERVICE_PORT}
|
||||
- BACKEND_SERVICE_NAME=${BACKEND_SERVICE_NAME}
|
||||
- BACKEND_SERVICE_IP=${BACKEND_SERVICE_IP}
|
||||
- BACKEND_SERVICE_PORT=${BACKEND_SERVICE_PORT}
|
||||
ipc: host
|
||||
restart: always
|
||||
|
||||
|
||||
@@ -20,10 +20,9 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
REDIS_URL: redis://redis-vector-db:6379
|
||||
REDIS_HOST: redis-vector-db
|
||||
REDIS_URL: ${REDIS_URL}
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_ENDPOINT: http://tei-embedding-service:80
|
||||
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
tgi-guardrails-service:
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
@@ -60,8 +59,8 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
SAFETY_GUARD_MODEL_ID: ${GURADRAILS_MODEL_ID}
|
||||
SAFETY_GUARD_ENDPOINT: http://tgi-guardrails-service:80
|
||||
SAFETY_GUARD_MODEL_ID: ${SAFETY_GUARD_MODEL_ID}
|
||||
SAFETY_GUARD_ENDPOINT: ${SAFETY_GUARD_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
restart: unless-stopped
|
||||
tei-embedding-service:
|
||||
@@ -79,12 +78,24 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
MAX_WARMUP_SEQUENCE_LENGTH: 512
|
||||
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
|
||||
embedding:
|
||||
image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
|
||||
container_name: embedding-tei-server
|
||||
depends_on:
|
||||
- tei-embedding-service
|
||||
ports:
|
||||
- "6000:6000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
restart: unless-stopped
|
||||
retriever:
|
||||
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
|
||||
container_name: retriever-redis-server
|
||||
@@ -97,33 +108,42 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
REDIS_URL: redis://redis-vector-db:6379
|
||||
REDIS_HOST: redis-vector-db
|
||||
REDIS_URL: ${REDIS_URL}
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
restart: unless-stopped
|
||||
tei-reranking-service:
|
||||
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
container_name: tei-reranking-gaudi-server
|
||||
ports:
|
||||
- "8808:80"
|
||||
volumes:
|
||||
- "./data:/data"
|
||||
runtime: habana
|
||||
cap_add:
|
||||
- SYS_NICE
|
||||
shm_size: 1g
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
|
||||
reranking:
|
||||
image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
|
||||
container_name: reranking-tei-gaudi-server
|
||||
depends_on:
|
||||
- tei-reranking-service
|
||||
ports:
|
||||
- "8000:8000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
MAX_WARMUP_SEQUENCE_LENGTH: 512
|
||||
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
|
||||
restart: unless-stopped
|
||||
tgi-service:
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
container_name: tgi-gaudi-server
|
||||
@@ -149,7 +169,24 @@ services:
|
||||
- SYS_NICE
|
||||
ipc: host
|
||||
command: --model-id ${LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048
|
||||
chatqna-gaudi-backend-server:
|
||||
llm:
|
||||
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
|
||||
container_name: llm-tgi-gaudi-server
|
||||
depends_on:
|
||||
- tgi-service
|
||||
ports:
|
||||
- "9000:9000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
restart: unless-stopped
|
||||
chaqna-gaudi-backend-server:
|
||||
image: ${REGISTRY:-opea}/chatqna-guardrails:${TAG:-latest}
|
||||
container_name: chatqna-gaudi-guardrails-server
|
||||
depends_on:
|
||||
@@ -157,60 +194,41 @@ services:
|
||||
- tgi-guardrails-service
|
||||
- guardrails
|
||||
- tei-embedding-service
|
||||
- embedding
|
||||
- retriever
|
||||
- tei-reranking-service
|
||||
- reranking
|
||||
- tgi-service
|
||||
- llm
|
||||
ports:
|
||||
- "8888:8888"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- MEGA_SERVICE_HOST_IP=chatqna-gaudi-backend-server
|
||||
- GUARDRAIL_SERVICE_HOST_IP=guardrails
|
||||
- GUARDRAIL_SERVICE_PORT=${GUARDRAIL_SERVICE_PORT:-9090}
|
||||
- EMBEDDING_SERVER_HOST_IP=tei-embedding-service
|
||||
- EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-80}
|
||||
- RETRIEVER_SERVICE_HOST_IP=retriever
|
||||
- RERANK_SERVER_HOST_IP=tei-reranking-service
|
||||
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
|
||||
- LLM_SERVER_HOST_IP=tgi-service
|
||||
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
|
||||
- LOGFLAG=${LOGFLAG}
|
||||
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
|
||||
- GUARDRAIL_SERVICE_HOST_IP=${GUARDRAIL_SERVICE_HOST_IP}
|
||||
- EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
|
||||
- RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
|
||||
- RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP}
|
||||
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
|
||||
ipc: host
|
||||
restart: always
|
||||
chatqna-gaudi-ui-server:
|
||||
chaqna-gaudi-ui-server:
|
||||
image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest}
|
||||
container_name: chatqna-gaudi-ui-server
|
||||
depends_on:
|
||||
- chatqna-gaudi-backend-server
|
||||
- chaqna-gaudi-backend-server
|
||||
ports:
|
||||
- "5173:5173"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
ipc: host
|
||||
restart: always
|
||||
chatqna-gaudi-nginx-server:
|
||||
image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
|
||||
container_name: chatqna-gaudi-nginx-server
|
||||
depends_on:
|
||||
- chatqna-gaudi-backend-server
|
||||
- chatqna-gaudi-ui-server
|
||||
ports:
|
||||
- "${NGINX_PORT:-80}:80"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- FRONTEND_SERVICE_IP=chatqna-gaudi-ui-server
|
||||
- FRONTEND_SERVICE_PORT=5173
|
||||
- BACKEND_SERVICE_NAME=chatqna
|
||||
- BACKEND_SERVICE_IP=chatqna-gaudi-backend-server
|
||||
- BACKEND_SERVICE_PORT=8888
|
||||
- DATAPREP_SERVICE_IP=dataprep-redis-service
|
||||
- DATAPREP_SERVICE_PORT=6007
|
||||
- CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
|
||||
- UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT}
|
||||
- GET_FILE=${DATAPREP_GET_FILE_ENDPOINT}
|
||||
- DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT}
|
||||
ipc: host
|
||||
restart: always
|
||||
|
||||
|
||||
201
ChatQnA/docker_compose/intel/hpu/gaudi/compose_no_wrapper.yaml
Normal file
201
ChatQnA/docker_compose/intel/hpu/gaudi/compose_no_wrapper.yaml
Normal file
@@ -0,0 +1,201 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
services:
|
||||
redis-vector-db:
|
||||
image: redis/redis-stack:7.2.0-v9
|
||||
container_name: redis-vector-db
|
||||
ports:
|
||||
- "6379:6379"
|
||||
- "8001:8001"
|
||||
dataprep-redis-service:
|
||||
image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest}
|
||||
container_name: dataprep-redis-server
|
||||
depends_on:
|
||||
- redis-vector-db
|
||||
- tei-embedding-service
|
||||
ports:
|
||||
- "6007:6007"
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
REDIS_URL: ${REDIS_URL}
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
tei-embedding-service:
|
||||
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||
container_name: tei-embedding-gaudi-server
|
||||
ports:
|
||||
- "8090:80"
|
||||
volumes:
|
||||
- "./data:/data"
|
||||
runtime: habana
|
||||
cap_add:
|
||||
- SYS_NICE
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
MAX_WARMUP_SEQUENCE_LENGTH: 512
|
||||
INIT_HCCL_ON_ACQUIRE: 0
|
||||
ENABLE_EXPERIMENTAL_FLAGS: true
|
||||
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
|
||||
# embedding:
|
||||
# image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
|
||||
# container_name: embedding-tei-server
|
||||
# depends_on:
|
||||
# - tei-embedding-service
|
||||
# ports:
|
||||
# - "6000:6000"
|
||||
# ipc: host
|
||||
# environment:
|
||||
# no_proxy: ${no_proxy}
|
||||
# http_proxy: ${http_proxy}
|
||||
# https_proxy: ${https_proxy}
|
||||
# TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
# restart: unless-stopped
|
||||
retriever:
|
||||
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
|
||||
container_name: retriever-redis-server
|
||||
depends_on:
|
||||
- redis-vector-db
|
||||
ports:
|
||||
- "7000:7000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
REDIS_URL: ${REDIS_URL}
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
restart: unless-stopped
|
||||
tei-reranking-service:
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
container_name: tei-reranking-gaudi-server
|
||||
ports:
|
||||
- "8808:80"
|
||||
volumes:
|
||||
- "./data:/data"
|
||||
shm_size: 1g
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
|
||||
# reranking:
|
||||
# image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
|
||||
# container_name: reranking-tei-gaudi-server
|
||||
# depends_on:
|
||||
# - tei-reranking-service
|
||||
# ports:
|
||||
# - "8000:8000"
|
||||
# ipc: host
|
||||
# environment:
|
||||
# no_proxy: ${no_proxy}
|
||||
# http_proxy: ${http_proxy}
|
||||
# https_proxy: ${https_proxy}
|
||||
# TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
|
||||
# HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
# HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
# HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
# restart: unless-stopped
|
||||
tgi-service:
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
container_name: tgi-gaudi-server
|
||||
ports:
|
||||
- "8005:80"
|
||||
volumes:
|
||||
- "./data:/data"
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
ENABLE_HPU_GRAPH: true
|
||||
LIMIT_HPU_GRAPH: true
|
||||
USE_FLASH_ATTENTION: true
|
||||
FLASH_ATTENTION_RECOMPUTE: true
|
||||
runtime: habana
|
||||
cap_add:
|
||||
- SYS_NICE
|
||||
ipc: host
|
||||
command: --model-id ${LLM_MODEL_ID} --max-input-length 2048 --max-total-tokens 4096
|
||||
# llm:
|
||||
# image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
|
||||
# container_name: llm-tgi-gaudi-server
|
||||
# depends_on:
|
||||
# - tgi-service
|
||||
# ports:
|
||||
# - "9000:9000"
|
||||
# ipc: host
|
||||
# environment:
|
||||
# no_proxy: ${no_proxy}
|
||||
# http_proxy: ${http_proxy}
|
||||
# https_proxy: ${https_proxy}
|
||||
# TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
|
||||
# HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
# HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
# HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
# restart: unless-stopped
|
||||
chaqna-gaudi-backend-server:
|
||||
image: ${REGISTRY:-opea}/chatqna-no-wrapper:${TAG:-latest}
|
||||
container_name: chatqna-gaudi-backend-server
|
||||
depends_on:
|
||||
- redis-vector-db
|
||||
- tei-embedding-service
|
||||
# - embedding
|
||||
- retriever
|
||||
- tei-reranking-service
|
||||
# - reranking
|
||||
- tgi-service
|
||||
# - llm
|
||||
ports:
|
||||
- "8888:8888"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
|
||||
- EMBEDDING_SERVER_HOST_IP=${EMBEDDING_SERVER_HOST_IP}
|
||||
- EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-8090}
|
||||
- RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
|
||||
- RERANK_SERVER_HOST_IP=${RERANK_SERVER_HOST_IP}
|
||||
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-8808}
|
||||
- LLM_SERVER_HOST_IP=${LLM_SERVER_HOST_IP}
|
||||
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-8005}
|
||||
- LOGFLAG=${LOGFLAG}
|
||||
ipc: host
|
||||
restart: always
|
||||
chaqna-gaudi-ui-server:
|
||||
image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest}
|
||||
container_name: chatqna-gaudi-ui-server
|
||||
depends_on:
|
||||
- chaqna-gaudi-backend-server
|
||||
ports:
|
||||
- "5173:5173"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
|
||||
- UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT}
|
||||
- GET_FILE=${DATAPREP_GET_FILE_ENDPOINT}
|
||||
- DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT}
|
||||
ipc: host
|
||||
restart: always
|
||||
|
||||
networks:
|
||||
default:
|
||||
driver: bridge
|
||||
@@ -20,10 +20,9 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
REDIS_URL: redis://redis-vector-db:6379
|
||||
REDIS_HOST: redis-vector-db
|
||||
REDIS_URL: ${REDIS_URL}
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_ENDPOINT: http://tei-embedding-service:80
|
||||
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
tei-embedding-service:
|
||||
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||
@@ -40,12 +39,24 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
MAX_WARMUP_SEQUENCE_LENGTH: 512
|
||||
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
|
||||
command: --model-id ${EMBEDDING_MODEL_ID}
|
||||
embedding:
|
||||
image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
|
||||
container_name: embedding-tei-server
|
||||
depends_on:
|
||||
- tei-embedding-service
|
||||
ports:
|
||||
- "6000:6000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
restart: unless-stopped
|
||||
retriever:
|
||||
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
|
||||
container_name: retriever-redis-server
|
||||
@@ -58,33 +69,42 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
REDIS_URL: redis://redis-vector-db:6379
|
||||
REDIS_HOST: redis-vector-db
|
||||
REDIS_URL: ${REDIS_URL}
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
restart: unless-stopped
|
||||
tei-reranking-service:
|
||||
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
container_name: tei-reranking-gaudi-server
|
||||
ports:
|
||||
- "8808:80"
|
||||
volumes:
|
||||
- "./data:/data"
|
||||
runtime: habana
|
||||
cap_add:
|
||||
- SYS_NICE
|
||||
shm_size: 1g
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
|
||||
reranking:
|
||||
image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
|
||||
container_name: reranking-tei-gaudi-server
|
||||
depends_on:
|
||||
- tei-reranking-service
|
||||
ports:
|
||||
- "8000:8000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
MAX_WARMUP_SEQUENCE_LENGTH: 512
|
||||
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
|
||||
restart: unless-stopped
|
||||
vllm-service:
|
||||
image: ${REGISTRY:-opea}/llm-vllm-hpu:${TAG:-latest}
|
||||
container_name: vllm-gaudi-server
|
||||
@@ -105,64 +125,63 @@ services:
|
||||
- SYS_NICE
|
||||
ipc: host
|
||||
command: /bin/bash -c "export VLLM_CPU_KVCACHE_SPACE=40 && python3 -m vllm.entrypoints.openai.api_server --enforce-eager --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048"
|
||||
chatqna-gaudi-backend-server:
|
||||
llm:
|
||||
image: ${REGISTRY:-opea}/llm-vllm:${TAG:-latest}
|
||||
container_name: llm-vllm-gaudi-server
|
||||
depends_on:
|
||||
- vllm-service
|
||||
ports:
|
||||
- "9000:9000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
vLLM_ENDPOINT: ${vLLM_LLM_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
LLM_MODEL: ${LLM_MODEL_ID}
|
||||
restart: unless-stopped
|
||||
chaqna-gaudi-backend-server:
|
||||
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
|
||||
container_name: chatqna-gaudi-backend-server
|
||||
depends_on:
|
||||
- redis-vector-db
|
||||
- tei-embedding-service
|
||||
- embedding
|
||||
- retriever
|
||||
- tei-reranking-service
|
||||
- reranking
|
||||
- vllm-service
|
||||
- llm
|
||||
ports:
|
||||
- "8888:8888"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- MEGA_SERVICE_HOST_IP=chatqna-gaudi-backend-server
|
||||
- EMBEDDING_SERVER_HOST_IP=tei-embedding-service
|
||||
- EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-80}
|
||||
- RETRIEVER_SERVICE_HOST_IP=retriever
|
||||
- RERANK_SERVER_HOST_IP=tei-reranking-service
|
||||
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
|
||||
- LLM_SERVER_HOST_IP=vllm-service
|
||||
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
|
||||
- LOGFLAG=${LOGFLAG}
|
||||
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
|
||||
- EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
|
||||
- RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
|
||||
- RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP}
|
||||
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
|
||||
- LLM_SERVICE_PORT=${LLM_SERVICE_PORT}
|
||||
ipc: host
|
||||
restart: always
|
||||
chatqna-gaudi-ui-server:
|
||||
chaqna-gaudi-ui-server:
|
||||
image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest}
|
||||
container_name: chatqna-gaudi-ui-server
|
||||
depends_on:
|
||||
- chatqna-gaudi-backend-server
|
||||
- chaqna-gaudi-backend-server
|
||||
ports:
|
||||
- "5173:5173"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
ipc: host
|
||||
restart: always
|
||||
chatqna-gaudi-nginx-server:
|
||||
image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
|
||||
container_name: chatqna-gaudi-nginx-server
|
||||
depends_on:
|
||||
- chatqna-gaudi-backend-server
|
||||
- chatqna-gaudi-ui-server
|
||||
ports:
|
||||
- "${NGINX_PORT:-80}:80"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- FRONTEND_SERVICE_IP=chatqna-gaudi-ui-server
|
||||
- FRONTEND_SERVICE_PORT=5173
|
||||
- BACKEND_SERVICE_NAME=chatqna
|
||||
- BACKEND_SERVICE_IP=chatqna-gaudi-backend-server
|
||||
- BACKEND_SERVICE_PORT=8888
|
||||
- DATAPREP_SERVICE_IP=dataprep-redis-service
|
||||
- DATAPREP_SERVICE_PORT=6007
|
||||
- CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
|
||||
- UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT}
|
||||
- GET_FILE=${DATAPREP_GET_FILE_ENDPOINT}
|
||||
- DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT}
|
||||
ipc: host
|
||||
restart: always
|
||||
|
||||
|
||||
@@ -20,10 +20,9 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
REDIS_URL: redis://redis-vector-db:6379
|
||||
REDIS_HOST: redis-vector-db
|
||||
REDIS_URL: ${REDIS_URL}
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_ENDPOINT: http://tei-embedding-service:80
|
||||
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
tei-embedding-service:
|
||||
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||
@@ -40,12 +39,24 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
MAX_WARMUP_SEQUENCE_LENGTH: 512
|
||||
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
|
||||
command: --model-id ${EMBEDDING_MODEL_ID}
|
||||
embedding:
|
||||
image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
|
||||
container_name: embedding-tei-server
|
||||
depends_on:
|
||||
- tei-embedding-service
|
||||
ports:
|
||||
- "6000:6000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
restart: unless-stopped
|
||||
retriever:
|
||||
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
|
||||
container_name: retriever-redis-server
|
||||
@@ -58,33 +69,42 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
REDIS_URL: redis://redis-vector-db:6379
|
||||
REDIS_HOST: redis-vector-db
|
||||
REDIS_URL: ${REDIS_URL}
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
restart: unless-stopped
|
||||
tei-reranking-service:
|
||||
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
container_name: tei-reranking-gaudi-server
|
||||
ports:
|
||||
- "8808:80"
|
||||
volumes:
|
||||
- "./data:/data"
|
||||
runtime: habana
|
||||
cap_add:
|
||||
- SYS_NICE
|
||||
shm_size: 1g
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
|
||||
reranking:
|
||||
image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
|
||||
container_name: reranking-tei-gaudi-server
|
||||
depends_on:
|
||||
- tei-reranking-service
|
||||
ports:
|
||||
- "8000:8000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
MAX_WARMUP_SEQUENCE_LENGTH: 512
|
||||
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
|
||||
restart: unless-stopped
|
||||
vllm-ray-service:
|
||||
image: ${REGISTRY:-opea}/llm-vllm-ray-hpu:${TAG:-latest}
|
||||
container_name: vllm-ray-gaudi-server
|
||||
@@ -105,64 +125,63 @@ services:
|
||||
- SYS_NICE
|
||||
ipc: host
|
||||
command: /bin/bash -c "ray start --head && python vllm_ray_openai.py --port_number 8000 --model_id_or_path $LLM_MODEL_ID --tensor_parallel_size 2 --enforce_eager True"
|
||||
chatqna-gaudi-backend-server:
|
||||
llm:
|
||||
image: ${REGISTRY:-opea}/llm-vllm-ray:${TAG:-latest}
|
||||
container_name: llm-vllm-ray-gaudi-server
|
||||
depends_on:
|
||||
- vllm-ray-service
|
||||
ports:
|
||||
- "9000:9000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
vLLM_RAY_ENDPOINT: ${vLLM_RAY_LLM_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
LLM_MODEL: ${LLM_MODEL_ID}
|
||||
restart: unless-stopped
|
||||
chaqna-gaudi-backend-server:
|
||||
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
|
||||
container_name: chatqna-gaudi-backend-server
|
||||
depends_on:
|
||||
- redis-vector-db
|
||||
- tei-embedding-service
|
||||
- embedding
|
||||
- retriever
|
||||
- tei-reranking-service
|
||||
- reranking
|
||||
- vllm-ray-service
|
||||
- llm
|
||||
ports:
|
||||
- "8888:8888"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- MEGA_SERVICE_HOST_IP=chatqna-gaudi-backend-server
|
||||
- EMBEDDING_SERVER_HOST_IP=tei-embedding-service
|
||||
- EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-80}
|
||||
- RETRIEVER_SERVICE_HOST_IP=retriever
|
||||
- RERANK_SERVER_HOST_IP=tei-reranking-service
|
||||
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
|
||||
- LLM_SERVER_HOST_IP=vllm-ray-service
|
||||
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-8000}
|
||||
- LOGFLAG=${LOGFLAG}
|
||||
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
|
||||
- EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
|
||||
- RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
|
||||
- RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP}
|
||||
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
|
||||
- LLM_SERVICE_PORT=${LLM_SERVICE_PORT}
|
||||
ipc: host
|
||||
restart: always
|
||||
chatqna-gaudi-ui-server:
|
||||
chaqna-gaudi-ui-server:
|
||||
image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest}
|
||||
container_name: chatqna-gaudi-ui-server
|
||||
depends_on:
|
||||
- chatqna-gaudi-backend-server
|
||||
- chaqna-gaudi-backend-server
|
||||
ports:
|
||||
- "5173:5173"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
ipc: host
|
||||
restart: always
|
||||
chatqna-gaudi-nginx-server:
|
||||
image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
|
||||
container_name: chatqna-gaudi-nginx-server
|
||||
depends_on:
|
||||
- chatqna-gaudi-backend-server
|
||||
- chatqna-gaudi-ui-server
|
||||
ports:
|
||||
- "${NGINX_PORT:-80}:80"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- FRONTEND_SERVICE_IP=chatqna-gaudi-ui-server
|
||||
- FRONTEND_SERVICE_PORT=5173
|
||||
- BACKEND_SERVICE_NAME=chatqna
|
||||
- BACKEND_SERVICE_IP=chatqna-gaudi-backend-server
|
||||
- BACKEND_SERVICE_PORT=8888
|
||||
- DATAPREP_SERVICE_IP=dataprep-redis-service
|
||||
- DATAPREP_SERVICE_PORT=6007
|
||||
- CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
|
||||
- UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT}
|
||||
- GET_FILE=${DATAPREP_GET_FILE_ENDPOINT}
|
||||
- DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT}
|
||||
ipc: host
|
||||
restart: always
|
||||
|
||||
|
||||
@@ -20,10 +20,9 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
REDIS_URL: redis://redis-vector-db:6379
|
||||
REDIS_HOST: redis-vector-db
|
||||
REDIS_URL: ${REDIS_URL}
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_ENDPOINT: http://tei-embedding-service:80
|
||||
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
tei-embedding-service:
|
||||
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||
@@ -40,12 +39,26 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
MAX_WARMUP_SEQUENCE_LENGTH: 512
|
||||
INIT_HCCL_ON_ACQUIRE: 0
|
||||
ENABLE_EXPERIMENTAL_FLAGS: true
|
||||
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
|
||||
embedding:
|
||||
image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
|
||||
container_name: embedding-tei-server
|
||||
depends_on:
|
||||
- tei-embedding-service
|
||||
ports:
|
||||
- "6000:6000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
restart: unless-stopped
|
||||
retriever:
|
||||
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
|
||||
container_name: retriever-redis-server
|
||||
@@ -58,11 +71,8 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
REDIS_URL: redis://redis-vector-db:6379
|
||||
REDIS_HOST: redis-vector-db
|
||||
REDIS_URL: ${REDIS_URL}
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
restart: unless-stopped
|
||||
tgi-service:
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
@@ -89,61 +99,60 @@ services:
|
||||
- SYS_NICE
|
||||
ipc: host
|
||||
command: --model-id ${LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048
|
||||
chatqna-gaudi-backend-server:
|
||||
llm:
|
||||
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
|
||||
container_name: llm-tgi-gaudi-server
|
||||
depends_on:
|
||||
- tgi-service
|
||||
ports:
|
||||
- "9000:9000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
restart: unless-stopped
|
||||
chaqna-gaudi-backend-server:
|
||||
image: ${REGISTRY:-opea}/chatqna-without-rerank:${TAG:-latest}
|
||||
container_name: chatqna-gaudi-backend-server
|
||||
depends_on:
|
||||
- redis-vector-db
|
||||
- tei-embedding-service
|
||||
- embedding
|
||||
- retriever
|
||||
- tgi-service
|
||||
- llm
|
||||
ports:
|
||||
- "8888:8888"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- MEGA_SERVICE_HOST_IP=chatqna-gaudi-backend-server
|
||||
- EMBEDDING_SERVER_HOST_IP=tei-embedding-service
|
||||
- EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-80}
|
||||
- RETRIEVER_SERVICE_HOST_IP=retriever
|
||||
- LLM_SERVER_HOST_IP=tgi-service
|
||||
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
|
||||
- LOGFLAG=${LOGFLAG}
|
||||
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
|
||||
- EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
|
||||
- RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
|
||||
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
|
||||
ipc: host
|
||||
restart: always
|
||||
chatqna-gaudi-ui-server:
|
||||
chaqna-gaudi-ui-server:
|
||||
image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest}
|
||||
container_name: chatqna-gaudi-ui-server
|
||||
depends_on:
|
||||
- chatqna-gaudi-backend-server
|
||||
- chaqna-gaudi-backend-server
|
||||
ports:
|
||||
- "5173:5173"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
ipc: host
|
||||
restart: always
|
||||
chatqna-gaudi-nginx-server:
|
||||
image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
|
||||
container_name: chatqna-gaudi-nginx-server
|
||||
depends_on:
|
||||
- chatqna-gaudi-backend-server
|
||||
- chatqna-gaudi-ui-server
|
||||
ports:
|
||||
- "${NGINX_PORT:-80}:80"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- FRONTEND_SERVICE_IP=chatqna-gaudi-ui-server
|
||||
- FRONTEND_SERVICE_PORT=5173
|
||||
- BACKEND_SERVICE_NAME=chatqna
|
||||
- BACKEND_SERVICE_IP=chatqna-gaudi-backend-server
|
||||
- BACKEND_SERVICE_PORT=8888
|
||||
- DATAPREP_SERVICE_IP=dataprep-redis-service
|
||||
- DATAPREP_SERVICE_PORT=6007
|
||||
- CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
|
||||
- UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT}
|
||||
- GET_FILE=${DATAPREP_GET_FILE_ENDPOINT}
|
||||
- DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT}
|
||||
ipc: host
|
||||
restart: always
|
||||
|
||||
|
||||
@@ -26,6 +26,14 @@ The warning messages point out the variables that are **NOT** set.
|
||||
|
||||
```
|
||||
ubuntu@gaudi-vm:~/GenAIExamples/ChatQnA/docker_compose/intel/hpu/gaudi$ docker compose -f ./compose.yaml up -d
|
||||
WARN[0000] The "LANGCHAIN_API_KEY" variable is not set. Defaulting to a blank string.
|
||||
WARN[0000] The "LANGCHAIN_TRACING_V2" variable is not set. Defaulting to a blank string.
|
||||
WARN[0000] The "LANGCHAIN_API_KEY" variable is not set. Defaulting to a blank string.
|
||||
WARN[0000] The "LANGCHAIN_TRACING_V2" variable is not set. Defaulting to a blank string.
|
||||
WARN[0000] The "LANGCHAIN_API_KEY" variable is not set. Defaulting to a blank string.
|
||||
WARN[0000] The "LANGCHAIN_TRACING_V2" variable is not set. Defaulting to a blank string.
|
||||
WARN[0000] The "LANGCHAIN_API_KEY" variable is not set. Defaulting to a blank string.
|
||||
WARN[0000] The "LANGCHAIN_TRACING_V2" variable is not set. Defaulting to a blank string.
|
||||
WARN[0000] /home/ubuntu/GenAIExamples/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml: `version` is obsolete
|
||||
```
|
||||
|
||||
@@ -164,7 +172,24 @@ This test the embedding service. It sends "What is Deep Learning?" to the embedd
|
||||
|
||||
**Note**: The vector dimension is determined by the embedding model, and the output values depend on the model and the input data.
|
||||
|
||||
### 2 Retriever Microservice
|
||||
### 2 Embedding Microservice
|
||||
|
||||
```
|
||||
curl http://${host_ip}:6000/v1/embeddings\
|
||||
-X POST \
|
||||
-d '{"text":"What is Deep Learning?"}' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
This tests the embedding microservice. In this test, it sends `What is Deep Learning?` to the embedding microservice.
|
||||
The embedding microservice receives the input data and calls the embedding service to embed it.
|
||||
The embedding server is stateless, but the microservice keeps state: there is an `id` in the output of the `Embedding Microservice`.
|
||||
|
||||
```
|
||||
{"id":"e8c85e588a235a4bc4747a23b3a71d8f","text":"What is Deep Learning?","embedding":[0.00030903306,-0.06356524,0.0025720573,-0.012404448,0.050649878, ..., 0.02776986,-0.0246678,0.03999176,0.037477136,-0.006806653,0.02261455,-0.04570737,-0.033122733,0.022785513,0.0160026,-0.021343587,-0.029969815,-0.0049176104]}
|
||||
```
|
||||
|
||||
### 3 Retriever Microservice
|
||||
|
||||
To consume the retriever microservice, you need to generate a mock embedding vector with a Python script.
|
||||
The length of the embedding vector is determined by the embedding model.
|
||||
@@ -187,7 +212,7 @@ The output is retrieved text that is relevant to the input data:
|
||||
|
||||
```
|
||||
|
||||
### 3 TEI Reranking Service
|
||||
### 4 TEI Reranking Service
|
||||
|
||||
Reranking service
|
||||
|
||||
@@ -203,7 +228,24 @@ Output is:
|
||||
|
||||
It scores the input
|
||||
|
||||
### 4 TGI Service
|
||||
### 5 Reranking Microservice
|
||||
|
||||
```
|
||||
curl http://${host_ip}:8000/v1/reranking\
|
||||
-X POST \
|
||||
-d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
Here is the output:
|
||||
|
||||
```
|
||||
{"id":"e1eb0e44f56059fc01aa0334b1dac313","query":"Human: Answer the question based only on the following context:\n Deep learning is...\n Question: What is Deep Learning?","max_new_tokens":1024,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}
|
||||
```
|
||||
|
||||
You may notice that the reranking microservice is stateful (its output carries an `id` and other metadata), while the reranking service is not.
|
||||
|
||||
### 6 TGI Service
|
||||
|
||||
```
|
||||
curl http://${host_ip}:8008/generate \
|
||||
@@ -235,7 +277,56 @@ and the log shows model warm up, please wait for a while and try it later.
|
||||
2024-06-05T05:45:27.867833811Z 2024-06-05T05:45:27.867759Z INFO text_generation_router: router/src/main.rs:221: Warming up model
|
||||
```
|
||||
|
||||
### 5 MegaService
|
||||
### 7 LLM Microservice
|
||||
|
||||
```
|
||||
curl http://${host_ip}:9000/v1/chat/completions\
|
||||
-X POST \
|
||||
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
You will get generated text from LLM:
|
||||
|
||||
```
|
||||
data: b'\n'
|
||||
|
||||
data: b'\n'
|
||||
|
||||
data: b'Deep'
|
||||
|
||||
data: b' learning'
|
||||
|
||||
data: b' is'
|
||||
|
||||
data: b' a'
|
||||
|
||||
data: b' subset'
|
||||
|
||||
data: b' of'
|
||||
|
||||
data: b' machine'
|
||||
|
||||
data: b' learning'
|
||||
|
||||
data: b' that'
|
||||
|
||||
data: b' uses'
|
||||
|
||||
data: b' algorithms'
|
||||
|
||||
data: b' to'
|
||||
|
||||
data: b' learn'
|
||||
|
||||
data: b' from'
|
||||
|
||||
data: b' data'
|
||||
|
||||
data: [DONE]
|
||||
```
|
||||
|
||||
### 8 MegaService
|
||||
|
||||
```
|
||||
curl http://${host_ip}:8888/v1/chatqna -H "Content-Type: application/json" -d '{
|
||||
|
||||
@@ -7,4 +7,22 @@
|
||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
|
||||
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:8090"
|
||||
export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
|
||||
export TGI_LLM_ENDPOINT="http://${host_ip}:8005"
|
||||
export REDIS_URL="redis://${host_ip}:6379"
|
||||
export INDEX_NAME="rag-redis"
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
|
||||
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
|
||||
export RERANK_SERVICE_HOST_IP=${host_ip}
|
||||
export LLM_SERVICE_HOST_IP=${host_ip}
|
||||
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/chatqna"
|
||||
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
|
||||
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_file"
|
||||
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete_file"
|
||||
export FRONTEND_SERVICE_IP=${host_ip}
|
||||
export FRONTEND_SERVICE_PORT=5173
|
||||
export BACKEND_SERVICE_NAME=chatqna
|
||||
export BACKEND_SERVICE_IP=${host_ip}
|
||||
export BACKEND_SERVICE_PORT=8888
|
||||
|
||||
@@ -77,19 +77,37 @@ git clone https://github.com/opea-project/GenAIComps.git
|
||||
cd GenAIComps
|
||||
```
|
||||
|
||||
### 2. Build Retriever Image
|
||||
### 2. Build Embedding Image
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/tei/langchain/Dockerfile .
|
||||
```
|
||||
|
||||
### 3. Build Retriever Image
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/retriever-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/redis/langchain/Dockerfile .
|
||||
```
|
||||
|
||||
### 3. Build Dataprep Image
|
||||
### 4. Build Rerank Image
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/tei/Dockerfile .
|
||||
```
|
||||
|
||||
### 5. Build LLM Image
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile .
|
||||
```
|
||||
|
||||
### 6. Build Dataprep Image
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/dataprep-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain/Dockerfile .
|
||||
```
|
||||
|
||||
### 4. Build MegaService Docker Image
|
||||
### 7. Build MegaService Docker Image
|
||||
|
||||
To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `chatqna.py` Python script. Build the MegaService Docker image using the command below:
|
||||
|
||||
@@ -100,7 +118,7 @@ docker build --no-cache -t opea/chatqna:latest --build-arg https_proxy=$https_pr
|
||||
cd ../../..
|
||||
```
|
||||
|
||||
### 5. Build UI Docker Image
|
||||
### 8. Build UI Docker Image
|
||||
|
||||
Construct the frontend Docker image using the command below:
|
||||
|
||||
@@ -110,7 +128,7 @@ docker build --no-cache -t opea/chatqna-ui:latest --build-arg https_proxy=$https
|
||||
cd ../../../..
|
||||
```
|
||||
|
||||
### 6. Build React UI Docker Image (Optional)
|
||||
### 9. Build React UI Docker Image (Optional)
|
||||
|
||||
Construct the frontend Docker image using the command below:
|
||||
|
||||
@@ -120,20 +138,23 @@ docker build --no-cache -t opea/chatqna-react-ui:latest --build-arg https_proxy=
|
||||
cd ../../../..
|
||||
```
|
||||
|
||||
### 7. Build Nginx Docker Image
|
||||
### 10. Build Nginx Docker Image
|
||||
|
||||
```bash
|
||||
cd GenAIComps
|
||||
docker build -t opea/nginx:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/nginx/Dockerfile .
|
||||
```
|
||||
|
||||
Then run the command `docker images`, you will have the following 5 Docker Images:
|
||||
Then run the command `docker images`, you will have the following 8 Docker Images:
|
||||
|
||||
1. `opea/retriever-redis:latest`
|
||||
2. `opea/dataprep-redis:latest`
|
||||
3. `opea/chatqna:latest`
|
||||
4. `opea/chatqna-ui:latest` or `opea/chatqna-react-ui:latest`
|
||||
5. `opea/nginx:latest`
|
||||
1. `opea/embedding-tei:latest`
|
||||
2. `opea/retriever-redis:latest`
|
||||
3. `opea/reranking-tei:latest`
|
||||
4. `opea/llm-tgi:latest`
|
||||
5. `opea/dataprep-redis:latest`
|
||||
6. `opea/chatqna:latest`
|
||||
7. `opea/chatqna-ui:latest` or `opea/chatqna-react-ui:latest`
|
||||
8. `opea/nginx:latest`
|
||||
|
||||
## 🚀 Start MicroServices and MegaService
|
||||
|
||||
@@ -194,7 +215,16 @@ docker compose up -d
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
2. Retriever Microservice
|
||||
2. Embedding Microservice
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:6000/v1/embeddings \
|
||||
-X POST \
|
||||
-d '{"text":"hello"}' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
3. Retriever Microservice
|
||||
|
||||
To consume the retriever microservice, you need to generate a mock embedding vector by Python script. The length of embedding vector
|
||||
is determined by the embedding model.
|
||||
@@ -210,7 +240,7 @@ docker compose up -d
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
3. TEI Reranking Service
|
||||
4. TEI Reranking Service
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:8808/rerank \
|
||||
@@ -219,7 +249,16 @@ docker compose up -d
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
4. TGI Service
|
||||
5. Reranking Microservice
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:8000/v1/reranking \
|
||||
-X POST \
|
||||
-d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
6. TGI Service
|
||||
|
||||
On first startup, this service will take more time because it has to download the model files. Once the download has finished, the service will be ready.
|
||||
|
||||
@@ -244,7 +283,16 @@ docker compose up -d
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
5. MegaService
|
||||
7. LLM Microservice
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:9000/v1/chat/completions \
|
||||
-X POST \
|
||||
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
8. MegaService
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:8888/v1/chatqna -H "Content-Type: application/json" -d '{
|
||||
@@ -252,7 +300,7 @@ docker compose up -d
|
||||
}'
|
||||
```
|
||||
|
||||
6. Nginx Service
|
||||
9. Nginx Service
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:${NGINX_PORT}/v1/chatqna \
|
||||
@@ -260,7 +308,7 @@ docker compose up -d
|
||||
-d '{"messages": "What is the revenue of Nike in 2023?"}'
|
||||
```
|
||||
|
||||
7. Dataprep Microservice(Optional)
|
||||
10. Dataprep Microservice(Optional)
|
||||
|
||||
If you want to update the default knowledge base, you can use the following commands:
|
||||
|
||||
|
||||
@@ -46,6 +46,20 @@ services:
|
||||
- driver: nvidia
|
||||
count: 1
|
||||
capabilities: [gpu]
|
||||
embedding:
|
||||
image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
|
||||
container_name: embedding-tei-server
|
||||
depends_on:
|
||||
- tei-embedding-service
|
||||
ports:
|
||||
- "6000:6000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
restart: unless-stopped
|
||||
retriever:
|
||||
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
|
||||
container_name: retriever-redis-server
|
||||
@@ -60,7 +74,6 @@ services:
|
||||
https_proxy: ${https_proxy}
|
||||
REDIS_URL: ${REDIS_URL}
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
restart: unless-stopped
|
||||
tei-reranking-service:
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
@@ -85,6 +98,23 @@ services:
|
||||
- driver: nvidia
|
||||
count: 1
|
||||
capabilities: [gpu]
|
||||
reranking:
|
||||
image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
|
||||
container_name: reranking-tei-server
|
||||
depends_on:
|
||||
- tei-reranking-service
|
||||
ports:
|
||||
- "8000:8000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
restart: unless-stopped
|
||||
tgi-service:
|
||||
image: ghcr.io/huggingface/text-generation-inference:2.2.0
|
||||
container_name: tgi-server
|
||||
@@ -108,15 +138,35 @@ services:
|
||||
- driver: nvidia
|
||||
count: 1
|
||||
capabilities: [gpu]
|
||||
llm:
|
||||
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
|
||||
container_name: llm-tgi-server
|
||||
depends_on:
|
||||
- tgi-service
|
||||
ports:
|
||||
- "9000:9000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
restart: unless-stopped
|
||||
chaqna-backend-server:
|
||||
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
|
||||
container_name: chatqna-backend-server
|
||||
depends_on:
|
||||
- redis-vector-db
|
||||
- tei-embedding-service
|
||||
- embedding
|
||||
- retriever
|
||||
- tei-reranking-service
|
||||
- reranking
|
||||
- tgi-service
|
||||
- llm
|
||||
ports:
|
||||
- "8888:8888"
|
||||
environment:
|
||||
|
||||
@@ -8,9 +8,15 @@ export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
|
||||
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:8090"
|
||||
export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
|
||||
export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
|
||||
export REDIS_URL="redis://${host_ip}:6379"
|
||||
export INDEX_NAME="rag-redis"
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
|
||||
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
|
||||
export RERANK_SERVICE_HOST_IP=${host_ip}
|
||||
export LLM_SERVICE_HOST_IP=${host_ip}
|
||||
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/chatqna"
|
||||
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
|
||||
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_file"
|
||||
|
||||
@@ -23,6 +23,18 @@ services:
|
||||
dockerfile: ./Dockerfile.without_rerank
|
||||
extends: chatqna
|
||||
image: ${REGISTRY:-opea}/chatqna-without-rerank:${TAG:-latest}
|
||||
chatqna-no-wrapper:
|
||||
build:
|
||||
context: ../
|
||||
dockerfile: ./Dockerfile.no_wrapper
|
||||
extends: chatqna
|
||||
image: ${REGISTRY:-opea}/chatqna-no-wrapper:${TAG:-latest}
|
||||
chatqna-no-wrapper-without-rerank:
|
||||
build:
|
||||
context: ../
|
||||
dockerfile: ./Dockerfile.no_wrapper_without_rerank
|
||||
extends: chatqna
|
||||
image: ${REGISTRY:-opea}/chatqna-no-wrapper-without-rerank:${TAG:-latest}
|
||||
chatqna-ui:
|
||||
build:
|
||||
context: ../ui
|
||||
|
||||
@@ -16,9 +16,12 @@ The ChatQnA uses the below prebuilt images if you choose a Xeon deployment
|
||||
|
||||
- redis-vector-db: redis/redis-stack:7.2.0-v9
|
||||
- tei_embedding_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
- embedding: opea/embedding-tei:latest
|
||||
- retriever: opea/retriever-redis:latest
|
||||
- tei_xeon_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
- reranking: opea/reranking-tei:latest
|
||||
- tgi-service: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
|
||||
- llm: opea/llm-tgi:latest
|
||||
- chaqna-xeon-backend-server: opea/chatqna:latest
|
||||
|
||||
Should you desire to use the Gaudi accelerator, two alternate images are used for the embedding and llm services.
|
||||
|
||||
@@ -27,6 +27,27 @@ data:
|
||||
no_proxy: ""
|
||||
LOGFLAG: ""
|
||||
---
|
||||
# Source: chatqna/charts/embedding-usvc/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: chatqna-embedding-usvc-config
|
||||
labels:
|
||||
helm.sh/chart: embedding-usvc-1.0.0
|
||||
app.kubernetes.io/name: embedding-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TEI_EMBEDDING_ENDPOINT: "http://chatqna-tei"
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
LOGFLAG: ""
|
||||
---
|
||||
# Source: chatqna/charts/guardrails-usvc/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
@@ -51,6 +72,50 @@ data:
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
---
|
||||
# Source: chatqna/charts/llm-uservice/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: chatqna-llm-uservice-config
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-1.0.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TGI_LLM_ENDPOINT: "http://chatqna-tgi"
|
||||
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
LOGFLAG: ""
|
||||
---
|
||||
# Source: chatqna/charts/reranking-usvc/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: chatqna-reranking-usvc-config
|
||||
labels:
|
||||
helm.sh/chart: reranking-usvc-1.0.0
|
||||
app.kubernetes.io/name: reranking-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TEI_RERANKING_ENDPOINT: "http://chatqna-teirerank"
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
LOGFLAG: ""
|
||||
---
|
||||
# Source: chatqna/charts/retriever-usvc/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
@@ -143,7 +208,7 @@ metadata:
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B"
|
||||
PORT: "2083"
|
||||
PORT: "2080"
|
||||
HF_TOKEN: "insert-your-huggingface-token-here"
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
@@ -297,6 +362,31 @@ spec:
|
||||
app.kubernetes.io/name: data-prep
|
||||
app.kubernetes.io/instance: chatqna
|
||||
---
|
||||
# Source: chatqna/charts/embedding-usvc/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: chatqna-embedding-usvc
|
||||
labels:
|
||||
helm.sh/chart: embedding-usvc-1.0.0
|
||||
app.kubernetes.io/name: embedding-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 6000
|
||||
targetPort: 6000
|
||||
protocol: TCP
|
||||
name: embedding-usvc
|
||||
selector:
|
||||
app.kubernetes.io/name: embedding-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
---
|
||||
# Source: chatqna/charts/guardrails-usvc/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
@@ -322,6 +412,31 @@ spec:
|
||||
app.kubernetes.io/name: guardrails-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
---
|
||||
# Source: chatqna/charts/llm-uservice/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: chatqna-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-1.0.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 9000
|
||||
targetPort: 9000
|
||||
protocol: TCP
|
||||
name: llm-uservice
|
||||
selector:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: chatqna
|
||||
---
|
||||
# Source: chatqna/charts/redis-vector-db/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
@@ -351,6 +466,31 @@ spec:
|
||||
app.kubernetes.io/name: redis-vector-db
|
||||
app.kubernetes.io/instance: chatqna
|
||||
---
|
||||
# Source: chatqna/charts/reranking-usvc/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: chatqna-reranking-usvc
|
||||
labels:
|
||||
helm.sh/chart: reranking-usvc-1.0.0
|
||||
app.kubernetes.io/name: reranking-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 8000
|
||||
targetPort: 8000
|
||||
protocol: TCP
|
||||
name: reranking-usvc
|
||||
selector:
|
||||
app.kubernetes.io/name: reranking-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
---
|
||||
# Source: chatqna/charts/retriever-usvc/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
@@ -444,7 +584,7 @@ spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 80
|
||||
targetPort: 2083
|
||||
targetPort: 2080
|
||||
protocol: TCP
|
||||
name: tgi
|
||||
selector:
|
||||
@@ -557,7 +697,7 @@ spec:
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: ui
|
||||
containerPort: 5173
|
||||
containerPort: 80
|
||||
protocol: TCP
|
||||
resources:
|
||||
{}
|
||||
@@ -646,36 +786,39 @@ spec:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Source: chatqna/charts/redis-vector-db/templates/deployment.yaml
|
||||
# Source: chatqna/charts/embedding-usvc/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-redis-vector-db
|
||||
name: chatqna-embedding-usvc
|
||||
labels:
|
||||
helm.sh/chart: redis-vector-db-1.0.0
|
||||
app.kubernetes.io/name: redis-vector-db
|
||||
helm.sh/chart: embedding-usvc-1.0.0
|
||||
app.kubernetes.io/name: embedding-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "7.2.0-v9"
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: redis-vector-db
|
||||
app.kubernetes.io/name: embedding-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: redis-vector-db
|
||||
app.kubernetes.io/name: embedding-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
spec:
|
||||
securityContext:
|
||||
{}
|
||||
containers:
|
||||
- name: redis-vector-db
|
||||
- name: chatqna
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: chatqna-embedding-usvc-config
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
@@ -686,35 +829,38 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "redis/redis-stack:7.2.0-v9"
|
||||
image: "opea/embedding-tei:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: embedding-usvc
|
||||
containerPort: 6000
|
||||
protocol: TCP
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: data-volume
|
||||
- mountPath: /redisinsight
|
||||
name: redisinsight-volume
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
ports:
|
||||
- name: redis-service
|
||||
containerPort: 6379
|
||||
protocol: TCP
|
||||
- name: redis-insight
|
||||
containerPort: 8001
|
||||
protocol: TCP
|
||||
startupProbe:
|
||||
tcpSocket:
|
||||
port: 6379 # Probe the Redis port
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: embedding-usvc
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: embedding-usvc
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: embedding-usvc
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
- name: data-volume
|
||||
emptyDir: {}
|
||||
- name: redisinsight-volume
|
||||
emptyDir: {}
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
@@ -796,6 +942,234 @@ spec:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Source: chatqna/charts/llm-uservice/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-1.0.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: chatqna
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: chatqna
|
||||
spec:
|
||||
securityContext:
|
||||
{}
|
||||
containers:
|
||||
- name: chatqna
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: chatqna-llm-uservice-config
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: false
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/llm-tgi:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: llm-uservice
|
||||
containerPort: 9000
|
||||
protocol: TCP
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Source: chatqna/charts/redis-vector-db/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-redis-vector-db
|
||||
labels:
|
||||
helm.sh/chart: redis-vector-db-1.0.0
|
||||
app.kubernetes.io/name: redis-vector-db
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "7.2.0-v9"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: redis-vector-db
|
||||
app.kubernetes.io/instance: chatqna
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: redis-vector-db
|
||||
app.kubernetes.io/instance: chatqna
|
||||
spec:
|
||||
securityContext:
|
||||
{}
|
||||
containers:
|
||||
- name: redis-vector-db
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "redis/redis-stack:7.2.0-v9"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: data-volume
|
||||
- mountPath: /redisinsight
|
||||
name: redisinsight-volume
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
ports:
|
||||
- name: redis-service
|
||||
containerPort: 6379
|
||||
protocol: TCP
|
||||
- name: redis-insight
|
||||
containerPort: 8001
|
||||
protocol: TCP
|
||||
startupProbe:
|
||||
tcpSocket:
|
||||
port: 6379 # Probe the Redis port
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
failureThreshold: 120
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
- name: data-volume
|
||||
emptyDir: {}
|
||||
- name: redisinsight-volume
|
||||
emptyDir: {}
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Source: chatqna/charts/reranking-usvc/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-reranking-usvc
|
||||
labels:
|
||||
helm.sh/chart: reranking-usvc-1.0.0
|
||||
app.kubernetes.io/name: reranking-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: reranking-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: reranking-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
spec:
|
||||
securityContext:
|
||||
{}
|
||||
containers:
|
||||
- name: chatqna
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: chatqna-reranking-usvc-config
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/reranking-tei:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: reranking-usvc
|
||||
containerPort: 8000
|
||||
protocol: TCP
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: reranking-usvc
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: reranking-usvc
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: reranking-usvc
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Source: chatqna/charts/retriever-usvc/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
@@ -1109,7 +1483,7 @@ spec:
|
||||
name: tmp
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 2083
|
||||
containerPort: 2080
|
||||
protocol: TCP
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
@@ -1250,24 +1624,16 @@ spec:
|
||||
containers:
|
||||
- name: chatqna
|
||||
env:
|
||||
- name: LLM_SERVER_HOST_IP
|
||||
value: chatqna-tgi
|
||||
- name: LLM_SERVER_PORT
|
||||
value: "2080"
|
||||
- name: RERANK_SERVER_HOST_IP
|
||||
value: chatqna-teirerank
|
||||
- name: RERANK_SERVER_PORT
|
||||
value: "2082"
|
||||
- name: LLM_SERVICE_HOST_IP
|
||||
value: chatqna-llm-uservice
|
||||
- name: RERANK_SERVICE_HOST_IP
|
||||
value: chatqna-reranking-usvc
|
||||
- name: RETRIEVER_SERVICE_HOST_IP
|
||||
value: chatqna-retriever-usvc
|
||||
- name: EMBEDDING_SERVER_HOST_IP
|
||||
value: chatqna-tei
|
||||
- name: EMBEDDING_SERVER_PORT
|
||||
value: "2081"
|
||||
- name: EMBEDDING_SERVICE_HOST_IP
|
||||
value: chatqna-embedding-usvc
|
||||
- name: GUARDRAIL_SERVICE_HOST_IP
|
||||
value: chatqna-guardrails-usvc
|
||||
- name: GUARDRAIL_SERVICE_PORT
|
||||
value: "9090"
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
|
||||
@@ -27,6 +27,71 @@ data:
|
||||
no_proxy: ""
|
||||
LOGFLAG: ""
|
||||
---
|
||||
# Source: chatqna/charts/embedding-usvc/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: chatqna-embedding-usvc-config
|
||||
labels:
|
||||
helm.sh/chart: embedding-usvc-1.0.0
|
||||
app.kubernetes.io/name: embedding-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TEI_EMBEDDING_ENDPOINT: "http://chatqna-tei"
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
LOGFLAG: ""
|
||||
---
|
||||
# Source: chatqna/charts/llm-uservice/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: chatqna-llm-uservice-config
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-1.0.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TGI_LLM_ENDPOINT: "http://chatqna-tgi"
|
||||
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
LOGFLAG: ""
|
||||
---
|
||||
# Source: chatqna/charts/reranking-usvc/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: chatqna-reranking-usvc-config
|
||||
labels:
|
||||
helm.sh/chart: reranking-usvc-1.0.0
|
||||
app.kubernetes.io/name: reranking-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TEI_RERANKING_ENDPOINT: "http://chatqna-teirerank"
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
LOGFLAG: ""
|
||||
---
|
||||
# Source: chatqna/charts/retriever-usvc/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
@@ -247,6 +312,56 @@ spec:
|
||||
app.kubernetes.io/name: data-prep
|
||||
app.kubernetes.io/instance: chatqna
|
||||
---
|
||||
# Source: chatqna/charts/embedding-usvc/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: chatqna-embedding-usvc
|
||||
labels:
|
||||
helm.sh/chart: embedding-usvc-1.0.0
|
||||
app.kubernetes.io/name: embedding-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 6000
|
||||
targetPort: 6000
|
||||
protocol: TCP
|
||||
name: embedding-usvc
|
||||
selector:
|
||||
app.kubernetes.io/name: embedding-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
---
|
||||
# Source: chatqna/charts/llm-uservice/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: chatqna-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-1.0.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 9000
|
||||
targetPort: 9000
|
||||
protocol: TCP
|
||||
name: llm-uservice
|
||||
selector:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: chatqna
|
||||
---
|
||||
# Source: chatqna/charts/redis-vector-db/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
@@ -276,6 +391,31 @@ spec:
|
||||
app.kubernetes.io/name: redis-vector-db
|
||||
app.kubernetes.io/instance: chatqna
|
||||
---
|
||||
# Source: chatqna/charts/reranking-usvc/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: chatqna-reranking-usvc
|
||||
labels:
|
||||
helm.sh/chart: reranking-usvc-1.0.0
|
||||
app.kubernetes.io/name: reranking-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 8000
|
||||
targetPort: 8000
|
||||
protocol: TCP
|
||||
name: reranking-usvc
|
||||
selector:
|
||||
app.kubernetes.io/name: reranking-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
---
|
||||
# Source: chatqna/charts/retriever-usvc/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
@@ -457,7 +597,7 @@ spec:
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: ui
|
||||
containerPort: 5173
|
||||
containerPort: 80
|
||||
protocol: TCP
|
||||
resources:
|
||||
{}
|
||||
@@ -546,6 +686,162 @@ spec:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Source: chatqna/charts/embedding-usvc/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-embedding-usvc
|
||||
labels:
|
||||
helm.sh/chart: embedding-usvc-1.0.0
|
||||
app.kubernetes.io/name: embedding-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: embedding-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: embedding-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
spec:
|
||||
securityContext:
|
||||
{}
|
||||
containers:
|
||||
- name: chatqna
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: chatqna-embedding-usvc-config
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/embedding-tei:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: embedding-usvc
|
||||
containerPort: 6000
|
||||
protocol: TCP
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: embedding-usvc
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: embedding-usvc
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: embedding-usvc
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Source: chatqna/charts/llm-uservice/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-1.0.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: chatqna
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: chatqna
|
||||
spec:
|
||||
securityContext:
|
||||
{}
|
||||
containers:
|
||||
- name: chatqna
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: chatqna-llm-uservice-config
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: false
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/llm-tgi:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: llm-uservice
|
||||
containerPort: 9000
|
||||
protocol: TCP
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Source: chatqna/charts/redis-vector-db/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
@@ -618,6 +914,84 @@ spec:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Source: chatqna/charts/reranking-usvc/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-reranking-usvc
|
||||
labels:
|
||||
helm.sh/chart: reranking-usvc-1.0.0
|
||||
app.kubernetes.io/name: reranking-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: reranking-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: reranking-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
spec:
|
||||
securityContext:
|
||||
{}
|
||||
containers:
|
||||
- name: chatqna
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: chatqna-reranking-usvc-config
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/reranking-tei:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: reranking-usvc
|
||||
containerPort: 8000
|
||||
protocol: TCP
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: reranking-usvc
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: reranking-usvc
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: reranking-usvc
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Source: chatqna/charts/retriever-usvc/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
@@ -992,20 +1366,16 @@ spec:
|
||||
containers:
|
||||
- name: chatqna
|
||||
env:
|
||||
- name: LLM_SERVER_HOST_IP
|
||||
value: chatqna-tgi
|
||||
- name: LLM_SERVER_PORT
|
||||
value: "2080"
|
||||
- name: RERANK_SERVER_HOST_IP
|
||||
value: chatqna-teirerank
|
||||
- name: RERANK_SERVER_PORT
|
||||
value: "2082"
|
||||
- name: LLM_SERVICE_HOST_IP
|
||||
value: chatqna-llm-uservice
|
||||
- name: RERANK_SERVICE_HOST_IP
|
||||
value: chatqna-reranking-usvc
|
||||
- name: RETRIEVER_SERVICE_HOST_IP
|
||||
value: chatqna-retriever-usvc
|
||||
- name: EMBEDDING_SERVER_HOST_IP
|
||||
value: chatqna-tei
|
||||
- name: EMBEDDING_SERVER_PORT
|
||||
value: "2081"
|
||||
- name: EMBEDDING_SERVICE_HOST_IP
|
||||
value: chatqna-embedding-usvc
|
||||
- name: GUARDRAIL_SERVICE_HOST_IP
|
||||
value: chatqna-guardrails-usvc
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
|
||||
@@ -27,6 +27,71 @@ data:
|
||||
no_proxy: ""
|
||||
LOGFLAG: ""
|
||||
---
|
||||
# Source: chatqna/charts/embedding-usvc/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: chatqna-embedding-usvc-config
|
||||
labels:
|
||||
helm.sh/chart: embedding-usvc-1.0.0
|
||||
app.kubernetes.io/name: embedding-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TEI_EMBEDDING_ENDPOINT: "http://chatqna-tei"
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
LOGFLAG: ""
|
||||
---
|
||||
# Source: chatqna/charts/llm-uservice/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: chatqna-llm-uservice-config
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-1.0.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TGI_LLM_ENDPOINT: "http://chatqna-tgi"
|
||||
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
LOGFLAG: ""
|
||||
---
|
||||
# Source: chatqna/charts/reranking-usvc/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: chatqna-reranking-usvc-config
|
||||
labels:
|
||||
helm.sh/chart: reranking-usvc-1.0.0
|
||||
app.kubernetes.io/name: reranking-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TEI_RERANKING_ENDPOINT: "http://chatqna-teirerank"
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
LOGFLAG: ""
|
||||
---
|
||||
# Source: chatqna/charts/retriever-usvc/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
@@ -248,6 +313,56 @@ spec:
|
||||
app.kubernetes.io/name: data-prep
|
||||
app.kubernetes.io/instance: chatqna
|
||||
---
|
||||
# Source: chatqna/charts/embedding-usvc/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: chatqna-embedding-usvc
|
||||
labels:
|
||||
helm.sh/chart: embedding-usvc-1.0.0
|
||||
app.kubernetes.io/name: embedding-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 6000
|
||||
targetPort: 6000
|
||||
protocol: TCP
|
||||
name: embedding-usvc
|
||||
selector:
|
||||
app.kubernetes.io/name: embedding-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
---
|
||||
# Source: chatqna/charts/llm-uservice/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: chatqna-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-1.0.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 9000
|
||||
targetPort: 9000
|
||||
protocol: TCP
|
||||
name: llm-uservice
|
||||
selector:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: chatqna
|
||||
---
|
||||
# Source: chatqna/charts/redis-vector-db/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
@@ -277,6 +392,31 @@ spec:
|
||||
app.kubernetes.io/name: redis-vector-db
|
||||
app.kubernetes.io/instance: chatqna
|
||||
---
|
||||
# Source: chatqna/charts/reranking-usvc/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: chatqna-reranking-usvc
|
||||
labels:
|
||||
helm.sh/chart: reranking-usvc-1.0.0
|
||||
app.kubernetes.io/name: reranking-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 8000
|
||||
targetPort: 8000
|
||||
protocol: TCP
|
||||
name: reranking-usvc
|
||||
selector:
|
||||
app.kubernetes.io/name: reranking-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
---
|
||||
# Source: chatqna/charts/retriever-usvc/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
@@ -458,7 +598,7 @@ spec:
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: ui
|
||||
containerPort: 5173
|
||||
containerPort: 80
|
||||
protocol: TCP
|
||||
resources:
|
||||
{}
|
||||
@@ -547,6 +687,162 @@ spec:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Source: chatqna/charts/embedding-usvc/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-embedding-usvc
|
||||
labels:
|
||||
helm.sh/chart: embedding-usvc-1.0.0
|
||||
app.kubernetes.io/name: embedding-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: embedding-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: embedding-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
spec:
|
||||
securityContext:
|
||||
{}
|
||||
containers:
|
||||
- name: chatqna
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: chatqna-embedding-usvc-config
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/embedding-tei:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: embedding-usvc
|
||||
containerPort: 6000
|
||||
protocol: TCP
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: embedding-usvc
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: embedding-usvc
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: embedding-usvc
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Source: chatqna/charts/llm-uservice/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-1.0.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: chatqna
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: chatqna
|
||||
spec:
|
||||
securityContext:
|
||||
{}
|
||||
containers:
|
||||
- name: chatqna
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: chatqna-llm-uservice-config
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: false
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/llm-tgi:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: llm-uservice
|
||||
containerPort: 9000
|
||||
protocol: TCP
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Source: chatqna/charts/redis-vector-db/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
@@ -619,6 +915,84 @@ spec:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Source: chatqna/charts/reranking-usvc/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-reranking-usvc
|
||||
labels:
|
||||
helm.sh/chart: reranking-usvc-1.0.0
|
||||
app.kubernetes.io/name: reranking-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: reranking-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: reranking-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
spec:
|
||||
securityContext:
|
||||
{}
|
||||
containers:
|
||||
- name: chatqna
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: chatqna-reranking-usvc-config
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/reranking-tei:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: reranking-usvc
|
||||
containerPort: 8000
|
||||
protocol: TCP
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: reranking-usvc
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: reranking-usvc
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: reranking-usvc
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Source: chatqna/charts/retriever-usvc/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
@@ -995,20 +1369,16 @@ spec:
|
||||
containers:
|
||||
- name: chatqna
|
||||
env:
|
||||
- name: LLM_SERVER_HOST_IP
|
||||
value: chatqna-tgi
|
||||
- name: LLM_SERVER_PORT
|
||||
value: "2080"
|
||||
- name: RERANK_SERVER_HOST_IP
|
||||
value: chatqna-teirerank
|
||||
- name: RERANK_SERVER_PORT
|
||||
value: "2082"
|
||||
- name: LLM_SERVICE_HOST_IP
|
||||
value: chatqna-llm-uservice
|
||||
- name: RERANK_SERVICE_HOST_IP
|
||||
value: chatqna-reranking-usvc
|
||||
- name: RETRIEVER_SERVICE_HOST_IP
|
||||
value: chatqna-retriever-usvc
|
||||
- name: EMBEDDING_SERVER_HOST_IP
|
||||
value: chatqna-tei
|
||||
- name: EMBEDDING_SERVER_PORT
|
||||
value: "2081"
|
||||
- name: EMBEDDING_SERVICE_HOST_IP
|
||||
value: chatqna-embedding-usvc
|
||||
- name: GUARDRAIL_SERVICE_HOST_IP
|
||||
value: chatqna-guardrails-usvc
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
|
||||
@@ -27,6 +27,27 @@ data:
|
||||
no_proxy: ""
|
||||
LOGFLAG: ""
|
||||
---
|
||||
# Source: chatqna/charts/embedding-usvc/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: chatqna-embedding-usvc-config
|
||||
labels:
|
||||
helm.sh/chart: embedding-usvc-1.0.0
|
||||
app.kubernetes.io/name: embedding-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TEI_EMBEDDING_ENDPOINT: "http://chatqna-tei"
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
LOGFLAG: ""
|
||||
---
|
||||
# Source: chatqna/charts/guardrails-usvc/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
@@ -51,6 +72,50 @@ data:
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
---
|
||||
# Source: chatqna/charts/llm-uservice/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: chatqna-llm-uservice-config
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-1.0.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TGI_LLM_ENDPOINT: "http://chatqna-tgi"
|
||||
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
LOGFLAG: ""
|
||||
---
|
||||
# Source: chatqna/charts/reranking-usvc/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: chatqna-reranking-usvc-config
|
||||
labels:
|
||||
helm.sh/chart: reranking-usvc-1.0.0
|
||||
app.kubernetes.io/name: reranking-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TEI_RERANKING_ENDPOINT: "http://chatqna-teirerank"
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
LOGFLAG: ""
|
||||
---
|
||||
# Source: chatqna/charts/retriever-usvc/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
@@ -143,7 +208,7 @@ metadata:
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B"
|
||||
PORT: "2083"
|
||||
PORT: "2080"
|
||||
HF_TOKEN: "insert-your-huggingface-token-here"
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
@@ -299,6 +364,31 @@ spec:
|
||||
app.kubernetes.io/name: data-prep
|
||||
app.kubernetes.io/instance: chatqna
|
||||
---
|
||||
# Source: chatqna/charts/embedding-usvc/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: chatqna-embedding-usvc
|
||||
labels:
|
||||
helm.sh/chart: embedding-usvc-1.0.0
|
||||
app.kubernetes.io/name: embedding-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 6000
|
||||
targetPort: 6000
|
||||
protocol: TCP
|
||||
name: embedding-usvc
|
||||
selector:
|
||||
app.kubernetes.io/name: embedding-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
---
|
||||
# Source: chatqna/charts/guardrails-usvc/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
@@ -324,6 +414,31 @@ spec:
|
||||
app.kubernetes.io/name: guardrails-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
---
|
||||
# Source: chatqna/charts/llm-uservice/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: chatqna-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-1.0.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 9000
|
||||
targetPort: 9000
|
||||
protocol: TCP
|
||||
name: llm-uservice
|
||||
selector:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: chatqna
|
||||
---
|
||||
# Source: chatqna/charts/redis-vector-db/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
@@ -353,6 +468,31 @@ spec:
|
||||
app.kubernetes.io/name: redis-vector-db
|
||||
app.kubernetes.io/instance: chatqna
|
||||
---
|
||||
# Source: chatqna/charts/reranking-usvc/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: chatqna-reranking-usvc
|
||||
labels:
|
||||
helm.sh/chart: reranking-usvc-1.0.0
|
||||
app.kubernetes.io/name: reranking-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 8000
|
||||
targetPort: 8000
|
||||
protocol: TCP
|
||||
name: reranking-usvc
|
||||
selector:
|
||||
app.kubernetes.io/name: reranking-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
---
|
||||
# Source: chatqna/charts/retriever-usvc/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
@@ -446,7 +586,7 @@ spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 80
|
||||
targetPort: 2083
|
||||
targetPort: 2080
|
||||
protocol: TCP
|
||||
name: tgi
|
||||
selector:
|
||||
@@ -559,7 +699,7 @@ spec:
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: ui
|
||||
containerPort: 5173
|
||||
containerPort: 80
|
||||
protocol: TCP
|
||||
resources:
|
||||
{}
|
||||
@@ -648,6 +788,84 @@ spec:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Source: chatqna/charts/embedding-usvc/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-embedding-usvc
|
||||
labels:
|
||||
helm.sh/chart: embedding-usvc-1.0.0
|
||||
app.kubernetes.io/name: embedding-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: embedding-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: embedding-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
spec:
|
||||
securityContext:
|
||||
{}
|
||||
containers:
|
||||
- name: chatqna
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: chatqna-embedding-usvc-config
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/embedding-tei:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: embedding-usvc
|
||||
containerPort: 6000
|
||||
protocol: TCP
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: embedding-usvc
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: embedding-usvc
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: embedding-usvc
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Source: chatqna/charts/guardrails-usvc/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
@@ -726,6 +944,84 @@ spec:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Source: chatqna/charts/llm-uservice/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-1.0.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: chatqna
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: chatqna
|
||||
spec:
|
||||
securityContext:
|
||||
{}
|
||||
containers:
|
||||
- name: chatqna
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: chatqna-llm-uservice-config
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: false
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/llm-tgi:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: llm-uservice
|
||||
containerPort: 9000
|
||||
protocol: TCP
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Source: chatqna/charts/redis-vector-db/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
@@ -798,6 +1094,84 @@ spec:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Source: chatqna/charts/reranking-usvc/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chatqna-reranking-usvc
|
||||
labels:
|
||||
helm.sh/chart: reranking-usvc-1.0.0
|
||||
app.kubernetes.io/name: reranking-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: reranking-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: reranking-usvc
|
||||
app.kubernetes.io/instance: chatqna
|
||||
spec:
|
||||
securityContext:
|
||||
{}
|
||||
containers:
|
||||
- name: chatqna
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: chatqna-reranking-usvc-config
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/reranking-tei:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: reranking-usvc
|
||||
containerPort: 8000
|
||||
protocol: TCP
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: reranking-usvc
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: reranking-usvc
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: reranking-usvc
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Source: chatqna/charts/retriever-usvc/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
@@ -1112,7 +1486,7 @@ spec:
|
||||
name: tmp
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 2083
|
||||
containerPort: 2080
|
||||
protocol: TCP
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
@@ -1255,24 +1629,16 @@ spec:
|
||||
containers:
|
||||
- name: chatqna
|
||||
env:
|
||||
- name: LLM_SERVER_HOST_IP
|
||||
value: chatqna-tgi
|
||||
- name: LLM_SERVER_PORT
|
||||
value: "2080"
|
||||
- name: RERANK_SERVER_HOST_IP
|
||||
value: chatqna-teirerank
|
||||
- name: RERANK_SERVER_PORT
|
||||
value: "2082"
|
||||
- name: LLM_SERVICE_HOST_IP
|
||||
value: chatqna-llm-uservice
|
||||
- name: RERANK_SERVICE_HOST_IP
|
||||
value: chatqna-reranking-usvc
|
||||
- name: RETRIEVER_SERVICE_HOST_IP
|
||||
value: chatqna-retriever-usvc
|
||||
- name: EMBEDDING_SERVER_HOST_IP
|
||||
value: chatqna-tei
|
||||
- name: EMBEDDING_SERVER_PORT
|
||||
value: "2081"
|
||||
- name: EMBEDDING_SERVICE_HOST_IP
|
||||
value: chatqna-embedding-usvc
|
||||
- name: GUARDRAIL_SERVICE_HOST_IP
|
||||
value: chatqna-guardrails-usvc
|
||||
- name: GUARDRAIL_SERVICE_PORT
|
||||
value: "9090"
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user