Compare commits

..

1 Commits

Author SHA1 Message Date
Zhenzhong1
09fa201b30 76 -> 80 2024-09-28 21:29:04 -07:00
225 changed files with 6331 additions and 35436 deletions

8
.github/CODEOWNERS vendored
View File

@@ -1,17 +1,13 @@
/AgentQnA/ kaokao.lv@intel.com
/AgentQnA/ xuhui.ren@intel.com
/AudioQnA/ sihan.chen@intel.com
/ChatQnA/ liang1.lv@intel.com
/CodeGen/ liang1.lv@intel.com
/CodeTrans/ sihan.chen@intel.com
/DocSum/ letong.han@intel.com
/DocIndexRetriever/ kaokao.lv@intel.com chendi.xue@intel.com
/InstructionTuning xinyu.ye@intel.com
/RerankFinetuning xinyu.ye@intel.com
/MultimodalQnA tiep.le@intel.com
/DocIndexRetriever/ xuhui.ren@intel.com chendi.xue@intel.com
/FaqGen/ xinyao.wang@intel.com
/SearchQnA/ sihan.chen@intel.com
/Translation/ liang1.lv@intel.com
/VisualQnA/ liang1.lv@intel.com
/ProductivitySuite/ hoong.tee.yeoh@intel.com
/VideoQnA huiling.bao@intel.com
/*/ liang1.lv@intel.com

View File

@@ -12,10 +12,6 @@ on:
example:
required: true
type: string
services:
default: ""
required: false
type: string
tag:
default: "latest"
required: false
@@ -81,7 +77,6 @@ jobs:
with:
work_dir: ${{ github.workspace }}/${{ inputs.example }}/docker_image_build
docker_compose_path: ${{ github.workspace }}/${{ inputs.example }}/docker_image_build/build.yaml
service_list: ${{ inputs.services }}
registry: ${OPEA_IMAGE_REPO}opea
tag: ${{ inputs.tag }}
@@ -110,6 +105,7 @@ jobs:
example: ${{ inputs.example }}
hardware: ${{ inputs.node }}
tag: ${{ inputs.tag }}
context: "CD"
secrets: inherit
####################################################################################################

View File

@@ -20,6 +20,11 @@ on:
description: "Tag to apply to images, default is latest"
required: false
type: string
context:
default: "CI"
description: "CI or CD"
required: false
type: string
jobs:
manifest-test:
@@ -46,7 +51,7 @@ jobs:
- name: Set variables
run: |
echo "IMAGE_REPO=${OPEA_IMAGE_REPO}opea" >> $GITHUB_ENV
echo "IMAGE_REPO=$OPEA_IMAGE_REPO" >> $GITHUB_ENV
echo "IMAGE_TAG=${{ inputs.tag }}" >> $GITHUB_ENV
lower_example=$(echo "${{ inputs.example }}" | tr '[:upper:]' '[:lower:]')
echo "NAMESPACE=$lower_example-$(tr -dc a-z0-9 </dev/urandom | head -c 16)" >> $GITHUB_ENV
@@ -55,6 +60,7 @@ jobs:
echo "continue_test=true" >> $GITHUB_ENV
echo "should_cleanup=false" >> $GITHUB_ENV
echo "skip_validate=true" >> $GITHUB_ENV
echo "CONTEXT=${{ inputs.context }}" >> $GITHUB_ENV
echo "NAMESPACE=$NAMESPACE"
- name: Kubectl install

View File

@@ -118,7 +118,6 @@ jobs:
GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
PINECONE_KEY: ${{ secrets.PINECONE_KEY }}
PINECONE_KEY_LANGCHAIN_TEST: ${{ secrets.PINECONE_KEY_LANGCHAIN_TEST }}
IMAGE_REPO: ${{ inputs.registry }}
IMAGE_TAG: ${{ inputs.tag }}
example: ${{ inputs.example }}

View File

@@ -1,59 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: Build specific images on manual event
on:
workflow_dispatch:
inputs:
nodes:
default: "gaudi,xeon"
description: "Hardware to run test"
required: true
type: string
example:
default: "ChatQnA"
description: 'Build images belong to which example?'
required: true
type: string
services:
default: "chatqna,chatqna-without-rerank"
description: 'Service list to build'
required: true
type: string
tag:
default: "latest"
description: "Tag to apply to images"
required: true
type: string
opea_branch:
default: "main"
description: 'OPEA branch for image build'
required: false
type: string
jobs:
get-test-matrix:
runs-on: ubuntu-latest
outputs:
nodes: ${{ steps.get-matrix.outputs.nodes }}
steps:
- name: Create Matrix
id: get-matrix
run: |
nodes=($(echo ${{ inputs.nodes }} | tr ',' ' '))
nodes_json=$(printf '%s\n' "${nodes[@]}" | sort -u | jq -R '.' | jq -sc '.')
echo "nodes=$nodes_json" >> $GITHUB_OUTPUT
image-build:
needs: get-test-matrix
strategy:
matrix:
node: ${{ fromJson(needs.get-test-matrix.outputs.nodes) }}
fail-fast: false
uses: ./.github/workflows/_example-workflow.yml
with:
node: ${{ matrix.node }}
example: ${{ inputs.example }}
services: ${{ inputs.services }}
tag: ${{ inputs.tag }}
opea_branch: ${{ inputs.opea_branch }}
secrets: inherit

50
.github/workflows/pr-bum_list_check.yml vendored Normal file
View File

@@ -0,0 +1,50 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: Check Requirements
on: [pull_request]
jobs:
check-requirements:
runs-on: ubuntu-latest
steps:
- name: Checkout PR branch
uses: actions/checkout@v4
- name: Save PR requirements
run: |
find . -name "requirements.txt" -exec cat {} \; | \
grep -v '^\s*#' | \
grep -v '^\s*$' | \
grep -v '^\s*-' | \
sed 's/^\s*//' | \
awk -F'[>=<]' '{print $1}' | \
sort -u > pr-requirements.txt
cat pr-requirements.txt
- name: Checkout main branch
uses: actions/checkout@v4
with:
ref: main
path: main-branch
- name: Save main branch requirements
run: |
find ./main-branch -name "requirements.txt" -exec cat {} \; | \
grep -v '^\s*#' | \
grep -v '^\s*$' | \
grep -v '^\s*-' | \
sed 's/^\s*//' | \
awk -F'[>=<]' '{print $1}' | \
sort -u > main-requirements.txt
cat main-requirements.txt
- name: Compare requirements
run: |
comm -23 pr-requirements.txt main-requirements.txt > added-packages.txt
if [ -s added-packages.txt ]; then
echo "New packages found in PR:" && cat added-packages.txt
else
echo "No new packages found😊."
fi

View File

@@ -8,8 +8,6 @@ on:
branches: ["main", "*rc"]
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
paths:
- "**/Dockerfile**"
- "**.py"
- "**/kubernetes/**/manifests/**"
- "**/tests/test_manifest**"
- "!**.md"

View File

@@ -50,40 +50,28 @@ jobs:
- name: Checkout Repo GenAIExamples
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Check the Validity of Hyperlinks
run: |
cd ${{github.workspace}}
fail="FALSE"
merged_commit=$(git log -1 --format='%H')
changed_files="$(git diff --name-status --diff-filter=ARM ${{ github.event.pull_request.base.sha }} ${merged_commit} | awk '/\.md$/ {print $NF}')"
if [ -n "$changed_files" ]; then
for changed_file in $changed_files; do
echo $changed_file
url_lines=$(grep -H -Eo '\]\(http[s]?://[^)]+\)' "$changed_file" | grep -Ev 'GenAIExamples/blob/main') || true
if [ -n "$url_lines" ]; then
for url_line in $url_lines; do
echo $url_line
url=$(echo "$url_line"|cut -d '(' -f2 | cut -d ')' -f1|sed 's/\.git$//')
path=$(echo "$url_line"|cut -d':' -f1 | cut -d'/' -f2-)
response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url")
if [ "$response" -ne 200 ]; then
echo "**********Validation failed, try again**********"
response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url")
if [ "$response_retry" -eq 200 ]; then
echo "*****Retry successfully*****"
else
echo "Invalid link from ${{github.workspace}}/$path: $url"
fail="TRUE"
fi
fi
done
url_lines=$(grep -Eo '\]\(http[s]?://[^)]+\)' --include='*.md' -r .)
if [ -n "$url_lines" ]; then
for url_line in $url_lines; do
url=$(echo "$url_line"|cut -d '(' -f2 | cut -d ')' -f1|sed 's/\.git$//')
path=$(echo "$url_line"|cut -d':' -f1 | cut -d'/' -f2-)
response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url")
if [ "$response" -ne 200 ]; then
echo "**********Validation failed, try again**********"
response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url")
if [ "$response_retry" -eq 200 ]; then
echo "*****Retry successfully*****"
else
echo "Invalid link from ${{github.workspace}}/$path: $url"
fail="TRUE"
fi
fi
done
else
echo "No changed .md file."
fi
if [[ "$fail" == "TRUE" ]]; then
@@ -101,8 +89,6 @@ jobs:
- name: Checkout Repo GenAIExamples
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Checking Relative Path Validity
run: |
@@ -116,34 +102,33 @@ jobs:
branch="https://github.com/opea-project/GenAIExamples/blob/${{ github.event.pull_request.head.ref }}"
fi
link_head="https://github.com/opea-project/GenAIExamples/blob/main"
merged_commit=$(git log -1 --format='%H')
changed_files="$(git diff --name-status --diff-filter=ARM ${{ github.event.pull_request.base.sha }} ${merged_commit} | awk '/\.md$/ {print $NF}')"
png_lines=$(grep -Eo '\]\([^)]+\)' --include='*.md' -r .|grep -Ev 'http')
if [ -n "$png_lines" ]; then
for png_line in $png_lines; do
refer_path=$(echo "$png_line"|cut -d':' -f1 | cut -d'/' -f2-)
png_path=$(echo "$png_line"|cut -d '(' -f2 | cut -d ')' -f1)
if [[ "${png_path:0:1}" == "/" ]]; then
check_path=$png_path
elif [[ "$png_path" == *#* ]]; then
relative_path=$(echo "$png_path" | cut -d '#' -f1)
if [ -n "$relative_path" ]; then
check_path=$(dirname "$refer_path")/$relative_path
png_path=$(echo "$png_path" | awk -F'#' '{print "#" $2}')
else
check_path=$refer_path
fi
check_path=${{github.workspace}}$png_path
elif [[ "${png_path:0:1}" == "#" ]]; then
check_path=${{github.workspace}}/$refer_path$png_path
else
check_path=$(dirname "$refer_path")/$png_path
check_path=${{github.workspace}}/$(dirname "$refer_path")/$png_path
fi
if [ -e "$check_path" ]; then
real_path=$(realpath $check_path)
if [[ "$png_line" == *#* ]]; then
if [ -n "changed_files" ] && echo "$changed_files" | grep -q "^${refer_path}$"; then
url_dev=$branch$(echo "$real_path" | sed 's|.*/GenAIExamples||')$png_path
real_path=$(realpath $check_path)
if [ $? -ne 0 ]; then
echo "Path $png_path in file ${{github.workspace}}/$refer_path does not exist"
fail="TRUE"
else
url=$link_head$(echo "$real_path" | sed 's|.*/GenAIExamples||')
response=$(curl -I -L -s -o /dev/null -w "%{http_code}" "$url")
if [ "$response" -ne 200 ]; then
echo "**********Validation failed, try again**********"
response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url")
if [ "$response_retry" -eq 200 ]; then
echo "*****Retry successfully*****"
else
echo "Retry failed. Check branch ${{ github.event.pull_request.head.ref }}"
url_dev=$branch$(echo "$real_path" | sed 's|.*/GenAIExamples||')
response=$(curl -I -L -s -o /dev/null -w "%{http_code}" "$url_dev")
if [ "$response" -ne 200 ]; then
echo "**********Validation failed, try again**********"
@@ -155,13 +140,10 @@ jobs:
fail="TRUE"
fi
else
echo "Validation succeed $png_line"
echo "Check branch ${{ github.event.pull_request.head.ref }} successfully."
fi
fi
fi
else
echo "${{github.workspace}}/$refer_path:$png_path does not exist"
fail="TRUE"
fi
done
fi

View File

@@ -23,10 +23,12 @@ jobs:
image-build:
needs: job1
strategy:
matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }}
matrix:
example: ${{ fromJSON(needs.job1.outputs.run_matrix).include.*.example }}
node: ["gaudi","xeon"]
fail-fast: false
uses: ./.github/workflows/_example-workflow.yml
with:
node: ${{ matrix.hardware }}
node: ${{ matrix.node }}
example: ${{ matrix.example }}
secrets: inherit

View File

@@ -79,7 +79,7 @@ repos:
- id: isort
- repo: https://github.com/PyCQA/docformatter
rev: 06907d0
rev: v1.7.5
hooks:
- id: docformatter
args: [

View File

@@ -5,73 +5,6 @@
This example showcases a hierarchical multi-agent system for question-answering applications. The architecture diagram is shown below. The supervisor agent interfaces with the user and dispatch tasks to the worker agent and other tools to gather information and come up with answers. The worker agent uses the retrieval tool to generate answers to the queries posted by the supervisor agent. Other tools used by the supervisor agent may include APIs to interface knowledge graphs, SQL databases, external knowledge bases, etc.
![Architecture Overview](assets/agent_qna_arch.png)
The AgentQnA example is implemented using the component-level microservices defined in [GenAIComps](https://github.com/opea-project/GenAIComps). The flow chart below shows the information flow between different microservices for this example.
```mermaid
---
config:
flowchart:
nodeSpacing: 400
rankSpacing: 100
curve: linear
themeVariables:
fontSize: 50px
---
flowchart LR
%% Colors %%
classDef blue fill:#ADD8E6,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
classDef orange fill:#FBAA60,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
classDef orchid fill:#C26DBC,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
classDef invisible fill:transparent,stroke:transparent;
%% Subgraphs %%
subgraph DocIndexRetriever-MegaService["DocIndexRetriever MegaService "]
direction LR
EM([Embedding MicroService]):::blue
RET([Retrieval MicroService]):::blue
RER([Rerank MicroService]):::blue
end
subgraph UserInput[" User Input "]
direction LR
a([User Input Query]):::orchid
Ingest([Ingest data]):::orchid
end
AG_REACT([Agent MicroService - react]):::blue
AG_RAG([Agent MicroService - rag]):::blue
LLM_gen{{LLM Service <br>}}
DP([Data Preparation MicroService]):::blue
TEI_RER{{Reranking service<br>}}
TEI_EM{{Embedding service <br>}}
VDB{{Vector DB<br><br>}}
R_RET{{Retriever service <br>}}
%% Questions interaction
direction LR
a[User Input Query] --> AG_REACT
AG_REACT --> AG_RAG
AG_RAG --> DocIndexRetriever-MegaService
EM ==> RET
RET ==> RER
Ingest[Ingest data] --> DP
%% Embedding service flow
direction LR
AG_RAG <-.-> LLM_gen
AG_REACT <-.-> LLM_gen
EM <-.-> TEI_EM
RET <-.-> R_RET
RER <-.-> TEI_RER
direction TB
%% Vector DB interaction
R_RET <-.-> VDB
DP <-.-> VDB
```
### Why Agent for question answering?
1. Improve relevancy of retrieved context.

View File

@@ -2,63 +2,6 @@
AudioQnA is an example that demonstrates the integration of Generative AI (GenAI) models for performing question-answering (QnA) on audio files, with the added functionality of Text-to-Speech (TTS) for generating spoken responses. The example showcases how to convert audio input to text using Automatic Speech Recognition (ASR), generate answers to user queries using a language model, and then convert those answers back to speech using Text-to-Speech (TTS).
The AudioQnA example is implemented using the component-level microservices defined in [GenAIComps](https://github.com/opea-project/GenAIComps). The flow chart below shows the information flow between different microservices for this example.
```mermaid
---
config:
flowchart:
nodeSpacing: 400
rankSpacing: 100
curve: linear
themeVariables:
fontSize: 50px
---
flowchart LR
%% Colors %%
classDef blue fill:#ADD8E6,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
classDef orange fill:#FBAA60,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
classDef orchid fill:#C26DBC,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
classDef invisible fill:transparent,stroke:transparent;
style AudioQnA-MegaService stroke:#000000
%% Subgraphs %%
subgraph AudioQnA-MegaService["AudioQnA MegaService "]
direction LR
ASR([ASR MicroService]):::blue
LLM([LLM MicroService]):::blue
TTS([TTS MicroService]):::blue
end
subgraph UserInterface[" User Interface "]
direction LR
a([User Input Query]):::orchid
UI([UI server<br>]):::orchid
end
WSP_SRV{{whisper service<br>}}
SPC_SRV{{speecht5 service <br>}}
LLM_gen{{LLM Service <br>}}
GW([AudioQnA GateWay<br>]):::orange
%% Questions interaction
direction LR
a[User Audio Query] --> UI
UI --> GW
GW <==> AudioQnA-MegaService
ASR ==> LLM
LLM ==> TTS
%% Embedding service flow
direction LR
ASR <-.-> WSP_SRV
LLM <-.-> LLM_gen
TTS <-.-> SPC_SRV
```
## Deploy AudioQnA Service
The AudioQnA service can be deployed on either Intel Gaudi2 or Intel Xeon Scalable Processor.

View File

@@ -1,98 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import asyncio
import base64
import os
from comps import AudioQnAGateway, MicroService, ServiceOrchestrator, ServiceType
MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "0.0.0.0")
MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))
WHISPER_SERVER_HOST_IP = os.getenv("WHISPER_SERVER_HOST_IP", "0.0.0.0")
WHISPER_SERVER_PORT = int(os.getenv("WHISPER_SERVER_PORT", 7066))
GPT_SOVITS_SERVER_HOST_IP = os.getenv("GPT_SOVITS_SERVER_HOST_IP", "0.0.0.0")
GPT_SOVITS_SERVER_PORT = int(os.getenv("GPT_SOVITS_SERVER_PORT", 9088))
LLM_SERVER_HOST_IP = os.getenv("LLM_SERVER_HOST_IP", "0.0.0.0")
LLM_SERVER_PORT = int(os.getenv("LLM_SERVER_PORT", 8888))
def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
print(inputs)
if self.services[cur_node].service_type == ServiceType.ASR:
# {'byte_str': 'UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA'}
inputs["audio"] = inputs["byte_str"]
del inputs["byte_str"]
elif self.services[cur_node].service_type == ServiceType.LLM:
# convert TGI/vLLM to unified OpenAI /v1/chat/completions format
next_inputs = {}
next_inputs["model"] = "tgi" # specifically clarify the fake model to make the format unified
next_inputs["messages"] = [{"role": "user", "content": inputs["asr_result"]}]
next_inputs["max_tokens"] = llm_parameters_dict["max_tokens"]
next_inputs["top_p"] = llm_parameters_dict["top_p"]
next_inputs["stream"] = inputs["streaming"] # False as default
next_inputs["frequency_penalty"] = inputs["frequency_penalty"]
# next_inputs["presence_penalty"] = inputs["presence_penalty"]
# next_inputs["repetition_penalty"] = inputs["repetition_penalty"]
next_inputs["temperature"] = inputs["temperature"]
inputs = next_inputs
elif self.services[cur_node].service_type == ServiceType.TTS:
next_inputs = {}
next_inputs["text"] = inputs["choices"][0]["message"]["content"]
next_inputs["text_language"] = kwargs["tts_text_language"] if "tts_text_language" in kwargs else "zh"
inputs = next_inputs
return inputs
def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_dict, **kwargs):
if self.services[cur_node].service_type == ServiceType.TTS:
audio_base64 = base64.b64encode(data).decode("utf-8")
return {"byte_str": audio_base64}
return data
class AudioQnAService:
def __init__(self, host="0.0.0.0", port=8000):
self.host = host
self.port = port
ServiceOrchestrator.align_inputs = align_inputs
ServiceOrchestrator.align_outputs = align_outputs
self.megaservice = ServiceOrchestrator()
def add_remote_service(self):
asr = MicroService(
name="asr",
host=WHISPER_SERVER_HOST_IP,
port=WHISPER_SERVER_PORT,
# endpoint="/v1/audio/transcriptions",
endpoint="/v1/asr",
use_remote_service=True,
service_type=ServiceType.ASR,
)
llm = MicroService(
name="llm",
host=LLM_SERVER_HOST_IP,
port=LLM_SERVER_PORT,
endpoint="/v1/chat/completions",
use_remote_service=True,
service_type=ServiceType.LLM,
)
tts = MicroService(
name="tts",
host=GPT_SOVITS_SERVER_HOST_IP,
port=GPT_SOVITS_SERVER_PORT,
# endpoint="/v1/audio/speech",
endpoint="/",
use_remote_service=True,
service_type=ServiceType.TTS,
)
self.megaservice.add(asr).add(llm).add(tts)
self.megaservice.flow_to(asr, llm)
self.megaservice.flow_to(llm, tts)
self.gateway = AudioQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)
if __name__ == "__main__":
audioqna = AudioQnAService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT)
audioqna.add_remote_service()

View File

@@ -1,4 +1,4 @@
# AudioQnA Accuracy
# AudioQnA accuracy Evaluation
AudioQnA is an example that demonstrates the integration of Generative AI (GenAI) models for performing question-answering (QnA) on audio scene, which contains Automatic Speech Recognition (ASR) and Text-to-Speech (TTS). The following is the piepline for evaluating the ASR accuracy.

View File

@@ -1,5 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
python online_evaluate.py

View File

@@ -127,13 +127,9 @@ curl http://${host_ip}:3002/v1/audio/speech \
## 🚀 Test MegaService
Test the AudioQnA megaservice by recording a .wav file, encoding the file into the base64 format, and then sending the
base64 string to the megaservice endpoint. The megaservice will return a spoken response as a base64 string. To listen
to the response, decode the base64 string and save it as a .wav file.
```bash
curl http://${host_ip}:3008/v1/audioqna \
-X POST \
-d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64}' \
-H 'Content-Type: application/json' | sed 's/^"//;s/"$//' | base64 -d > output.wav
-H 'Content-Type: application/json'
```

View File

@@ -1,64 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
services:
whisper-service:
image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
container_name: whisper-service
ports:
- "7066:7066"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
restart: unless-stopped
command: --language "zh"
gpt-sovits-service:
image: ${REGISTRY:-opea}/gpt-sovits:${TAG:-latest}
container_name: gpt-sovits-service
ports:
- "9880:9880"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
restart: unless-stopped
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
container_name: tgi-service
ports:
- "3006:80"
volumes:
- "./data:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
audioqna-xeon-backend-server:
image: ${REGISTRY:-opea}/audioqna-multilang:${TAG:-latest}
container_name: audioqna-xeon-backend-server
ports:
- "3008:8888"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
- LLM_SERVER_HOST_IP=${LLM_SERVER_HOST_IP}
- LLM_SERVER_PORT=${LLM_SERVER_PORT}
- LLM_MODEL_ID=${LLM_MODEL_ID}
- WHISPER_SERVER_HOST_IP=${WHISPER_SERVER_HOST_IP}
- WHISPER_SERVER_PORT=${WHISPER_SERVER_PORT}
- GPT_SOVITS_SERVER_HOST_IP=${GPT_SOVITS_SERVER_HOST_IP}
- GPT_SOVITS_SERVER_PORT=${GPT_SOVITS_SERVER_PORT}
ipc: host
restart: always
networks:
default:
driver: bridge

View File

@@ -79,8 +79,6 @@ export LLM_SERVICE_PORT=3007
## 🚀 Start the MegaService
> **_NOTE:_** Users will need at least three Gaudi cards for AudioQnA.
```bash
cd GenAIExamples/AudioQnA/docker_compose/intel/hpu/gaudi/
docker compose up -d
@@ -129,13 +127,9 @@ curl http://${host_ip}:3002/v1/audio/speech \
## 🚀 Test MegaService
Test the AudioQnA megaservice by recording a .wav file, encoding the file into the base64 format, and then sending the
base64 string to the megaservice endpoint. The megaservice will return a spoken response as a base64 string. To listen
to the response, decode the base64 string and save it as a .wav file.
```bash
curl http://${host_ip}:3008/v1/audioqna \
-X POST \
-d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64}' \
-H 'Content-Type: application/json' | sed 's/^"//;s/"$//' | base64 -d > output.wav
-H 'Content-Type: application/json'
```

View File

@@ -53,9 +53,3 @@ services:
dockerfile: comps/tts/speecht5/Dockerfile
extends: audioqna
image: ${REGISTRY:-opea}/tts:${TAG:-latest}
gpt-sovits:
build:
context: GenAIComps
dockerfile: comps/tts/gpt-sovits/Dockerfile
extends: audioqna
image: ${REGISTRY:-opea}/gpt-sovits:${TAG:-latest}

View File

@@ -19,8 +19,7 @@ RUN git clone https://github.com/opea-project/GenAIComps.git
WORKDIR /home/user/GenAIComps
RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt && \
pip install --no-cache-dir langchain_core
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt
COPY ./chatqna.py /home/user/chatqna.py

View File

@@ -19,10 +19,9 @@ RUN git clone https://github.com/opea-project/GenAIComps.git
WORKDIR /home/user/GenAIComps
RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt && \
pip install --no-cache-dir langchain_core
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt
COPY ./chatqna.py /home/user/chatqna.py
COPY ./chatqna_guardrails.py /home/user/chatqna_guardrails.py
ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps
@@ -32,4 +31,4 @@ WORKDIR /home/user
RUN echo 'ulimit -S -n 999999' >> ~/.bashrc
ENTRYPOINT ["python", "chatqna.py", "--with-guardrails"]
ENTRYPOINT ["python", "chatqna_guardrails.py"]

View File

@@ -8,6 +8,7 @@ FROM python:3.11-slim
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
libgl1-mesa-glx \
libjemalloc-dev \
vim \
git
RUN useradd -m -s /bin/bash user && \
@@ -19,9 +20,10 @@ RUN git clone https://github.com/opea-project/GenAIComps.git
WORKDIR /home/user/GenAIComps
RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt && \
pip install --no-cache-dir langchain_core
COPY ./audioqna_multilang.py /home/user/audioqna_multilang.py
COPY ./chatqna_no_wrapper.py /home/user/chatqna_no_wrapper.py
ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps
@@ -29,4 +31,4 @@ USER user
WORKDIR /home/user
ENTRYPOINT ["python", "audioqna_multilang.py"]
ENTRYPOINT ["python", "chatqna_no_wrapper.py"]

View File

@@ -0,0 +1,34 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
FROM python:3.11-slim
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
libgl1-mesa-glx \
libjemalloc-dev \
vim \
git
RUN useradd -m -s /bin/bash user && \
mkdir -p /home/user && \
chown -R user /home/user/
WORKDIR /home/user/
RUN git clone https://github.com/opea-project/GenAIComps.git
WORKDIR /home/user/GenAIComps
RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt && \
pip install --no-cache-dir langchain_core
COPY ./chatqna_no_wrapper.py /home/user/chatqna_no_wrapper.py
ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps
USER user
WORKDIR /home/user
ENTRYPOINT ["python", "chatqna_no_wrapper.py", "--without-rerank"]

View File

@@ -6,9 +6,9 @@
FROM python:3.11-slim
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
git \
libgl1-mesa-glx \
libjemalloc-dev
libjemalloc-dev \
git
RUN useradd -m -s /bin/bash user && \
mkdir -p /home/user && \
@@ -19,10 +19,9 @@ RUN git clone https://github.com/opea-project/GenAIComps.git
WORKDIR /home/user/GenAIComps
RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt && \
pip install --no-cache-dir langchain_core
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt
COPY ./chatqna.py /home/user/chatqna.py
COPY ./chatqna_without_rerank.py /home/user/chatqna_without_rerank.py
ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps
@@ -32,4 +31,4 @@ WORKDIR /home/user
RUN echo 'ulimit -S -n 999999' >> ~/.bashrc
ENTRYPOINT ["python", "chatqna.py", "--without-rerank"]
ENTRYPOINT ["python", "chatqna_without_rerank.py"]

View File

@@ -1,170 +0,0 @@
# ChatQnA Accuracy
ChatQnA is a Retrieval-Augmented Generation (RAG) pipeline, which can enhance generative models through external information retrieval.
For evaluating the accuracy, we use 2 latest published datasets and 10+ metrics which are popular and comprehensive:
- Dataset
- [MultiHop](https://arxiv.org/pdf/2401.15391) (English dataset)
- [CRUD](https://arxiv.org/abs/2401.17043) (Chinese dataset)
- metrics (measure accuracy of both the context retrieval and response generation)
- evaluation for retrieval/reranking
- MRR@10
- MAP@10
- Hits@10
- Hits@4
- LLM-as-a-Judge
- evaluation for the generated response from the end-to-end pipeline
- BLEU
- ROGUE(L)
- LLM-as-a-Judge
## Prerequisite
### Environment
```bash
git clone https://github.com/opea-project/GenAIEval
cd GenAIEval
pip install -r requirements.txt
pip install -e .
```
## MultiHop (English dataset)
[MultiHop-RAG](https://arxiv.org/pdf/2401.15391): a QA dataset to evaluate retrieval and reasoning across documents with metadata in the RAG pipelines. It contains 2556 queries, with evidence for each query distributed across 2 to 4 documents. The queries also involve document metadata, reflecting complex scenarios commonly found in real-world RAG applications.
### Launch Service of RAG System
Please refer to this [guide](https://github.com/opea-project/GenAIExamples/blob/main/ChatQnA/README.md) to launch the service of `ChatQnA`.
### Launch Service of LLM-as-a-Judge
To setup a LLM model, we can use [tgi-gaudi](https://github.com/huggingface/tgi-gaudi) to launch a service. For example, the follow command is to setup the [mistralai/Mixtral-8x7B-Instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1) model on 2 Gaudi2 cards:
```
# please set your llm_port and hf_token
docker run -p {your_llm_port}:80 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HF_TOKEN={your_hf_token} --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.1 --model-id mistralai/Mixtral-8x7B-Instruct-v0.1 --max-input-tokens 2048 --max-total-tokens 4096 --sharded true --num-shard 2
# for better performance, set `PREFILL_BATCH_BUCKET_SIZE`, `BATCH_BUCKET_SIZE`, `max-batch-total-tokens`, `max-batch-prefill-tokens`
docker run -p {your_llm_port}:80 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HF_TOKEN={your_hf_token} -e PREFILL_BATCH_BUCKET_SIZE=1 -e BATCH_BUCKET_SIZE=8 --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.5 --model-id mistralai/Mixtral-8x7B-Instruct-v0.1 --max-input-tokens 2048 --max-total-tokens 4096 --sharded true --num-shard 2 --max-batch-total-tokens 65536 --max-batch-prefill-tokens 2048
```
### Prepare Dataset
We use the evaluation dataset from [MultiHop-RAG](https://github.com/yixuantt/MultiHop-RAG) repo, use the below command to prepare the dataset.
```bash
git clone https://github.com/yixuantt/MultiHop-RAG.git
```
### Evaluation
Use below command to run the evaluation, please note that for the first run, argument `--ingest_docs` should be added in the command to ingest the documents into the vector database, while for the subsequent run, this argument should be omitted. Set `--retrieval_metrics` to get retrieval related metrics (MRR@10/MAP@10/Hits@10/Hits@4). Set `--ragas_metrics` and `--llm_endpoint` to get end-to-end rag pipeline metrics (faithfulness/answer_relevancy/...), which are judged by LLMs. We set `--limits` is 100 as default, which means only 100 examples are evaluated by llm-as-judge as it is very time consuming.
If you are using docker compose to deploy `ChatQnA` system, you can simply run the evaluation as following:
```bash
python eval_multihop.py --docs_path MultiHop-RAG/dataset/corpus.json --dataset_path MultiHop-RAG/dataset/MultiHopRAG.json --ingest_docs --retrieval_metrics --ragas_metrics --llm_endpoint http://{llm_as_judge_ip}:{llm_as_judge_port}/generate
```
If you are using Kubernetes manifest/helm to deploy `ChatQnA` system, you must specify more arguments as following:
```bash
python eval_multihop.py --docs_path MultiHop-RAG/dataset/corpus.json --dataset_path MultiHop-RAG/dataset/MultiHopRAG.json --ingest_docs --retrieval_metrics --ragas_metrics --llm_endpoint http://{llm_as_judge_ip}:{llm_as_judge_port}/generate --database_endpoint http://{your_dataprep_ip}:{your_dataprep_port}/v1/dataprep --embedding_endpoint http://{your_embedding_ip}:{your_embedding_port}/v1/embeddings --tei_embedding_endpoint http://{your_tei_embedding_ip}:{your_tei_embedding_port} --retrieval_endpoint http://{your_retrieval_ip}:{your_retrieval_port}/v1/retrieval --service_url http://{your_chatqna_ip}:{your_chatqna_port}/v1/chatqna
```
The default values for arguments are:
|Argument|Default value|
|--------|-------------|
|service_url|http://localhost:8888/v1/chatqna|
|database_endpoint|http://localhost:6007/v1/dataprep|
|embedding_endpoint|http://localhost:6000/v1/embeddings|
|tei_embedding_endpoint|http://localhost:8090|
|retrieval_endpoint|http://localhost:7000/v1/retrieval|
|reranking_endpoint|http://localhost:8000/v1/reranking|
|output_dir|./output|
|temperature|0.1|
|max_new_tokens|1280|
|chunk_size|256|
|chunk_overlap|100|
|search_type|similarity|
|retrival_k|10|
|fetch_k|20|
|lambda_mult|0.5|
|dataset_path|None|
|docs_path|None|
|limits|100|
You can check arguments details use below command:
```bash
python eval_multihop.py --help
```
## CRUD (Chinese dataset)
[CRUD-RAG](https://arxiv.org/abs/2401.17043) is a Chinese benchmark for RAG (Retrieval-Augmented Generation) system. This example utilize CRUD-RAG for evaluating the RAG system.
### Prepare Dataset
We use the evaluation dataset from [CRUD-RAG](https://github.com/IAAR-Shanghai/CRUD_RAG) repo, use the below command to prepare the dataset.
```bash
git clone https://github.com/IAAR-Shanghai/CRUD_RAG
mkdir data/
cp CRUD_RAG/data/crud_split/split_merged.json data/
cp -r CRUD_RAG/data/80000_docs/ data/
python process_crud_dataset.py
```
### Launch Service of RAG System
Please refer to this [guide](https://github.com/opea-project/GenAIExamples/blob/main/ChatQnA/README.md) to launch the service of `ChatQnA` system. For Chinese dataset, you should replace the English emebdding and llm model with Chinese, for example, `EMBEDDING_MODEL_ID="BAAI/bge-base-zh-v1.5"` and `LLM_MODEL_ID=Qwen/Qwen2-7B-Instruct`.
### Evaluation
Use below command to run the evaluation, please note that for the first run, argument `--ingest_docs` should be added in the command to ingest the documents into the vector database, while for the subsequent run, this argument should be omitted.
If you are using docker compose to deploy `ChatQnA` system, you can simply run the evaluation as following:
```bash
python eval_crud.py --dataset_path ./data/split_merged.json --docs_path ./data/80000_docs --ingest_docs
# if you want to get ragas metrics
python eval_crud.py --dataset_path ./data/split_merged.json --docs_path ./data/80000_docs --contain_original_data --llm_endpoint "http://{llm_as_judge_ip}:{llm_as_judge_port}" --ragas_metrics
```
If you are using Kubernetes manifest/helm to deploy `ChatQnA` system, you must specify more arguments as following:
```bash
python eval_crud.py --dataset_path ./data/split_merged.json --docs_path ./data/80000_docs --ingest_docs --database_endpoint http://{your_dataprep_ip}:{your_dataprep_port}/v1/dataprep --embedding_endpoint http://{your_embedding_ip}:{your_embedding_port}/v1/embeddings --retrieval_endpoint http://{your_retrieval_ip}:{your_retrieval_port}/v1/retrieval --service_url http://{your_chatqna_ip}:{your_chatqna_port}/v1/chatqna
```
The default values for arguments are:
|Argument|Default value|
|--------|-------------|
|service_url|http://localhost:8888/v1/chatqna|
|database_endpoint|http://localhost:6007/v1/dataprep|
|embedding_endpoint|http://localhost:6000/v1/embeddings|
|retrieval_endpoint|http://localhost:7000/v1/retrieval|
|reranking_endpoint|http://localhost:8000/v1/reranking|
|output_dir|./output|
|temperature|0.1|
|max_new_tokens|1280|
|chunk_size|256|
|chunk_overlap|100|
|dataset_path|./data/split_merged.json|
|docs_path|./data/80000_docs|
|tasks|["question_answering"]|
You can check arguments details use below command:
```bash
python eval_crud.py --help
```
## Acknowledgements
This example is mostly adapted from [MultiHop-RAG](https://github.com/yixuantt/MultiHop-RAG) and [CRUD-RAG](https://github.com/IAAR-Shanghai/CRUD_RAG) repo, we thank the authors for their great work!

View File

@@ -1,210 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import argparse
import json
import os
from evals.evaluation.rag_eval import Evaluator
from evals.evaluation.rag_eval.template import CRUDTemplate
from evals.metrics.ragas import RagasMetric
from tqdm import tqdm
class CRUD_Evaluator(Evaluator):
    """Evaluator for the CRUD-RAG benchmark.

    Each supported task reads its query, source document and ground truth
    from a different field of the CRUD dataset; the per-method mapping
    tables below centralize that field selection.
    """

    def get_ground_truth_text(self, data: dict):
        """Return the reference (ground-truth) text for the current task."""
        field_by_task = {
            "summarization": "summary",
            "question_answering": "answers",
            "continuation": "continuing",
            "hallucinated_modified": "hallucinatedMod",
        }
        if self.task not in field_by_task:
            raise NotImplementedError(
                f"Unknown task {self.task}, only support "
                "summarization, question_answering, continuation and hallucinated_modified."
            )
        return data[field_by_task[self.task]]

    def get_query(self, data: dict):
        """Return the query text sent to the service for the current task."""
        field_by_task = {
            "summarization": "text",
            "question_answering": "questions",
            "continuation": "beginning",
            "hallucinated_modified": "newsBeginning",
        }
        if self.task not in field_by_task:
            raise NotImplementedError(
                f"Unknown task {self.task}, only support "
                "summarization, question_answering, continuation and hallucinated_modified."
            )
        return data[field_by_task[self.task]]

    def get_document(self, data: dict):
        """Return the source document for the current task."""
        field_by_task = {
            "summarization": "text",
            "question_answering": "news1",
            "continuation": "beginning",
            "hallucinated_modified": "newsBeginning",
        }
        if self.task not in field_by_task:
            raise NotImplementedError(
                f"Unknown task {self.task}, only support "
                "summarization, question_answering, continuation and hallucinated_modified."
            )
        return data[field_by_task[self.task]]

    def get_template(self):
        """Return the prompt template for the current task.

        NOTE(review): unlike the other dispatchers, no template exists for
        ``hallucinated_modified``, so that task raises here as well — same
        behavior as the original if/elif chain.
        """
        template_by_task = {
            "summarization": CRUDTemplate.get_summarization_template,
            "question_answering": CRUDTemplate.get_question_answering_template,
            "continuation": CRUDTemplate.get_continuation_template,
        }
        if self.task not in template_by_task:
            raise NotImplementedError(
                f"Unknown task {self.task}, only support "
                "summarization, question_answering, continuation and hallucinated_modified."
            )
        return template_by_task[self.task]()

    def post_process(self, result):
        """Extract the text wrapped by the final <response>...</response> pair."""
        tail = result.split("<response>")[-1]
        return tail.split("</response>")[0].strip()

    def get_ragas_metrics(self, results, arguments):
        """Score valid generation results with ragas.

        Uses a TEI embedding endpoint plus an LLM-as-judge endpoint and
        computes the "faithfulness" and "answer_relevancy" metrics.
        """
        from langchain_huggingface import HuggingFaceEndpointEmbeddings

        embeddings = HuggingFaceEndpointEmbeddings(model=arguments.tei_embedding_endpoint)
        metric = RagasMetric(
            threshold=0.5,
            model=arguments.llm_endpoint,
            embeddings=embeddings,
            metrics=["faithfulness", "answer_relevancy"],
        )
        all_answer_relevancy = 0  # kept for parity with the original; not read below
        all_faithfulness = 0  # kept for parity with the original; not read below
        ragas_inputs = {
            "question": [],
            "answer": [],
            "ground_truth": [],
            "contexts": [],
        }
        for record in tqdm(self.remove_invalid(results["results"])):
            sample = record["original_data"]
            ragas_inputs["question"].append(self.get_query(sample))
            ragas_inputs["answer"].append(sample["generated_text"])
            ragas_inputs["ground_truth"].append(sample["ground_truth_text"])
            # Only the top-3 retrieved passages are scored as context.
            ragas_inputs["contexts"].append(sample["retrieved_documents"][:3])
        return metric.measure(ragas_inputs)
def args_parser():
    """Parse command-line arguments for the CRUD-RAG evaluation.

    Returns:
        argparse.Namespace with service endpoints, generation parameters,
        chunking parameters and task selection.
    """

    def str_to_bool(value):
        # argparse's ``type=bool`` treats any non-empty string as True, so
        # ``--show_progress_bar False`` used to silently parse as True.
        # This helper interprets common falsy spellings correctly.
        return str(value).strip().lower() not in ("false", "0", "no", "n", "off", "")

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--service_url", type=str, default="http://localhost:8888/v1/chatqna", help="Service URL address."
    )
    parser.add_argument("--output_dir", type=str, default="./output", help="Directory to save evaluation results.")
    parser.add_argument(
        "--temperature", type=float, default=0.1, help="Controls the randomness of the model's text generation"
    )
    parser.add_argument(
        "--max_new_tokens", type=int, default=1280, help="Maximum number of new tokens to be generated by the model"
    )
    parser.add_argument(
        "--chunk_size", type=int, default=256, help="the maximum number of characters that a chunk can contain"
    )
    parser.add_argument(
        "--chunk_overlap",
        type=int,
        default=100,
        help="the number of characters that should overlap between two adjacent chunks",
    )
    parser.add_argument("--dataset_path", default="../data/split_merged.json", help="Path to the dataset")
    parser.add_argument("--docs_path", default="../data/80000_docs", help="Path to the retrieval documents")

    # Retriever related options
    parser.add_argument("--tasks", default=["question_answering"], nargs="+", help="Task to perform")
    parser.add_argument("--ingest_docs", action="store_true", help="Whether to ingest documents to vector database")
    parser.add_argument(
        "--database_endpoint", type=str, default="http://localhost:6007/v1/dataprep", help="Service URL address."
    )
    parser.add_argument(
        "--embedding_endpoint", type=str, default="http://localhost:6000/v1/embeddings", help="Service URL address."
    )
    parser.add_argument(
        "--retrieval_endpoint", type=str, default="http://localhost:7000/v1/retrieval", help="Service URL address."
    )
    parser.add_argument(
        "--tei_embedding_endpoint",
        type=str,
        default="http://localhost:8090",
        help="Service URL address of tei embedding.",
    )
    parser.add_argument("--ragas_metrics", action="store_true", help="Whether to compute ragas metrics.")
    parser.add_argument("--llm_endpoint", type=str, default=None, help="Service URL address.")
    parser.add_argument(
        "--show_progress_bar", action="store", default=True, type=str_to_bool, help="Whether to show a progress bar"
    )
    parser.add_argument("--contain_original_data", action="store_true", help="Whether to contain original data")
    args = parser.parse_args()
    return args
def main():
    """Run the configured CRUD-RAG evaluation tasks end to end.

    Loads the dataset file, optionally ingests the corpus into the vector
    database, evaluates each requested task and writes per-task JSON
    results into the output directory.
    """
    args = args_parser()

    if not os.path.isfile(args.dataset_path):
        raise FileNotFoundError(f"Evaluation dataset file {args.dataset_path} not exist.")
    with open(args.dataset_path) as f:
        all_datasets = json.load(f)

    os.makedirs(args.output_dir, exist_ok=True)

    # Dataset split used for each supported task.
    dataset_key_by_task = {
        "question_answering": "questanswer_1doc",
        "summarization": "event_summary",
    }
    for task in args.tasks:
        if task not in dataset_key_by_task:
            raise NotImplementedError(
                f"Unknown task {task}, only support "
                "summarization, question_answering, continuation and hallucinated_modified."
            )
        dataset = all_datasets[dataset_key_by_task[task]]

        output_save_path = os.path.join(args.output_dir, f"{task}.json")
        evaluator = CRUD_Evaluator(dataset=dataset, output_path=output_save_path, task=task)
        if args.ingest_docs:
            CRUD_Evaluator.ingest_docs(args.docs_path, args.database_endpoint, args.chunk_size, args.chunk_overlap)
        results = evaluator.evaluate(
            args, show_progress_bar=args.show_progress_bar, contain_original_data=args.contain_original_data
        )
        print(results["overall"])
        if args.ragas_metrics:
            print(evaluator.get_ragas_metrics(results, args))
        print(f"Evaluation results of task {task} saved to {output_save_path}.")


if __name__ == "__main__":
    main()

View File

@@ -1,279 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import argparse
import json
import os
import requests
from evals.evaluation.rag_eval import Evaluator
from evals.metrics.ragas import RagasMetric
from evals.metrics.retrieval import RetrievalBaseMetric
from tqdm import tqdm
class MultiHop_Evaluator(Evaluator):
    """Evaluator for the MultiHop-RAG benchmark.

    Calls the embedding, retrieval and (optionally) reranking microservice
    endpoints directly to score retrieval quality, and scores end-to-end
    answers by substring match against the MultiHop-RAG reference answers.
    """

    def get_ground_truth_text(self, data: dict):
        # Reference answer for one MultiHop-RAG sample.
        return data["answer"]

    def get_query(self, data: dict):
        # Natural-language question for one MultiHop-RAG sample.
        return data["query"]

    def get_template(self):
        # No custom prompt template; the service default is used.
        return None

    def get_reranked_documents(self, query, docs, arguments):
        """Rerank ``docs`` for ``query`` via the reranking endpoint.

        Returns the reranked document list, or [] when the request fails.
        """
        data = {
            "initial_query": query,
            "retrieved_docs": [{"text": doc} for doc in docs],
            "top_n": 10,
        }
        headers = {"Content-Type": "application/json"}
        response = requests.post(arguments.reranking_endpoint, data=json.dumps(data), headers=headers)
        if response.ok:
            reranked_documents = response.json()["documents"]
            return reranked_documents
        else:
            # NOTE(review): message says "retrieval" but this is the rerank call.
            print(f"Request for retrieval failed due to {response.text}.")
            return []

    def get_retrieved_documents(self, query, arguments):
        """Embed ``query`` then fetch matching documents from the retriever.

        Returns a list of document texts; [] if either request fails.
        """
        data = {"text": query}
        headers = {"Content-Type": "application/json"}
        response = requests.post(arguments.embedding_endpoint, data=json.dumps(data), headers=headers)
        if response.ok:
            embedding = response.json()["embedding"]
        else:
            print(f"Request for embedding failed due to {response.text}.")
            return []
        data = {
            "text": query,
            "embedding": embedding,
            "search_type": arguments.search_type,
            "k": arguments.retrival_k,
            "fetch_k": arguments.fetch_k,
            "lambda_mult": arguments.lambda_mult,
        }
        response = requests.post(arguments.retrieval_endpoint, data=json.dumps(data), headers=headers)
        if response.ok:
            retrieved_documents = response.json()["retrieved_docs"]
            return [doc["text"] for doc in retrieved_documents]
        else:
            print(f"Request for retrieval failed due to {response.text}.")
            return []

    def get_retrieval_metrics(self, all_queries, arguments):
        """Average Hits@10 / Hits@4 / MAP@10 / MRR@10 over all non-null queries."""
        print("start to retrieve...")
        metric = RetrievalBaseMetric()
        hits_at_10 = 0
        hits_at_4 = 0
        map_at_10 = 0
        mrr_at_10 = 0
        total = 0
        for data in tqdm(all_queries):
            # "null_query" samples have no answer and are excluded from scoring.
            if data["question_type"] == "null_query":
                continue
            query = data["query"]
            retrieved_documents = self.get_retrieved_documents(query, arguments)
            if arguments.rerank:
                retrieved_documents = self.get_reranked_documents(query, retrieved_documents, arguments)
            golden_context = [each["fact"] for each in data["evidence_list"]]
            test_case = {
                "input": query,
                "golden_context": golden_context,
                "retrieval_context": retrieved_documents,
            }
            results = metric.measure(test_case)
            hits_at_10 += results["Hits@10"]
            hits_at_4 += results["Hits@4"]
            map_at_10 += results["MAP@10"]
            mrr_at_10 += results["MRR@10"]
            total += 1
        # Calculate average metrics over all queries
        # NOTE(review): raises ZeroDivisionError when every sample is a
        # null_query (total == 0) — confirm inputs always contain scored queries.
        hits_at_10 = hits_at_10 / total
        hits_at_4 = hits_at_4 / total
        map_at_10 = map_at_10 / total
        mrr_at_10 = mrr_at_10 / total
        return {
            "Hits@10": hits_at_10,
            "Hits@4": hits_at_4,
            "MAP@10": map_at_10,
            "MRR@10": mrr_at_10,
        }

    def evaluate(self, all_queries, arguments):
        """Query the service for each sample and aggregate scoring results.

        Accuracy is exact-substring containment of the reference answer in
        the generated text (same method as the MultiHop-RAG paper).
        ``send_request``/``scoring``/``remove_invalid``/``compute_overall``
        are provided by the Evaluator base class (not visible here).
        """
        results = []
        accuracy = 0
        index = 0
        for data in tqdm(all_queries):
            if data["question_type"] == "null_query":
                continue
            generated_text = self.send_request(data, arguments)
            data["generated_text"] = generated_text
            # same method with paper: https://github.com/yixuantt/MultiHop-RAG/issues/8
            if data["answer"] in generated_text:
                accuracy += 1
            result = {"id": index, **self.scoring(data)}
            results.append(result)
            index += 1
        valid_results = self.remove_invalid(results)
        try:
            overall = self.compute_overall(valid_results) if len(valid_results) > 0 else {}
        except Exception as e:
            print(repr(e))
            overall = dict()
        # NOTE(review): divides by len(results) — ZeroDivisionError if every
        # query was a null_query; confirm the dataset guarantees otherwise.
        overall.update({"accuracy": accuracy / len(results)})
        return overall

    def get_ragas_metrics(self, all_queries, arguments):
        """Compute ragas metrics over up to ``arguments.limits`` samples."""
        from langchain_huggingface import HuggingFaceEndpointEmbeddings

        embeddings = HuggingFaceEndpointEmbeddings(model=arguments.tei_embedding_endpoint)
        metric = RagasMetric(threshold=0.5, model=arguments.llm_endpoint, embeddings=embeddings)
        all_answer_relevancy = 0  # NOTE(review): unused accumulator
        all_faithfulness = 0  # NOTE(review): unused accumulator
        ragas_inputs = {
            "question": [],
            "answer": [],
            "ground_truth": [],
            "contexts": [],
        }
        for data in tqdm(all_queries):
            if data["question_type"] == "null_query":
                continue
            retrieved_documents = self.get_retrieved_documents(data["query"], arguments)
            generated_text = self.send_request(data, arguments)
            data["generated_text"] = generated_text
            ragas_inputs["question"].append(data["query"])
            ragas_inputs["answer"].append(generated_text)
            ragas_inputs["ground_truth"].append(data["answer"])
            # Only the top-3 retrieved passages are scored as context.
            ragas_inputs["contexts"].append(retrieved_documents[:3])
            # Cap the number of samples sent to the LLM-as-judge.
            if len(ragas_inputs["question"]) >= arguments.limits:
                break
        ragas_metrics = metric.measure(ragas_inputs)
        return ragas_metrics
def args_parser():
    """Parse command-line arguments for the MultiHop-RAG evaluation.

    Returns:
        argparse.Namespace with service endpoints, generation, chunking and
        retrieval parameters.
    """

    def str_to_bool(value):
        # argparse's ``type=bool`` treats any non-empty string as True, so
        # ``--show_progress_bar False`` used to silently parse as True.
        # This helper interprets common falsy spellings correctly.
        return str(value).strip().lower() not in ("false", "0", "no", "n", "off", "")

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--service_url", type=str, default="http://localhost:8888/v1/chatqna", help="Service URL address."
    )
    parser.add_argument("--output_dir", type=str, default="./output", help="Directory to save evaluation results.")
    parser.add_argument(
        "--temperature", type=float, default=0.1, help="Controls the randomness of the model's text generation"
    )
    parser.add_argument(
        "--max_new_tokens", type=int, default=1280, help="Maximum number of new tokens to be generated by the model"
    )
    parser.add_argument(
        "--chunk_size", type=int, default=256, help="the maximum number of characters that a chunk can contain"
    )
    parser.add_argument(
        "--chunk_overlap",
        type=int,
        default=100,
        help="the number of characters that should overlap between two adjacent chunks",
    )
    parser.add_argument("--search_type", type=str, default="similarity", help="similarity type")
    parser.add_argument("--retrival_k", type=int, default=10, help="Number of Documents to return.")
    parser.add_argument(
        "--fetch_k", type=int, default=20, help="Number of Documents to fetch to pass to MMR algorithm."
    )
    parser.add_argument(
        "--lambda_mult",
        type=float,
        default=0.5,
        help="Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.",
    )
    parser.add_argument("--dataset_path", default=None, help="Path to the dataset")
    parser.add_argument("--docs_path", default=None, help="Path to the retrieval documents")

    # Retriever related options
    parser.add_argument("--ingest_docs", action="store_true", help="Whether to ingest documents to vector database")
    parser.add_argument("--retrieval_metrics", action="store_true", help="Whether to compute retrieval metrics.")
    parser.add_argument("--ragas_metrics", action="store_true", help="Whether to compute ragas metrics.")
    parser.add_argument("--limits", type=int, default=100, help="Number of examples to be evaluated by llm-as-judge")
    parser.add_argument(
        "--database_endpoint", type=str, default="http://localhost:6007/v1/dataprep", help="Service URL address."
    )
    parser.add_argument(
        "--embedding_endpoint", type=str, default="http://localhost:6000/v1/embeddings", help="Service URL address."
    )
    parser.add_argument(
        "--tei_embedding_endpoint",
        type=str,
        default="http://localhost:8090",
        help="Service URL address of tei embedding.",
    )
    parser.add_argument(
        "--retrieval_endpoint", type=str, default="http://localhost:7000/v1/retrieval", help="Service URL address."
    )
    parser.add_argument("--rerank", action="store_true", help="Whether to use rerank microservice.")
    parser.add_argument(
        "--reranking_endpoint", type=str, default="http://localhost:8000/v1/reranking", help="Service URL address."
    )
    parser.add_argument("--llm_endpoint", type=str, default=None, help="Service URL address.")
    parser.add_argument(
        "--show_progress_bar", action="store", default=True, type=str_to_bool, help="Whether to show a progress bar"
    )
    parser.add_argument("--contain_original_data", action="store_true", help="Whether to contain original data")
    args = parser.parse_args()
    return args
def main():
    """Entry point for the MultiHop-RAG evaluation.

    Loads the corpus, optionally ingests it into the vector database via
    the dataprep service, then computes retrieval and/or ragas metrics for
    the benchmark queries.
    """
    args = args_parser()
    evaluator = MultiHop_Evaluator()

    with open(args.docs_path, "r") as file:
        doc_data = json.load(file)
    # Only the article bodies are ingested; the title/published_at/source
    # metadata in the corpus is not used by the dataprep service (the
    # original code built a metadata dict per doc but never used it).
    documents = [doc["body"] for doc in doc_data]

    # save docs to a tmp file for ingestion
    tmp_corpus_file = "tmp_corpus.txt"
    with open(tmp_corpus_file, "w") as f:
        for doc in documents:
            f.write(doc + "\n")

    if args.ingest_docs:
        evaluator.ingest_docs(tmp_corpus_file, args.database_endpoint, args.chunk_size, args.chunk_overlap)

    with open(args.dataset_path, "r") as file:
        all_queries = json.load(file)

    # get retrieval quality
    if args.retrieval_metrics:
        retrieval_metrics = evaluator.get_retrieval_metrics(all_queries, args)
        print(retrieval_metrics)

    # get rag quality
    if args.ragas_metrics:
        ragas_metrics = evaluator.get_ragas_metrics(all_queries, args)
        print(ragas_metrics)


if __name__ == "__main__":
    main()

View File

@@ -1,9 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import os
# Give every raw CRUD document a ``.txt`` suffix so the dataprep service
# accepts it. Files that already end in ``.txt`` are skipped, which makes
# the script safe to re-run (previously a second run produced ``.txt.txt``).
path = os.path.join(os.path.dirname(__file__), "./data/80000_docs")
for file in os.listdir(path):
    if file.endswith(".txt"):
        continue
    src_file = os.path.join(path, file)
    os.rename(src_file, src_file + ".txt")

View File

@@ -1,64 +0,0 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -x
# Dispatch to the dataset-specific runner selected via --dataset=<MultiHop|crud>.
function main {
    init_params "$@"
    # run_benchmark
    echo $dataset
    if [[ ${dataset} == "MultiHop" ]]; then
        run_multihop
    elif [[ ${dataset} == "crud" ]]; then
        run_crud
    else
        # Previously an unknown dataset fell through silently and did nothing.
        echo "Error: unknown dataset '${dataset}', expected MultiHop or crud"
        exit 1
    fi
}

# init params: parse --dataset=<name>; any other flag is an error.
function init_params {
    for var in "$@"
    do
        case $var in
            --dataset=*)
                dataset=$( echo $var |cut -f2 -d=)
            ;;
            *)
                echo "Error: No such parameter: ${var}"
                exit 1
            ;;
        esac
    done
}

# run_multihop: clone the MultiHop-RAG repo, ingest its corpus and score retrieval.
function run_multihop {
    git clone https://github.com/yixuantt/MultiHop-RAG.git
    python eval_multihop.py \
        --docs_path MultiHop-RAG/dataset/corpus.json \
        --dataset_path MultiHop-RAG/dataset/MultiHopRAG.json \
        --ingest_docs \
        --retrieval_metrics
}

# run_crud: clone CRUD_RAG, stage its data locally and run the CRUD evaluation.
function run_crud {
    git clone https://github.com/IAAR-Shanghai/CRUD_RAG
    # -p makes re-runs idempotent (plain mkdir fails if data/ already exists).
    mkdir -p data/
    cp CRUD_RAG/data/crud_split/split_merged.json data/
    cp -r CRUD_RAG/data/80000_docs/ data/
    python process_crud_dataset.py
    python eval_crud.py \
        --dataset_path ./data/split_merged.json \
        --docs_path ./data/80000_docs \
        --ingest_docs
}

main "$@"

View File

@@ -88,9 +88,22 @@ find . -name '*.yaml' -type f -exec sed -i "s#\$(EMBEDDING_MODEL_ID)#${EMBEDDING
find . -name '*.yaml' -type f -exec sed -i "s#\$(RERANK_MODEL_ID)#${RERANK_MODEL_ID}#g" {} \;
```
### Benchmark tool preparation
The test uses the [benchmark tool](https://github.com/opea-project/GenAIEval/tree/main/evals/benchmark/README.md) to do performance test. We need to set up benchmark tool at the master node of Kubernetes which is k8s-master.
```bash
# on k8s-master node
git clone https://github.com/opea-project/GenAIEval.git
cd GenAIEval
python3 -m venv stress_venv
source stress_venv/bin/activate
pip install -r requirements.txt
```
### Test Configurations
By default, the workload and benchmark configuration is as below:
Workload configuration:
| Key | Value |
| -------- | ------- |
@@ -176,21 +189,24 @@ curl -X POST "http://${cluster_ip}:6007/v1/dataprep" \
###### 3.2 Run Benchmark Test
Before the benchmark, we can configure the number of test queries and test output directory by:
We copy the configuration file [benchmark.yaml](./benchmark.yaml) to `GenAIEval/evals/benchmark/benchmark.yaml` and config `test_suite_config.deployment_type`, `test_suite_config.service_ip`, `test_suite_config.service_port`, `test_suite_config.user_queries` and `test_suite_config.test_output_dir`.
```bash
export DEPLOYMENT_TYPE="k8s"
export SERVICE_IP=None
export SERVICE_PORT=None
export USER_QUERIES="[640, 640, 640, 640]"
export TEST_OUTPUT_DIR="/home/sdp/benchmark_output/node_1"
envsubst < ./benchmark.yaml > GenAIEval/evals/benchmark/benchmark.yaml
```
And then run the benchmark by:
And then run the benchmark tool by:
```bash
bash benchmark.sh -n 1
cd GenAIEval/evals/benchmark
python benchmark.py
```
The argument `-n` refers to the number of test nodes. Note that necessary dependencies will be automatically installed when running benchmark for the first time.
##### 4. Data collection
All the test results will come to this folder `/home/sdp/benchmark_output/node_1` configured by the environment variable `TEST_OUTPUT_DIR` in previous steps.
@@ -226,20 +242,22 @@ kubectl apply -f .
##### 3. Run tests
Before the benchmark, we can configure the number of test queries and test output directory by:
We copy the configuration file [benchmark.yaml](./benchmark.yaml) to `GenAIEval/evals/benchmark/benchmark.yaml` and config `test_suite_config.deployment_type`, `test_suite_config.service_ip`, `test_suite_config.service_port`, `test_suite_config.user_queries` and `test_suite_config.test_output_dir`.
```bash
````bash
export DEPLOYMENT_TYPE="k8s"
export SERVICE_IP=None
export SERVICE_PORT=None
export USER_QUERIES="[1280, 1280, 1280, 1280]"
export TEST_OUTPUT_DIR="/home/sdp/benchmark_output/node_2"
```
envsubst < ./benchmark.yaml > GenAIEval/evals/benchmark/benchmark.yaml
And then run the benchmark by:
And then run the benchmark tool by:
```bash
bash benchmark.sh -n 2
```
The argument `-n` refers to the number of test nodes. Note that necessary dependencies will be automatically installed when running benchmark for the first time.
cd GenAIEval/evals/benchmark
python benchmark.py
````
##### 4. Data collection
@@ -275,21 +293,24 @@ kubectl apply -f .
##### 3. Run tests
Before the benchmark, we can configure the number of test queries and test output directory by:
We copy the configuration file [benchmark.yaml](./benchmark.yaml) to `GenAIEval/evals/benchmark/benchmark.yaml` and config `test_suite_config.deployment_type`, `test_suite_config.service_ip`, `test_suite_config.service_port`, `test_suite_config.user_queries` and `test_suite_config.test_output_dir`.
```bash
export DEPLOYMENT_TYPE="k8s"
export SERVICE_IP=None
export SERVICE_PORT=None
export USER_QUERIES="[2560, 2560, 2560, 2560]"
export TEST_OUTPUT_DIR="/home/sdp/benchmark_output/node_4"
envsubst < ./benchmark.yaml > GenAIEval/evals/benchmark/benchmark.yaml
```
And then run the benchmark by:
And then run the benchmark tool by:
```bash
bash benchmark.sh -n 4
cd GenAIEval/evals/benchmark
python benchmark.py
```
The argument `-n` refers to the number of test nodes. Note that necessary dependencies will be automatically installed when running benchmark for the first time.
##### 4. Data collection
All the test results will come to this folder `/home/sdp/benchmark_output/node_4` configured by the environment variable `TEST_OUTPUT_DIR` in previous steps.
@@ -348,21 +369,24 @@ Refer to the [NVIDIA GPU Guide](../../docker_compose/nvidia/gpu/README.md) for m
### Run tests
Before the benchmark, we can configure the number of test queries and test output directory by:
We copy the configuration file [benchmark.yaml](./benchmark.yaml) to `GenAIEval/evals/benchmark/benchmark.yaml` and config `test_suite_config.deployment_type`, `test_suite_config.service_ip`, `test_suite_config.service_port`, `test_suite_config.user_queries` and `test_suite_config.test_output_dir`.
```bash
export DEPLOYMENT_TYPE="docker"
export SERVICE_IP="ChatQnA Service IP"
export SERVICE_PORT="ChatQnA Service Port"
export USER_QUERIES="[640, 640, 640, 640]"
export TEST_OUTPUT_DIR="/home/sdp/benchmark_output/docker"
envsubst < ./benchmark.yaml > GenAIEval/evals/benchmark/benchmark.yaml
```
And then run the benchmark by:
And then run the benchmark tool by:
```bash
bash benchmark.sh -d docker -i <service-ip> -p <service-port>
cd GenAIEval/evals/benchmark
python benchmark.py
```
The argument `-i` and `-p` refer to the deployed ChatQnA service IP and port, respectively. Note that necessary dependencies will be automatically installed when running benchmark for the first time.
### Data collection
All the test results will come to this folder `/home/sdp/benchmark_output/docker` configured by the environment variable `TEST_OUTPUT_DIR` in previous steps.

View File

@@ -1,99 +0,0 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
deployment_type="k8s"
node_number=1
# service_ip/service_port stay empty unless set via -i/-p. The docker default
# port (8888) is applied only after validation: with an eager default, the
# "ignored for k8s" warning below always fired for k8s runs and SERVICE_PORT
# was exported as 8888 instead of the documented "None".
service_ip=""
service_port=""
query_per_node=640
benchmark_tool_path="$(pwd)/GenAIEval"

# Print usage and exit non-zero.
usage() {
    echo "Usage: $0 [-d deployment_type] [-n node_number] [-i service_ip] [-p service_port]"
    echo "  -d deployment_type    ChatQnA deployment type, select between k8s and docker (default: k8s)"
    echo "  -n node_number        Test node number, required only for k8s deployment_type, (default: 1)"
    echo "  -i service_ip         chatqna service ip, required only for docker deployment_type"
    echo "  -p service_port       chatqna service port, required only for docker deployment_type, (default: 8888)"
    exit 1
}

while getopts ":d:n:i:p:" opt; do
    case ${opt} in
        d )
            deployment_type=$OPTARG
            ;;
        n )
            node_number=$OPTARG
            ;;
        i )
            service_ip=$OPTARG
            ;;
        p )
            service_port=$OPTARG
            ;;
        \? )
            echo "Invalid option: -$OPTARG" 1>&2
            usage
            ;;
        : )
            echo "Invalid option: -$OPTARG requires an argument" 1>&2
            usage
            ;;
    esac
done

if [[ "$deployment_type" == "docker" ]]; then
    if [[ -z "$service_ip" ]]; then
        echo "Error: service_ip is required for docker deployment_type" 1>&2
        usage
    fi
    # Apply the default docker service port only when the user did not set one.
    service_port=${service_port:-8888}
elif [[ "$deployment_type" == "k8s" && ( -n "$service_ip" || -n "$service_port" ) ]]; then
    echo "Warning: service_ip and service_port are ignored for k8s deployment_type" 1>&2
fi

# Set up the benchmark tool on first use, then run it.
function main() {
    if [[ ! -d ${benchmark_tool_path} ]]; then
        echo "Benchmark tool not found, setting up..."
        setup_env
    fi
    run_benchmark
}

# Clone GenAIEval and install its dependencies in a dedicated virtualenv.
function setup_env() {
    git clone https://github.com/opea-project/GenAIEval.git
    pushd ${benchmark_tool_path}
    python3 -m venv stress_venv
    source stress_venv/bin/activate
    pip install -r requirements.txt
    popd
}

# Export the benchmark configuration, render benchmark.yaml and run the tool.
function run_benchmark() {
    source ${benchmark_tool_path}/stress_venv/bin/activate
    export DEPLOYMENT_TYPE=${deployment_type}
    export SERVICE_IP=${service_ip:-"None"}
    export SERVICE_PORT=${service_port:-"None"}
    if [[ -z $USER_QUERIES ]]; then
        user_query=$((query_per_node*node_number))
        export USER_QUERIES="[${user_query}, ${user_query}, ${user_query}, ${user_query}]"
        echo "USER_QUERIES not configured, setting to: ${USER_QUERIES}."
    fi
    # Warm-up count = first entry of USER_QUERIES (strip brackets, keep text before first comma).
    export WARMUP=$(echo $USER_QUERIES | sed -e 's/[][]//g' -e 's/,.*//')
    if [[ -z $WARMUP ]]; then export WARMUP=0; fi
    if [[ -z $TEST_OUTPUT_DIR ]]; then
        if [[ $DEPLOYMENT_TYPE == "k8s" ]]; then
            export TEST_OUTPUT_DIR="${benchmark_tool_path}/evals/benchmark/benchmark_output/node_${node_number}"
        else
            export TEST_OUTPUT_DIR="${benchmark_tool_path}/evals/benchmark/benchmark_output/docker"
        fi
        echo "TEST_OUTPUT_DIR not configured, setting to: ${TEST_OUTPUT_DIR}."
    fi

    envsubst < ./benchmark.yaml > ${benchmark_tool_path}/evals/benchmark/benchmark.yaml
    cd ${benchmark_tool_path}/evals/benchmark
    python benchmark.py
}

main

View File

@@ -6,24 +6,14 @@ test_suite_config: # Overall configuration settings for the test suite
deployment_type: ${DEPLOYMENT_TYPE} # Default is "k8s", can also be "docker"
service_ip: ${SERVICE_IP} # Leave as None for k8s, specify for Docker
service_port: ${SERVICE_PORT} # Leave as None for k8s, specify for Docker
warm_ups: ${WARMUP} # Number of test requests for warm-up
run_time: 60m # The max total run time for the test suite
seed: # The seed for all RNGs
concurrent_level: 5 # The concurrency level, adjustable based on requirements
user_queries: ${USER_QUERIES} # Number of test requests at each concurrency level
query_timeout: 120 # Number of seconds to wait for a simulated user to complete any executing task before exiting. 120 sec by defeult.
random_prompt: false # Use random prompts if true, fixed prompts if false
run_time: 60m # The max total run time for the test suite
collect_service_metric: false # Collect service metrics if true, do not collect service metrics if false
data_visualization: false # Generate data visualization if true, do not generate data visualization if false
llm_model: "Intel/neural-chat-7b-v3-3" # The LLM model used for the test
test_output_dir: "${TEST_OUTPUT_DIR}" # The directory to store the test output
load_shape: # Tenant concurrency pattern
name: constant # poisson or constant(locust default load shape)
params: # Loadshape-specific parameters
constant: # Constant load shape specific parameters, activate only if load_shape.name is constant
concurrent_level: 5 # If user_queries is specified, concurrent_level is target number of requests per user. If not, it is the number of simulated users
# arrival_rate: 1.0 # Request arrival rate. If set, concurrent_level will be overridden, constant load will be generated based on arrival-rate
poisson: # Poisson load shape specific parameters, activate only if load_shape.name is poisson
arrival_rate: 1.0 # Request arrival rate
test_cases:
chatqna:

View File

@@ -1,6 +1,6 @@
# Benchmarking Deployment
# ChatQnA Deployment
This document guides you through deploying this example pipelines using Helm charts. Helm charts simplify managing Kubernetes applications by packaging configuration and resources.
This document guides you through deploying ChatQnA pipelines using Helm charts. Helm charts simplify managing Kubernetes applications by packaging configuration and resources.
## Getting Started
@@ -8,19 +8,31 @@ This document guides you through deploying this example pipelines using Helm cha
```bash
# on k8s-master node
cd GenAIExamples/{example_name}/benchmark/performance/helm_charts
cd GenAIExamples/ChatQnA/benchmark/performance/helm_charts
# Replace <your token> with your actual Hugging Face token and run the following command:
HUGGINGFACE_TOKEN=<your token>
find . -name '*.yaml' -type f -exec sed -i "s#\${HF_TOKEN}#${HUGGINGFACE_TOKEN}#g" {} \;
# Replace the following placeholders with the desired model IDs:
LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
EMBEDDING_MODEL_ID=BAAI/bge-base-en-v1.5
RERANK_MODEL_ID=BAAI/bge-reranker-base
find . -name '*.yaml' -type f -exec sed -i "s#\$(LLM_MODEL_ID)#${LLM_MODEL_ID}#g" {} \;
find . -name '*.yaml' -type f -exec sed -i "s#\$(EMBEDDING_MODEL_ID)#${EMBEDDING_MODEL_ID}#g" {} \;
find . -name '*.yaml' -type f -exec sed -i "s#\$(RERANK_MODEL_ID)#${RERANK_MODEL_ID}#g" {} \;
# Replace the key of HUGGINGFACEHUB_API_TOKEN with your actual Hugging Face token:
# vim hpu_with_rerank.yaml or hpu_without_rerank.yaml
HUGGINGFACEHUB_API_TOKEN: hf_xxxxx
```
### Deployment
### ChatQnA Installation
```bash
# Options:
# --num_nodes choices=[1, 2, 4, 8]
# --mode choices=["tuned", "oob"]
# --workflow choices=["with_rerank", "without_rerank"]
python deployment.py --workflow=with_rerank --mode=tuned --num_nodes=1
# Deploy a ChatQnA pipeline using the specified YAML configuration.
# To deploy with different configurations, simply provide a different YAML file.
helm install chatqna helm_charts/ -f helm_charts/oob_single_node.yaml
# Tips: To display rendered manifests according to the given yaml.
helm template chatqna helm_charts/ -f helm_charts/oob_single_node.yaml
```
Notes: The provided [BKC manifests](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/benchmark) for single, two, and four node Kubernetes clusters are generated using this tool.

View File

@@ -1,48 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# Pod replica counts and compute resources for a ChatQnA deployment.
# The rerank and LLM services each request one Gaudi accelerator
# (habana.ai/gaudi); the other services run on CPU.
podSpecs:
  # Gateway / orchestrator serving the /v1/chatqna endpoint.
  - name: chatqna-backend-server-deploy
    replicas: 2
    resources:
      limits:
        cpu: "8"
        memory: "8000Mi"
      requests:
        cpu: "8"
        memory: "8000Mi"
  # Embedding service; CPU-bound, hence the large CPU allocation.
  - name: embedding-dependency-deploy
    replicas: 1
    resources:
      limits:
        cpu: "80"
        memory: "20000Mi"
      requests:
        cpu: "80"
        memory: "20000Mi"
  # Reranker on one Gaudi card.
  - name: reranking-dependency-deploy
    replicas: 1
    resources:
      limits:
        habana.ai/gaudi: 1
  # LLM serving; one Gaudi card per replica, 7 replicas.
  - name: llm-dependency-deploy
    replicas: 7
    resources:
      limits:
        habana.ai/gaudi: 1
  # Document ingestion service; no explicit resource constraints.
  - name: dataprep-deploy
    replicas: 1
  # Vector database backing retrieval; no explicit resource constraints.
  - name: vector-db
    replicas: 1
  # Retriever; requests only (no limits), so it may burst above 4 CPUs.
  - name: retriever-deploy
    replicas: 2
    resources:
      requests:
        cpu: "4"
        memory: "4000Mi"

View File

@@ -1,168 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import argparse
import os
import subprocess
import yaml
def generate_yaml(num_nodes, mode="oob", with_rerank=True):
    """Generate a Helm override YAML (replicas, resources, TGI args) for ChatQnA.

    Args:
        num_nodes (int): Number of Gaudi nodes to deploy across (1, 2, 4 or 8).
        mode (str): "oob" writes replica counts only; "tuned" additionally pins
            CPU/memory resources and TGI serving arguments.
        with_rerank (bool): Include the reranking microservice in the pipeline.
            NOTE(fix): the previous default was the *string* "True", which is
            always truthy — a boolean is used now; any truthy value behaves
            identically, so existing callers are unaffected.

    Returns:
        str: Absolute path of the YAML file written to the current working
        directory, named "{mode}_{num_nodes}_gaudi_with[out]_rerank.yaml".
    """
    common_pods = [
        "chatqna-backend-server-deploy",
        "embedding-dependency-deploy",
        "dataprep-deploy",
        "vector-db",
        "retriever-deploy",
    ]
    if with_rerank:
        pods_list = common_pods + ["reranking-dependency-deploy", "llm-dependency-deploy"]
    else:
        pods_list = common_pods + ["llm-dependency-deploy"]

    # Replica plan: reranking (when enabled) reserves one Gaudi card, so the
    # LLM service gets one fewer replica (7 vs 8 on one node; 8n-1 vs 8n on n nodes).
    if num_nodes == 1:
        replicas = [
            {"name": "chatqna-backend-server-deploy", "replicas": 2},
            {"name": "embedding-dependency-deploy", "replicas": 1},
            {"name": "reranking-dependency-deploy", "replicas": 1} if with_rerank else None,
            {"name": "llm-dependency-deploy", "replicas": 7 if with_rerank else 8},
            {"name": "dataprep-deploy", "replicas": 1},
            {"name": "vector-db", "replicas": 1},
            {"name": "retriever-deploy", "replicas": 2},
        ]
    else:
        replicas = [
            {"name": "chatqna-backend-server-deploy", "replicas": 1 * num_nodes},
            {"name": "embedding-dependency-deploy", "replicas": 1 * num_nodes},
            {"name": "reranking-dependency-deploy", "replicas": 1} if with_rerank else None,
            {"name": "llm-dependency-deploy", "replicas": (8 * num_nodes) - 1 if with_rerank else 8 * num_nodes},
            {"name": "dataprep-deploy", "replicas": 1},
            {"name": "vector-db", "replicas": 1},
            {"name": "retriever-deploy", "replicas": 1 * num_nodes},
        ]

    # Resource pins, applied only in "tuned" mode (see dicts_to_check below).
    resources = [
        {
            "name": "chatqna-backend-server-deploy",
            "resources": {"limits": {"cpu": "16", "memory": "8000Mi"}, "requests": {"cpu": "16", "memory": "8000Mi"}},
        },
        {
            "name": "embedding-dependency-deploy",
            "resources": {"limits": {"cpu": "80", "memory": "20000Mi"}, "requests": {"cpu": "80", "memory": "20000Mi"}},
        },
        (
            {"name": "reranking-dependency-deploy", "resources": {"limits": {"habana.ai/gaudi": 1}}}
            if with_rerank
            else None
        ),
        {"name": "llm-dependency-deploy", "resources": {"limits": {"habana.ai/gaudi": 1}}},
        {"name": "retriever-deploy", "resources": {"requests": {"cpu": "8", "memory": "8000Mi"}}},
    ]
    # Drop the None placeholders left by the conditional entries above.
    replicas = [replica for replica in replicas if replica]
    resources = [resource for resource in resources if resource]

    # TGI serving arguments applied to the LLM pod in "tuned" mode only.
    tgi_params = [
        {
            "name": "llm-dependency-deploy",
            "args": [
                {"name": "--model-id", "value": "$(LLM_MODEL_ID)"},
                {"name": "--max-input-length", "value": 1280},
                {"name": "--max-total-tokens", "value": 2048},
                {"name": "--max-batch-total-tokens", "value": 65536},
                {"name": "--max-batch-prefill-tokens", "value": 4096},
            ],
        },
    ]
    replicas_dict = {item["name"]: item["replicas"] for item in replicas}
    resources_dict = {item["name"]: item["resources"] for item in resources}
    tgi_params_dict = {item["name"]: item["args"] for item in tgi_params}

    # "oob" emits only replica counts; "tuned" layers resources and TGI args on top.
    dicts_to_check = [
        {"dict": replicas_dict, "key": "replicas"},
    ]
    if mode == "tuned":
        dicts_to_check.extend([{"dict": resources_dict, "key": "resources"}, {"dict": tgi_params_dict, "key": "args"}])

    # Merge the per-pod overrides into a single podSpecs document, skipping
    # pods for which no override at all was collected.
    merged_specs = {"podSpecs": []}
    for pod in pods_list:
        pod_spec = {"name": pod}
        for item in dicts_to_check:
            if pod in item["dict"]:
                pod_spec[item["key"]] = item["dict"][pod]
        if len(pod_spec) > 1:
            merged_specs["podSpecs"].append(pod_spec)

    yaml_data = yaml.dump(merged_specs, default_flow_style=False)
    print(yaml_data)
    if with_rerank:
        filename = f"{mode}_{num_nodes}_gaudi_with_rerank.yaml"
    else:
        filename = f"{mode}_{num_nodes}_gaudi_without_rerank.yaml"
    with open(filename, "w") as file:
        file.write(yaml_data)
    filepath = os.path.join(os.getcwd(), filename)
    print(f"YAML file {filepath} has been generated.")
    return filepath
def main():
    """CLI entry point: generate the override YAML, then install (or render)
    the ChatQnA Helm chart with it plus the selected workflow values file."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--name", help="The name of example pipelines", default="chatqna")
    parser.add_argument("--folder", help="The path of helmcharts folder", default=".")
    parser.add_argument(
        "--num_nodes", help="Number of nodes to deploy", type=int, choices=[1, 2, 4, 8], default=1, required=True
    )
    parser.add_argument(
        "--mode", help="set up your chatqna in the specified mode", type=str, choices=["oob", "tuned"], default="oob"
    )
    parser.add_argument(
        "--workflow",
        help="with rerank in the pipeline",
        type=str,
        choices=["with_rerank", "without_rerank"],
        default="with_rerank",
    )
    parser.add_argument("--template", help="helm template", action="store_true")
    args = parser.parse_args()

    # Select the base workflow values file matching the rerank choice.
    use_rerank = args.workflow == "with_rerank"
    workflow_file = "./hpu_with_rerank.yaml" if use_rerank else "./hpu_without_rerank.yaml"

    # Generate the per-run override (replicas/resources/TGI args).
    overrides_file = generate_yaml(args.num_nodes, mode=args.mode, with_rerank=use_rerank)

    # "--template" renders manifests only; otherwise perform a real install.
    if args.template:
        helm_cmd = ["helm", "template", args.folder]
    else:
        helm_cmd = ["helm", "install", args.name, args.folder]
    helm_cmd += ["-f", workflow_file, "-f", overrides_file]
    subprocess.run(helm_cmd, check=True, text=True, capture_output=False)


if __name__ == "__main__":
    main()

View File

@@ -1,223 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
namespace: default
config:
CONFIG_MAP_NAME: chatqna-config
NODE_SELECTOR: opea
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
RERANK_MODEL_ID: BAAI/bge-reranker-base
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
RERANK_SERVER_HOST_IP: reranking-dependency-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
LLM_SERVER_HOST_IP: llm-dependency-svc
INDEX_NAME: rag-redis
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
LLM_SERVER_PORT: 9009
RERANK_SERVER_PORT: 8808
EMBEDDING_SERVER_PORT: 6006
microservices:
- name: chatqna-backend-server-deploy
image: opea/chatqna:latest
replicas: 1
ports:
- containerPort: 8888
- name: dataprep-deploy
image: opea/dataprep-redis:latest
replicas: 1
ports:
- containerPort: 6007
- name: vector-db
image: redis/redis-stack:7.2.0-v9
replicas: 1
ports:
- containerPort: 6379
- containerPort: 8001
- name: retriever-deploy
image: opea/retriever-redis:latest
replicas: 1
ports:
- containerPort: 7000
- name: embedding-dependency-deploy
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
replicas: 1
ports:
- containerPort: 80
args:
- name: "--model-id"
value: $(EMBEDDING_MODEL_ID)
- name: "--auto-truncate"
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
- name: reranking-dependency-deploy
image: opea/tei-gaudi:latest
replicas: 1
resources:
limits:
habana.ai/gaudi: 1
args:
- name: "--model-id"
- value: $(RERANK_MODEL_ID)
- name: "--auto-truncate"
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: "true"
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: MAX_WARMUP_SEQUENCE_LENGTH
value: "512"
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
- name: llm-dependency-deploy
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
replicas: 1
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
args:
- name: "--model-id"
value: $(LLM_MODEL_ID)
- name: "--max-input-length"
value: "2048"
- name: "--max-total-tokens"
value: "4096"
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: "true"
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
services:
- name: chatqna-backend-server-svc
spec:
ports:
- name: service
nodePort: 30888
port: 8888
targetPort: 8888
selector:
app: chatqna-backend-server-deploy
type: NodePort
- name: dataprep-svc
spec:
ports:
- name: port1
port: 6007
targetPort: 6007
selector:
app: dataprep-deploy
type: ClusterIP
- name: embedding-dependency-svc
spec:
ports:
- name: service
port: 6006
targetPort: 80
selector:
app: embedding-dependency-deploy
type: ClusterIP
- name: llm-dependency-svc
spec:
ports:
- name: service
port: 9009
targetPort: 80
selector:
app: llm-dependency-deploy
type: ClusterIP
- name: reranking-dependency-svc
spec:
ports:
- name: service
port: 8808
targetPort: 80
selector:
app: reranking-dependency-deploy
type: ClusterIP
- name: retriever-svc
spec:
ports:
- name: service
port: 7000
targetPort: 7000
selector:
app: retriever-deploy
type: ClusterIP
- name: vector-db
spec:
ports:
- name: vector-db-service
port: 6379
targetPort: 6379
- name: vector-db-insight
port: 8001
targetPort: 8001
selector:
app: vector-db
type: ClusterIP

View File

@@ -1,166 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
namespace: default
config:
CONFIG_MAP_NAME: chatqna-config
NODE_SELECTOR: opea
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
RERANK_MODEL_ID: BAAI/bge-reranker-base
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
microservices:
- name: chatqna-backend-server-deploy
image: opea/chatqna-without-rerank:latest
replicas: 1
ports:
- containerPort: 8888
- name: dataprep-deploy
image: opea/dataprep-redis:latest
replicas: 1
ports:
- containerPort: 6007
- name: vector-db
image: redis/redis-stack:7.2.0-v9
replicas: 1
ports:
- containerPort: 6379
- containerPort: 8001
- name: retriever-deploy
image: opea/retriever-redis:latest
replicas: 1
ports:
- containerPort: 7000
- name: embedding-dependency-deploy
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
replicas: 1
ports:
- containerPort: 80
args:
- name: "--model-id"
value: $(EMBEDDING_MODEL_ID)
- name: "--auto-truncate"
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
- name: llm-dependency-deploy
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
replicas: 1
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
args:
- name: "--model-id"
value: $(LLM_MODEL_ID)
- name: "--max-input-length"
value: "2048"
- name: "--max-total-tokens"
value: "4096"
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: "true"
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
services:
- name: chatqna-backend-server-svc
spec:
ports:
- name: service
nodePort: 30888
port: 8888
targetPort: 8888
selector:
app: chatqna-backend-server-deploy
type: NodePort
- name: dataprep-svc
spec:
ports:
- name: port1
port: 6007
targetPort: 6007
selector:
app: dataprep-deploy
type: ClusterIP
- name: embedding-dependency-svc
spec:
ports:
- name: service
port: 6006
targetPort: 80
selector:
app: embedding-dependency-deploy
type: ClusterIP
- name: llm-dependency-svc
spec:
ports:
- name: service
port: 9009
targetPort: 80
selector:
app: llm-dependency-deploy
type: ClusterIP
- name: retriever-svc
spec:
ports:
- name: service
port: 7000
targetPort: 7000
selector:
app: retriever-deploy
type: ClusterIP
- name: vector-db
spec:
ports:
- name: vector-db-service
port: 6379
targetPort: 6379
- name: vector-db-insight
port: 8001
targetPort: 8001
selector:
app: vector-db
type: ClusterIP

View File

@@ -0,0 +1,237 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
config:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
INDEX_NAME: rag-redis
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
LLM_SERVER_HOST_IP: llm-dependency-svc
NODE_SELECTOR: chatqna-opea
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
RERANK_MODEL_ID: BAAI/bge-reranker-base
RERANK_SERVER_HOST_IP: reranking-dependency-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
deployments:
- name: chatqna-backend-server-deploy
spec:
image_name: opea/chatqna-no-wrapper
image_tag: latest
replicas: 1
ports:
- containerPort: 8888
- name: dataprep-deploy
spec:
image_name: opea/dataprep-redis
image_tag: latest
replicas: 1
ports:
- containerPort: 6007
- name: vector-db
spec:
image_name: redis/redis-stack
image_tag: 7.2.0-v9
replicas: 1
ports:
- containerPort: 6379
- containerPort: 8001
- name: retriever-deploy
spec:
image_name: opea/retriever-redis
image_tag: latest
replicas: 1
ports:
- containerPort: 7000
- name: embedding-dependency-deploy
spec:
image_name: ghcr.io/huggingface/text-embeddings-inference
image_tag: cpu-1.5
replicas: 1
ports:
- containerPort: 80
args:
- name: "--model-id"
value: $(EMBEDDING_MODEL_ID)
- name: "--auto-truncate"
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
- name: reranking-dependency-deploy
spec:
image_name: opea/tei-gaudi
image_tag: latest
replicas: 1
resources:
limits:
habana.ai/gaudi: 1
# Fix: "value" must belong to the "--model-id" list item (a separate
# "- value:" entry creates a second item), matching the embedding/LLM args style.
args:
- name: "--model-id"
  value: $(RERANK_MODEL_ID)
- name: "--auto-truncate"
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: "true"
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
- name: MAX_WARMUP_SEQUENCE_LENGTH
value: "512"
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
- name: llm-dependency-deploy
spec:
image_name: ghcr.io/huggingface/tgi-gaudi
image_tag: 2.0.4
replicas: 7
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
args:
- name: "--model-id"
value: $(LLM_MODEL_ID)
- name: "--max-input-length"
value: "2048"
- name: "--max-total-tokens"
value: "4096"
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: "true"
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
services:
- name: chatqna-backend-server-svc
spec:
ports:
- name: service
nodePort: 30888
port: 8888
targetPort: 8888
selector:
app: chatqna-backend-server-deploy
type: NodePort
- name: dataprep-svc
spec:
ports:
- name: port1
port: 6007
targetPort: 6007
selector:
app: dataprep-deploy
type: ClusterIP
- name: embedding-dependency-svc
spec:
ports:
- name: service
port: 6006
targetPort: 80
selector:
app: embedding-dependency-deploy
type: ClusterIP
- name: llm-dependency-svc
spec:
ports:
- name: service
port: 9009
targetPort: 80
selector:
app: llm-dependency-deploy
type: ClusterIP
- name: reranking-dependency-svc
spec:
ports:
- name: service
port: 8808
targetPort: 80
selector:
app: reranking-dependency-deploy
type: ClusterIP
- name: retriever-svc
spec:
ports:
- name: service
port: 7000
targetPort: 7000
selector:
app: retriever-deploy
type: ClusterIP
- name: vector-db
spec:
ports:
- name: vector-db-service
port: 6379
targetPort: 6379
- name: vector-db-insight
port: 8001
targetPort: 8001
selector:
app: vector-db
type: ClusterIP

View File

@@ -4,22 +4,22 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: {{ .Values.config.CONFIG_MAP_NAME }}
name: qna-config
namespace: default
data:
EMBEDDING_MODEL_ID: {{ .Values.config.EMBEDDING_MODEL_ID }}
EMBEDDING_SERVER_HOST_IP: {{ .Values.config.EMBEDDING_SERVER_HOST_IP }}
HUGGINGFACEHUB_API_TOKEN: {{ .Values.config.HUGGINGFACEHUB_API_TOKEN }}
NODE_SELECTOR: {{ .Values.config.NODE_SELECTOR }}
RERANK_MODEL_ID: {{ .Values.config.RERANK_MODEL_ID }}
INDEX_NAME: {{ .Values.config.INDEX_NAME }}
LLM_MODEL_ID: {{ .Values.config.LLM_MODEL_ID }}
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
RERANK_SERVER_HOST_IP: reranking-dependency-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
LLM_SERVER_HOST_IP: llm-dependency-svc
INDEX_NAME: rag-redis
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
LLM_SERVER_PORT: "9009"
RERANK_SERVER_PORT: "8808"
EMBEDDING_SERVER_PORT: "6006"
LLM_SERVER_HOST_IP: {{ .Values.config.LLM_SERVER_HOST_IP }}
NODE_SELECTOR: {{ .Values.config.NODE_SELECTOR }}
REDIS_URL: {{ .Values.config.REDIS_URL }}
RERANK_MODEL_ID: {{ .Values.config.RERANK_MODEL_ID }}
RERANK_SERVER_HOST_IP: {{ .Values.config.RERANK_SERVER_HOST_IP }}
RETRIEVER_SERVICE_HOST_IP: {{ .Values.config.RETRIEVER_SERVICE_HOST_IP }}
TEI_EMBEDDING_ENDPOINT: {{ .Values.config.TEI_EMBEDDING_ENDPOINT }}
TEI_ENDPOINT: {{ .Values.config.TEI_ENDPOINT }}
TEI_RERANKING_ENDPOINT: {{ .Values.config.TEI_RERANKING_ENDPOINT }}
TGI_LLM_ENDPOINT: {{ .Values.config.TGI_LLM_ENDPOINT }}
---

View File

@@ -1,47 +1,31 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
{{- $global := .Values }}
{{- range $microservice := .Values.microservices }}
{{- range $deployment := .Values.deployments }}
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ $microservice.name }}
name: {{ $deployment.name }}
namespace: default
spec:
{{- $replicas := $microservice.replicas }}
{{- range $podSpec := $global.podSpecs }}
{{- if eq $podSpec.name $microservice.name }}
{{- $replicas = $podSpec.replicas | default $microservice.replicas }}
{{- end }}
{{- end }}
replicas: {{ $replicas }}
replicas: {{ $deployment.spec.replicas }}
selector:
matchLabels:
app: {{ $microservice.name }}
app: {{ $deployment.name }}
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: {{ $microservice.name }}
app: {{ $deployment.name }}
spec:
containers:
- envFrom:
- configMapRef:
name: {{ $global.config.CONFIG_MAP_NAME }}
{{- $args := $microservice.args }}
{{- range $podSpec := $global.podSpecs }}
{{- if eq $podSpec.name $microservice.name }}
{{- $args = $podSpec.args | default $microservice.args }}
{{- end }}
{{- end }}
{{- if $microservice.args }}
name: qna-config
{{- if $deployment.spec.args }}
args:
{{- range $arg := $args }}
{{- range $arg := $deployment.spec.args }}
{{- if $arg.name }}
- {{ $arg.name }}
{{- end }}
@@ -51,46 +35,30 @@ spec:
{{- end }}
{{- end }}
{{- if $microservice.env }}
{{- if $deployment.spec.env }}
env:
{{- range $env := $microservice.env }}
{{- range $env := $deployment.spec.env }}
- name: {{ $env.name }}
value: "{{ $env.value }}"
{{- end }}
{{- end }}
{{- $image := $microservice.image }}
{{- range $podSpec := $global.podSpecs }}
{{- if eq $podSpec.name $microservice.name }}
{{- $image = $podSpec.image | default $microservice.image }}
{{- end }}
{{- end }}
image: {{ $image }}
image: {{ $deployment.spec.image_name }}:{{ $deployment.spec.image_tag }}
imagePullPolicy: IfNotPresent
name: {{ $microservice.name }}
name: {{ $deployment.name }}
{{- if $microservice.ports }}
{{- if $deployment.spec.ports }}
ports:
{{- range $port := $microservice.ports }}
{{- range $port := $deployment.spec.ports }}
{{- range $port_name, $port_id := $port }}
- {{ $port_name }}: {{ $port_id }}
{{- end }}
{{- end }}
{{- end }}
{{- $resources := $microservice.resources }}
{{- range $podSpec := $global.podSpecs }}
{{- if eq $podSpec.name $microservice.name }}
{{- if $podSpec.resources }}
{{- $resources = $podSpec.resources }}
{{- end }}
{{- end }}
{{- end }}
{{- if $resources }}
{{- if $deployment.spec.resources }}
resources:
{{- range $resourceType, $resource := $resources }}
{{- range $resourceType, $resource := $deployment.spec.resources }}
{{ $resourceType }}:
{{- range $limitType, $limit := $resource }}
{{ $limitType }}: {{ $limit }}
@@ -98,9 +66,9 @@ spec:
{{- end }}
{{- end }}
{{- if $microservice.volumeMounts }}
{{- if $deployment.spec.volumeMounts }}
volumeMounts:
{{- range $volumeMount := $microservice.volumeMounts }}
{{- range $volumeMount := $deployment.spec.volumeMounts }}
- mountPath: {{ $volumeMount.mountPath }}
name: {{ $volumeMount.name }}
{{- end }}
@@ -108,20 +76,20 @@ spec:
hostIPC: true
nodeSelector:
node-type: {{ $global.config.NODE_SELECTOR }}
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: {{ $microservice.name }}
app: {{ $deployment.name }}
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
{{- if $microservice.volumes }}
{{- if $deployment.spec.volumes }}
volumes:
{{- range $index, $volume := $microservice.volumes }}
{{- range $index, $volume := $deployment.spec.volumes }}
- name: {{ $volume.name }}
{{- if $volume.hostPath }}
hostPath:
@@ -135,5 +103,6 @@ spec:
{{- end }}
{{- end }}
---
{{- end }}

View File

@@ -0,0 +1,259 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
config:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
INDEX_NAME: rag-redis
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
LLM_SERVER_HOST_IP: llm-dependency-svc
NODE_SELECTOR: chatqna-opea
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
RERANK_MODEL_ID: BAAI/bge-reranker-base
RERANK_SERVER_HOST_IP: reranking-dependency-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
deployments:
- name: chatqna-backend-server-deploy
spec:
image_name: opea/chatqna-no-wrapper
image_tag: latest
replicas: 2
ports:
- containerPort: 8888
resources:
limits:
cpu: "8"
memory: "8000Mi"
requests:
cpu: "8"
memory: "8000Mi"
- name: dataprep-deploy
spec:
image_name: opea/dataprep-redis
image_tag: latest
replicas: 1
ports:
- containerPort: 6007
- name: vector-db
spec:
image_name: redis/redis-stack
image_tag: 7.2.0-v9
replicas: 1
ports:
- containerPort: 6379
- containerPort: 8001
- name: retriever-deploy
spec:
image_name: opea/retriever-redis
image_tag: latest
replicas: 2
ports:
- containerPort: 7000
resources:
requests:
cpu: "4"
memory: "4000Mi"
- name: embedding-dependency-deploy
spec:
image_name: ghcr.io/huggingface/text-embeddings-inference
image_tag: cpu-1.5
replicas: 1
ports:
- containerPort: 80
args:
- name: "--model-id"
value: $(EMBEDDING_MODEL_ID)
- name: "--auto-truncate"
resources:
limits:
cpu: "80"
memory: "20000Mi"
requests:
cpu: "80"
memory: "20000Mi"
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
- name: reranking-dependency-deploy
spec:
image_name: opea/tei-gaudi
image_tag: latest
replicas: 1
resources:
limits:
habana.ai/gaudi: 1
# Fix: "value" must belong to the "--model-id" list item (a separate
# "- value:" entry creates a second item), matching the embedding/LLM args style.
args:
- name: "--model-id"
  value: $(RERANK_MODEL_ID)
- name: "--auto-truncate"
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: "true"
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
- name: MAX_WARMUP_SEQUENCE_LENGTH
value: "512"
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
- name: llm-dependency-deploy
spec:
image_name: ghcr.io/huggingface/tgi-gaudi
image_tag: 2.0.4
replicas: 7
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
args:
- name: "--model-id"
value: $(LLM_MODEL_ID)
- name: "--max-input-length"
value: "1280"
- name: "--max-total-tokens"
value: "2048"
- name: "--max-batch-total-tokens"
value: "65536"
- name: "--max-batch-prefill-tokens"
value: "4096"
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: "true"
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
services:
- name: chatqna-backend-server-svc
spec:
ports:
- name: service
nodePort: 30888
port: 8888
targetPort: 8888
selector:
app: chatqna-backend-server-deploy
type: NodePort
- name: dataprep-svc
spec:
ports:
- name: port1
port: 6007
targetPort: 6007
selector:
app: dataprep-deploy
type: ClusterIP
- name: embedding-dependency-svc
spec:
ports:
- name: service
port: 6006
targetPort: 80
selector:
app: embedding-dependency-deploy
type: ClusterIP
- name: llm-dependency-svc
spec:
ports:
- name: service
port: 9009
targetPort: 80
selector:
app: llm-dependency-deploy
type: ClusterIP
- name: reranking-dependency-svc
spec:
ports:
- name: service
port: 8808
targetPort: 80
selector:
app: reranking-dependency-deploy
type: ClusterIP
- name: retriever-svc
spec:
ports:
- name: service
port: 7000
targetPort: 7000
selector:
app: retriever-deploy
type: ClusterIP
- name: vector-db
spec:
ports:
- name: vector-db-service
port: 6379
targetPort: 6379
- name: vector-db-insight
port: 8001
targetPort: 8001
selector:
app: vector-db
type: ClusterIP

View File

@@ -0,0 +1,237 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
config:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
INDEX_NAME: rag-redis
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
LLM_SERVER_HOST_IP: llm-dependency-svc
NODE_SELECTOR: chatqna-opea
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
RERANK_MODEL_ID: BAAI/bge-reranker-base
RERANK_SERVER_HOST_IP: reranking-dependency-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
deployments:
- name: chatqna-backend-server-deploy
spec:
image_name: opea/chatqna-no-wrapper
image_tag: latest
replicas: 1
ports:
- containerPort: 8888
- name: dataprep-deploy
spec:
image_name: opea/dataprep-redis
image_tag: latest
replicas: 1
ports:
- containerPort: 6007
- name: vector-db
spec:
image_name: redis/redis-stack
image_tag: 7.2.0-v9
replicas: 1
ports:
- containerPort: 6379
- containerPort: 8001
- name: retriever-deploy
spec:
image_name: opea/retriever-redis
image_tag: latest
replicas: 1
ports:
- containerPort: 7000
- name: embedding-dependency-deploy
spec:
image_name: ghcr.io/huggingface/text-embeddings-inference
image_tag: cpu-1.5
replicas: 1
ports:
- containerPort: 80
args:
- name: "--model-id"
value: $(EMBEDDING_MODEL_ID)
- name: "--auto-truncate"
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
- name: reranking-dependency-deploy
spec:
image_name: opea/tei-gaudi
image_tag: latest
replicas: 1
resources:
limits:
habana.ai/gaudi: 1
# Fix: "value" must belong to the "--model-id" list item (a separate
# "- value:" entry creates a second item), matching the embedding/LLM args style.
args:
- name: "--model-id"
  value: $(RERANK_MODEL_ID)
- name: "--auto-truncate"
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: "true"
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
- name: MAX_WARMUP_SEQUENCE_LENGTH
value: "512"
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
- name: llm-dependency-deploy
spec:
image_name: ghcr.io/huggingface/tgi-gaudi
image_tag: 2.0.4
replicas: 7
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
args:
- name: "--model-id"
value: $(LLM_MODEL_ID)
- name: "--max-input-length"
value: "2048"
- name: "--max-total-tokens"
value: "4096"
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: "true"
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
services:
- name: chatqna-backend-server-svc
spec:
ports:
- name: service
nodePort: 30888
port: 8888
targetPort: 8888
selector:
app: chatqna-backend-server-deploy
type: NodePort
- name: dataprep-svc
spec:
ports:
- name: port1
port: 6007
targetPort: 6007
selector:
app: dataprep-deploy
type: ClusterIP
- name: embedding-dependency-svc
spec:
ports:
- name: service
port: 6006
targetPort: 80
selector:
app: embedding-dependency-deploy
type: ClusterIP
- name: llm-dependency-svc
spec:
ports:
- name: service
port: 9009
targetPort: 80
selector:
app: llm-dependency-deploy
type: ClusterIP
- name: reranking-dependency-svc
spec:
ports:
- name: service
port: 8808
targetPort: 80
selector:
app: reranking-dependency-deploy
type: ClusterIP
- name: retriever-svc
spec:
ports:
- name: service
port: 7000
targetPort: 7000
selector:
app: retriever-deploy
type: ClusterIP
- name: vector-db
spec:
ports:
- name: vector-db-service
port: 6379
targetPort: 6379
- name: vector-db-insight
port: 8001
targetPort: 8001
selector:
app: vector-db
type: ClusterIP

View File

@@ -44,7 +44,7 @@ spec:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna:latest
image: opea/chatqna-no-wrapper:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
ports:
@@ -327,7 +327,7 @@ spec:
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tei-gaudi:latest
image: opea/tei-gaudi:latest
imagePullPolicy: IfNotPresent
name: reranking-dependency-deploy
ports:

View File

@@ -29,7 +29,7 @@ metadata:
name: chatqna-backend-server-deploy
namespace: default
spec:
replicas: 4
replicas: 1
selector:
matchLabels:
app: chatqna-backend-server-deploy
@@ -44,7 +44,7 @@ spec:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna:latest
image: opea/chatqna-no-wrapper:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
ports:
@@ -327,7 +327,7 @@ spec:
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tei-gaudi:latest
image: opea/tei-gaudi:latest
imagePullPolicy: IfNotPresent
name: reranking-dependency-deploy
ports:
@@ -381,7 +381,7 @@ metadata:
name: retriever-deploy
namespace: default
spec:
replicas: 4
replicas: 1
selector:
matchLabels:
app: retriever-deploy

View File

@@ -44,7 +44,7 @@ spec:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna:latest
image: opea/chatqna-no-wrapper:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
ports:
@@ -327,7 +327,7 @@ spec:
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tei-gaudi:latest
image: opea/tei-gaudi:latest
imagePullPolicy: IfNotPresent
name: reranking-dependency-deploy
ports:

View File

@@ -44,7 +44,7 @@ spec:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna
image: opea/chatqna-no-wrapper:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
ports:
@@ -327,7 +327,7 @@ spec:
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tei-gaudi:latest
image: opea/tei-gaudi:latest
imagePullPolicy: IfNotPresent
name: reranking-dependency-deploy
ports:

View File

@@ -44,7 +44,7 @@ spec:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna-without-rerank:latest
image: opea/chatqna-no-wrapper-without-rerank:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
ports:

View File

@@ -29,7 +29,7 @@ metadata:
name: chatqna-backend-server-deploy
namespace: default
spec:
replicas: 4
replicas: 1
selector:
matchLabels:
app: chatqna-backend-server-deploy
@@ -44,7 +44,7 @@ spec:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna-without-rerank:latest
image: opea/chatqna-no-wrapper-without-rerank:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
ports:
@@ -295,7 +295,7 @@ metadata:
name: retriever-deploy
namespace: default
spec:
replicas: 4
replicas: 1
selector:
matchLabels:
app: retriever-deploy

View File

@@ -44,7 +44,7 @@ spec:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna-without-rerank:latest
image: opea/chatqna-no-wrapper-without-rerank:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
ports:

View File

@@ -44,7 +44,7 @@ spec:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna-without-rerank:latest
image: opea/chatqna-no-wrapper-without-rerank:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
ports:

View File

@@ -1,507 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
data:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
INDEX_NAME: rag-redis
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
LLM_SERVER_HOST_IP: llm-dependency-svc
NODE_SELECTOR: chatqna-opea
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
RERANK_MODEL_ID: BAAI/bge-reranker-base
RERANK_SERVER_HOST_IP: reranking-dependency-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
kind: ConfigMap
metadata:
name: qna-config
namespace: default
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-backend-server-deploy
namespace: default
spec:
replicas: 8
selector:
matchLabels:
app: chatqna-backend-server-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: chatqna-backend-server-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
ports:
- containerPort: 8888
resources:
limits:
cpu: 8
memory: 8000Mi
requests:
cpu: 8
memory: 8000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: chatqna-backend-server-svc
namespace: default
spec:
ports:
- name: service
nodePort: 30888
port: 8888
targetPort: 8888
selector:
app: chatqna-backend-server-deploy
type: NodePort
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: dataprep-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: dataprep-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: dataprep-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/dataprep-redis:latest
imagePullPolicy: IfNotPresent
name: dataprep-deploy
ports:
- containerPort: 6007
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: dataprep-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: dataprep-svc
namespace: default
spec:
ports:
- name: port1
port: 6007
targetPort: 6007
selector:
app: dataprep-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-dependency-deploy
namespace: default
spec:
replicas: 8
selector:
matchLabels:
app: embedding-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(EMBEDDING_MODEL_ID)
- --auto-truncate
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
imagePullPolicy: IfNotPresent
name: embedding-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
cpu: 80
memory: 20000Mi
requests:
cpu: 80
memory: 20000Mi
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: embedding-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: embedding-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 6006
targetPort: 80
selector:
app: embedding-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-dependency-deploy
namespace: default
spec:
replicas: 63
selector:
matchLabels:
app: llm-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(LLM_MODEL_ID)
- --max-input-length
- '1280'
- --max-total-tokens
- '2048'
- --max-batch-total-tokens
- '65536'
- --max-batch-prefill-tokens
- '4096'
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
securityContext:
capabilities:
add:
- SYS_NICE
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: llm-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: llm-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 9009
targetPort: 80
selector:
app: llm-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: reranking-dependency-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: reranking-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: reranking-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(RERANK_MODEL_ID)
- --auto-truncate
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
- name: MAX_WARMUP_SEQUENCE_LENGTH
value: '512'
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tei-gaudi:latest
imagePullPolicy: IfNotPresent
name: reranking-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: reranking-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: reranking-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 8808
targetPort: 80
selector:
app: reranking-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: retriever-deploy
namespace: default
spec:
replicas: 8
selector:
matchLabels:
app: retriever-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: retriever-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/retriever-redis:latest
imagePullPolicy: IfNotPresent
name: retriever-deploy
ports:
- containerPort: 7000
resources:
requests:
cpu: 4
memory: 4000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: retriever-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: retriever-svc
namespace: default
spec:
ports:
- name: service
port: 7000
targetPort: 7000
selector:
app: retriever-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: vector-db
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: vector-db
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: vector-db
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: redis/redis-stack:7.2.0-v9
imagePullPolicy: IfNotPresent
name: vector-db
ports:
- containerPort: 6379
- containerPort: 8001
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: vector-db
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: vector-db
namespace: default
spec:
ports:
- name: vector-db-service
port: 6379
targetPort: 6379
- name: vector-db-insight
port: 8001
targetPort: 8001
selector:
app: vector-db
type: ClusterIP
---

View File

@@ -44,7 +44,7 @@ spec:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna:latest
image: opea/chatqna-no-wrapper:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
ports:
@@ -345,7 +345,7 @@ spec:
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tei-gaudi:latest
image: opea/tei-gaudi:latest
imagePullPolicy: IfNotPresent
name: reranking-dependency-deploy
ports:

View File

@@ -44,7 +44,7 @@ spec:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna:latest
image: opea/chatqna-no-wrapper:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
ports:
@@ -345,7 +345,7 @@ spec:
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tei-gaudi:latest
image: opea/tei-gaudi:latest
imagePullPolicy: IfNotPresent
name: reranking-dependency-deploy
ports:

View File

@@ -1,507 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
data:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
INDEX_NAME: rag-redis
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
LLM_SERVER_HOST_IP: llm-dependency-svc
NODE_SELECTOR: chatqna-opea
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
RERANK_MODEL_ID: BAAI/bge-reranker-base
RERANK_SERVER_HOST_IP: reranking-dependency-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
kind: ConfigMap
metadata:
name: qna-config
namespace: default
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-backend-server-deploy
namespace: default
spec:
replicas: 4
selector:
matchLabels:
app: chatqna-backend-server-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: chatqna-backend-server-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
ports:
- containerPort: 8888
resources:
limits:
cpu: 8
memory: 8000Mi
requests:
cpu: 8
memory: 8000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: chatqna-backend-server-svc
namespace: default
spec:
ports:
- name: service
nodePort: 30888
port: 8888
targetPort: 8888
selector:
app: chatqna-backend-server-deploy
type: NodePort
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: dataprep-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: dataprep-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: dataprep-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/dataprep-redis:latest
imagePullPolicy: IfNotPresent
name: dataprep-deploy
ports:
- containerPort: 6007
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: dataprep-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: dataprep-svc
namespace: default
spec:
ports:
- name: port1
port: 6007
targetPort: 6007
selector:
app: dataprep-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-dependency-deploy
namespace: default
spec:
replicas: 4
selector:
matchLabels:
app: embedding-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(EMBEDDING_MODEL_ID)
- --auto-truncate
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
imagePullPolicy: IfNotPresent
name: embedding-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
cpu: 80
memory: 20000Mi
requests:
cpu: 80
memory: 20000Mi
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: embedding-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: embedding-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 6006
targetPort: 80
selector:
app: embedding-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-dependency-deploy
namespace: default
spec:
replicas: 31
selector:
matchLabels:
app: llm-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(LLM_MODEL_ID)
- --max-input-length
- '1280'
- --max-total-tokens
- '2048'
- --max-batch-total-tokens
- '65536'
- --max-batch-prefill-tokens
- '4096'
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
securityContext:
capabilities:
add:
- SYS_NICE
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: llm-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: llm-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 9009
targetPort: 80
selector:
app: llm-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: reranking-dependency-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: reranking-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: reranking-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(RERANK_MODEL_ID)
- --auto-truncate
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
- name: MAX_WARMUP_SEQUENCE_LENGTH
value: '512'
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tei-gaudi:latest
imagePullPolicy: IfNotPresent
name: reranking-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: reranking-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: reranking-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 8808
targetPort: 80
selector:
app: reranking-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: retriever-deploy
namespace: default
spec:
replicas: 4
selector:
matchLabels:
app: retriever-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: retriever-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/retriever-redis:latest
imagePullPolicy: IfNotPresent
name: retriever-deploy
ports:
- containerPort: 7000
resources:
requests:
cpu: 4
memory: 4000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: retriever-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: retriever-svc
namespace: default
spec:
ports:
- name: service
port: 7000
targetPort: 7000
selector:
app: retriever-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: vector-db
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: vector-db
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: vector-db
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: redis/redis-stack:7.2.0-v9
imagePullPolicy: IfNotPresent
name: vector-db
ports:
- containerPort: 6379
- containerPort: 8001
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: vector-db
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: vector-db
namespace: default
spec:
ports:
- name: vector-db-service
port: 6379
targetPort: 6379
- name: vector-db-insight
port: 8001
targetPort: 8001
selector:
app: vector-db
type: ClusterIP
---

View File

@@ -44,7 +44,7 @@ spec:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna:latest
image: opea/chatqna-no-wrapper:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
ports:
@@ -345,7 +345,7 @@ spec:
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tei-gaudi:latest
image: opea/tei-gaudi:latest
imagePullPolicy: IfNotPresent
name: reranking-dependency-deploy
ports:

View File

@@ -44,7 +44,7 @@ spec:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna:latest
image: opea/chatqna-no-wrapper:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
ports:
@@ -345,7 +345,7 @@ spec:
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tei-gaudi:latest
image: opea/tei-gaudi:latest
imagePullPolicy: IfNotPresent
name: reranking-dependency-deploy
ports:

View File

@@ -1,507 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
data:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
INDEX_NAME: rag-redis
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
LLM_SERVER_HOST_IP: llm-dependency-svc
NODE_SELECTOR: chatqna-opea
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
RERANK_MODEL_ID: BAAI/bge-reranker-base
RERANK_SERVER_HOST_IP: reranking-dependency-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
kind: ConfigMap
metadata:
name: qna-config
namespace: default
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-backend-server-deploy
namespace: default
spec:
replicas: 2
selector:
matchLabels:
app: chatqna-backend-server-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: chatqna-backend-server-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
ports:
- containerPort: 8888
resources:
limits:
cpu: 8
memory: 8000Mi
requests:
cpu: 8
memory: 8000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: chatqna-backend-server-svc
namespace: default
spec:
ports:
- name: service
nodePort: 30888
port: 8888
targetPort: 8888
selector:
app: chatqna-backend-server-deploy
type: NodePort
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: dataprep-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: dataprep-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: dataprep-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/dataprep-redis:latest
imagePullPolicy: IfNotPresent
name: dataprep-deploy
ports:
- containerPort: 6007
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: dataprep-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: dataprep-svc
namespace: default
spec:
ports:
- name: port1
port: 6007
targetPort: 6007
selector:
app: dataprep-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-dependency-deploy
namespace: default
spec:
replicas: 2
selector:
matchLabels:
app: embedding-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(EMBEDDING_MODEL_ID)
- --auto-truncate
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
imagePullPolicy: IfNotPresent
name: embedding-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
cpu: 80
memory: 20000Mi
requests:
cpu: 80
memory: 20000Mi
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: embedding-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: embedding-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 6006
targetPort: 80
selector:
app: embedding-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-dependency-deploy
namespace: default
spec:
replicas: 15
selector:
matchLabels:
app: llm-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(LLM_MODEL_ID)
- --max-input-length
- '1280'
- --max-total-tokens
- '2048'
- --max-batch-total-tokens
- '65536'
- --max-batch-prefill-tokens
- '4096'
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
securityContext:
capabilities:
add:
- SYS_NICE
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: llm-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: llm-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 9009
targetPort: 80
selector:
app: llm-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: reranking-dependency-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: reranking-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: reranking-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(RERANK_MODEL_ID)
- --auto-truncate
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
- name: MAX_WARMUP_SEQUENCE_LENGTH
value: '512'
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tei-gaudi:latest
imagePullPolicy: IfNotPresent
name: reranking-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: reranking-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: reranking-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 8808
targetPort: 80
selector:
app: reranking-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: retriever-deploy
namespace: default
spec:
replicas: 2
selector:
matchLabels:
app: retriever-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: retriever-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/retriever-redis:latest
imagePullPolicy: IfNotPresent
name: retriever-deploy
ports:
- containerPort: 7000
resources:
requests:
cpu: 4
memory: 4000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: retriever-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: retriever-svc
namespace: default
spec:
ports:
- name: service
port: 7000
targetPort: 7000
selector:
app: retriever-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: vector-db
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: vector-db
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: vector-db
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: redis/redis-stack:7.2.0-v9
imagePullPolicy: IfNotPresent
name: vector-db
ports:
- containerPort: 6379
- containerPort: 8001
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: vector-db
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: vector-db
namespace: default
spec:
ports:
- name: vector-db-service
port: 6379
targetPort: 6379
- name: vector-db-insight
port: 8001
targetPort: 8001
selector:
app: vector-db
type: ClusterIP
---

View File

@@ -44,7 +44,7 @@ spec:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna-without-rerank:latest
image: opea/chatqna-no-wrapper-without-rerank:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
ports:

View File

@@ -1,421 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
data:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
INDEX_NAME: rag-redis
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
LLM_SERVER_HOST_IP: llm-dependency-svc
NODE_SELECTOR: chatqna-opea
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
RERANK_MODEL_ID: BAAI/bge-reranker-base
RERANK_SERVER_HOST_IP: reranking-dependency-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
kind: ConfigMap
metadata:
name: qna-config
namespace: default
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-backend-server-deploy
namespace: default
spec:
replicas: 8
selector:
matchLabels:
app: chatqna-backend-server-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: chatqna-backend-server-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna-without-rerank:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
ports:
- containerPort: 8888
resources:
limits:
cpu: 8
memory: 8000Mi
requests:
cpu: 8
memory: 8000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: chatqna-backend-server-svc
namespace: default
spec:
ports:
- name: service
nodePort: 30888
port: 8888
targetPort: 8888
selector:
app: chatqna-backend-server-deploy
type: NodePort
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: dataprep-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: dataprep-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: dataprep-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/dataprep-redis:latest
imagePullPolicy: IfNotPresent
name: dataprep-deploy
ports:
- containerPort: 6007
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: dataprep-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: dataprep-svc
namespace: default
spec:
ports:
- name: port1
port: 6007
targetPort: 6007
selector:
app: dataprep-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-dependency-deploy
namespace: default
spec:
replicas: 8
selector:
matchLabels:
app: embedding-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(EMBEDDING_MODEL_ID)
- --auto-truncate
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
imagePullPolicy: IfNotPresent
name: embedding-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
cpu: 80
memory: 20000Mi
requests:
cpu: 80
memory: 20000Mi
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: embedding-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: embedding-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 6006
targetPort: 80
selector:
app: embedding-dependency-deploy
type: ClusterIP
---
# TGI (text-generation-inference) serving the LLM on Habana Gaudi.
# One Gaudi card per replica; models are read from the host path /mnt/models.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: llm-dependency-deploy
  namespace: default
spec:
  replicas: 64
  selector:
    matchLabels:
      app: llm-dependency-deploy
  template:
    metadata:
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
      labels:
        app: llm-dependency-deploy
    spec:
      containers:
      - args:
        - --model-id
        - $(LLM_MODEL_ID)
        # Numeric args quoted so they are passed as strings, not YAML ints.
        - --max-input-length
        - '1280'
        - --max-total-tokens
        - '2048'
        - --max-batch-total-tokens
        - '65536'
        - --max-batch-prefill-tokens
        - '4096'
        env:
        - name: OMPI_MCA_btl_vader_single_copy_mechanism
          value: none
        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
          value: 'true'
        - name: runtime
          value: habana
        - name: HABANA_VISIBLE_DEVICES
          value: all
        # Quoted: env `value` must be a string, and an empty ${HF_TOKEN}
        # expansion would otherwise be parsed as YAML null (invalid here).
        # NOTE(review): prefer secretKeyRef over an inlined token.
        - name: HF_TOKEN
          value: "${HF_TOKEN}"
        envFrom:
        - configMapRef:
            name: qna-config
        image: ghcr.io/huggingface/tgi-gaudi:2.0.5
        imagePullPolicy: IfNotPresent
        name: llm-dependency-deploy
        ports:
        - containerPort: 80
        resources:
          limits:
            habana.ai/gaudi: 1
        securityContext:
          capabilities:
            add:
            - SYS_NICE
        volumeMounts:
        - mountPath: /data
          name: model-volume
        - mountPath: /dev/shm
          name: shm
      # hostIPC is required by the Gaudi runtime but widens the security
      # boundary to the host IPC namespace.
      hostIPC: true
      nodeSelector:
        node-type: chatqna-opea
      serviceAccountName: default
      topologySpreadConstraints:
      - labelSelector:
          matchLabels:
            app: llm-dependency-deploy
        maxSkew: 1
        topologyKey: kubernetes.io/hostname
        whenUnsatisfiable: ScheduleAnyway
      volumes:
      - hostPath:
          path: /mnt/models
          type: Directory
        name: model-volume
      - emptyDir:
          medium: Memory
          sizeLimit: 1Gi
        name: shm
---
apiVersion: v1
kind: Service
metadata:
name: llm-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 9009
targetPort: 80
selector:
app: llm-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: retriever-deploy
namespace: default
spec:
replicas: 8
selector:
matchLabels:
app: retriever-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: retriever-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/retriever-redis:latest
imagePullPolicy: IfNotPresent
name: retriever-deploy
ports:
- containerPort: 7000
resources:
requests:
cpu: 4
memory: 4000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: retriever-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: retriever-svc
namespace: default
spec:
ports:
- name: service
port: 7000
targetPort: 7000
selector:
app: retriever-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: vector-db
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: vector-db
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: vector-db
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: redis/redis-stack:7.2.0-v9
imagePullPolicy: IfNotPresent
name: vector-db
ports:
- containerPort: 6379
- containerPort: 8001
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: vector-db
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: vector-db
namespace: default
spec:
ports:
- name: vector-db-service
port: 6379
targetPort: 6379
- name: vector-db-insight
port: 8001
targetPort: 8001
selector:
app: vector-db
type: ClusterIP
---

View File

@@ -44,7 +44,7 @@ spec:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna-without-rerank:latest
image: opea/chatqna-no-wrapper-without-rerank:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
ports:

View File

@@ -1,421 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# Shared environment for all ChatQnA services: model IDs and in-cluster
# service endpoints. Consumed via envFrom by every Deployment in this manifest.
apiVersion: v1
data:
  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
  EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
  # NOTE(review): an API token belongs in a Secret, not a ConfigMap —
  # ConfigMap data is stored in plain text and readable by anyone who can
  # get the object. Quoted so the templated value is always parsed as a
  # string and an empty expansion does not collapse to YAML null.
  HUGGINGFACEHUB_API_TOKEN: "${HF_TOKEN}"
  INDEX_NAME: rag-redis
  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
  LLM_SERVER_HOST_IP: llm-dependency-svc
  NODE_SELECTOR: chatqna-opea
  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
  RERANK_MODEL_ID: BAAI/bge-reranker-base
  RERANK_SERVER_HOST_IP: reranking-dependency-svc
  RETRIEVER_SERVICE_HOST_IP: retriever-svc
  TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
  TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
  TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
kind: ConfigMap
metadata:
  name: qna-config
  namespace: default
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-backend-server-deploy
namespace: default
spec:
replicas: 4
selector:
matchLabels:
app: chatqna-backend-server-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: chatqna-backend-server-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna-without-rerank:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
ports:
- containerPort: 8888
resources:
limits:
cpu: 8
memory: 8000Mi
requests:
cpu: 8
memory: 8000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: chatqna-backend-server-svc
namespace: default
spec:
ports:
- name: service
nodePort: 30888
port: 8888
targetPort: 8888
selector:
app: chatqna-backend-server-deploy
type: NodePort
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: dataprep-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: dataprep-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: dataprep-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/dataprep-redis:latest
imagePullPolicy: IfNotPresent
name: dataprep-deploy
ports:
- containerPort: 6007
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: dataprep-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: dataprep-svc
namespace: default
spec:
ports:
- name: port1
port: 6007
targetPort: 6007
selector:
app: dataprep-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-dependency-deploy
namespace: default
spec:
replicas: 4
selector:
matchLabels:
app: embedding-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(EMBEDDING_MODEL_ID)
- --auto-truncate
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
imagePullPolicy: IfNotPresent
name: embedding-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
cpu: 80
memory: 20000Mi
requests:
cpu: 80
memory: 20000Mi
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: embedding-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: embedding-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 6006
targetPort: 80
selector:
app: embedding-dependency-deploy
type: ClusterIP
---
# TGI (text-generation-inference) serving the LLM on Habana Gaudi.
# One Gaudi card per replica; models are read from the host path /mnt/models.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: llm-dependency-deploy
  namespace: default
spec:
  replicas: 32
  selector:
    matchLabels:
      app: llm-dependency-deploy
  template:
    metadata:
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
      labels:
        app: llm-dependency-deploy
    spec:
      containers:
      - args:
        - --model-id
        - $(LLM_MODEL_ID)
        # Numeric args quoted so they are passed as strings, not YAML ints.
        - --max-input-length
        - '1280'
        - --max-total-tokens
        - '2048'
        - --max-batch-total-tokens
        - '65536'
        - --max-batch-prefill-tokens
        - '4096'
        env:
        - name: OMPI_MCA_btl_vader_single_copy_mechanism
          value: none
        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
          value: 'true'
        - name: runtime
          value: habana
        - name: HABANA_VISIBLE_DEVICES
          value: all
        # Quoted: env `value` must be a string, and an empty ${HF_TOKEN}
        # expansion would otherwise be parsed as YAML null (invalid here).
        # NOTE(review): prefer secretKeyRef over an inlined token.
        - name: HF_TOKEN
          value: "${HF_TOKEN}"
        envFrom:
        - configMapRef:
            name: qna-config
        image: ghcr.io/huggingface/tgi-gaudi:2.0.5
        imagePullPolicy: IfNotPresent
        name: llm-dependency-deploy
        ports:
        - containerPort: 80
        resources:
          limits:
            habana.ai/gaudi: 1
        securityContext:
          capabilities:
            add:
            - SYS_NICE
        volumeMounts:
        - mountPath: /data
          name: model-volume
        - mountPath: /dev/shm
          name: shm
      # hostIPC is required by the Gaudi runtime but widens the security
      # boundary to the host IPC namespace.
      hostIPC: true
      nodeSelector:
        node-type: chatqna-opea
      serviceAccountName: default
      topologySpreadConstraints:
      - labelSelector:
          matchLabels:
            app: llm-dependency-deploy
        maxSkew: 1
        topologyKey: kubernetes.io/hostname
        whenUnsatisfiable: ScheduleAnyway
      volumes:
      - hostPath:
          path: /mnt/models
          type: Directory
        name: model-volume
      - emptyDir:
          medium: Memory
          sizeLimit: 1Gi
        name: shm
---
apiVersion: v1
kind: Service
metadata:
name: llm-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 9009
targetPort: 80
selector:
app: llm-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: retriever-deploy
namespace: default
spec:
replicas: 4
selector:
matchLabels:
app: retriever-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: retriever-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/retriever-redis:latest
imagePullPolicy: IfNotPresent
name: retriever-deploy
ports:
- containerPort: 7000
resources:
requests:
cpu: 4
memory: 4000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: retriever-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: retriever-svc
namespace: default
spec:
ports:
- name: service
port: 7000
targetPort: 7000
selector:
app: retriever-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: vector-db
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: vector-db
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: vector-db
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: redis/redis-stack:7.2.0-v9
imagePullPolicy: IfNotPresent
name: vector-db
ports:
- containerPort: 6379
- containerPort: 8001
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: vector-db
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: vector-db
namespace: default
spec:
ports:
- name: vector-db-service
port: 6379
targetPort: 6379
- name: vector-db-insight
port: 8001
targetPort: 8001
selector:
app: vector-db
type: ClusterIP
---

View File

@@ -44,7 +44,7 @@ spec:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna-without-rerank:latest
image: opea/chatqna-no-wrapper-without-rerank:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
ports:

View File

@@ -1,421 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# Shared environment for all ChatQnA services: model IDs and in-cluster
# service endpoints. Consumed via envFrom by every Deployment in this manifest.
apiVersion: v1
data:
  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
  EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
  # NOTE(review): an API token belongs in a Secret, not a ConfigMap —
  # ConfigMap data is stored in plain text and readable by anyone who can
  # get the object. Quoted so the templated value is always parsed as a
  # string and an empty expansion does not collapse to YAML null.
  HUGGINGFACEHUB_API_TOKEN: "${HF_TOKEN}"
  INDEX_NAME: rag-redis
  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
  LLM_SERVER_HOST_IP: llm-dependency-svc
  NODE_SELECTOR: chatqna-opea
  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
  RERANK_MODEL_ID: BAAI/bge-reranker-base
  RERANK_SERVER_HOST_IP: reranking-dependency-svc
  RETRIEVER_SERVICE_HOST_IP: retriever-svc
  TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
  TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
  TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
kind: ConfigMap
metadata:
  name: qna-config
  namespace: default
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-backend-server-deploy
namespace: default
spec:
replicas: 2
selector:
matchLabels:
app: chatqna-backend-server-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: chatqna-backend-server-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna-without-rerank:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
ports:
- containerPort: 8888
resources:
limits:
cpu: 8
memory: 8000Mi
requests:
cpu: 8
memory: 8000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: chatqna-backend-server-svc
namespace: default
spec:
ports:
- name: service
nodePort: 30888
port: 8888
targetPort: 8888
selector:
app: chatqna-backend-server-deploy
type: NodePort
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: dataprep-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: dataprep-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: dataprep-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/dataprep-redis:latest
imagePullPolicy: IfNotPresent
name: dataprep-deploy
ports:
- containerPort: 6007
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: dataprep-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: dataprep-svc
namespace: default
spec:
ports:
- name: port1
port: 6007
targetPort: 6007
selector:
app: dataprep-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-dependency-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: embedding-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-dependency-deploy
spec:
containers:
- args:
- --model-id
- $(EMBEDDING_MODEL_ID)
- --auto-truncate
envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
imagePullPolicy: IfNotPresent
name: embedding-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
cpu: 80
memory: 20000Mi
requests:
cpu: 80
memory: 20000Mi
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: embedding-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
---
apiVersion: v1
kind: Service
metadata:
name: embedding-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 6006
targetPort: 80
selector:
app: embedding-dependency-deploy
type: ClusterIP
---
# TGI (text-generation-inference) serving the LLM on Habana Gaudi.
# One Gaudi card per replica; models are read from the host path /mnt/models.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: llm-dependency-deploy
  namespace: default
spec:
  replicas: 8
  selector:
    matchLabels:
      app: llm-dependency-deploy
  template:
    metadata:
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
      labels:
        app: llm-dependency-deploy
    spec:
      containers:
      - args:
        - --model-id
        - $(LLM_MODEL_ID)
        # Numeric args quoted so they are passed as strings, not YAML ints.
        - --max-input-length
        - '1280'
        - --max-total-tokens
        - '2048'
        - --max-batch-total-tokens
        - '65536'
        - --max-batch-prefill-tokens
        - '4096'
        env:
        - name: OMPI_MCA_btl_vader_single_copy_mechanism
          value: none
        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
          value: 'true'
        - name: runtime
          value: habana
        - name: HABANA_VISIBLE_DEVICES
          value: all
        # Quoted: env `value` must be a string, and an empty ${HF_TOKEN}
        # expansion would otherwise be parsed as YAML null (invalid here).
        # NOTE(review): prefer secretKeyRef over an inlined token.
        - name: HF_TOKEN
          value: "${HF_TOKEN}"
        envFrom:
        - configMapRef:
            name: qna-config
        image: ghcr.io/huggingface/tgi-gaudi:2.0.5
        imagePullPolicy: IfNotPresent
        name: llm-dependency-deploy
        ports:
        - containerPort: 80
        resources:
          limits:
            habana.ai/gaudi: 1
        securityContext:
          capabilities:
            add:
            - SYS_NICE
        volumeMounts:
        - mountPath: /data
          name: model-volume
        - mountPath: /dev/shm
          name: shm
      # hostIPC is required by the Gaudi runtime but widens the security
      # boundary to the host IPC namespace.
      hostIPC: true
      nodeSelector:
        node-type: chatqna-opea
      serviceAccountName: default
      topologySpreadConstraints:
      - labelSelector:
          matchLabels:
            app: llm-dependency-deploy
        maxSkew: 1
        topologyKey: kubernetes.io/hostname
        whenUnsatisfiable: ScheduleAnyway
      volumes:
      - hostPath:
          path: /mnt/models
          type: Directory
        name: model-volume
      - emptyDir:
          medium: Memory
          sizeLimit: 1Gi
        name: shm
---
apiVersion: v1
kind: Service
metadata:
name: llm-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 9009
targetPort: 80
selector:
app: llm-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: retriever-deploy
namespace: default
spec:
replicas: 2
selector:
matchLabels:
app: retriever-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: retriever-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/retriever-redis:latest
imagePullPolicy: IfNotPresent
name: retriever-deploy
ports:
- containerPort: 7000
resources:
requests:
cpu: 4
memory: 4000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: retriever-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: retriever-svc
namespace: default
spec:
ports:
- name: service
port: 7000
targetPort: 7000
selector:
app: retriever-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: vector-db
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: vector-db
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: vector-db
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: redis/redis-stack:7.2.0-v9
imagePullPolicy: IfNotPresent
name: vector-db
ports:
- containerPort: 6379
- containerPort: 8001
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: vector-db
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: vector-db
namespace: default
spec:
ports:
- name: vector-db-service
port: 6379
targetPort: 6379
- name: vector-db-insight
port: 8001
targetPort: 8001
selector:
app: vector-db
type: ClusterIP
---

View File

@@ -44,7 +44,7 @@ spec:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna-without-rerank:latest
image: opea/chatqna-no-wrapper-without-rerank:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
ports:

View File

@@ -1,514 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# Shared environment for the vLLM-based ChatQnA deployment: model IDs,
# in-cluster service endpoints, and the per-service ports.
apiVersion: v1
kind: ConfigMap
metadata:
  name: chatqna-config
  namespace: default
data:
  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
  EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
  # NOTE(review): an API token belongs in a Secret, not a ConfigMap —
  # ConfigMap data is stored in plain text and readable by anyone who can
  # get the object. Quoted so the templated value is always parsed as a
  # string and an empty expansion does not collapse to YAML null.
  HUGGINGFACEHUB_API_TOKEN: "${HF_TOKEN}"
  INDEX_NAME: rag-redis
  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
  LLM_SERVER_HOST_IP: llm-dependency-svc
  NODE_SELECTOR: opea
  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
  RERANK_MODEL_ID: BAAI/bge-reranker-base
  RERANK_SERVER_HOST_IP: reranking-dependency-svc
  RETRIEVER_SERVICE_HOST_IP: retriever-svc
  TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
  TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
  TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
  # Ports quoted: ConfigMap values must be strings; unquoted digit-only
  # scalars would be parsed as YAML integers.
  LLM_SERVER_PORT: '9009'
  RERANK_SERVER_PORT: '8808'
  EMBEDDING_SERVER_PORT: '6006'
---
# Source: chatqna-charts/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: chatqna-backend-server-svc
namespace: default
spec:
ports:
- name: service
nodePort: 30888
port: 8888
targetPort: 8888
selector:
app: chatqna-backend-server-deploy
type: NodePort
---
# Source: chatqna-charts/templates/service.yaml
apiVersion: v1
kind: Service
metadata:
name: dataprep-svc
namespace: default
spec:
ports:
- name: port1
port: 6007
targetPort: 6007
selector:
app: dataprep-deploy
type: ClusterIP
---
# Source: chatqna-charts/templates/service.yaml
apiVersion: v1
kind: Service
metadata:
name: embedding-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 6006
targetPort: 80
selector:
app: embedding-dependency-deploy
type: ClusterIP
---
# Source: chatqna-charts/templates/service.yaml
apiVersion: v1
kind: Service
metadata:
name: llm-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 9009
targetPort: 80
selector:
app: llm-dependency-deploy
type: ClusterIP
---
# Source: chatqna-charts/templates/service.yaml
apiVersion: v1
kind: Service
metadata:
name: reranking-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 8808
targetPort: 80
selector:
app: reranking-dependency-deploy
type: ClusterIP
---
# Source: chatqna-charts/templates/service.yaml
apiVersion: v1
kind: Service
metadata:
name: retriever-svc
namespace: default
spec:
ports:
- name: service
port: 7000
targetPort: 7000
selector:
app: retriever-deploy
type: ClusterIP
---
# Source: chatqna-charts/templates/service.yaml
apiVersion: v1
kind: Service
metadata:
name: vector-db
namespace: default
spec:
ports:
- name: vector-db-service
port: 6379
targetPort: 6379
- name: vector-db-insight
port: 8001
targetPort: 8001
selector:
app: vector-db
type: ClusterIP
---
# Source: chatqna-charts/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-backend-server-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: chatqna-backend-server-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: chatqna-backend-server-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: chatqna-config
image: opea/chatqna-model-fixed-root:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
ports:
- containerPort: 8888
hostIPC: true
nodeSelector:
node-type: opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
# Source: chatqna-charts/templates/deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: dataprep-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: dataprep-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: dataprep-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: chatqna-config
image: opea/dataprep-redis:latest
imagePullPolicy: IfNotPresent
name: dataprep-deploy
ports:
- containerPort: 6007
hostIPC: true
nodeSelector:
node-type: opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: dataprep-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
# Source: chatqna-charts/templates/deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: vector-db
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: vector-db
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: vector-db
spec:
containers:
- envFrom:
- configMapRef:
name: chatqna-config
image: redis/redis-stack:7.2.0-v9
imagePullPolicy: IfNotPresent
name: vector-db
ports:
- containerPort: 6379
- containerPort: 8001
hostIPC: true
nodeSelector:
node-type: opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: vector-db
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
# Source: chatqna-charts/templates/deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: retriever-deploy
namespace: default
spec:
replicas: 2
selector:
matchLabels:
app: retriever-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: retriever-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: chatqna-config
image: opea/retriever-redis:latest
imagePullPolicy: IfNotPresent
name: retriever-deploy
ports:
- containerPort: 7000
hostIPC: true
nodeSelector:
node-type: opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: retriever-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
# Source: chatqna-charts/templates/deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-dependency-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: embedding-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-dependency-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: chatqna-config
args:
- --model-id
- "$(EMBEDDING_MODEL_ID)"
- --auto-truncate
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
imagePullPolicy: IfNotPresent
name: embedding-dependency-deploy
ports:
- containerPort: 80
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: embedding-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- name: model-volume
hostPath:
path: /mnt/models
type: Directory
- name: shm
emptyDir:
medium: Memory
sizeLimit: 1Gi
---
# Source: chatqna-charts/templates/deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: reranking-dependency-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: reranking-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: reranking-dependency-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: chatqna-config
args:
- --model-id
- "$(RERANK_MODEL_ID)"
- --auto-truncate
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: "none"
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: "true"
- name: runtime
value: "habana"
- name: HABANA_VISIBLE_DEVICES
value: "all"
- name: MAX_WARMUP_SEQUENCE_LENGTH
value: "512"
image: opea/tei-gaudi:latest
imagePullPolicy: IfNotPresent
name: reranking-dependency-deploy
resources:
limits:
habana.ai/gaudi: 1
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: reranking-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- name: model-volume
hostPath:
path: /mnt/models
type: Directory
- name: shm
emptyDir:
medium: Memory
sizeLimit: 1Gi
---
# Source: chatqna-charts/templates/deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-dependency-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: llm-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-dependency-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: chatqna-config
command: ["/bin/bash", "-c"]
args: ["python3 -m vllm.entrypoints.openai.api_server --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048"]
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: "none"
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: "true"
- name: runtime
value: "habana"
- name: HABANA_VISIBLE_DEVICES
value: "all"
image: opea/llm-vllm-hpu:latest
imagePullPolicy: IfNotPresent
name: llm-dependency-deploy
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
hostIPC: true
nodeSelector:
node-type: opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: llm-dependency-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
volumes:
- name: model-volume
hostPath:
path: /mnt/models
type: Directory
- name: shm
emptyDir:
medium: Memory
sizeLimit: 1Gi

View File

@@ -1,421 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# Shared environment for all ChatQnA microservices: model IDs and the
# in-cluster endpoints of every dependency service.
apiVersion: v1
data:
  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
  EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
  # Quoted so the template placeholder always parses as a string, even if the
  # substituted token is empty or begins with a YAML special character.
  HUGGINGFACEHUB_API_TOKEN: "${HF_TOKEN}"
  INDEX_NAME: rag-redis
  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
  LLM_SERVER_HOST_IP: llm-dependency-svc
  NODE_SELECTOR: chatqna-opea
  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
  RERANK_MODEL_ID: BAAI/bge-reranker-base
  RERANK_SERVER_HOST_IP: reranking-dependency-svc
  RETRIEVER_SERVICE_HOST_IP: retriever-svc
  TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
  TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
  TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
kind: ConfigMap
metadata:
  name: qna-config
  namespace: default
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-backend-server-deploy
namespace: default
spec:
replicas: 2
selector:
matchLabels:
app: chatqna-backend-server-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: chatqna-backend-server-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna-without-rerank:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
ports:
- containerPort: 8888
resources:
limits:
cpu: 8
memory: 8000Mi
requests:
cpu: 8
memory: 8000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: chatqna-backend-server-svc
namespace: default
spec:
ports:
- name: service
nodePort: 30888
port: 8888
targetPort: 8888
selector:
app: chatqna-backend-server-deploy
type: NodePort
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: dataprep-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: dataprep-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: dataprep-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/dataprep-redis:latest
imagePullPolicy: IfNotPresent
name: dataprep-deploy
ports:
- containerPort: 6007
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: dataprep-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: dataprep-svc
namespace: default
spec:
ports:
- name: port1
port: 6007
targetPort: 6007
selector:
app: dataprep-deploy
type: ClusterIP
---
# TEI (text-embeddings-inference) CPU deployment serving EMBEDDING_MODEL_ID.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: embedding-dependency-deploy
  namespace: default
spec:
  replicas: 2
  selector:
    matchLabels:
      app: embedding-dependency-deploy
  template:
    metadata:
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
      labels:
        app: embedding-dependency-deploy
    spec:
      containers:
        - args:
            - --model-id
            - $(EMBEDDING_MODEL_ID)
            - --auto-truncate
          envFrom:
            - configMapRef:
                name: qna-config
          image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
          imagePullPolicy: IfNotPresent
          name: embedding-dependency-deploy
          ports:
            - containerPort: 80
          resources:
            # requests == limits gives this pod Guaranteed QoS; 80 cores per replica.
            limits:
              cpu: 80
              memory: 20000Mi
            requests:
              cpu: 80
              memory: 20000Mi
          volumeMounts:
            - mountPath: /data
              name: model-volume
            - mountPath: /dev/shm
              name: shm
      hostIPC: true
      nodeSelector:
        node-type: chatqna-opea
      serviceAccountName: default
      topologySpreadConstraints:
        - labelSelector:
            matchLabels:
              app: embedding-dependency-deploy
          maxSkew: 1
          topologyKey: kubernetes.io/hostname
          whenUnsatisfiable: ScheduleAnyway
      volumes:
        - hostPath:
            path: /mnt/models
            type: Directory
          name: model-volume
        - emptyDir:
            medium: Memory
            sizeLimit: 1Gi
          name: shm
---
apiVersion: v1
kind: Service
metadata:
name: embedding-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 6006
targetPort: 80
selector:
app: embedding-dependency-deploy
type: ClusterIP
---
# TGI-Gaudi deployment serving LLM_MODEL_ID; scaled to 16 replicas,
# one Gaudi card each.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: llm-dependency-deploy
  namespace: default
spec:
  replicas: 16
  selector:
    matchLabels:
      app: llm-dependency-deploy
  template:
    metadata:
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
      labels:
        app: llm-dependency-deploy
    spec:
      containers:
        - args:
            - --model-id
            - $(LLM_MODEL_ID)
            - --max-input-length
            - '1280'
            - --max-total-tokens
            - '2048'
            - --max-batch-total-tokens
            - '65536'
            - --max-batch-prefill-tokens
            - '4096'
          env:
            - name: OMPI_MCA_btl_vader_single_copy_mechanism
              value: none
            - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
              value: 'true'
            - name: runtime
              value: habana
            - name: HABANA_VISIBLE_DEVICES
              value: all
            - name: HF_TOKEN
              # Quoted so the placeholder survives YAML parsing even when empty.
              value: "${HF_TOKEN}"
          envFrom:
            - configMapRef:
                name: qna-config
          image: ghcr.io/huggingface/tgi-gaudi:2.0.5
          imagePullPolicy: IfNotPresent
          name: llm-dependency-deploy
          ports:
            - containerPort: 80
          resources:
            limits:
              habana.ai/gaudi: 1
          securityContext:
            capabilities:
              add:
                # TGI raises worker scheduling priority.
                - SYS_NICE
          volumeMounts:
            - mountPath: /data
              name: model-volume
            - mountPath: /dev/shm
              name: shm
      hostIPC: true
      nodeSelector:
        node-type: chatqna-opea
      serviceAccountName: default
      topologySpreadConstraints:
        - labelSelector:
            matchLabels:
              app: llm-dependency-deploy
          maxSkew: 1
          topologyKey: kubernetes.io/hostname
          whenUnsatisfiable: ScheduleAnyway
      volumes:
        - hostPath:
            path: /mnt/models
            type: Directory
          name: model-volume
        - emptyDir:
            medium: Memory
            sizeLimit: 1Gi
          name: shm
---
apiVersion: v1
kind: Service
metadata:
name: llm-dependency-svc
namespace: default
spec:
ports:
- name: service
port: 9009
targetPort: 80
selector:
app: llm-dependency-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: retriever-deploy
namespace: default
spec:
replicas: 2
selector:
matchLabels:
app: retriever-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: retriever-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/retriever-redis:latest
imagePullPolicy: IfNotPresent
name: retriever-deploy
ports:
- containerPort: 7000
resources:
requests:
cpu: 4
memory: 4000Mi
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: retriever-deploy
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: retriever-svc
namespace: default
spec:
ports:
- name: service
port: 7000
targetPort: 7000
selector:
app: retriever-deploy
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: vector-db
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: vector-db
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: vector-db
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: redis/redis-stack:7.2.0-v9
imagePullPolicy: IfNotPresent
name: vector-db
ports:
- containerPort: 6379
- containerPort: 8001
hostIPC: true
nodeSelector:
node-type: chatqna-opea
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: vector-db
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
---
apiVersion: v1
kind: Service
metadata:
name: vector-db
namespace: default
spec:
ports:
- name: vector-db-service
port: 6379
targetPort: 6379
- name: vector-db-insight
port: 8001
targetPort: 8001
selector:
app: vector-db
type: ClusterIP
---

View File

@@ -1,197 +1,37 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import argparse
import json
import os
import re
from comps import ChatQnAGateway, MicroService, ServiceOrchestrator, ServiceType
from langchain_core.prompts import PromptTemplate
class ChatTemplate:
@staticmethod
def generate_rag_prompt(question, documents):
context_str = "\n".join(documents)
if context_str and len(re.findall("[\u4E00-\u9FFF]", context_str)) / len(context_str) >= 0.3:
# chinese context
template = """
### 你将扮演一个乐于助人、尊重他人并诚实的助手,你的目标是帮助用户解答问题。有效地利用来自本地知识库的搜索结果。确保你的回答中只包含相关信息。如果你不确定问题的答案,请避免分享不准确的信息。
### 搜索结果:{context}
### 问题:{question}
### 回答:
"""
else:
template = """
### You are a helpful, respectful and honest assistant to help the user with questions. \
Please refer to the search results obtained from the local knowledge base. \
But be careful to not incorporate the information that you think is not relevant to the question. \
If you don't know the answer to a question, please don't share false information. \n
### Search results: {context} \n
### Question: {question} \n
### Answer:
"""
return template.format(context=context_str, question=question)
MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "0.0.0.0")
MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))
GUARDRAIL_SERVICE_HOST_IP = os.getenv("GUARDRAIL_SERVICE_HOST_IP", "0.0.0.0")
GUARDRAIL_SERVICE_PORT = int(os.getenv("GUARDRAIL_SERVICE_PORT", 80))
EMBEDDING_SERVER_HOST_IP = os.getenv("EMBEDDING_SERVER_HOST_IP", "0.0.0.0")
EMBEDDING_SERVER_PORT = int(os.getenv("EMBEDDING_SERVER_PORT", 80))
EMBEDDING_SERVICE_HOST_IP = os.getenv("EMBEDDING_SERVICE_HOST_IP", "0.0.0.0")
EMBEDDING_SERVICE_PORT = int(os.getenv("EMBEDDING_SERVICE_PORT", 6000))
RETRIEVER_SERVICE_HOST_IP = os.getenv("RETRIEVER_SERVICE_HOST_IP", "0.0.0.0")
RETRIEVER_SERVICE_PORT = int(os.getenv("RETRIEVER_SERVICE_PORT", 7000))
RERANK_SERVER_HOST_IP = os.getenv("RERANK_SERVER_HOST_IP", "0.0.0.0")
RERANK_SERVER_PORT = int(os.getenv("RERANK_SERVER_PORT", 80))
LLM_SERVER_HOST_IP = os.getenv("LLM_SERVER_HOST_IP", "0.0.0.0")
LLM_SERVER_PORT = int(os.getenv("LLM_SERVER_PORT", 80))
def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
if self.services[cur_node].service_type == ServiceType.EMBEDDING:
inputs["inputs"] = inputs["text"]
del inputs["text"]
elif self.services[cur_node].service_type == ServiceType.RETRIEVER:
# prepare the retriever params
retriever_parameters = kwargs.get("retriever_parameters", None)
if retriever_parameters:
inputs.update(retriever_parameters.dict())
elif self.services[cur_node].service_type == ServiceType.LLM:
# convert TGI/vLLM to unified OpenAI /v1/chat/completions format
next_inputs = {}
next_inputs["model"] = "tgi" # specifically clarify the fake model to make the format unified
next_inputs["messages"] = [{"role": "user", "content": inputs["inputs"]}]
next_inputs["max_tokens"] = llm_parameters_dict["max_tokens"]
next_inputs["top_p"] = llm_parameters_dict["top_p"]
next_inputs["stream"] = inputs["streaming"]
next_inputs["frequency_penalty"] = inputs["frequency_penalty"]
# next_inputs["presence_penalty"] = inputs["presence_penalty"]
# next_inputs["repetition_penalty"] = inputs["repetition_penalty"]
next_inputs["temperature"] = inputs["temperature"]
inputs = next_inputs
return inputs
def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_dict, **kwargs):
next_data = {}
if self.services[cur_node].service_type == ServiceType.EMBEDDING:
assert isinstance(data, list)
next_data = {"text": inputs["inputs"], "embedding": data[0]}
elif self.services[cur_node].service_type == ServiceType.RETRIEVER:
docs = [doc["text"] for doc in data["retrieved_docs"]]
with_rerank = runtime_graph.downstream(cur_node)[0].startswith("rerank")
if with_rerank and docs:
# forward to rerank
# prepare inputs for rerank
next_data["query"] = data["initial_query"]
next_data["texts"] = [doc["text"] for doc in data["retrieved_docs"]]
else:
# forward to llm
if not docs and with_rerank:
# delete the rerank from retriever -> rerank -> llm
for ds in reversed(runtime_graph.downstream(cur_node)):
for nds in runtime_graph.downstream(ds):
runtime_graph.add_edge(cur_node, nds)
runtime_graph.delete_node_if_exists(ds)
# handle template
# if user provides template, then format the prompt with it
# otherwise, use the default template
prompt = data["initial_query"]
chat_template = llm_parameters_dict["chat_template"]
if chat_template:
prompt_template = PromptTemplate.from_template(chat_template)
input_variables = prompt_template.input_variables
if sorted(input_variables) == ["context", "question"]:
prompt = prompt_template.format(question=data["initial_query"], context="\n".join(docs))
elif input_variables == ["question"]:
prompt = prompt_template.format(question=data["initial_query"])
else:
print(f"{prompt_template} not used, we only support 2 input variables ['question', 'context']")
prompt = ChatTemplate.generate_rag_prompt(data["initial_query"], docs)
else:
prompt = ChatTemplate.generate_rag_prompt(data["initial_query"], docs)
next_data["inputs"] = prompt
elif self.services[cur_node].service_type == ServiceType.RERANK:
# rerank the inputs with the scores
reranker_parameters = kwargs.get("reranker_parameters", None)
top_n = reranker_parameters.top_n if reranker_parameters else 1
docs = inputs["texts"]
reranked_docs = []
for best_response in data[:top_n]:
reranked_docs.append(docs[best_response["index"]])
# handle template
# if user provides template, then format the prompt with it
# otherwise, use the default template
prompt = inputs["query"]
chat_template = llm_parameters_dict["chat_template"]
if chat_template:
prompt_template = PromptTemplate.from_template(chat_template)
input_variables = prompt_template.input_variables
if sorted(input_variables) == ["context", "question"]:
prompt = prompt_template.format(question=prompt, context="\n".join(reranked_docs))
elif input_variables == ["question"]:
prompt = prompt_template.format(question=prompt)
else:
print(f"{prompt_template} not used, we only support 2 input variables ['question', 'context']")
prompt = ChatTemplate.generate_rag_prompt(prompt, reranked_docs)
else:
prompt = ChatTemplate.generate_rag_prompt(prompt, reranked_docs)
next_data["inputs"] = prompt
else:
next_data = data
return next_data
def align_generator(self, gen, **kwargs):
# openai reaponse format
# b'data:{"id":"","object":"text_completion","created":1725530204,"model":"meta-llama/Meta-Llama-3-8B-Instruct","system_fingerprint":"2.0.1-native","choices":[{"index":0,"delta":{"role":"assistant","content":"?"},"logprobs":null,"finish_reason":null}]}\n\n'
for line in gen:
line = line.decode("utf-8")
start = line.find("{")
end = line.rfind("}") + 1
json_str = line[start:end]
try:
# sometimes yield empty chunk, do a fallback here
json_data = json.loads(json_str)
if json_data["choices"][0]["finish_reason"] != "eos_token":
yield f"data: {repr(json_data['choices'][0]['delta']['content'].encode('utf-8'))}\n\n"
except Exception as e:
yield f"data: {repr(json_str.encode('utf-8'))}\n\n"
yield "data: [DONE]\n\n"
RERANK_SERVICE_HOST_IP = os.getenv("RERANK_SERVICE_HOST_IP", "0.0.0.0")
RERANK_SERVICE_PORT = int(os.getenv("RERANK_SERVICE_PORT", 8000))
LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0")
LLM_SERVICE_PORT = int(os.getenv("LLM_SERVICE_PORT", 9000))
class ChatQnAService:
def __init__(self, host="0.0.0.0", port=8000):
self.host = host
self.port = port
ServiceOrchestrator.align_inputs = align_inputs
ServiceOrchestrator.align_outputs = align_outputs
ServiceOrchestrator.align_generator = align_generator
self.megaservice = ServiceOrchestrator()
def add_remote_service(self):
embedding = MicroService(
name="embedding",
host=EMBEDDING_SERVER_HOST_IP,
port=EMBEDDING_SERVER_PORT,
endpoint="/embed",
host=EMBEDDING_SERVICE_HOST_IP,
port=EMBEDDING_SERVICE_PORT,
endpoint="/v1/embeddings",
use_remote_service=True,
service_type=ServiceType.EMBEDDING,
)
retriever = MicroService(
name="retriever",
host=RETRIEVER_SERVICE_HOST_IP,
@@ -200,20 +40,18 @@ class ChatQnAService:
use_remote_service=True,
service_type=ServiceType.RETRIEVER,
)
rerank = MicroService(
name="rerank",
host=RERANK_SERVER_HOST_IP,
port=RERANK_SERVER_PORT,
endpoint="/rerank",
host=RERANK_SERVICE_HOST_IP,
port=RERANK_SERVICE_PORT,
endpoint="/v1/reranking",
use_remote_service=True,
service_type=ServiceType.RERANK,
)
llm = MicroService(
name="llm",
host=LLM_SERVER_HOST_IP,
port=LLM_SERVER_PORT,
host=LLM_SERVICE_HOST_IP,
port=LLM_SERVICE_PORT,
endpoint="/v1/chat/completions",
use_remote_service=True,
service_type=ServiceType.LLM,
@@ -224,109 +62,7 @@ class ChatQnAService:
self.megaservice.flow_to(rerank, llm)
self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)
def add_remote_service_without_rerank(self):
embedding = MicroService(
name="embedding",
host=EMBEDDING_SERVER_HOST_IP,
port=EMBEDDING_SERVER_PORT,
endpoint="/embed",
use_remote_service=True,
service_type=ServiceType.EMBEDDING,
)
retriever = MicroService(
name="retriever",
host=RETRIEVER_SERVICE_HOST_IP,
port=RETRIEVER_SERVICE_PORT,
endpoint="/v1/retrieval",
use_remote_service=True,
service_type=ServiceType.RETRIEVER,
)
llm = MicroService(
name="llm",
host=LLM_SERVER_HOST_IP,
port=LLM_SERVER_PORT,
endpoint="/v1/chat/completions",
use_remote_service=True,
service_type=ServiceType.LLM,
)
self.megaservice.add(embedding).add(retriever).add(llm)
self.megaservice.flow_to(embedding, retriever)
self.megaservice.flow_to(retriever, llm)
self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)
def add_remote_service_with_guardrails(self):
guardrail_in = MicroService(
name="guardrail_in",
host=GUARDRAIL_SERVICE_HOST_IP,
port=GUARDRAIL_SERVICE_PORT,
endpoint="/v1/guardrails",
use_remote_service=True,
service_type=ServiceType.GUARDRAIL,
)
embedding = MicroService(
name="embedding",
host=EMBEDDING_SERVER_HOST_IP,
port=EMBEDDING_SERVER_PORT,
endpoint="/embed",
use_remote_service=True,
service_type=ServiceType.EMBEDDING,
)
retriever = MicroService(
name="retriever",
host=RETRIEVER_SERVICE_HOST_IP,
port=RETRIEVER_SERVICE_PORT,
endpoint="/v1/retrieval",
use_remote_service=True,
service_type=ServiceType.RETRIEVER,
)
rerank = MicroService(
name="rerank",
host=RERANK_SERVER_HOST_IP,
port=RERANK_SERVER_PORT,
endpoint="/rerank",
use_remote_service=True,
service_type=ServiceType.RERANK,
)
llm = MicroService(
name="llm",
host=LLM_SERVER_HOST_IP,
port=LLM_SERVER_PORT,
endpoint="/v1/chat/completions",
use_remote_service=True,
service_type=ServiceType.LLM,
)
# guardrail_out = MicroService(
# name="guardrail_out",
# host=GUARDRAIL_SERVICE_HOST_IP,
# port=GUARDRAIL_SERVICE_PORT,
# endpoint="/v1/guardrails",
# use_remote_service=True,
# service_type=ServiceType.GUARDRAIL,
# )
# self.megaservice.add(guardrail_in).add(embedding).add(retriever).add(rerank).add(llm).add(guardrail_out)
self.megaservice.add(guardrail_in).add(embedding).add(retriever).add(rerank).add(llm)
self.megaservice.flow_to(guardrail_in, embedding)
self.megaservice.flow_to(embedding, retriever)
self.megaservice.flow_to(retriever, rerank)
self.megaservice.flow_to(rerank, llm)
# self.megaservice.flow_to(llm, guardrail_out)
self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--without-rerank", action="store_true")
parser.add_argument("--with-guardrails", action="store_true")
args = parser.parse_args()
chatqna = ChatQnAService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT)
if args.without_rerank:
chatqna.add_remote_service_without_rerank()
elif args.with_guardrails:
chatqna.add_remote_service_with_guardrails()
else:
chatqna.add_remote_service()
chatqna.add_remote_service()

View File

@@ -30,11 +30,21 @@ opea_micro_services:
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
model-id: ${EMBEDDING_MODEL_ID}
embedding:
host: ${EMBEDDING_SERVICE_HOST_IP}
ports: ${EMBEDDING_SERVICE_PORT}
image: opea/embedding-tei:latest
endpoint: /v1/embeddings
retrieval:
host: ${RETRIEVER_SERVICE_HOST_IP}
ports: ${RETRIEVER_SERVICE_PORT}
image: opea/retriever-redis:latest
endpoint: /v1/retrieval
reranking:
host: ${RERANK_SERVICE_HOST_IP}
ports: ${RERANK_SERVICE_PORT}
image: opea/reranking-tei:latest
endpoint: /v1/reranking
tgi-service:
host: ${TGI_SERVICE_IP}
ports: ${TGI_SERVICE_PORT}
@@ -54,6 +64,11 @@ opea_micro_services:
USE_FLASH_ATTENTION: true
FLASH_ATTENTION_RECOMPUTE: true
model-id: ${LLM_MODEL_ID}
llm:
host: ${LLM_SERVICE_HOST_IP}
ports: ${LLM_SERVICE_PORT}
image: opea/llm-tgi:latest
endpoint: /v1/chat/completions
ui:
host: ${UI_SERVICE_HOST_IP}
ports:

View File

@@ -0,0 +1,89 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import os
from comps import ChatQnAGateway, MicroService, ServiceOrchestrator, ServiceType
# Endpoints of the remote micro-services, overridable via environment variables.
# Defaults match the docker-compose/Kubernetes service ports for this example.
MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "0.0.0.0")
MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))
GUARDRAIL_SERVICE_HOST_IP = os.getenv("GUARDRAIL_SERVICE_HOST_IP", "0.0.0.0")
GUARDRAIL_SERVICE_PORT = int(os.getenv("GUARDRAIL_SERVICE_PORT", 9090))
EMBEDDING_SERVICE_HOST_IP = os.getenv("EMBEDDING_SERVICE_HOST_IP", "0.0.0.0")
EMBEDDING_SERVICE_PORT = int(os.getenv("EMBEDDING_SERVICE_PORT", 6000))
RETRIEVER_SERVICE_HOST_IP = os.getenv("RETRIEVER_SERVICE_HOST_IP", "0.0.0.0")
RETRIEVER_SERVICE_PORT = int(os.getenv("RETRIEVER_SERVICE_PORT", 7000))
RERANK_SERVICE_HOST_IP = os.getenv("RERANK_SERVICE_HOST_IP", "0.0.0.0")
RERANK_SERVICE_PORT = int(os.getenv("RERANK_SERVICE_PORT", 8000))
LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0")
LLM_SERVICE_PORT = int(os.getenv("LLM_SERVICE_PORT", 9000))
class ChatQnAService:
    """ChatQnA mega-service with an input guardrail in front of the RAG pipeline.

    Pipeline: guardrail_in -> embedding -> retriever -> rerank -> llm, exposed
    through a ChatQnAGateway listening on `port`.
    """

    def __init__(self, host="0.0.0.0", port=8000):
        self.host = host
        self.port = port
        self.megaservice = ServiceOrchestrator()

    def add_remote_service(self):
        """Register the remote micro-services and wire the processing graph."""
        guardrail_in = MicroService(
            name="guardrail_in",
            host=GUARDRAIL_SERVICE_HOST_IP,
            port=GUARDRAIL_SERVICE_PORT,
            endpoint="/v1/guardrails",
            use_remote_service=True,
            service_type=ServiceType.GUARDRAIL,
        )
        embedding = MicroService(
            name="embedding",
            host=EMBEDDING_SERVICE_HOST_IP,
            port=EMBEDDING_SERVICE_PORT,
            endpoint="/v1/embeddings",
            use_remote_service=True,
            service_type=ServiceType.EMBEDDING,
        )
        retriever = MicroService(
            name="retriever",
            host=RETRIEVER_SERVICE_HOST_IP,
            port=RETRIEVER_SERVICE_PORT,
            endpoint="/v1/retrieval",
            use_remote_service=True,
            service_type=ServiceType.RETRIEVER,
        )
        rerank = MicroService(
            name="rerank",
            host=RERANK_SERVICE_HOST_IP,
            port=RERANK_SERVICE_PORT,
            endpoint="/v1/reranking",
            use_remote_service=True,
            service_type=ServiceType.RERANK,
        )
        llm = MicroService(
            name="llm",
            host=LLM_SERVICE_HOST_IP,
            port=LLM_SERVICE_PORT,
            endpoint="/v1/chat/completions",
            use_remote_service=True,
            service_type=ServiceType.LLM,
        )
        # An output guardrail is sketched but currently disabled:
        # guardrail_out = MicroService(
        #     name="guardrail_out",
        #     host=GUARDRAIL_SERVICE_HOST_IP,
        #     port=GUARDRAIL_SERVICE_PORT,
        #     endpoint="/v1/guardrails",
        #     use_remote_service=True,
        #     service_type=ServiceType.GUARDRAIL,
        # )
        # self.megaservice.add(guardrail_in).add(embedding).add(retriever).add(rerank).add(llm).add(guardrail_out)
        self.megaservice.add(guardrail_in).add(embedding).add(retriever).add(rerank).add(llm)
        self.megaservice.flow_to(guardrail_in, embedding)
        self.megaservice.flow_to(embedding, retriever)
        self.megaservice.flow_to(retriever, rerank)
        self.megaservice.flow_to(rerank, llm)
        # self.megaservice.flow_to(llm, guardrail_out)
        self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)
if __name__ == "__main__":
    # Build the guarded pipeline and start serving on the mega-service port.
    chatqna = ChatQnAService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT)
    chatqna.add_remote_service()

View File

@@ -0,0 +1,275 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import argparse
import json
import os
import re
from comps import ChatQnAGateway, MicroService, ServiceOrchestrator, ServiceType
from langchain_core.prompts import PromptTemplate
class ChatTemplate:
    """Builds the RAG prompt sent to the LLM, choosing an English or Chinese
    template based on the language of the retrieved context."""

    @staticmethod
    def generate_rag_prompt(question, documents):
        """Return a prompt embedding `question` and the newline-joined `documents`.

        The Chinese template is used when at least 30% of the context characters
        fall in the CJK Unified Ideographs range; otherwise the English one.
        An empty context always selects the English template.
        """
        context_str = "\n".join(documents)
        # Short-circuit keeps the division safe when context_str is empty.
        use_chinese = bool(context_str) and (
            len(re.findall("[\u4E00-\u9FFF]", context_str)) / len(context_str) >= 0.3
        )
        if use_chinese:
            template = """
### 你将扮演一个乐于助人、尊重他人并诚实的助手,你的目标是帮助用户解答问题。有效地利用来自本地知识库的搜索结果。确保你的回答中只包含相关信息。如果你不确定问题的答案,请避免分享不准确的信息。
### 搜索结果:{context}
### 问题:{question}
### 回答:
"""
        else:
            template = """
### You are a helpful, respectful and honest assistant to help the user with questions. \
Please refer to the search results obtained from the local knowledge base. \
But be careful to not incorporate the information that you think is not relevant to the question. \
If you don't know the answer to a question, please don't share false information. \n
### Search results: {context} \n
### Question: {question} \n
### Answer:
"""
        return template.format(context=context_str, question=question)
# Mega-service bind address/port (the public ChatQnA gateway).
MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "0.0.0.0")
MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))
# Legacy micro-service wrapper endpoints kept for reference; this variant talks
# to the model servers (TEI/TGI) directly via the *_SERVER_* settings below.
# EMBEDDING_SERVICE_HOST_IP = os.getenv("EMBEDDING_SERVICE_HOST_IP", "0.0.0.0")
# EMBEDDING_SERVICE_PORT = int(os.getenv("EMBEDDING_SERVICE_PORT", 6000))
# RETRIEVER_SERVICE_HOST_IP = os.getenv("RETRIEVER_SERVICE_HOST_IP", "0.0.0.0")
# RETRIEVER_SERVICE_PORT = int(os.getenv("RETRIEVER_SERVICE_PORT", 7000))
# RERANK_SERVICE_HOST_IP = os.getenv("RERANK_SERVICE_HOST_IP", "0.0.0.0")
# RERANK_SERVICE_PORT = int(os.getenv("RERANK_SERVICE_PORT", 8000))
# LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0")
# LLM_SERVICE_PORT = int(os.getenv("LLM_SERVICE_PORT", 9000))
# Direct model-server endpoints; defaults match the Kubernetes Service ports.
EMBEDDING_SERVER_HOST_IP = os.getenv("EMBEDDING_SERVER_HOST_IP", "0.0.0.0")
EMBEDDING_SERVER_PORT = int(os.getenv("EMBEDDING_SERVER_PORT", 6006))
RETRIEVER_SERVICE_HOST_IP = os.getenv("RETRIEVER_SERVICE_HOST_IP", "0.0.0.0")
RETRIEVER_SERVICE_PORT = int(os.getenv("RETRIEVER_SERVICE_PORT", 7000))
RERANK_SERVER_HOST_IP = os.getenv("RERANK_SERVER_HOST_IP", "0.0.0.0")
RERANK_SERVER_PORT = int(os.getenv("RERANK_SERVER_PORT", 8808))
LLM_SERVER_HOST_IP = os.getenv("LLM_SERVER_HOST_IP", "0.0.0.0")
LLM_SERVER_PORT = int(os.getenv("LLM_SERVER_PORT", 9009))
def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
    """Adapt the payload destined for `cur_node` to that service's wire format.

    Monkey-patched onto ServiceOrchestrator (see ChatQnAService.__init__), so
    `self` is the orchestrator instance and `self.services[cur_node]` describes
    the target micro-service. Returns the (possibly replaced) inputs dict.
    """
    if self.services[cur_node].service_type == ServiceType.EMBEDDING:
        # TEI embedding server expects {"inputs": ...} rather than {"text": ...}.
        inputs["inputs"] = inputs["text"]
        del inputs["text"]
    elif self.services[cur_node].service_type == ServiceType.RETRIEVER:
        # Merge optional retriever knobs (top_k, search_type, ...) into the request.
        retriever_parameters = kwargs.get("retriever_parameters", None)
        if retriever_parameters:
            inputs.update(retriever_parameters.dict())
    elif self.services[cur_node].service_type == ServiceType.LLM:
        # Convert TGI/vLLM-style inputs to the unified OpenAI
        # /v1/chat/completions request format.
        next_inputs = {}
        next_inputs["model"] = "tgi"  # fake model name; keeps the format unified
        next_inputs["messages"] = [{"role": "user", "content": inputs["inputs"]}]
        next_inputs["max_tokens"] = llm_parameters_dict["max_tokens"]
        next_inputs["top_p"] = llm_parameters_dict["top_p"]
        next_inputs["stream"] = inputs["streaming"]
        next_inputs["frequency_penalty"] = inputs["frequency_penalty"]
        next_inputs["presence_penalty"] = inputs["presence_penalty"]
        next_inputs["repetition_penalty"] = inputs["repetition_penalty"]
        next_inputs["temperature"] = inputs["temperature"]
        inputs = next_inputs
    return inputs
def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_dict, **kwargs):
    """Adapt the response of `cur_node` into the payload for its downstream node.

    Monkey-patched onto ServiceOrchestrator. Handles:
    - EMBEDDING: wrap the embedding vector with the original text.
    - RETRIEVER: either forward (query, texts) to the reranker, or build the
      final RAG prompt directly (pruning the rerank node when no docs matched).
    - RERANK: keep the top_n docs by reranker score and build the RAG prompt.
    - anything else (e.g. LLM): pass the data through unchanged.
    """
    next_data = {}
    if self.services[cur_node].service_type == ServiceType.EMBEDDING:
        assert isinstance(data, list)
        next_data = {"text": inputs["inputs"], "embedding": data[0]}
    elif self.services[cur_node].service_type == ServiceType.RETRIEVER:
        docs = [doc["text"] for doc in data["retrieved_docs"]]

        with_rerank = runtime_graph.downstream(cur_node)[0].startswith("rerank")
        if with_rerank and docs:
            # Forward query + candidate texts to the reranker.
            next_data["query"] = data["initial_query"]
            next_data["texts"] = [doc["text"] for doc in data["retrieved_docs"]]
        else:
            # Forward straight to the LLM.
            if not docs and with_rerank:
                # No docs retrieved: splice the rerank node out of
                # retriever -> rerank -> llm so the flow still reaches the LLM.
                for ds in reversed(runtime_graph.downstream(cur_node)):
                    for nds in runtime_graph.downstream(ds):
                        runtime_graph.add_edge(cur_node, nds)
                    runtime_graph.delete_node_if_exists(ds)
            # If the user supplied a chat template, format with it;
            # otherwise fall back to the default RAG template.
            prompt = data["initial_query"]
            chat_template = llm_parameters_dict["chat_template"]
            if chat_template:
                prompt_template = PromptTemplate.from_template(chat_template)
                input_variables = prompt_template.input_variables
                if sorted(input_variables) == ["context", "question"]:
                    prompt = prompt_template.format(question=data["initial_query"], context="\n".join(docs))
                elif input_variables == ["question"]:
                    prompt = prompt_template.format(question=data["initial_query"])
                else:
                    print(f"{prompt_template} not used, we only support 2 input variables ['question', 'context']")
                    prompt = ChatTemplate.generate_rag_prompt(data["initial_query"], docs)
            else:
                prompt = ChatTemplate.generate_rag_prompt(data["initial_query"], docs)
            next_data["inputs"] = prompt
    elif self.services[cur_node].service_type == ServiceType.RERANK:
        # Keep the top_n documents ranked by the reranker's scores.
        reranker_parameters = kwargs.get("reranker_parameters", None)
        top_n = reranker_parameters.top_n if reranker_parameters else 1
        docs = inputs["texts"]
        reranked_docs = []
        for best_response in data[:top_n]:
            reranked_docs.append(docs[best_response["index"]])
        # Build the prompt from the RERANKED docs (previously the pre-rerank
        # `docs` list was used, which made the rerank step a no-op).
        prompt = inputs["query"]
        chat_template = llm_parameters_dict["chat_template"]
        if chat_template:
            prompt_template = PromptTemplate.from_template(chat_template)
            input_variables = prompt_template.input_variables
            if sorted(input_variables) == ["context", "question"]:
                prompt = prompt_template.format(question=prompt, context="\n".join(reranked_docs))
            elif input_variables == ["question"]:
                prompt = prompt_template.format(question=prompt)
            else:
                print(f"{prompt_template} not used, we only support 2 input variables ['question', 'context']")
                prompt = ChatTemplate.generate_rag_prompt(prompt, reranked_docs)
        else:
            prompt = ChatTemplate.generate_rag_prompt(prompt, reranked_docs)
        next_data["inputs"] = prompt
    else:
        # LLM (and any other) responses pass through unchanged instead of being
        # replaced by an empty dict.
        next_data = data
    return next_data
def align_generator(self, gen, **kwargs):
    """Normalize a raw SSE byte stream from TGI/vLLM into OpenAI-style SSE lines.

    Each upstream chunk looks like:
      b'data:{"id":"","object":"text_completion",...,"choices":[{"delta":{"content":"?"},"finish_reason":null}]}\n\n'
    The delta content is re-emitted as `data: ...` lines; chunks whose JSON
    cannot be parsed are forwarded verbatim (repr-encoded) rather than dropped,
    and the stream is terminated with a [DONE] sentinel.
    """
    for line in gen:
        line = line.decode("utf-8")
        # Extract the JSON object embedded in the "data:" line.
        start = line.find("{")
        end = line.rfind("}") + 1
        json_str = line[start:end]
        try:
            # Sometimes the upstream yields an empty chunk; fall back below.
            json_data = json.loads(json_str)
            if json_data["choices"][0]["finish_reason"] != "eos_token":
                yield f"data: {repr(json_data['choices'][0]['delta']['content'].encode('utf-8'))}\n\n"
        except Exception:
            yield f"data: {repr(json_str.encode('utf-8'))}\n\n"
    yield "data: [DONE]\n\n"
class ChatQnAService:
    """ChatQnA mega-service talking directly to the model servers (TEI/TGI).

    Pipeline: embedding -> retriever -> rerank -> llm (the rerank hop is
    optional via add_remote_service_without_rerank), exposed through a
    ChatQnAGateway listening on `port`.
    """

    def __init__(self, host="0.0.0.0", port=8000):
        self.host = host
        self.port = port
        # Patch the orchestrator with the payload adapters defined above so
        # every hop speaks its downstream service's wire format.
        ServiceOrchestrator.align_inputs = align_inputs
        ServiceOrchestrator.align_outputs = align_outputs
        ServiceOrchestrator.align_generator = align_generator
        self.megaservice = ServiceOrchestrator()

    def add_remote_service(self):
        """Wire the full embedding -> retriever -> rerank -> llm graph."""
        embedding = MicroService(
            name="embedding",
            host=EMBEDDING_SERVER_HOST_IP,
            port=EMBEDDING_SERVER_PORT,
            endpoint="/embed",
            use_remote_service=True,
            service_type=ServiceType.EMBEDDING,
        )
        retriever = MicroService(
            name="retriever",
            host=RETRIEVER_SERVICE_HOST_IP,
            port=RETRIEVER_SERVICE_PORT,
            endpoint="/v1/retrieval",
            use_remote_service=True,
            service_type=ServiceType.RETRIEVER,
        )
        rerank = MicroService(
            name="rerank",
            host=RERANK_SERVER_HOST_IP,
            port=RERANK_SERVER_PORT,
            endpoint="/rerank",
            use_remote_service=True,
            service_type=ServiceType.RERANK,
        )
        llm = MicroService(
            name="llm",
            host=LLM_SERVER_HOST_IP,
            port=LLM_SERVER_PORT,
            endpoint="/v1/chat/completions",
            use_remote_service=True,
            service_type=ServiceType.LLM,
        )
        self.megaservice.add(embedding).add(retriever).add(rerank).add(llm)
        self.megaservice.flow_to(embedding, retriever)
        self.megaservice.flow_to(retriever, rerank)
        self.megaservice.flow_to(rerank, llm)
        self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)

    def add_remote_service_without_rerank(self):
        """Wire the reduced embedding -> retriever -> llm graph (no reranker)."""
        embedding = MicroService(
            name="embedding",
            host=EMBEDDING_SERVER_HOST_IP,
            port=EMBEDDING_SERVER_PORT,
            endpoint="/embed",
            use_remote_service=True,
            service_type=ServiceType.EMBEDDING,
        )
        retriever = MicroService(
            name="retriever",
            host=RETRIEVER_SERVICE_HOST_IP,
            port=RETRIEVER_SERVICE_PORT,
            endpoint="/v1/retrieval",
            use_remote_service=True,
            service_type=ServiceType.RETRIEVER,
        )
        llm = MicroService(
            name="llm",
            host=LLM_SERVER_HOST_IP,
            port=LLM_SERVER_PORT,
            endpoint="/v1/chat/completions",
            use_remote_service=True,
            service_type=ServiceType.LLM,
        )
        self.megaservice.add(embedding).add(retriever).add(llm)
        self.megaservice.flow_to(embedding, retriever)
        self.megaservice.flow_to(retriever, llm)
        self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)
if __name__ == "__main__":
    # CLI entry point: optionally drop the rerank stage from the pipeline.
    cli = argparse.ArgumentParser()
    cli.add_argument("--without-rerank", action="store_true")
    opts = cli.parse_args()

    service = ChatQnAService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT)
    if opts.without_rerank:
        service.add_remote_service_without_rerank()
    else:
        service.add_remote_service()

View File

@@ -0,0 +1,57 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import os
from comps import ChatQnAGateway, MicroService, ServiceOrchestrator, ServiceType
# Network endpoints for the mega-service and its dependent micro-services.
# Each value is overridable via the environment; defaults target localhost.
MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "0.0.0.0")
MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))
EMBEDDING_SERVICE_HOST_IP = os.getenv("EMBEDDING_SERVICE_HOST_IP", "0.0.0.0")
EMBEDDING_SERVICE_PORT = int(os.getenv("EMBEDDING_SERVICE_PORT", 6000))
RETRIEVER_SERVICE_HOST_IP = os.getenv("RETRIEVER_SERVICE_HOST_IP", "0.0.0.0")
RETRIEVER_SERVICE_PORT = int(os.getenv("RETRIEVER_SERVICE_PORT", 7000))
LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0")
LLM_SERVICE_PORT = int(os.getenv("LLM_SERVICE_PORT", 9000))
class ChatQnAService:
    """ChatQnA mega-service variant chaining embedding -> retriever -> llm.

    Registers the three remote micro-services with a ServiceOrchestrator
    and exposes the composed pipeline through a ChatQnAGateway.
    """

    def __init__(self, host="0.0.0.0", port=8000):
        # host is stored but unused below; the gateway binds 0.0.0.0 explicitly.
        self.host = host
        self.port = port
        self.megaservice = ServiceOrchestrator()

    def add_remote_service(self):
        """Declare the remote micro-services and connect them into a flow."""
        embedding = MicroService(
            name="embedding",
            host=EMBEDDING_SERVICE_HOST_IP,
            port=EMBEDDING_SERVICE_PORT,
            endpoint="/v1/embeddings",
            use_remote_service=True,
            service_type=ServiceType.EMBEDDING,
        )
        retriever = MicroService(
            name="retriever",
            host=RETRIEVER_SERVICE_HOST_IP,
            port=RETRIEVER_SERVICE_PORT,
            endpoint="/v1/retrieval",
            use_remote_service=True,
            service_type=ServiceType.RETRIEVER,
        )
        llm = MicroService(
            name="llm",
            host=LLM_SERVICE_HOST_IP,
            port=LLM_SERVICE_PORT,
            endpoint="/v1/chat/completions",
            use_remote_service=True,
            service_type=ServiceType.LLM,
        )
        # Register all nodes, then wire the processing order between them.
        self.megaservice.add(embedding).add(retriever).add(llm)
        self.megaservice.flow_to(embedding, retriever)
        self.megaservice.flow_to(retriever, llm)
        self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)
if __name__ == "__main__":
    # Entry point: build the mega-service and attach the gateway.
    chatqna = ChatQnAService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT)
    chatqna.add_remote_service()

View File

@@ -2,111 +2,11 @@
This document outlines the deployment process for a ChatQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on AIPC. The steps include Docker image creation, container deployment via Docker Compose, and service execution to integrate microservices such as `embedding`, `retriever`, `rerank`, and `llm`.
## Prerequisites
We use [Ollama](https://ollama.com/) as our LLM service for AIPC.
Please follow the instructions to set up Ollama on your PC. This will set the entrypoint needed for the Ollama to suit the ChatQnA examples.
### Set Up Ollama LLM Service
#### Install Ollama Service
Install Ollama service with one command:
```
curl -fsSL https://ollama.com/install.sh | sh
```
#### Set Ollama Service Configuration
Ollama Service Configuration file is /etc/systemd/system/ollama.service. Edit the file to set OLLAMA_HOST environment.
Replace **<host_ip>** with your host IPv4 address (please use the external public IP). For example, if the host_ip is 10.132.x.y, then `Environment="OLLAMA_HOST=10.132.x.y:11434"`.
```
Environment="OLLAMA_HOST=host_ip:11434"
```
#### Set https_proxy environment for Ollama
If your system access network through proxy, add https_proxy in Ollama Service Configuration file
```
Environment="https_proxy=Your_HTTPS_Proxy"
```
#### Restart Ollama services
```
$ sudo systemctl daemon-reload
$ sudo systemctl restart ollama.service
```
#### Check the service started
```
netstat -tuln | grep 11434
```
The output are:
```
tcp 0 0 10.132.x.y:11434 0.0.0.0:* LISTEN
```
#### Pull Ollama LLM model
Run the command to download LLM models. The <host_ip> is the one set in [Ollama Service Configuration](#Set-Ollama-Service-Configuration)
```
export host_ip=<host_ip>
export OLLAMA_HOST=http://${host_ip}:11434
ollama pull llama3.2
```
After downloading the models, you can list them with `ollama list`.
The output should be similar to the following:
```
NAME ID SIZE MODIFIED
llama3.2:latest a80c4f17acd5 2.0 GB 2 minutes ago
```
### Consume Ollama LLM Service
Access ollama service to verify that the ollama is functioning correctly.
```bash
curl http://${host_ip}:11434/api/generate -d '{"model": "llama3.2", "prompt":"What is Deep Learning?"}'
```
The outputs are similar to these:
```
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.098813868Z","response":"Deep","done":false}
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.124514468Z","response":" learning","done":false}
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.149754216Z","response":" is","done":false}
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.180420784Z","response":" a","done":false}
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.229185873Z","response":" subset","done":false}
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.263956118Z","response":" of","done":false}
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.289097354Z","response":" machine","done":false}
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.316838918Z","response":" learning","done":false}
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.342309506Z","response":" that","done":false}
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.367221264Z","response":" involves","done":false}
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.39205893Z","response":" the","done":false}
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.417933974Z","response":" use","done":false}
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.443110388Z","response":" of","done":false}
...
```
## 🚀 Build Docker Images
First of all, you need to build Docker Images locally and install the python package of it.
```bash
mkdir ~/OPEA -p
cd ~/OPEA
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
```
@@ -116,60 +16,112 @@ If you are in a proxy environment, set the proxy-related environment variables:
export http_proxy="Your_HTTP_Proxy"
export https_proxy="Your_HTTPs_Proxy"
### 1. Build Retriever Image
### 1. Build Embedding Image
```bash
docker build --no-cache -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/tei/langchain/Dockerfile .
```
### 2. Build Retriever Image
```bash
docker build --no-cache -t opea/retriever-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/redis/langchain/Dockerfile .
```
### 2 Build LLM Image
### 3. Build Rerank Image
```bash
docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/tei/Dockerfile .
```
### 4. Set up Ollama Service and Build LLM Image
We use [Ollama](https://ollama.com/) as our LLM service for AIPC.
Please set up Ollama on your PC by following the instructions below. This will set the entrypoint needed for Ollama to suit the ChatQnA examples.
#### 4.1 Set Up Ollama LLM Service
Install Ollama service with one command
curl -fsSL https://ollama.com/install.sh | sh
##### Set Ollama Service Configuration
Ollama Service Configuration file is /etc/systemd/system/ollama.service. Edit the file to set OLLAMA_HOST environment (Replace **${host_ip}** with your host IPV4).
```
Environment="OLLAMA_HOST=${host_ip}:11434"
```
##### Set https_proxy environment for Ollama
if your system access network through proxy, add https_proxy in Ollama Service Configuration file
```
Environment="https_proxy=Your_HTTPS_Proxy"
```
##### Restart Ollama services
```
$ sudo systemctl daemon-reload
$ sudo systemctl restart ollama.service
```
##### Pull LLM model
```
#export OLLAMA_HOST=http://${host_ip}:11434
#ollama pull llama3
#ollama list
NAME ID SIZE MODIFIED
llama3:latest 365c0bd3c000 4.7 GB 5 days ago
```
#### 4.2 Build LLM Image
```bash
docker build --no-cache -t opea/llm-ollama:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/ollama/langchain/Dockerfile .
```
### 3. Build Dataprep Image
### 5. Build Dataprep Image
```bash
docker build --no-cache -t opea/dataprep-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain/Dockerfile .
cd ..
```
### 4. Build MegaService Docker Image
### 6. Build MegaService Docker Image
To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `chatqna.py` Python script. Build MegaService Docker image via below command:
```bash
cd ~/OPEA
git clone https://github.com/opea-project/GenAIExamples.git
cd GenAIExamples/ChatQnA
docker build --no-cache -t opea/chatqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
docker build --no-cache -t opea/chatqna:latest -f Dockerfile .
cd ../../..
```
### 5. Build UI Docker Image
### 7. Build UI Docker Image
Build frontend Docker image via below command:
```bash
cd ~/OPEA/GenAIExamples/ChatQnA/ui
cd GenAIExamples/ChatQnA/ui
docker build --no-cache -t opea/chatqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile .
cd ../../../..
```
### 6. Build Nginx Docker Image
```bash
cd GenAIComps
docker build -t opea/nginx:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/nginx/Dockerfile .
```
Then run the command `docker images`, you will have the following 6 Docker Images:
Then run the command `docker images`, you will have the following 7 Docker Images:
1. `opea/dataprep-redis:latest`
2. `opea/retriever-redis:latest`
3. `opea/llm-ollama:latest`
4. `opea/chatqna:latest`
5. `opea/chatqna-ui:latest`
6. `opea/nginx:latest`
2. `opea/embedding-tei:latest`
3. `opea/retriever-redis:latest`
4. `opea/reranking-tei:latest`
5. `opea/llm-ollama:latest`
6. `opea/chatqna:latest`
7. `opea/chatqna-ui:latest`
## 🚀 Start Microservices
@@ -209,10 +161,21 @@ export http_proxy=${your_http_proxy}
export https_proxy=${your_http_proxy}
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006"
export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
export REDIS_URL="redis://${host_ip}:6379"
export INDEX_NAME="rag-redis"
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
export MEGA_SERVICE_HOST_IP=${host_ip}
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
export RERANK_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/chatqna"
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
export OLLAMA_ENDPOINT=http://${host_ip}:11434
export OLLAMA_MODEL="llama3.2"
export OLLAMA_MODEL="llama3"
```
- Windows PC
@@ -220,10 +183,21 @@ export OLLAMA_MODEL="llama3.2"
```bash
set EMBEDDING_MODEL_ID=BAAI/bge-base-en-v1.5
set RERANK_MODEL_ID=BAAI/bge-reranker-base
set TEI_EMBEDDING_ENDPOINT=http://%host_ip%:6006
set TEI_RERANKING_ENDPOINT=http://%host_ip%:8808
set REDIS_URL=redis://%host_ip%:6379
set INDEX_NAME=rag-redis
set HUGGINGFACEHUB_API_TOKEN=%your_hf_api_token%
set MEGA_SERVICE_HOST_IP=%host_ip%
set EMBEDDING_SERVICE_HOST_IP=%host_ip%
set RETRIEVER_SERVICE_HOST_IP=%host_ip%
set RERANK_SERVICE_HOST_IP=%host_ip%
set LLM_SERVICE_HOST_IP=%host_ip%
set BACKEND_SERVICE_ENDPOINT=http://%host_ip%:8888/v1/chatqna
set DATAPREP_SERVICE_ENDPOINT=http://%host_ip%:6007/v1/dataprep
set OLLAMA_ENDPOINT=http://host.docker.internal:11434
set OLLAMA_MODEL="llama3.2"
set OLLAMA_MODEL="llama3"
```
Note: Please replace `host_ip` with your external IP address; do not use localhost.
@@ -233,8 +207,14 @@ Note: Please replace with `host_ip` with you external IP address, do not use loc
> Before running the docker compose command, you need to be in the folder that has the docker compose yaml file
```bash
cd ~/OPEA/GenAIExamples/ChatQnA/docker_compose/intel/cpu/aipc/
cd GenAIExamples/ChatQnA/docker_compose/intel/cpu/aipc/
docker compose up -d
# let ollama service runs
# e.g. ollama run llama3
OLLAMA_HOST=${host_ip}:11434 ollama run $OLLAMA_MODEL
# for windows
# ollama run %OLLAMA_MODEL%
```
### Validate Microservices
@@ -251,7 +231,16 @@ For details on how to verify the correctness of the response, refer to [how-to-v
-H 'Content-Type: application/json'
```
2. Retriever Microservice
2. Embedding Microservice
```bash
curl http://${host_ip}:6000/v1/embeddings\
-X POST \
-d '{"text":"hello"}' \
-H 'Content-Type: application/json'
```
3. Retriever Microservice
To validate the retriever microservice, you need to generate a mock embedding vector of length 768 in Python script:
```bash
@@ -262,7 +251,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
-H 'Content-Type: application/json'
```
3. TEI Reranking Service
4. TEI Reranking Service
```bash
curl http://${host_ip}:8808/rerank \
@@ -271,13 +260,22 @@ For details on how to verify the correctness of the response, refer to [how-to-v
-H 'Content-Type: application/json'
```
4. Ollama Service
5. Reranking Microservice
```bash
curl http://${host_ip}:11434/api/generate -d '{"model": "llama3.2", "prompt":"What is Deep Learning?"}'
curl http://${host_ip}:8000/v1/reranking\
-X POST \
-d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
-H 'Content-Type: application/json'
```
5. LLM Microservice
6. Ollama Service
```bash
curl http://${host_ip}:11434/api/generate -d '{"model": "llama3", "prompt":"What is Deep Learning?"}'
```
7. LLM Microservice
```bash
curl http://${host_ip}:9000/v1/chat/completions\
@@ -286,51 +284,37 @@ For details on how to verify the correctness of the response, refer to [how-to-v
-H 'Content-Type: application/json'
```
6. MegaService
8. MegaService
```bash
curl http://${host_ip}:8888/v1/chatqna -H "Content-Type: application/json" -d '{
"messages": "What is the revenue of Nike in 2023?"
"messages": "What is the revenue of Nike in 2023?", "model": "'"${OLLAMA_MODEL}"'"
}'
```
7. Upload RAG Files through Dataprep Microservice (Optional)
9. Dataprep Microservice (Optional)
To chat with retrieved information, you need to upload a file using Dataprep service.
If you want to update the default knowledge base, you can use the following commands:
Here is an example of Nike 2023 pdf file.
Update Knowledge Base via Local File Upload:
```bash
# download pdf file
wget https://raw.githubusercontent.com/opea-project/GenAIComps/main/comps/retrievers/redis/data/nke-10k-2023.pdf
```bash
curl -X POST "http://${host_ip}:6007/v1/dataprep" \
-H "Content-Type: multipart/form-data" \
-F "files=@./nke-10k-2023.pdf"
```
# upload pdf file with dataprep
curl -X POST "http://${host_ip}:6007/v1/dataprep" \
-H "Content-Type: multipart/form-data" \
-F "files=@./nke-10k-2023.pdf"
```
This command updates a knowledge base by uploading a local file for processing. Update the file path according to your environment.
This command updates a knowledge base by uploading a local file for processing. Update the file path according to your environment.
Add Knowledge Base via HTTP Links:
Alternatively, you can add knowledge base via HTTP Links:
```bash
curl -X POST "http://${host_ip}:6007/v1/dataprep" \
-H "Content-Type: multipart/form-data" \
-F 'link_list=["https://opea.dev"]'
```
```bash
curl -X POST "http://${host_ip}:6007/v1/dataprep" \
-H "Content-Type: multipart/form-data" \
-F 'link_list=["https://opea.dev"]'
```
This command updates a knowledge base by submitting a list of HTTP links for processing.
To check the uploaded files, you are able to get the file list that uploaded:
```bash
curl -X POST "http://${host_ip}:6007/v1/dataprep/get_file" \
-H "Content-Type: application/json"
```
the output is:
`[{"name":"nke-10k-2023.pdf","id":"nke-10k-2023.pdf","type":"File","parent":""}]`
This command updates a knowledge base by submitting a list of HTTP links for processing.
## 🚀 Launch the UI

View File

@@ -13,17 +13,15 @@ services:
container_name: dataprep-redis-server
depends_on:
- redis-vector-db
- tei-embedding-service
ports:
- "6007:6007"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
REDIS_URL: redis://redis-vector-db:6379
REDIS_HOST: redis-vector-db
REDIS_URL: ${REDIS_URL}
INDEX_NAME: ${INDEX_NAME}
TEI_ENDPOINT: http://tei-embedding-service:80
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
tei-embedding-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
@@ -38,6 +36,20 @@ services:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
embedding:
image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
container_name: embedding-tei-server
depends_on:
- tei-embedding-service
ports:
- "6000:6000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
restart: unless-stopped
retriever:
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
container_name: retriever-redis-server
@@ -50,11 +62,9 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
REDIS_URL: redis://redis-vector-db:6379
REDIS_HOST: redis-vector-db
REDIS_URL: ${REDIS_URL}
INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
restart: unless-stopped
tei-reranking-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
@@ -72,6 +82,23 @@ services:
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
reranking:
image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
container_name: reranking-tei-aipc-server
depends_on:
- tei-reranking-service
ports:
- "8000:8000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
restart: unless-stopped
llm:
image: ${REGISTRY:-opea}/llm-ollama
container_name: llm-ollama
@@ -82,6 +109,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
@@ -92,10 +120,11 @@ services:
container_name: chatqna-aipc-backend-server
depends_on:
- redis-vector-db
- dataprep-redis-service
- tei-embedding-service
- embedding
- retriever
- tei-reranking-service
- reranking
- llm
ports:
- "8888:8888"
@@ -103,15 +132,11 @@ services:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=chaqna-aipc-backend-server
- EMBEDDING_SERVER_HOST_IP=tei-embedding-service
- EMBEDDING_SERVER_PORT=80
- RETRIEVER_SERVICE_HOST_IP=retriever
- RERANK_SERVER_HOST_IP=tei-reranking-service
- RERANK_SERVER_PORT=80
- LLM_SERVER_HOST_IP=llm
- LLM_SERVER_PORT=9000
- LOGFLAG=${LOGFLAG}
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
- EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
- RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
- RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP}
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
ipc: host
restart: always
chaqna-aipc-ui-server:
@@ -125,27 +150,8 @@ services:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
ipc: host
restart: always
chaqna-aipc-nginx-server:
image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
container_name: chaqna-aipc-nginx-server
depends_on:
- chaqna-aipc-backend-server
- chaqna-aipc-ui-server
ports:
- "${NGINX_PORT:-80}:80"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- FRONTEND_SERVICE_IP=chatqna-xeon-ui-server
- FRONTEND_SERVICE_PORT=5173
- BACKEND_SERVICE_NAME=chatqna
- BACKEND_SERVICE_IP=chatqna-xeon-backend-server
- BACKEND_SERVICE_PORT=8888
- DATAPREP_SERVICE_IP=dataprep-redis-service
- DATAPREP_SERVICE_PORT=6007
- CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
- UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT}
ipc: host
restart: always

View File

@@ -1,20 +0,0 @@
#!/usr/bin/env bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Environment setup for the ChatQnA AIPC deployment.
# Expects `your_hf_api_token` and `host_ip` to be set by the caller.
# NOTE(review): a missing variable only prints a warning — the exports below
# still run with empty values. Confirm whether a hard failure (exit/return,
# depending on whether this file is sourced) is intended.
if [ -z "${your_hf_api_token}" ]; then
echo "Error: HUGGINGFACEHUB_API_TOKEN is not set. Please set your_hf_api_token."
fi

if [ -z "${host_ip}" ]; then
echo "Error: host_ip is not set. Please set host_ip first."
fi

export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export INDEX_NAME="rag-redis"
export OLLAMA_ENDPOINT=http://${host_ip}:11434
export OLLAMA_MODEL="llama3.2"

View File

@@ -97,20 +97,61 @@ After launching your instance, you can connect to it using SSH (for Linux instan
First of all, you need to build Docker Images locally and install the python package of it.
### 1. Build Retriever Image
### 1. Build Embedding Image
```bash
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
docker build --no-cache -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/tei/langchain/Dockerfile .
```
### 2. Build Retriever Image
```bash
docker build --no-cache -t opea/retriever-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/redis/langchain/Dockerfile .
```
### 2. Build Dataprep Image
### 3. Build Rerank Image
> Skip for ChatQnA without Rerank pipeline
```bash
docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/tei/Dockerfile .
```
### 4. Build LLM Image
#### Use TGI as backend
```bash
docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile .
```
#### Use vLLM as backend
Build vLLM docker.
```bash
git clone https://github.com/vllm-project/vllm.git
cd ./vllm/
docker build --no-cache -t opea/vllm:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile.cpu .
cd ..
```
Build microservice.
```bash
docker build --no-cache -t opea/llm-vllm:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/vllm/langchain/Dockerfile .
```
### 5. Build Dataprep Image
```bash
docker build --no-cache -t opea/dataprep-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain/Dockerfile .
cd ..
```
### 3. Build MegaService Docker Image
### 6. Build MegaService Docker Image
1. MegaService with Rerank
@@ -132,7 +173,7 @@ cd ..
docker build --no-cache -t opea/chatqna-without-rerank:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile.without_rerank .
```
### 4. Build UI Docker Image
### 7. Build UI Docker Image
Build frontend Docker image via below command:
@@ -141,7 +182,7 @@ cd GenAIExamples/ChatQnA/ui
docker build --no-cache -t opea/chatqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile .
```
### 5. Build Conversational React UI Docker Image (Optional)
### 8. Build Conversational React UI Docker Image (Optional)
Build frontend Docker image that enables Conversational experience with ChatQnA megaservice via below command:
@@ -152,20 +193,23 @@ cd GenAIExamples/ChatQnA/ui
docker build --no-cache -t opea/chatqna-conversation-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.react .
```
### 6. Build Nginx Docker Image
### 9. Build Nginx Docker Image
```bash
cd GenAIComps
docker build -t opea/nginx:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/nginx/Dockerfile .
```
Then run the command `docker images`, you will have the following 5 Docker Images:
Then run the command `docker images`, you will have the following 8 Docker Images:
1. `opea/dataprep-redis:latest`
2. `opea/retriever-redis:latest`
3. `opea/chatqna:latest` or `opea/chatqna-without-rerank:latest`
4. `opea/chatqna-ui:latest`
5. `opea/nginx:latest`
2. `opea/embedding-tei:latest`
3. `opea/retriever-redis:latest`
4. `opea/reranking-tei:latest`
5. `opea/llm-tgi:latest` or `opea/llm-vllm:latest`
6. `opea/chatqna:latest` or `opea/chatqna-without-rerank:latest`
7. `opea/chatqna-ui:latest`
8. `opea/nginx:latest`
## 🚀 Start Microservices
@@ -271,7 +315,16 @@ For details on how to verify the correctness of the response, refer to [how-to-v
-H 'Content-Type: application/json'
```
2. Retriever Microservice
2. Embedding Microservice
```bash
curl http://${host_ip}:6000/v1/embeddings\
-X POST \
-d '{"text":"hello"}' \
-H 'Content-Type: application/json'
```
3. Retriever Microservice
To consume the retriever microservice, you need to generate a mock embedding vector by Python script. The length of embedding vector
is determined by the embedding model.
@@ -287,7 +340,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
-H 'Content-Type: application/json'
```
3. TEI Reranking Service
4. TEI Reranking Service
> Skip for ChatQnA without Rerank pipeline
@@ -298,7 +351,18 @@ For details on how to verify the correctness of the response, refer to [how-to-v
-H 'Content-Type: application/json'
```
4. LLM backend Service
5. Reranking Microservice
> Skip for ChatQnA without Rerank pipeline
```bash
curl http://${host_ip}:8000/v1/reranking\
-X POST \
-d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
-H 'Content-Type: application/json'
```
6. LLM backend Service
In first startup, this service will take more time to download the model files. After it's finished, the service will be ready.
@@ -331,7 +395,31 @@ For details on how to verify the correctness of the response, refer to [how-to-v
-d '{"model": "Intel/neural-chat-7b-v3-3", "prompt": "What is Deep Learning?", "max_tokens": 32, "temperature": 0}'
```
5. MegaService
7. LLM Microservice
This service depends on above LLM backend service startup. It will be ready after long time, to wait for them being ready in first startup.
```bash
# TGI service
curl http://${host_ip}:9000/v1/chat/completions\
-X POST \
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
-H 'Content-Type: application/json'
```
For parameters in TGI modes, please refer to [HuggingFace InferenceClient API](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.text_generation) (except we rename "max_new_tokens" to "max_tokens".)
```bash
# vLLM Service
curl http://${host_ip}:9000/v1/chat/completions \
-X POST \
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_p":1,"temperature":0.7,"frequency_penalty":0,"presence_penalty":0, "streaming":false}' \
-H 'Content-Type: application/json'
```
For parameters in vLLM modes, can refer to [LangChain VLLMOpenAI API](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.vllm.VLLMOpenAI.html)
8. MegaService
```bash
curl http://${host_ip}:8888/v1/chatqna -H "Content-Type: application/json" -d '{
@@ -339,7 +427,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
}'
```
6. Nginx Service
9. Nginx Service
```bash
curl http://${host_ip}:${NGINX_PORT}/v1/chatqna \
@@ -347,7 +435,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
-d '{"messages": "What is the revenue of Nike in 2023?"}'
```
7. Dataprep Microservice (Optional)
10. Dataprep Microservice (Optional)
If you want to update the default knowledge base, you can use the following commands:

View File

@@ -70,20 +70,38 @@ git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
```
### 1. Build Retriever Image
### 1. Build Embedding Image
```bash
docker build --no-cache -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/tei/langchain/Dockerfile .
```
### 2. Build Retriever Image
```bash
docker build --no-cache -t opea/retriever-qdrant:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/qdrant/haystack/Dockerfile .
```
### 2. Build Dataprep Image
### 3. Build Rerank Image
```bash
docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/tei/Dockerfile .`
```
### 4. Build LLM Image
```bash
docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile .
```
### 5. Build Dataprep Image
```bash
docker build --no-cache -t opea/dataprep-qdrant:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/qdrant/langchain/Dockerfile .
cd ..
```
### 3. Build MegaService Docker Image
### 6. Build MegaService Docker Image
To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `chatqna.py` Python script. Build MegaService Docker image via below command:
@@ -94,7 +112,7 @@ docker build --no-cache -t opea/chatqna:latest --build-arg https_proxy=$https_pr
cd ../../..
```
### 4. Build UI Docker Image
### 7. Build UI Docker Image
Build frontend Docker image via below command:
@@ -104,7 +122,7 @@ docker build --no-cache -t opea/chatqna-ui:latest --build-arg https_proxy=$https
cd ../../../..
```
### 5. Build Conversational React UI Docker Image (Optional)
### 8. Build Conversational React UI Docker Image (Optional)
Build frontend Docker image that enables Conversational experience with ChatQnA megaservice via below command:
@@ -118,20 +136,15 @@ docker build --no-cache -t opea/chatqna-conversation-ui:latest --build-arg https
cd ../../../..
```
### 6. Build Nginx Docker Image
```bash
cd GenAIComps
docker build -t opea/nginx:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/nginx/Dockerfile .
```
Then run the command `docker images`, you will have the following 5 Docker Images:
Then run the command `docker images`, you will have the following 7 Docker Images:
1. `opea/dataprep-qdrant:latest`
2. `opea/retriever-qdrant:latest`
3. `opea/chatqna:latest`
4. `opea/chatqna-ui:latest`
5. `opea/nginx:latest`
2. `opea/embedding-tei:latest`
3. `opea/retriever-qdrant:latest`
4. `opea/reranking-tei:latest`
5. `opea/llm-tgi:latest`
6. `opea/chatqna:latest`
7. `opea/chatqna-ui:latest`
## 🚀 Start Microservices
@@ -180,7 +193,20 @@ export https_proxy=${your_http_proxy}
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6040"
export TEI_RERANKING_ENDPOINT="http://${host_ip}:6041"
export TGI_LLM_ENDPOINT="http://${host_ip}:6042"
export QDRANT_HOST=${host_ip}
export QDRANT_PORT=6333
export INDEX_NAME="rag-qdrant"
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
export MEGA_SERVICE_HOST_IP=${host_ip}
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
export RERANK_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8912/v1/chatqna"
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6043/v1/dataprep"
```
Note: Please replace `host_ip` with your external IP address; do not use localhost.
@@ -208,7 +234,16 @@ For details on how to verify the correctness of the response, refer to [how-to-v
-H 'Content-Type: application/json'
```
2. Retriever Microservice
2. Embedding Microservice
```bash
curl http://${host_ip}:6044/v1/embeddings\
-X POST \
-d '{"text":"hello"}' \
-H 'Content-Type: application/json'
```
3. Retriever Microservice
To consume the retriever microservice, you need to generate a mock embedding vector by Python script. The length of embedding vector
is determined by the embedding model.
@@ -224,7 +259,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
-H 'Content-Type: application/json'
```
3. TEI Reranking Service
4. TEI Reranking Service
```bash
curl http://${host_ip}:6041/rerank \
@@ -233,7 +268,16 @@ For details on how to verify the correctness of the response, refer to [how-to-v
-H 'Content-Type: application/json'
```
4. TGI Service
5. Reranking Microservice
```bash
curl http://${host_ip}:6046/v1/reranking\
-X POST \
-d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
-H 'Content-Type: application/json'
```
6. TGI Service
In first startup, this service will take more time to download the model files. After it's finished, the service will be ready.
@@ -258,7 +302,16 @@ For details on how to verify the correctness of the response, refer to [how-to-v
-H 'Content-Type: application/json'
```
5. MegaService
7. LLM Microservice
```bash
curl http://${host_ip}:6047/v1/chat/completions\
-X POST \
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
-H 'Content-Type: application/json'
```
8. MegaService
```bash
curl http://${host_ip}:8912/v1/chatqna -H "Content-Type: application/json" -d '{
@@ -266,7 +319,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
}'
```
6. Dataprep Microservice (Optional)
9. Dataprep Microservice (Optional)
If you want to update the default knowledge base, you can use the following commands:

View File

@@ -20,10 +20,10 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
REDIS_URL: redis://redis-vector-db:6379
REDIS_HOST: redis-vector-db
REDIS_URL: ${REDIS_URL}
REDIS_HOST: ${REDIS_HOST}
INDEX_NAME: ${INDEX_NAME}
TEI_ENDPOINT: http://tei-embedding-service:80
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
tei-embedding-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
@@ -38,6 +38,20 @@ services:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
embedding:
image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
container_name: embedding-tei-server
depends_on:
- tei-embedding-service
ports:
- "6000:6000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
restart: unless-stopped
retriever:
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
container_name: retriever-redis-server
@@ -50,10 +64,9 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
REDIS_URL: redis://redis-vector-db:6379
REDIS_HOST: redis-vector-db
REDIS_URL: ${REDIS_URL}
INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
tei-reranking-service:
@@ -72,6 +85,23 @@ services:
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
reranking:
image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
container_name: reranking-tei-xeon-server
depends_on:
- tei-reranking-service
ports:
- "8000:8000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
restart: unless-stopped
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
container_name: tgi-service
@@ -88,65 +118,83 @@ services:
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
chatqna-xeon-backend-server:
llm:
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
container_name: llm-tgi-server
depends_on:
- tgi-service
ports:
- "9000:9000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
restart: unless-stopped
chaqna-xeon-backend-server:
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
container_name: chatqna-xeon-backend-server
depends_on:
- redis-vector-db
- tei-embedding-service
- embedding
- dataprep-redis-service
- retriever
- tei-reranking-service
- reranking
- tgi-service
- llm
ports:
- "8888:8888"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=chatqna-xeon-backend-server
- EMBEDDING_SERVER_HOST_IP=tei-embedding-service
- EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-80}
- RETRIEVER_SERVICE_HOST_IP=retriever
- RERANK_SERVER_HOST_IP=tei-reranking-service
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
- LLM_SERVER_HOST_IP=tgi-service
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
- LOGFLAG=${LOGFLAG}
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
- EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
- RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
- RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP}
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
ipc: host
restart: always
chatqna-xeon-ui-server:
chaqna-xeon-ui-server:
image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest}
container_name: chatqna-xeon-ui-server
depends_on:
- chatqna-xeon-backend-server
- chaqna-xeon-backend-server
ports:
- "5173:5173"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
- UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT}
- GET_FILE=${DATAPREP_GET_FILE_ENDPOINT}
- DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT}
ipc: host
restart: always
chatqna-xeon-nginx-server:
chaqna-xeon-nginx-server:
image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
container_name: chatqna-xeon-nginx-server
container_name: chaqna-xeon-nginx-server
depends_on:
- chatqna-xeon-backend-server
- chatqna-xeon-ui-server
- chaqna-xeon-backend-server
- chaqna-xeon-ui-server
ports:
- "${NGINX_PORT:-80}:80"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- FRONTEND_SERVICE_IP=chatqna-xeon-ui-server
- FRONTEND_SERVICE_PORT=5173
- BACKEND_SERVICE_NAME=chatqna
- BACKEND_SERVICE_IP=chatqna-xeon-backend-server
- BACKEND_SERVICE_PORT=8888
- DATAPREP_SERVICE_IP=dataprep-redis-service
- DATAPREP_SERVICE_PORT=6007
- FRONTEND_SERVICE_IP=${FRONTEND_SERVICE_IP}
- FRONTEND_SERVICE_PORT=${FRONTEND_SERVICE_PORT}
- BACKEND_SERVICE_NAME=${BACKEND_SERVICE_NAME}
- BACKEND_SERVICE_IP=${BACKEND_SERVICE_IP}
- BACKEND_SERVICE_PORT=${BACKEND_SERVICE_PORT}
ipc: host
restart: always

View File

@@ -0,0 +1,184 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
services:
redis-vector-db:
image: redis/redis-stack:7.2.0-v9
container_name: redis-vector-db
ports:
- "6379:6379"
- "8001:8001"
dataprep-redis-service:
image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest}
container_name: dataprep-redis-server
depends_on:
- redis-vector-db
- tei-embedding-service
ports:
- "6007:6007"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
REDIS_URL: ${REDIS_URL}
REDIS_HOST: ${REDIS_HOST}
INDEX_NAME: ${INDEX_NAME}
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
tei-embedding-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: tei-embedding-server
ports:
- "6006:80"
volumes:
- "./data:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
# embedding:
# image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
# container_name: embedding-tei-server
# depends_on:
# - tei-embedding-service
# ports:
# - "6000:6000"
# ipc: host
# environment:
# no_proxy: ${no_proxy}
# http_proxy: ${http_proxy}
# https_proxy: ${https_proxy}
# TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
# restart: unless-stopped
retriever:
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
container_name: retriever-redis-server
depends_on:
- redis-vector-db
ports:
- "7000:7000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
REDIS_URL: ${REDIS_URL}
INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
tei-reranking-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: tei-reranking-server
ports:
- "8808:80"
volumes:
- "./data:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
# reranking:
# image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
# container_name: reranking-tei-xeon-server
# depends_on:
# - tei-reranking-service
# ports:
# - "8000:8000"
# ipc: host
# environment:
# no_proxy: ${no_proxy}
# http_proxy: ${http_proxy}
# https_proxy: ${https_proxy}
# TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
# HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
# HF_HUB_DISABLE_PROGRESS_BARS: 1
# HF_HUB_ENABLE_HF_TRANSFER: 0
# restart: unless-stopped
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
container_name: tgi-service
ports:
- "9009:80"
volumes:
- "./data:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
# llm:
# image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
# container_name: llm-tgi-server
# depends_on:
# - tgi-service
# ports:
# - "9000:9000"
# ipc: host
# environment:
# no_proxy: ${no_proxy}
# http_proxy: ${http_proxy}
# https_proxy: ${https_proxy}
# TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
# HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
# HF_HUB_DISABLE_PROGRESS_BARS: 1
# HF_HUB_ENABLE_HF_TRANSFER: 0
# restart: unless-stopped
chaqna-xeon-backend-server:
image: ${REGISTRY:-opea}/chatqna-no-wrapper:${TAG:-latest}
container_name: chatqna-xeon-backend-server
depends_on:
- redis-vector-db
- tei-embedding-service
# - embedding
- dataprep-redis-service
- retriever
- tei-reranking-service
# - reranking
- tgi-service
# - llm
ports:
- "8888:8888"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
- EMBEDDING_SERVER_HOST_IP=${EMBEDDING_SERVER_HOST_IP}
- RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
- RERANK_SERVER_HOST_IP=${RERANK_SERVER_HOST_IP}
- LLM_SERVER_HOST_IP=${LLM_SERVER_HOST_IP}
ipc: host
restart: always
chaqna-xeon-ui-server:
image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest}
container_name: chatqna-xeon-ui-server
depends_on:
- chaqna-xeon-backend-server
ports:
- "5173:5173"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
- UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT}
- GET_FILE=${DATAPREP_GET_FILE_ENDPOINT}
- DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT}
ipc: host
restart: always
networks:
default:
driver: bridge

View File

@@ -20,10 +20,10 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
QDRANT_HOST: qdrant-vector-db
QDRANT_HOST: ${QDRANT_HOST}
QDRANT_PORT: 6333
COLLECTION_NAME: ${INDEX_NAME}
TEI_ENDPOINT: http://tei-embedding-service:80
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
tei-embedding-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
@@ -38,6 +38,20 @@ services:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
embedding:
image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
container_name: embedding-tei-server
depends_on:
- tei-embedding-service
ports:
- "6044:6000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
restart: unless-stopped
retriever:
image: ${REGISTRY:-opea}/retriever-qdrant:${TAG:-latest}
container_name: retriever-qdrant-server
@@ -50,10 +64,9 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
QDRANT_HOST: qdrant-vector-db
QDRANT_HOST: ${QDRANT_HOST}
QDRANT_PORT: 6333
INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
restart: unless-stopped
tei-reranking-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
@@ -71,6 +84,23 @@ services:
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
reranking:
image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
container_name: reranking-tei-xeon-server
depends_on:
- tei-reranking-service
ports:
- "6046:8000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
restart: unless-stopped
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
container_name: tgi-service
@@ -87,65 +117,67 @@ services:
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
chatqna-xeon-backend-server:
llm:
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
container_name: llm-tgi-server
depends_on:
- tgi-service
ports:
- "6047:9000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
restart: unless-stopped
chaqna-xeon-backend-server:
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
container_name: chatqna-xeon-backend-server
depends_on:
- qdrant-vector-db
- tei-embedding-service
- embedding
- retriever
- tei-reranking-service
- reranking
- tgi-service
- llm
ports:
- "8912:8888"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=chatqna-xeon-backend-server
- EMBEDDING_SERVER_HOST_IP=tei-embedding-service
- EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-80}
- RETRIEVER_SERVICE_HOST_IP=retriever
- RETRIEVER_SERVICE_PORT=${RETRIEVER_SERVICE_PORT:-7000}
- RERANK_SERVER_HOST_IP=tei-reranking-service
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
- LLM_SERVER_HOST_IP=tgi-service
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
- LOGFLAG=${LOGFLAG}
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
- EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
- EMBEDDING_SERVICE_PORT=${EMBEDDING_SERVICE_PORT}
- RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
- RETRIEVER_SERVICE_PORT=${RETRIEVER_SERVICE_PORT}
- RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP}
- RERANK_SERVICE_PORT=${RERANK_SERVICE_PORT}
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
- LLM_SERVICE_PORT=${LLM_SERVICE_PORT}
ipc: host
restart: always
chatqna-xeon-ui-server:
chaqna-xeon-ui-server:
image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest}
container_name: chatqna-xeon-ui-server
depends_on:
- chatqna-xeon-backend-server
- chaqna-xeon-backend-server
ports:
- "5173:5173"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
ipc: host
restart: always
chatqna-xeon-nginx-server:
image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
container_name: chatqna-xeon-nginx-server
depends_on:
- chatqna-xeon-backend-server
- chatqna-xeon-ui-server
ports:
- "${NGINX_PORT:-80}:80"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- FRONTEND_SERVICE_IP=chatqna-xeon-ui-server
- FRONTEND_SERVICE_PORT=5173
- BACKEND_SERVICE_NAME=chatqna
- BACKEND_SERVICE_IP=chatqna-xeon-backend-server
- BACKEND_SERVICE_PORT=8888
- DATAPREP_SERVICE_IP=dataprep-qdrant-service
- DATAPREP_SERVICE_PORT=6007
- CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
- UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT}
- GET_FILE=${DATAPREP_GET_FILE_ENDPOINT}
- DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT}
ipc: host
restart: always

View File

@@ -20,10 +20,9 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
REDIS_URL: redis://redis-vector-db:6379
REDIS_HOST: redis-vector-db
REDIS_URL: ${REDIS_URL}
INDEX_NAME: ${INDEX_NAME}
TEI_ENDPOINT: http://tei-embedding-service:80
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
tei-embedding-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
@@ -38,6 +37,23 @@ services:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
embedding:
image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
container_name: embedding-tei-server
depends_on:
- tei-embedding-service
ports:
- "6000:6000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-embedding-service"
restart: unless-stopped
retriever:
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
container_name: retriever-redis-server
@@ -50,10 +66,12 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
REDIS_URL: redis://redis-vector-db:6379
REDIS_HOST: redis-vector-db
REDIS_URL: ${REDIS_URL}
INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-retriever-service"
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
tei-reranking-service:
@@ -72,6 +90,26 @@ services:
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
reranking:
image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
container_name: reranking-tei-xeon-server
depends_on:
- tei-reranking-service
ports:
- "8000:8000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-reranking-service"
restart: unless-stopped
vllm_service:
image: ${REGISTRY:-opea}/vllm:${TAG:-latest}
container_name: vllm-service
@@ -87,64 +125,67 @@ services:
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
LLM_MODEL_ID: ${LLM_MODEL_ID}
command: --model $LLM_MODEL_ID --host 0.0.0.0 --port 80
chatqna-xeon-backend-server:
llm:
image: ${REGISTRY:-opea}/llm-vllm:${TAG:-latest}
container_name: llm-vllm-server
depends_on:
- vllm_service
ports:
- "9000:9000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
vLLM_ENDPOINT: ${vLLM_LLM_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
LLM_MODEL: ${LLM_MODEL_ID}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-llm-service"
restart: unless-stopped
chaqna-xeon-backend-server:
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
container_name: chatqna-xeon-backend-server
depends_on:
- redis-vector-db
- tei-embedding-service
- embedding
- retriever
- tei-reranking-service
- reranking
- vllm_service
- llm
ports:
- "8888:8888"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=chatqna-xeon-backend-server
- EMBEDDING_SERVER_HOST_IP=tei-embedding-service
- EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-80}
- RETRIEVER_SERVICE_HOST_IP=retriever
- RERANK_SERVER_HOST_IP=tei-reranking-service
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
- LLM_SERVER_HOST_IP=vllm_service
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
- LOGFLAG=${LOGFLAG}
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
- EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
- RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
- RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP}
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
ipc: host
restart: always
chatqna-xeon-ui-server:
chaqna-xeon-ui-server:
image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest}
container_name: chatqna-xeon-ui-server
depends_on:
- chatqna-xeon-backend-server
- chaqna-xeon-backend-server
ports:
- "5173:5173"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
ipc: host
restart: always
chatqna-xeon-nginx-server:
image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
container_name: chatqna-xeon-nginx-server
depends_on:
- chatqna-xeon-backend-server
- chatqna-xeon-ui-server
ports:
- "${NGINX_PORT:-80}:80"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- FRONTEND_SERVICE_IP=chatqna-xeon-ui-server
- FRONTEND_SERVICE_PORT=5173
- BACKEND_SERVICE_NAME=chatqna
- BACKEND_SERVICE_IP=chatqna-xeon-backend-server
- BACKEND_SERVICE_PORT=8888
- DATAPREP_SERVICE_IP=dataprep-redis-service
- DATAPREP_SERVICE_PORT=6007
- CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
- UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT}
- GET_FILE=${DATAPREP_GET_FILE_ENDPOINT}
- DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT}
ipc: host
restart: always

View File

@@ -20,10 +20,10 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
REDIS_URL: redis://redis-vector-db:6379
REDIS_HOST: redis-vector-db
REDIS_URL: ${REDIS_URL}
REDIS_HOST: ${REDIS_HOST}
INDEX_NAME: ${INDEX_NAME}
TEI_ENDPOINT: http://tei-embedding-service:80
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
tei-embedding-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
@@ -38,6 +38,20 @@ services:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
embedding:
image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
container_name: embedding-tei-server
depends_on:
- tei-embedding-service
ports:
- "6000:6000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
restart: unless-stopped
retriever:
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
container_name: retriever-redis-server
@@ -50,10 +64,9 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
REDIS_URL: redis://redis-vector-db:6379
REDIS_HOST: redis-vector-db
REDIS_URL: ${REDIS_URL}
INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
tgi-service:
@@ -72,62 +85,61 @@ services:
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
chatqna-xeon-backend-server:
llm:
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
container_name: llm-tgi-server
depends_on:
- tgi-service
ports:
- "9000:9000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
restart: unless-stopped
chaqna-xeon-backend-server:
image: ${REGISTRY:-opea}/chatqna-without-rerank:${TAG:-latest}
container_name: chatqna-xeon-backend-server
depends_on:
- redis-vector-db
- tei-embedding-service
- embedding
- dataprep-redis-service
- retriever
- tgi-service
- llm
ports:
- "8888:8888"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=chatqna-xeon-backend-server
- EMBEDDING_SERVER_HOST_IP=tei-embedding-service
- EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-80}
- RETRIEVER_SERVICE_HOST_IP=retriever
- LLM_SERVER_HOST_IP=tgi-service
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
- LOGFLAG=${LOGFLAG}
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
- EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
- RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
ipc: host
restart: always
chatqna-xeon-ui-server:
chaqna-xeon-ui-server:
image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest}
container_name: chatqna-xeon-ui-server
depends_on:
- chatqna-xeon-backend-server
- chaqna-xeon-backend-server
ports:
- "5173:5173"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
ipc: host
restart: always
chatqna-xeon-nginx-server:
image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
container_name: chatqna-xeon-nginx-server
depends_on:
- chatqna-xeon-backend-server
- chatqna-xeon-ui-server
ports:
- "${NGINX_PORT:-80}:80"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- FRONTEND_SERVICE_IP=chatqna-xeon-ui-server
- FRONTEND_SERVICE_PORT=5173
- BACKEND_SERVICE_NAME=chatqna
- BACKEND_SERVICE_IP=chatqna-xeon-backend-server
- BACKEND_SERVICE_PORT=8888
- DATAPREP_SERVICE_IP=dataprep-redis-service
- DATAPREP_SERVICE_PORT=6007
- CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
- UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT}
- GET_FILE=${DATAPREP_GET_FILE_ENDPOINT}
- DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT}
ipc: host
restart: always

View File

@@ -7,4 +7,24 @@
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006"
export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
export TGI_LLM_ENDPOINT="http://${host_ip}:9009"
export vLLM_LLM_ENDPOINT="http://${host_ip}:9009"
export REDIS_URL="redis://${host_ip}:6379"
export INDEX_NAME="rag-redis"
export REDIS_HOST=${host_ip}
export MEGA_SERVICE_HOST_IP=${host_ip}
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
export RERANK_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/chatqna"
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_file"
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete_file"
export FRONTEND_SERVICE_IP=${host_ip}
export FRONTEND_SERVICE_PORT=5173
export BACKEND_SERVICE_NAME=chatqna
export BACKEND_SERVICE_IP=${host_ip}
export BACKEND_SERVICE_PORT=8888

View File

@@ -70,19 +70,73 @@ curl http://${host_ip}:8888/v1/chatqna \
First of all, you need to build the Docker images locally. This step can be skipped once the Docker images are published to Docker Hub.
### 1. Build Retriever Image
### 1. Build Embedding Image
```bash
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
docker build --no-cache -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/tei/langchain/Dockerfile .
```
### 2. Build Retriever Image
```bash
docker build --no-cache -t opea/retriever-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/redis/langchain/Dockerfile .
```
### 2. Build Dataprep Image
### 3. Build Rerank Image
> Skip for ChatQnA without Rerank pipeline
```bash
docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/tei/Dockerfile .
```
### 4. Build LLM Image
You can use different LLM serving solutions; choose one of the following three options.
#### 4.1 Use TGI
```bash
docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile .
```
#### 4.2 Use VLLM
Build vllm docker.
```bash
docker build --no-cache -t opea/llm-vllm-hpu:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/vllm/langchain/dependency/Dockerfile.intel_hpu .
```
Build microservice docker.
```bash
docker build --no-cache -t opea/llm-vllm:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/vllm/langchain/Dockerfile .
```
#### 4.3 Use VLLM-on-Ray
Build vllm-on-ray docker.
```bash
docker build --no-cache -t opea/llm-vllm-ray-hpu:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/vllm/ray/dependency/Dockerfile .
```
Build microservice docker.
```bash
docker build --no-cache -t opea/llm-vllm-ray:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/vllm/ray/Dockerfile .
```
### 5. Build Dataprep Image
```bash
docker build --no-cache -t opea/dataprep-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain/Dockerfile .
```
### 3. Build Guardrails Docker Image (Optional)
### 6. Build Guardrails Docker Image (Optional)
To fortify AI initiatives in production, Guardrails microservice can secure model inputs and outputs, building Trustworthy, Safe, and Secure LLM-based Applications.
@@ -90,7 +144,7 @@ To fortify AI initiatives in production, Guardrails microservice can secure mode
docker build -t opea/guardrails-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/guardrails/llama_guard/langchain/Dockerfile .
```
### 4. Build MegaService Docker Image
### 7. Build MegaService Docker Image
1. MegaService with Rerank
@@ -122,7 +176,7 @@ docker build -t opea/guardrails-tgi:latest --build-arg https_proxy=$https_proxy
docker build --no-cache -t opea/chatqna-without-rerank:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile.without_rerank .
```
### 5. Build UI Docker Image
### 8. Build UI Docker Image
Construct the frontend Docker image using the command below:
@@ -131,7 +185,7 @@ cd GenAIExamples/ChatQnA/ui
docker build --no-cache -t opea/chatqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile .
```
### 6. Build Conversational React UI Docker Image (Optional)
### 9. Build Conversational React UI Docker Image (Optional)
Build frontend Docker image that enables Conversational experience with ChatQnA megaservice via below command:
@@ -142,18 +196,21 @@ cd GenAIExamples/ChatQnA/ui
docker build --no-cache -t opea/chatqna-conversation-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.react .
```
### 7. Build Nginx Docker Image
### 10. Build Nginx Docker Image
```bash
cd GenAIComps
docker build -t opea/nginx:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/nginx/Dockerfile .
```
Then run the command `docker images`, you will have the following 5 Docker Images:
Then run the command `docker images`, you will have the following 8 Docker Images:
- `opea/embedding-tei:latest`
- `opea/retriever-redis:latest`
- `opea/reranking-tei:latest`
- `opea/llm-tgi:latest` or `opea/llm-vllm:latest` or `opea/llm-vllm-ray:latest`
- `opea/dataprep-redis:latest`
- `opea/chatqna:latest`
- `opea/chatqna:latest` or `opea/chatqna-guardrails:latest` or `opea/chatqna-without-rerank:latest`
- `opea/chatqna-ui:latest`
- `opea/nginx:latest`
@@ -281,7 +338,16 @@ For validation details, please refer to [how-to-validate_service](./how_to_valid
-H 'Content-Type: application/json'
```
2. Retriever Microservice
2. Embedding Microservice
```bash
curl http://${host_ip}:6000/v1/embeddings \
-X POST \
-d '{"text":"hello"}' \
-H 'Content-Type: application/json'
```
3. Retriever Microservice
To consume the retriever microservice, you need to generate a mock embedding vector by Python script. The length of embedding vector
is determined by the embedding model.
@@ -297,7 +363,7 @@ For validation details, please refer to [how-to-validate_service](./how_to_valid
-H 'Content-Type: application/json'
```
3. TEI Reranking Service
4. TEI Reranking Service
> Skip for ChatQnA without Rerank pipeline
@@ -308,7 +374,18 @@ For validation details, please refer to [how-to-validate_service](./how_to_valid
-H 'Content-Type: application/json'
```
4. LLM backend Service
5. Reranking Microservice
> Skip for ChatQnA without Rerank pipeline
```bash
curl http://${host_ip}:8000/v1/reranking \
-X POST \
-d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
-H 'Content-Type: application/json'
```
6. LLM backend Service
In first startup, this service will take more time to download the model files. After it's finished, the service will be ready.
@@ -353,7 +430,39 @@ For validation details, please refer to [how-to-validate_service](./how_to_valid
-d '{"model": "${LLM_MODEL_ID}", "messages": [{"role": "user", "content": "What is Deep Learning?"}]}'
```
5. MegaService
7. LLM Microservice
```bash
# TGI service
curl http://${host_ip}:9000/v1/chat/completions\
-X POST \
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
-H 'Content-Type: application/json'
```
For parameters in TGI mode, please refer to [HuggingFace InferenceClient API](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.text_generation) (except we rename "max_new_tokens" to "max_tokens".)
```bash
# vLLM Service
curl http://${host_ip}:9000/v1/chat/completions \
-X POST \
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_p":1,"temperature":0.7,"frequency_penalty":0,"presence_penalty":0, "streaming":false}' \
-H 'Content-Type: application/json'
```
For parameters in vLLM Mode, can refer to [LangChain VLLMOpenAI API](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.vllm.VLLMOpenAI.html)
```bash
# vLLM-on-Ray Service
curl http://${host_ip}:9000/v1/chat/completions \
-X POST \
-d '{"query":"What is Deep Learning?","max_tokens":17,"presence_penalty":1.03,"streaming":false}' \
-H 'Content-Type: application/json'
```
For parameters in vLLM-on-Ray mode, can refer to [LangChain ChatOpenAI API](https://python.langchain.com/v0.2/api_reference/openai/chat_models/langchain_openai.chat_models.base.ChatOpenAI.html)
8. MegaService
```bash
curl http://${host_ip}:8888/v1/chatqna -H "Content-Type: application/json" -d '{
@@ -361,7 +470,7 @@ For validation details, please refer to [how-to-validate_service](./how_to_valid
}'
```
6. Nginx Service
9. Nginx Service
```bash
curl http://${host_ip}:${NGINX_PORT}/v1/chatqna \
@@ -369,7 +478,7 @@ For validation details, please refer to [how-to-validate_service](./how_to_valid
-d '{"messages": "What is the revenue of Nike in 2023?"}'
```
7. Dataprep Microservice (Optional)
10. Dataprep Microservice (Optional)
If you want to update the default knowledge base, you can use the following commands:
@@ -438,7 +547,7 @@ curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \
-H "Content-Type: application/json"
```
8. Guardrails (Optional)
11. Guardrails (Optional)
```bash
curl http://${host_ip}:9090/v1/guardrails\
@@ -454,7 +563,7 @@ curl http://${host_ip}:9090/v1/guardrails\
To access the frontend, open the following URL in your browser: http://{host_ip}:5173. By default, the UI runs on port 5173 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `compose.yaml` file as shown below:
```yaml
chatqna-gaudi-ui-server:
chaqna-gaudi-ui-server:
image: opea/chatqna-ui:latest
...
ports:
@@ -467,10 +576,10 @@ If you want to launch the UI using Nginx, open this URL: `http://${host_ip}:${NG
## 🚀 Launch the Conversational UI (Optional)
To access the Conversational UI (react based) frontend, modify the UI service in the `compose.yaml` file. Replace `chatqna-gaudi-ui-server` service with the `chatqna-gaudi-conversation-ui-server` service as per the config below:
To access the Conversational UI (react based) frontend, modify the UI service in the `compose.yaml` file. Replace `chaqna-gaudi-ui-server` service with the `chatqna-gaudi-conversation-ui-server` service as per the config below:
```yaml
chatqna-gaudi-conversation-ui-server:
chaqna-gaudi-conversation-ui-server:
image: opea/chatqna-conversation-ui:latest
container_name: chatqna-gaudi-conversation-ui-server
environment:
@@ -479,7 +588,7 @@ chatqna-gaudi-conversation-ui-server:
ports:
- "5174:80"
depends_on:
- chatqna-gaudi-backend-server
- chaqna-gaudi-backend-server
ipc: host
restart: always
```
@@ -487,7 +596,7 @@ chatqna-gaudi-conversation-ui-server:
Once the services are up, open the following URL in your browser: http://{host_ip}:5174. By default, the UI runs on port 80 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `compose.yaml` file as shown below:
```yaml
chatqna-gaudi-conversation-ui-server:
chaqna-gaudi-conversation-ui-server:
image: opea/chatqna-conversation-ui:latest
...
ports:

View File

@@ -20,10 +20,9 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
REDIS_URL: redis://redis-vector-db:6379
REDIS_HOST: redis-vector-db
REDIS_URL: ${REDIS_URL}
INDEX_NAME: ${INDEX_NAME}
TEI_ENDPOINT: http://tei-embedding-service:80
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
tei-embedding-service:
image: ghcr.io/huggingface/tei-gaudi:latest
@@ -40,12 +39,26 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
HABANA_VISIBLE_DEVICES: ${tei_embedding_devices}
OMPI_MCA_btl_vader_single_copy_mechanism: none
MAX_WARMUP_SEQUENCE_LENGTH: 512
INIT_HCCL_ON_ACQUIRE: 0
ENABLE_EXPERIMENTAL_FLAGS: true
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
embedding:
image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
container_name: embedding-tei-server
depends_on:
- tei-embedding-service
ports:
- "6000:6000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
restart: unless-stopped
retriever:
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
container_name: retriever-redis-server
@@ -58,33 +71,42 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
REDIS_URL: redis://redis-vector-db:6379
REDIS_HOST: redis-vector-db
REDIS_URL: ${REDIS_URL}
INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
tei-reranking-service:
image: ghcr.io/huggingface/tei-gaudi:latest
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: tei-reranking-gaudi-server
ports:
- "8808:80"
volumes:
- "./data:/data"
runtime: habana
cap_add:
- SYS_NICE
shm_size: 1g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
reranking:
image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
container_name: reranking-tei-gaudi-server
depends_on:
- tei-reranking-service
ports:
- "8000:8000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
MAX_WARMUP_SEQUENCE_LENGTH: 512
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
restart: unless-stopped
tgi-service:
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
container_name: tgi-gaudi-server
@@ -99,7 +121,7 @@ services:
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
HABANA_VISIBLE_DEVICES: ${llm_service_devices}
OMPI_MCA_btl_vader_single_copy_mechanism: none
ENABLE_HPU_GRAPH: true
LIMIT_HPU_GRAPH: true
@@ -109,65 +131,83 @@ services:
cap_add:
- SYS_NICE
ipc: host
command: --model-id ${LLM_MODEL_ID} --max-input-length 2048 --max-total-tokens 4096
chatqna-gaudi-backend-server:
command: --model-id ${LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048
llm:
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
container_name: llm-tgi-gaudi-server
depends_on:
- tgi-service
ports:
- "9000:9000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
restart: unless-stopped
chaqna-gaudi-backend-server:
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
container_name: chatqna-gaudi-backend-server
depends_on:
- redis-vector-db
- tei-embedding-service
- embedding
- retriever
- tei-reranking-service
- reranking
- tgi-service
- llm
ports:
- "8888:8888"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=chatqna-gaudi-backend-server
- EMBEDDING_SERVER_HOST_IP=tei-embedding-service
- EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-80}
- RETRIEVER_SERVICE_HOST_IP=retriever
- RERANK_SERVER_HOST_IP=tei-reranking-service
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
- LLM_SERVER_HOST_IP=tgi-service
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
- LOGFLAG=${LOGFLAG}
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
- EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
- RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
- RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP}
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
ipc: host
restart: always
chatqna-gaudi-ui-server:
chaqna-gaudi-ui-server:
image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest}
container_name: chatqna-gaudi-ui-server
depends_on:
- chatqna-gaudi-backend-server
- chaqna-gaudi-backend-server
ports:
- "5173:5173"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
- UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT}
- GET_FILE=${DATAPREP_GET_FILE_ENDPOINT}
- DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT}
ipc: host
restart: always
chatqna-gaudi-nginx-server:
chaqna-gaudi-nginx-server:
image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
container_name: chatqna-gaudi-nginx-server
container_name: chaqna-gaudi-nginx-server
depends_on:
- chatqna-gaudi-backend-server
- chatqna-gaudi-ui-server
- chaqna-gaudi-backend-server
- chaqna-gaudi-ui-server
ports:
- "${NGINX_PORT:-80}:80"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- FRONTEND_SERVICE_IP=chatqna-gaudi-ui-server
- FRONTEND_SERVICE_PORT=5173
- BACKEND_SERVICE_NAME=chatqna
- BACKEND_SERVICE_IP=chatqna-gaudi-backend-server
- BACKEND_SERVICE_PORT=8888
- DATAPREP_SERVICE_IP=dataprep-redis-service
- DATAPREP_SERVICE_PORT=6007
- FRONTEND_SERVICE_IP=${FRONTEND_SERVICE_IP}
- FRONTEND_SERVICE_PORT=${FRONTEND_SERVICE_PORT}
- BACKEND_SERVICE_NAME=${BACKEND_SERVICE_NAME}
- BACKEND_SERVICE_IP=${BACKEND_SERVICE_IP}
- BACKEND_SERVICE_PORT=${BACKEND_SERVICE_PORT}
ipc: host
restart: always

View File

@@ -20,10 +20,9 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
REDIS_URL: redis://redis-vector-db:6379
REDIS_HOST: redis-vector-db
REDIS_URL: ${REDIS_URL}
INDEX_NAME: ${INDEX_NAME}
TEI_ENDPOINT: http://tei-embedding-service:80
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
tgi-guardrails-service:
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
@@ -60,8 +59,8 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
SAFETY_GUARD_MODEL_ID: ${GURADRAILS_MODEL_ID}
SAFETY_GUARD_ENDPOINT: http://tgi-guardrails-service:80
SAFETY_GUARD_MODEL_ID: ${SAFETY_GUARD_MODEL_ID}
SAFETY_GUARD_ENDPOINT: ${SAFETY_GUARD_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
tei-embedding-service:
@@ -79,12 +78,24 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
MAX_WARMUP_SEQUENCE_LENGTH: 512
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
embedding:
image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
container_name: embedding-tei-server
depends_on:
- tei-embedding-service
ports:
- "6000:6000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
restart: unless-stopped
retriever:
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
container_name: retriever-redis-server
@@ -97,33 +108,42 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
REDIS_URL: redis://redis-vector-db:6379
REDIS_HOST: redis-vector-db
REDIS_URL: ${REDIS_URL}
INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
tei-reranking-service:
image: ghcr.io/huggingface/tei-gaudi:latest
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: tei-reranking-gaudi-server
ports:
- "8808:80"
volumes:
- "./data:/data"
runtime: habana
cap_add:
- SYS_NICE
shm_size: 1g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
reranking:
image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
container_name: reranking-tei-gaudi-server
depends_on:
- tei-reranking-service
ports:
- "8000:8000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
MAX_WARMUP_SEQUENCE_LENGTH: 512
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
restart: unless-stopped
tgi-service:
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
container_name: tgi-gaudi-server
@@ -149,7 +169,24 @@ services:
- SYS_NICE
ipc: host
command: --model-id ${LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048
chatqna-gaudi-backend-server:
llm:
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
container_name: llm-tgi-gaudi-server
depends_on:
- tgi-service
ports:
- "9000:9000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
restart: unless-stopped
chaqna-gaudi-backend-server:
image: ${REGISTRY:-opea}/chatqna-guardrails:${TAG:-latest}
container_name: chatqna-gaudi-guardrails-server
depends_on:
@@ -157,60 +194,41 @@ services:
- tgi-guardrails-service
- guardrails
- tei-embedding-service
- embedding
- retriever
- tei-reranking-service
- reranking
- tgi-service
- llm
ports:
- "8888:8888"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=chatqna-gaudi-backend-server
- GUARDRAIL_SERVICE_HOST_IP=guardrails
- GUARDRAIL_SERVICE_PORT=${GUARDRAIL_SERVICE_PORT:-9090}
- EMBEDDING_SERVER_HOST_IP=tei-embedding-service
- EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-80}
- RETRIEVER_SERVICE_HOST_IP=retriever
- RERANK_SERVER_HOST_IP=tei-reranking-service
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
- LLM_SERVER_HOST_IP=tgi-service
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
- LOGFLAG=${LOGFLAG}
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
- GUARDRAIL_SERVICE_HOST_IP=${GUARDRAIL_SERVICE_HOST_IP}
- EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
- RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
- RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP}
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
ipc: host
restart: always
chatqna-gaudi-ui-server:
chaqna-gaudi-ui-server:
image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest}
container_name: chatqna-gaudi-ui-server
depends_on:
- chatqna-gaudi-backend-server
- chaqna-gaudi-backend-server
ports:
- "5173:5173"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
ipc: host
restart: always
chatqna-gaudi-nginx-server:
image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
container_name: chatqna-gaudi-nginx-server
depends_on:
- chatqna-gaudi-backend-server
- chatqna-gaudi-ui-server
ports:
- "${NGINX_PORT:-80}:80"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- FRONTEND_SERVICE_IP=chatqna-gaudi-ui-server
- FRONTEND_SERVICE_PORT=5173
- BACKEND_SERVICE_NAME=chatqna
- BACKEND_SERVICE_IP=chatqna-gaudi-backend-server
- BACKEND_SERVICE_PORT=8888
- DATAPREP_SERVICE_IP=dataprep-redis-service
- DATAPREP_SERVICE_PORT=6007
- CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
- UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT}
- GET_FILE=${DATAPREP_GET_FILE_ENDPOINT}
- DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT}
ipc: host
restart: always

View File

@@ -0,0 +1,201 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
services:
redis-vector-db:
image: redis/redis-stack:7.2.0-v9
container_name: redis-vector-db
ports:
- "6379:6379"
- "8001:8001"
dataprep-redis-service:
image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest}
container_name: dataprep-redis-server
depends_on:
- redis-vector-db
- tei-embedding-service
ports:
- "6007:6007"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
REDIS_URL: ${REDIS_URL}
INDEX_NAME: ${INDEX_NAME}
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
tei-embedding-service:
image: ghcr.io/huggingface/tei-gaudi:latest
container_name: tei-embedding-gaudi-server
ports:
- "8090:80"
volumes:
- "./data:/data"
runtime: habana
cap_add:
- SYS_NICE
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
MAX_WARMUP_SEQUENCE_LENGTH: 512
INIT_HCCL_ON_ACQUIRE: 0
ENABLE_EXPERIMENTAL_FLAGS: true
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
# embedding:
# image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
# container_name: embedding-tei-server
# depends_on:
# - tei-embedding-service
# ports:
# - "6000:6000"
# ipc: host
# environment:
# no_proxy: ${no_proxy}
# http_proxy: ${http_proxy}
# https_proxy: ${https_proxy}
# TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
# restart: unless-stopped
retriever:
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
container_name: retriever-redis-server
depends_on:
- redis-vector-db
ports:
- "7000:7000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
REDIS_URL: ${REDIS_URL}
INDEX_NAME: ${INDEX_NAME}
restart: unless-stopped
tei-reranking-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: tei-reranking-gaudi-server
ports:
- "8808:80"
volumes:
- "./data:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
# reranking:
# image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
# container_name: reranking-tei-gaudi-server
# depends_on:
# - tei-reranking-service
# ports:
# - "8000:8000"
# ipc: host
# environment:
# no_proxy: ${no_proxy}
# http_proxy: ${http_proxy}
# https_proxy: ${https_proxy}
# TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
# HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
# HF_HUB_DISABLE_PROGRESS_BARS: 1
# HF_HUB_ENABLE_HF_TRANSFER: 0
# restart: unless-stopped
tgi-service:
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
container_name: tgi-gaudi-server
ports:
- "8005:80"
volumes:
- "./data:/data"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
ENABLE_HPU_GRAPH: true
LIMIT_HPU_GRAPH: true
USE_FLASH_ATTENTION: true
FLASH_ATTENTION_RECOMPUTE: true
runtime: habana
cap_add:
- SYS_NICE
ipc: host
command: --model-id ${LLM_MODEL_ID} --max-input-length 2048 --max-total-tokens 4096
# llm:
# image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
# container_name: llm-tgi-gaudi-server
# depends_on:
# - tgi-service
# ports:
# - "9000:9000"
# ipc: host
# environment:
# no_proxy: ${no_proxy}
# http_proxy: ${http_proxy}
# https_proxy: ${https_proxy}
# TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
# HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
# HF_HUB_DISABLE_PROGRESS_BARS: 1
# HF_HUB_ENABLE_HF_TRANSFER: 0
# restart: unless-stopped
chaqna-gaudi-backend-server:
image: ${REGISTRY:-opea}/chatqna-no-wrapper:${TAG:-latest}
container_name: chatqna-gaudi-backend-server
depends_on:
- redis-vector-db
- tei-embedding-service
# - embedding
- retriever
- tei-reranking-service
# - reranking
- tgi-service
# - llm
ports:
- "8888:8888"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
- EMBEDDING_SERVER_HOST_IP=${EMBEDDING_SERVER_HOST_IP}
- EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-8090}
- RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
- RERANK_SERVER_HOST_IP=${RERANK_SERVER_HOST_IP}
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-8808}
- LLM_SERVER_HOST_IP=${LLM_SERVER_HOST_IP}
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-8005}
- LOGFLAG=${LOGFLAG}
ipc: host
restart: always
chaqna-gaudi-ui-server:
image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest}
container_name: chatqna-gaudi-ui-server
depends_on:
- chaqna-gaudi-backend-server
ports:
- "5173:5173"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
- UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT}
- GET_FILE=${DATAPREP_GET_FILE_ENDPOINT}
- DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT}
ipc: host
restart: always
networks:
default:
driver: bridge

View File

@@ -20,10 +20,9 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
REDIS_URL: redis://redis-vector-db:6379
REDIS_HOST: redis-vector-db
REDIS_URL: ${REDIS_URL}
INDEX_NAME: ${INDEX_NAME}
TEI_ENDPOINT: http://tei-embedding-service:80
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
tei-embedding-service:
image: ghcr.io/huggingface/tei-gaudi:latest
@@ -40,12 +39,24 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
MAX_WARMUP_SEQUENCE_LENGTH: 512
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
command: --model-id ${EMBEDDING_MODEL_ID}
embedding:
image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
container_name: embedding-tei-server
depends_on:
- tei-embedding-service
ports:
- "6000:6000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
restart: unless-stopped
retriever:
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
container_name: retriever-redis-server
@@ -58,33 +69,42 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
REDIS_URL: redis://redis-vector-db:6379
REDIS_HOST: redis-vector-db
REDIS_URL: ${REDIS_URL}
INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
tei-reranking-service:
image: ghcr.io/huggingface/tei-gaudi:latest
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: tei-reranking-gaudi-server
ports:
- "8808:80"
volumes:
- "./data:/data"
runtime: habana
cap_add:
- SYS_NICE
shm_size: 1g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
reranking:
image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
container_name: reranking-tei-gaudi-server
depends_on:
- tei-reranking-service
ports:
- "8000:8000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
MAX_WARMUP_SEQUENCE_LENGTH: 512
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
restart: unless-stopped
vllm-service:
image: ${REGISTRY:-opea}/llm-vllm-hpu:${TAG:-latest}
container_name: vllm-gaudi-server
@@ -105,64 +125,63 @@ services:
- SYS_NICE
ipc: host
command: /bin/bash -c "export VLLM_CPU_KVCACHE_SPACE=40 && python3 -m vllm.entrypoints.openai.api_server --enforce-eager --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048"
chatqna-gaudi-backend-server:
llm:
image: ${REGISTRY:-opea}/llm-vllm:${TAG:-latest}
container_name: llm-vllm-gaudi-server
depends_on:
- vllm-service
ports:
- "9000:9000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
vLLM_ENDPOINT: ${vLLM_LLM_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
LLM_MODEL: ${LLM_MODEL_ID}
restart: unless-stopped
chaqna-gaudi-backend-server:
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
container_name: chatqna-gaudi-backend-server
depends_on:
- redis-vector-db
- tei-embedding-service
- embedding
- retriever
- tei-reranking-service
- reranking
- vllm-service
- llm
ports:
- "8888:8888"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=chatqna-gaudi-backend-server
- EMBEDDING_SERVER_HOST_IP=tei-embedding-service
- EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-80}
- RETRIEVER_SERVICE_HOST_IP=retriever
- RERANK_SERVER_HOST_IP=tei-reranking-service
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
- LLM_SERVER_HOST_IP=vllm-service
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
- LOGFLAG=${LOGFLAG}
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
- EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
- RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
- RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP}
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
- LLM_SERVICE_PORT=${LLM_SERVICE_PORT}
ipc: host
restart: always
chatqna-gaudi-ui-server:
chaqna-gaudi-ui-server:
image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest}
container_name: chatqna-gaudi-ui-server
depends_on:
- chatqna-gaudi-backend-server
- chaqna-gaudi-backend-server
ports:
- "5173:5173"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
ipc: host
restart: always
chatqna-gaudi-nginx-server:
image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
container_name: chatqna-gaudi-nginx-server
depends_on:
- chatqna-gaudi-backend-server
- chatqna-gaudi-ui-server
ports:
- "${NGINX_PORT:-80}:80"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- FRONTEND_SERVICE_IP=chatqna-gaudi-ui-server
- FRONTEND_SERVICE_PORT=5173
- BACKEND_SERVICE_NAME=chatqna
- BACKEND_SERVICE_IP=chatqna-gaudi-backend-server
- BACKEND_SERVICE_PORT=8888
- DATAPREP_SERVICE_IP=dataprep-redis-service
- DATAPREP_SERVICE_PORT=6007
- CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
- UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT}
- GET_FILE=${DATAPREP_GET_FILE_ENDPOINT}
- DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT}
ipc: host
restart: always

View File

@@ -20,10 +20,9 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
REDIS_URL: redis://redis-vector-db:6379
REDIS_HOST: redis-vector-db
REDIS_URL: ${REDIS_URL}
INDEX_NAME: ${INDEX_NAME}
TEI_ENDPOINT: http://tei-embedding-service:80
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
tei-embedding-service:
image: ghcr.io/huggingface/tei-gaudi:latest
@@ -40,12 +39,24 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
MAX_WARMUP_SEQUENCE_LENGTH: 512
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
command: --model-id ${EMBEDDING_MODEL_ID}
embedding:
image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
container_name: embedding-tei-server
depends_on:
- tei-embedding-service
ports:
- "6000:6000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
restart: unless-stopped
retriever:
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
container_name: retriever-redis-server
@@ -58,33 +69,42 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
REDIS_URL: redis://redis-vector-db:6379
REDIS_HOST: redis-vector-db
REDIS_URL: ${REDIS_URL}
INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
tei-reranking-service:
image: ghcr.io/huggingface/tei-gaudi:latest
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: tei-reranking-gaudi-server
ports:
- "8808:80"
volumes:
- "./data:/data"
runtime: habana
cap_add:
- SYS_NICE
shm_size: 1g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
reranking:
image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
container_name: reranking-tei-gaudi-server
depends_on:
- tei-reranking-service
ports:
- "8000:8000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
MAX_WARMUP_SEQUENCE_LENGTH: 512
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
restart: unless-stopped
vllm-ray-service:
image: ${REGISTRY:-opea}/llm-vllm-ray-hpu:${TAG:-latest}
container_name: vllm-ray-gaudi-server
@@ -105,64 +125,63 @@ services:
- SYS_NICE
ipc: host
command: /bin/bash -c "ray start --head && python vllm_ray_openai.py --port_number 8000 --model_id_or_path $LLM_MODEL_ID --tensor_parallel_size 2 --enforce_eager True"
chatqna-gaudi-backend-server:
llm:
image: ${REGISTRY:-opea}/llm-vllm-ray:${TAG:-latest}
container_name: llm-vllm-ray-gaudi-server
depends_on:
- vllm-ray-service
ports:
- "9000:9000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
vLLM_RAY_ENDPOINT: ${vLLM_RAY_LLM_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
LLM_MODEL: ${LLM_MODEL_ID}
restart: unless-stopped
chaqna-gaudi-backend-server:
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
container_name: chatqna-gaudi-backend-server
depends_on:
- redis-vector-db
- tei-embedding-service
- embedding
- retriever
- tei-reranking-service
- reranking
- vllm-ray-service
- llm
ports:
- "8888:8888"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=chatqna-gaudi-backend-server
- EMBEDDING_SERVER_HOST_IP=tei-embedding-service
- EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-80}
- RETRIEVER_SERVICE_HOST_IP=retriever
- RERANK_SERVER_HOST_IP=tei-reranking-service
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
- LLM_SERVER_HOST_IP=vllm-ray-service
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-8000}
- LOGFLAG=${LOGFLAG}
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
- EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
- RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
- RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP}
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
- LLM_SERVICE_PORT=${LLM_SERVICE_PORT}
ipc: host
restart: always
chatqna-gaudi-ui-server:
chaqna-gaudi-ui-server:
image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest}
container_name: chatqna-gaudi-ui-server
depends_on:
- chatqna-gaudi-backend-server
- chaqna-gaudi-backend-server
ports:
- "5173:5173"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
ipc: host
restart: always
chatqna-gaudi-nginx-server:
image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
container_name: chatqna-gaudi-nginx-server
depends_on:
- chatqna-gaudi-backend-server
- chatqna-gaudi-ui-server
ports:
- "${NGINX_PORT:-80}:80"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- FRONTEND_SERVICE_IP=chatqna-gaudi-ui-server
- FRONTEND_SERVICE_PORT=5173
- BACKEND_SERVICE_NAME=chatqna
- BACKEND_SERVICE_IP=chatqna-gaudi-backend-server
- BACKEND_SERVICE_PORT=8888
- DATAPREP_SERVICE_IP=dataprep-redis-service
- DATAPREP_SERVICE_PORT=6007
- CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
- UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT}
- GET_FILE=${DATAPREP_GET_FILE_ENDPOINT}
- DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT}
ipc: host
restart: always

View File

@@ -20,10 +20,9 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
REDIS_URL: redis://redis-vector-db:6379
REDIS_HOST: redis-vector-db
REDIS_URL: ${REDIS_URL}
INDEX_NAME: ${INDEX_NAME}
TEI_ENDPOINT: http://tei-embedding-service:80
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
tei-embedding-service:
image: ghcr.io/huggingface/tei-gaudi:latest
@@ -40,12 +39,26 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
MAX_WARMUP_SEQUENCE_LENGTH: 512
INIT_HCCL_ON_ACQUIRE: 0
ENABLE_EXPERIMENTAL_FLAGS: true
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
embedding:
image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
container_name: embedding-tei-server
depends_on:
- tei-embedding-service
ports:
- "6000:6000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
restart: unless-stopped
retriever:
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
container_name: retriever-redis-server
@@ -58,11 +71,8 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
REDIS_URL: redis://redis-vector-db:6379
REDIS_HOST: redis-vector-db
REDIS_URL: ${REDIS_URL}
INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
tgi-service:
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
@@ -89,61 +99,60 @@ services:
- SYS_NICE
ipc: host
command: --model-id ${LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048
chatqna-gaudi-backend-server:
llm:
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
container_name: llm-tgi-gaudi-server
depends_on:
- tgi-service
ports:
- "9000:9000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
restart: unless-stopped
chaqna-gaudi-backend-server:
image: ${REGISTRY:-opea}/chatqna-without-rerank:${TAG:-latest}
container_name: chatqna-gaudi-backend-server
depends_on:
- redis-vector-db
- tei-embedding-service
- embedding
- retriever
- tgi-service
- llm
ports:
- "8888:8888"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=chatqna-gaudi-backend-server
- EMBEDDING_SERVER_HOST_IP=tei-embedding-service
- EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-80}
- RETRIEVER_SERVICE_HOST_IP=retriever
- LLM_SERVER_HOST_IP=tgi-service
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
- LOGFLAG=${LOGFLAG}
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
- EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
- RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
ipc: host
restart: always
chatqna-gaudi-ui-server:
chaqna-gaudi-ui-server:
image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest}
container_name: chatqna-gaudi-ui-server
depends_on:
- chatqna-gaudi-backend-server
- chaqna-gaudi-backend-server
ports:
- "5173:5173"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
ipc: host
restart: always
chatqna-gaudi-nginx-server:
image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
container_name: chatqna-gaudi-nginx-server
depends_on:
- chatqna-gaudi-backend-server
- chatqna-gaudi-ui-server
ports:
- "${NGINX_PORT:-80}:80"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- FRONTEND_SERVICE_IP=chatqna-gaudi-ui-server
- FRONTEND_SERVICE_PORT=5173
- BACKEND_SERVICE_NAME=chatqna
- BACKEND_SERVICE_IP=chatqna-gaudi-backend-server
- BACKEND_SERVICE_PORT=8888
- DATAPREP_SERVICE_IP=dataprep-redis-service
- DATAPREP_SERVICE_PORT=6007
- CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
- UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT}
- GET_FILE=${DATAPREP_GET_FILE_ENDPOINT}
- DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT}
ipc: host
restart: always

View File

@@ -26,6 +26,14 @@ The warning messages point out that the variables are **NOT** set.
```
ubuntu@gaudi-vm:~/GenAIExamples/ChatQnA/docker_compose/intel/hpu/gaudi$ docker compose -f ./compose.yaml up -d
WARN[0000] The "LANGCHAIN_API_KEY" variable is not set. Defaulting to a blank string.
WARN[0000] The "LANGCHAIN_TRACING_V2" variable is not set. Defaulting to a blank string.
WARN[0000] The "LANGCHAIN_API_KEY" variable is not set. Defaulting to a blank string.
WARN[0000] The "LANGCHAIN_TRACING_V2" variable is not set. Defaulting to a blank string.
WARN[0000] The "LANGCHAIN_API_KEY" variable is not set. Defaulting to a blank string.
WARN[0000] The "LANGCHAIN_TRACING_V2" variable is not set. Defaulting to a blank string.
WARN[0000] The "LANGCHAIN_API_KEY" variable is not set. Defaulting to a blank string.
WARN[0000] The "LANGCHAIN_TRACING_V2" variable is not set. Defaulting to a blank string.
WARN[0000] /home/ubuntu/GenAIExamples/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml: `version` is obsolete
```
@@ -164,7 +172,7 @@ This tests the embedding service. It sends "What is Deep Learning?" to the embedd
**Note**: The vector dimension is decided by the embedding model, and the output value depends on the model and input data.
### 2 Retriever Microservice
### 2 Embedding Microservice
```
curl http://${host_ip}:6000/v1/embeddings\
-X POST \
-d '{"text":"What is Deep Learning?"}' \
-H 'Content-Type: application/json'
```
This tests the embedding microservice. In this test, it sends `What is Deep Learning?` to the embedding microservice.
The embedding microservice receives the input data and calls the embedding service to embed it.
The embedding server keeps NO state, but the microservice does. There is an `id` field in the output of the `Embedding Microservice`.
```
{"id":"e8c85e588a235a4bc4747a23b3a71d8f","text":"What is Deep Learning?","embedding":[0.00030903306,-0.06356524,0.0025720573,-0.012404448,0.050649878, ..., 0.02776986,-0.0246678,0.03999176,0.037477136,-0.006806653,0.02261455,-0.04570737,-0.033122733,0.022785513,0.0160026,-0.021343587,-0.029969815,-0.0049176104]}
```
### 3 Retriever Microservice
To consume the retriever microservice, you need to generate a mock embedding vector with a Python script.
The length of the embedding vector is determined by the embedding model.
@@ -187,7 +212,7 @@ The output is retrieved text that is relevant to the input data:
```
### 3 TEI Reranking Service
### 4 TEI Reranking Service
Reranking service
@@ -203,7 +228,24 @@ Output is:
It scores the input
### 4 TGI Service
### 5 Reranking Microservice
```
curl http://${host_ip}:8000/v1/reranking\
-X POST \
-d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
-H 'Content-Type: application/json'
```
Here is the output:
```
{"id":"e1eb0e44f56059fc01aa0334b1dac313","query":"Human: Answer the question based only on the following context:\n Deep learning is...\n Question: What is Deep Learning?","max_new_tokens":1024,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}
```
You may notice that the reranking microservice keeps state ('ID' and other metadata), while the reranking service does not.
### 6 TGI Service
```
curl http://${host_ip}:8008/generate \
@@ -235,7 +277,56 @@ and the log shows model warm up, please wait for a while and try it later.
2024-06-05T05:45:27.867833811Z 2024-06-05T05:45:27.867759Z INFO text_generation_router: router/src/main.rs:221: Warming up model
```
### 5 MegaService
### 7 LLM Microservice
```
curl http://${host_ip}:9000/v1/chat/completions\
-X POST \
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
-H 'Content-Type: application/json'
```
You will get generated text from LLM:
```
data: b'\n'
data: b'\n'
data: b'Deep'
data: b' learning'
data: b' is'
data: b' a'
data: b' subset'
data: b' of'
data: b' machine'
data: b' learning'
data: b' that'
data: b' uses'
data: b' algorithms'
data: b' to'
data: b' learn'
data: b' from'
data: b' data'
data: [DONE]
```
### 8 MegaService
```
curl http://${host_ip}:8888/v1/chatqna -H "Content-Type: application/json" -d '{

View File

@@ -7,4 +7,22 @@
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:8090"
export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
export TGI_LLM_ENDPOINT="http://${host_ip}:8005"
export REDIS_URL="redis://${host_ip}:6379"
export INDEX_NAME="rag-redis"
export MEGA_SERVICE_HOST_IP=${host_ip}
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
export RERANK_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/chatqna"
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_file"
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete_file"
export FRONTEND_SERVICE_IP=${host_ip}
export FRONTEND_SERVICE_PORT=5173
export BACKEND_SERVICE_NAME=chatqna
export BACKEND_SERVICE_IP=${host_ip}
export BACKEND_SERVICE_PORT=8888

View File

@@ -77,19 +77,37 @@ git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
```
### 2. Build Retriever Image
### 2. Build Embedding Image
```bash
docker build --no-cache -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/tei/langchain/Dockerfile .
```
### 3. Build Retriever Image
```bash
docker build --no-cache -t opea/retriever-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/redis/langchain/Dockerfile .
```
### 3. Build Dataprep Image
### 4. Build Rerank Image
```bash
docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/tei/Dockerfile .
```
### 5. Build LLM Image
```bash
docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile .
```
### 6. Build Dataprep Image
```bash
docker build --no-cache -t opea/dataprep-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain/Dockerfile .
```
### 4. Build MegaService Docker Image
### 7. Build MegaService Docker Image
To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `chatqna.py` Python script. Build the MegaService Docker image using the command below:
@@ -100,7 +118,7 @@ docker build --no-cache -t opea/chatqna:latest --build-arg https_proxy=$https_pr
cd ../../..
```
### 5. Build UI Docker Image
### 8. Build UI Docker Image
Construct the frontend Docker image using the command below:
@@ -110,7 +128,7 @@ docker build --no-cache -t opea/chatqna-ui:latest --build-arg https_proxy=$https
cd ../../../..
```
### 6. Build React UI Docker Image (Optional)
### 9. Build React UI Docker Image (Optional)
Construct the frontend Docker image using the command below:
@@ -120,20 +138,23 @@ docker build --no-cache -t opea/chatqna-react-ui:latest --build-arg https_proxy=
cd ../../../..
```
### 7. Build Nginx Docker Image
### 10. Build Nginx Docker Image
```bash
cd GenAIComps
docker build -t opea/nginx:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/nginx/Dockerfile .
```
Then run the command `docker images`; you will have the following 5 Docker Images:
Then run the command `docker images`; you will have the following 8 Docker Images:
1. `opea/retriever-redis:latest`
2. `opea/dataprep-redis:latest`
3. `opea/chatqna:latest`
4. `opea/chatqna-ui:latest` or `opea/chatqna-react-ui:latest`
5. `opea/nginx:latest`
1. `opea/embedding-tei:latest`
2. `opea/retriever-redis:latest`
3. `opea/reranking-tei:latest`
4. `opea/llm-tgi:latest`
5. `opea/dataprep-redis:latest`
6. `opea/chatqna:latest`
7. `opea/chatqna-ui:latest` or `opea/chatqna-react-ui:latest`
8. `opea/nginx:latest`
## 🚀 Start MicroServices and MegaService
@@ -194,7 +215,16 @@ docker compose up -d
-H 'Content-Type: application/json'
```
2. Retriever Microservice
2. Embedding Microservice
```bash
curl http://${host_ip}:6000/v1/embeddings \
-X POST \
-d '{"text":"hello"}' \
-H 'Content-Type: application/json'
```
3. Retriever Microservice
To consume the retriever microservice, you need to generate a mock embedding vector with a Python script. The length of the embedding vector
is determined by the embedding model.
@@ -210,7 +240,7 @@ docker compose up -d
-H 'Content-Type: application/json'
```
3. TEI Reranking Service
4. TEI Reranking Service
```bash
curl http://${host_ip}:8808/rerank \
@@ -219,7 +249,16 @@ docker compose up -d
-H 'Content-Type: application/json'
```
4. TGI Service
5. Reranking Microservice
```bash
curl http://${host_ip}:8000/v1/reranking \
-X POST \
-d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
-H 'Content-Type: application/json'
```
6. TGI Service
On its first startup, this service will take more time to download the model files. After the download is finished, the service will be ready.
@@ -244,7 +283,16 @@ docker compose up -d
-H 'Content-Type: application/json'
```
5. MegaService
7. LLM Microservice
```bash
curl http://${host_ip}:9000/v1/chat/completions \
-X POST \
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
-H 'Content-Type: application/json'
```
8. MegaService
```bash
curl http://${host_ip}:8888/v1/chatqna -H "Content-Type: application/json" -d '{
@@ -252,7 +300,7 @@ docker compose up -d
}'
```
6. Nginx Service
9. Nginx Service
```bash
curl http://${host_ip}:${NGINX_PORT}/v1/chatqna \
@@ -260,7 +308,7 @@ docker compose up -d
-d '{"messages": "What is the revenue of Nike in 2023?"}'
```
7. Dataprep Microservice (Optional)
10. Dataprep Microservice (Optional)
If you want to update the default knowledge base, you can use the following commands:

View File

@@ -46,6 +46,20 @@ services:
- driver: nvidia
count: 1
capabilities: [gpu]
embedding:
image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
container_name: embedding-tei-server
depends_on:
- tei-embedding-service
ports:
- "6000:6000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
restart: unless-stopped
retriever:
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
container_name: retriever-redis-server
@@ -60,7 +74,6 @@ services:
https_proxy: ${https_proxy}
REDIS_URL: ${REDIS_URL}
INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
restart: unless-stopped
tei-reranking-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
@@ -85,6 +98,23 @@ services:
- driver: nvidia
count: 1
capabilities: [gpu]
reranking:
image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
container_name: reranking-tei-server
depends_on:
- tei-reranking-service
ports:
- "8000:8000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
restart: unless-stopped
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:2.2.0
container_name: tgi-server
@@ -108,15 +138,35 @@ services:
- driver: nvidia
count: 1
capabilities: [gpu]
llm:
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
container_name: llm-tgi-server
depends_on:
- tgi-service
ports:
- "9000:9000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
restart: unless-stopped
chaqna-backend-server:
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
container_name: chatqna-backend-server
depends_on:
- redis-vector-db
- tei-embedding-service
- embedding
- retriever
- tei-reranking-service
- reranking
- tgi-service
- llm
ports:
- "8888:8888"
environment:

View File

@@ -8,9 +8,15 @@ export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:8090"
export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
export REDIS_URL="redis://${host_ip}:6379"
export INDEX_NAME="rag-redis"
export MEGA_SERVICE_HOST_IP=${host_ip}
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
export RERANK_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/chatqna"
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_file"

View File

@@ -23,6 +23,18 @@ services:
dockerfile: ./Dockerfile.without_rerank
extends: chatqna
image: ${REGISTRY:-opea}/chatqna-without-rerank:${TAG:-latest}
chatqna-no-wrapper:
build:
context: ../
dockerfile: ./Dockerfile.no_wrapper
extends: chatqna
image: ${REGISTRY:-opea}/chatqna-no-wrapper:${TAG:-latest}
chatqna-no-wrapper-without-rerank:
build:
context: ../
dockerfile: ./Dockerfile.no_wrapper_without_rerank
extends: chatqna
image: ${REGISTRY:-opea}/chatqna-no-wrapper-without-rerank:${TAG:-latest}
chatqna-ui:
build:
context: ../ui

View File

@@ -16,9 +16,12 @@ The ChatQnA uses the below prebuilt images if you choose a Xeon deployment
- redis-vector-db: redis/redis-stack:7.2.0-v9
- tei_embedding_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
- embedding: opea/embedding-tei:latest
- retriever: opea/retriever-redis:latest
- tei_xeon_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
- reranking: opea/reranking-tei:latest
- tgi-service: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
- llm: opea/llm-tgi:latest
- chaqna-xeon-backend-server: opea/chatqna:latest
Should you desire to use the Gaudi accelerator, two alternate images are used for the embedding and llm services.

View File

@@ -27,6 +27,27 @@ data:
no_proxy: ""
LOGFLAG: ""
---
# Source: chatqna/charts/embedding-usvc/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: chatqna-embedding-usvc-config
labels:
helm.sh/chart: embedding-usvc-1.0.0
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
data:
TEI_EMBEDDING_ENDPOINT: "http://chatqna-tei"
http_proxy: ""
https_proxy: ""
no_proxy: ""
LOGFLAG: ""
---
# Source: chatqna/charts/guardrails-usvc/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -51,6 +72,50 @@ data:
https_proxy: ""
no_proxy: ""
---
# Source: chatqna/charts/llm-uservice/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: chatqna-llm-uservice-config
labels:
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
data:
TGI_LLM_ENDPOINT: "http://chatqna-tgi"
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
HF_HOME: "/tmp/.cache/huggingface"
http_proxy: ""
https_proxy: ""
no_proxy: ""
LOGFLAG: ""
---
# Source: chatqna/charts/reranking-usvc/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: chatqna-reranking-usvc-config
labels:
helm.sh/chart: reranking-usvc-1.0.0
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
data:
TEI_RERANKING_ENDPOINT: "http://chatqna-teirerank"
http_proxy: ""
https_proxy: ""
no_proxy: ""
LOGFLAG: ""
---
# Source: chatqna/charts/retriever-usvc/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -143,7 +208,7 @@ metadata:
app.kubernetes.io/managed-by: Helm
data:
MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B"
PORT: "2083"
PORT: "2080"
HF_TOKEN: "insert-your-huggingface-token-here"
http_proxy: ""
https_proxy: ""
@@ -297,6 +362,31 @@ spec:
app.kubernetes.io/name: data-prep
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/embedding-usvc/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: chatqna-embedding-usvc
labels:
helm.sh/chart: embedding-usvc-1.0.0
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 6000
targetPort: 6000
protocol: TCP
name: embedding-usvc
selector:
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/guardrails-usvc/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -322,6 +412,31 @@ spec:
app.kubernetes.io/name: guardrails-usvc
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/llm-uservice/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: chatqna-llm-uservice
labels:
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 9000
targetPort: 9000
protocol: TCP
name: llm-uservice
selector:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/redis-vector-db/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -351,6 +466,31 @@ spec:
app.kubernetes.io/name: redis-vector-db
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/reranking-usvc/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: chatqna-reranking-usvc
labels:
helm.sh/chart: reranking-usvc-1.0.0
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 8000
targetPort: 8000
protocol: TCP
name: reranking-usvc
selector:
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/retriever-usvc/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -444,7 +584,7 @@ spec:
type: ClusterIP
ports:
- port: 80
targetPort: 2083
targetPort: 2080
protocol: TCP
name: tgi
selector:
@@ -557,7 +697,7 @@ spec:
imagePullPolicy: IfNotPresent
ports:
- name: ui
containerPort: 5173
containerPort: 80
protocol: TCP
resources:
{}
@@ -646,36 +786,39 @@ spec:
- name: tmp
emptyDir: {}
---
# Source: chatqna/charts/redis-vector-db/templates/deployment.yaml
# Source: chatqna/charts/embedding-usvc/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-redis-vector-db
name: chatqna-embedding-usvc
labels:
helm.sh/chart: redis-vector-db-1.0.0
app.kubernetes.io/name: redis-vector-db
helm.sh/chart: embedding-usvc-1.0.0
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "7.2.0-v9"
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: redis-vector-db
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
template:
metadata:
labels:
app.kubernetes.io/name: redis-vector-db
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
spec:
securityContext:
{}
containers:
- name: redis-vector-db
- name: chatqna
envFrom:
- configMapRef:
name: chatqna-embedding-usvc-config
securityContext:
allowPrivilegeEscalation: false
capabilities:
@@ -686,35 +829,38 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "redis/redis-stack:7.2.0-v9"
image: "opea/embedding-tei:latest"
imagePullPolicy: IfNotPresent
ports:
- name: embedding-usvc
containerPort: 6000
protocol: TCP
volumeMounts:
- mountPath: /data
name: data-volume
- mountPath: /redisinsight
name: redisinsight-volume
- mountPath: /tmp
name: tmp
ports:
- name: redis-service
containerPort: 6379
protocol: TCP
- name: redis-insight
containerPort: 8001
protocol: TCP
startupProbe:
tcpSocket:
port: 6379 # Probe the Redis port
livenessProbe:
failureThreshold: 24
httpGet:
path: v1/health_check
port: embedding-usvc
initialDelaySeconds: 5
periodSeconds: 5
readinessProbe:
httpGet:
path: v1/health_check
port: embedding-usvc
initialDelaySeconds: 5
periodSeconds: 5
startupProbe:
failureThreshold: 120
httpGet:
path: v1/health_check
port: embedding-usvc
initialDelaySeconds: 5
periodSeconds: 5
resources:
{}
volumes:
- name: data-volume
emptyDir: {}
- name: redisinsight-volume
emptyDir: {}
- name: tmp
emptyDir: {}
---
@@ -796,6 +942,234 @@ spec:
- name: tmp
emptyDir: {}
---
# Source: chatqna/charts/llm-uservice/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-llm-uservice
labels:
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
template:
metadata:
labels:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
spec:
securityContext:
{}
containers:
- name: chatqna
envFrom:
- configMapRef:
name: chatqna-llm-uservice-config
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: false
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/llm-tgi:latest"
imagePullPolicy: IfNotPresent
ports:
- name: llm-uservice
containerPort: 9000
protocol: TCP
volumeMounts:
- mountPath: /tmp
name: tmp
livenessProbe:
failureThreshold: 24
httpGet:
path: v1/health_check
port: llm-uservice
initialDelaySeconds: 5
periodSeconds: 5
readinessProbe:
httpGet:
path: v1/health_check
port: llm-uservice
initialDelaySeconds: 5
periodSeconds: 5
startupProbe:
failureThreshold: 120
httpGet:
path: v1/health_check
port: llm-uservice
initialDelaySeconds: 5
periodSeconds: 5
resources:
{}
volumes:
- name: tmp
emptyDir: {}
---
# Source: chatqna/charts/redis-vector-db/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-redis-vector-db
labels:
helm.sh/chart: redis-vector-db-1.0.0
app.kubernetes.io/name: redis-vector-db
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "7.2.0-v9"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: redis-vector-db
app.kubernetes.io/instance: chatqna
template:
metadata:
labels:
app.kubernetes.io/name: redis-vector-db
app.kubernetes.io/instance: chatqna
spec:
securityContext:
{}
containers:
- name: redis-vector-db
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "redis/redis-stack:7.2.0-v9"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data
name: data-volume
- mountPath: /redisinsight
name: redisinsight-volume
- mountPath: /tmp
name: tmp
ports:
- name: redis-service
containerPort: 6379
protocol: TCP
- name: redis-insight
containerPort: 8001
protocol: TCP
startupProbe:
tcpSocket:
port: 6379 # Probe the Redis port
initialDelaySeconds: 5
periodSeconds: 5
failureThreshold: 120
resources:
{}
volumes:
- name: data-volume
emptyDir: {}
- name: redisinsight-volume
emptyDir: {}
- name: tmp
emptyDir: {}
---
# Source: chatqna/charts/reranking-usvc/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-reranking-usvc
labels:
helm.sh/chart: reranking-usvc-1.0.0
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
template:
metadata:
labels:
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
spec:
securityContext:
{}
containers:
- name: chatqna
envFrom:
- configMapRef:
name: chatqna-reranking-usvc-config
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/reranking-tei:latest"
imagePullPolicy: IfNotPresent
ports:
- name: reranking-usvc
containerPort: 8000
protocol: TCP
volumeMounts:
- mountPath: /tmp
name: tmp
livenessProbe:
failureThreshold: 24
httpGet:
path: v1/health_check
port: reranking-usvc
initialDelaySeconds: 5
periodSeconds: 5
readinessProbe:
httpGet:
path: v1/health_check
port: reranking-usvc
initialDelaySeconds: 5
periodSeconds: 5
startupProbe:
failureThreshold: 120
httpGet:
path: v1/health_check
port: reranking-usvc
initialDelaySeconds: 5
periodSeconds: 5
resources:
{}
volumes:
- name: tmp
emptyDir: {}
---
# Source: chatqna/charts/retriever-usvc/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -1109,7 +1483,7 @@ spec:
name: tmp
ports:
- name: http
containerPort: 2083
containerPort: 2080
protocol: TCP
livenessProbe:
failureThreshold: 24
@@ -1250,24 +1624,16 @@ spec:
containers:
- name: chatqna
env:
- name: LLM_SERVER_HOST_IP
value: chatqna-tgi
- name: LLM_SERVER_PORT
value: "2080"
- name: RERANK_SERVER_HOST_IP
value: chatqna-teirerank
- name: RERANK_SERVER_PORT
value: "2082"
- name: LLM_SERVICE_HOST_IP
value: chatqna-llm-uservice
- name: RERANK_SERVICE_HOST_IP
value: chatqna-reranking-usvc
- name: RETRIEVER_SERVICE_HOST_IP
value: chatqna-retriever-usvc
- name: EMBEDDING_SERVER_HOST_IP
value: chatqna-tei
- name: EMBEDDING_SERVER_PORT
value: "2081"
- name: EMBEDDING_SERVICE_HOST_IP
value: chatqna-embedding-usvc
- name: GUARDRAIL_SERVICE_HOST_IP
value: chatqna-guardrails-usvc
- name: GUARDRAIL_SERVICE_PORT
value: "9090"
securityContext:
allowPrivilegeEscalation: false
capabilities:

View File

@@ -27,6 +27,71 @@ data:
no_proxy: ""
LOGFLAG: ""
---
# Source: chatqna/charts/embedding-usvc/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: chatqna-embedding-usvc-config
labels:
helm.sh/chart: embedding-usvc-1.0.0
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
data:
TEI_EMBEDDING_ENDPOINT: "http://chatqna-tei"
http_proxy: ""
https_proxy: ""
no_proxy: ""
LOGFLAG: ""
---
# Source: chatqna/charts/llm-uservice/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: chatqna-llm-uservice-config
labels:
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
data:
TGI_LLM_ENDPOINT: "http://chatqna-tgi"
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
HF_HOME: "/tmp/.cache/huggingface"
http_proxy: ""
https_proxy: ""
no_proxy: ""
LOGFLAG: ""
---
# Source: chatqna/charts/reranking-usvc/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: chatqna-reranking-usvc-config
labels:
helm.sh/chart: reranking-usvc-1.0.0
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
data:
TEI_RERANKING_ENDPOINT: "http://chatqna-teirerank"
http_proxy: ""
https_proxy: ""
no_proxy: ""
LOGFLAG: ""
---
# Source: chatqna/charts/retriever-usvc/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -247,6 +312,56 @@ spec:
app.kubernetes.io/name: data-prep
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/embedding-usvc/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: chatqna-embedding-usvc
labels:
helm.sh/chart: embedding-usvc-1.0.0
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 6000
targetPort: 6000
protocol: TCP
name: embedding-usvc
selector:
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/llm-uservice/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: chatqna-llm-uservice
labels:
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 9000
targetPort: 9000
protocol: TCP
name: llm-uservice
selector:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/redis-vector-db/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -276,6 +391,31 @@ spec:
app.kubernetes.io/name: redis-vector-db
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/reranking-usvc/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: chatqna-reranking-usvc
labels:
helm.sh/chart: reranking-usvc-1.0.0
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 8000
targetPort: 8000
protocol: TCP
name: reranking-usvc
selector:
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/retriever-usvc/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -457,7 +597,7 @@ spec:
imagePullPolicy: IfNotPresent
ports:
- name: ui
containerPort: 5173
containerPort: 80
protocol: TCP
resources:
{}
@@ -546,6 +686,162 @@ spec:
- name: tmp
emptyDir: {}
---
# Source: chatqna/charts/embedding-usvc/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-embedding-usvc
labels:
helm.sh/chart: embedding-usvc-1.0.0
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
template:
metadata:
labels:
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
spec:
securityContext:
{}
containers:
- name: chatqna
envFrom:
- configMapRef:
name: chatqna-embedding-usvc-config
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/embedding-tei:latest"
imagePullPolicy: IfNotPresent
ports:
- name: embedding-usvc
containerPort: 6000
protocol: TCP
volumeMounts:
- mountPath: /tmp
name: tmp
livenessProbe:
failureThreshold: 24
httpGet:
path: v1/health_check
port: embedding-usvc
initialDelaySeconds: 5
periodSeconds: 5
readinessProbe:
httpGet:
path: v1/health_check
port: embedding-usvc
initialDelaySeconds: 5
periodSeconds: 5
startupProbe:
failureThreshold: 120
httpGet:
path: v1/health_check
port: embedding-usvc
initialDelaySeconds: 5
periodSeconds: 5
resources:
{}
volumes:
- name: tmp
emptyDir: {}
---
# Source: chatqna/charts/llm-uservice/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-llm-uservice
labels:
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
template:
metadata:
labels:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
spec:
securityContext:
{}
containers:
- name: chatqna
envFrom:
- configMapRef:
name: chatqna-llm-uservice-config
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: false
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/llm-tgi:latest"
imagePullPolicy: IfNotPresent
ports:
- name: llm-uservice
containerPort: 9000
protocol: TCP
volumeMounts:
- mountPath: /tmp
name: tmp
livenessProbe:
failureThreshold: 24
httpGet:
path: v1/health_check
port: llm-uservice
initialDelaySeconds: 5
periodSeconds: 5
readinessProbe:
httpGet:
path: v1/health_check
port: llm-uservice
initialDelaySeconds: 5
periodSeconds: 5
startupProbe:
failureThreshold: 120
httpGet:
path: v1/health_check
port: llm-uservice
initialDelaySeconds: 5
periodSeconds: 5
resources:
{}
volumes:
- name: tmp
emptyDir: {}
---
# Source: chatqna/charts/redis-vector-db/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -618,6 +914,84 @@ spec:
- name: tmp
emptyDir: {}
---
# Source: chatqna/charts/reranking-usvc/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-reranking-usvc
labels:
helm.sh/chart: reranking-usvc-1.0.0
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
template:
metadata:
labels:
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
spec:
securityContext:
{}
containers:
- name: chatqna
envFrom:
- configMapRef:
name: chatqna-reranking-usvc-config
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/reranking-tei:latest"
imagePullPolicy: IfNotPresent
ports:
- name: reranking-usvc
containerPort: 8000
protocol: TCP
volumeMounts:
- mountPath: /tmp
name: tmp
livenessProbe:
failureThreshold: 24
httpGet:
path: v1/health_check
port: reranking-usvc
initialDelaySeconds: 5
periodSeconds: 5
readinessProbe:
httpGet:
path: v1/health_check
port: reranking-usvc
initialDelaySeconds: 5
periodSeconds: 5
startupProbe:
failureThreshold: 120
httpGet:
path: v1/health_check
port: reranking-usvc
initialDelaySeconds: 5
periodSeconds: 5
resources:
{}
volumes:
- name: tmp
emptyDir: {}
---
# Source: chatqna/charts/retriever-usvc/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -992,20 +1366,16 @@ spec:
containers:
- name: chatqna
env:
- name: LLM_SERVER_HOST_IP
value: chatqna-tgi
- name: LLM_SERVER_PORT
value: "2080"
- name: RERANK_SERVER_HOST_IP
value: chatqna-teirerank
- name: RERANK_SERVER_PORT
value: "2082"
- name: LLM_SERVICE_HOST_IP
value: chatqna-llm-uservice
- name: RERANK_SERVICE_HOST_IP
value: chatqna-reranking-usvc
- name: RETRIEVER_SERVICE_HOST_IP
value: chatqna-retriever-usvc
- name: EMBEDDING_SERVER_HOST_IP
value: chatqna-tei
- name: EMBEDDING_SERVER_PORT
value: "2081"
- name: EMBEDDING_SERVICE_HOST_IP
value: chatqna-embedding-usvc
- name: GUARDRAIL_SERVICE_HOST_IP
value: chatqna-guardrails-usvc
securityContext:
allowPrivilegeEscalation: false
capabilities:

View File

@@ -27,6 +27,71 @@ data:
no_proxy: ""
LOGFLAG: ""
---
# Source: chatqna/charts/embedding-usvc/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: chatqna-embedding-usvc-config
labels:
helm.sh/chart: embedding-usvc-1.0.0
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
data:
TEI_EMBEDDING_ENDPOINT: "http://chatqna-tei"
http_proxy: ""
https_proxy: ""
no_proxy: ""
LOGFLAG: ""
---
# Source: chatqna/charts/llm-uservice/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: chatqna-llm-uservice-config
labels:
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
data:
TGI_LLM_ENDPOINT: "http://chatqna-tgi"
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
HF_HOME: "/tmp/.cache/huggingface"
http_proxy: ""
https_proxy: ""
no_proxy: ""
LOGFLAG: ""
---
# Source: chatqna/charts/reranking-usvc/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: chatqna-reranking-usvc-config
labels:
helm.sh/chart: reranking-usvc-1.0.0
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
data:
TEI_RERANKING_ENDPOINT: "http://chatqna-teirerank"
http_proxy: ""
https_proxy: ""
no_proxy: ""
LOGFLAG: ""
---
# Source: chatqna/charts/retriever-usvc/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -248,6 +313,56 @@ spec:
app.kubernetes.io/name: data-prep
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/embedding-usvc/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: chatqna-embedding-usvc
labels:
helm.sh/chart: embedding-usvc-1.0.0
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 6000
targetPort: 6000
protocol: TCP
name: embedding-usvc
selector:
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/llm-uservice/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: chatqna-llm-uservice
labels:
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 9000
targetPort: 9000
protocol: TCP
name: llm-uservice
selector:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/redis-vector-db/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -277,6 +392,31 @@ spec:
app.kubernetes.io/name: redis-vector-db
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/reranking-usvc/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: chatqna-reranking-usvc
labels:
helm.sh/chart: reranking-usvc-1.0.0
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 8000
targetPort: 8000
protocol: TCP
name: reranking-usvc
selector:
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/retriever-usvc/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -458,7 +598,7 @@ spec:
imagePullPolicy: IfNotPresent
ports:
- name: ui
containerPort: 5173
containerPort: 80
protocol: TCP
resources:
{}
@@ -547,6 +687,162 @@ spec:
- name: tmp
emptyDir: {}
---
# Source: chatqna/charts/embedding-usvc/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-embedding-usvc
labels:
helm.sh/chart: embedding-usvc-1.0.0
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
template:
metadata:
labels:
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
spec:
securityContext:
{}
containers:
- name: chatqna
envFrom:
- configMapRef:
name: chatqna-embedding-usvc-config
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/embedding-tei:latest"
imagePullPolicy: IfNotPresent
ports:
- name: embedding-usvc
containerPort: 6000
protocol: TCP
volumeMounts:
- mountPath: /tmp
name: tmp
livenessProbe:
failureThreshold: 24
httpGet:
path: v1/health_check
port: embedding-usvc
initialDelaySeconds: 5
periodSeconds: 5
readinessProbe:
httpGet:
path: v1/health_check
port: embedding-usvc
initialDelaySeconds: 5
periodSeconds: 5
startupProbe:
failureThreshold: 120
httpGet:
path: v1/health_check
port: embedding-usvc
initialDelaySeconds: 5
periodSeconds: 5
resources:
{}
volumes:
- name: tmp
emptyDir: {}
---
# Source: chatqna/charts/llm-uservice/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-llm-uservice
labels:
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
template:
metadata:
labels:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
spec:
securityContext:
{}
containers:
- name: chatqna
envFrom:
- configMapRef:
name: chatqna-llm-uservice-config
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: false
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/llm-tgi:latest"
imagePullPolicy: IfNotPresent
ports:
- name: llm-uservice
containerPort: 9000
protocol: TCP
volumeMounts:
- mountPath: /tmp
name: tmp
livenessProbe:
failureThreshold: 24
httpGet:
path: v1/health_check
port: llm-uservice
initialDelaySeconds: 5
periodSeconds: 5
readinessProbe:
httpGet:
path: v1/health_check
port: llm-uservice
initialDelaySeconds: 5
periodSeconds: 5
startupProbe:
failureThreshold: 120
httpGet:
path: v1/health_check
port: llm-uservice
initialDelaySeconds: 5
periodSeconds: 5
resources:
{}
volumes:
- name: tmp
emptyDir: {}
---
# Source: chatqna/charts/redis-vector-db/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -619,6 +915,84 @@ spec:
- name: tmp
emptyDir: {}
---
# Source: chatqna/charts/reranking-usvc/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-reranking-usvc
labels:
helm.sh/chart: reranking-usvc-1.0.0
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
template:
metadata:
labels:
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
spec:
securityContext:
{}
containers:
- name: chatqna
envFrom:
- configMapRef:
name: chatqna-reranking-usvc-config
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/reranking-tei:latest"
imagePullPolicy: IfNotPresent
ports:
- name: reranking-usvc
containerPort: 8000
protocol: TCP
volumeMounts:
- mountPath: /tmp
name: tmp
livenessProbe:
failureThreshold: 24
httpGet:
path: v1/health_check
port: reranking-usvc
initialDelaySeconds: 5
periodSeconds: 5
readinessProbe:
httpGet:
path: v1/health_check
port: reranking-usvc
initialDelaySeconds: 5
periodSeconds: 5
startupProbe:
failureThreshold: 120
httpGet:
path: v1/health_check
port: reranking-usvc
initialDelaySeconds: 5
periodSeconds: 5
resources:
{}
volumes:
- name: tmp
emptyDir: {}
---
# Source: chatqna/charts/retriever-usvc/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -995,20 +1369,16 @@ spec:
containers:
- name: chatqna
env:
- name: LLM_SERVER_HOST_IP
value: chatqna-tgi
- name: LLM_SERVER_PORT
value: "2080"
- name: RERANK_SERVER_HOST_IP
value: chatqna-teirerank
- name: RERANK_SERVER_PORT
value: "2082"
- name: LLM_SERVICE_HOST_IP
value: chatqna-llm-uservice
- name: RERANK_SERVICE_HOST_IP
value: chatqna-reranking-usvc
- name: RETRIEVER_SERVICE_HOST_IP
value: chatqna-retriever-usvc
- name: EMBEDDING_SERVER_HOST_IP
value: chatqna-tei
- name: EMBEDDING_SERVER_PORT
value: "2081"
- name: EMBEDDING_SERVICE_HOST_IP
value: chatqna-embedding-usvc
- name: GUARDRAIL_SERVICE_HOST_IP
value: chatqna-guardrails-usvc
securityContext:
allowPrivilegeEscalation: false
capabilities:

View File

@@ -27,6 +27,27 @@ data:
no_proxy: ""
LOGFLAG: ""
---
# Source: chatqna/charts/embedding-usvc/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: chatqna-embedding-usvc-config
labels:
helm.sh/chart: embedding-usvc-1.0.0
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
data:
TEI_EMBEDDING_ENDPOINT: "http://chatqna-tei"
http_proxy: ""
https_proxy: ""
no_proxy: ""
LOGFLAG: ""
---
# Source: chatqna/charts/guardrails-usvc/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -51,6 +72,50 @@ data:
https_proxy: ""
no_proxy: ""
---
# Source: chatqna/charts/llm-uservice/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: chatqna-llm-uservice-config
labels:
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
data:
TGI_LLM_ENDPOINT: "http://chatqna-tgi"
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
HF_HOME: "/tmp/.cache/huggingface"
http_proxy: ""
https_proxy: ""
no_proxy: ""
LOGFLAG: ""
---
# Source: chatqna/charts/reranking-usvc/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: chatqna-reranking-usvc-config
labels:
helm.sh/chart: reranking-usvc-1.0.0
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
data:
TEI_RERANKING_ENDPOINT: "http://chatqna-teirerank"
http_proxy: ""
https_proxy: ""
no_proxy: ""
LOGFLAG: ""
---
# Source: chatqna/charts/retriever-usvc/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -143,7 +208,7 @@ metadata:
app.kubernetes.io/managed-by: Helm
data:
MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B"
PORT: "2083"
PORT: "2080"
HF_TOKEN: "insert-your-huggingface-token-here"
http_proxy: ""
https_proxy: ""
@@ -299,6 +364,31 @@ spec:
app.kubernetes.io/name: data-prep
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/embedding-usvc/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: chatqna-embedding-usvc
labels:
helm.sh/chart: embedding-usvc-1.0.0
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 6000
targetPort: 6000
protocol: TCP
name: embedding-usvc
selector:
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/guardrails-usvc/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -324,6 +414,31 @@ spec:
app.kubernetes.io/name: guardrails-usvc
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/llm-uservice/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: chatqna-llm-uservice
labels:
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 9000
targetPort: 9000
protocol: TCP
name: llm-uservice
selector:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/redis-vector-db/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -353,6 +468,31 @@ spec:
app.kubernetes.io/name: redis-vector-db
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/reranking-usvc/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: chatqna-reranking-usvc
labels:
helm.sh/chart: reranking-usvc-1.0.0
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 8000
targetPort: 8000
protocol: TCP
name: reranking-usvc
selector:
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/retriever-usvc/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -446,7 +586,7 @@ spec:
type: ClusterIP
ports:
- port: 80
targetPort: 2083
targetPort: 2080
protocol: TCP
name: tgi
selector:
@@ -559,7 +699,7 @@ spec:
imagePullPolicy: IfNotPresent
ports:
- name: ui
containerPort: 5173
containerPort: 80
protocol: TCP
resources:
{}
@@ -648,6 +788,84 @@ spec:
- name: tmp
emptyDir: {}
---
# Source: chatqna/charts/embedding-usvc/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-embedding-usvc
labels:
helm.sh/chart: embedding-usvc-1.0.0
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
template:
metadata:
labels:
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
spec:
securityContext:
{}
containers:
- name: chatqna
envFrom:
- configMapRef:
name: chatqna-embedding-usvc-config
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/embedding-tei:latest"
imagePullPolicy: IfNotPresent
ports:
- name: embedding-usvc
containerPort: 6000
protocol: TCP
volumeMounts:
- mountPath: /tmp
name: tmp
livenessProbe:
failureThreshold: 24
httpGet:
path: v1/health_check
port: embedding-usvc
initialDelaySeconds: 5
periodSeconds: 5
readinessProbe:
httpGet:
path: v1/health_check
port: embedding-usvc
initialDelaySeconds: 5
periodSeconds: 5
startupProbe:
failureThreshold: 120
httpGet:
path: v1/health_check
port: embedding-usvc
initialDelaySeconds: 5
periodSeconds: 5
resources:
{}
volumes:
- name: tmp
emptyDir: {}
---
# Source: chatqna/charts/guardrails-usvc/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -726,6 +944,84 @@ spec:
- name: tmp
emptyDir: {}
---
# Source: chatqna/charts/llm-uservice/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
  name: chatqna-llm-uservice
  labels:
    helm.sh/chart: llm-uservice-1.0.0
    app.kubernetes.io/name: llm-uservice
    app.kubernetes.io/instance: chatqna
    app.kubernetes.io/version: "v1.0"
    app.kubernetes.io/managed-by: Helm
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: llm-uservice
      app.kubernetes.io/instance: chatqna
  template:
    metadata:
      labels:
        app.kubernetes.io/name: llm-uservice
        app.kubernetes.io/instance: chatqna
    spec:
      # Pod-level security context left empty; the hardening is applied
      # per-container below.
      securityContext: {}
      containers:
        - name: chatqna
          # All runtime configuration comes from the service ConfigMap.
          envFrom:
            - configMapRef:
                name: chatqna-llm-uservice-config
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
                - ALL
            # NOTE(review): root filesystem is writable here, unlike the
            # sibling embedding/reranking services which set this to true —
            # confirm the llm-tgi image actually needs to write outside /tmp.
            readOnlyRootFilesystem: false
            runAsNonRoot: true
            runAsUser: 1000
            seccompProfile:
              type: RuntimeDefault
          image: "opea/llm-tgi:latest"
          imagePullPolicy: IfNotPresent
          ports:
            - name: llm-uservice
              containerPort: 9000
              protocol: TCP
          volumeMounts:
            - mountPath: /tmp
              name: tmp
          # httpGet probe paths must be absolute; the original
          # "v1/health_check" was missing the leading slash.
          livenessProbe:
            failureThreshold: 24
            httpGet:
              path: /v1/health_check
              port: llm-uservice
            initialDelaySeconds: 5
            periodSeconds: 5
          readinessProbe:
            httpGet:
              path: /v1/health_check
              port: llm-uservice
            initialDelaySeconds: 5
            periodSeconds: 5
          # Generous startup budget (120 * 5s) for model-backed services.
          startupProbe:
            failureThreshold: 120
            httpGet:
              path: /v1/health_check
              port: llm-uservice
            initialDelaySeconds: 5
            periodSeconds: 5
          # No resource requests/limits set by default; override via Helm
          # values for production deployments.
          resources: {}
      volumes:
        - name: tmp
          emptyDir: {}
---
# Source: chatqna/charts/redis-vector-db/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -798,6 +1094,84 @@ spec:
- name: tmp
emptyDir: {}
---
# Source: chatqna/charts/reranking-usvc/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
  name: chatqna-reranking-usvc
  labels:
    helm.sh/chart: reranking-usvc-1.0.0
    app.kubernetes.io/name: reranking-usvc
    app.kubernetes.io/instance: chatqna
    app.kubernetes.io/version: "v1.0"
    app.kubernetes.io/managed-by: Helm
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: reranking-usvc
      app.kubernetes.io/instance: chatqna
  template:
    metadata:
      labels:
        app.kubernetes.io/name: reranking-usvc
        app.kubernetes.io/instance: chatqna
    spec:
      # Pod-level security context left empty; the hardening is applied
      # per-container below.
      securityContext: {}
      containers:
        - name: chatqna
          # All runtime configuration comes from the service ConfigMap.
          envFrom:
            - configMapRef:
                name: chatqna-reranking-usvc-config
          # Restricted profile: no privilege escalation, all capabilities
          # dropped, read-only root FS (writable scratch via /tmp emptyDir).
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
                - ALL
            readOnlyRootFilesystem: true
            runAsNonRoot: true
            runAsUser: 1000
            seccompProfile:
              type: RuntimeDefault
          image: "opea/reranking-tei:latest"
          imagePullPolicy: IfNotPresent
          ports:
            - name: reranking-usvc
              containerPort: 8000
              protocol: TCP
          volumeMounts:
            - mountPath: /tmp
              name: tmp
          # httpGet probe paths must be absolute; the original
          # "v1/health_check" was missing the leading slash.
          livenessProbe:
            failureThreshold: 24
            httpGet:
              path: /v1/health_check
              port: reranking-usvc
            initialDelaySeconds: 5
            periodSeconds: 5
          readinessProbe:
            httpGet:
              path: /v1/health_check
              port: reranking-usvc
            initialDelaySeconds: 5
            periodSeconds: 5
          # Generous startup budget (120 * 5s) for model-backed services.
          startupProbe:
            failureThreshold: 120
            httpGet:
              path: /v1/health_check
              port: reranking-usvc
            initialDelaySeconds: 5
            periodSeconds: 5
          # No resource requests/limits set by default; override via Helm
          # values for production deployments.
          resources: {}
      volumes:
        - name: tmp
          emptyDir: {}
---
# Source: chatqna/charts/retriever-usvc/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -1112,7 +1486,7 @@ spec:
name: tmp
ports:
- name: http
containerPort: 2083
containerPort: 2080
protocol: TCP
livenessProbe:
failureThreshold: 24
@@ -1255,24 +1629,16 @@ spec:
containers:
- name: chatqna
env:
- name: LLM_SERVER_HOST_IP
value: chatqna-tgi
- name: LLM_SERVER_PORT
value: "2080"
- name: RERANK_SERVER_HOST_IP
value: chatqna-teirerank
- name: RERANK_SERVER_PORT
value: "2082"
- name: LLM_SERVICE_HOST_IP
value: chatqna-llm-uservice
- name: RERANK_SERVICE_HOST_IP
value: chatqna-reranking-usvc
- name: RETRIEVER_SERVICE_HOST_IP
value: chatqna-retriever-usvc
- name: EMBEDDING_SERVER_HOST_IP
value: chatqna-tei
- name: EMBEDDING_SERVER_PORT
value: "2081"
- name: EMBEDDING_SERVICE_HOST_IP
value: chatqna-embedding-usvc
- name: GUARDRAIL_SERVICE_HOST_IP
value: chatqna-guardrails-usvc
- name: GUARDRAIL_SERVICE_PORT
value: "9090"
securityContext:
allowPrivilegeEscalation: false
capabilities:

Some files were not shown because too many files have changed in this diff Show More