Compare commits: refactor_b...fix_videoq
28 commits
| SHA1 |
|---|
| abafd5de20 |
| 970b869838 |
| 87ff149f61 |
| c39a569ab2 |
| 81b02bb947 |
| 47069ac70c |
| 6ce7730863 |
| ad5523bac7 |
| 88a8235f21 |
| 63ad850052 |
| 9a0c547112 |
| 26a6da4123 |
| 45d5da2ddd |
| 1b3291a1c8 |
| 7ac8cf517a |
| 44a689b0bf |
| 388d3eb5c5 |
| ef9ad61440 |
| 4c41a5db83 |
| 9adf7a6af0 |
| a4d028e8ea |
| 32d4f714fd |
| fdbc27a9b5 |
| 5f4b1828a5 |
| 39abef8be8 |
| ed163087ba |
| 259099d19f |
| 9a1118730b |
2  .github/workflows/_example-workflow.yml  (vendored)
@@ -78,7 +78,7 @@ jobs:
cd vllm && git rev-parse HEAD && cd ../
fi
if [[ $(grep -c "vllm-gaudi:" ${docker_compose_path}) != 0 ]]; then
git clone --depth 1 --branch v0.6.4.post2+Gaudi-1.19.0 https://github.com/HabanaAI/vllm-fork.git
git clone --depth 1 --branch v0.6.4.post2+Gaudi-1.19.0 https://github.com/HabanaAI/vllm-fork.git
fi
git clone --depth 1 --branch ${{ inputs.opea_branch }} https://github.com/opea-project/GenAIComps.git
cd GenAIComps && git rev-parse HEAD && cd ../

1  .github/workflows/_helm-e2e.yml  (vendored)
@@ -97,6 +97,7 @@ jobs:
helm-test:
needs: [get-test-case]
if: ${{ fromJSON(needs.get-test-case.outputs.value_files).length != 0 }}
strategy:
matrix:
value_file: ${{ fromJSON(needs.get-test-case.outputs.value_files) }}

2  .github/workflows/_run-docker-compose.yml  (vendored)
@@ -91,6 +91,7 @@ jobs:
compose-test:
needs: [get-test-case]
if: ${{ needs.get-test-case.outputs.test_cases != '' }}
strategy:
matrix:
test_case: ${{ fromJSON(needs.get-test-case.outputs.test_cases) }}
@@ -126,6 +127,7 @@ jobs:
shell: bash
env:
HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
HF_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
PINECONE_KEY: ${{ secrets.PINECONE_KEY }}

2  .github/workflows/manual-docker-publish.yml  (vendored)
@@ -41,9 +41,11 @@ jobs:
publish:
needs: [get-image-list]
if: ${{ needs.get-image-list.outputs.matrix != '' }}
strategy:
matrix:
image: ${{ fromJSON(needs.get-image-list.outputs.matrix) }}
fail-fast: false
runs-on: "docker-build-${{ inputs.node }}"
steps:
- uses: docker/login-action@v3.2.0

1  .github/workflows/manual-docker-scan.yml  (vendored)
@@ -47,6 +47,7 @@ jobs:
scan-docker:
needs: get-image-list
runs-on: "docker-build-${{ inputs.node }}"
if: ${{ fromJSON(needs.get-image-list.outputs.matrix).length != 0 }}
strategy:
matrix:
image: ${{ fromJson(needs.get-image-list.outputs.matrix) }}

@@ -76,7 +76,7 @@ jobs:
build-deploy-gmc:
needs: [get-test-matrix]
if: ${{ fromJSON(inputs.deploy_gmc) }}
if: ${{ fromJSON(inputs.deploy_gmc) }} && ${{ fromJSON(needs.get-test-matrix.outputs.nodes).length != 0 }}
strategy:
matrix:
node: ${{ fromJson(needs.get-test-matrix.outputs.nodes) }}
@@ -90,7 +90,7 @@ jobs:
run-examples:
needs: [get-test-matrix, build-deploy-gmc]
if: always()
if: always() && ${{ fromJSON(needs.get-test-matrix.outputs.examples).length != 0 }}
strategy:
matrix:
example: ${{ fromJson(needs.get-test-matrix.outputs.examples) }}

6  .github/workflows/manual-freeze-tag.yml  (vendored)
@@ -25,9 +25,9 @@ jobs:
- name: Set up Git
run: |
git config --global user.name "NeuralChatBot"
git config --global user.email "grp_neural_chat_bot@intel.com"
git remote set-url origin https://NeuralChatBot:"${{ secrets.ACTION_TOKEN }}"@github.com/opea-project/GenAIExamples.git
git config --global user.name "CICD-at-OPEA"
git config --global user.email "CICD@opea.dev"
git remote set-url origin https://CICD-at-OPEA:"${{ secrets.ACTION_TOKEN }}"@github.com/opea-project/GenAIExamples.git
- name: Run script
run: |

1  .github/workflows/manual-image-build.yml  (vendored)
@@ -51,6 +51,7 @@ jobs:
image-build:
needs: get-test-matrix
if: ${{ needs.get-test-matrix.outputs.nodes != '' }}
strategy:
matrix:
node: ${{ fromJson(needs.get-test-matrix.outputs.nodes) }}

@@ -33,6 +33,7 @@ jobs:
clean-up:
needs: get-build-matrix
if: ${{ fromJSON(needs.get-build-matrix.outputs.nodes).length != 0 }}
strategy:
matrix:
node: ${{ fromJson(needs.get-build-matrix.outputs.nodes) }}
@@ -47,6 +48,7 @@ jobs:
build:
needs: [get-build-matrix, clean-up]
if: ${{ fromJSON(needs.get-build-matrix.outputs.nodes).length != 0 }}
strategy:
matrix:
example: ${{ fromJson(needs.get-build-matrix.outputs.examples) }}
@@ -34,6 +34,7 @@ jobs:
build-and-test:
needs: get-build-matrix
if: ${{ needs.get-build-matrix.outputs.examples_json != '' }}
strategy:
matrix:
example: ${{ fromJSON(needs.get-build-matrix.outputs.examples_json) }}
@@ -53,9 +54,11 @@ jobs:
publish:
needs: [get-build-matrix, get-image-list, build-and-test]
if: ${{ needs.get-image-list.outputs.matrix != '' }}
strategy:
matrix:
image: ${{ fromJSON(needs.get-image-list.outputs.matrix) }}
fail-fast: false
runs-on: "docker-build-gaudi"
steps:
- uses: docker/login-action@v3.2.0

2  .github/workflows/pr-chart-e2e.yml  (vendored)
@@ -65,7 +65,7 @@ jobs:
helm-chart-test:
needs: [job1]
if: always() && ${{ needs.job1.outputs.run_matrix.example.length > 0 }}
if: always() && ${{ fromJSON(needs.job1.outputs.run_matrix).length != 0 }}
uses: ./.github/workflows/_helm-e2e.yml
strategy:
matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }}

2  .github/workflows/pr-docker-compose-e2e.yml  (vendored)
@@ -32,10 +32,10 @@ jobs:
example-test:
needs: [get-test-matrix]
if: ${{ needs.get-test-matrix.outputs.run_matrix != '' }}
strategy:
matrix: ${{ fromJSON(needs.get-test-matrix.outputs.run_matrix) }}
fail-fast: false
if: ${{ !github.event.pull_request.draft }}
uses: ./.github/workflows/_run-docker-compose.yml
with:
registry: "opea"

1  .github/workflows/push-image-build.yml  (vendored)
@@ -24,6 +24,7 @@ jobs:
image-build:
needs: job1
if: ${{ fromJSON(needs.job1.outputs.run_matrix).length != 0 }}
strategy:
matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }}
fail-fast: false
@@ -54,6 +54,6 @@ jobs:
${{ env.changed_files }}
Please verify if the helm charts and manifests need to be changed accordingly.
Please verify if the helm charts need to be changed accordingly.
> This issue was created automatically by CI.

4  .github/workflows/weekly-update-images.yml  (vendored)
@@ -16,8 +16,8 @@ jobs:
freeze-images:
runs-on: ubuntu-latest
env:
USER_NAME: "NeuralChatBot"
USER_EMAIL: "grp_neural_chat_bot@intel.com"
USER_NAME: "CICD-at-OPEA"
USER_EMAIL: "CICD@opea.dev"
BRANCH_NAME: "update_images_tag"
steps:
- name: Checkout repository
@@ -1,7 +1,6 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

set -xe

WORKPATH=$(dirname "$PWD")

@@ -18,7 +18,7 @@ Here're some of the project's features:
2. cd command to the current folder.

```
cd AgentQnA/ui
cd AgentQnA/ui/svelte
```

3. Modify the required .env variables.
@@ -41,7 +41,7 @@ Here're some of the project's features:
npm run dev
```

- The application will be available at `http://localhost:3000`.
- The application will be available at `http://localhost:5173`.

5. **For Docker Setup:**
@@ -54,7 +54,7 @@ Here're some of the project's features:
- Run the Docker container:

```
docker run -d -p 3000:3000 --name agent-ui opea:agent-ui
docker run -d -p 5173:5173 --name agent-ui opea:agent-ui
```

- The application will be available at `http://localhost:3000`.
- The application will be available at `http://localhost:5173`.

209  AvatarChatbot/docker_compose/amd/gpu/rocm/README.md  (new file)
@@ -0,0 +1,209 @@
# Build Mega Service of AvatarChatbot on AMD GPU

This document outlines the deployment process for an AvatarChatbot application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on an AMD GPU (ROCm) server.

## 🚀 Build Docker images

### 1. Source Code install GenAIComps

```bash
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
```

### 2. Build ASR Image

```bash
docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/integrations/dependency/whisper/Dockerfile .

docker build -t opea/asr:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/Dockerfile .
```

### 3. Build LLM Image

```bash
docker build --no-cache -t opea/llm-textgen:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/text-generation/Dockerfile .
```

### 4. Build TTS Image

```bash
docker build -t opea/speecht5:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/src/integrations/dependency/speecht5/Dockerfile .

docker build -t opea/tts:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/src/Dockerfile .
```

### 5. Build Animation Image

```bash
docker build -t opea/wav2lip:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/third_parties/wav2lip/src/Dockerfile .

docker build -t opea/animation:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/animation/src/Dockerfile .
```

### 6. Build MegaService Docker Image

To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `audioqna.py` Python script. Build the MegaService Docker image using the command below:

```bash
git clone https://github.com/opea-project/GenAIExamples.git
cd GenAIExamples/AvatarChatbot/
docker build --no-cache -t opea/avatarchatbot:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
```

Then run the command `docker images`; you should have the following images ready:

1. `opea/whisper:latest`
2. `opea/asr:latest`
3. `opea/llm-textgen:latest`
4. `opea/speecht5:latest`
5. `opea/tts:latest`
6. `opea/wav2lip:latest`
7. `opea/animation:latest`
8. `opea/avatarchatbot:latest`
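
As a quick sanity check, you can list just the images built above. The `grep` filter below is only an illustrative assumption; adjust it if your images live under a different registry or tag.

```bash
# Show only the AvatarChatbot-related images built in the previous steps.
docker images | grep -E 'opea/(whisper|asr|llm-textgen|speecht5|tts|wav2lip|animation|avatarchatbot)'
```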

## 🚀 Set the environment variables

Before starting the services with `docker compose`, you have to recheck the following environment variables.

```bash
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export host_ip=$(hostname -I | awk '{print $1}')

export TGI_SERVICE_PORT=3006
export TGI_LLM_ENDPOINT=http://${host_ip}:${TGI_SERVICE_PORT}
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"

export ASR_ENDPOINT=http://${host_ip}:7066
export TTS_ENDPOINT=http://${host_ip}:7055
export WAV2LIP_ENDPOINT=http://${host_ip}:7860

export MEGA_SERVICE_HOST_IP=${host_ip}
export ASR_SERVICE_HOST_IP=${host_ip}
export TTS_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export ANIMATION_SERVICE_HOST_IP=${host_ip}

export MEGA_SERVICE_PORT=8888
export ASR_SERVICE_PORT=3001
export TTS_SERVICE_PORT=3002
export LLM_SERVICE_PORT=3007
export ANIMATION_SERVICE_PORT=3008

export DEVICE="cpu"
export WAV2LIP_PORT=7860
export INFERENCE_MODE='wav2lip+gfpgan'
export CHECKPOINT_PATH='/usr/local/lib/python3.11/site-packages/Wav2Lip/checkpoints/wav2lip_gan.pth'
export FACE="assets/img/avatar5.png"
# export AUDIO='assets/audio/eg3_ref.wav' # audio file path is optional, will use base64str in the post request as input if is 'None'
export AUDIO='None'
export FACESIZE=96
export OUTFILE="/outputs/result.mp4"
export GFPGAN_MODEL_VERSION=1.4 # latest version, can roll back to v1.3 if needed
export UPSCALE_FACTOR=1
export FPS=10
```

Warning: in this solution the Wav2Lip service runs on the CPU only. To use AMD GPUs and reach operational performance, the Wav2Lip image needs to be modified to target AMD hardware and the ROCm framework.

## 🚀 Start the MegaService

```bash
cd GenAIExamples/AvatarChatbot/docker_compose/amd/gpu/rocm/
docker compose -f compose.yaml up -d
```

## 🚀 Test MicroServices

```bash
# whisper service
curl http://${host_ip}:7066/v1/asr \
  -X POST \
  -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
  -H 'Content-Type: application/json'

# asr microservice
curl http://${host_ip}:3001/v1/audio/transcriptions \
  -X POST \
  -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
  -H 'Content-Type: application/json'

# tgi service
curl http://${host_ip}:3006/generate \
  -X POST \
  -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
  -H 'Content-Type: application/json'

# llm microservice
curl http://${host_ip}:3007/v1/chat/completions \
  -X POST \
  -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \
  -H 'Content-Type: application/json'

# speecht5 service
curl http://${host_ip}:7055/v1/tts \
  -X POST \
  -d '{"text": "Who are you?"}' \
  -H 'Content-Type: application/json'

# tts microservice
curl http://${host_ip}:3002/v1/audio/speech \
  -X POST \
  -d '{"text": "Who are you?"}' \
  -H 'Content-Type: application/json'

# wav2lip service
cd ../../../..
curl http://${host_ip}:7860/v1/wav2lip \
  -X POST \
  -d @assets/audio/sample_minecraft.json \
  -H 'Content-Type: application/json'

# animation microservice
curl http://${host_ip}:3008/v1/animation \
  -X POST \
  -d @assets/audio/sample_question.json \
  -H "Content-Type: application/json"

```

## 🚀 Test MegaService

```bash
curl http://${host_ip}:3009/v1/avatarchatbot \
  -X POST \
  -d @assets/audio/sample_whoareyou.json \
  -H 'Content-Type: application/json'
```

If the megaservice is running properly, you should see the following output:

```bash
"/outputs/result.mp4"
```

The output file will be saved in the current working directory, as `${PWD}` is mapped to `/outputs` inside the wav2lip-service Docker container.
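
As a minimal check (assuming the default `OUTFILE="/outputs/result.mp4"` and the `${PWD}:/outputs` volume mapping from the compose file), the generated video can be inspected directly on the host:

```bash
# /outputs inside wav2lip-service is the host's current directory,
# so the result written to /outputs/result.mp4 shows up here.
ls -lh "${PWD}/result.mp4"
```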

## Gradio UI

```bash
cd $WORKPATH/GenAIExamples/AvatarChatbot
python3 ui/gradio/app_gradio_demo_avatarchatbot.py
```

The UI can be viewed at http://${host_ip}:7861
<img src="../../../../assets/img/UI.png" alt="UI Example" width="60%">
In the current version (v1.0), you need to set the avatar figure image/video and the DL model choice through the environment variables before starting the AvatarChatbot backend service and running the UI; in the UI itself, please only customize the audio question.
\*\* Changing the avatar figure between runs will be enabled in v2.0.
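
For example, switching to a different avatar figure currently means re-exporting the variable and recreating the services before reopening the UI. This is only a sketch of that workflow; the image path is a placeholder to replace with your own asset.

```bash
# Point FACE at the avatar image the animation service should use, then
# recreate the containers so the new environment value is picked up.
export FACE="assets/img/avatar5.png"   # replace with your own image
docker compose -f compose.yaml up -d --force-recreate
```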

## Troubleshooting

```bash
cd GenAIExamples/AvatarChatbot/tests
export IMAGE_REPO="opea"
export IMAGE_TAG="latest"
export HUGGINGFACEHUB_API_TOKEN=<your_hf_token>

test_avatarchatbot_on_xeon.sh
```

158  AvatarChatbot/docker_compose/amd/gpu/rocm/compose.yaml  (new file)
@@ -0,0 +1,158 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

services:
  whisper-service:
    image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
    container_name: whisper-service
    ports:
      - "7066:7066"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
    restart: unless-stopped
  asr:
    image: ${REGISTRY:-opea}/asr:${TAG:-latest}
    container_name: asr-service
    ports:
      - "3001:9099"
    ipc: host
    environment:
      ASR_ENDPOINT: ${ASR_ENDPOINT}
  speecht5-service:
    image: ${REGISTRY:-opea}/speecht5:${TAG:-latest}
    container_name: speecht5-service
    ports:
      - "7055:7055"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
    restart: unless-stopped
  tts:
    image: ${REGISTRY:-opea}/tts:${TAG:-latest}
    container_name: tts-service
    ports:
      - "3002:9088"
    ipc: host
    environment:
      TTS_ENDPOINT: ${TTS_ENDPOINT}
  tgi-service:
    image: ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
    container_name: tgi-service
    ports:
      - "${TGI_SERVICE_PORT:-3006}:80"
    volumes:
      - "./data:/data"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
    shm_size: 1g
    devices:
      - /dev/kfd:/dev/kfd
      - /dev/dri/:/dev/dri/
    cap_add:
      - SYS_PTRACE
    group_add:
      - video
    security_opt:
      - seccomp:unconfined
    ipc: host
    command: --model-id ${LLM_MODEL_ID} --max-input-length 4096 --max-total-tokens 8192
  llm:
    image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest}
    container_name: llm-tgi-server
    depends_on:
      - tgi-service
    ports:
      - "3007:9000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
      LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      OPENAI_API_KEY: ${OPENAI_API_KEY}
    restart: unless-stopped
  wav2lip-service:
    image: ${REGISTRY:-opea}/wav2lip:${TAG:-latest}
    container_name: wav2lip-service
    ports:
      - "7860:7860"
    ipc: host
    volumes:
      - ${PWD}:/outputs
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      DEVICE: ${DEVICE}
      INFERENCE_MODE: ${INFERENCE_MODE}
      CHECKPOINT_PATH: ${CHECKPOINT_PATH}
      FACE: ${FACE}
      AUDIO: ${AUDIO}
      FACESIZE: ${FACESIZE}
      OUTFILE: ${OUTFILE}
      GFPGAN_MODEL_VERSION: ${GFPGAN_MODEL_VERSION}
      UPSCALE_FACTOR: ${UPSCALE_FACTOR}
      FPS: ${FPS}
      WAV2LIP_PORT: ${WAV2LIP_PORT}
    restart: unless-stopped
  animation:
    image: ${REGISTRY:-opea}/animation:${TAG:-latest}
    container_name: animation-server
    ports:
      - "3008:9066"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      WAV2LIP_ENDPOINT: ${WAV2LIP_ENDPOINT}
    restart: unless-stopped
  avatarchatbot-backend-server:
    image: ${REGISTRY:-opea}/avatarchatbot:${TAG:-latest}
    container_name: avatarchatbot-backend-server
    depends_on:
      - asr
      - llm
      - tts
      - animation
    ports:
      - "3009:8888"
    environment:
      no_proxy: ${no_proxy}
      https_proxy: ${https_proxy}
      http_proxy: ${http_proxy}
      MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP}
      MEGA_SERVICE_PORT: ${MEGA_SERVICE_PORT}
      ASR_SERVICE_HOST_IP: ${ASR_SERVICE_HOST_IP}
      ASR_SERVICE_PORT: ${ASR_SERVICE_PORT}
      LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP}
      LLM_SERVICE_PORT: ${LLM_SERVICE_PORT}
      LLM_SERVER_HOST_IP: ${LLM_SERVICE_HOST_IP}
      LLM_SERVER_PORT: ${LLM_SERVICE_PORT}
      TTS_SERVICE_HOST_IP: ${TTS_SERVICE_HOST_IP}
      TTS_SERVICE_PORT: ${TTS_SERVICE_PORT}
      ANIMATION_SERVICE_HOST_IP: ${ANIMATION_SERVICE_HOST_IP}
      ANIMATION_SERVICE_PORT: ${ANIMATION_SERVICE_PORT}
      WHISPER_SERVER_HOST_IP: ${WHISPER_SERVER_HOST_IP}
      WHISPER_SERVER_PORT: ${WHISPER_SERVER_PORT}
      SPEECHT5_SERVER_HOST_IP: ${SPEECHT5_SERVER_HOST_IP}
      SPEECHT5_SERVER_PORT: ${SPEECHT5_SERVER_PORT}
    ipc: host
    restart: always

networks:
  default:
    driver: bridge

47  AvatarChatbot/docker_compose/amd/gpu/rocm/set_env.sh  (new file)
@@ -0,0 +1,47 @@
#!/usr/bin/env bash

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export OPENAI_API_KEY=${OPENAI_API_KEY}
export host_ip=$(hostname -I | awk '{print $1}')

export TGI_SERVICE_PORT=3006
export TGI_LLM_ENDPOINT=http://${host_ip}:${TGI_SERVICE_PORT}
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"

export ASR_ENDPOINT=http://${host_ip}:7066
export TTS_ENDPOINT=http://${host_ip}:7055
export WAV2LIP_ENDPOINT=http://${host_ip}:7860

export WHISPER_SERVER_HOST_IP=${host_ip}
export WHISPER_SERVER_PORT=7066

export SPEECHT5_SERVER_HOST_IP=${host_ip}
export SPEECHT5_SERVER_PORT=7055

export MEGA_SERVICE_HOST_IP=${host_ip}
export ASR_SERVICE_HOST_IP=${host_ip}
export TTS_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export ANIMATION_SERVICE_HOST_IP=${host_ip}

export MEGA_SERVICE_PORT=8888
export ASR_SERVICE_PORT=3001
export TTS_SERVICE_PORT=3002
export LLM_SERVICE_PORT=3007
export ANIMATION_SERVICE_PORT=3008

export DEVICE="cpu"
export WAV2LIP_PORT=7860
export INFERENCE_MODE='wav2lip+gfpgan'
export CHECKPOINT_PATH='/usr/local/lib/python3.11/site-packages/Wav2Lip/checkpoints/wav2lip_gan.pth'
export FACE="/home/user/comps/animation/src/assets/img/avatar5.png"
# export AUDIO='assets/audio/eg3_ref.wav' # audio file path is optional, will use base64str in the post request as input if is 'None'
export AUDIO='None'
export FACESIZE=96
export OUTFILE="/outputs/result.mp4"
export GFPGAN_MODEL_VERSION=1.4 # latest version, can roll back to v1.3 if needed
export UPSCALE_FACTOR=1
export FPS=10

170  AvatarChatbot/tests/test_compose_on_rocm.sh  (new file)
@@ -0,0 +1,170 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

set -e
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}

WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
if ls $LOG_PATH/*.log 1> /dev/null 2>&1; then
    rm $LOG_PATH/*.log
    echo "Log files removed."
else
    echo "No log files to remove."
fi
ip_address=$(hostname -I | awk '{print $1}')

function build_docker_images() {
    cd $WORKPATH/docker_image_build
    git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../

    echo "Build all the images with --no-cache, check docker_image_build.log for details..."
    service_list="avatarchatbot whisper asr llm-textgen speecht5 tts wav2lip animation"
    docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

    docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm

    docker images && sleep 3s
}

function start_services() {
    cd $WORKPATH/docker_compose/amd/gpu/rocm

    export HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN
    export OPENAI_API_KEY=$OPENAI_API_KEY
    export host_ip=${ip_address}

    export TGI_SERVICE_PORT=3006
    export TGI_LLM_ENDPOINT=http://${host_ip}:${TGI_SERVICE_PORT}
    export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"

    export ASR_ENDPOINT=http://${host_ip}:7066
    export TTS_ENDPOINT=http://${host_ip}:7055
    export WAV2LIP_ENDPOINT=http://${host_ip}:7860

    export MEGA_SERVICE_HOST_IP=${host_ip}
    export ASR_SERVICE_HOST_IP=${host_ip}
    export TTS_SERVICE_HOST_IP=${host_ip}
    export LLM_SERVICE_HOST_IP=${host_ip}
    export ANIMATION_SERVICE_HOST_IP=${host_ip}
    export WHISPER_SERVER_HOST_IP=${host_ip}
    export WHISPER_SERVER_PORT=7066

    export SPEECHT5_SERVER_HOST_IP=${host_ip}
    export SPEECHT5_SERVER_PORT=7055

    export MEGA_SERVICE_PORT=8888
    export ASR_SERVICE_PORT=3001
    export TTS_SERVICE_PORT=3002
    export LLM_SERVICE_PORT=3007
    export ANIMATION_SERVICE_PORT=3008

    export DEVICE="cpu"
    export WAV2LIP_PORT=7860
    export INFERENCE_MODE='wav2lip+gfpgan'
    export CHECKPOINT_PATH='/usr/local/lib/python3.11/site-packages/Wav2Lip/checkpoints/wav2lip_gan.pth'
    export FACE="/home/user/comps/animation/src/assets/img/avatar5.png"
    # export AUDIO='assets/audio/eg3_ref.wav' # audio file path is optional, will use base64str in the post request as input if is 'None'
    export AUDIO='None'
    export FACESIZE=96
    export OUTFILE="./outputs/result.mp4"
    export GFPGAN_MODEL_VERSION=1.4 # latest version, can roll back to v1.3 if needed
    export UPSCALE_FACTOR=1
    export FPS=5
    # Start Docker Containers
    docker compose up -d --force-recreate

    echo "Check tgi-service status"

    n=0
    until [[ "$n" -ge 100 ]]; do
        docker logs tgi-service > $LOG_PATH/tgi_service_start.log
        if grep -q Connected $LOG_PATH/tgi_service_start.log; then
            break
        fi
        sleep 5s
        n=$((n+1))
    done
    echo "tgi-service are up and running"
    sleep 5s

    echo "Check wav2lip-service status"

    n=0
    until [[ "$n" -ge 100 ]]; do
        docker logs wav2lip-service >& $LOG_PATH/wav2lip-service_start.log
        if grep -q "Application startup complete" $LOG_PATH/wav2lip-service_start.log; then
            break
        fi
        sleep 5s
        n=$((n+1))
    done
    echo "wav2lip-service are up and running"
    sleep 5s
}

function validate_megaservice() {
    cd $WORKPATH
    ls
    result=$(http_proxy="" curl http://${ip_address}:3009/v1/avatarchatbot -X POST -d @assets/audio/sample_whoareyou.json -H 'Content-Type: application/json')
    echo "result is === $result"
    if [[ $result == *"mp4"* ]]; then
        echo "Result correct."
    else
        docker logs whisper-service > $LOG_PATH/whisper-service.log
        docker logs asr-service > $LOG_PATH/asr-service.log
        docker logs speecht5-service > $LOG_PATH/speecht5-service.log
        docker logs tts-service > $LOG_PATH/tts-service.log
        docker logs tgi-service > $LOG_PATH/tgi-service.log
        docker logs llm-tgi-server > $LOG_PATH/llm-tgi-server.log
        docker logs wav2lip-service > $LOG_PATH/wav2lip-service.log
        docker logs animation-server > $LOG_PATH/animation-server.log

        echo "Result wrong."
        exit 1
    fi

}

#function validate_frontend() {

#}

function stop_docker() {
    cd $WORKPATH/docker_compose/amd/gpu/rocm
    docker compose down && docker compose rm -f
}

function main() {

    echo $OPENAI_API_KEY
    echo $OPENAI_KEY

    stop_docker
    if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
    start_services
    # validate_microservices
    sleep 30
    validate_megaservice
    # validate_frontend
    stop_docker

    echo y | docker system prune

}

main

@@ -91,6 +91,14 @@ cd GenAIExamples/ChatQnA/docker_compose/intel/cpu/xeon/
docker compose up -d
```

To enable OpenTelemetry Tracing, the compose.telemetry.yaml file needs to be merged with the default compose.yaml file.
CPU example with the OpenTelemetry feature:

```bash
cd GenAIExamples/ChatQnA/docker_compose/intel/cpu/xeon/
docker compose -f compose.yaml -f compose.telemetry.yaml up -d
```

This will automatically download the Docker images from Docker Hub:

```bash
@@ -232,6 +240,13 @@ cd GenAIExamples/ChatQnA/docker_compose/intel/hpu/gaudi/
docker compose up -d
```

To enable OpenTelemetry Tracing, the compose.telemetry.yaml file needs to be merged with the default compose.yaml file.

```bash
cd GenAIExamples/ChatQnA/docker_compose/intel/hpu/gaudi/
docker compose -f compose.yaml -f compose.telemetry.yaml up -d
```

Refer to the [Gaudi Guide](./docker_compose/intel/hpu/gaudi/README.md) to build docker images from source.

### Deploy ChatQnA on Xeon
@@ -243,6 +258,13 @@ cd GenAIExamples/ChatQnA/docker_compose/intel/cpu/xeon/
docker compose up -d
```

To enable OpenTelemetry Tracing, the compose.telemetry.yaml file needs to be merged with the default compose.yaml file.

```bash
cd GenAIExamples/ChatQnA/docker_compose/intel/cpu/xeon/
docker compose -f compose.yaml -f compose.telemetry.yaml up -d
```

Refer to the [Xeon Guide](./docker_compose/intel/cpu/xeon/README.md) for more instructions on building docker images from source.

### Deploy ChatQnA on NVIDIA GPU
@@ -346,7 +368,7 @@ OPEA microservice deployment can easily be monitored through Grafana dashboards

## Tracing Services with OpenTelemetry Tracing and Jaeger

> NOTE: limited support. Only LLM inference serving with TGI on Gaudi is enabled for this feature.
> NOTE: This feature is disabled by default. Please check the Deploy ChatQnA sections for how to enable it with the compose.telemetry.yaml file.

OPEA microservice and TGI/TEI serving can easily be traced through Jaeger dashboards in conjunction with the OpenTelemetry Tracing feature. Follow the [README](https://github.com/opea-project/GenAIComps/tree/main/comps/cores/telemetry#tracing) to trace additional functions if needed.

@@ -357,8 +379,17 @@ Users could also get the external IP via below command.
ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+'
```

Access the Jaeger dashboard UI at http://{EXTERNAL_IP}:16686
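
Before opening the dashboard, a quick reachability check can save time. This is only a sketch, assuming the Jaeger ports published in compose.telemetry.yaml; `EXTERNAL_IP` is a placeholder for the address obtained above.

```bash
# EXTERNAL_IP is the address found with the ip route command above.
export EXTERNAL_IP=<your_external_ip>
# Expect HTTP 200 once the jaeger container from compose.telemetry.yaml is up.
curl -s -o /dev/null -w "%{http_code}\n" http://${EXTERNAL_IP}:16686
```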

For TGI serving on Gaudi, users could see different services like opea, TEI and TGI.


Here is a screenshot of one tracing of a TGI serving request.


There are also OPEA-related tracings. Users can understand the time breakdown of each service request by looking into each `opea:schedule` operation.


There may be async functions such as `llm/MicroService_asyn_generate`, and users need to check the trace of the async function in another operation such as `opea:llm_generate_stream`.


@@ -6,32 +6,31 @@ deploy:
version: 1.1.0
modelUseHostPath: /mnt/models
HUGGINGFACEHUB_API_TOKEN: ""
node: [1, 2, 4]
namespace: "default"
cards_per_node: 8
node: [1, 2, 4, 8]
namespace: ""

services:
backend:
instance_num: [2, 2, 4]
instance_num: [2, 2, 4, 8]
cores_per_instance: ""
memory_capacity: ""

teirerank:
enabled: True
model_id: ""
instance_num: [1, 1, 1]
replicaCount: [1, 1, 1, 1]
cards_per_instance: 1

tei:
model_id: ""
instance_num: [1, 2, 4]
replicaCount: [1, 2, 4, 8]
cores_per_instance: ""
memory_capacity: ""

llm:
engine: tgi
model_id: ""
instance_num: [7, 15, 31]
replicaCount: [7, 15, 31, 63]
max_batch_size: [1, 2, 4, 8]
max_input_length: ""
max_total_tokens: ""
@@ -40,25 +39,25 @@ deploy:
cards_per_instance: 1

data-prep:
instance_num: [1, 1, 1]
replicaCount: [1, 1, 1, 1]
cores_per_instance: ""
memory_capacity: ""

retriever-usvc:
instance_num: [2, 2, 4]
replicaCount: [2, 2, 4, 8]
cores_per_instance: ""
memory_capacity: ""

redis-vector-db:
instance_num: [1, 1, 1]
replicaCount: [1, 1, 1, 1]
cores_per_instance: ""
memory_capacity: ""

chatqna-ui:
instance_num: [1, 1, 1]
replicaCount: [1, 1, 1, 1]

nginx:
instance_num: [1, 1, 1]
replicaCount: [1, 1, 1, 1]

benchmark:
# http request behavior related fields
@@ -71,20 +70,14 @@ benchmark:
warmup_iterations: 10
seed: 1024

# dataset related fields
dataset: pub_med10 # [dummy_english, dummy_chinese, pub_med100] predefined keywords for supported dataset
# workload, all of the test cases will run for benchmark
test_cases:
- chatqnafixed
- chatqna_qlist_pubmed:
dataset: pub_med10 # pub_med10, pub_med100, pub_med1000
user_queries: [1, 2, 4]
query_token_size: 128 # if specified, means fixed query token size will be sent out

# advanced settings in each component which will impact perf.
dataprep: # not target this time
chunk_size: [1024]
chunk_overlap: [1000]
retriever: # not target this time
algo: IVF
fetch_k: 2
k: 1
rerank:
top_n: 2
llm:
max_token_size: 128 # specify the output token size
# specify the llm output token size
max_token_size: [128, 256]
@@ -34,16 +34,36 @@ To set up environment variables for deploying ChatQnA services, follow these ste
```

3. Set up other environment variables:

```bash
source ./set_env.sh
```

4. Change Model for LLM serving

By default, Meta-Llama-3-8B-Instruct is used for LLM serving; the default model can be changed to another validated LLM model.
Please pick a [validated LLM model](https://github.com/opea-project/GenAIComps/tree/main/comps/llms/src/text-generation#validated-llm-models) from the table.
To change the default model defined in set_env.sh, overwrite it by exporting LLM_MODEL_ID to the new model or by modifying set_env.sh, and then repeat step 3.
For example, change to Llama-2-7b-chat-hf using the following command.

```bash
export LLM_MODEL_ID="meta-llama/Llama-2-7b-chat-hf"
```

## Quick Start: 2.Run Docker Compose

```bash
docker compose up -d
```

To enable OpenTelemetry Tracing, the compose.telemetry.yaml file needs to be merged with the default compose.yaml file.
CPU example with the OpenTelemetry feature:

```bash
cd GenAIExamples/ChatQnA/docker_compose/intel/cpu/xeon/
docker compose -f compose.yaml -f compose.telemetry.yaml up -d
```

This will automatically download the Docker images from Docker Hub:

```bash
@@ -263,12 +283,16 @@ If use vLLM as the LLM serving backend.
docker compose -f compose.yaml up -d
# Start ChatQnA without Rerank Pipeline
docker compose -f compose_without_rerank.yaml up -d
# Start ChatQnA with Rerank Pipeline and Open Telemetry Tracing
docker compose -f compose.yaml -f compose.telemetry.yaml up -d
```

If using TGI as the LLM serving backend:

```bash
docker compose -f compose_tgi.yaml up -d
# Start ChatQnA with Open Telemetry Tracing
docker compose -f compose_tgi.yaml -f compose_tgi.telemetry.yaml up -d
```

### Validate Microservices

27  ChatQnA/docker_compose/intel/cpu/xeon/compose.telemetry.yaml  (new file)
@@ -0,0 +1,27 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

services:
  tei-embedding-service:
    command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
  tei-reranking-service:
    command: --model-id ${RERANK_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
  jaeger:
    image: jaegertracing/all-in-one:latest
    container_name: jaeger
    ports:
      - "16686:16686"
      - "4317:4317"
      - "4318:4318"
      - "9411:9411"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      COLLECTOR_ZIPKIN_HOST_PORT: 9411
    restart: unless-stopped
  chatqna-xeon-backend-server:
    environment:
      - ENABLE_OPEA_TELEMETRY=true
      - TELEMETRY_ENDPOINT=${TELEMETRY_ENDPOINT}
@@ -0,0 +1,29 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

services:
  tei-embedding-service:
    command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
  tei-reranking-service:
    command: --model-id ${RERANK_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
  tgi-service:
    command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0 --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
  jaeger:
    image: jaegertracing/all-in-one:latest
    container_name: jaeger
    ports:
      - "16686:16686"
      - "4317:4317"
      - "4318:4318"
      - "9411:9411"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      COLLECTOR_ZIPKIN_HOST_PORT: 9411
    restart: unless-stopped
  chatqna-xeon-backend-server:
    environment:
      - ENABLE_OPEA_TELEMETRY=true
      - TELEMETRY_ENDPOINT=${TELEMETRY_ENDPOINT}
@@ -14,3 +14,7 @@ export INDEX_NAME="rag-redis"
# Set it as a non-null string, such as true, if you want to enable logging facility,
# otherwise, keep it as "" to disable it.
export LOGFLAG=""
# Set OpenTelemetry Tracing Endpoint
export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+')
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://$JAEGER_IP:4317
export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces

@@ -10,7 +10,7 @@ Quick Start:
2. Run Docker Compose.
3. Consume the ChatQnA Service.

Note: The default LLM is `meta-llama/Meta-Llama-3-8B-Instruct`. Before deploying the application, please make sure either you've requested and been granted the access to it on [Huggingface](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) or you've downloaded the model locally from [ModelScope](https://www.modelscope.cn/models).
Note: The default LLM is `meta-llama/Meta-Llama-3-8B-Instruct`. Before deploying the application, please make sure either you've requested and been granted the access to it on [Huggingface](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) or you've downloaded the model locally from [ModelScope](https://www.modelscope.cn/models). We now support running the latest DeepSeek models, including [deepseek-ai/DeepSeek-R1-Distill-Llama-70B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-70B) and [deepseek-ai/DeepSeek-R1-Distill-Qwen-32B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B) on Gaudi accelerators. To run `deepseek-ai/DeepSeek-R1-Distill-Llama-70B`, update the `LLM_MODEL_ID` and configure `NUM_CARDS` to 8 in the [set_env.sh](./set_env.sh) script. To run `deepseek-ai/DeepSeek-R1-Distill-Qwen-32B`, update the `LLM_MODEL_ID` and configure `NUM_CARDS` to 4 in the [set_env.sh](./set_env.sh) script.
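
For instance, the settings described in the note above for the 70B distill amount to overriding two variables before running `set_env.sh` (this is only a sketch of the values named there, not an extra configuration step):

```bash
# Per the note above: the 70B distill uses 8 Gaudi cards, the 32B distill uses 4.
export LLM_MODEL_ID="deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
export NUM_CARDS=8
```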

## Quick Start: 1.Setup Environment Variable

@@ -39,12 +39,37 @@ To set up environment variables for deploying ChatQnA services, follow these ste
source ./set_env.sh
```

4. Change Model for LLM serving

By default, Meta-Llama-3-8B-Instruct is used for LLM serving; the default model can be changed to another validated LLM model.
Please pick a [validated LLM model](https://github.com/opea-project/GenAIComps/tree/main/comps/llms/src/text-generation#validated-llm-models) from the table.
To change the default model defined in set_env.sh, overwrite it by exporting LLM_MODEL_ID to the new model or by modifying set_env.sh, and then repeat step 3.
For example, change to DeepSeek-R1-Distill-Qwen-32B using the following command.

```bash
export LLM_MODEL_ID="deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
```

Please also check the [required Gaudi cards for different models](https://github.com/opea-project/GenAIComps/tree/main/comps/llms/src/text-generation#system-requirements-for-llm-models) for new models.
It might be necessary to increase the number of Gaudi cards for the model by exporting NUM_CARDS with the new value or by modifying set_env.sh, and then repeating step 3. For example, increase the number of Gaudi cards for DeepSeek-R1-Distill-Qwen-32B using the following command:

```bash
export NUM_CARDS=4
```

## Quick Start: 2.Run Docker Compose

```bash
docker compose up -d
```

To enable OpenTelemetry Tracing, the compose.telemetry.yaml file needs to be merged with the default compose.yaml file.

```bash
docker compose -f compose.yaml -f compose.telemetry.yaml up -d
```

This will automatically download the Docker images from Docker Hub:

```bash
@@ -259,12 +284,16 @@ If use vLLM as the LLM serving backend.
docker compose -f compose.yaml up -d
# Start ChatQnA without Rerank Pipeline
docker compose -f compose_without_rerank.yaml up -d
# Start ChatQnA with Rerank Pipeline and Open Telemetry Tracing
docker compose -f compose.yaml -f compose.telemetry.yaml up -d
```

If using TGI as the LLM serving backend:

```bash
docker compose -f compose_tgi.yaml up -d
# Start ChatQnA with Open Telemetry Tracing
docker compose -f compose_tgi.yaml -f compose_tgi.telemetry.yaml up -d
```

If you want to enable the guardrails microservice in the pipeline, use the command below instead:

@@ -0,0 +1,27 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

services:
  tei-embedding-service:
    command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
  tei-reranking-service:
    command: --model-id ${RERANK_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
  jaeger:
    image: jaegertracing/all-in-one:latest
    container_name: jaeger
    ports:
      - "16686:16686"
      - "4317:4317"
      - "4318:4318"
      - "9411:9411"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      COLLECTOR_ZIPKIN_HOST_PORT: 9411
    restart: unless-stopped
  chatqna-gaudi-backend-server:
    environment:
      - ENABLE_OPEA_TELEMETRY=true
      - TELEMETRY_ENDPOINT=${TELEMETRY_ENDPOINT}
@@ -92,6 +92,7 @@ services:
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
LLM_MODEL_ID: ${LLM_MODEL_ID}
NUM_CARDS: ${NUM_CARDS}
VLLM_TORCH_PROFILER_DIR: "/mnt"
healthcheck:
test: ["CMD-SHELL", "curl -f http://$host_ip:8007/health || exit 1"]
@@ -102,7 +103,7 @@ services:
cap_add:
- SYS_NICE
ipc: host
command: --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
command: --model ${LLM_MODEL_ID} --tensor-parallel-size ${NUM_CARDS} --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
chatqna-gaudi-backend-server:
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
container_name: chatqna-gaudi-backend-server

@@ -133,12 +133,13 @@ services:
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
LLM_MODEL_ID: ${LLM_MODEL_ID}
NUM_CARDS: ${NUM_CARDS}
VLLM_TORCH_PROFILER_DIR: "/mnt"
runtime: habana
cap_add:
- SYS_NICE
ipc: host
command: --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
command: --model ${LLM_MODEL_ID} --tensor-parallel-size ${NUM_CARDS} --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
chatqna-gaudi-backend-server:
image: ${REGISTRY:-opea}/chatqna-guardrails:${TAG:-latest}
container_name: chatqna-gaudi-guardrails-server

@@ -0,0 +1,29 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

services:
  tei-embedding-service:
    command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
  tei-reranking-service:
    command: --model-id ${RERANK_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
  tgi-service:
    command: --model-id ${LLM_MODEL_ID} --max-input-length 2048 --max-total-tokens 4096 --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
  jaeger:
    image: jaegertracing/all-in-one:latest
    container_name: jaeger
    ports:
      - "16686:16686"
      - "4317:4317"
      - "4318:4318"
      - "9411:9411"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      COLLECTOR_ZIPKIN_HOST_PORT: 9411
    restart: unless-stopped
  chatqna-gaudi-backend-server:
    environment:
      - ENABLE_OPEA_TELEMETRY=true
      - TELEMETRY_ENDPOINT=${TELEMETRY_ENDPOINT}
@@ -25,7 +25,6 @@ services:
INDEX_NAME: ${INDEX_NAME}
TEI_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
TELEMETRY_ENDPOINT: ${TELEMETRY_ENDPOINT}
tei-embedding-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: tei-embedding-gaudi-server
@@ -38,7 +37,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
retriever:
image: ${REGISTRY:-opea}/retriever:${TAG:-latest}
container_name: retriever-redis-server
@@ -56,7 +55,6 @@ services:
INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
TELEMETRY_ENDPOINT: ${TELEMETRY_ENDPOINT}
LOGFLAG: ${LOGFLAG}
RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
restart: unless-stopped
@@ -80,7 +78,7 @@ services:
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
MAX_WARMUP_SEQUENCE_LENGTH: 512
command: --model-id ${RERANK_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
tgi-service:
image: ghcr.io/huggingface/tgi-gaudi:2.0.6
container_name: tgi-gaudi-server
@@ -101,26 +99,12 @@ services:
LIMIT_HPU_GRAPH: true
USE_FLASH_ATTENTION: true
FLASH_ATTENTION_RECOMPUTE: true
NUM_CARDS: ${NUM_CARDS}
runtime: habana
cap_add:
- SYS_NICE
ipc: host
command: --model-id ${LLM_MODEL_ID} --max-input-length 2048 --max-total-tokens 4096 --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
jaeger:
image: jaegertracing/all-in-one:latest
container_name: jaeger
ports:
- "16686:16686"
- "4317:4317"
- "4318:4318"
- "9411:9411"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
COLLECTOR_ZIPKIN_HOST_PORT: 9411
restart: unless-stopped
command: --model-id ${LLM_MODEL_ID} --num-shard ${NUM_CARDS} --max-input-length 2048 --max-total-tokens 4096
chatqna-gaudi-backend-server:
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
container_name: chatqna-gaudi-backend-server
@@ -146,7 +130,6 @@ services:
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
- LLM_MODEL=${LLM_MODEL_ID}
- LOGFLAG=${LOGFLAG}
- TELEMETRY_ENDPOINT=${TELEMETRY_ENDPOINT}
ipc: host
restart: always
chatqna-gaudi-ui-server:

@@ -73,12 +73,13 @@ services:
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
LLM_MODEL_ID: ${LLM_MODEL_ID}
NUM_CARDS: ${NUM_CARDS}
VLLM_TORCH_PROFILER_DIR: "/mnt"
runtime: habana
cap_add:
- SYS_NICE
ipc: host
command: --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
command: --model ${LLM_MODEL_ID} --tensor-parallel-size ${NUM_CARDS} --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
chatqna-gaudi-backend-server:
image: ${REGISTRY:-opea}/chatqna-without-rerank:${TAG:-latest}
container_name: chatqna-gaudi-backend-server

@@ -11,6 +11,7 @@ export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
export INDEX_NAME="rag-redis"
export NUM_CARDS=1
# Set it as a non-null string, such as true, if you want to enable logging facility,
# otherwise, keep it as "" to disable it.
export LOGFLAG=""

@@ -1,64 +0,0 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

set -xe
USER_ID=$(whoami)
MOUNT_DIR=/home/$USER_ID/.cache/huggingface/hub
IMAGE_REPO=${IMAGE_REPO:-opea}
IMAGE_TAG=${IMAGE_TAG:-latest}

ROLLOUT_TIMEOUT_SECONDS="1800s"
KUBECTL_TIMEOUT_SECONDS="60s"

function init_chatqna() {
    # replace the mount dir "path: /mnt/opea-models" with "path: $CHART_MOUNT"
    find ../../kubernetes/intel/*/*/manifest -name '*.yaml' -type f -exec sed -i "s#path: /mnt/opea-models#path: $MOUNT_DIR#g" {} \;
    # replace microservice image tag
    find ../../kubernetes/intel/*/*/manifest -name '*.yaml' -type f -exec sed -i "s#image: \"opea/\(.*\):latest#image: \"opea/\1:${IMAGE_TAG}#g" {} \;
    # replace the repository "image: opea/*" with "image: $IMAGE_REPO/"
    find ../../kubernetes/intel/*/*/manifest -name '*.yaml' -type f -exec sed -i "s#image: \"opea/*#image: \"${IMAGE_REPO}/#g" {} \;
    # set huggingface token
    find ../../kubernetes/intel/*/*/manifest -name '*.yaml' -type f -exec sed -i "s#insert-your-huggingface-token-here#$(cat /home/$USER_ID/.cache/huggingface/token)#g" {} \;
}

function get_end_point() {
    # $1 is service name, $2 is namespace
    ip_address=$(kubectl get svc $1 -n $2 -o jsonpath='{.spec.clusterIP}')
    port=$(kubectl get svc $1 -n $2 -o jsonpath='{.spec.ports[0].port}')
    echo "$ip_address:$port"
}

function _cleanup_ns() {
    local ns=$1
    if kubectl get ns $ns; then
        if ! kubectl delete ns $ns --timeout=$KUBECTL_TIMEOUT_SECONDS; then
            kubectl delete pods --namespace $ns --force --grace-period=0 --all
            kubectl delete ns $ns --force --grace-period=0 --timeout=$KUBECTL_TIMEOUT_SECONDS
        fi
    fi
}

if [ $# -eq 0 ]; then
    echo "Usage: $0 <function_name>"
    exit 1
fi

case "$1" in
    init_ChatQnA)
        init_chatqna
        ;;
    get_end_point)
        service=$2
        NAMESPACE=$3
        get_end_point $service $NAMESPACE
        ;;
    _cleanup_ns)
        NAMESPACE=$2
        _cleanup_ns $NAMESPACE
        ;;
    *)
        echo "Unknown function: $1"
        ;;
esac

@@ -46,6 +46,7 @@ function start_services() {
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
export NUM_CARDS=1
export INDEX_NAME="rag-redis"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export GURADRAILS_MODEL_ID="meta-llama/Meta-Llama-Guard-2-8B"

@@ -44,12 +44,16 @@ function start_services() {
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
export NUM_CARDS=1
export INDEX_NAME="rag-redis"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export host_ip=${ip_address}
export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+')
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://$JAEGER_IP:4317
export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces

# Start Docker Containers
docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
docker compose -f compose.yaml -f compose.telemetry.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
n=0
until [[ "$n" -ge 160 ]]; do
echo "n=$n"
@@ -170,7 +174,7 @@ function validate_frontend() {

function stop_docker() {
cd $WORKPATH/docker_compose/intel/hpu/gaudi
docker compose -f compose.yaml down
docker compose -f compose.yaml -f compose.telemetry.yaml down
}

function main() {

@@ -49,9 +49,12 @@ function start_services() {
|
||||
export INDEX_NAME="rag-redis"
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export host_ip=${ip_address}
|
||||
export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+')
|
||||
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://$JAEGER_IP:4317
|
||||
export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces
|
||||
|
||||
# Start Docker Containers
|
||||
docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
|
||||
docker compose -f compose.yaml -f compose.telemetry.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
|
||||
n=0
|
||||
until [[ "$n" -ge 100 ]]; do
|
||||
docker logs vllm-service > ${LOG_PATH}/vllm_service_start.log 2>&1
|
||||
@@ -172,7 +175,7 @@ function validate_frontend() {
|
||||
|
||||
function stop_docker() {
|
||||
cd $WORKPATH/docker_compose/intel/cpu/xeon
|
||||
docker compose -f compose.yaml down
|
||||
docker compose -f compose.yaml -f compose.telemetry.yaml down
|
||||
}
|
||||
|
||||
function main() {
|
||||
|
||||
@@ -46,6 +46,7 @@ function start_services() {
|
||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
||||
export NUM_CARDS=1
|
||||
export INDEX_NAME="rag-redis"
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+')
|
||||
@@ -53,7 +54,7 @@ function start_services() {
|
||||
export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces
|
||||
|
||||
# Start Docker Containers
|
||||
docker compose -f compose_tgi.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
|
||||
docker compose -f compose_tgi.yaml -f compose_tgi.telemetry.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
|
||||
|
||||
n=0
|
||||
until [[ "$n" -ge 500 ]]; do
|
||||
@@ -217,7 +218,7 @@ function validate_frontend() {
|
||||
|
||||
function stop_docker() {
|
||||
cd $WORKPATH/docker_compose/intel/hpu/gaudi
|
||||
docker compose -f compose_tgi.yaml down
|
||||
docker compose -f compose_tgi.yaml -f compose_tgi.telemetry.yaml down
|
||||
}
|
||||
|
||||
function main() {
|
||||
|
||||
@@ -48,9 +48,12 @@ function start_services() {
|
||||
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
||||
export INDEX_NAME="rag-redis"
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+')
|
||||
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://$JAEGER_IP:4317
|
||||
export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces
|
||||
|
||||
# Start Docker Containers
|
||||
docker compose -f compose_tgi.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
|
||||
docker compose -f compose_tgi.yaml -f compose_tgi.telemetry.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
|
||||
|
||||
n=0
|
||||
until [[ "$n" -ge 100 ]]; do
|
||||
@@ -216,7 +219,7 @@ function validate_frontend() {
|
||||
|
||||
function stop_docker() {
|
||||
cd $WORKPATH/docker_compose/intel/cpu/xeon
|
||||
docker compose -f compose_tgi.yaml down
|
||||
docker compose -f compose_tgi.yaml -f compose_tgi.telemetry.yaml down
|
||||
}
|
||||
|
||||
function main() {
|
||||
|
||||
@@ -44,6 +44,7 @@ function start_services() {
|
||||
cd $WORKPATH/docker_compose/intel/hpu/gaudi
|
||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
||||
export NUM_CARDS=1
|
||||
export INDEX_NAME="rag-redis"
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
|
||||
|
||||
126
DBQnA/docker_compose/amd/gpu/rocm/README.md
Normal file
126
DBQnA/docker_compose/amd/gpu/rocm/README.md
Normal file
@@ -0,0 +1,126 @@
|
||||
# Deploy on AMD GPU
|
||||
|
||||
This document outlines the deployment process for the DBQnA application, which generates a SQL query and its output from a natural-language question, utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on an AMD GPU. The steps include Docker image creation, container deployment via Docker Compose, and service execution to integrate microservices. We will publish the Docker images to Docker Hub soon, which will simplify the deployment process for this service.
|
||||
|
||||
## 🚀 Build Docker Images
|
||||
|
||||
First of all, you need to build the Docker images locally. This step can be skipped once the Docker images are published to Docker Hub.
|
||||
|
||||
### 1.1 Build Text to SQL service Image
|
||||
|
||||
```bash
|
||||
git clone https://github.com/opea-project/GenAIComps.git
|
||||
cd GenAIComps
|
||||
docker build --no-cache -t opea/texttosql:latest -f comps/text2sql/src/Dockerfile .
|
||||
```
|
||||
|
||||
### 1.2 Build react UI Docker Image
|
||||
|
||||
Build the React-based frontend Docker image via the command below:
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/DBQnA/ui
|
||||
docker build --no-cache -t opea/dbqna-react-ui:latest --build-arg texttosql_url=$textToSql_host:$textToSql_port/v1 -f docker/Dockerfile.react .
|
||||
```
|
||||
|
||||
Attention! Replace `$textToSql_host` and `$textToSql_port` with your own values.
|
||||
|
||||
Then run the command `docker images`; you should see the following Docker images:
|
||||
|
||||
1. `opea/texttosql:latest`
|
||||
2. `opea/dbqna-react-ui:latest`
|
||||
|
||||
## 🚀 Start Microservices
|
||||
|
||||
### Required Models
|
||||
|
||||
The default model is "mistralai/Mistral-7B-Instruct-v0.3"; change "LLM_MODEL_ID" in the environment variable settings below if you want to use another model.
|
||||
|
||||
If you use gated models, you also need to provide a [huggingface token](https://huggingface.co/docs/hub/security-tokens) in the "HUGGINGFACEHUB_API_TOKEN" environment variable.
|
||||
|
||||
### 2.1 Setup Environment Variables
|
||||
|
||||
Since `compose.yaml` consumes several environment variables, you need to set them up in advance as shown below.
|
||||
|
||||
```bash
|
||||
export host_ip="host_ip_address_or_dns_name"
|
||||
export DBQNA_HUGGINGFACEHUB_API_TOKEN=""
|
||||
export DBQNA_TGI_SERVICE_PORT=8008
|
||||
export DBQNA_TGI_LLM_ENDPOINT="http://${host_ip}:${DBQNA_TGI_SERVICE_PORT}"
|
||||
export DBQNA_LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"
|
||||
export MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"
|
||||
export POSTGRES_USER="postgres"
|
||||
export POSTGRES_PASSWORD="testpwd"
|
||||
export POSTGRES_DB="chinook"
|
||||
export DBQNA_TEXT_TO_SQL_PORT=18142
|
||||
export DBQNA_UI_PORT=18143
|
||||
```
|
||||
|
||||
Note: Please replace `host_ip_address_or_dns_name` with your external IP address or DNS name; do not use localhost.
|
||||
|
||||
### 2.2 Start Microservice Docker Containers
|
||||
|
||||
There are two options to start the microservices.
|
||||
|
||||
#### 2.2.1 Start the microservice using docker compose
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/DBQnA/docker_compose/amd/gpu/rocm
|
||||
docker compose up -d
|
||||
```
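
Optionally, you can wait for the TGI service to finish loading the model before moving on. The snippet below is a minimal readiness sketch, assuming the `dbqna-tgi-service` container name from the `compose.yaml` above and the "Connected" log line that this example's e2e test script also greps for; adjust it to your setup.

```bash
# Minimal readiness check (sketch): poll the TGI container logs until the
# model server reports it is connected, or give up after ~8 minutes.
for n in $(seq 1 100); do
  if docker logs dbqna-tgi-service 2>&1 | grep -q Connected; then
    echo "TGI service is ready."
    break
  fi
  echo "Waiting for TGI service... ($n)"
  sleep 5
done
```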
|
||||
|
||||
## 🚀 Validate Microservices
|
||||
|
||||
### 3.1 TGI Service
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:$DBQNA_TGI_SERVICE_PORT/generate \
|
||||
-X POST \
|
||||
-d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
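
If you want to check the response programmatically rather than by eye, a minimal sketch is shown below; it assumes the TGI `/generate` endpoint returns a JSON body containing a `generated_text` field.

```bash
# Sketch: call the TGI /generate endpoint and verify the reply contains generated text.
response=$(curl -s http://${host_ip}:${DBQNA_TGI_SERVICE_PORT}/generate \
  -X POST \
  -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
  -H 'Content-Type: application/json')
if echo "$response" | grep -q "generated_text"; then
  echo "TGI service responded as expected."
else
  echo "Unexpected TGI response: $response"
fi
```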
|
||||
|
||||
### 3.2 Postgres Microservice
|
||||
|
||||
Once the Text-to-SQL microservice is started, you can use the commands below.
|
||||
|
||||
#### 3.2.1 Test the Database connection
|
||||
|
||||
```bash
|
||||
curl --location http://${host_ip}:${DBQNA_TEXT_TO_SQL_PORT}/v1/postgres/health \
|
||||
--header 'Content-Type: application/json' \
|
||||
--data '{"user": "'${POSTGRES_USER}'","password": "'${POSTGRES_PASSWORD}'","host": "'${host_ip}'", "port": "5442", "database": "'${POSTGRES_DB}'"}'
|
||||
```
|
||||
|
||||
#### 3.2.2 Invoke the microservice
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:${DBQNA_TEXT_TO_SQL_PORT}/v1/texttosql \
|
||||
-X POST \
|
||||
-d '{"input_text": "Find the total number of Albums.","conn_str": {"user": "'${POSTGRES_USER}'","password": "'${POSTGRES_PASSWORD}'","host": "'${host_ip}'", "port": "5442", "database": "'${POSTGRES_DB}'"}}' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
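
To validate the reply without inspecting it manually, the sketch below captures the response and checks that it contains an `output` field, mirroring the check used by this example's e2e test script; the request body is the same as above.

```bash
# Sketch: invoke the Text-to-SQL microservice and verify the response carries an "output" field.
result=$(curl -s http://${host_ip}:${DBQNA_TEXT_TO_SQL_PORT}/v1/texttosql \
  -X POST \
  -d '{"input_text": "Find the total number of Albums.","conn_str": {"user": "'${POSTGRES_USER}'","password": "'${POSTGRES_PASSWORD}'","host": "'${host_ip}'", "port": "5442", "database": "'${POSTGRES_DB}'"}}' \
  -H 'Content-Type: application/json')
if [[ $result == *"output"* ]]; then
  echo "Text-to-SQL result looks correct."
else
  echo "Unexpected result: $result"
fi
```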
|
||||
|
||||
### 3.3 Frontend validation
|
||||
|
||||
The frontend validation tests the API to check that it returns HTTP status 200 and that the response contains the generated SQL query and its output.
|
||||
|
||||
The test is defined in `App.test.tsx` under the React root folder `ui/react/`.
|
||||
|
||||
Command to run the test:
|
||||
|
||||
```bash
|
||||
npm run test
|
||||
```
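
If you prefer to run the frontend test non-interactively (for example in CI), the sketch below captures the exit status and reports pass/fail, mirroring the `validate_frontend` step in this example's e2e test script; it assumes you run it from the `ui/react` folder with Node.js and the npm dependencies installed.

```bash
# Sketch: run the React test suite and report the outcome based on its exit status.
exit_status=0
npm run test || exit_status=$?
if [ $exit_status -ne 0 ]; then
  echo "Frontend test failed."
  exit $exit_status
else
  echo "Frontend test passed."
fi
```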
|
||||
|
||||
## 🚀 Launch the React UI
|
||||
|
||||
Open this URL `http://${host_ip}:${DBQNA_UI_PORT}` in your browser to access the frontend.
|
||||
|
||||

|
||||
|
||||
Test DB Connection
|
||||

|
||||
|
||||
Create a SQL query and its output for a given natural-language question
|
||||

|
||||
23856
DBQnA/docker_compose/amd/gpu/rocm/chinook.sql
Normal file
23856
DBQnA/docker_compose/amd/gpu/rocm/chinook.sql
Normal file
File diff suppressed because it is too large
Load Diff
75
DBQnA/docker_compose/amd/gpu/rocm/compose.yaml
Normal file
75
DBQnA/docker_compose/amd/gpu/rocm/compose.yaml
Normal file
@@ -0,0 +1,75 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
version: "3.8"
|
||||
|
||||
services:
|
||||
dbqna-tgi-service:
|
||||
image: ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
|
||||
container_name: dbqna-tgi-service
|
||||
ports:
|
||||
- "${DBQNA_TGI_SERVICE_PORT:-8008}:80"
|
||||
volumes:
|
||||
- "./data:/data"
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TGI_SERVICE_PORT: ${DBQNA_TGI_SERVICE_PORT}
|
||||
MODEL_ID: ${DBQNA_LLM_MODEL_ID}
|
||||
HUGGING_FACE_HUB_TOKEN: ${DBQNA_HUGGINGFACEHUB_API_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${DBQNA_HUGGINGFACEHUB_API_TOKEN}
|
||||
shm_size: 1g
|
||||
devices:
|
||||
- /dev/kfd:/dev/kfd
|
||||
- /dev/dri/:/dev/dri/
|
||||
cap_add:
|
||||
- SYS_PTRACE
|
||||
group_add:
|
||||
- video
|
||||
security_opt:
|
||||
- seccomp:unconfined
|
||||
ipc: host
|
||||
command: --model-id ${MODEL_ID} --max-input-length 2048 --max-total-tokens 4096
|
||||
|
||||
postgres:
|
||||
image: postgres:latest
|
||||
container_name: postgres-container
|
||||
restart: always
|
||||
environment:
|
||||
POSTGRES_USER: ${POSTGRES_USER}
|
||||
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
|
||||
POSTGRES_DB: ${POSTGRES_DB}
|
||||
ports:
|
||||
- '5442:5432'
|
||||
volumes:
|
||||
- ./chinook.sql:/docker-entrypoint-initdb.d/chinook.sql
|
||||
|
||||
text2sql:
|
||||
image: opea/text2sql:latest
|
||||
container_name: text2sql
|
||||
ports:
|
||||
- "${DBQNA_TEXT_TO_SQL_PORT:-9090}:8080"
|
||||
environment:
|
||||
TGI_LLM_ENDPOINT: ${DBQNA_TGI_LLM_ENDPOINT}
|
||||
|
||||
text2sql-react-ui:
|
||||
image: opea/text2sql-react-ui:latest
|
||||
container_name: text2sql-react-ui
|
||||
depends_on:
|
||||
- text2sql
|
||||
ports:
|
||||
- "${DBQNA_UI_PORT:-5174}:80"
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
texttosql_port: ${texttosql_port}
|
||||
ipc: host
|
||||
restart: always
|
||||
|
||||
networks:
|
||||
default:
|
||||
driver: bridge
|
||||
16
DBQnA/docker_compose/amd/gpu/rocm/set_env.sh
Normal file
16
DBQnA/docker_compose/amd/gpu/rocm/set_env.sh
Normal file
@@ -0,0 +1,16 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
export host_ip=""
|
||||
export DBQNA_HUGGINGFACEHUB_API_TOKEN=""
|
||||
export DBQNA_TGI_SERVICE_PORT=8008
|
||||
export DBQNA_TGI_LLM_ENDPOINT="http://${host_ip}:${DBQNA_TGI_SERVICE_PORT}"
|
||||
export DBQNA_LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"
|
||||
export MODEL_ID=${DBQNA_LLM_MODEL_ID}
|
||||
export POSTGRES_USER="postgres"
|
||||
export POSTGRES_PASSWORD="testpwd"
|
||||
export POSTGRES_DB="chinook"
|
||||
export DBQNA_TEXT_TO_SQL_PORT=9090
|
||||
export DBQNA_UI_PORT=5174
|
||||
@@ -19,4 +19,5 @@ services:
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
no_proxy: ${no_proxy}
|
||||
texttosql_url: ${build_texttosql_url}
|
||||
image: ${REGISTRY:-opea}/text2sql-react-ui:${TAG:-latest}
|
||||
|
||||
120
DBQnA/tests/test_compose_on_rocm.sh
Normal file
120
DBQnA/tests/test_compose_on_rocm.sh
Normal file
@@ -0,0 +1,120 @@
|
||||
#!/bin/bash
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
set -xe
|
||||
|
||||
WORKPATH=$(dirname "$PWD")
|
||||
LOG_PATH="$WORKPATH/tests"
|
||||
ip_address=$(hostname -I | awk '{print $1}')
|
||||
tgi_port=8008
|
||||
tgi_volume=$WORKPATH/data
|
||||
|
||||
export host_ip=${ip_address}
|
||||
export DBQNA_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export DBQNA_TGI_SERVICE_PORT=8008
|
||||
export DBQNA_TGI_LLM_ENDPOINT="http://${host_ip}:${DBQNA_TGI_SERVICE_PORT}"
|
||||
export DBQNA_LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"
|
||||
export MODEL_ID=${DBQNA_LLM_MODEL_ID}
|
||||
export POSTGRES_USER="postgres"
|
||||
export POSTGRES_PASSWORD="testpwd"
|
||||
export POSTGRES_DB="chinook"
|
||||
export DBQNA_TEXT_TO_SQL_PORT=9090
|
||||
export DBQNA_UI_PORT=5174
|
||||
export build_texttosql_url="${ip_address}:${DBQNA_TEXT_TO_SQL_PORT}/v1"
|
||||
|
||||
function build_docker_images() {
|
||||
cd "$WORKPATH"/docker_image_build
|
||||
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
|
||||
|
||||
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
||||
service_list="text2sql text2sql-react-ui"
|
||||
|
||||
docker compose -f build.yaml build ${service_list} --no-cache > "${LOG_PATH}"/docker_image_build.log
|
||||
docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
|
||||
docker images && sleep 1s
|
||||
}
|
||||
|
||||
function start_service() {
|
||||
cd "$WORKPATH"/docker_compose/amd/gpu/rocm
|
||||
# Start Docker Containers
|
||||
docker compose up -d > "${LOG_PATH}"/start_services_with_compose.log
|
||||
n=0
|
||||
until [[ "$n" -ge 100 ]]; do
|
||||
docker logs dbqna-tgi-service > "${LOG_PATH}"/tgi_service_start.log
|
||||
if grep -q Connected "${LOG_PATH}"/tgi_service_start.log; then
|
||||
break
|
||||
fi
|
||||
sleep 5s
|
||||
n=$((n+1))
|
||||
done
|
||||
}
|
||||
|
||||
function validate_microservice() {
|
||||
result=$(http_proxy="" curl --connect-timeout 5 --max-time 120000 http://${ip_address}:${DBQNA_TEXT_TO_SQL_PORT}/v1/text2sql \
|
||||
-X POST \
|
||||
-d '{"input_text": "Find the total number of Albums.","conn_str": {"user": "'${POSTGRES_USER}'","password": "'${POSTGRES_PASSWORD}'","host": "'${ip_address}'", "port": "5442", "database": "'${POSTGRES_DB}'" }}' \
|
||||
-H 'Content-Type: application/json')
|
||||
|
||||
if [[ $result == *"output"* ]]; then
|
||||
echo $result
|
||||
echo "Result correct."
|
||||
else
|
||||
echo "Result wrong. Received was $result"
|
||||
docker logs text2sql > ${LOG_PATH}/text2sql.log
|
||||
docker logs dbqna-tgi-service > ${LOG_PATH}/tgi.log
|
||||
exit 1
|
||||
fi
|
||||
|
||||
}
|
||||
|
||||
function validate_frontend() {
|
||||
echo "[ TEST INFO ]: --------- frontend test started ---------"
|
||||
cd $WORKPATH/ui/react
|
||||
local conda_env_name="OPEA_e2e"
|
||||
export PATH=${HOME}/miniconda3/bin/:$PATH
|
||||
if conda info --envs | grep -q "$conda_env_name"; then
|
||||
echo "$conda_env_name exist!"
|
||||
else
|
||||
conda create -n ${conda_env_name} python=3.12 -y
|
||||
fi
|
||||
|
||||
source activate ${conda_env_name}
|
||||
echo "[ TEST INFO ]: --------- conda env activated ---------"
|
||||
|
||||
conda install -c conda-forge nodejs=22.6.0 -y
|
||||
npm install && npm ci
|
||||
node -v && npm -v && pip list
|
||||
|
||||
exit_status=0
|
||||
npm run test || exit_status=$?
|
||||
|
||||
if [ $exit_status -ne 0 ]; then
|
||||
echo "[TEST INFO]: ---------frontend test failed---------"
|
||||
exit $exit_status
|
||||
else
|
||||
echo "[TEST INFO]: ---------frontend test passed---------"
|
||||
fi
|
||||
}
|
||||
|
||||
function stop_docker() {
|
||||
cd $WORKPATH/docker_compose/amd/gpu/rocm/
|
||||
docker compose stop && docker compose rm -f
|
||||
}
|
||||
|
||||
function main() {
|
||||
|
||||
stop_docker
|
||||
|
||||
build_docker_images
|
||||
start_service
|
||||
sleep 10s
|
||||
validate_microservice
|
||||
validate_frontend
|
||||
|
||||
stop_docker
|
||||
echo y | docker system prune
|
||||
|
||||
}
|
||||
|
||||
main
|
||||
@@ -3,8 +3,13 @@
|
||||
|
||||
# Stage 1: Build the React application using Node.js
|
||||
# Use Node 20.11.1 as the base image for the build step
|
||||
|
||||
FROM node:20.11.1 AS vite-app
|
||||
|
||||
ARG texttosql_url
|
||||
|
||||
ENV TEXT_TO_SQL_URL=$texttosql_url
|
||||
|
||||
WORKDIR /usr/app/react
|
||||
|
||||
COPY react /usr/app/react
|
||||
@@ -16,6 +21,10 @@ RUN ["npm", "run", "build"]
|
||||
|
||||
FROM nginx:alpine
|
||||
|
||||
ARG texttosql_url
|
||||
|
||||
ENV TEXT_TO_SQL_URL=$texttosql_url
|
||||
|
||||
EXPOSE 80
|
||||
|
||||
COPY --from=vite-app /usr/app/react/dist /usr/share/nginx/html
|
||||
|
||||
@@ -1 +1 @@
|
||||
VITE_TEXT_TO_SQL_URL=http://${HOSTNAME}:9090/v1
|
||||
VITE_TEXT_TO_SQL_URL=${TEXT_TO_SQL_URL}
|
||||
|
||||
@@ -26,7 +26,7 @@ test('testing api with dynamic host', async () => {
|
||||
const formData = {
|
||||
user: 'postgres',
|
||||
database: 'chinook',
|
||||
host: host, // Dynamic IP
|
||||
host: host,
|
||||
password: 'testpwd',
|
||||
port: '5442',
|
||||
};
|
||||
|
||||
@@ -24,7 +24,7 @@ export default defineConfig({
|
||||
},
|
||||
define: {
|
||||
// Dynamically set the hostname for the VITE_TEXT_TO_SQL_URL
|
||||
"import.meta.env.VITE_TEXT_TO_SQL_URL": JSON.stringify(`http://${os.hostname()}:9090/v1`),
|
||||
"import.meta.env.VITE_TEXT_TO_SQL_URL": JSON.stringify(`http://${process.env.TEXT_TO_SQL_URL}`),
|
||||
"import.meta.env": process.env,
|
||||
},
|
||||
});
|
||||
|
||||
@@ -8,7 +8,7 @@ services:
|
||||
ports:
|
||||
- ${LLM_ENDPOINT_PORT:-8008}:80
|
||||
volumes:
|
||||
- "${DATA_PATH:-data}:/data"
|
||||
- "${DATA_PATH:-./data}:/data"
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
|
||||
@@ -296,8 +296,8 @@ class DocSumUI:
|
||||
audio_ui.render()
|
||||
with gr.TabItem("Upload Video"):
|
||||
video_ui.render()
|
||||
with gr.TabItem("Enter URL"):
|
||||
url_ui.render()
|
||||
# with gr.TabItem("Enter URL"):
|
||||
# url_ui.render()
|
||||
|
||||
return self.demo
|
||||
|
||||
@@ -313,6 +313,8 @@ app = gr.mount_gradio_app(app, demo, path="/")
|
||||
if __name__ == "__main__":
|
||||
import argparse
|
||||
|
||||
import nltk
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--host", type=str, default="0.0.0.0")
|
||||
parser.add_argument("--port", type=int, default=5173)
|
||||
@@ -320,4 +322,8 @@ if __name__ == "__main__":
|
||||
args = parser.parse_args()
|
||||
logger.info(">>> Starting server at %s:%d", args.host, args.port)
|
||||
|
||||
# Needed for UnstructuredURLLoader when reading content from a URL
|
||||
nltk.download("punkt_tab")
|
||||
nltk.download("averaged_perceptron_tagger_eng")
|
||||
|
||||
uvicorn.run(app, host=args.host, port=args.port)
|
||||
|
||||
@@ -8,7 +8,7 @@ services:
|
||||
ports:
|
||||
- ${LLM_ENDPOINT_PORT:-8008}:80
|
||||
volumes:
|
||||
- "${DATA_PATH:-data}:/data"
|
||||
- "${DATA_PATH:-./data}:/data"
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
|
||||
@@ -52,6 +52,7 @@ export async function fetchTextStream(query: string | Blob, params: string, file
|
||||
}
|
||||
const reader = postResponse.body.getReader();
|
||||
const decoder = new TextDecoder("utf-8");
|
||||
|
||||
let done, value;
|
||||
|
||||
let buffer = ""; // Initialize a buffer
|
||||
@@ -61,6 +62,7 @@ export async function fetchTextStream(query: string | Blob, params: string, file
|
||||
|
||||
// Decode chunk and append to buffer
|
||||
const chunk = decoder.decode(value, { stream: true });
|
||||
|
||||
buffer += chunk;
|
||||
|
||||
// Use regex to clean and extract data
|
||||
@@ -72,6 +74,21 @@ export async function fetchTextStream(query: string | Blob, params: string, file
|
||||
})
|
||||
.filter((line) => line); // Remove empty lines
|
||||
|
||||
const validJsonChunks = cleanedChunks.filter((item) => {
|
||||
if (item === "[DONE]") {
|
||||
return true;
|
||||
}
|
||||
try {
|
||||
JSON.parse(item);
|
||||
return true;
|
||||
} catch (e) {
|
||||
return false;
|
||||
}
|
||||
});
|
||||
|
||||
cleanedChunks.length = 0;
|
||||
cleanedChunks.push(...validJsonChunks);
|
||||
|
||||
for (const cleanedChunk of cleanedChunks) {
|
||||
// Further clean to ensure all unnecessary parts are removed
|
||||
yield cleanedChunk.replace(/^b'|['"]$/g, ""); // Again clean 'b' and other single or double quotes
|
||||
|
||||
@@ -36,6 +36,7 @@
|
||||
urlSuffix: string,
|
||||
params: string
|
||||
) => {
|
||||
messages = "";
|
||||
// Fetch the stream
|
||||
const eventStream = await fetchTextStream(
|
||||
query,
|
||||
|
||||
@@ -180,4 +180,3 @@ Utilizes the open-source platform **Keycloak** for single sign-on identity and a
|
||||
|
||||
- **[Keycloak Configuration Guide](./docker_compose/intel/cpu/xeon/keycloak_setup_guide.md)**: Instructions to set up Keycloak for identity and access management.
|
||||
- **[Xeon Guide](./docker_compose/intel/cpu/xeon/README.md)**: Instructions to build Docker images from source and run the application via Docker Compose.
|
||||
- **[Xeon Kubernetes Guide](./kubernetes/intel/README.md)**: Instructions to deploy the application via Kubernetes.
|
||||
|
||||
@@ -1,111 +0,0 @@
|
||||
# 🚀 Deploy ProductivitySuite with ReactUI
|
||||
|
||||
This document outlines the deployment steps for ProductivitySuite on a Kubernetes cluster, utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline components and ReactUI, a popular React-based user interface library.
|
||||
|
||||
ProductivitySuite consists of the following pipelines/examples and components:
|
||||
```
|
||||
- productivity-suite-react-ui
|
||||
- chatqna
|
||||
- codegen
|
||||
- docsum
|
||||
- faqgen
|
||||
- dataprep via redis
|
||||
- chat-history
|
||||
- prompt-registry
|
||||
- mongo
|
||||
- keycloak
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## ⚠️ Prerequisites for Deploying ProductivitySuite with ReactUI
|
||||
To begin with, ensure that you have the following prerequisites in place:
|
||||
|
||||
1. ☸ Kubernetes installation: Make sure that you have Kubernetes installed.
|
||||
2. 🐳 Images: Make sure you have all the images ready for the examples and components stated above. You may refer to [README](../../docker_compose/intel/cpu/xeon/README.md) for steps to build the images.
|
||||
3. 🔧 Configuration Values: Set the following values in all the yaml files before proceeding with the deployment:
|
||||
|
||||
Download and set up yq for YAML processing:
|
||||
```
|
||||
sudo wget -qO /usr/local/bin/yq https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64
|
||||
sudo chmod a+x /usr/local/bin/yq
|
||||
|
||||
cd GenAIExamples/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/
|
||||
. ../utils
|
||||
```
|
||||
|
||||
a. HUGGINGFACEHUB_API_TOKEN (Your HuggingFace token to download your desired model from HuggingFace):
|
||||
```
|
||||
# You may set the HUGGINGFACEHUB_API_TOKEN as follows:
|
||||
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
|
||||
set_hf_token $HUGGINGFACEHUB_API_TOKEN
|
||||
```
|
||||
|
||||
b. Set the proxies based on your network configuration
|
||||
```
|
||||
# Look for http_proxy, https_proxy and no_proxy key and fill up the values for all the yaml files with your system proxy configuration.
|
||||
set_http_proxy $http_proxy
|
||||
set_https_proxy $https_proxy
|
||||
set_no_proxy $no_proxy
|
||||
```
|
||||
|
||||
c. Set all the backend service endpoints for the React UI service
|
||||
```
|
||||
# Setup all the backend service endpoint in productivity_suite_reactui.yaml for UI to consume with.
|
||||
# Look for ENDPOINT in the yaml and insert all the url endpoint for all the required backend service.
|
||||
set_services_endpoint
|
||||
```
|
||||
|
||||
4. MODEL_ID and model-volume **(OPTIONAL)**: You may also customize "MODEL_ID" to use a different model, and model-volume for the volume to be mounted.
|
||||
```
|
||||
sudo mkdir -p /mnt/opea-models
|
||||
sudo chmod -R a+xwr /mnt/opea-models
|
||||
set_model_id
|
||||
```
|
||||
5. MODEL_MIRROR **(OPTIONAL)**: Please set a Hugging Face mirror if you cannot access the Hugging Face website directly from your region. For example, you can set it to https://hf-mirror.com in the PRC.
|
||||
```
|
||||
set_model_mirror
|
||||
```
|
||||
6. After finishing the steps above, you can review the changes and proceed with deploying the yaml files.
|
||||
```
|
||||
git diff
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🌐 Deploying ProductivitySuite
|
||||
You can use the yaml files in the xeon folder to deploy ProductivitySuite with ReactUI.
|
||||
```
|
||||
cd GenAIExamples/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/
|
||||
kubectl apply -f .
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🔐 User Management via Keycloak Configuration
|
||||
Please refer to the **[keycloak_setup_guide](../../docker_compose/intel/cpu/xeon/keycloak_setup_guide.md)** for more details on the Keycloak configuration setup.
|
||||
|
||||
---
|
||||
|
||||
## ✅ Verify Services
|
||||
To verify the installation, run `kubectl get pod` to make sure all pods are running.
|
||||
|
||||
To view all the available services, run `kubectl get svc` to obtain the ports that need to be used as backend service endpoints in productivity_suite_reactui.yaml.
|
||||
|
||||
You may use `kubectl port-forward service/<service_name> <forwarded_port>:<service_port>` to forward the port of any of the services if necessary.
|
||||
```
|
||||
# For example, 'kubectl get svc | grep productivity'
|
||||
productivity-suite-react-ui ClusterIP 10.96.3.236 <none> 80/TCP
|
||||
|
||||
# By default, the productivity-suite-react-ui service exposes port 80; forward it to 5174 via:
|
||||
kubectl port-forward service/productivity-suite-react-ui 5174:80
|
||||
```
|
||||
|
||||
Or, a simpler way to forward the productivity suite service port:
|
||||
```
|
||||
label='app.kubernetes.io/name=react-ui'
|
||||
port=$(kubectl -n ${ns:-default} get svc -l ${label} -o jsonpath='{.items[0].spec.ports[0].port}')
|
||||
kubectl port-forward service/productivity-suite-react-ui 5174:$port
|
||||
```
|
||||
|
||||
You may open the productivity suite React UI at http://localhost:5174 in your browser.
|
||||
@@ -1,75 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: chat-history-config
|
||||
data:
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
MONGO_HOST: "mongo"
|
||||
MONGO_PORT: "27017"
|
||||
DB_NAME: "OPEA"
|
||||
COLLECTION_NAME: "ChatHistory"
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: chat-history
|
||||
labels:
|
||||
helm.sh/chart: chat-history-0.1.0
|
||||
app.kubernetes.io/name: chat-history
|
||||
app.kubernetes.io/instance: chat-history
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 6012
|
||||
targetPort: 6012
|
||||
protocol: TCP
|
||||
name: chat-history
|
||||
selector:
|
||||
app.kubernetes.io/name: chat-history
|
||||
app.kubernetes.io/instance: chat-history
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chat-history
|
||||
labels:
|
||||
helm.sh/chart: chat-history-0.1.0
|
||||
app.kubernetes.io/name: chat-history
|
||||
app.kubernetes.io/instance: chat-history
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: chat-history
|
||||
app.kubernetes.io/instance: chat-history
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: chat-history
|
||||
app.kubernetes.io/instance: chat-history
|
||||
spec:
|
||||
securityContext: null
|
||||
containers:
|
||||
- name: chat-history
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: chat-history-config
|
||||
securityContext: null
|
||||
image: "opea/chathistory-mongo-server:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: chat-history
|
||||
containerPort: 6012
|
||||
protocol: TCP
|
||||
resources: null
|
||||
---
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,333 +0,0 @@
|
||||
---
|
||||
# Source: codegen/charts/llm-uservice/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: codegen-llm-uservice-config
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TGI_LLM_ENDPOINT: "http://codegen-tgi"
|
||||
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
LANGCHAIN_TRACING_V2: "false"
|
||||
LANGCHAIN_API_KEY: insert-your-langchain-key-here
|
||||
LANGCHAIN_PROJECT: "opea-llm-uservice"
|
||||
---
|
||||
# Source: codegen/charts/tgi/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: codegen-tgi-config
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.8.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "1.4"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
MODEL_ID: "meta-llama/CodeLlama-7b-hf"
|
||||
PORT: "2080"
|
||||
HUGGING_FACE_HUB_TOKEN: "insert-your-huggingface-token-here"
|
||||
HF_TOKEN: "insert-your-huggingface-token-here"
|
||||
MAX_INPUT_TOKENS: "1024"
|
||||
MAX_TOTAL_TOKENS: "4096"
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
HABANA_LOGS: "/tmp/habana_logs"
|
||||
NUMBA_CACHE_DIR: "/tmp"
|
||||
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
---
|
||||
# Source: codegen/charts/llm-uservice/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: codegen-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 9000
|
||||
targetPort: 9000
|
||||
protocol: TCP
|
||||
name: llm-uservice
|
||||
selector:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codegen
|
||||
---
|
||||
# Source: codegen/charts/tgi/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: codegen-tgi
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.8.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "1.4"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 80
|
||||
targetPort: 2080
|
||||
protocol: TCP
|
||||
name: tgi
|
||||
selector:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codegen
|
||||
---
|
||||
# Source: codegen/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: codegen
|
||||
labels:
|
||||
helm.sh/chart: codegen-0.8.0
|
||||
app.kubernetes.io/name: codegen
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 7778
|
||||
targetPort: 7778
|
||||
protocol: TCP
|
||||
name: codegen
|
||||
selector:
|
||||
app.kubernetes.io/name: codegen
|
||||
app.kubernetes.io/instance: codegen
|
||||
---
|
||||
# Source: codegen/charts/llm-uservice/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: codegen-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codegen
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codegen
|
||||
spec:
|
||||
securityContext:
|
||||
{}
|
||||
containers:
|
||||
- name: codegen
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: codegen-llm-uservice-config
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: false
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/llm-textgen:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: llm-uservice
|
||||
containerPort: 9000
|
||||
protocol: TCP
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
startupProbe:
|
||||
exec:
|
||||
command:
|
||||
- curl
|
||||
- http://codegen-tgi
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
failureThreshold: 120
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Source: codegen/charts/tgi/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: codegen-tgi
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.8.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "1.4"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codegen
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codegen
|
||||
spec:
|
||||
securityContext:
|
||||
{}
|
||||
containers:
|
||||
- name: tgi
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: codegen-tgi-config
|
||||
securityContext:
|
||||
{}
|
||||
image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 2080
|
||||
protocol: TCP
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/opea-models
|
||||
type: Directory
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Source: codegen/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: codegen
|
||||
labels:
|
||||
helm.sh/chart: codegen-0.8.0
|
||||
app.kubernetes.io/name: codegen
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: codegen
|
||||
app.kubernetes.io/instance: codegen
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: codegen
|
||||
app.kubernetes.io/instance: codegen
|
||||
spec:
|
||||
securityContext:
|
||||
null
|
||||
containers:
|
||||
- name: codegen
|
||||
env:
|
||||
- name: LLM_SERVICE_HOST_IP
|
||||
value: codegen-llm-uservice
|
||||
- name: http_proxy
|
||||
value: ""
|
||||
- name: https_proxy
|
||||
value: ""
|
||||
- name: no_proxy
|
||||
value: ""
|
||||
#- name: MEGA_SERVICE_PORT
|
||||
# value: 7778
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/codegen:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
ports:
|
||||
- name: codegen
|
||||
containerPort: 7778
|
||||
protocol: TCP
|
||||
# startupProbe:
|
||||
# httpGet:
|
||||
# host: codegen-llm-uservice
|
||||
# port: 9000
|
||||
# path: /
|
||||
# initialDelaySeconds: 5
|
||||
# periodSeconds: 5
|
||||
# failureThreshold: 120
|
||||
# livenessProbe:
|
||||
# httpGet:
|
||||
# path: /
|
||||
# port: 7778
|
||||
# readinessProbe:
|
||||
# httpGet:
|
||||
# path: /
|
||||
# port: 7778
|
||||
resources:
|
||||
null
|
||||
volumes:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
@@ -1,317 +0,0 @@
|
||||
---
|
||||
# Source: docsum/charts/llm-uservice/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: docsum-llm-uservice-config
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TGI_LLM_ENDPOINT: "http://docsum-tgi"
|
||||
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
LANGCHAIN_TRACING_V2: "false"
|
||||
LANGCHAIN_API_KEY: insert-your-langchain-key-here
|
||||
LANGCHAIN_PROJECT: "opea-llm-uservice"
|
||||
---
|
||||
# Source: docsum/charts/tgi/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: docsum-tgi-config
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.8.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
MODEL_ID: "Intel/neural-chat-7b-v3-3"
|
||||
PORT: "2080"
|
||||
HUGGING_FACE_HUB_TOKEN: "insert-your-huggingface-token-here"
|
||||
HF_TOKEN: "insert-your-huggingface-token-here"
|
||||
MAX_INPUT_TOKENS: "1024"
|
||||
MAX_TOTAL_TOKENS: "4096"
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
HABANA_LOGS: "/tmp/habana_logs"
|
||||
NUMBA_CACHE_DIR: "/tmp"
|
||||
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
---
|
||||
# Source: docsum/charts/llm-uservice/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: docsum-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 9000
|
||||
targetPort: 9000
|
||||
protocol: TCP
|
||||
name: llm-uservice
|
||||
selector:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: docsum
|
||||
---
|
||||
# Source: docsum/charts/tgi/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: docsum-tgi
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.8.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 80
|
||||
targetPort: 2080
|
||||
protocol: TCP
|
||||
name: tgi
|
||||
selector:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: docsum
|
||||
---
|
||||
# Source: docsum/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: docsum
|
||||
labels:
|
||||
helm.sh/chart: docsum-0.8.0
|
||||
app.kubernetes.io/name: docsum
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 8888
|
||||
targetPort: 8888
|
||||
protocol: TCP
|
||||
name: docsum
|
||||
selector:
|
||||
app.kubernetes.io/name: docsum
|
||||
app.kubernetes.io/instance: docsum
|
||||
---
|
||||
# Source: docsum/charts/llm-uservice/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: docsum-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: docsum
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: docsum
|
||||
spec:
|
||||
securityContext:
|
||||
{}
|
||||
containers:
|
||||
- name: docsum
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: docsum-llm-uservice-config
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: false
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/llm-docsum-tgi:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: llm-uservice
|
||||
containerPort: 9000
|
||||
protocol: TCP
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
startupProbe:
|
||||
exec:
|
||||
command:
|
||||
- curl
|
||||
- http://docsum-tgi
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
failureThreshold: 120
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Source: docsum/charts/tgi/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: docsum-tgi
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.8.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: docsum
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: docsum
|
||||
spec:
|
||||
securityContext:
|
||||
{}
|
||||
containers:
|
||||
- name: tgi
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: docsum-tgi-config
|
||||
securityContext:
|
||||
{}
|
||||
image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 2080
|
||||
protocol: TCP
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/opea-models
|
||||
type: Directory
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Source: docsum/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: docsum
|
||||
labels:
|
||||
helm.sh/chart: docsum-0.8.0
|
||||
app.kubernetes.io/name: docsum
|
||||
app.kubernetes.io/instance: docsum
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: docsum
|
||||
app.kubernetes.io/instance: docsum
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: docsum
|
||||
app.kubernetes.io/instance: docsum
|
||||
spec:
|
||||
securityContext:
|
||||
null
|
||||
containers:
|
||||
- name: docsum
|
||||
env:
|
||||
- name: LLM_SERVICE_HOST_IP
|
||||
value: docsum-llm-uservice
|
||||
- name: http_proxy
|
||||
value: ""
|
||||
- name: https_proxy
|
||||
value: ""
|
||||
- name: no_proxy
|
||||
value: ""
|
||||
#- name: MEGA_SERVICE_PORT
|
||||
# value: 8888
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/docsum:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
ports:
|
||||
- name: docsum
|
||||
containerPort: 8888
|
||||
protocol: TCP
|
||||
resources:
|
||||
null
|
||||
volumes:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
@@ -1,243 +0,0 @@
|
||||
---
|
||||
# Source: faqgen/charts/llm-uservice/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: faqgen-llm-uservice-config
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: faqgen
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TGI_LLM_ENDPOINT: "http://faqgen-tgi:80"
|
||||
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
---
|
||||
# Source: faqgen/charts/tgi/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: faqgen-tgi-config
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.8.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: faqgen
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
MODEL_ID: "Intel/neural-chat-7b-v3-3"
|
||||
PORT: "80"
|
||||
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
---
|
||||
# Source: faqgen/charts/llm-uservice/charts/tgi/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: faqgen-tgi
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.8.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: faqgen
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 80
|
||||
targetPort: 80
|
||||
protocol: TCP
|
||||
name: tgi
|
||||
selector:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: faqgen
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: faqgen-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: faqgen
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 9000
|
||||
targetPort: 9000
|
||||
protocol: TCP
|
||||
name: llm-uservice
|
||||
selector:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: faqgen
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: faqgen
|
||||
labels:
|
||||
helm.sh/chart: faqgen-0.8.0
|
||||
app.kubernetes.io/name: faqgen
|
||||
app.kubernetes.io/instance: faqgen
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 8888
|
||||
targetPort: 8888
|
||||
protocol: TCP
|
||||
name: faqgen
|
||||
selector:
|
||||
app.kubernetes.io/name: faqgen
|
||||
app.kubernetes.io/instance: faqgen
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: faqgen-tgi
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.8.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: faqgen
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: faqgen
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: faqgen
|
||||
spec:
|
||||
securityContext: {}
|
||||
containers:
|
||||
- name: tgi
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: faqgen-tgi-config
|
||||
securityContext: {}
|
||||
image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 80
|
||||
protocol: TCP
|
||||
resources: {}
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/opea-models
|
||||
type: Directory
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: faqgen-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: faqgen
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: faqgen
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: faqgen
|
||||
spec:
|
||||
securityContext: {}
|
||||
containers:
|
||||
- name: faqgen
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: faqgen-llm-uservice-config
|
||||
securityContext: {}
|
||||
image: "opea/llm-faqgen:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: llm-uservice
|
||||
containerPort: 9000
|
||||
protocol: TCP
|
||||
startupProbe:
|
||||
exec:
|
||||
command:
|
||||
- curl
|
||||
- http://faqgen-tgi:80
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
failureThreshold: 120
|
||||
resources: {}
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: faqgen
|
||||
labels:
|
||||
helm.sh/chart: faqgen-0.8.0
|
||||
app.kubernetes.io/name: faqgen
|
||||
app.kubernetes.io/instance: faqgen
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: faqgen
|
||||
app.kubernetes.io/instance: faqgen
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: faqgen
|
||||
app.kubernetes.io/instance: faqgen
|
||||
spec:
|
||||
securityContext: null
|
||||
containers:
|
||||
- name: faqgen
|
||||
env:
|
||||
- name: LLM_SERVICE_HOST_IP
|
||||
value: faqgen-llm-uservice
|
||||
- name: http_proxy
|
||||
value: ""
|
||||
- name: https_proxy
|
||||
value: ""
|
||||
- name: no_proxy
|
||||
value: ""
|
||||
securityContext: null
|
||||
image: "opea/faqgen:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: faqgen
|
||||
containerPort: 8888
|
||||
protocol: TCP
|
||||
resources: null
|
||||
@@ -1,66 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: keycloak
|
||||
spec:
|
||||
progressDeadlineSeconds: 600
|
||||
replicas: 1
|
||||
revisionHistoryLimit: 10
|
||||
selector:
|
||||
matchLabels:
|
||||
app: keycloak
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: keycloak
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- start-dev
|
||||
env:
|
||||
- name: KEYCLOAK_ADMIN
|
||||
value: admin
|
||||
- name: KEYCLOAK_ADMIN_PASSWORD
|
||||
value: admin
|
||||
- name: KC_PROXY
|
||||
value: edge
|
||||
image: quay.io/keycloak/keycloak:25.0.2
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: keycloak
|
||||
ports:
|
||||
- containerPort: 8080
|
||||
name: http
|
||||
protocol: TCP
|
||||
readinessProbe:
|
||||
failureThreshold: 3
|
||||
httpGet:
|
||||
path: /realms/master
|
||||
port: 8080
|
||||
scheme: HTTP
|
||||
periodSeconds: 10
|
||||
successThreshold: 1
|
||||
timeoutSeconds: 1
|
||||
resources: {}
|
||||
terminationMessagePath: /dev/termination-log
|
||||
terminationMessagePolicy: File
|
||||
dnsPolicy: ClusterFirst
|
||||
restartPolicy: Always
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: keycloak
|
||||
spec:
|
||||
allocateLoadBalancerNodePorts: true
|
||||
ports:
|
||||
- name: http
|
||||
nodePort: 31503
|
||||
port: 8080
|
||||
protocol: TCP
|
||||
targetPort: 8080
|
||||
selector:
|
||||
app: keycloak
|
||||
type: LoadBalancer
|
||||
@@ -1,71 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: mongo-config
|
||||
data:
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: mongo
|
||||
labels:
|
||||
helm.sh/chart: mongo-0.1.0
|
||||
app.kubernetes.io/name: mongo
|
||||
app.kubernetes.io/instance: mongo
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 27017
|
||||
targetPort: 27017
|
||||
protocol: TCP
|
||||
name: mongo
|
||||
selector:
|
||||
app.kubernetes.io/name: mongo
|
||||
app.kubernetes.io/instance: mongo
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: mongo
|
||||
labels:
|
||||
helm.sh/chart: mongo-0.1.0
|
||||
app.kubernetes.io/name: mongo
|
||||
app.kubernetes.io/instance: mongo
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: mongo
|
||||
app.kubernetes.io/instance: mongo
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: mongo
|
||||
app.kubernetes.io/instance: mongo
|
||||
spec:
|
||||
securityContext: null
|
||||
containers:
|
||||
- name: mongo
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: mongo-config
|
||||
securityContext: null
|
||||
image: "mongo:7.0.11"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: mongo
|
||||
containerPort: 27017
|
||||
protocol: TCP
|
||||
resources: null
|
||||
command: ["mongod", "--bind_ip", "0.0.0.0", "--quiet", "--logpath", "/dev/null"]
|
||||
@@ -1,91 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: productivity-suite-react-ui
|
||||
labels:
|
||||
helm.sh/chart: productivity-suite-react-ui-0.1.0
|
||||
app.kubernetes.io/name: react-ui
|
||||
app.kubernetes.io/instance: productivity-suite
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 80
|
||||
targetPort: 80
|
||||
protocol: TCP
|
||||
name: react-ui
|
||||
selector:
|
||||
app.kubernetes.io/name: react-ui
|
||||
app.kubernetes.io/instance: productivity-suite
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: productivity-suite-react-ui
|
||||
labels:
|
||||
helm.sh/chart: productivity-suite-react-ui-0.1.0
|
||||
app.kubernetes.io/name: react-ui
|
||||
app.kubernetes.io/instance: productivity-suite
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: react-ui
|
||||
app.kubernetes.io/instance: productivity-suite
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: react-ui
|
||||
app.kubernetes.io/instance: productivity-suite
|
||||
spec:
|
||||
securityContext: null
|
||||
containers:
|
||||
- name: productivity-suite-react-ui
|
||||
env:
|
||||
- name: http_proxy
|
||||
value: ""
|
||||
- name: https_proxy
|
||||
value: ""
|
||||
- name: no_proxy
|
||||
value: ""
|
||||
- name: APP_BACKEND_SERVICE_ENDPOINT_CHATQNA
|
||||
value: ""
|
||||
- name: APP_BACKEND_SERVICE_ENDPOINT_CODEGEN
|
||||
value: ""
|
||||
- name: APP_BACKEND_SERVICE_ENDPOINT_DOCSUM
|
||||
value: ""
|
||||
- name: APP_BACKEND_SERVICE_ENDPOINT_FAQGEN
|
||||
value: ""
|
||||
- name: APP_DATAPREP_SERVICE_ENDPOINT
|
||||
value: ""
|
||||
- name: APP_DATAPREP_GET_FILE_ENDPOINT
|
||||
value: ""
|
||||
- name: APP_DATAPREP_DELETE_FILE_ENDPOINT
|
||||
value: ""
|
||||
- name: APP_CHAT_HISTORY_CREATE_ENDPOINT
|
||||
value: ""
|
||||
- name: APP_CHAT_HISTORY_DELETE_ENDPOINT
|
||||
value: ""
|
||||
- name: APP_CHAT_HISTORY_GET_ENDPOINT
|
||||
value: ""
|
||||
- name: APP_PROMPT_SERVICE_GET_ENDPOINT
|
||||
value: ""
|
||||
- name: APP_PROMPT_SERVICE_CREATE_ENDPOINT
|
||||
value: ""
|
||||
- name: APP_KEYCLOAK_SERVICE_ENDPOINT
|
||||
value: ""
|
||||
securityContext: null
|
||||
image: "opea/productivity-suite-react-ui-server:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: react-ui
|
||||
containerPort: 80
|
||||
protocol: TCP
|
||||
resources: null
|
||||
@@ -1,75 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: prompt-registry-config
|
||||
data:
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
MONGO_HOST: "mongo"
|
||||
MONGO_PORT: "27017"
|
||||
DB_NAME: "OPEA"
|
||||
COLLECTION_NAME: "Prompt"
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: prompt-registry
|
||||
labels:
|
||||
helm.sh/chart: prompt-registry-0.1.0
|
||||
app.kubernetes.io/name: prompt-registry
|
||||
app.kubernetes.io/instance: prompt-registry
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 6018
|
||||
targetPort: 6018
|
||||
protocol: TCP
|
||||
name: prompt-registry
|
||||
selector:
|
||||
app.kubernetes.io/name: prompt-registry
|
||||
app.kubernetes.io/instance: prompt-registry
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: prompt-registry
|
||||
labels:
|
||||
helm.sh/chart: prompt-registry-0.1.0
|
||||
app.kubernetes.io/name: prompt-registry
|
||||
app.kubernetes.io/instance: prompt-registry
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: prompt-registry
|
||||
app.kubernetes.io/instance: prompt-registry
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: prompt-registry
|
||||
app.kubernetes.io/instance: prompt-registry
|
||||
spec:
|
||||
securityContext: null
|
||||
containers:
|
||||
- name: prompt-registry
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: prompt-registry-config
|
||||
securityContext: null
|
||||
image: "opea/promptregistry-mongo-server:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: prompt-registry
|
||||
containerPort: 6018
|
||||
protocol: TCP
|
||||
resources: null
|
||||
---
|
||||
@@ -1,157 +0,0 @@
|
||||
set_model_id() {
|
||||
if [ -z "$1" ] && [ -z "$2" ]; then
|
||||
yq -o json '.| select(.data | has("MODEL_ID"))| {"ConfigMap": .metadata.name, "MODEL_ID": .data.MODEL_ID}' *.yaml
|
||||
echo "usage:"
|
||||
echo " set_model_id \${ConfigMap} \${MODEL_ID}"
|
||||
return
|
||||
fi
|
||||
conf=$1
|
||||
file=${1%%-*}
|
||||
sed -i '/name: '"${conf}"'/,/---/s|\(MODEL_ID:\).*|\1 "'"${2}"'"|' ${file}.yaml
|
||||
}
|
||||
|
||||
set_model_mirror() {
|
||||
if [ -z "$1" ] ; then
|
||||
yq -o json '.| select(.data | has("MODEL_ID"))| {"ConfigMap": .metadata.name, "MODEL_MIRROR": .data.HF_ENDPOINT}' *.yaml
|
||||
echo "usage:"
|
||||
echo " set_model_mirror \${MODEL_MIRROR}"
|
||||
return
|
||||
fi
|
||||
cm=$(yq -r -o json '.| select(.data | has("MODEL_ID"))| .metadata.name' *.yaml)
|
||||
mirror=$1
|
||||
for i in $cm; do
|
||||
conf=$i
|
||||
file=${i%%-*}
|
||||
echo "ConfigMap: $conf set mirror as $mirror"
|
||||
has_mirror=$(yq -r -o json '.| select(.metadata.name == "'"${conf}"'")| .data.HF_ENDPOINT' ${file}.yaml)
|
||||
if [ "$has_mirror" == "null" ]; then
|
||||
sed -i '/name: '"${conf}"'/,/---/s|\(data:\)|\1\n HF_ENDPOINT: "'"${mirror}"'"|' ${file}.yaml
|
||||
else
|
||||
sed -i '/name: '"${conf}"'/,/---/s|\(HF_ENDPOINT:\).*|\1 "'"${1}"'"|' ${file}.yaml
|
||||
fi
|
||||
done
|
||||
}
|
||||
|
||||
set_hf_token() {
|
||||
if [ -z "$1" ] ; then
|
||||
echo "usage:"
|
||||
echo " set_hf_token \${HF_TOKEN}"
|
||||
return
|
||||
fi
|
||||
sed -i "s/\(HF_TOKEN:\).*/\1 \"${1}\"/g" *.yaml
|
||||
sed -i "s/\(HUGGINGFACEHUB_API_TOKEN:\).*/\1 \"${1}\"/g" *.yaml
|
||||
sed -i "s/\(HUGGING_FACE_HUB_TOKEN:\).*/\1 \"${1}\"/g" *.yaml
|
||||
}
|
||||
|
||||
set_https_proxy() {
|
||||
if [ -z "$1" ] ; then
|
||||
echo "usage:"
|
||||
echo " set_https_proxy \${https_proxy}"
|
||||
return
|
||||
fi
|
||||
https_proxy=$1
|
||||
sed -i -e "s|\(https_proxy:\)\s*\"\"|\1 \"$https_proxy\"|g" *.yaml
|
||||
sed -i '/https_proxy/{n;s|\(value:\)\s.*""|\1 "'"$https_proxy"'"|g}' *.yaml
|
||||
}
|
||||
|
||||
set_http_proxy() {
|
||||
if [ -z "$1" ] ; then
|
||||
echo "usage:"
|
||||
echo " set_http_proxy \${http_proxy}"
|
||||
return
|
||||
fi
|
||||
http_proxy=$1
|
||||
sed -i -e "s|\(http_proxy:\)\s*\"\"|\1 \"$http_proxy\"|g" *.yaml
|
||||
sed -i '/http_proxy/{n;s|\(value:\)\s.*""|\1 "'"$http_proxy"'"|g}' *.yaml
|
||||
}
|
||||
|
||||
set_no_proxy() {
|
||||
if [ -z "$1" ] ; then
|
||||
echo "usage:"
|
||||
echo " set_no_proxy \${no_proxy}"
|
||||
return
|
||||
fi
|
||||
no_proxy=$1
|
||||
sed -i -e "s|\(no_proxy:\)\s*\"\"|\1 \"$no_proxy\"|g" *.yaml
|
||||
sed -i '/no_proxy/{n;s|\(value:\)\s.*""|\1 "'"$no_proxy"'"|g}' *.yaml
|
||||
}
|
||||
|
||||
set_backend_service_endpoint() {
|
||||
for i in $(grep -oP "(?<=APP_BACKEND_SERVICE_ENDPOINT_).*" *.yaml); do
|
||||
echo $i
|
||||
name=${i##*:}
|
||||
file=${name,,}.yaml
|
||||
svc=$(yq -o json '. | select(.metadata.name == "'"${name,,}"'" and .kind=="Service")' $file)
|
||||
port=$(jq .spec.ports[0].port <<< $svc)
|
||||
|
||||
url=http://${name,,}.${ns:-default}.svc.cluster.local:${port}
|
||||
echo $url
|
||||
sed -i -e '/APP_BACKEND_SERVICE_ENDPOINT_'"$name"'/{n;s|\(value:\)\s.*|\1 "'"$url"'"|}' productivity_suite_reactui.yaml
|
||||
done
|
||||
}
|
||||
|
||||
|
||||
set_dataprep_service_endpoint() {
|
||||
name=chatqna-data-prep
|
||||
file=chatqna.yaml
|
||||
svc=$(yq -o json '. | select(.metadata.name == "'"$name"'" and .kind=="Service")' $file)
|
||||
port=$(jq .spec.ports[0].port <<< $svc)
|
||||
url=http://${name}.${ns:-default}.svc.cluster.local:${port}
|
||||
echo $url
|
||||
for i in $(grep -oP "(?<=APP_)DATAPREP.*(?=_ENDPOINT)" *.yaml); do
|
||||
echo $i
|
||||
curd=${i##*:};
|
||||
sed -i -e '/'"$curd"'/{n;s|\(value:\)\s.*|\1 "'"$url"'"|}' productivity_suite_reactui.yaml;
|
||||
done
|
||||
}
|
||||
|
||||
|
||||
set_chat_history_endpoint() {
|
||||
for i in $(grep -oP "(?<=APP_)CHAT_HISTORY.*(?=_ENDPOINT)" *.yaml); do
|
||||
echo $i;
|
||||
curd=${i##*:};
|
||||
name=${curd%_*};
|
||||
file=${name,,}.yaml;
|
||||
name=${name/_/-};
|
||||
svc=$(yq -o json '. | select(.metadata.name == "'"${name,,}"'" and .kind=="Service")' $file)
|
||||
port=$(jq .spec.ports[0].port <<< $svc)
|
||||
url=http://${name,,}.${ns:-default}.svc.cluster.local:${port};
|
||||
echo $url;
|
||||
sed -i -e '/'"$curd"'/{n;s|\(value:\)\s.*|\1 "'"$url"'"|}' productivity_suite_reactui.yaml;
|
||||
done
|
||||
}
|
||||
|
||||
|
||||
set_prompt_service_endpoint() {
|
||||
for i in $(grep -oP "(?<=APP_)PROMPT_SERVICE.*(?=_ENDPOINT)" *.yaml); do
|
||||
echo $i;
|
||||
curd=${i##*:};
|
||||
curdr=${curd/SERVICE/REGISTRY};
|
||||
name=${curdr%_*};
|
||||
file=${name,,}.yaml;
|
||||
name=${name/_/-};
|
||||
svc=$(yq -o json '. | select(.metadata.name == "'"${name,,}"'" and .kind=="Service")' $file)
|
||||
port=$(jq .spec.ports[0].port <<< $svc)
|
||||
url=http://${name,,}.${ns:-default}.svc.cluster.local:${port};
|
||||
echo $url;
|
||||
sed -i -e '/'"$curd"'/{n;s|\(value:\)\s.*|\1 "'"$url"'"|}' productivity_suite_reactui.yaml ;
|
||||
done
|
||||
}
|
||||
|
||||
set_keycloak_service_endpoint() {
|
||||
name=keycloak
|
||||
file=keycloak_install.yaml
|
||||
svc=$(yq -o json '. | select(.metadata.name == "'"$name"'" and .kind=="Service")' $file)
|
||||
port=$(jq .spec.ports[0].port <<< $svc)
|
||||
url=http://${name}.${ns:-default}.svc.cluster.local:${port}
|
||||
echo $url
|
||||
sed -i -e '/APP_KEYCLOAK_SERVICE_ENDPOINT/{n;s|\(value:\)\s.*|\1 "'"$url"'"|}' productivity_suite_reactui.yaml
|
||||
}
|
||||
|
||||
set_services_endpoint() {
|
||||
set_backend_service_endpoint
|
||||
set_keycloak_service_endpoint
|
||||
set_chat_history_endpoint
|
||||
set_prompt_service_endpoint
|
||||
set_dataprep_service_endpoint
|
||||
}
|
||||
69
README-deploy-benchmark.md
Normal file
69
README-deploy-benchmark.md
Normal file
@@ -0,0 +1,69 @@
|
||||
# ChatQnA Benchmarking
|
||||
|
||||
## Purpose
|
||||
|
||||
We aim to run these benchmarks and share them with the OPEA community for three primary reasons:
|
||||
|
||||
- To offer insights on inference throughput in real-world scenarios, helping you choose the best service or deployment for your needs.
|
||||
- To establish a baseline for validating optimization solutions across different implementations, providing clear guidance on which methods are most effective for your use case.
|
||||
- To inspire the community to build upon our benchmarks, so that new solutions can be quantified alongside the current leading LLMs, serving frameworks, etc.
|
||||
|
||||
## Table of Contents
|
||||
|
||||
- [Prerequisites](#prerequisites)
|
||||
- [Overview](#overview)
|
||||
- [Using deploy_and_benchmark.py](#using-deploy_and_benchmarkpy-recommended)
|
||||
- [Data Preparation](#data-preparation)
|
||||
- [Configuration](#configuration)
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Before running the benchmarks, ensure you have:
|
||||
|
||||
1. **Kubernetes Environment**
|
||||
|
||||
- Kubernetes installation: Use [kubespray](https://github.com/opea-project/docs/blob/main/guide/installation/k8s_install/k8s_install_kubespray.md) or other official Kubernetes installation guides
|
||||
- (Optional) [Kubernetes set up guide on Intel Gaudi product](https://github.com/opea-project/GenAIInfra/blob/main/README.md#setup-kubernetes-cluster)
|
||||
|
||||
2. **Configuration YAML**
|
||||
The configuration file (e.g., `./ChatQnA/benchmark_chatqna.yaml`) consists of two main sections: deployment and benchmarking. Required fields must be filled with valid values (like the Hugging Face token). For all other fields, you can either customize them according to your needs or leave them empty ("") to use the default values from the [helm charts](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts).
|
||||
|
||||
## Data Preparation
|
||||
|
||||
Before running benchmarks, you need to:
|
||||
|
||||
1. **Prepare Test Data**
|
||||
|
||||
- Download the retrieval file:
|
||||
```bash
|
||||
wget https://raw.githubusercontent.com/opea-project/GenAIEval/main/evals/benchmark/data/upload_file.txt
|
||||
```
|
||||
- For the `chatqna_qlist_pubmed` test case, prepare `pubmed_${max_lines}.txt` by following this [README](https://github.com/opea-project/GenAIEval/blob/main/evals/benchmark/stresscli/README_Pubmed_qlist.md)
|
||||
|
||||
2. **Prepare Model Files (Recommended)**
|
||||
```bash
|
||||
pip install -U "huggingface_hub[cli]"
|
||||
sudo mkdir -p /mnt/models
|
||||
sudo chmod 777 /mnt/models
|
||||
huggingface-cli download --cache-dir /mnt/models Intel/neural-chat-7b-v3-3
|
||||
```
|
||||
|
||||
## Overview
|
||||
|
||||
The benchmarking workflow consists of two main stages: deployment and benchmarking. We provide `deploy_and_benchmark.py` as a unified entry point that combines both stages.
|
||||
|
||||
### Using deploy_and_benchmark.py (Recommended)
|
||||
|
||||
The script `deploy_and_benchmark.py` serves as the main entry point. Here's an example using ChatQnA configuration (you can replace it with any other example's configuration YAML file):
|
||||
|
||||
1. For a specific number of nodes:
|
||||
|
||||
```bash
|
||||
python deploy_and_benchmark.py ./ChatQnA/benchmark_chatqna.yaml --target-node 1
|
||||
```
|
||||
|
||||
2. For all node configurations:
|
||||
```bash
|
||||
python deploy_and_benchmark.py ./ChatQnA/benchmark_chatqna.yaml
|
||||
```
|
||||
This will iterate through the node list in your configuration YAML file, performing deployment and benchmarking for each node count.
|
||||
@@ -50,7 +50,7 @@ Deployment are based on released docker images by default, check [docker image l
|
||||
| CodeTrans | [Xeon Instructions](CodeTrans/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](CodeTrans/docker_compose/intel/hpu/gaudi/README.md) | [ROCm Instructions](CodeTrans/docker_compose/amd/gpu/rocm/README.md) | [CodeTrans with Helm Charts](CodeTrans/kubernetes/helm/README.md) | [CodeTrans with GMC](CodeTrans/kubernetes/gmc/README.md) |
|
||||
| DocSum | [Xeon Instructions](DocSum/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](DocSum/docker_compose/intel/hpu/gaudi/README.md) | [ROCm Instructions](DocSum/docker_compose/amd/gpu/rocm/README.md) | [DocSum with Helm Charts](DocSum/kubernetes/helm/README.md) | [DocSum with GMC](DocSum/kubernetes/gmc/README.md) |
|
||||
| SearchQnA | [Xeon Instructions](SearchQnA/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](SearchQnA/docker_compose/intel/hpu/gaudi/README.md) | Not Supported | [SearchQnA with Helm Charts](SearchQnA/kubernetes/helm/README.md) | [SearchQnA with GMC](SearchQnA/kubernetes/gmc/README.md) |
|
||||
| FaqGen | [Xeon Instructions](FaqGen/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](FaqGen/docker_compose/intel/hpu/gaudi/README.md) | [ROCm Instructions](FaqGen/docker_compose/amd/gpu/rocm/README.md) | [FaqGen with Helm Charts](FaqGen/kubernetes/helm/README.md) | [FaqGen with GMC](FaqGen/kubernetes/gmc/README.md) |
|
||||
| FaqGen | [Xeon Instructions](FaqGen/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](FaqGen/docker_compose/intel/hpu/gaudi/README.md) | [ROCm Instructions](FaqGen/docker_compose/amd/gpu/rocm/README.md) | [FaqGen with Helm Charts](FaqGen/kubernetes/helm/README.md) | Not supported |
|
||||
| Translation | [Xeon Instructions](Translation/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](Translation/docker_compose/intel/hpu/gaudi/README.md) | [ROCm Instructions](Translation/docker_compose/amd/gpu/rocm/README.md) | Not Supported | [Translation with GMC](Translation/kubernetes/gmc/README.md) |
|
||||
| AudioQnA | [Xeon Instructions](AudioQnA/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](AudioQnA/docker_compose/intel/hpu/gaudi/README.md) | [ROCm Instructions](AudioQnA/docker_compose/amd/gpu/rocm/README.md) | [AudioQnA with Helm Charts](AudioQnA/kubernetes/helm/README.md) | [AudioQnA with GMC](AudioQnA/kubernetes/gmc/README.md) |
|
||||
| VisualQnA | [Xeon Instructions](VisualQnA/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](VisualQnA/docker_compose/intel/hpu/gaudi/README.md) | [ROCm Instructions](VisualQnA/docker_compose/amd/gpu/rocm/README.md) | [VisualQnA with Helm Charts](VisualQnA/kubernetes/helm/README.md) | [VisualQnA with GMC](VisualQnA/kubernetes/gmc/README.md) |
|
||||
|
||||
@@ -11,3 +11,12 @@ services:
|
||||
context: GenAIComps
|
||||
dockerfile: comps/finetuning/src/Dockerfile
|
||||
image: ${REGISTRY:-opea}/finetuning:${TAG:-latest}
|
||||
finetuning-gaudi:
|
||||
build:
|
||||
args:
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
no_proxy: ${no_proxy}
|
||||
context: GenAIComps
|
||||
dockerfile: comps/finetuning/src/Dockerfile.intel_hpu
|
||||
image: ${REGISTRY:-opea}/finetuning-gaudi:${TAG:-latest}
|
||||
|
||||
131
RerankFinetuning/tests/test_compose_on_gaudi.sh
Normal file
131
RerankFinetuning/tests/test_compose_on_gaudi.sh
Normal file
@@ -0,0 +1,131 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
set -x
|
||||
IMAGE_REPO=${IMAGE_REPO:-"opea"}
|
||||
IMAGE_TAG=${IMAGE_TAG:-"latest"}
|
||||
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
|
||||
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
|
||||
export REGISTRY=${IMAGE_REPO}
|
||||
export TAG=${IMAGE_TAG}
|
||||
|
||||
WORKPATH=$(dirname "$PWD")
|
||||
LOG_PATH="$WORKPATH/tests"
|
||||
ip_address=$(hostname -I | awk '{print $1}')
|
||||
finetuning_service_port=8015
|
||||
ray_port=8265
|
||||
service_name=finetuning-gaudi
|
||||
|
||||
function build_docker_images() {
|
||||
cd $WORKPATH/docker_image_build
|
||||
if [ ! -d "GenAIComps" ] ; then
|
||||
git clone --depth 1 --branch ${opea_branch:-"main"} https://github.com/opea-project/GenAIComps.git
|
||||
fi
|
||||
docker compose -f build.yaml build ${service_name} --no-cache > ${LOG_PATH}/docker_image_build.log
|
||||
}
|
||||
|
||||
function start_service() {
|
||||
export no_proxy="localhost,127.0.0.1,"${ip_address}
|
||||
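# Launch the Gaudi fine-tuning container: Habana runtime with all HPUs visible,
# host IPC, proxy settings passed through, and both the REST API port and the
# Ray dashboard port published.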
docker run -d --name="finetuning-server" -p $finetuning_service_port:$finetuning_service_port -p $ray_port:$ray_port --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy ${IMAGE_REPO}/finetuning-gaudi:${IMAGE_TAG}
|
||||
sleep 1m
|
||||
}
|
||||
|
||||
function validate_microservice() {
|
||||
cd $LOG_PATH
|
||||
export no_proxy="localhost,127.0.0.1,"${ip_address}
|
||||
|
||||
# test /v1/dataprep upload file
|
||||
URL="http://${ip_address}:$finetuning_service_port/v1/files"
|
||||
cat <<EOF > test_data.json
|
||||
{"query": "Five women walk along a beach wearing flip-flops.", "pos": ["Some women with flip-flops on, are walking along the beach"], "neg": ["The 4 women are sitting on the beach.", "There was a reform in 1996.", "She's not going to court to clear her record.", "The man is talking about hawaii.", "A woman is standing outside.", "The battle was over. ", "A group of people plays volleyball."]}
|
||||
{"query": "A woman standing on a high cliff on one leg looking over a river.", "pos": ["A woman is standing on a cliff."], "neg": ["A woman sits on a chair.", "George Bush told the Republicans there was no way he would let them even consider this foolish idea, against his top advisors advice.", "The family was falling apart.", "no one showed up to the meeting", "A boy is sitting outside playing in the sand.", "Ended as soon as I received the wire.", "A child is reading in her bedroom."]}
|
||||
{"query": "Two woman are playing instruments; one a clarinet, the other a violin.", "pos": ["Some people are playing a tune."], "neg": ["Two women are playing a guitar and drums.", "A man is skiing down a mountain.", "The fatal dose was not taken when the murderer thought it would be.", "Person on bike", "The girl is standing, leaning against the archway.", "A group of women watch soap operas.", "No matter how old people get they never forget. "]}
|
||||
{"query": "A girl with a blue tank top sitting watching three dogs.", "pos": ["A girl is wearing blue."], "neg": ["A girl is with three cats.", "The people are watching a funeral procession.", "The child is wearing black.", "Financing is an issue for us in public schools.", "Kids at a pool.", "It is calming to be assaulted.", "I face a serious problem at eighteen years old. "]}
|
||||
{"query": "A yellow dog running along a forest path.", "pos": ["a dog is running"], "neg": ["a cat is running", "Steele did not keep her original story.", "The rule discourages people to pay their child support.", "A man in a vest sits in a car.", "Person in black clothing, with white bandanna and sunglasses waits at a bus stop.", "Neither the Globe or Mail had comments on the current state of Canada's road system. ", "The Spring Creek facility is old and outdated."]}
|
||||
{"query": "It sets out essential activities in each phase along with critical factors related to those activities.", "pos": ["Critical factors for essential activities are set out."], "neg": ["It lays out critical activities but makes no provision for critical factors related to those activities.", "People are assembled in protest.", "The state would prefer for you to do that.", "A girl sits beside a boy.", "Two males are performing.", "Nobody is jumping", "Conrad was being plotted against, to be hit on the head."]}
|
||||
EOF
|
||||
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F 'file=@./test_data.json' -F purpose="fine-tune" -H 'Content-Type: multipart/form-data' "$URL")
|
||||
HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://')
|
||||
RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g')
|
||||
SERVICE_NAME="finetuning-server - upload - file"
|
||||
|
||||
# Parse the JSON response
|
||||
purpose=$(echo "$RESPONSE_BODY" | jq -r '.purpose')
|
||||
filename=$(echo "$RESPONSE_BODY" | jq -r '.filename')
|
||||
|
||||
# Define expected values
|
||||
expected_purpose="fine-tune"
|
||||
expected_filename="test_data.json"
|
||||
|
||||
if [ "$HTTP_STATUS" -ne "200" ]; then
|
||||
echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
|
||||
docker logs finetuning-server >> ${LOG_PATH}/finetuning-server_upload_file.log
|
||||
exit 1
|
||||
else
|
||||
echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
|
||||
fi
|
||||
# Check if the parsed values match the expected values
|
||||
if [[ "$purpose" != "$expected_purpose" || "$filename" != "$expected_filename" ]]; then
|
||||
echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY"
|
||||
docker logs finetuning-server >> ${LOG_PATH}/finetuning-server_upload_file.log
|
||||
exit 1
|
||||
else
|
||||
echo "[ $SERVICE_NAME ] Content is as expected."
|
||||
fi
|
||||
|
||||
# test /v1/fine_tuning/jobs
|
||||
URL="http://${ip_address}:$finetuning_service_port/v1/fine_tuning/jobs"
|
||||
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -H 'Content-Type: application/json' -d '{"training_file": "test_data.json","model": "BAAI/bge-reranker-base","General":{"task":"rerank","lora_config":null}}' "$URL")
|
||||
HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://')
|
||||
RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g')
|
||||
SERVICE_NAME="finetuning-server - create finetuning job"
|
||||
|
||||
if [ "$HTTP_STATUS" -ne "200" ]; then
|
||||
echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
|
||||
docker logs finetuning-server >> ${LOG_PATH}/finetuning-server_create.log
|
||||
exit 1
|
||||
else
|
||||
echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
|
||||
fi
|
||||
if [[ "$RESPONSE_BODY" != *'{"id":"ft-job'* ]]; then
|
||||
echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY"
|
||||
docker logs finetuning-server >> ${LOG_PATH}/finetuning-server_create.log
|
||||
exit 1
|
||||
else
|
||||
echo "[ $SERVICE_NAME ] Content is as expected."
|
||||
fi
|
||||
|
||||
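# Give the fine-tuning job a few minutes to complete before scraping the
# container logs for its final status.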
sleep 3m
|
||||
|
||||
docker logs finetuning-server 2>&1 | tee ${LOG_PATH}/finetuning-server_create.log
|
||||
FINETUNING_LOG=$(grep "succeeded" ${LOG_PATH}/finetuning-server_create.log)
|
||||
if [[ "$FINETUNING_LOG" != *'succeeded'* ]]; then
|
||||
echo "Finetuning failed."
|
||||
RAY_JOBID=$(grep "Submitted Ray job" ${LOG_PATH}/finetuning-server_create.log | sed 's/.*raysubmit/raysubmit/' | cut -d' ' -f 1)
|
||||
docker exec finetuning-server python -c "import os;os.environ['RAY_ADDRESS']='http://localhost:8265';from ray.job_submission import JobSubmissionClient;client = JobSubmissionClient();print(client.get_job_logs('${RAY_JOBID}'))" 2>&1 | tee ${LOG_PATH}/finetuning.log
|
||||
exit 1
|
||||
else
|
||||
echo "Finetuning succeeded."
|
||||
fi
|
||||
}
|
||||
|
||||
function stop_docker() {
|
||||
cid=$(docker ps -aq --filter "name=finetuning-server*")
|
||||
if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
|
||||
}
|
||||
|
||||
function main() {
|
||||
|
||||
stop_docker
|
||||
|
||||
build_docker_images
|
||||
start_service
|
||||
|
||||
validate_microservice
|
||||
|
||||
stop_docker
|
||||
echo y | docker system prune
|
||||
|
||||
}
|
||||
|
||||
main
|
||||
@@ -14,13 +14,14 @@ LOG_PATH="$WORKPATH/tests"
|
||||
ip_address=$(hostname -I | awk '{print $1}')
|
||||
finetuning_service_port=8015
|
||||
ray_port=8265
|
||||
service_name=finetuning
|
||||
|
||||
function build_docker_images() {
|
||||
cd $WORKPATH/docker_image_build
|
||||
if [ ! -d "GenAIComps" ] ; then
|
||||
git clone --depth 1 --branch ${opea_branch:-"main"} https://github.com/opea-project/GenAIComps.git
|
||||
fi
|
||||
docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log
|
||||
docker compose -f build.yaml build ${service_name} --no-cache > ${LOG_PATH}/docker_image_build.log
|
||||
}
|
||||
|
||||
function start_service() {
|
||||
@@ -94,7 +95,18 @@ EOF
|
||||
echo "[ $SERVICE_NAME ] Content is as expected."
|
||||
fi
|
||||
|
||||
sleep 1s
|
||||
sleep 3m
|
||||
|
||||
docker logs finetuning-server 2>&1 | tee ${LOG_PATH}/finetuning-server_create.log
|
||||
FINETUNING_LOG=$(grep "succeeded" ${LOG_PATH}/finetuning-server_create.log)
|
||||
if [[ "$FINETUNING_LOG" != *'succeeded'* ]]; then
|
||||
echo "Finetuning failed."
|
||||
RAY_JOBID=$(grep "Submitted Ray job" ${LOG_PATH}/finetuning-server_create.log | sed 's/.*raysubmit/raysubmit/' | cut -d' ' -f 1)
|
||||
docker exec finetuning-server python -c "import os;os.environ['RAY_ADDRESS']='http://localhost:8265';from ray.job_submission import JobSubmissionClient;client = JobSubmissionClient();print(client.get_job_logs('${RAY_JOBID}'))" 2>&1 | tee ${LOG_PATH}/finetuning.log
|
||||
exit 1
|
||||
else
|
||||
echo "Finetuning succeeded."
|
||||
fi
|
||||
}
|
||||
|
||||
function stop_docker() {
|
||||
|
||||
179
SearchQnA/docker_compose/amd/gpu/rocm/README.md
Normal file
179
SearchQnA/docker_compose/amd/gpu/rocm/README.md
Normal file
@@ -0,0 +1,179 @@
|
||||
# Build and deploy SearchQnA Application on AMD GPU (ROCm)
|
||||
|
||||
## Build images
|
||||
|
||||
### Build Embedding Image
|
||||
|
||||
```bash
|
||||
git clone https://github.com/opea-project/GenAIComps.git
|
||||
cd GenAIComps
|
||||
docker build --no-cache -t opea/embedding:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/src/Dockerfile .
|
||||
```
|
||||
|
||||
### Build Web Retriever Image
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/web-retriever:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/web_retrievers/src/Dockerfile .
|
||||
```
|
||||
|
||||
### Build Rerank Image
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/reranking:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/rerankings/src/Dockerfile .
|
||||
```
|
||||
|
||||
### Build the LLM Docker Image
|
||||
|
||||
```bash
|
||||
docker build -t opea/llm-textgen:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/text-generation/Dockerfile .
|
||||
```
|
||||
|
||||
### Build the MegaService Docker Image
|
||||
|
||||
```bash
|
||||
git clone https://github.com/opea-project/GenAIExamples.git
|
||||
cd GenAIExamples/SearchQnA
|
||||
docker build --no-cache -t opea/searchqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
|
||||
```
|
||||
|
||||
### Build the UI Docker Image
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/SearchQnA/ui
|
||||
docker build --no-cache -t opea/searchqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile .
|
||||
```
|
||||
|
||||
## Deploy SearchQnA Application
|
||||
|
||||
### Features of Docker compose for AMD GPUs
|
||||
|
||||
1. GPU devices are forwarded to the TGI service container with the following Docker Compose directives:
|
||||
|
||||
```yaml
|
||||
shm_size: 1g
|
||||
devices:
|
||||
- /dev/kfd:/dev/kfd
|
||||
- /dev/dri/:/dev/dri/
|
||||
cap_add:
|
||||
- SYS_PTRACE
|
||||
group_add:
|
||||
- video
|
||||
security_opt:
|
||||
- seccomp:unconfined
|
||||
```
|
||||
|
||||
In this case, all GPUs are passed through to the container. To pass through only a specific GPU, list its individual device names cardN and renderDN instead.
|
||||
|
||||
For example:
|
||||
|
||||
```yaml
|
||||
shm_size: 1g
|
||||
devices:
|
||||
- /dev/kfd:/dev/kfd
|
||||
- /dev/dri/card0:/dev/dri/card0
|
||||
- /dev/dri/renderD128:/dev/dri/renderD128
|
||||
cap_add:
|
||||
- SYS_PTRACE
|
||||
group_add:
|
||||
- video
|
||||
security_opt:
|
||||
- seccomp:unconfined
|
||||
```
|
||||
|
||||
To find out which cardN and renderDN device nodes belong to the same physical GPU, use the GPU driver utilities, for example:
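One way to do this, as a sketch that assumes the ROCm tools and the udev `by-path` symlinks are available on the host:

```bash
# DRM device nodes grouped by PCI address: a cardN and a renderDN entry that
# share the same PCI address belong to the same physical GPU.
ls -l /dev/dri/by-path/

# Each ROCm-visible GPU with its PCI bus address, for cross-referencing.
rocm-smi --showbus
```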
|
||||
|
||||
### Go to the directory with the Docker compose file
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/SearchQnA/docker_compose/amd/gpu/rocm
|
||||
```
|
||||
|
||||
### Set environments
|
||||
|
||||
In the file `GenAIExamples/SearchQnA/docker_compose/amd/gpu/rocm/set_env.sh`, set the required values; the purpose of each parameter is described in the comments accompanying each variable assignment. The sketch below lists the values you will typically need to provide before sourcing the file.
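A minimal sketch of the values `set_env.sh` consumes from the parent shell; the placeholder strings are assumptions and must be replaced with your own credentials:

```bash
# Tokens and keys that set_env.sh reads from the environment (placeholders).
export HUGGINGFACEHUB_API_TOKEN="your-huggingface-token"
export GOOGLE_API_KEY="your-google-api-key"
export GOOGLE_CSE_ID="your-google-cse-id"
export OPENAI_API_KEY="your-openai-api-key"

# SEARCH_HOST_IP and SEARCH_EXTERNAL_HOST_IP are hard-coded inside set_env.sh;
# edit them there to match the host running the services.
```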
|
||||
|
||||
```bash
|
||||
chmod +x set_env.sh
|
||||
. set_env.sh
|
||||
```
|
||||
|
||||
### Run services
|
||||
|
||||
```bash
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
## Validate the MicroServices and MegaService
|
||||
|
||||
## Validate TEI service
|
||||
|
||||
```bash
|
||||
curl http://${SEARCH_HOST_IP}:3001/embed \
|
||||
-X POST \
|
||||
-d '{"inputs":"What is Deep Learning?"}' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
## Validate Embedding service
|
||||
|
||||
```bash
|
||||
curl http://${SEARCH_HOST_IP}:3002/v1/embeddings\
|
||||
-X POST \
|
||||
-d '{"text":"hello"}' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
## Validate Web Retriever service
|
||||
|
||||
```bash
|
||||
export your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
|
||||
curl http://${SEARCH_HOST_IP}:3003/v1/web_retrieval \
|
||||
-X POST \
|
||||
-d "{\"text\":\"What is the 2024 holiday schedule?\",\"embedding\":${your_embedding}}" \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
## Validate TEI Reranking service
|
||||
|
||||
```bash
|
||||
curl http://${SEARCH_HOST_IP}:3004/rerank \
|
||||
-X POST \
|
||||
-d '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
## Validate Reranking service
|
||||
|
||||
```bash
|
||||
curl http://${SEARCH_HOST_IP}:3005/v1/reranking\
|
||||
-X POST \
|
||||
-d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
## Validate TGI service
|
||||
|
||||
```bash
|
||||
curl http://${SEARCH_HOST_IP}:3006/generate \
|
||||
-X POST \
|
||||
-d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
## Validate LLM service
|
||||
|
||||
```bash
|
||||
curl http://${SEARCH_HOST_IP}:3007/v1/chat/completions\
|
||||
-X POST \
|
||||
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
## Validate MegaService
|
||||
|
||||
```bash
|
||||
curl http://${SEARCH_HOST_IP}:3008/v1/searchqna -H "Content-Type: application/json" -d '{
|
||||
"messages": "What is the latest news? Give me also the source link.",
|
||||
"stream": "True"
|
||||
}'
|
||||
```
|
||||
173
SearchQnA/docker_compose/amd/gpu/rocm/compose.yaml
Normal file
173
SearchQnA/docker_compose/amd/gpu/rocm/compose.yaml
Normal file
@@ -0,0 +1,173 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
|
||||
services:
|
||||
search-tei-embedding-service:
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
container_name: search-tei-embedding-server
|
||||
ports:
|
||||
- "3001:80"
|
||||
volumes:
|
||||
- "./data:/data"
|
||||
shm_size: 1g
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
|
||||
HUGGING_FACE_HUB_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
|
||||
command: --model-id ${SEARCH_EMBEDDING_MODEL_ID} --auto-truncate
|
||||
search-embedding:
|
||||
image: ${REGISTRY:-opea}/embedding:${TAG:-latest}
|
||||
container_name: search-embedding-server
|
||||
depends_on:
|
||||
- search-tei-embedding-service
|
||||
ports:
|
||||
- "3002:6000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TEI_EMBEDDING_HOST_IP: ${SEARCH_HOST_IP}
|
||||
TEI_EMBEDDING_ENDPOINT: ${SEARCH_TEI_EMBEDDING_ENDPOINT}
|
||||
HF_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
|
||||
restart: unless-stopped
|
||||
search-web-retriever:
|
||||
image: ${REGISTRY:-opea}/web-retriever:${TAG:-latest}
|
||||
container_name: search-web-retriever-server
|
||||
ports:
|
||||
- "3003:7077"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TEI_EMBEDDING_ENDPOINT: ${SEARCH_TEI_EMBEDDING_ENDPOINT}
|
||||
GOOGLE_API_KEY: ${SEARCH_GOOGLE_API_KEY}
|
||||
GOOGLE_CSE_ID: ${SEARCH_GOOGLE_CSE_ID}
|
||||
restart: unless-stopped
|
||||
search-tei-reranking-service:
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
container_name: search-tei-reranking-server
|
||||
ports:
|
||||
- "3004:80"
|
||||
volumes:
|
||||
- "./data:/data"
|
||||
shm_size: 1g
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
command: --model-id ${SEARCH_RERANK_MODEL_ID} --auto-truncate
|
||||
search-reranking:
|
||||
image: ${REGISTRY:-opea}/reranking:${TAG:-latest}
|
||||
container_name: search-reranking-server
|
||||
depends_on:
|
||||
- search-tei-reranking-service
|
||||
ports:
|
||||
- "3005:8000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TEI_RERANKING_ENDPOINT: ${SEARCH_TEI_RERANKING_ENDPOINT}
|
||||
HF_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
|
||||
HUGGING_FACE_HUB_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
|
||||
restart: unless-stopped
|
||||
search-tgi-service:
|
||||
image: ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
|
||||
container_name: search-tgi-service
|
||||
ports:
|
||||
- "3006:80"
|
||||
volumes:
|
||||
- "./data:/data"
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HUGGING_FACE_HUB_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
|
||||
shm_size: 1g
|
||||
devices:
|
||||
- /dev/kfd:/dev/kfd
|
||||
- /dev/dri/:/dev/dri/
|
||||
cap_add:
|
||||
- SYS_PTRACE
|
||||
group_add:
|
||||
- video
|
||||
security_opt:
|
||||
- seccomp:unconfined
|
||||
ipc: host
|
||||
command: --model-id ${SEARCH_LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048
|
||||
search-llm:
|
||||
image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest}
|
||||
container_name: search-llm-server
|
||||
depends_on:
|
||||
- search-tgi-service
|
||||
ports:
|
||||
- "3007:9000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TGI_LLM_ENDPOINT: ${SEARCH_TGI_LLM_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
|
||||
LLM_ENDPOINT: ${SEARCH_TGI_LLM_ENDPOINT}
|
||||
LLM_MODEL_ID: ${SEARCH_LLM_MODEL_ID}
|
||||
LLM_MODEL: ${SEARCH_LLM_MODEL_ID}
|
||||
HF_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
|
||||
OPENAI_API_KEY: ${SEARCH_OPENAI_API_KEY}
|
||||
restart: unless-stopped
|
||||
search-backend-server:
|
||||
image: ${REGISTRY:-opea}/searchqna:${TAG:-latest}
|
||||
container_name: search-backend-server
|
||||
depends_on:
|
||||
- search-tei-embedding-service
|
||||
- search-embedding
|
||||
- search-web-retriever
|
||||
- search-tei-reranking-service
|
||||
- search-reranking
|
||||
- search-tgi-service
|
||||
- search-llm
|
||||
ports:
|
||||
- "${SEARCH_BACKEND_SERVICE_PORT:-3008}:8888"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- MEGA_SERVICE_HOST_IP=${SEARCH_MEGA_SERVICE_HOST_IP}
|
||||
- EMBEDDING_SERVICE_HOST_IP=${SEARCH_EMBEDDING_SERVICE_HOST_IP}
|
||||
- WEB_RETRIEVER_SERVICE_HOST_IP=${SEARCH_WEB_RETRIEVER_SERVICE_HOST_IP}
|
||||
- RERANK_SERVICE_HOST_IP=${SEARCH_RERANK_SERVICE_HOST_IP}
|
||||
- LLM_SERVICE_HOST_IP=${SEARCH_LLM_SERVICE_HOST_IP}
|
||||
- EMBEDDING_SERVICE_PORT=${SEARCH_EMBEDDING_SERVICE_PORT}
|
||||
- WEB_RETRIEVER_SERVICE_PORT=${SEARCH_WEB_RETRIEVER_SERVICE_PORT}
|
||||
- RERANK_SERVICE_PORT=${SEARCH_RERANK_SERVICE_PORT}
|
||||
- LLM_SERVICE_PORT=${SEARCH_LLM_SERVICE_PORT}
|
||||
ipc: host
|
||||
restart: always
|
||||
search-ui-server:
|
||||
image: ${REGISTRY:-opea}/searchqna-ui:${TAG:-latest}
|
||||
container_name: search-ui-server
|
||||
depends_on:
|
||||
- search-backend-server
|
||||
ports:
|
||||
- "${SEARCH_FRONTEND_SERVICE_PORT:-5173}:5173"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- BACKEND_BASE_URL=${SEARCH_BACKEND_SERVICE_ENDPOINT}
|
||||
ipc: host
|
||||
restart: always
|
||||
|
||||
networks:
|
||||
default:
|
||||
driver: bridge
|
||||
36
SearchQnA/docker_compose/amd/gpu/rocm/set_env.sh
Normal file
36
SearchQnA/docker_compose/amd/gpu/rocm/set_env.sh
Normal file
@@ -0,0 +1,36 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
|
||||
export SEARCH_HOST_IP=10.53.22.29
|
||||
export SEARCH_EXTERNAL_HOST_IP=68.69.180.77
|
||||
export SEARCH_EMBEDDING_MODEL_ID='BAAI/bge-base-en-v1.5'
|
||||
export SEARCH_TEI_EMBEDDING_ENDPOINT=http://${SEARCH_HOST_IP}:3001
|
||||
export SEARCH_RERANK_MODEL_ID='BAAI/bge-reranker-base'
|
||||
export SEARCH_TEI_RERANKING_ENDPOINT=http://${SEARCH_HOST_IP}:3004
|
||||
export SEARCH_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export SEARCH_OPENAI_API_KEY=${OPENAI_API_KEY}
|
||||
|
||||
export SEARCH_TGI_LLM_ENDPOINT=http://${SEARCH_HOST_IP}:3006
|
||||
export SEARCH_LLM_MODEL_ID='Intel/neural-chat-7b-v3-3'
|
||||
|
||||
export SEARCH_MEGA_SERVICE_HOST_IP=${SEARCH_EXTERNAL_HOST_IP}
|
||||
export SEARCH_EMBEDDING_SERVICE_HOST_IP=${SEARCH_HOST_IP}
|
||||
export SEARCH_WEB_RETRIEVER_SERVICE_HOST_IP=${SEARCH_HOST_IP}
|
||||
export SEARCH_RERANK_SERVICE_HOST_IP=${SEARCH_HOST_IP}
|
||||
export SEARCH_LLM_SERVICE_HOST_IP=${SEARCH_HOST_IP}
|
||||
|
||||
export SEARCH_EMBEDDING_SERVICE_PORT=3002
|
||||
export SEARCH_WEB_RETRIEVER_SERVICE_PORT=3003
|
||||
export SEARCH_RERANK_SERVICE_PORT=3005
|
||||
export SEARCH_LLM_SERVICE_PORT=3007
|
||||
|
||||
export SEARCH_FRONTEND_SERVICE_PORT=18143
|
||||
export SEARCH_BACKEND_SERVICE_PORT=18142
|
||||
export SEARCH_BACKEND_SERVICE_ENDPOINT=http://${SEARCH_EXTERNAL_HOST_IP}:${SEARCH_BACKEND_SERVICE_PORT}/v1/searchqna
|
||||
|
||||
export SEARCH_GOOGLE_API_KEY=${GOOGLE_API_KEY}
|
||||
export SEARCH_GOOGLE_CSE_ID=${GOOGLE_CSE_ID}
|
||||
137
SearchQnA/tests/test_compose_on_rocm.sh
Normal file
137
SearchQnA/tests/test_compose_on_rocm.sh
Normal file
@@ -0,0 +1,137 @@
|
||||
#!/bin/bash
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
set -xe
|
||||
IMAGE_REPO=${IMAGE_REPO:-"opea"}
|
||||
IMAGE_TAG=${IMAGE_TAG:-"latest"}
|
||||
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
|
||||
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
|
||||
export REGISTRY=${IMAGE_REPO}
|
||||
export TAG=${IMAGE_TAG}
|
||||
|
||||
WORKPATH=$(dirname "$PWD")
|
||||
LOG_PATH="$WORKPATH/tests"
|
||||
ip_address=$(hostname -I | awk '{print $1}')
|
||||
|
||||
function build_docker_images() {
|
||||
cd $WORKPATH/docker_image_build
|
||||
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
|
||||
|
||||
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
||||
service_list="searchqna searchqna-ui embedding web-retriever reranking llm-textgen"
|
||||
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
|
||||
|
||||
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
|
||||
docker images && sleep 1s
|
||||
}
|
||||
|
||||
function start_services() {
|
||||
cd $WORKPATH/docker_compose/amd/gpu/rocm/
|
||||
export SEARCH_HOST_IP=${ip_address}
|
||||
export SEARCH_EXTERNAL_HOST_IP=${ip_address}
|
||||
export SEARCH_EMBEDDING_MODEL_ID='BAAI/bge-base-en-v1.5'
|
||||
export SEARCH_TEI_EMBEDDING_ENDPOINT=http://${SEARCH_HOST_IP}:3001
|
||||
export SEARCH_RERANK_MODEL_ID='BAAI/bge-reranker-base'
|
||||
export SEARCH_TEI_RERANKING_ENDPOINT=http://${SEARCH_HOST_IP}:3004
|
||||
export SEARCH_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export SEARCH_OPENAI_API_KEY=${OPENAI_API_KEY}
|
||||
export SEARCH_TGI_LLM_ENDPOINT=http://${SEARCH_HOST_IP}:3006
|
||||
export SEARCH_LLM_MODEL_ID='Intel/neural-chat-7b-v3-3'
|
||||
export SEARCH_MEGA_SERVICE_HOST_IP=${SEARCH_EXTERNAL_HOST_IP}
|
||||
export SEARCH_EMBEDDING_SERVICE_HOST_IP=${SEARCH_HOST_IP}
|
||||
export SEARCH_WEB_RETRIEVER_SERVICE_HOST_IP=${SEARCH_HOST_IP}
|
||||
export SEARCH_RERANK_SERVICE_HOST_IP=${SEARCH_HOST_IP}
|
||||
export SEARCH_LLM_SERVICE_HOST_IP=${SEARCH_HOST_IP}
|
||||
export SEARCH_EMBEDDING_SERVICE_PORT=3002
|
||||
export SEARCH_WEB_RETRIEVER_SERVICE_PORT=3003
|
||||
export SEARCH_RERANK_SERVICE_PORT=3005
|
||||
export SEARCH_LLM_SERVICE_PORT=3007
|
||||
export SEARCH_FRONTEND_SERVICE_PORT=5173
|
||||
export SEARCH_BACKEND_SERVICE_PORT=3008
|
||||
export SEARCH_BACKEND_SERVICE_ENDPOINT=http://${SEARCH_HOST_IP}:${SEARCH_BACKEND_SERVICE_PORT}/v1/searchqna
|
||||
export SEARCH_GOOGLE_API_KEY=${GOOGLE_API_KEY}
|
||||
export SEARCH_GOOGLE_CSE_ID=${GOOGLE_CSE_ID}
|
||||
|
||||
sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
|
||||
|
||||
# Start Docker Containers
|
||||
docker compose up -d > ${LOG_PATH}/start_services_with_compose.log
|
||||
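# Wait for the TGI server to finish loading the model: it logs "Connected"
# when ready (poll up to 100 times, 5 seconds apart).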
n=0
|
||||
until [[ "$n" -ge 100 ]]; do
|
||||
docker logs search-tgi-service > $LOG_PATH/search-tgi-service_start.log
|
||||
if grep -q Connected $LOG_PATH/search-tgi-service_start.log; then
|
||||
break
|
||||
fi
|
||||
sleep 5s
|
||||
n=$((n+1))
|
||||
done
|
||||
}
|
||||
|
||||
|
||||
function validate_megaservice() {
|
||||
result=$(http_proxy="" curl http://${ip_address}:3008/v1/searchqna -XPOST -d '{"messages": "What is black myth wukong?", "stream": "False"}' -H 'Content-Type: application/json')
|
||||
echo $result
|
||||
|
||||
if [[ $result == *"the"* ]]; then
|
||||
docker logs search-web-retriever-server
|
||||
docker logs search-backend-server
|
||||
echo "Result correct."
|
||||
else
|
||||
docker logs search-web-retriever-server
|
||||
docker logs search-backend-server
|
||||
echo "Result wrong."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
}
|
||||
|
||||
function validate_frontend() {
|
||||
cd $WORKPATH/ui/svelte
|
||||
local conda_env_name="OPEA_e2e"
|
||||
export PATH=${HOME}/miniconda3/bin/:$PATH
|
||||
if conda info --envs | grep -q "$conda_env_name"; then
|
||||
echo "$conda_env_name exist!"
|
||||
else
|
||||
conda create -n ${conda_env_name} python=3.12 -y
|
||||
fi
|
||||
source activate ${conda_env_name}
|
||||
|
||||
sed -i "s/localhost/$ip_address/g" playwright.config.ts
|
||||
|
||||
conda install -c conda-forge nodejs=22.6.0 -y
|
||||
npm install && npm ci && npx playwright install --with-deps
|
||||
node -v && npm -v && pip list
|
||||
|
||||
exit_status=0
|
||||
npx playwright test || exit_status=$?
|
||||
|
||||
if [ $exit_status -ne 0 ]; then
|
||||
echo "[TEST INFO]: ---------frontend test failed---------"
|
||||
exit $exit_status
|
||||
else
|
||||
echo "[TEST INFO]: ---------frontend test passed---------"
|
||||
fi
|
||||
}
|
||||
|
||||
function stop_docker() {
|
||||
cd $WORKPATH/docker_compose/amd/gpu/rocm/
|
||||
docker compose stop && docker compose rm -f
|
||||
}
|
||||
|
||||
function main() {
|
||||
|
||||
stop_docker
|
||||
if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
|
||||
start_services
|
||||
|
||||
validate_megaservice
|
||||
validate_frontend
|
||||
|
||||
stop_docker
|
||||
echo y | docker system prune
|
||||
|
||||
}
|
||||
|
||||
main
|
||||
@@ -1,41 +0,0 @@
|
||||
# Deploy Translation in Kubernetes Cluster
|
||||
|
||||
> [!NOTE]
|
||||
> The following values must be set before you can deploy:
|
||||
> HUGGINGFACEHUB_API_TOKEN
|
||||
>
|
||||
> You can also customize the "MODEL_ID" if needed.
|
||||
>
|
||||
> You need to make sure the directory `/mnt/opea-models` exists on the node where the Translation workload is running, so the cached model can be saved there. Otherwise, modify the `translation.yaml` file to point `model-volume` at a directory that does exist on that node (see the sketch below).
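A minimal sketch for preparing that directory on the target node (the wide-open permissions are an assumption chosen for convenience; tighten them to match your cluster policy):

```bash
# Create the model cache directory referenced by translation.yaml as model-volume.
sudo mkdir -p /mnt/opea-models
sudo chmod 777 /mnt/opea-models
```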
|
||||
|
||||
## Deploy On Xeon
|
||||
|
||||
```
|
||||
cd GenAIExamples/Translation/kubernetes/intel/cpu/xeon/manifest
|
||||
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
|
||||
sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" translation.yaml
|
||||
kubectl apply -f translation.yaml
|
||||
```
|
||||
|
||||
## Deploy On Gaudi
|
||||
|
||||
```
|
||||
cd GenAIExamples/Translation/kubernetes/intel/hpu/gaudi/manifest
|
||||
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
|
||||
sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" translation.yaml
|
||||
kubectl apply -f translation.yaml
|
||||
```
|
||||
|
||||
## Verify Services
|
||||
|
||||
To verify the installation, run the command `kubectl get pod` to make sure all pods are running.
|
||||
|
||||
Then run the command `kubectl port-forward svc/translation 8888:8888` to expose the Translation service for access.
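Put together, the two verification commands from above (add `-n <namespace>` to each if the manifests were applied outside the default namespace):

```bash
kubectl get pod
kubectl port-forward svc/translation 8888:8888
```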
|
||||
|
||||
Open another terminal and run the following command to verify the service is working:
|
||||
|
||||
```console
|
||||
curl http://localhost:8888/v1/translation \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}'
|
||||
```
|
||||
@@ -1,495 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: translation-tgi-config
|
||||
labels:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: translation
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
data:
|
||||
MODEL_ID: "haoranxu/ALMA-13B"
|
||||
PORT: "2080"
|
||||
HF_TOKEN: "insert-your-huggingface-token-here"
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
HABANA_LOGS: "/tmp/habana_logs"
|
||||
NUMBA_CACHE_DIR: "/tmp"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
CUDA_GRAPHS: "0"
|
||||
---
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: translation-llm-uservice-config
|
||||
labels:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: translation
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
data:
|
||||
TGI_LLM_ENDPOINT: "http://translation-tgi"
|
||||
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
LOGFLAG: ""
|
||||
---
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: translation-ui-config
|
||||
labels:
|
||||
app.kubernetes.io/name: translation-ui
|
||||
app.kubernetes.io/instance: translation
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
data:
|
||||
BASE_URL: "/v1/translation"
|
||||
---
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
data:
|
||||
default.conf: |+
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
|
||||
server {
|
||||
listen 80;
|
||||
listen [::]:80;
|
||||
|
||||
location /home {
|
||||
alias /usr/share/nginx/html/index.html;
|
||||
}
|
||||
|
||||
location / {
|
||||
proxy_pass http://translation-ui:5173;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
}
|
||||
|
||||
location /v1/translation {
|
||||
proxy_pass http://translation:8888;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
}
|
||||
}
|
||||
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: translation-nginx-config
|
||||
---
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: translation-ui
|
||||
labels:
|
||||
app.kubernetes.io/name: translation-ui
|
||||
app.kubernetes.io/instance: translation
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 5173
|
||||
targetPort: ui
|
||||
protocol: TCP
|
||||
name: ui
|
||||
selector:
|
||||
app.kubernetes.io/name: translation-ui
|
||||
app.kubernetes.io/instance: translation
|
||||
---
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: translation-llm-uservice
|
||||
labels:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: translation
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 9000
|
||||
targetPort: 9000
|
||||
protocol: TCP
|
||||
name: llm-uservice
|
||||
selector:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: translation
|
||||
---
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: translation-tgi
|
||||
labels:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: translation
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 80
|
||||
targetPort: 2080
|
||||
protocol: TCP
|
||||
name: tgi
|
||||
selector:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: translation
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: translation-nginx
|
||||
spec:
|
||||
ports:
|
||||
- port: 80
|
||||
protocol: TCP
|
||||
targetPort: 80
|
||||
selector:
|
||||
app.kubernetes.io/name: translation
|
||||
app.kubernetes.io/instance: translation
|
||||
app: translation-nginx
|
||||
type: NodePort
|
||||
---
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: translation
|
||||
labels:
|
||||
app.kubernetes.io/name: translation
|
||||
app.kubernetes.io/instance: translation
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 8888
|
||||
targetPort: 8888
|
||||
protocol: TCP
|
||||
name: translation
|
||||
selector:
|
||||
app.kubernetes.io/name: translation
|
||||
app.kubernetes.io/instance: translation
|
||||
app: translation
|
||||
---
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: translation-ui
|
||||
labels:
|
||||
app.kubernetes.io/name: translation-ui
|
||||
app.kubernetes.io/instance: translation
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: translation-ui
|
||||
app.kubernetes.io/instance: translation
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: translation-ui
|
||||
app.kubernetes.io/instance: translation
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
spec:
|
||||
securityContext:
|
||||
{}
|
||||
containers:
|
||||
- name: translation-ui
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: translation-ui-config
|
||||
securityContext:
|
||||
{}
|
||||
image: "opea/translation-ui:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: ui
|
||||
containerPort: 80
|
||||
protocol: TCP
|
||||
resources:
|
||||
{}
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
volumes:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: translation-llm-uservice
|
||||
labels:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: translation
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: translation
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: translation
|
||||
spec:
|
||||
securityContext:
|
||||
{}
|
||||
containers:
|
||||
- name: translation
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: translation-llm-uservice-config
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: false
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/llm-textgen:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: llm-uservice
|
||||
containerPort: 9000
|
||||
protocol: TCP
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: translation-tgi
|
||||
labels:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: translation
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
spec:
|
||||
  # use explicit replica counts only if HorizontalPodAutoscaler is disabled
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: translation
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: translation
|
||||
spec:
|
||||
securityContext:
|
||||
{}
|
||||
containers:
|
||||
- name: tgi
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: translation-tgi-config
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 2080
|
||||
protocol: TCP
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
tcpSocket:
|
||||
port: http
|
||||
readinessProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
tcpSocket:
|
||||
port: http
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
tcpSocket:
|
||||
port: http
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
- name: model-volume
|
||||
emptyDir: {}
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: translation
|
||||
labels:
|
||||
app.kubernetes.io/name: translation
|
||||
app.kubernetes.io/instance: translation
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app: translation
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: translation
|
||||
app.kubernetes.io/instance: translation
|
||||
app: translation
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: translation
|
||||
app.kubernetes.io/instance: translation
|
||||
app: translation
|
||||
spec:
|
||||
securityContext:
|
||||
null
|
||||
containers:
|
||||
- name: translation
|
||||
env:
|
||||
- name: LLM_SERVICE_HOST_IP
|
||||
value: translation-llm-uservice
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/translation:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
ports:
|
||||
- name: translation
|
||||
containerPort: 8888
|
||||
protocol: TCP
|
||||
resources:
|
||||
null
|
||||
volumes:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: translation-nginx
|
||||
labels:
|
||||
app.kubernetes.io/name: translation
|
||||
app.kubernetes.io/instance: translation
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app: translation-nginx
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: translation
|
||||
app.kubernetes.io/instance: translation
|
||||
app: translation-nginx
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: translation
|
||||
app.kubernetes.io/instance: translation
|
||||
app: translation-nginx
|
||||
spec:
|
||||
containers:
|
||||
- image: nginx:1.27.1
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: nginx
|
||||
volumeMounts:
|
||||
- mountPath: /etc/nginx/conf.d
|
||||
name: nginx-config-volume
|
||||
securityContext: {}
|
||||
volumes:
|
||||
- configMap:
|
||||
defaultMode: 420
|
||||
name: translation-nginx-config
|
||||
name: nginx-config-volume
|
||||
@@ -1,497 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: translation-tgi-config
|
||||
labels:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: translation
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
data:
|
||||
MODEL_ID: "haoranxu/ALMA-13B"
|
||||
PORT: "2080"
|
||||
HF_TOKEN: "insert-your-huggingface-token-here"
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
HABANA_LOGS: "/tmp/habana_logs"
|
||||
NUMBA_CACHE_DIR: "/tmp"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
MAX_INPUT_LENGTH: "1024"
|
||||
MAX_TOTAL_TOKENS: "2048"
|
||||
---
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: translation-llm-uservice-config
|
||||
labels:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: translation
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
data:
|
||||
TGI_LLM_ENDPOINT: "http://translation-tgi"
|
||||
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
LOGFLAG: ""
|
||||
---
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: translation-ui-config
|
||||
labels:
|
||||
app.kubernetes.io/name: translation-ui
|
||||
app.kubernetes.io/instance: translation
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
data:
|
||||
BASE_URL: "/v1/translation"
|
||||
---
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
data:
|
||||
default.conf: |+
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
|
||||
server {
|
||||
listen 80;
|
||||
listen [::]:80;
|
||||
|
||||
location /home {
|
||||
alias /usr/share/nginx/html/index.html;
|
||||
}
|
||||
|
||||
location / {
|
||||
proxy_pass http://translation-ui:5173;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
}
|
||||
|
||||
location /v1/translation {
|
||||
proxy_pass http://translation;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
}
|
||||
}
|
||||
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: translation-nginx-config
|
||||
---
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: translation-ui
|
||||
labels:
|
||||
app.kubernetes.io/name: translation-ui
|
||||
app.kubernetes.io/instance: translation
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 5173
|
||||
targetPort: ui
|
||||
protocol: TCP
|
||||
name: ui
|
||||
selector:
|
||||
app.kubernetes.io/name: translation-ui
|
||||
app.kubernetes.io/instance: translation
|
||||
---
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: translation-llm-uservice
|
||||
labels:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: translation
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 9000
|
||||
targetPort: 9000
|
||||
protocol: TCP
|
||||
name: llm-uservice
|
||||
selector:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: translation
|
||||
---
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: translation-tgi
|
||||
labels:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: translation
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 80
|
||||
targetPort: 2080
|
||||
protocol: TCP
|
||||
name: tgi
|
||||
selector:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: translation
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: translation-nginx
|
||||
spec:
|
||||
ports:
|
||||
- port: 80
|
||||
protocol: TCP
|
||||
targetPort: 80
|
||||
selector:
|
||||
app.kubernetes.io/name: translation
|
||||
app.kubernetes.io/instance: translation
|
||||
app: translation-nginx
|
||||
type: NodePort
|
||||
---
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: translation
|
||||
labels:
|
||||
app.kubernetes.io/name: translation
|
||||
app.kubernetes.io/instance: translation
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 8888
|
||||
targetPort: 8888
|
||||
protocol: TCP
|
||||
name: translation
|
||||
selector:
|
||||
app.kubernetes.io/name: translation
|
||||
app.kubernetes.io/instance: translation
|
||||
app: translation
|
||||
---
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: translation-ui
|
||||
labels:
|
||||
app.kubernetes.io/name: translation-ui
|
||||
app.kubernetes.io/instance: translation
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: translation-ui
|
||||
app.kubernetes.io/instance: translation
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: translation-ui
|
||||
app.kubernetes.io/instance: translation
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
spec:
|
||||
securityContext:
|
||||
{}
|
||||
containers:
|
||||
- name: translation-ui
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: translation-ui-config
|
||||
securityContext:
|
||||
{}
|
||||
image: "opea/translation-ui:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: ui
|
||||
containerPort: 80
|
||||
protocol: TCP
|
||||
resources:
|
||||
{}
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
volumes:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: translation-llm-uservice
|
||||
labels:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: translation
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: translation
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: translation
|
||||
spec:
|
||||
securityContext:
|
||||
{}
|
||||
containers:
|
||||
- name: translation
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: translation-llm-uservice-config
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: false
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/llm-textgen:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: llm-uservice
|
||||
containerPort: 9000
|
||||
protocol: TCP
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
httpGet:
|
||||
path: v1/health_check
|
||||
port: llm-uservice
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: translation-tgi
|
||||
labels:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: translation
|
||||
app.kubernetes.io/version: "2.1.0"
|
||||
spec:
|
||||
  # use explicit replica counts only if HorizontalPodAutoscaler is disabled
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: translation
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: translation
|
||||
spec:
|
||||
securityContext:
|
||||
{}
|
||||
containers:
|
||||
- name: tgi
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: translation-tgi-config
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "ghcr.io/huggingface/tgi-gaudi:2.0.6"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 2080
|
||||
protocol: TCP
|
||||
livenessProbe:
|
||||
failureThreshold: 24
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
tcpSocket:
|
||||
port: http
|
||||
readinessProbe:
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
tcpSocket:
|
||||
port: http
|
||||
startupProbe:
|
||||
failureThreshold: 120
|
||||
initialDelaySeconds: 20
|
||||
periodSeconds: 5
|
||||
tcpSocket:
|
||||
port: http
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
volumes:
|
||||
- name: model-volume
|
||||
emptyDir: {}
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: translation
|
||||
labels:
|
||||
app.kubernetes.io/name: translation
|
||||
app.kubernetes.io/instance: translation
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app: translation
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: translation
|
||||
app.kubernetes.io/instance: translation
|
||||
app: translation
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: translation
|
||||
app.kubernetes.io/instance: translation
|
||||
app: translation
|
||||
spec:
|
||||
securityContext:
|
||||
null
|
||||
containers:
|
||||
- name: translation
|
||||
env:
|
||||
- name: LLM_SERVICE_HOST_IP
|
||||
value: translation-llm-uservice
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/translation:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
ports:
|
||||
- name: translation
|
||||
containerPort: 8888
|
||||
protocol: TCP
|
||||
resources:
|
||||
null
|
||||
volumes:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: translation-nginx
|
||||
labels:
|
||||
app.kubernetes.io/name: translation
|
||||
app.kubernetes.io/instance: translation
|
||||
app.kubernetes.io/version: "v1.0"
|
||||
app: translation-nginx
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: translation
|
||||
app.kubernetes.io/instance: translation
|
||||
app: translation-nginx
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: translation
|
||||
app.kubernetes.io/instance: translation
|
||||
app: translation-nginx
|
||||
spec:
|
||||
containers:
|
||||
- image: nginx:1.27.1
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: nginx
|
||||
volumeMounts:
|
||||
- mountPath: /etc/nginx/conf.d
|
||||
name: nginx-config-volume
|
||||
securityContext: {}
|
||||
volumes:
|
||||
- configMap:
|
||||
defaultMode: 420
|
||||
name: translation-nginx-config
|
||||
name: nginx-config-volume
|
||||
@@ -23,6 +23,7 @@
|
||||
"@tailwindcss/typography": "0.5.7",
|
||||
"@types/debug": "4.1.7",
|
||||
"@types/node": "^20.12.13",
|
||||
"@types/pica": "^9.0.5",
|
||||
"@typescript-eslint/eslint-plugin": "^5.27.0",
|
||||
"@typescript-eslint/parser": "^5.27.0",
|
||||
"autoprefixer": "^10.4.16",
|
||||
@@ -51,6 +52,7 @@
|
||||
"flowbite-svelte-icons": "^1.4.0",
|
||||
"fuse.js": "^6.6.2",
|
||||
"lodash": "^4.17.21",
|
||||
"pica": "^9.0.1",
|
||||
"playwright": "^1.44.0",
|
||||
"ramda": "^0.29.0",
|
||||
"sse.js": "^0.6.1",
|
||||
|
||||
BIN  VisualQnA/ui/svelte/src/lib/assets/imageData/extreme_ironing.png (new file, 303 KiB; replaces the previous 61 KiB .jpg, binary not shown)
BIN  VisualQnA/ui/svelte/src/lib/assets/imageData/waterview.png (new file, 230 KiB; replaces the previous 93 KiB .jpg, binary not shown)
@@ -31,9 +31,7 @@
|
||||
class={msg.role === 0
|
||||
? "flex w-full gap-3"
|
||||
: "flex w-full items-center gap-3"}
|
||||
data-testid={msg.role === 0
|
||||
? "display-answer"
|
||||
: "display-question"}
|
||||
data-testid={msg.role === 0 ? "display-answer" : "display-question"}
|
||||
>
|
||||
<div
|
||||
class={msg.role === 0
|
||||
@@ -44,10 +42,15 @@
|
||||
</div>
|
||||
<div class="group relative flex items-start">
|
||||
<div class="flex flex-col items-start">
|
||||
<img src={msg.imgSrc} alt="Uploaded Image" class="m-2 max-w-28 max-h-28" />
|
||||
|
||||
{#if msg.imgSrc}
|
||||
<img
|
||||
src={msg.imgSrc}
|
||||
alt="Uploaded Image"
|
||||
class="max-w-28 m-2 max-h-28"
|
||||
/>
|
||||
{/if}
|
||||
<p
|
||||
class="xl:max-w-[65vw] max-w-[60vw] items-start whitespace-pre-line break-keep text-[0.8rem] leading-5 sm:max-w-[50rem]"
|
||||
class="max-w-[60vw] items-start whitespace-pre-line break-keep text-[0.8rem] leading-5 sm:max-w-[50rem] xl:max-w-[65vw]"
|
||||
>
|
||||
{@html msg.content}
|
||||
</p>
|
||||
|
||||
@@ -5,93 +5,98 @@
|
||||
|
||||
<script>
|
||||
import { createEventDispatcher } from "svelte";
|
||||
import extreme_ironing from '$lib/assets/imageData/extreme_ironing.jpg';
|
||||
import waterview from '$lib/assets/imageData/waterview.jpg';
|
||||
import extreme_ironing from "$lib/assets/imageData/extreme_ironing.png";
|
||||
import waterview from "$lib/assets/imageData/waterview.png";
|
||||
import { base64ImageStore } from "$lib/shared/stores/common/Store";
|
||||
|
||||
let dispatch = createEventDispatcher();
|
||||
let dispatch = createEventDispatcher();
|
||||
|
||||
let images = [
|
||||
{
|
||||
id: 1,
|
||||
alt: 'Waterview',
|
||||
imgurl: waterview,
|
||||
prompt: 'What are the things I should be cautious about when I visit here?'
|
||||
},
|
||||
{
|
||||
id: 0,
|
||||
alt: 'Extreme Ironing',
|
||||
imgurl: extreme_ironing,
|
||||
prompt: 'What is unusual about this image?'
|
||||
}
|
||||
];
|
||||
let images = [
|
||||
{
|
||||
id: 1,
|
||||
alt: "Waterview",
|
||||
imgurl: waterview,
|
||||
prompt:
|
||||
"What are the things I should be cautious about when I visit here?",
|
||||
},
|
||||
{
|
||||
id: 0,
|
||||
alt: "Extreme Ironing",
|
||||
imgurl: extreme_ironing,
|
||||
prompt: "What is unusual about this image?",
|
||||
},
|
||||
];
|
||||
|
||||
let currentIndex = 0;
|
||||
let currentIndex = 0;
|
||||
|
||||
function nextImage() {
|
||||
currentIndex = (currentIndex + 1) % images.length;
|
||||
}
|
||||
function nextImage() {
|
||||
currentIndex = (currentIndex + 1) % images.length;
|
||||
}
|
||||
|
||||
function prevImage() {
|
||||
currentIndex = (currentIndex - 1 + images.length) % images.length;
|
||||
}
|
||||
function prevImage() {
|
||||
currentIndex = (currentIndex - 1 + images.length) % images.length;
|
||||
}
|
||||
|
||||
async function handleImageClick() {
|
||||
const imgUrl = images[currentIndex].imgurl;
|
||||
|
||||
async function handleImageClick() {
|
||||
const imgUrl = images[currentIndex].imgurl;
|
||||
const base64Data = await convertImageToBase64(imgUrl);
|
||||
const currentPrompt = images[currentIndex].prompt;
|
||||
dispatch("imagePrompt", { content: currentPrompt });
|
||||
base64ImageStore.set(base64Data);
|
||||
}
|
||||
const base64Data = await convertImageToBase64(imgUrl);
|
||||
|
||||
async function convertImageToBase64(url) {
|
||||
const response = await fetch(url);
|
||||
const blob = await response.blob();
|
||||
return new Promise((resolve, reject) => {
|
||||
const reader = new FileReader();
|
||||
reader.onloadend = () => resolve(reader.result);
|
||||
reader.onerror = reject;
|
||||
reader.readAsDataURL(blob);
|
||||
});
|
||||
}
|
||||
base64ImageStore.set(base64Data);
|
||||
|
||||
const currentPrompt = images[currentIndex].prompt;
|
||||
dispatch("imagePrompt", { content: currentPrompt });
|
||||
}
|
||||
|
||||
async function convertImageToBase64(url) {
|
||||
const response = await fetch(url);
|
||||
const blob = await response.blob();
|
||||
return new Promise((resolve, reject) => {
|
||||
const reader = new FileReader();
|
||||
reader.onloadend = () => resolve(reader.result);
|
||||
reader.onerror = reject;
|
||||
reader.readAsDataURL(blob);
|
||||
});
|
||||
}
|
||||
</script>
|
||||
|
||||
<div class="flex w-full flex-col gap-3 rounded-xl bg-white p-5 my-2">
|
||||
<p>Example</p>
|
||||
<div class="relative w-full max-w-4xl mx-auto">
|
||||
<button
|
||||
class="absolute left-0 top-1/2 transform -translate-y-1/2 z-10 w-8 h-8 rounded-full sm:w-10 sm:h-10 bg-white/30 dark:bg-gray-800/30 group-hover:bg-white/50 dark:group-hover:bg-gray-800/60 group-focus:ring-4 group-focus:ring-white dark:group-focus:ring-gray-800/70 group-focus:outline-none"
|
||||
on:click={prevImage}
|
||||
aria-label="Previous image"
|
||||
>
|
||||
❮
|
||||
</button>
|
||||
<div class="my-2 flex w-full flex-col gap-3 rounded-xl bg-white p-5">
|
||||
<p>Example</p>
|
||||
<div class="relative mx-auto w-full max-w-4xl">
|
||||
<button
|
||||
class="absolute left-0 top-1/2 z-10 h-8 w-8 -translate-y-1/2 transform rounded-full bg-white/30 group-hover:bg-white/50 group-focus:outline-none group-focus:ring-4 group-focus:ring-white dark:bg-gray-800/30 dark:group-hover:bg-gray-800/60 dark:group-focus:ring-gray-800/70 sm:h-10 sm:w-10"
|
||||
on:click={prevImage}
|
||||
aria-label="Previous image"
|
||||
>
|
||||
❮
|
||||
</button>
|
||||
|
||||
<div class="relative">
|
||||
<img
|
||||
src={images[currentIndex].imgurl}
|
||||
alt={images[currentIndex].alt}
|
||||
class="carousel-image w-full h-auto cursor-pointer"
|
||||
on:click={handleImageClick}
|
||||
/>
|
||||
<div class="absolute bottom-0 left-0 bg-opacity-55 bg-black text-white p-3 w-full">
|
||||
<p>{images[currentIndex].prompt}</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="relative">
|
||||
<img
|
||||
src={images[currentIndex].imgurl}
|
||||
alt={images[currentIndex].alt}
|
||||
class="carousel-image h-auto w-full cursor-pointer"
|
||||
on:click={handleImageClick}
|
||||
/>
|
||||
<div
|
||||
class="absolute bottom-0 left-0 w-full bg-black bg-opacity-55 p-3 text-white"
|
||||
>
|
||||
<p>{images[currentIndex].prompt}</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<button
|
||||
class="absolute right-0 top-1/2 transform -translate-y-1/2 z-10 w-8 h-8 rounded-full sm:w-10 sm:h-10 bg-white/30 dark:bg-gray-800/30 group-hover:bg-white/50 dark:group-hover:bg-gray-800/60 group-focus:ring-4 group-focus:ring-white dark:group-focus:ring-gray-800/70 group-focus:outline-none"
|
||||
on:click={nextImage}
|
||||
aria-label="Next image"
|
||||
>
|
||||
❯
|
||||
</button>
|
||||
</div>
|
||||
<button
|
||||
class="absolute right-0 top-1/2 z-10 h-8 w-8 -translate-y-1/2 transform rounded-full bg-white/30 group-hover:bg-white/50 group-focus:outline-none group-focus:ring-4 group-focus:ring-white dark:bg-gray-800/30 dark:group-hover:bg-gray-800/60 dark:group-focus:ring-gray-800/70 sm:h-10 sm:w-10"
|
||||
on:click={nextImage}
|
||||
aria-label="Next image"
|
||||
>
|
||||
❯
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<style>
|
||||
.relative img {
|
||||
object-fit: cover;
|
||||
}
|
||||
.relative img {
|
||||
object-fit: cover;
|
||||
}
|
||||
</style>
|
||||
|
||||
@@ -6,11 +6,12 @@
|
||||
<script lang="ts">
|
||||
import { base64ImageStore } from "$lib/shared/stores/common/Store";
|
||||
import { Dropzone } from "flowbite-svelte";
|
||||
import Pica from 'pica';
|
||||
|
||||
let value = [];
|
||||
export let imageUrl = "";
|
||||
|
||||
$: if (imageUrl) {
|
||||
$: if (imageUrl !== "") {
|
||||
uploadImage();
|
||||
}
|
||||
|
||||
@@ -47,7 +48,7 @@
|
||||
};
|
||||
|
||||
const handleChange = (event) => {
|
||||
const files = event.target.files;
|
||||
const files = event.target.files;
|
||||
if (files.length > 0) {
|
||||
value = [files[0].name]; // Allow only one file selection
|
||||
readFileAsBase64(files[0]); // Convert to Base64
|
||||
@@ -55,14 +56,73 @@
|
||||
};
|
||||
|
||||
const readFileAsBase64 = (file) => {
|
||||
const reader = new FileReader();
|
||||
reader.onload = () => {
|
||||
const base64Data = reader.result; // Get Base64 data
|
||||
base64ImageStore.set(base64Data); // Store the Base64 string in the store
|
||||
imageUrl = URL.createObjectURL(file); // Keep the object URL for preview
|
||||
};
|
||||
reader.readAsDataURL(file); // Read the file as a Data URL
|
||||
};
|
||||
const reader = new FileReader();
|
||||
reader.onload = () => {
|
||||
const base64Data = reader.result;
|
||||
const fileType = file.type;
|
||||
|
||||
if (!fileType.includes("png")) {
|
||||
convertImageToPNG(base64Data); // Convert if not PNG
|
||||
} else {
|
||||
base64ImageStore.set(base64Data); // Store Base64
|
||||
}
|
||||
|
||||
imageUrl = URL.createObjectURL(file); // Create URL for preview
|
||||
};
|
||||
reader.readAsDataURL(file); // Read file as Data URL
|
||||
};
|
||||
|
||||
const convertImageToPNG = async (base64Data) => {
|
||||
if (!base64Data || !base64Data.startsWith("data:image/")) {
|
||||
console.error("Invalid Base64 data");
|
||||
return;
|
||||
}
|
||||
|
||||
console.log("Starting image conversion...");
|
||||
|
||||
const img = new Image();
|
||||
img.src = base64Data;
|
||||
|
||||
img.onload = async () => {
|
||||
const canvas = document.createElement("canvas");
|
||||
const ctx = canvas.getContext("2d");
|
||||
let width = img.width;
|
||||
let height = img.height;
|
||||
|
||||
      // Downscale the image before conversion to keep the Base64 payload small
      const scaleFactor = 0.1; // Resize factor (10% of the original dimensions)
|
||||
width = Math.floor(width * scaleFactor);
|
||||
height = Math.floor(height * scaleFactor);
|
||||
|
||||
canvas.width = width;
|
||||
canvas.height = height;
|
||||
|
||||
      ctx.drawImage(img, 0, 0, width, height); // Draw the image at the scaled width and height
|
||||
|
||||
const outputCanvas = document.createElement("canvas");
|
||||
outputCanvas.width = width;
|
||||
outputCanvas.height = height;
|
||||
|
||||
const pica = new Pica();
|
||||
|
||||
try {
|
||||
// Resize and compress the image using Pica
|
||||
await pica.resize(canvas, outputCanvas);
|
||||
|
||||
// Convert canvas to PNG format with data URL
|
||||
        const pngDataUrl = outputCanvas.toDataURL("image/png", 0.8); // Quality hint 0.8 (range 0-1); only used by lossy formats such as JPEG/WebP, ignored for PNG
|
||||
|
||||
// Store the Base64 PNG image
|
||||
base64ImageStore.set(pngDataUrl);
|
||||
} catch (err) {
|
||||
console.error("Error during image processing:", err);
|
||||
}
|
||||
};
|
||||
|
||||
img.onerror = (err) => {
|
||||
console.error("Error loading image:", err);
|
||||
};
|
||||
};
|
||||
|
||||
const showFiles = (files) => {
|
||||
if (files.length === 1) return files[0];
|
||||
|
||||
benchmark.py (new file, 343 lines)
@@ -0,0 +1,343 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import os
|
||||
import sys
|
||||
from datetime import datetime
|
||||
|
||||
import yaml
|
||||
from evals.benchmark.stresscli.commands.load_test import locust_runtests
|
||||
from kubernetes import client, config
|
||||
|
||||
# only support chatqna for now
|
||||
service_endpoints = {
|
||||
"chatqna": "/v1/chatqna",
|
||||
}
|
||||
|
||||
|
||||
def load_yaml(file_path):
|
||||
with open(file_path, "r") as f:
|
||||
data = yaml.safe_load(f)
|
||||
return data
|
||||
|
||||
|
||||
def construct_benchmark_config(test_suite_config):
|
||||
"""Extract relevant data from the YAML based on the specified test cases."""
|
||||
|
||||
return {
|
||||
"concurrency": test_suite_config.get("concurrency", []),
|
||||
"totoal_query_num": test_suite_config.get("user_queries", []),
|
||||
"duration:": test_suite_config.get("duration:", []),
|
||||
"query_num_per_concurrency": test_suite_config.get("query_num_per_concurrency", []),
|
||||
"possion": test_suite_config.get("possion", False),
|
||||
"possion_arrival_rate": test_suite_config.get("possion_arrival_rate", 1.0),
|
||||
"warmup_iterations": test_suite_config.get("warmup_iterations", 10),
|
||||
"seed": test_suite_config.get("seed", None),
|
||||
"test_cases": test_suite_config.get("test_cases", ["chatqnafixed"]),
|
||||
"user_queries": test_suite_config.get("user_queries", [1]),
|
||||
"query_token_size": test_suite_config.get("query_token_size", 128),
|
||||
"llm_max_token_size": test_suite_config.get("llm", {}).get("max_token_size", [128]),
|
||||
}
|
||||
|
||||
|
||||
def _get_cluster_ip(service_name, namespace="default"):
|
||||
"""Get the Cluster IP of a service in a Kubernetes cluster."""
|
||||
# Load the Kubernetes configuration
|
||||
config.load_kube_config() # or use config.load_incluster_config() if running inside a Kubernetes pod
|
||||
|
||||
# Create an API client for the core API (which handles services)
|
||||
v1 = client.CoreV1Api()
|
||||
|
||||
try:
|
||||
# Get the service object
|
||||
service = v1.read_namespaced_service(name=service_name, namespace=namespace)
|
||||
|
||||
# Extract the Cluster IP
|
||||
cluster_ip = service.spec.cluster_ip
|
||||
|
||||
# Extract the port number (assuming the first port, modify if necessary)
|
||||
if service.spec.ports:
|
||||
port_number = service.spec.ports[0].port # Get the first port number
|
||||
else:
|
||||
port_number = None
|
||||
|
||||
return cluster_ip, port_number
|
||||
except client.exceptions.ApiException as e:
|
||||
print(f"Error fetching service: {e}")
|
||||
return None
|
||||
|
||||
|
||||
def _get_service_ip(service_name, deployment_type="k8s", service_ip=None, service_port=None, namespace="default"):
|
||||
"""Get the service IP and port based on the deployment type.
|
||||
|
||||
Args:
|
||||
service_name (str): The name of the service.
|
||||
deployment_type (str): The type of deployment ("k8s" or "docker").
|
||||
service_ip (str): The IP address of the service (required for Docker deployment).
|
||||
service_port (int): The port of the service (required for Docker deployment).
|
||||
namespace (str): The namespace of the service (default is "default").
|
||||
|
||||
Returns:
|
||||
(str, int): The service IP and port.
|
||||
"""
|
||||
if deployment_type == "k8s":
|
||||
# Kubernetes IP and port retrieval logic
|
||||
svc_ip, port = _get_cluster_ip(service_name, namespace)
|
||||
elif deployment_type == "docker":
|
||||
# For Docker deployment, service_ip and service_port must be specified
|
||||
if not service_ip or not service_port:
|
||||
raise ValueError(
|
||||
"For Docker deployment, service_ip and service_port must be provided in the configuration."
|
||||
)
|
||||
svc_ip = service_ip
|
||||
port = service_port
|
||||
else:
|
||||
raise ValueError("Unsupported deployment type. Use 'k8s' or 'docker'.")
|
||||
|
||||
return svc_ip, port
|
||||
|
||||
|
||||
def _create_yaml_content(service, base_url, bench_target, test_phase, num_queries, test_params):
|
||||
"""Create content for the run.yaml file."""
|
||||
|
||||
# If a load shape includes the parameter concurrent_level,
|
||||
    # the parameter will be passed to Locust to launch a fixed
|
||||
# number of simulated users.
|
||||
concurrency = 1
|
||||
if num_queries >= 0:
|
||||
concurrency = max(1, num_queries // test_params["concurrent_level"])
|
||||
else:
|
||||
concurrency = test_params["concurrent_level"]
|
||||
|
||||
import importlib.util
|
||||
|
||||
package_name = "opea-eval"
|
||||
spec = importlib.util.find_spec(package_name)
|
||||
print(spec)
|
||||
|
||||
# get folder path of opea-eval
|
||||
eval_path = None
|
||||
import pkg_resources
|
||||
|
||||
for dist in pkg_resources.working_set:
|
||||
if "opea-eval" in dist.project_name:
|
||||
eval_path = dist.location
|
||||
if not eval_path:
|
||||
print("Fail to load opea-eval package. Please install it first.")
|
||||
exit(1)
|
||||
|
||||
yaml_content = {
|
||||
"profile": {
|
||||
"storage": {"hostpath": test_params["test_output_dir"]},
|
||||
"global-settings": {
|
||||
"tool": "locust",
|
||||
"locustfile": os.path.join(eval_path, "evals/benchmark/stresscli/locust/aistress.py"),
|
||||
"host": base_url,
|
||||
"stop-timeout": test_params["query_timeout"],
|
||||
"processes": 2,
|
||||
"namespace": test_params["namespace"],
|
||||
"bench-target": bench_target,
|
||||
"service-metric-collect": test_params["collect_service_metric"],
|
||||
"service-list": service.get("service_list", []),
|
||||
"dataset": service.get("dataset", "default"),
|
||||
"prompts": service.get("prompts", None),
|
||||
"max-output": service.get("max_output", 128),
|
||||
"seed": test_params.get("seed", None),
|
||||
"llm-model": test_params["llm_model"],
|
||||
"deployment-type": test_params["deployment_type"],
|
||||
"load-shape": test_params["load_shape"],
|
||||
},
|
||||
"runs": [{"name": test_phase, "users": concurrency, "max-request": num_queries}],
|
||||
}
|
||||
}
|
||||
|
||||
    # For the following scenarios, the test will stop after the specified run-time
|
||||
if test_params["run_time"] is not None and test_phase != "warmup":
|
||||
yaml_content["profile"]["global-settings"]["run-time"] = test_params["run_time"]
|
||||
|
||||
return yaml_content
|
||||
|
||||
|
||||
def _create_stresscli_confs(case_params, test_params, test_phase, num_queries, base_url, ts) -> list:
|
||||
"""Create a stresscli configuration file and persist it on disk."""
|
||||
stresscli_confs = []
|
||||
# Get the workload
|
||||
test_cases = test_params["test_cases"]
|
||||
for test_case in test_cases:
|
||||
stresscli_conf = {}
|
||||
print(test_case)
|
||||
if isinstance(test_case, str):
|
||||
bench_target = test_case
|
||||
elif isinstance(test_case, dict):
|
||||
bench_target = list(test_case.keys())[0]
|
||||
dataset_conf = test_case[bench_target]
|
||||
if bench_target == "chatqna_qlist_pubmed":
|
||||
max_lines = dataset_conf["dataset"].split("pub_med")[-1]
|
||||
stresscli_conf["envs"] = {"DATASET": f"pubmed_{max_lines}.txt", "MAX_LINES": max_lines}
|
||||
# Generate the content of stresscli configuration file
|
||||
stresscli_yaml = _create_yaml_content(case_params, base_url, bench_target, test_phase, num_queries, test_params)
|
||||
|
||||
# Dump the stresscli configuration file
|
||||
service_name = case_params.get("service_name")
|
||||
run_yaml_path = os.path.join(
|
||||
test_params["test_output_dir"], f"run_{service_name}_{ts}_{test_phase}_{num_queries}_{bench_target}.yaml"
|
||||
)
|
||||
with open(run_yaml_path, "w") as yaml_file:
|
||||
yaml.dump(stresscli_yaml, yaml_file)
|
||||
stresscli_conf["run_yaml_path"] = run_yaml_path
|
||||
stresscli_confs.append(stresscli_conf)
|
||||
return stresscli_confs
|
||||
|
||||
|
||||
def create_stresscli_confs(service, base_url, test_suite_config, index):
|
||||
"""Create and save the run.yaml file for the service being tested."""
|
||||
os.makedirs(test_suite_config["test_output_dir"], exist_ok=True)
|
||||
|
||||
stresscli_confs = []
|
||||
|
||||
# Add YAML configuration of stresscli for warm-ups
|
||||
warm_ups = test_suite_config["warm_ups"]
|
||||
if warm_ups is not None and warm_ups > 0:
|
||||
stresscli_confs.extend(_create_stresscli_confs(service, test_suite_config, "warmup", warm_ups, base_url, index))
|
||||
|
||||
# Add YAML configuration of stresscli for benchmark
|
||||
user_queries_lst = test_suite_config["user_queries"]
|
||||
if user_queries_lst is None or len(user_queries_lst) == 0:
|
||||
# Test stop is controlled by run time
|
||||
stresscli_confs.extend(_create_stresscli_confs(service, test_suite_config, "benchmark", -1, base_url, index))
|
||||
else:
|
||||
# Test stop is controlled by request count
|
||||
for user_queries in user_queries_lst:
|
||||
stresscli_confs.extend(
|
||||
_create_stresscli_confs(service, test_suite_config, "benchmark", user_queries, base_url, index)
|
||||
)
|
||||
|
||||
return stresscli_confs
|
||||
|
||||
|
||||
def _run_service_test(example, service, test_suite_config):
|
||||
"""Run the test for a specific service and example."""
|
||||
print(f"[OPEA BENCHMARK] 🚀 Example: [ {example} ] Service: [ {service.get('service_name')} ], Running test...")
|
||||
|
||||
# Get the service name
|
||||
service_name = service.get("service_name")
|
||||
|
||||
# Get the deployment type from the test suite configuration
|
||||
deployment_type = test_suite_config.get("deployment_type", "k8s")
|
||||
|
||||
# Get the service IP and port based on deployment type
|
||||
svc_ip, port = _get_service_ip(
|
||||
service_name,
|
||||
deployment_type,
|
||||
test_suite_config.get("service_ip"),
|
||||
test_suite_config.get("service_port"),
|
||||
test_suite_config.get("namespace"),
|
||||
)
|
||||
|
||||
base_url = f"http://{svc_ip}:{port}"
|
||||
endpoint = service_endpoints[example]
|
||||
url = f"{base_url}{endpoint}"
|
||||
print(f"[OPEA BENCHMARK] 🚀 Running test for {service_name} at {url}")
|
||||
|
||||
# Generate a unique index based on the current time
|
||||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
|
||||
# Create the run.yaml for the service
|
||||
stresscli_confs = create_stresscli_confs(service, base_url, test_suite_config, timestamp)
|
||||
|
||||
# Do benchmark in for-loop for different user queries
|
||||
output_folders = []
|
||||
for index, stresscli_conf in enumerate(stresscli_confs, start=1):
|
||||
run_yaml_path = stresscli_conf["run_yaml_path"]
|
||||
print(f"[OPEA BENCHMARK] 🚀 The {index} time test is running, run yaml: {run_yaml_path}...")
|
||||
os.environ["MAX_TOKENS"] = str(service.get("max_output"))
|
||||
if stresscli_conf.get("envs") is not None:
|
||||
for key, value in stresscli_conf.get("envs").items():
|
||||
os.environ[key] = value
|
||||
|
||||
output_folders.append(locust_runtests(None, run_yaml_path))
|
||||
|
||||
print(f"[OPEA BENCHMARK] 🚀 Test completed for {service_name} at {url}")
|
||||
return output_folders
|
||||
|
||||
|
||||
def run_benchmark(benchmark_config, chart_name, namespace, llm_model=None, report=False):
|
||||
# If llm_model is None or an empty string, set to default value
|
||||
if not llm_model:
|
||||
llm_model = "Qwen/Qwen2.5-Coder-7B-Instruct"
|
||||
|
||||
# Extract data
|
||||
parsed_data = construct_benchmark_config(benchmark_config)
|
||||
test_suite_config = {
|
||||
"user_queries": parsed_data["user_queries"], # num of user queries
|
||||
"random_prompt": False, # whether to use random prompt, set to False by default
|
||||
"run_time": "60m", # The max total run time for the test suite, set to 60m by default
|
||||
"collect_service_metric": False, # whether to collect service metrics, set to False by default
|
||||
"llm_model": llm_model, # The LLM model used for the test
|
||||
"deployment_type": "k8s", # Default is "k8s", can also be "docker"
|
||||
"service_ip": None, # Leave as None for k8s, specify for Docker
|
||||
"service_port": None, # Leave as None for k8s, specify for Docker
|
||||
"test_output_dir": os.getcwd() + "/benchmark_output", # The directory to store the test output
|
||||
"load_shape": {
|
||||
"name": "constant",
|
||||
"params": {"constant": {"concurrent_level": 4}, "poisson": {"arrival_rate": 1.0}},
|
||||
},
|
||||
"concurrent_level": 4,
|
||||
"arrival_rate": 1.0,
|
||||
"query_timeout": 120,
|
||||
"warm_ups": parsed_data["warmup_iterations"],
|
||||
"seed": parsed_data["seed"],
|
||||
"namespace": namespace,
|
||||
"test_cases": parsed_data["test_cases"],
|
||||
"llm_max_token_size": parsed_data["llm_max_token_size"],
|
||||
}
|
||||
|
||||
dataset = None
|
||||
query_data = None
|
||||
|
||||
# Do benchmark in for-loop for different llm_max_token_size
|
||||
for llm_max_token in parsed_data["llm_max_token_size"]:
|
||||
print(f"[OPEA BENCHMARK] 🚀 Run benchmark on {dataset} with llm max-output-token {llm_max_token}.")
|
||||
case_data = {}
|
||||
# Support chatqna only for now
|
||||
if chart_name == "chatqna":
|
||||
case_data = {
|
||||
"run_test": True,
|
||||
"service_name": "chatqna",
|
||||
"service_list": [
|
||||
"chatqna",
|
||||
"chatqna-chatqna-ui",
|
||||
"chatqna-data-prep",
|
||||
"chatqna-nginx",
|
||||
"chatqna-redis-vector-db",
|
||||
"chatqna-retriever-usvc",
|
||||
"chatqna-tei",
|
||||
"chatqna-teirerank",
|
||||
"chatqna-tgi",
|
||||
],
|
||||
"test_cases": parsed_data["test_cases"],
|
||||
                # Only used when random_prompt=true; leave blank for the default dataset (WebQuestions) or set to sharegpt
|
||||
"prompts": query_data,
|
||||
"max_output": llm_max_token, # max number of output tokens
|
||||
"k": 1, # number of retrieved documents
|
||||
}
|
||||
output_folder = _run_service_test(chart_name, case_data, test_suite_config)
|
||||
|
||||
print(f"[OPEA BENCHMARK] 🚀 Test Finished. Output saved in {output_folder}.")
|
||||
|
||||
if report:
|
||||
print(output_folder)
|
||||
all_results = dict()
|
||||
for folder in output_folder:
|
||||
from evals.benchmark.stresscli.commands.report import get_report_results
|
||||
|
||||
results = get_report_results(folder)
|
||||
all_results[folder] = results
|
||||
print(f"results = {results}\n")
|
||||
|
||||
return all_results
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
benchmark_config = load_yaml("./benchmark.yaml")
|
||||
run_benchmark(benchmark_config=benchmark_config, chart_name="chatqna", namespace="deploy-benchmark")
|
||||
deploy.py (new file, 674 lines)
@@ -0,0 +1,674 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import argparse
|
||||
import glob
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
from enum import Enum, auto
|
||||
|
||||
import yaml
|
||||
|
||||
################################################################################
|
||||
# #
|
||||
# HELM VALUES GENERATION SECTION #
|
||||
# #
|
||||
################################################################################
|
||||
|
||||
|
||||
def configure_node_selectors(values, node_selector, deploy_config):
|
||||
"""Configure node selectors for all services."""
|
||||
for service_name, config in deploy_config["services"].items():
|
||||
if service_name == "backend":
|
||||
values["nodeSelector"] = {key: value for key, value in node_selector.items()}
|
||||
elif service_name == "llm":
|
||||
engine = config.get("engine", "tgi")
|
||||
values[engine] = {"nodeSelector": {key: value for key, value in node_selector.items()}}
|
||||
else:
|
||||
values[service_name] = {"nodeSelector": {key: value for key, value in node_selector.items()}}
|
||||
return values
|
||||
|
||||
|
||||
def configure_replica(values, deploy_config):
|
||||
"""Get replica configuration based on example type and node count."""
|
||||
for service_name, config in deploy_config["services"].items():
|
||||
if not config.get("replicaCount"):
|
||||
continue
|
||||
|
||||
if service_name == "llm":
|
||||
engine = config.get("engine", "tgi")
|
||||
values[engine]["replicaCount"] = config["replicaCount"]
|
||||
elif service_name == "backend":
|
||||
values["replicaCount"] = config["replicaCount"]
|
||||
else:
|
||||
values[service_name]["replicaCount"] = config["replicaCount"]
|
||||
|
||||
return values
|
||||
|
||||
|
||||
def get_output_filename(num_nodes, with_rerank, example_type, device, action_type):
|
||||
"""Generate output filename based on configuration."""
|
||||
rerank_suffix = "with-rerank-" if with_rerank else ""
|
||||
action_suffix = "deploy-" if action_type == 0 else "update-" if action_type == 1 else ""
|
||||
|
||||
return f"{example_type}-{num_nodes}-{device}-{action_suffix}{rerank_suffix}values.yaml"
|
||||
|
||||
|
||||
def configure_resources(values, deploy_config):
|
||||
"""Configure resources when tuning is enabled."""
|
||||
resource_configs = []
|
||||
|
||||
for service_name, config in deploy_config["services"].items():
|
||||
resources = {}
|
||||
if deploy_config["device"] == "gaudi" and config.get("cards_per_instance", 0) > 1:
|
||||
resources = {
|
||||
"limits": {"habana.ai/gaudi": config["cards_per_instance"]},
|
||||
"requests": {"habana.ai/gaudi": config["cards_per_instance"]},
|
||||
}
|
||||
else:
|
||||
limits = {}
|
||||
requests = {}
|
||||
|
||||
# Only add CPU if cores_per_instance has a value
|
||||
if config.get("cores_per_instance"):
|
||||
limits["cpu"] = config["cores_per_instance"]
|
||||
requests["cpu"] = config["cores_per_instance"]
|
||||
|
||||
# Only add memory if memory_capacity has a value
|
||||
if config.get("memory_capacity"):
|
||||
limits["memory"] = config["memory_capacity"]
|
||||
requests["memory"] = config["memory_capacity"]
|
||||
|
||||
# Only create resources if we have any limits/requests
|
||||
if limits and requests:
|
||||
resources["limits"] = limits
|
||||
resources["requests"] = requests
|
||||
|
||||
if resources:
|
||||
if service_name == "llm":
|
||||
engine = config.get("engine", "tgi")
|
||||
resource_configs.append(
|
||||
{
|
||||
"name": engine,
|
||||
"resources": resources,
|
||||
}
|
||||
)
|
||||
else:
|
||||
resource_configs.append(
|
||||
{
|
||||
"name": service_name,
|
||||
"resources": resources,
|
||||
}
|
||||
)
|
||||
|
||||
for config in [r for r in resource_configs if r]:
|
||||
service_name = config["name"]
|
||||
if service_name == "backend":
|
||||
values["resources"] = config["resources"]
|
||||
elif service_name in values:
|
||||
values[service_name]["resources"] = config["resources"]
|
||||
|
||||
return values
|
||||
|
||||
|
||||
def configure_extra_cmd_args(values, deploy_config):
|
||||
"""Configure extra command line arguments for services."""
|
||||
for service_name, config in deploy_config["services"].items():
|
||||
extra_cmd_args = []
|
||||
|
||||
for param in [
|
||||
"max_batch_size",
|
||||
"max_input_length",
|
||||
"max_total_tokens",
|
||||
"max_batch_total_tokens",
|
||||
"max_batch_prefill_tokens",
|
||||
]:
|
||||
if config.get(param):
|
||||
extra_cmd_args.extend([f"--{param.replace('_', '-')}", str(config[param])])
|
||||
|
||||
if extra_cmd_args:
|
||||
if service_name == "llm":
|
||||
engine = config.get("engine", "tgi")
|
||||
if engine not in values:
|
||||
values[engine] = {}
|
||||
values[engine]["extraCmdArgs"] = extra_cmd_args
|
||||
else:
|
||||
if service_name not in values:
|
||||
values[service_name] = {}
|
||||
values[service_name]["extraCmdArgs"] = extra_cmd_args
|
||||
|
||||
return values
|
||||
|
||||
|
||||
def configure_models(values, deploy_config):
|
||||
"""Configure model settings for services."""
|
||||
for service_name, config in deploy_config["services"].items():
|
||||
# Skip if no model_id defined or service is disabled
|
||||
if not config.get("model_id") or config.get("enabled") is False:
|
||||
continue
|
||||
|
||||
if service_name == "llm":
|
||||
# For LLM service, use its engine as the key
|
||||
engine = config.get("engine", "tgi")
|
||||
values[engine]["LLM_MODEL_ID"] = config.get("model_id")
|
||||
elif service_name == "tei":
|
||||
values[service_name]["EMBEDDING_MODEL_ID"] = config.get("model_id")
|
||||
elif service_name == "teirerank":
|
||||
values[service_name]["RERANK_MODEL_ID"] = config.get("model_id")
|
||||
|
||||
return values
|
||||
|
||||
|
||||
def configure_rerank(values, with_rerank, deploy_config, example_type, node_selector):
|
||||
"""Configure rerank service."""
|
||||
if with_rerank:
|
||||
if "teirerank" not in values:
|
||||
values["teirerank"] = {"nodeSelector": {key: value for key, value in node_selector.items()}}
|
||||
elif "nodeSelector" not in values["teirerank"]:
|
||||
values["teirerank"]["nodeSelector"] = {key: value for key, value in node_selector.items()}
|
||||
else:
|
||||
if example_type == "chatqna":
|
||||
values["image"] = {"repository": "opea/chatqna-without-rerank"}
|
||||
if "teirerank" not in values:
|
||||
values["teirerank"] = {"enabled": False}
|
||||
elif "enabled" not in values["teirerank"]:
|
||||
values["teirerank"]["enabled"] = False
|
||||
return values
|
||||
|
||||
|
||||
def generate_helm_values(example_type, deploy_config, chart_dir, action_type, node_selector=None):
|
||||
"""Create a values.yaml file based on the provided configuration."""
|
||||
if deploy_config is None:
|
||||
raise ValueError("deploy_config is required")
|
||||
|
||||
# Ensure the chart_dir exists
|
||||
if not os.path.exists(chart_dir):
|
||||
return {"status": "false", "message": f"Chart directory {chart_dir} does not exist"}
|
||||
|
||||
num_nodes = deploy_config.get("node", 1)
|
||||
with_rerank = deploy_config["services"].get("teirerank", {}).get("enabled", False)
|
||||
|
||||
print(f"Generating values for {example_type} example")
|
||||
print(f"with_rerank: {with_rerank}")
|
||||
print(f"num_nodes: {num_nodes}")
|
||||
print(f"node_selector: {node_selector}")
|
||||
|
||||
# Initialize base values
|
||||
values = {
|
||||
"global": {
|
||||
"HUGGINGFACEHUB_API_TOKEN": deploy_config.get("HUGGINGFACEHUB_API_TOKEN", ""),
|
||||
"modelUseHostPath": deploy_config.get("modelUseHostPath", ""),
|
||||
}
|
||||
}
|
||||
|
||||
# Configure components
|
||||
values = configure_node_selectors(values, node_selector or {}, deploy_config)
|
||||
values = configure_rerank(values, with_rerank, deploy_config, example_type, node_selector or {})
|
||||
values = configure_replica(values, deploy_config)
|
||||
values = configure_resources(values, deploy_config)
|
||||
values = configure_extra_cmd_args(values, deploy_config)
|
||||
values = configure_models(values, deploy_config)
|
||||
|
||||
device = deploy_config.get("device", "unknown")
|
||||
|
||||
# Generate and write YAML file
|
||||
filename = get_output_filename(num_nodes, with_rerank, example_type, device, action_type)
|
||||
yaml_string = yaml.dump(values, default_flow_style=False)
|
||||
|
||||
filepath = os.path.join(chart_dir, filename)
|
||||
|
||||
# Write the YAML data to the file
|
||||
with open(filepath, "w") as file:
|
||||
file.write(yaml_string)
|
||||
|
||||
print(f"YAML file {filepath} has been generated.")
|
||||
return {"status": "success", "filepath": filepath}
|
||||
|
||||
|
||||
################################################################################
|
||||
# #
|
||||
# DEPLOYMENT SECTION #
|
||||
# #
|
||||
################################################################################
|
||||
|
||||
|
||||
def run_kubectl_command(command):
|
||||
"""Run a kubectl command and return the output."""
|
||||
try:
|
||||
result = subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
|
||||
return result.stdout
|
||||
except subprocess.CalledProcessError as e:
|
||||
print(f"Error running command: {command}\n{e.stderr}")
|
||||
exit(1)
|
||||
|
||||
|
||||
def get_all_nodes():
|
||||
"""Get the list of all nodes in the Kubernetes cluster."""
|
||||
command = ["kubectl", "get", "nodes", "-o", "json"]
|
||||
output = run_kubectl_command(command)
|
||||
nodes = json.loads(output)
|
||||
return [node["metadata"]["name"] for node in nodes["items"]]
|
||||
|
||||
|
||||
def add_label_to_node(node_name, label):
|
||||
"""Add a label to the specified node."""
|
||||
command = ["kubectl", "label", "node", node_name, label, "--overwrite"]
|
||||
print(f"Labeling node {node_name} with {label}...")
|
||||
run_kubectl_command(command)
|
||||
print(f"Label {label} added to node {node_name} successfully.")
|
||||
|
||||
|
||||
def add_labels_to_nodes(node_count=None, label=None, node_names=None):
|
||||
"""Add a label to the specified number of nodes or to specified nodes."""
|
||||
|
||||
if node_names:
|
||||
# Add label to the specified nodes
|
||||
for node_name in node_names:
|
||||
add_label_to_node(node_name, label)
|
||||
else:
|
||||
# Fetch the node list and label the specified number of nodes
|
||||
all_nodes = get_all_nodes()
|
||||
if node_count is None or node_count > len(all_nodes):
|
||||
print(f"Error: Node count exceeds the number of available nodes ({len(all_nodes)} available).")
|
||||
sys.exit(1)
|
||||
|
||||
selected_nodes = all_nodes[:node_count]
|
||||
for node_name in selected_nodes:
|
||||
add_label_to_node(node_name, label)
|
||||
|
||||
|
||||


def clear_labels_from_nodes(label, node_names=None):
    """Clear the specified label from specific nodes if provided, otherwise from all nodes."""
    label_key = label.split("=")[0]  # Extract key from 'key=value' format

    # If specific nodes are provided, use them; otherwise, get all nodes
    nodes_to_clear = node_names if node_names else get_all_nodes()

    for node_name in nodes_to_clear:
        # Check if the node has the label by inspecting its metadata
        command = ["kubectl", "get", "node", node_name, "-o", "json"]
        node_info = run_kubectl_command(command)
        node_metadata = json.loads(node_info)

        # Check if the label exists on this node
        labels = node_metadata["metadata"].get("labels", {})
        if label_key in labels:
            # Remove the label from the node
            command = ["kubectl", "label", "node", node_name, f"{label_key}-"]
            print(f"Removing label {label_key} from node {node_name}...")
            run_kubectl_command(command)
            print(f"Label {label_key} removed from node {node_name} successfully.")
        else:
            print(f"Label {label_key} not found on node {node_name}, skipping.")
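# For reference, the labeling helpers above boil down to kubectl commands of roughly
# this shape (illustrative node name; the default label comes from the --label option
# defined further down in main()):
#   kubectl label node worker-1 node-type=opea-benchmark --overwrite   # add/refresh the label
#   kubectl label node worker-1 node-type-                             # drop the label key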


def get_hw_values_file(deploy_config, chart_dir):
    """Get the hardware-specific values file based on the deploy configuration."""
    device_type = deploy_config.get("device", "cpu")
    if device_type == "cpu":
        print(f"Device type is {device_type}. Using existing Helm chart values files.")
        return None

    llm_engine = deploy_config.get("services", {}).get("llm", {}).get("engine", "tgi")
    version = deploy_config.get("version", "1.1.0")

    if os.path.isdir(chart_dir):
        # Determine which values file to use based on version
        if version in ["1.0.0", "1.1.0"]:
            hw_values_file = os.path.join(chart_dir, f"{device_type}-values.yaml")
        else:
            hw_values_file = os.path.join(chart_dir, f"{device_type}-{llm_engine}-values.yaml")

        if not os.path.exists(hw_values_file):
            print(f"Warning: {hw_values_file} not found")
            hw_values_file = None
        else:
            print(f"Device-specific values file found: {hw_values_file}")
    else:
        print(f"Error: Could not find directory for {chart_dir}")
        hw_values_file = None

    return hw_values_file


def install_helm_release(release_name, chart_name, namespace, hw_values_file, deploy_values_file):
    """Deploy a Helm release with a specified name and chart.

    Parameters:
    - release_name: The name of the Helm release.
    - chart_name: The Helm chart name or path.
    - namespace: The Kubernetes namespace for deployment.
    - hw_values_file: The hardware-specific values file.
    - deploy_values_file: The values file for deployment.
    """

    # Check if the namespace exists; if not, create it
    try:
        command = ["kubectl", "get", "namespace", namespace]
        subprocess.run(command, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    except subprocess.CalledProcessError:
        print(f"Namespace '{namespace}' does not exist. Creating it...")
        command = ["kubectl", "create", "namespace", namespace]
        subprocess.run(command, check=True)
        print(f"Namespace '{namespace}' created successfully.")

    try:
        # Prepare the Helm install command
        command = ["helm", "install", release_name, chart_name, "--namespace", namespace]

        # Append values files in order
        if hw_values_file:
            command.extend(["-f", hw_values_file])
        if deploy_values_file:
            command.extend(["-f", deploy_values_file])

        # Execute the Helm install command
        print(f"Running command: {' '.join(command)}")
        subprocess.run(command, check=True)
        print("Deployment initiated successfully.")

    except subprocess.CalledProcessError as e:
        print(f"Error occurred while deploying Helm release: {e}")
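# For reference, with a chart directory named "chatqna" and a Gaudi values file, the
# function above composes a command of roughly this shape (illustrative file and
# namespace names, not taken from the original script):
#   helm install chatqna ./chatqna --namespace benchmark -f gaudi-values.yaml -f chatqna_values.yaml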


def uninstall_helm_release(release_name, namespace=None):
    """Uninstall a Helm release and clean up resources, optionally delete the namespace if not 'default'."""
    # Default to 'default' namespace if none is specified
    if not namespace:
        namespace = "default"

    try:
        # Uninstall the Helm release
        command = ["helm", "uninstall", release_name, "--namespace", namespace]
        print(f"Uninstalling Helm release {release_name} in namespace {namespace}...")
        run_kubectl_command(command)
        print(f"Helm release {release_name} uninstalled successfully.")

        # If the namespace is specified and not 'default', delete it
        if namespace != "default":
            print(f"Deleting namespace {namespace}...")
            delete_namespace_command = ["kubectl", "delete", "namespace", namespace]
            run_kubectl_command(delete_namespace_command)
            print(f"Namespace {namespace} deleted successfully.")
        else:
            print("Namespace is 'default', skipping deletion.")

    except subprocess.CalledProcessError as e:
        print(f"Error occurred while uninstalling Helm release or deleting namespace: {e}")


def update_service(release_name, chart_name, namespace, hw_values_file, deploy_values_file, update_values_file):
    """Update the deployment using helm upgrade with new values.

    Args:
        release_name: The helm release name
        chart_name: The chart name for the deployment
        namespace: The kubernetes namespace
        hw_values_file: The hardware-specific values file
        deploy_values_file: The values file used for the original deployment
        update_values_file: The values file carrying the updated configuration
    """

    # Construct helm upgrade command
    command = [
        "helm",
        "upgrade",
        release_name,
        chart_name,
        "--namespace",
        namespace,
        "-f",
        hw_values_file,
        "-f",
        deploy_values_file,
        "-f",
        update_values_file,
    ]
    # Execute helm upgrade
    print(f"Running command: {' '.join(command)}")
    run_kubectl_command(command)
    print("Deployment updated successfully")


def read_deploy_config(config_path):
    """Read and parse the deploy config file.

    Args:
        config_path: Path to the deploy config file

    Returns:
        The parsed deploy config dictionary or None if failed
    """
    try:
        with open(config_path, "r") as f:
            return yaml.safe_load(f)
    except Exception as e:
        print(f"Failed to load deploy config: {str(e)}")
        return None
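# Illustrative sketch only (not part of the original script): judging from the keys read
# elsewhere in this file, a minimal deploy config YAML could look like:
#
#   HUGGINGFACEHUB_API_TOKEN: "hf_xxx"     # forwarded into the global Helm values
#   modelUseHostPath: /mnt/opea-models     # host path for model storage
#   device: gaudi                          # "cpu" skips the device-specific values file
#   version: 1.1.0                         # selects <device>-values.yaml vs <device>-<engine>-values.yaml
#   services:
#     llm:
#       engine: tgi                        # llm engine, defaults to "tgi"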


def check_deployment_ready(release_name, namespace, timeout=300, interval=5, logfile="deployment.log"):
    """Wait until all pods in the deployment are running and ready.

    Args:
        release_name: The Helm release name whose deployments are checked
        namespace: The Kubernetes namespace
        timeout: The maximum time to wait in seconds (default 300 seconds)
        interval: The interval between checks in seconds (default 5 seconds)
        logfile: The file to log output to (default 'deployment.log')

    Returns:
        0 if success, 1 if failure (timeout reached)
    """
    try:
        # Get the list of deployments in the namespace
        cmd = ["kubectl", "-n", namespace, "get", "deployments", "-o", "jsonpath='{.items[*].metadata.name}'"]
        deployments_output = subprocess.check_output(cmd, text=True)
        deployments = deployments_output.strip().split()

        # Strip the first and last elements of single quotes if present
        deployments[0] = deployments[0].strip("'")
        deployments[-1] = deployments[-1].strip("'")

        with open(logfile, "a") as log:
            log.write(f"Found deployments: {', '.join(deployments)}\n")

        timer = 0

        # Loop through each deployment to check its readiness
        for deployment_name in deployments:

            if "-" not in deployment_name or "ui" in deployment_name or "nginx" in deployment_name:
                continue

            instance_name = deployment_name.split("-", 1)[0]
            app_name = deployment_name.split("-", 1)[1]

            if instance_name != release_name:
                continue

            cmd = ["kubectl", "-n", namespace, "get", "deployment", deployment_name, "-o", "jsonpath={.spec.replicas}"]
            desired_replicas = int(subprocess.check_output(cmd, text=True).strip())

            with open(logfile, "a") as log:
                log.write(f"Checking deployment '{deployment_name}' with desired replicas: {desired_replicas}\n")

            while True:
                cmd = [
                    "kubectl",
                    "-n",
                    namespace,
                    "get",
                    "pods",
                    "-l",
                    f"app.kubernetes.io/instance={instance_name}",
                    "-l",
                    f"app.kubernetes.io/name={app_name}",
                    "--field-selector=status.phase=Running",
                    "-o",
                    "json",
                ]

                pods_output = subprocess.check_output(cmd, text=True)
                pods = json.loads(pods_output)

                ready_pods = sum(
                    1
                    for pod in pods["items"]
                    if all(container.get("ready") for container in pod.get("status", {}).get("containerStatuses", []))
                )

                terminating_pods = sum(
                    1 for pod in pods["items"] if pod.get("metadata", {}).get("deletionTimestamp") is not None
                )

                with open(logfile, "a") as log:
                    log.write(
                        f"Ready pods: {ready_pods}, Desired replicas: {desired_replicas}, Terminating pods: {terminating_pods}\n"
                    )

                if ready_pods == desired_replicas and terminating_pods == 0:
                    with open(logfile, "a") as log:
                        log.write(f"All pods for deployment '{deployment_name}' are running and ready.\n")
                    break

                if timer >= timeout:
                    with open(logfile, "a") as log:
                        log.write(
                            f"Timeout reached for deployment '{deployment_name}'. Not all pods are running and ready.\n"
                        )
                    return 1  # Failure

                time.sleep(interval)
                timer += interval

        return 0  # Success for all deployments

    except subprocess.CalledProcessError as e:
        with open(logfile, "a") as log:
            log.write(f"Error executing kubectl command: {e}\n")
        return 1  # Failure
    except json.JSONDecodeError as e:
        with open(logfile, "a") as log:
            log.write(f"Error parsing kubectl output: {e}\n")
        return 1  # Failure
    except Exception as e:
        with open(logfile, "a") as log:
            log.write(f"Unexpected error: {e}\n")
        return 1  # Failure


def main():
    parser = argparse.ArgumentParser(description="Manage Helm Deployment.")
    parser.add_argument(
        "--chart-name",
        type=str,
        default="chatqna",
        help="The chart name to deploy (default: chatqna).",
    )
    parser.add_argument("--namespace", default="default", help="Kubernetes namespace (default: default).")
    parser.add_argument("--user-values", help="Path to a user-specified values.yaml file.")
    parser.add_argument("--deploy-config", help="Path to a deploy config yaml file.")
    parser.add_argument(
        "--create-values-only", action="store_true", help="Only create the values.yaml file without deploying."
    )
    parser.add_argument("--uninstall", action="store_true", help="Uninstall the Helm release.")
    parser.add_argument("--num-nodes", type=int, default=1, help="Number of nodes to use (default: 1).")
    parser.add_argument("--node-names", nargs="*", help="Optional specific node names to label.")
    parser.add_argument("--add-label", action="store_true", help="Add label to specified nodes if this flag is set.")
    parser.add_argument(
        "--delete-label", action="store_true", help="Delete label from specified nodes if this flag is set."
    )
    parser.add_argument(
        "--label", default="node-type=opea-benchmark", help="Label to add/delete (default: node-type=opea-benchmark)."
    )
    parser.add_argument("--update-service", action="store_true", help="Update the deployment with new configuration.")
    parser.add_argument("--check-ready", action="store_true", help="Check if all services in the deployment are ready.")
    parser.add_argument("--chart-dir", default=".", help="Path to the untarred Helm chart directory.")

    args = parser.parse_args()

    # Node labeling management
    if args.add_label:
        add_labels_to_nodes(args.num_nodes, args.label, args.node_names)
        return
    elif args.delete_label:
        clear_labels_from_nodes(args.label, args.node_names)
        return
    elif args.check_ready:
        is_ready = check_deployment_ready(args.chart_name, args.namespace)
        return is_ready
    elif args.uninstall:
        uninstall_helm_release(args.chart_name, args.namespace)
        return

    # Load deploy_config if provided
    deploy_config = None
    if args.deploy_config:
        deploy_config = read_deploy_config(args.deploy_config)
        if deploy_config is None:
            parser.error("Failed to load deploy config")
            return

    hw_values_file = get_hw_values_file(deploy_config, args.chart_dir)

    action_type = 0
    if args.update_service:
        action_type = 1

    # The user file is provided for deploy when --update-service is not specified
    if args.user_values and not args.update_service:
        values_file_path = args.user_values
    else:
        if not args.deploy_config:
            parser.error("--deploy-config is required")

        node_selector = {args.label.split("=")[0]: args.label.split("=")[1]}

        print("go to generate deploy values" if action_type == 0 else "go to generate update values")

        # Generate values file for deploy or update service
        result = generate_helm_values(
            example_type=args.chart_name,
            deploy_config=deploy_config,
            chart_dir=args.chart_dir,
            action_type=action_type,  # 0 - deploy, 1 - update
            node_selector=node_selector,
        )

        # Check result status
        if result["status"] == "success":
            values_file_path = result["filepath"]
        else:
            parser.error(f"Failed to generate values.yaml: {result['message']}")
            return

    print("start to read the generated values file")
    # Read back the generated YAML file for verification
    with open(values_file_path, "r") as file:
        print("Generated YAML contents:")
        print(file.read())

    # Handle service update if specified
    if args.update_service:
        if not args.user_values:
            parser.error("--user-values is required for update reference")

        try:
            update_service(
                args.chart_name, args.chart_name, args.namespace, hw_values_file, args.user_values, values_file_path
            )
            return
        except Exception as e:
            parser.error(f"Failed to update deployment: {str(e)}")
            return

    # Deploy unless --create-values-only is specified
    if not args.create_values_only:
        install_helm_release(args.chart_name, args.chart_name, args.namespace, hw_values_file, values_file_path)
    print(f"values_file_path: {values_file_path}")


if __name__ == "__main__":
    main()
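# Example invocations (illustrative; the script filename and the config/namespace names
# are assumptions, not taken from the original source):
#   python deploy.py --add-label --num-nodes 2
#   python deploy.py --deploy-config deploy_config.yaml --chart-dir ./chatqna --chart-name chatqna --namespace benchmark
#   python deploy.py --check-ready --chart-name chatqna --namespace benchmark
#   python deploy.py --uninstall --chart-name chatqna --namespace benchmark
#   python deploy.py --delete-label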
File diff suppressed because it is too large
@@ -2,34 +2,44 @@
|
||||
|
||||
A list of released OPEA docker images on https://hub.docker.com/, containing all relevant images from the GenAIExamples, GenAIComps and GenAIInfra projects. Please expect more publicly available images in future releases.
|
||||
|
||||
Take ChatQnA for example. ChatQnA is a chatbot application service based on the Retrieval Augmented Generation (RAG) architecture. It consists of [opea/embedding](https://hub.docker.com/r/opea/embedding), [opea/retriever](https://hub.docker.com/r/opea/retriever-redis), [opea/reranking-tei](https://hub.docker.com/r/opea/reranking-tei), [opea/llm-textgen](https://hub.docker.com/r/opea/llm-textgen), [opea/dataprep](), [opea/chatqna](https://hub.docker.com/r/opea/chatqna), [opea/chatqna-ui](https://hub.docker.com/r/opea/chatqna-ui) and [opea/chatqna-conversation-ui](https://hub.docker.com/r/opea/chatqna-conversation-ui) (Optional) multiple microservices. Other services are similar, see the corresponding README for details.
|
||||
Take ChatQnA for example. ChatQnA is a chatbot application service based on the Retrieval Augmented Generation (RAG) architecture. It consists of [opea/embedding](https://hub.docker.com/r/opea/embedding), [opea/retriever](https://hub.docker.com/r/opea/retriever-redis), [opea/reranking-tei](https://hub.docker.com/r/opea/reranking-tei), [opea/llm-textgen](https://hub.docker.com/r/opea/llm-textgen), [opea/dataprep](https://hub.docker.com/r/opea/dataprep), [opea/chatqna](https://hub.docker.com/r/opea/chatqna), [opea/chatqna-ui](https://hub.docker.com/r/opea/chatqna-ui) and [opea/chatqna-conversation-ui](https://hub.docker.com/r/opea/chatqna-conversation-ui) (Optional) multiple microservices. Other services are similar, see the corresponding README for details.
|
||||
|
||||
## Example images
|
||||
|
||||
| Example Images | Dockerfile | Description |
|
||||
| ----------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| [opea/audioqna](https://hub.docker.com/r/opea/audioqna) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/AudioQnA/Dockerfile) | The docker image served as a chatqna gateway and using language modeling to generate answers to user queries by converting audio input to text, and then using text-to-speech (TTS) to convert those answers back to speech for interaction. |
|
||||
| [opea/chatqna](https://hub.docker.com/r/opea/chatqna) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/ChatQnA/Dockerfile) | The docker image served as a chatqna gateway and interacted with users by understanding their questions and providing relevant answers. |
|
||||
| [opea/audioqna](https://hub.docker.com/r/opea/audioqna) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/AudioQnA/Dockerfile) | The docker image served as an audioqna gateway and using language modeling to generate answers to user queries by converting audio input to text, and then use text-to-speech (TTS) to convert those answers back to speech for interaction. |
|
||||
| [opea/audioqna-ui](https://hub.docker.com/r/opea/audioqna-ui) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/AudioQnA/ui/docker/Dockerfile) | The docker image acted as the audioqna UI entry for enabling seamless interaction with users |
|
||||
| [opea/audioqna-multilang](https://hub.docker.com/r/opea/audioqna-multilang) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/AudioQnA/Dockerfile.multilang) | The docker image served as an audioqna gateway and using language modeling to generate answers to user queries by converting multilingual audio input to text, and then use multilingual text-to-speech (TTS) to convert those answers back to speech for interaction. |
|
||||
| [opea/avatarchatbot](https://hub.docker.com/r/opea/avatarchatbot) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/AvatarChatbot/Dockerfile) | The docker image served as a avatarchatbot gateway and interacted with users by understanding their questions and providing relevant answers. |
|
||||
| [opea/chatqna](https://hub.docker.com/r/opea/chatqna) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/ChatQnA/Dockerfile) | The docker image served as a chatqna gateway and interacted with users by understanding their questions and providing relevant answers. |
|
||||
| [opea/chatqna-without-rerank](https://hub.docker.com/r/opea/chatqna-without-rerank) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/ChatQnA/Dockerfile.without_rerank) | The purpose of the docker image is to provide a user interface for chat-based Q&A using React. It allows for interaction with users and supports continuing conversations with a history that is stored in the browser's local storage. |
|
||||
| [opea/chatqna-ui](https://hub.docker.com/r/opea/chatqna-ui) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/ChatQnA/ui/docker/Dockerfile) | The docker image acted as the chatqna UI entry for facilitating interaction with users for question answering |
|
||||
| [opea/chatqna-conversation-ui](https://hub.docker.com/r/opea/chatqna-conversation-ui) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/ChatQnA/ui/docker/Dockerfile.react) | The purpose of the docker image is to provide a user interface for chat-based Q&A using React. It allows for interaction with users and supports continuing conversations with a history that is stored in the browser's local storage. |
|
||||
| [opea/chatqna-guardrails](https://hub.docker.com/r/opea/chatqna-guardrails) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/ChatQnA/Dockerfile.guardrails) | This docker image is used to encapsulate chatqna's LLM service to secure model inputs and outputs. Guardrails proactively prevents models from interacting with insecure content and signals in time to stop insecure behavior. |
|
||||
| [opea/chatqna-without-rerank](https://hub.docker.com/r/opea/chatqna-without-rerank) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/ChatQnA/Dockerfile.without_rerank) | This docker image is used as a chatqna-without-rerank gateway to provide the chatqna service without rerank to Xeon customers who cannot run the Rerank service on HPUs, but need high performance and accuracy. |
|
||||
| [opea/codegen](https://hub.docker.com/r/opea/codegen) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/CodeGen/Dockerfile) | The docker image served as the codegen gateway to provide service of the automatic creation of source code from a higher-level representation |
|
||||
| [opea/codegen-ui](https://hub.docker.com/r/opea/codegen-ui) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/CodeGen/ui/docker/Dockerfile) | The docker image acted as the codegen UI entry for facilitating interaction with users for automatically generating code from user's description |
|
||||
| [opea/codegen-react-ui](https://hub.docker.com/r/opea/codegen-react-ui) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/CodeGen/ui/docker/Dockerfile.react) | The purpose of the docker image is to provide a user interface for Codegen using React. It allows generating the appropriate code based on the current user input. |
|
||||
| [opea/codetrans](https://hub.docker.com/r/opea/codetrans) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/CodeTrans/Dockerfile) | The docker image served as a codetrans gateway to provide service of converting source code written in one programming language into an equivalent version in another programming language |
|
||||
| [opea/codetrans-ui](https://hub.docker.com/r/opea/codetrans-ui) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/CodeTrans/ui/docker/Dockerfile) | The docker image acted as the codetrans UI entry for facilitating interaction with users for translating one programming language to another one |
|
||||
| [opea/doc-index-retriever](https://hub.docker.com/r/opea/doc-index-retriever) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/DocIndexRetriever/Dockerfile) | The docker image acts as a DocRetriever gateway, It uses different methods to match user queries with a set of free text records. |
|
||||
| [opea/doc-index-retriever](https://hub.docker.com/r/opea/doc-index-retriever) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/DocIndexRetriever/Dockerfile) | The docker image acts as a DocRetriever gateway, It uses different methods to match user queries with a set of free text records. |
|
||||
| [opea/docsum](https://hub.docker.com/r/opea/docsum) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/DocSum/Dockerfile) | The docker image served as a docsum gateway to provide service of capturing the main points and essential details of the original text |
|
||||
| [opea/docsum-ui](https://hub.docker.com/r/opea/docsum-ui) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/DocSum/ui/docker/Dockerfile) | The docker image acted as the docsum UI entry for facilitating interaction with users for document summarization |
|
||||
| [opea/docsum-react-ui](https://hub.docker.com/r/opea/docsum-react-ui) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/DocSum/ui/docker/Dockerfile.react) | The purpose of the docker image is to provide a user interface for document summary using React. It allows upload a file or paste text and then click on “Generate Summary” to get a condensed summary of the generated content and automatically scroll to the bottom of the summary. |
|
||||
| [opea/docsum-gradio-ui](https://hub.docker.com/r/opea/docsum-gradio-ui) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/DocSum/ui/docker/Dockerfile.gradio) | The purpose of the docker image is to provides a user interface for summarizing documents and text using a Dockerized frontend application. Users can upload files or paste text to generate summaries. |
|
||||
| [opea/edgecraftrag](https://hub.docker.com/r/opea/edgecraftrag) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/EdgeCraftRAG/Dockerfile) | The docker image served as an Edge Craft RAG (EC-RAG) gateway, delivering a customizable and production-ready Retrieval-Augmented Generation system optimized for edge solutions. |
|
||||
| [opea/edgecraftrag-ui](https://hub.docker.com/r/opea/edgecraftrag-ui) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/EdgeCraftRAG/ui/docker/Dockerfile.ui) | The docker image acted as the Edge Craft RAG (EC-RAG) UI entry. It ensuring high-quality, performant interactions tailored for edge environments. |
|
||||
| [opea/edgecraftrag-server](https://hub.docker.com/r/opea/edgecraftrag-server) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/EdgeCraftRAG/Dockerfile.server) | The docker image served as an Edge Craft RAG (EC-RAG) server, delivering a customizable and production-ready Retrieval-Augmented Generation system optimized for edge solutions. |
|
||||
| [opea/faqgen](https://hub.docker.com/r/opea/faqgen) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/FaqGen/Dockerfile) | The docker image served as a faqgen gateway and automatically generating comprehensive, natural sounding Frequently Asked Questions (FAQs) from documents, legal texts, customer inquiries and other sources. |
|
||||
| [opea/faqgen-ui](https://hub.docker.com/r/opea/faqgen-ui) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/FaqGen/ui/docker/Dockerfile) | The docker image serves as the docsum UI entry point for easy interaction with users, generating FAQs by pasting in question text. |
|
||||
| [opea/faqgen-ui](https://hub.docker.com/r/opea/faqgen-ui) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/FaqGen/ui/docker/Dockerfile) | The docker image serves as the faqgen UI entry point for easy interaction with users, generating FAQs by pasting in question text. |
|
||||
| [opea/faqgen-react-ui](https://hub.docker.com/r/opea/faqgen-react-ui) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/FaqGen/ui/docker/Dockerfile.react) | The purpose of the docker image is to provide a user interface for Generate FAQs using React. It allows generating FAQs by uploading files or pasting text. |
|
||||
| [opea/multimodalqna](https://hub.docker.com/r/opea/multimodalqna) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/MultimodalQnA/Dockerfile) | The docker image served as a multimodalqna gateway and dynamically fetches the most relevant multimodal information (frames, transcripts, and/or subtitles) from the user's video, image, or audio collection to solve the problem. |
|
||||
| [opea/multimodalqna-ui](https://hub.docker.com/r/opea/multimodalqna-ui) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/MultimodalQnA/ui/docker/Dockerfile) | The docker image serves as the multimodalqna UI entry point for easy interaction with users. Answers to questions are generated from uploaded by users. |
|
||||
| [opea/graphrag](https://hub.docker.com/r/opea/graphrag) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/GraphRAG/Dockerfile) | The docker image served as a GraphRAG gateway, leveraging a knowledge graph derived from source documents to address both local and global queries. |
|
||||
| [opea/graphrag-ui](https://hub.docker.com/r/opea/graphrag-ui) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/GraphRAG/ui/docker/Dockerfile) | The docker image acted as the GraphRAG UI entry for facilitating interaction with users |
|
||||
| [opea/graphrag-react-ui](https://hub.docker.com/r/opea/graphrag-react-ui) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/GraphRAG/ui/docker/Dockerfile.react) | The purpose of the docker image is to provide a user interface for GraphRAG using React. |
|
||||
| [opea/multimodalqna](https://hub.docker.com/r/opea/multimodalqna) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/MultimodalQnA/Dockerfile) | The docker image served as a multimodalqna gateway and dynamically fetches the most relevant multimodal information (frames, transcripts, and/or subtitles) from the user's video collection to solve the problem. |
|
||||
| [opea/multimodalqna-ui](https://hub.docker.com/r/opea/multimodalqna-ui) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/MultimodalQnA/ui/docker/Dockerfile) | The docker image serves as the multimodalqna UI entry point for easy interaction with users. Answers to questions are generated from videos uploaded by users.. |
|
||||
| [opea/productivity-suite-react-ui-server](https://hub.docker.com/r/opea/productivity-suite-react-ui-server) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/ProductivitySuite/ui/docker/Dockerfile.react) | The purpose of the docker image is to provide a user interface for Productivity Suite Application using React. It allows interaction by uploading documents and inputs. |
|
||||
| [opea/searchqna](https://hub.docker.com/r/opea/searchqna/tags) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/SearchQnA/Dockerfile) | The docker image served as the searchqna gateway to provide service of retrieving accurate and relevant answers to user queries from a knowledge base or dataset |
|
||||
| [opea/searchqna](https://hub.docker.com/r/opea/searchqna) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/SearchQnA/Dockerfile) | The docker image served as the searchqna gateway to provide service of retrieving accurate and relevant answers to user queries from a knowledge base or dataset |
|
||||
| [opea/searchqna-ui](https://hub.docker.com/r/opea/searchqna-ui) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/SearchQnA/ui/docker/Dockerfile) | The docker image acted as the searchqna UI entry for facilitating interaction with users for question answering |
|
||||
| [opea/translation](https://hub.docker.com/r/opea/translation) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/Translation/Dockerfile) | The docker image served as the translation gateway to provide service of language translation |
|
||||
| [opea/translation-ui](https://hub.docker.com/r/opea/translation-ui) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/Translation/ui/docker/Dockerfile) | The docker image acted as the translation UI entry for facilitating interaction with users for language translation |
|
||||
@@ -40,46 +50,65 @@ Take ChatQnA for example. ChatQnA is a chatbot application service based on the
|
||||
|
||||
## Microservice images
|
||||
|
||||
| Microservice Images | Dockerfile | Description |
|
||||
| ------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| [opea/agent]() | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/agent/src/Dockerfile) | The docker image exposed the OPEA agent microservice for GenAI application use |
|
||||
| [opea/asr](https://hub.docker.com/r/opea/asr) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/asr/src/Dockerfile) | The docker image exposed the OPEA Audio-Speech-Recognition microservice for GenAI application use |
|
||||
| [opea/chathistory-mongo-server](https://hub.docker.com/r/opea/chathistory-mongo-server) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/chathistory/src/Dockerfile) | The docker image exposes OPEA Chat History microservice which based on MongoDB database, designed to allow user to store, retrieve and manage chat conversations |
|
||||
| [opea/dataprep]() | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/dataprep/src/Dockerfile) | The docker image exposed the OPEA dataprep microservice based on many vectordbs for GenAI application use |
|
||||
| [opea/embedding-multimodal-clip](https://hub.docker.com/r/opea/embedding-multimodal-clip) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/third_parties/clip/src/Dockerfile) | The docker image exposes OPEA multimodal CLIP-based embedded microservices for use by GenAI applications |
|
||||
| [opea/embedding](https://hub.docker.com/r/opea/embedding) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/embeddings/src/Dockerfile) | The docker image exposes OPEA multimodal embedded microservices for use by GenAI applications |
|
||||
| [opea/embedding-multimodal-bridgetower](https://hub.docker.com/r/opea/embedding-multimodal-bridgetower) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/third_parties/bridgetower/src/Dockerfile) | The docker image exposes OPEA multimodal embedded microservices based on bridgetower for use by GenAI applications |
|
||||
| [opea/embedding-multimodal-bridgetower-gaudi](https://hub.docker.com/r/opea/embedding-multimodal-bridgetower-gaudi) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/third_parties/bridgetower/src/Dockerfile.intel_hpu) | The docker image exposes OPEA multimodal embedded microservices based on bridgetower for use by GenAI applications on the Gaudi |
|
||||
| [opea/feedbackmanagement](https://hub.docker.com/r/opea/feedbackmanagement) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/feedback_management/src/Dockerfile) | The docker image exposes that the OPEA feedback management microservice uses a MongoDB database for GenAI applications. |
|
||||
| [opea/finetuning](https://hub.docker.com/r/opea/finetuning) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/finetuning/src/Dockerfile) | The docker image exposed the OPEA Fine-tuning microservice for GenAI application use |
|
||||
| [opea/finetuning-gaudi](https://hub.docker.com/r/opea/finetuning-gaudi) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/finetuning/src/Dockerfile.intel_hpu) | The docker image exposed the OPEA Fine-tuning microservice for GenAI application use on the Gaudi |
|
||||
| [opea/gmcrouter](https://hub.docker.com/r/opea/gmcrouter) | [Link](https://github.com/opea-project/GenAIInfra/blob/main/microservices-connector/Dockerfile.manager) | The docker image served as one of key parts of the OPEA GenAI Microservice Connector(GMC) to route the traffic among the microservices defined in GMC |
|
||||
| [opea/gmcmanager](https://hub.docker.com/r/opea/gmcmanager) | [Link](https://github.com/opea-project/GenAIInfra/blob/main/microservices-connector/Dockerfile.router) | The docker image served as one of key parts of the OPEA GenAI Microservice Connector(GMC) to be controller manager to handle GMC CRD |
|
||||
| [opea/guardrails]() | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/guardrails/src/guardrails/Dockerfile) | The docker image exposed the OPEA guardrail microservice to provide content review for GenAI application use |
|
||||
| [opea/guardrails-toxicity-detection](https://hub.docker.com/r/opea/guardrails-toxicity-detection) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/guardrails/src/toxicity_detection/Dockerfile) | The docker image exposed the OPEA guardrail microservice to provide toxicity detection for GenAI application use |
|
||||
| [opea/guardrails-pii-detection](https://hub.docker.com/r/opea/guardrails-pii-detection) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/guardrails/src/pii_detection/Dockerfile) | The docker image exposed the OPEA guardrail microservice to provide PII detection for GenAI application use |
|
||||
| [opea/llm-docsum]() | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/llms/src/doc-summarization/Dockerfile) | This docker image is designed to build a document summarization microservice using the HuggingFace Text Generation Inference(TGI) framework. The microservice accepts document input and generates a document summary. |
|
||||
| [opea/llm-faqgen]() | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/llms/src/faq-generation/Dockerfile) | This docker image is designed to build a frequently asked questions microservice using the HuggingFace Text Generation Inference(TGI) framework. The microservice accepts document input and generates a FAQ. |
|
||||
| [opea/llm-textgen](https://hub.docker.com/r/opea/llm-textgen) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/llms/src/text-generation/Dockerfile) | The docker image exposed the OPEA LLM microservice upon TGI docker image for GenAI application use |
|
||||
| [opea/llava-gaudi](https://hub.docker.com/r/opea/llava-hpu) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/lvms/src/integrations/dependency/llava/Dockerfile.intel_hpu) | The docker image exposed the OPEA microservice running LLaVA as a large visual model (LVM) service for GenAI application use on the Gaudi |
|
||||
| [opea/lvm]() | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/lvms/src/Dockerfile) | This docker image is designed to build a large visual model (LVM) unified wrapper service |
|
||||
| [opea/lvm-llava](https://hub.docker.com/r/opea/lvm-llava) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/lvms/src/integrations/dependency/llava/Dockerfile) | The docker image exposed the OPEA microservice running LLaVA as a large visual model (LVM) server for GenAI application use |
|
||||
| [opea/lvm-video-llama](https://hub.docker.com/r/opea/lvm-video-llama) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/lvms/src/integrations/dependency/video-llama/Dockerfile) | The docker image exposed the OPEA microservice running Video LLaMA as a large visual model (LVM) server for GenAI application use |
|
||||
| [opea/lvm-llama-vision]() | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/lvms/src/integrations/dependency/llama-vision/Dockerfile) | The docker image exposed the OPEA microservice running LLaMA Vision as a large visual model (LVM) server for GenAI application use |
|
||||
| [opea/lvm-predictionguard](https://hub.docker.com/r/opea/lvm-predictionguard) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/lvms/src/integrations/dependency/predictionguard/Dockerfile) | The docker image exposed the OPEA microservice running PredictionGuard as a large visual model (LVM) server for GenAI application use |
|
||||
| [opea/nginx](https://hub.docker.com/r/opea/nginx) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/third_parties/nginx/src/Dockerfile) | The docker image exposed the OPEA nginx microservice for GenAI application use |
|
||||
| [opea/pathway](https://hub.docker.com/r/opea/vectorstore-pathway) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/third_parties/pathway/src/Dockerfile) | The docker image exposed the OPEA Vectorstores microservice with Pathway for GenAI application use |
|
||||
| [opea/promptregistry-mongo-server](https://hub.docker.com/r/opea/promptregistry-mongo-server) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/prompt_registry/src/Dockerfile) | The docker image exposes the OPEA Prompt Registry microservices which based on MongoDB database, designed to store and retrieve user's preferred prompts |
|
||||
| [opea/reranking]() | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/rerankings/src/Dockerfile) | The docker image exposed the OPEA reranking microservice based on tei docker image for GenAI application use |
|
||||
| [opea/retriever]() | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/retrievers/src/Dockerfile) | The docker image exposed the OPEA retrieval microservice based on milvus vectordb for GenAI application use |
|
||||
| [opea/speecht5](https://hub.docker.com/r/opea/speecht5) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/tts/src/integrations/dependency/speecht5/Dockerfile) | The docker image exposed the OPEA SpeechT5 service for GenAI application use |
|
||||
| [opea/speecht5-gaudi](https://hub.docker.com/r/opea/speecht5-gaudi) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/tts/src/integrations/dependency/speecht5/Dockerfile.intel_hpu) | The docker image exposed the OPEA SpeechT5 service on Gaudi2 for GenAI application use |
|
||||
| [opea/tei-gaudi](https://hub.docker.com/r/opea/tei-gaudi/tags) | [Link](https://github.com/huggingface/tei-gaudi/blob/habana-main/Dockerfile-hpu) | The docker image powered by HuggingFace Text Embedding Inference (TEI) on Gaudi2 for deploying and serving Embedding Models |
|
||||
| [opea/lvm-video-llama](https://hub.docker.com/r/opea/lvm-video-llama) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/lvms/src/integrations/dependency/video-llama/Dockerfile) | The docker image exposed the OPEA microservice running Video-Llama as a large visual model (LVM) server for GenAI application use |
|
||||
| [opea/tts](https://hub.docker.com/r/opea/tts) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/tts/src/Dockerfile) | The docker image exposed the OPEA Text-To-Speech microservice for GenAI application use |
|
||||
| [opea/vllm](https://hub.docker.com/r/opea/vllm) | [Link](https://github.com/vllm-project/vllm/blob/main/Dockerfile.cpu) | The docker image powered by vllm-project for deploying and serving vllm Models |
|
||||
| [opea/vllm-gaudi]() | [Link](https://github.com/HabanaAI/vllm-fork/blob/habana_main/Dockerfile.hpu) | The docker image powered by vllm-fork for deploying and serving vllm-gaudi Models |
|
||||
| [opea/vllm-openvino](https://hub.docker.com/r/opea/vllm-openvino) | [Link](https://github.com/vllm-project/vllm/blob/main/Dockerfile.openvino) | The docker image powered by vllm-project for deploying and serving vllm Models of the Openvino Framework |
|
||||
| [opea/web-retriever]() | [Link](https://github.com/opea-project/GenAIComps/tree/main/comps/web_retrievers/src/Dockerfile) | The docker image exposed the OPEA web retrieval microservice based on a search engine and vector DB |
|
||||
| [opea/whisper](https://hub.docker.com/r/opea/whisper) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/asr/src/integrations/dependency/whisper/Dockerfile) | The docker image exposed the OPEA Whisper service for GenAI application use |
|
||||
| [opea/whisper-gaudi](https://hub.docker.com/r/opea/whisper-gaudi) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/asr/src/integrations/dependency/whisper/Dockerfile.intel_hpu) | The docker image exposed the OPEA Whisper service on Gaudi2 for GenAI application use |
|
||||
| Microservice Images | Dockerfile | Description |
|
||||
| ------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| [opea/agent](https://hub.docker.com/r/opea/agent) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/agent/src/Dockerfile) | The docker image exposed the OPEA agent microservice for GenAI application use |
|
||||
| [opea/agent-ui](https://hub.docker.com/r/opea/agent-ui) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/AgentQnA/ui/docker/Dockerfile) | The docker image exposed the OPEA agent microservice UI entry for GenAI application use |
|
||||
| [opea/asr](https://hub.docker.com/r/opea/asr) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/asr/src/Dockerfile) | The docker image exposed the OPEA Audio-Speech-Recognition microservice for GenAI application use |
|
||||
| [opea/animation](https://hub.docker.com/r/opea/animation) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/animation/src/Dockerfile) | The purpose of the Docker image is to expose the OPEA Avatar Animation microservice for GenAI application use. |
|
||||
| [opea/chathistory-mongo-server](https://hub.docker.com/r/opea/chathistory-mongo-server) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/chathistory/src/Dockerfile) | The docker image exposes OPEA Chat History microservice which based on MongoDB database, designed to allow user to store, retrieve and manage chat conversations |
|
||||
| [opea/dataprep](https://hub.docker.com/r/opea/dataprep) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/dataprep/src/Dockerfile) | The docker image exposed the OPEA dataprep microservice for GenAI application use |
|
||||
| [opea/embedding](https://hub.docker.com/r/opea/embedding) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/embeddings/src/Dockerfile) | The docker image exposed the OPEA mosec embedding microservice for GenAI application use |
|
||||
| [opea/embedding-multimodal-clip](https://hub.docker.com/r/opea/embedding-multimodal-clip) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/third_parties/clip/src/Dockerfile) | The docker image exposed the OPEA mosec embedding microservice base on Langchain framework for GenAI application use |
|
||||
| [opea/embedding-multimodal-bridgetower](https://hub.docker.com/r/opea/embedding-multimodal-bridgetower) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/third_parties/bridgetower/src/Dockerfile) | The docker image exposes OPEA multimodal embedded microservices based on bridgetower for use by GenAI applications |
|
||||
| [opea/embedding-multimodal-bridgetower-gaudi](https://hub.docker.com/r/opea/embedding-multimodal-bridgetower-gaudi) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/third_parties/bridgetower/src/Dockerfile.intel_hpu) | The docker image exposes OPEA multimodal embedded microservices based on bridgetower for use by GenAI applications on the Gaudi |
|
||||
| [opea/feedbackmanagement-mongo](https://hub.docker.com/r/opea/feedbackmanagement-mongo) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/feedback_management/src/Dockerfile) | The docker image exposes that the OPEA feedback management microservice uses a MongoDB database for GenAI applications. |
|
||||
| [opea/finetuning](https://hub.docker.com/r/opea/finetuning) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/finetuning/src/Dockerfile) | The docker image exposed the OPEA Fine-tuning microservice for GenAI application use |
|
||||
| [opea/finetuning-gaudi](https://hub.docker.com/r/opea/finetuning-gaudi) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/finetuning/src/Dockerfile.intel_hpu) | The docker image exposed the OPEA Fine-tuning microservice for GenAI application use on the Gaudi |
|
||||
| [opea/guardrails](https://hub.docker.com/r/opea/guardrails) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/guardrails/src/guardrails/Dockerfile) | The docker image exposed the OPEA guardrail microservice for GenAI application use |
|
||||
| [opea/guardrails-toxicity-predictionguard](https://hub.docker.com/r/opea/guardrails-toxicity-predictionguard) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/guardrails/src/toxicity_detection/Dockerfile) | The docker image exposed the OPEA guardrail microservice to provide toxicity detection for GenAI application use |
|
||||
| [opea/guardrails-pii-predictionguard](https://hub.docker.com/r/opea/guardrails-pii-predictionguard) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/guardrails/src/pii_detection/Dockerfile) | The docker image exposed the OPEA guardrail microservice to provide PII detection for GenAI application use |
|
||||
| [opea/guardrails-injection-predictionguard](https://hub.docker.com/r/opea/guardrails-injection-predictionguard) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/guardrails/src/prompt_injection/Dockerfile) | The docker image exposed the OPEA guardrail microservice to provide injection predictionguard for GenAI application use |
|
||||
| [opea/guardrails-hallucination-detection](https://hub.docker.com/r/opea/guardrails-hallucination-detection) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/guardrails/src/hallucination_detection/Dockerfile) | The docker image exposed the OPEA guardrail microservice to provide hallucination detection for GenAI application use |
|
||||
| [opea/guardrails-factuality-predictionguard](https://hub.docker.com/r/opea/guardrails-factuality-predictionguard) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/guardrails/src/factuality_alignment/Dockerfile) | The docker image exposed the OPEA guardrail microservice to provide factuality predictionguard for GenAI application use |
|
||||
| [opea/guardrails-bias-detection](https://hub.docker.com/r/opea/guardrails-bias-detection) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/guardrails/src/bias_detection/Dockerfile) | The docker image exposed the OPEA guardrail microservice to provide bias detection for GenAI application use |
|
||||
| [opea/image2image-gaudi](https://hub.docker.com/r/opea/image2image-gaudi) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/image2image/src/Dockerfile.intel_hpu) | The purpose of the Docker image is to expose the OPEA Image-to-Image microservice for GenAI application use on the Gaudi. |
|
||||
| [opea/image2image](https://hub.docker.com/r/opea/image2image) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/image2image/src/Dockerfile) | The purpose of the Docker image is to expose the OPEA Image-to-Image microservice for GenAI application use. |
|
||||
| [opea/image2video-gaudi](https://hub.docker.com/r/opea/image2video-gaudi) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/image2video/src/Dockerfile.intel_hpu) | The purpose of the Docker image is to expose the OPEA image-to-video microservice for GenAI application use on the Gaudi. |
|
||||
| [opea/image2video](https://hub.docker.com/r/opea/image2video) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/image2video/src/Dockerfile) | The purpose of the Docker image is to expose the OPEA image-to-video microservice for GenAI application use. |
|
||||
| [opea/llm-textgen](https://hub.docker.com/r/opea/llm-textgen) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/llms/src/text-generation/Dockerfile) | The docker image exposed the OPEA LLM microservice upon textgen docker image for GenAI application use |
|
||||
| [opea/llm-textgen-gaudi](https://hub.docker.com/r/opea/llm-textgen-gaudi) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/llms/src/text-generation/Dockerfile.intel_hpu) | The docker image exposed the OPEA LLM microservice upon textgen docker image for GenAI application use on the Gaudi2 |
|
||||
| [opea/llm-eval](https://hub.docker.com/r/opea/llm-eval) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/llms/utils/lm-eval/Dockerfile) | The docker image exposed the OPEA LLM microservice upon eval docker image for GenAI application use |
|
||||
| [opea/llm-docsum](https://hub.docker.com/r/opea/llm-docsum) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/llms/src/doc-summarization/Dockerfile) | The docker image exposes the OPEA LLM document summarization (DocSum) microservice for GenAI application use |
| [opea/llm-faqgen](https://hub.docker.com/r/opea/llm-faqgen) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/llms/src/faq-generation/Dockerfile) | This docker image is designed to build a frequently asked questions (FAQ) microservice using the HuggingFace Text Generation Inference (TGI) framework. The microservice accepts document input and generates a FAQ. |
| [opea/lvm](https://hub.docker.com/r/opea/lvm) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/lvms/src/Dockerfile) | The docker image exposes the OPEA large visual model (LVM) microservice for GenAI application use |
| [opea/lvm-llava](https://hub.docker.com/r/opea/lvm-llava) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/lvms/src/integrations/dependency/llava/Dockerfile) | The docker image exposes the OPEA microservice running LLaVA as a large visual model (LVM) server for GenAI application use |
| [opea/lvm-video-llama](https://hub.docker.com/r/opea/lvm-video-llama) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/lvms/src/integrations/dependency/video-llama/Dockerfile) | The docker image exposes the OPEA microservice running Video-LLaMA as a large visual model (LVM) for GenAI application use |
| [opea/lvm-predictionguard](https://hub.docker.com/r/opea/lvm-predictionguard) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/lvms/src/integrations/dependency/predictionguard/Dockerfile) | The docker image exposes the OPEA microservice running Prediction Guard as a large visual model (LVM) server for GenAI application use |
| [opea/llava-gaudi](https://hub.docker.com/r/opea/llava-gaudi) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/lvms/src/integrations/dependency/llava/Dockerfile.intel_hpu) | The docker image exposes the OPEA microservice running LLaVA as a large visual model (LVM) service for GenAI application use on Gaudi2 |
| [opea/lvm-llama-vision](https://hub.docker.com/r/opea/lvm-llama-vision) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/lvms/src/integrations/dependency/llama-vision/Dockerfile) | The docker image exposes the OPEA microservice running Llama Vision as the base large visual model service for GenAI application use |
| [opea/lvm-llama-vision-tp](https://hub.docker.com/r/opea/lvm-llama-vision-tp) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/lvms/src/integrations/dependency/llama-vision/Dockerfile.tp) | The docker image exposes the OPEA microservice running Llama Vision with DeepSpeed as the base large visual model service for GenAI application use |
| [opea/lvm-llama-vision-guard](https://hub.docker.com/r/opea/lvm-llama-vision-guard) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/lvms/src/integrations/dependency/llama-vision/Dockerfile.guard) | The docker image exposes the OPEA microservice running Llama Vision Guard as the base large visual model service for GenAI application use |
| [opea/promptregistry-mongo](https://hub.docker.com/r/opea/promptregistry-mongo) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/prompt_registry/src/Dockerfile) | The docker image exposes the OPEA Prompt Registry microservice, which is based on a MongoDB database and is designed to store and retrieve a user's preferred prompts |
| [opea/reranking](https://hub.docker.com/r/opea/reranking) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/rerankings/src/Dockerfile) | The docker image exposes the OPEA reranking microservice for GenAI application use |
| [opea/retriever](https://hub.docker.com/r/opea/retriever) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/retrievers/src/Dockerfile) | The docker image exposes the OPEA retrieval microservice for GenAI application use |
| [opea/text2image](https://hub.docker.com/r/opea/text2image) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/text2image/src/Dockerfile) | The docker image exposes the OPEA text-to-image microservice for GenAI application use |
| [opea/text2image-gaudi](https://hub.docker.com/r/opea/text2image-gaudi) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/text2image/src/Dockerfile.intel_hpu) | The docker image exposes the OPEA text-to-image microservice for GenAI application use on Gaudi |
| [opea/text2image-ui](https://hub.docker.com/r/opea/text2image-ui) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/Text2Image/ui/docker/Dockerfile) | The docker image exposes the OPEA text-to-image microservice UI entry for GenAI application use |
| [opea/text2sql](https://hub.docker.com/r/opea/text2sql) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/text2sql/src/Dockerfile) | The docker image exposes the OPEA text-to-SQL (Structured Query Language) microservice for GenAI application use |
| [opea/text2sql-react-ui](https://hub.docker.com/r/opea/text2sql-react-ui) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/DBQnA/ui/docker/Dockerfile.react) | The docker image exposes the OPEA text-to-SQL microservice React UI entry for GenAI application use |
| [opea/tts](https://hub.docker.com/r/opea/tts) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/tts/src/Dockerfile) | The docker image exposes the OPEA Text-To-Speech microservice for GenAI application use |
| [opea/speecht5](https://hub.docker.com/r/opea/speecht5) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/tts/src/integrations/dependency/speecht5/Dockerfile) | The docker image exposes the OPEA SpeechT5 service for GenAI application use |
| [opea/speecht5-gaudi](https://hub.docker.com/r/opea/speecht5-gaudi) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/tts/src/integrations/dependency/speecht5/Dockerfile.intel_hpu) | The docker image exposes the OPEA SpeechT5 service on Gaudi2 for GenAI application use |
| [opea/gpt-sovits](https://hub.docker.com/r/opea/gpt-sovits) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/tts/src/integrations/dependency/gpt-sovits/Dockerfile) | The docker image exposes the OPEA GPT-SoVITS service for GenAI application use |
| [opea/nginx](https://hub.docker.com/r/opea/nginx) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/third_parties/nginx/src/Dockerfile) | The docker image exposes the OPEA nginx microservice for GenAI application use |
| [opea/vectorstore-pathway](https://hub.docker.com/r/opea/vectorstore-pathway) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/third_parties/pathway/src/Dockerfile) | The docker image exposes the OPEA vector store microservice with Pathway for GenAI application use |
| [opea/wav2lip](https://hub.docker.com/r/opea/wav2lip) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/third_parties/wav2lip/src/Dockerfile) | The docker image exposes the OPEA Wav2Lip microservice, which generates lip movements from audio files, for GenAI application use |
| [opea/wav2lip-gaudi](https://hub.docker.com/r/opea/wav2lip-gaudi) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/third_parties/wav2lip/src/Dockerfile.intel_hpu) | The docker image exposes the OPEA Wav2Lip microservice, which generates lip movements from audio files, for GenAI application use on Gaudi2 |
| [opea/vllm-arc](https://hub.docker.com/r/opea/vllm-arc) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/third_parties/vllm/src/Dockerfile.intel_gpu) | The docker image powered by the vLLM project for deploying and serving vLLM models on Arc |
| [opea/vllm-openvino](https://hub.docker.com/r/opea/vllm-openvino) | [Link](https://github.com/vllm-project/vllm/blob/main/Dockerfile.openvino) | The docker image powered by the vLLM project for deploying and serving vLLM models with the OpenVINO framework |
| [opea/vllm-gaudi](https://hub.docker.com/r/opea/vllm-gaudi) | [Link](https://github.com/HabanaAI/vllm-fork/blob/habana_main/Dockerfile.hpu) | The docker image powered by the vLLM project for deploying and serving vLLM models on Gaudi2 |
| [opea/vllm](https://hub.docker.com/r/opea/vllm) | [Link](https://github.com/HabanaAI/vllm-fork/blob/habana_main/Dockerfile.cpu) | The docker image powered by the vLLM project for deploying and serving vLLM models |
| [opea/whisper-gaudi](https://hub.docker.com/r/opea/whisper-gaudi) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/asr/src/integrations/dependency/whisper/Dockerfile.intel_hpu) | The docker image exposes the OPEA Whisper service on Gaudi2 for GenAI application use |
| [opea/whisper](https://hub.docker.com/r/opea/whisper) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/asr/src/integrations/dependency/whisper/Dockerfile) | The docker image exposes the OPEA Whisper service for GenAI application use |
| [opea/web-retriever](https://hub.docker.com/r/opea/web-retriever) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/web_retrievers/src/Dockerfile) | The docker image exposes the OPEA web retrieval microservice based on the Chroma vector database for GenAI application use |
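Any image listed above can be pulled and started directly from Docker Hub. The snippet below is a minimal sketch using `opea/whisper` as an example; the tag, container name, port mapping, and `HF_TOKEN` variable are illustrative assumptions rather than values taken from this table, so consult the linked Dockerfile and its README for the actual ports and required environment variables.

```bash
# Minimal sketch: pull one of the images listed above and run it locally.
# The tag, port mapping, and environment variable are assumptions for
# illustration only; check the linked Dockerfile/README for the real values.
docker pull opea/whisper:latest
docker run -d --name whisper-service \
  -p 7066:7066 \
  -e HF_TOKEN="${HF_TOKEN}" \
  opea/whisper:latest
```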
@@ -1,7 +1,7 @@
kubernetes
locust
numpy
opea-eval
opea-eval>=1.2
pytest
pyyaml
requests
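The hunk above tightens the `opea-eval` requirement to `>=1.2` while leaving the other dependencies unpinned. As a hedged illustration (the requirements file name and the virtual-environment workflow are assumptions, since the diff does not show them), the updated dependency set could be installed like this:

```bash
# Minimal sketch: install the dependency set from the hunk above in a clean
# virtual environment. The explicit package list mirrors the new hunk contents;
# the venv workflow itself is an assumption, not part of the diff.
python3 -m venv .venv
source .venv/bin/activate
pip install kubernetes locust numpy "opea-eval>=1.2" pytest pyyaml requests
```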