Compare commits

..

2 Commits

Author SHA1 Message Date
bjzhjing
c8c6fa2e3e Provide unified scalable deployment and benchmarking support for exam… (#1315)
Signed-off-by: Cathy Zhang <cathy.zhang@intel.com>
Signed-off-by: letonghan <letong.han@intel.com>
Co-authored-by: letonghan <letong.han@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
(cherry picked from commit ed163087ba)
2025-01-24 22:55:38 +08:00
NeuralChatBot
905a5100f9 Freeze OPEA images tag
Signed-off-by: NeuralChatBot <grp_neural_chat_bot@intel.com>
2025-01-24 08:31:22 +00:00
73 changed files with 206 additions and 25998 deletions

View File

@@ -79,6 +79,7 @@ jobs:
fi
if [[ $(grep -c "vllm-gaudi:" ${docker_compose_path}) != 0 ]]; then
git clone --depth 1 --branch v0.6.4.post2+Gaudi-1.19.0 https://github.com/HabanaAI/vllm-fork.git
sed -i 's/triton/triton==3.1.0/g' vllm-fork/requirements-hpu.txt
fi
git clone --depth 1 --branch ${{ inputs.opea_branch }} https://github.com/opea-project/GenAIComps.git
cd GenAIComps && git rev-parse HEAD && cd ../

View File

@@ -97,7 +97,6 @@ jobs:
helm-test:
needs: [get-test-case]
if: ${{ fromJSON(needs.get-test-case.outputs.value_files).length != 0 }}
strategy:
matrix:
value_file: ${{ fromJSON(needs.get-test-case.outputs.value_files) }}

View File

@@ -91,7 +91,6 @@ jobs:
compose-test:
needs: [get-test-case]
if: ${{ needs.get-test-case.outputs.test_cases != '' }}
strategy:
matrix:
test_case: ${{ fromJSON(needs.get-test-case.outputs.test_cases) }}
@@ -127,7 +126,6 @@ jobs:
shell: bash
env:
HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
HF_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
PINECONE_KEY: ${{ secrets.PINECONE_KEY }}

View File

@@ -41,11 +41,9 @@ jobs:
publish:
needs: [get-image-list]
if: ${{ needs.get-image-list.outputs.matrix != '' }}
strategy:
matrix:
image: ${{ fromJSON(needs.get-image-list.outputs.matrix) }}
fail-fast: false
runs-on: "docker-build-${{ inputs.node }}"
steps:
- uses: docker/login-action@v3.2.0

View File

@@ -47,7 +47,6 @@ jobs:
scan-docker:
needs: get-image-list
runs-on: "docker-build-${{ inputs.node }}"
if: ${{ fromJSON(needs.get-image-list.outputs.matrix).length != 0 }}
strategy:
matrix:
image: ${{ fromJson(needs.get-image-list.outputs.matrix) }}

View File

@@ -76,7 +76,7 @@ jobs:
build-deploy-gmc:
needs: [get-test-matrix]
if: ${{ fromJSON(inputs.deploy_gmc) }} && ${{ fromJSON(needs.get-test-matrix.outputs.nodes).length != 0 }}
if: ${{ fromJSON(inputs.deploy_gmc) }}
strategy:
matrix:
node: ${{ fromJson(needs.get-test-matrix.outputs.nodes) }}
@@ -90,7 +90,7 @@ jobs:
run-examples:
needs: [get-test-matrix, build-deploy-gmc]
if: always() && ${{ fromJSON(needs.get-test-matrix.outputs.examples).length != 0 }}
if: always()
strategy:
matrix:
example: ${{ fromJson(needs.get-test-matrix.outputs.examples) }}

View File

@@ -25,9 +25,9 @@ jobs:
- name: Set up Git
run: |
git config --global user.name "CICD-at-OPEA"
git config --global user.email "CICD@opea.dev"
git remote set-url origin https://CICD-at-OPEA:"${{ secrets.ACTION_TOKEN }}"@github.com/opea-project/GenAIExamples.git
git config --global user.name "NeuralChatBot"
git config --global user.email "grp_neural_chat_bot@intel.com"
git remote set-url origin https://NeuralChatBot:"${{ secrets.ACTION_TOKEN }}"@github.com/opea-project/GenAIExamples.git
- name: Run script
run: |

View File

@@ -51,7 +51,6 @@ jobs:
image-build:
needs: get-test-matrix
if: ${{ needs.get-test-matrix.outputs.nodes != '' }}
strategy:
matrix:
node: ${{ fromJson(needs.get-test-matrix.outputs.nodes) }}

View File

@@ -33,7 +33,6 @@ jobs:
clean-up:
needs: get-build-matrix
if: ${{ fromJSON(needs.get-build-matrix.outputs.nodes).length != 0 }}
strategy:
matrix:
node: ${{ fromJson(needs.get-build-matrix.outputs.nodes) }}
@@ -48,7 +47,6 @@ jobs:
build:
needs: [get-build-matrix, clean-up]
if: ${{ fromJSON(needs.get-build-matrix.outputs.nodes).length != 0 }}
strategy:
matrix:
example: ${{ fromJson(needs.get-build-matrix.outputs.examples) }}

View File

@@ -34,7 +34,6 @@ jobs:
build-and-test:
needs: get-build-matrix
if: ${{ needs.get-build-matrix.outputs.examples_json != '' }}
strategy:
matrix:
example: ${{ fromJSON(needs.get-build-matrix.outputs.examples_json) }}
@@ -54,11 +53,9 @@ jobs:
publish:
needs: [get-build-matrix, get-image-list, build-and-test]
if: ${{ needs.get-image-list.outputs.matrix != '' }}
strategy:
matrix:
image: ${{ fromJSON(needs.get-image-list.outputs.matrix) }}
fail-fast: false
runs-on: "docker-build-gaudi"
steps:
- uses: docker/login-action@v3.2.0

View File

@@ -65,7 +65,7 @@ jobs:
helm-chart-test:
needs: [job1]
if: always() && ${{ fromJSON(needs.job1.outputs.run_matrix).length != 0 }}
if: always() && ${{ needs.job1.outputs.run_matrix.example.length > 0 }}
uses: ./.github/workflows/_helm-e2e.yml
strategy:
matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }}

View File

@@ -32,10 +32,10 @@ jobs:
example-test:
needs: [get-test-matrix]
if: ${{ needs.get-test-matrix.outputs.run_matrix != '' }}
strategy:
matrix: ${{ fromJSON(needs.get-test-matrix.outputs.run_matrix) }}
fail-fast: false
if: ${{ !github.event.pull_request.draft }}
uses: ./.github/workflows/_run-docker-compose.yml
with:
registry: "opea"

View File

@@ -24,7 +24,6 @@ jobs:
image-build:
needs: job1
if: ${{ fromJSON(needs.job1.outputs.run_matrix).length != 0 }}
strategy:
matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }}
fail-fast: false

View File

@@ -16,8 +16,8 @@ jobs:
freeze-images:
runs-on: ubuntu-latest
env:
USER_NAME: "CICD-at-OPEA"
USER_EMAIL: "CICD@opea.dev"
USER_NAME: "NeuralChatBot"
USER_EMAIL: "grp_neural_chat_bot@intel.com"
BRANCH_NAME: "update_images_tag"
steps:
- name: Checkout repository

View File

@@ -43,6 +43,7 @@ function build_vllm_docker_image() {
fi
cd ./vllm-fork
git checkout v0.6.4.post2+Gaudi-1.19.0
sed -i 's/triton/triton==3.1.0/g' requirements-hpu.txt
docker build --no-cache -f Dockerfile.hpu -t opea/vllm-gaudi:ci --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy
if [ $? -ne 0 ]; then
echo "opea/vllm-gaudi:ci failed"

View File

@@ -18,7 +18,7 @@ Here're some of the project's features:
2. cd command to the current folder.
```
cd AgentQnA/ui/svelte
cd AgentQnA/ui
```
3. Modify the required .env variables.
@@ -41,7 +41,7 @@ Here're some of the project's features:
npm run dev
```
- The application will be available at `http://localhost:5173`.
- The application will be available at `http://localhost:3000`.
5. **For Docker Setup:**
@@ -54,7 +54,7 @@ Here're some of the project's features:
- Run the Docker container:
```
docker run -d -p 5173:5173 --name agent-ui opea:agent-ui
docker run -d -p 3000:3000 --name agent-ui opea:agent-ui
```
- The application will be available at `http://localhost:5173`.
- The application will be available at `http://localhost:3000`.

View File

@@ -1,209 +0,0 @@
# Build Mega Service of AvatarChatbot on AMD GPU
This document outlines the deployment process for an AvatarChatbot application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on an Intel Xeon server.
## 🚀 Build Docker images
### 1. Source Code install GenAIComps
```bash
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
```
### 2. Build ASR Image
```bash
docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/integrations/dependency/whisper/Dockerfile .
docker build -t opea/asr:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/Dockerfile .
```
### 3. Build LLM Image
```bash
docker build --no-cache -t opea/llm-textgen:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/text-generation/Dockerfile .
```
### 4. Build TTS Image
```bash
docker build -t opea/speecht5:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/src/integrations/dependency/speecht5/Dockerfile .
docker build -t opea/tts:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/src/Dockerfile .
```
### 5. Build Animation Image
```bash
docker build -t opea/wav2lip:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/third_parties/wav2lip/src/Dockerfile .
docker build -t opea/animation:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/animation/src/Dockerfile .
```
### 6. Build MegaService Docker Image
To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `audioqna.py` Python script. Build the MegaService Docker image using the command below:
```bash
git clone https://github.com/opea-project/GenAIExamples.git
cd GenAIExamples/AvatarChatbot/
docker build --no-cache -t opea/avatarchatbot:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
```
Then run the command `docker images`; you will have the following images ready:
1. `opea/whisper:latest`
2. `opea/asr:latest`
3. `opea/llm-textgen:latest`
4. `opea/speecht5:latest`
5. `opea/tts:latest`
6. `opea/wav2lip:latest`
7. `opea/animation:latest`
8. `opea/avatarchatbot:latest`
## 🚀 Set the environment variables
Before starting the services with `docker compose`, you have to recheck the following environment variables.
```bash
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export host_ip=$(hostname -I | awk '{print $1}')
export TGI_SERVICE_PORT=3006
export TGI_LLM_ENDPOINT=http://${host_ip}:${TGI_SERVICE_PORT}
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export ASR_ENDPOINT=http://${host_ip}:7066
export TTS_ENDPOINT=http://${host_ip}:7055
export WAV2LIP_ENDPOINT=http://${host_ip}:7860
export MEGA_SERVICE_HOST_IP=${host_ip}
export ASR_SERVICE_HOST_IP=${host_ip}
export TTS_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export ANIMATION_SERVICE_HOST_IP=${host_ip}
export MEGA_SERVICE_PORT=8888
export ASR_SERVICE_PORT=3001
export TTS_SERVICE_PORT=3002
export LLM_SERVICE_PORT=3007
export ANIMATION_SERVICE_PORT=3008
export DEVICE="cpu"
export WAV2LIP_PORT=7860
export INFERENCE_MODE='wav2lip+gfpgan'
export CHECKPOINT_PATH='/usr/local/lib/python3.11/site-packages/Wav2Lip/checkpoints/wav2lip_gan.pth'
export FACE="assets/img/avatar5.png"
# export AUDIO='assets/audio/eg3_ref.wav' # audio file path is optional, will use base64str in the post request as input if is 'None'
export AUDIO='None'
export FACESIZE=96
export OUTFILE="/outputs/result.mp4"
export GFPGAN_MODEL_VERSION=1.4 # latest version, can roll back to v1.3 if needed
export UPSCALE_FACTOR=1
export FPS=10
```
Warning: the Wav2Lip service in this solution runs on the CPU only. To use AMD GPUs and achieve operational performance, the Wav2Lip image needs to be modified to adapt to AMD hardware and the ROCm framework.
## 🚀 Start the MegaService
```bash
cd GenAIExamples/AvatarChatbot/docker_compose/intel/cpu/xeon/
docker compose -f compose.yaml up -d
```
## 🚀 Test MicroServices
```bash
# whisper service
curl http://${host_ip}:7066/v1/asr \
-X POST \
-d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
-H 'Content-Type: application/json'
# asr microservice
curl http://${host_ip}:3001/v1/audio/transcriptions \
-X POST \
-d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
-H 'Content-Type: application/json'
# tgi service
curl http://${host_ip}:3006/generate \
-X POST \
-d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
-H 'Content-Type: application/json'
# llm microservice
curl http://${host_ip}:3007/v1/chat/completions\
-X POST \
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \
-H 'Content-Type: application/json'
# speecht5 service
curl http://${host_ip}:7055/v1/tts \
-X POST \
-d '{"text": "Who are you?"}' \
-H 'Content-Type: application/json'
# tts microservice
curl http://${host_ip}:3002/v1/audio/speech \
-X POST \
-d '{"text": "Who are you?"}' \
-H 'Content-Type: application/json'
# wav2lip service
cd ../../../..
curl http://${host_ip}:7860/v1/wav2lip \
-X POST \
-d @assets/audio/sample_minecraft.json \
-H 'Content-Type: application/json'
# animation microservice
curl http://${host_ip}:3008/v1/animation \
-X POST \
-d @assets/audio/sample_question.json \
-H "Content-Type: application/json"
```
## 🚀 Test MegaService
```bash
curl http://${host_ip}:3009/v1/avatarchatbot \
-X POST \
-d @assets/audio/sample_whoareyou.json \
-H 'Content-Type: application/json'
```
If the megaservice is running properly, you should see the following output:
```bash
"/outputs/result.mp4"
```
The output file will be saved in the current working directory, as `${PWD}` is mapped to `/outputs` inside the wav2lip-service Docker container.
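A quick way to confirm the mapping (a minimal sketch; it assumes the default `OUTFILE=/outputs/result.mp4` and the `${PWD}:/outputs` volume of the wav2lip-service container shown in the compose file):
```bash
# The wav2lip-service container writes OUTFILE (/outputs/result.mp4) into /outputs,
# which is bind-mounted from ${PWD}, so the video appears where compose was started.
ls -lh "${PWD}/result.mp4"
```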
## Gradio UI
```bash
cd $WORKPATH/GenAIExamples/AvatarChatbot
python3 ui/gradio/app_gradio_demo_avatarchatbot.py
```
The UI can be viewed at http://${host_ip}:7861
<img src="../../../../assets/img/UI.png" alt="UI Example" width="60%">
In the current version (v1.0), you need to set the avatar figure image/video and the DL model choice in the environment variables before starting the AvatarChatbot backend service and running the UI, as sketched below; in the UI itself, only the audio question can be customized.
\*\* Changing the avatar figure between runs will be enabled in v2.0
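A minimal sketch of that flow, reusing the environment variables already exported above (values are illustrative, not the only valid choices):
```bash
# Sketch: choose the avatar figure and the DL inference mode via environment
# variables, then (re)start the services so the backend picks up the new settings.
export FACE="assets/img/avatar5.png"     # avatar figure image/video
export INFERENCE_MODE='wav2lip+gfpgan'   # DL model choice
docker compose -f compose.yaml up -d     # restart the AvatarChatbot services
```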
## Troubleshooting
```bash
cd GenAIExamples/AvatarChatbot/tests
export IMAGE_REPO="opea"
export IMAGE_TAG="latest"
export HUGGINGFACEHUB_API_TOKEN=<your_hf_token>
test_avatarchatbot_on_xeon.sh
```

View File

@@ -1,158 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
services:
whisper-service:
image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
container_name: whisper-service
ports:
- "7066:7066"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
restart: unless-stopped
asr:
image: ${REGISTRY:-opea}/asr:${TAG:-latest}
container_name: asr-service
ports:
- "3001:9099"
ipc: host
environment:
ASR_ENDPOINT: ${ASR_ENDPOINT}
speecht5-service:
image: ${REGISTRY:-opea}/speecht5:${TAG:-latest}
container_name: speecht5-service
ports:
- "7055:7055"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
restart: unless-stopped
tts:
image: ${REGISTRY:-opea}/tts:${TAG:-latest}
container_name: tts-service
ports:
- "3002:9088"
ipc: host
environment:
TTS_ENDPOINT: ${TTS_ENDPOINT}
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
container_name: tgi-service
ports:
- "${TGI_SERVICE_PORT:-3006}:80"
volumes:
- "./data:/data"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
shm_size: 1g
devices:
- /dev/kfd:/dev/kfd
- /dev/dri/:/dev/dri/
cap_add:
- SYS_PTRACE
group_add:
- video
security_opt:
- seccomp:unconfined
ipc: host
command: --model-id ${LLM_MODEL_ID} --max-input-length 4096 --max-total-tokens 8192
llm:
image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest}
container_name: llm-tgi-server
depends_on:
- tgi-service
ports:
- "3007:9000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
OPENAI_API_KEY: ${OPENAI_API_KEY}
restart: unless-stopped
wav2lip-service:
image: ${REGISTRY:-opea}/wav2lip:${TAG:-latest}
container_name: wav2lip-service
ports:
- "7860:7860"
ipc: host
volumes:
- ${PWD}:/outputs
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
DEVICE: ${DEVICE}
INFERENCE_MODE: ${INFERENCE_MODE}
CHECKPOINT_PATH: ${CHECKPOINT_PATH}
FACE: ${FACE}
AUDIO: ${AUDIO}
FACESIZE: ${FACESIZE}
OUTFILE: ${OUTFILE}
GFPGAN_MODEL_VERSION: ${GFPGAN_MODEL_VERSION}
UPSCALE_FACTOR: ${UPSCALE_FACTOR}
FPS: ${FPS}
WAV2LIP_PORT: ${WAV2LIP_PORT}
restart: unless-stopped
animation:
image: ${REGISTRY:-opea}/animation:${TAG:-latest}
container_name: animation-server
ports:
- "3008:9066"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
WAV2LIP_ENDPOINT: ${WAV2LIP_ENDPOINT}
restart: unless-stopped
avatarchatbot-backend-server:
image: ${REGISTRY:-opea}/avatarchatbot:${TAG:-latest}
container_name: avatarchatbot-backend-server
depends_on:
- asr
- llm
- tts
- animation
ports:
- "3009:8888"
environment:
no_proxy: ${no_proxy}
https_proxy: ${https_proxy}
http_proxy: ${http_proxy}
MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP}
MEGA_SERVICE_PORT: ${MEGA_SERVICE_PORT}
ASR_SERVICE_HOST_IP: ${ASR_SERVICE_HOST_IP}
ASR_SERVICE_PORT: ${ASR_SERVICE_PORT}
LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP}
LLM_SERVICE_PORT: ${LLM_SERVICE_PORT}
LLM_SERVER_HOST_IP: ${LLM_SERVICE_HOST_IP}
LLM_SERVER_PORT: ${LLM_SERVICE_PORT}
TTS_SERVICE_HOST_IP: ${TTS_SERVICE_HOST_IP}
TTS_SERVICE_PORT: ${TTS_SERVICE_PORT}
ANIMATION_SERVICE_HOST_IP: ${ANIMATION_SERVICE_HOST_IP}
ANIMATION_SERVICE_PORT: ${ANIMATION_SERVICE_PORT}
WHISPER_SERVER_HOST_IP: ${WHISPER_SERVER_HOST_IP}
WHISPER_SERVER_PORT: ${WHISPER_SERVER_PORT}
SPEECHT5_SERVER_HOST_IP: ${SPEECHT5_SERVER_HOST_IP}
SPEECHT5_SERVER_PORT: ${SPEECHT5_SERVER_PORT}
ipc: host
restart: always
networks:
default:
driver: bridge

View File

@@ -1,47 +0,0 @@
#!/usr/bin/env bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export OPENAI_API_KEY=${OPENAI_API_KEY}
export host_ip=$(hostname -I | awk '{print $1}')
export TGI_SERVICE_PORT=3006
export TGI_LLM_ENDPOINT=http://${host_ip}:${TGI_SERVICE_PORT}
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export ASR_ENDPOINT=http://${host_ip}:7066
export TTS_ENDPOINT=http://${host_ip}:7055
export WAV2LIP_ENDPOINT=http://${host_ip}:7860
export WHISPER_SERVER_HOST_IP=${host_ip}
export WHISPER_SERVER_PORT=7066
export SPEECHT5_SERVER_HOST_IP=${host_ip}
export SPEECHT5_SERVER_PORT=7055
export MEGA_SERVICE_HOST_IP=${host_ip}
export ASR_SERVICE_HOST_IP=${host_ip}
export TTS_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export ANIMATION_SERVICE_HOST_IP=${host_ip}
export MEGA_SERVICE_PORT=8888
export ASR_SERVICE_PORT=3001
export TTS_SERVICE_PORT=3002
export LLM_SERVICE_PORT=3007
export ANIMATION_SERVICE_PORT=3008
export DEVICE="cpu"
export WAV2LIP_PORT=7860
export INFERENCE_MODE='wav2lip+gfpgan'
export CHECKPOINT_PATH='/usr/local/lib/python3.11/site-packages/Wav2Lip/checkpoints/wav2lip_gan.pth'
export FACE="/home/user/comps/animation/src/assets/img/avatar5.png"
# export AUDIO='assets/audio/eg3_ref.wav' # audio file path is optional, will use base64str in the post request as input if is 'None'
export AUDIO='None'
export FACESIZE=96
export OUTFILE="/outputs/result.mp4"
export GFPGAN_MODEL_VERSION=1.4 # latest version, can roll back to v1.3 if needed
export UPSCALE_FACTOR=1
export FPS=10

View File

@@ -1,170 +0,0 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -e
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
if ls $LOG_PATH/*.log 1> /dev/null 2>&1; then
rm $LOG_PATH/*.log
echo "Log files removed."
else
echo "No log files to remove."
fi
ip_address=$(hostname -I | awk '{print $1}')
function build_docker_images() {
cd $WORKPATH/docker_image_build
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="avatarchatbot whisper asr llm-textgen speecht5 tts wav2lip animation"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
docker images && sleep 3s
}
function start_services() {
cd $WORKPATH/docker_compose/amd/gpu/rocm
export HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN
export OPENAI_API_KEY=$OPENAI_API_KEY
export host_ip=${ip_address}
export TGI_SERVICE_PORT=3006
export TGI_LLM_ENDPOINT=http://${host_ip}:${TGI_SERVICE_PORT}
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export ASR_ENDPOINT=http://${host_ip}:7066
export TTS_ENDPOINT=http://${host_ip}:7055
export WAV2LIP_ENDPOINT=http://${host_ip}:7860
export MEGA_SERVICE_HOST_IP=${host_ip}
export ASR_SERVICE_HOST_IP=${host_ip}
export TTS_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export ANIMATION_SERVICE_HOST_IP=${host_ip}
export WHISPER_SERVER_HOST_IP=${host_ip}
export WHISPER_SERVER_PORT=7066
export SPEECHT5_SERVER_HOST_IP=${host_ip}
export SPEECHT5_SERVER_PORT=7055
export MEGA_SERVICE_PORT=8888
export ASR_SERVICE_PORT=3001
export TTS_SERVICE_PORT=3002
export LLM_SERVICE_PORT=3007
export ANIMATION_SERVICE_PORT=3008
export DEVICE="cpu"
export WAV2LIP_PORT=7860
export INFERENCE_MODE='wav2lip+gfpgan'
export CHECKPOINT_PATH='/usr/local/lib/python3.11/site-packages/Wav2Lip/checkpoints/wav2lip_gan.pth'
export FACE="/home/user/comps/animation/src/assets/img/avatar5.png"
# export AUDIO='assets/audio/eg3_ref.wav' # audio file path is optional, will use base64str in the post request as input if is 'None'
export AUDIO='None'
export FACESIZE=96
export OUTFILE="./outputs/result.mp4"
export GFPGAN_MODEL_VERSION=1.4 # latest version, can roll back to v1.3 if needed
export UPSCALE_FACTOR=1
export FPS=5
# Start Docker Containers
docker compose up -d --force-recreate
echo "Check tgi-service status"
n=0
until [[ "$n" -ge 100 ]]; do
docker logs tgi-service > $LOG_PATH/tgi_service_start.log
if grep -q Connected $LOG_PATH/tgi_service_start.log; then
break
fi
sleep 5s
n=$((n+1))
done
echo "tgi-service are up and running"
sleep 5s
echo "Check wav2lip-service status"
n=0
until [[ "$n" -ge 100 ]]; do
docker logs wav2lip-service >& $LOG_PATH/wav2lip-service_start.log
if grep -q "Application startup complete" $LOG_PATH/wav2lip-service_start.log; then
break
fi
sleep 5s
n=$((n+1))
done
echo "wav2lip-service are up and running"
sleep 5s
}
function validate_megaservice() {
cd $WORKPATH
ls
result=$(http_proxy="" curl http://${ip_address}:3009/v1/avatarchatbot -X POST -d @assets/audio/sample_whoareyou.json -H 'Content-Type: application/json')
echo "result is === $result"
if [[ $result == *"mp4"* ]]; then
echo "Result correct."
else
docker logs whisper-service > $LOG_PATH/whisper-service.log
docker logs asr-service > $LOG_PATH/asr-service.log
docker logs speecht5-service > $LOG_PATH/speecht5-service.log
docker logs tts-service > $LOG_PATH/tts-service.log
docker logs tgi-service > $LOG_PATH/tgi-service.log
docker logs llm-tgi-server > $LOG_PATH/llm-tgi-server.log
docker logs wav2lip-service > $LOG_PATH/wav2lip-service.log
docker logs animation-server > $LOG_PATH/animation-server.log
echo "Result wrong."
exit 1
fi
}
#function validate_frontend() {
#}
function stop_docker() {
cd $WORKPATH/docker_compose/amd/gpu/rocm
docker compose down && docker compose rm -f
}
function main() {
echo $OPENAI_API_KEY
echo $OPENAI_KEY
stop_docker
if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
start_services
# validate_microservices
sleep 30
validate_megaservice
# validate_frontend
stop_docker
echo y | docker system prune
}
main

View File

@@ -91,14 +91,6 @@ cd GenAIExamples/ChatQnA/docker_compose/intel/cpu/xeon/
docker compose up -d
```
To enable OpenTelemetry Tracing, the compose.telemetry.yaml file needs to be merged with the default compose.yaml file.
CPU example with the OpenTelemetry feature:
```bash
cd GenAIExamples/ChatQnA/docker_compose/intel/cpu/xeon/
docker compose -f compose.yaml -f compose.telemetry.yaml up -d
```
It will automatically download the Docker images from Docker Hub:
```bash
@@ -240,13 +232,6 @@ cd GenAIExamples/ChatQnA/docker_compose/intel/hpu/gaudi/
docker compose up -d
```
To enable OpenTelemetry Tracing, the compose.telemetry.yaml file needs to be merged with the default compose.yaml file.
```bash
cd GenAIExamples/ChatQnA/docker_compose/intel/hpu/gaudi/
docker compose -f compose.yaml -f compose.telemetry.yaml up -d
```
Refer to the [Gaudi Guide](./docker_compose/intel/hpu/gaudi/README.md) to build docker images from source.
### Deploy ChatQnA on Xeon
@@ -258,13 +243,6 @@ cd GenAIExamples/ChatQnA/docker_compose/intel/cpu/xeon/
docker compose up -d
```
To enable OpenTelemetry Tracing, the compose.telemetry.yaml file needs to be merged with the default compose.yaml file.
```bash
cd GenAIExamples/ChatQnA/docker_compose/intel/cpu/xeon/
docker compose -f compose.yaml -f compose.telemetry.yaml up -d
```
Refer to the [Xeon Guide](./docker_compose/intel/cpu/xeon/README.md) for more instructions on building docker images from source.
### Deploy ChatQnA on NVIDIA GPU
@@ -368,7 +346,7 @@ OPEA microservice deployment can easily be monitored through Grafana dashboards
## Tracing Services with OpenTelemetry Tracing and Jaeger
> NOTE: This feature is disabled by default. Please check the Deploy ChatQnA sessions for how to enable this feature with compose.telemetry.yaml file.
> NOTE: limited support. Only LLM inference serving with TGI on Gaudi is enabled for this feature.
OPEA microservice and TGI/TEI serving can easily be traced through Jaeger dashboards in conjunction with OpenTelemetry Tracing feature. Follow the [README](https://github.com/opea-project/GenAIComps/tree/main/comps/cores/telemetry#tracing) to trace additional functions if needed.
@@ -379,17 +357,8 @@ Users could also get the external IP via below command.
ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+'
```
Access the Jaeger dashboard UI at http://{EXTERNAL_IP}:16686
For TGI serving on Gaudi, users could see different services like opea, TEI and TGI.
![Screenshot from 2024-12-27 11-58-18](https://github.com/user-attachments/assets/6126fa70-e830-4780-bd3f-83cb6eff064e)
Here is a screenshot for one tracing of TGI serving request.
![Screenshot from 2024-12-27 11-26-25](https://github.com/user-attachments/assets/3a7c51c6-f422-41eb-8e82-c3df52cd48b8)
There are also OPEA related tracings. Users could understand the time breakdown of each service request by looking into each opea:schedule operation.
![image](https://github.com/user-attachments/assets/6137068b-b374-4ff8-b345-993343c0c25f)
There can be async functions such as `llm/MicroService_asyn_generate`; in that case, check the trace of the async function in another operation such as opea:llm_generate_stream.
![image](https://github.com/user-attachments/assets/a973d283-198f-4ce2-a7eb-58515b77503e)

View File

@@ -34,36 +34,16 @@ To set up environment variables for deploying ChatQnA services, follow these ste
```
3. Set up other environment variables:
```bash
source ./set_env.sh
```
4. Change Model for LLM serving
By default, Meta-Llama-3-8B-Instruct is used for LLM serving; the default model can be changed to other validated LLM models.
Please pick a [validated LLM model](https://github.com/opea-project/GenAIComps/tree/main/comps/llms/src/text-generation#validated-llm-models) from the table.
To change the default model defined in set_env.sh, overwrite it by exporting LLM_MODEL_ID to the new model or by modifying set_env.sh, and then repeat step 3.
For example, change to Llama-2-7b-chat-hf using the following command.
```bash
export LLM_MODEL_ID="meta-llama/Llama-2-7b-chat-hf"
```
## Quick Start: 2. Run Docker Compose
```bash
docker compose up -d
```
To enable OpenTelemetry Tracing, the compose.telemetry.yaml file needs to be merged with the default compose.yaml file.
CPU example with the OpenTelemetry feature:
```bash
cd GenAIExamples/ChatQnA/docker_compose/intel/cpu/xeon/
docker compose -f compose.yaml -f compose.telemetry.yaml up -d
```
It will automatically download the Docker images from Docker Hub:
```bash
@@ -283,16 +263,12 @@ If use vLLM as the LLM serving backend.
docker compose -f compose.yaml up -d
# Start ChatQnA without Rerank Pipeline
docker compose -f compose_without_rerank.yaml up -d
# Start ChatQnA with Rerank Pipeline and Open Telemetry Tracing
docker compose -f compose.yaml -f compose.telemetry.yaml up -d
```
If using TGI as the LLM serving backend.
```bash
docker compose -f compose_tgi.yaml up -d
# Start ChatQnA with Open Telemetry Tracing
docker compose -f compose_tgi.yaml -f compose_tgi.telemetry.yaml up -d
```
### Validate Microservices

View File

@@ -1,27 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
services:
tei-embedding-service:
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
tei-reranking-service:
command: --model-id ${RERANK_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
jaeger:
image: jaegertracing/all-in-one:latest
container_name: jaeger
ports:
- "16686:16686"
- "4317:4317"
- "4318:4318"
- "9411:9411"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
COLLECTOR_ZIPKIN_HOST_PORT: 9411
restart: unless-stopped
chatqna-xeon-backend-server:
environment:
- ENABLE_OPEA_TELEMETRY=true
- TELEMETRY_ENDPOINT=${TELEMETRY_ENDPOINT}

View File

@@ -1,29 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
services:
tei-embedding-service:
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
tei-reranking-service:
command: --model-id ${RERANK_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
tgi-service:
command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0 --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
jaeger:
image: jaegertracing/all-in-one:latest
container_name: jaeger
ports:
- "16686:16686"
- "4317:4317"
- "4318:4318"
- "9411:9411"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
COLLECTOR_ZIPKIN_HOST_PORT: 9411
restart: unless-stopped
chatqna-xeon-backend-server:
environment:
- ENABLE_OPEA_TELEMETRY=true
- TELEMETRY_ENDPOINT=${TELEMETRY_ENDPOINT}

View File

@@ -14,7 +14,3 @@ export INDEX_NAME="rag-redis"
# Set it as a non-null string, such as true, if you want to enable logging facility,
# otherwise, keep it as "" to disable it.
export LOGFLAG=""
# Set OpenTelemetry Tracing Endpoint
export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+')
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://$JAEGER_IP:4317
export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces

View File

@@ -10,7 +10,7 @@ Quick Start:
2. Run Docker Compose.
3. Consume the ChatQnA Service.
Note: The default LLM is `meta-llama/Meta-Llama-3-8B-Instruct`. Before deploying the application, please make sure either you've requested and been granted the access to it on [Huggingface](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) or you've downloaded the model locally from [ModelScope](https://www.modelscope.cn/models). We now support running the latest DeepSeek models, including [deepseek-ai/DeepSeek-R1-Distill-Llama-70B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-70B) and [deepseek-ai/DeepSeek-R1-Distill-Qwen-32B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B) on Gaudi accelerators. To run `deepseek-ai/DeepSeek-R1-Distill-Llama-70B`, update the `LLM_MODEL_ID` and configure `NUM_CARDS` to 8 in the [set_env.sh](./set_env.sh) script. To run `deepseek-ai/DeepSeek-R1-Distill-Qwen-32B`, update the `LLM_MODEL_ID` and configure `NUM_CARDS` to 4 in the [set_env.sh](./set_env.sh) script.
Note: The default LLM is `meta-llama/Meta-Llama-3-8B-Instruct`. Before deploying the application, please make sure either you've requested and been granted the access to it on [Huggingface](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) or you've downloaded the model locally from [ModelScope](https://www.modelscope.cn/models).
## Quick Start: 1. Setup Environment Variable
@@ -39,37 +39,12 @@ To set up environment variables for deploying ChatQnA services, follow these ste
source ./set_env.sh
```
4. Change Model for LLM serving
By default, Meta-Llama-3-8B-Instruct is used for LLM serving; the default model can be changed to other validated LLM models.
Please pick a [validated LLM model](https://github.com/opea-project/GenAIComps/tree/main/comps/llms/src/text-generation#validated-llm-models) from the table.
To change the default model defined in set_env.sh, overwrite it by exporting LLM_MODEL_ID to the new model or by modifying set_env.sh, and then repeat step 3.
For example, change to DeepSeek-R1-Distill-Qwen-32B using the following command.
```bash
export LLM_MODEL_ID="deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
```
Please also check [required gaudi cards for different models](https://github.com/opea-project/GenAIComps/tree/main/comps/llms/src/text-generation#system-requirements-for-llm-models) for new models.
It might be necessary to increase the number of Gaudi cards for the model by exporting NUM_CARDS with the required value or by modifying set_env.sh, and then repeating step 3. For example, increase the number of Gaudi cards for DeepSeek-R1-Distill-Qwen-32B using the following command:
```bash
export NUM_CARDS=4
```
## Quick Start: 2. Run Docker Compose
```bash
docker compose up -d
```
To enable OpenTelemetry Tracing, the compose.telemetry.yaml file needs to be merged with the default compose.yaml file.
```bash
docker compose -f compose.yaml -f compose.telemetry.yaml up -d
```
It will automatically download the Docker images from Docker Hub:
```bash
@@ -284,16 +259,12 @@ If use vLLM as the LLM serving backend.
docker compose -f compose.yaml up -d
# Start ChatQnA without Rerank Pipeline
docker compose -f compose_without_rerank.yaml up -d
# Start ChatQnA with Rerank Pipeline and Open Telemetry Tracing
docker compose -f compose.yaml -f compose.telemetry.yaml up -d
```
If using TGI as the LLM serving backend.
```bash
docker compose -f compose_tgi.yaml up -d
# Start ChatQnA with Open Telemetry Tracing
docker compose -f compose_tgi.yaml -f compose_tgi.telemetry.yaml up -d
```
If you want to enable guardrails microservice in the pipeline, please follow the below command instead:

View File

@@ -1,27 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
services:
tei-embedding-service:
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
tei-reranking-service:
command: --model-id ${RERANK_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
jaeger:
image: jaegertracing/all-in-one:latest
container_name: jaeger
ports:
- "16686:16686"
- "4317:4317"
- "4318:4318"
- "9411:9411"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
COLLECTOR_ZIPKIN_HOST_PORT: 9411
restart: unless-stopped
chatqna-gaudi-backend-server:
environment:
- ENABLE_OPEA_TELEMETRY=true
- TELEMETRY_ENDPOINT=${TELEMETRY_ENDPOINT}

View File

@@ -92,7 +92,6 @@ services:
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
LLM_MODEL_ID: ${LLM_MODEL_ID}
NUM_CARDS: ${NUM_CARDS}
VLLM_TORCH_PROFILER_DIR: "/mnt"
healthcheck:
test: ["CMD-SHELL", "curl -f http://$host_ip:8007/health || exit 1"]
@@ -103,7 +102,7 @@ services:
cap_add:
- SYS_NICE
ipc: host
command: --model ${LLM_MODEL_ID} --tensor-parallel-size ${NUM_CARDS} --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
command: --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
chatqna-gaudi-backend-server:
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
container_name: chatqna-gaudi-backend-server

View File

@@ -133,13 +133,12 @@ services:
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
LLM_MODEL_ID: ${LLM_MODEL_ID}
NUM_CARDS: ${NUM_CARDS}
VLLM_TORCH_PROFILER_DIR: "/mnt"
runtime: habana
cap_add:
- SYS_NICE
ipc: host
command: --model ${LLM_MODEL_ID} --tensor-parallel-size ${NUM_CARDS} --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
command: --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
chatqna-gaudi-backend-server:
image: ${REGISTRY:-opea}/chatqna-guardrails:${TAG:-latest}
container_name: chatqna-gaudi-guardrails-server

View File

@@ -1,29 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
services:
tei-embedding-service:
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
tei-reranking-service:
command: --model-id ${RERANK_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
tgi-service:
command: --model-id ${LLM_MODEL_ID} --max-input-length 2048 --max-total-tokens 4096 --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
jaeger:
image: jaegertracing/all-in-one:latest
container_name: jaeger
ports:
- "16686:16686"
- "4317:4317"
- "4318:4318"
- "9411:9411"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
COLLECTOR_ZIPKIN_HOST_PORT: 9411
restart: unless-stopped
chatqna-gaudi-backend-server:
environment:
- ENABLE_OPEA_TELEMETRY=true
- TELEMETRY_ENDPOINT=${TELEMETRY_ENDPOINT}

View File

@@ -25,6 +25,7 @@ services:
INDEX_NAME: ${INDEX_NAME}
TEI_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
TELEMETRY_ENDPOINT: ${TELEMETRY_ENDPOINT}
tei-embedding-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: tei-embedding-gaudi-server
@@ -37,7 +38,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
retriever:
image: ${REGISTRY:-opea}/retriever:${TAG:-latest}
container_name: retriever-redis-server
@@ -55,6 +56,7 @@ services:
INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
TELEMETRY_ENDPOINT: ${TELEMETRY_ENDPOINT}
LOGFLAG: ${LOGFLAG}
RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
restart: unless-stopped
@@ -78,7 +80,7 @@ services:
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
MAX_WARMUP_SEQUENCE_LENGTH: 512
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
command: --model-id ${RERANK_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
tgi-service:
image: ghcr.io/huggingface/tgi-gaudi:2.0.6
container_name: tgi-gaudi-server
@@ -99,12 +101,26 @@ services:
LIMIT_HPU_GRAPH: true
USE_FLASH_ATTENTION: true
FLASH_ATTENTION_RECOMPUTE: true
NUM_CARDS: ${NUM_CARDS}
runtime: habana
cap_add:
- SYS_NICE
ipc: host
command: --model-id ${LLM_MODEL_ID} --num-shard ${NUM_CARDS} --max-input-length 2048 --max-total-tokens 4096
command: --model-id ${LLM_MODEL_ID} --max-input-length 2048 --max-total-tokens 4096 --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
jaeger:
image: jaegertracing/all-in-one:latest
container_name: jaeger
ports:
- "16686:16686"
- "4317:4317"
- "4318:4318"
- "9411:9411"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
COLLECTOR_ZIPKIN_HOST_PORT: 9411
restart: unless-stopped
chatqna-gaudi-backend-server:
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
container_name: chatqna-gaudi-backend-server
@@ -130,6 +146,7 @@ services:
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
- LLM_MODEL=${LLM_MODEL_ID}
- LOGFLAG=${LOGFLAG}
- TELEMETRY_ENDPOINT=${TELEMETRY_ENDPOINT}
ipc: host
restart: always
chatqna-gaudi-ui-server:

View File

@@ -73,13 +73,12 @@ services:
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
LLM_MODEL_ID: ${LLM_MODEL_ID}
NUM_CARDS: ${NUM_CARDS}
VLLM_TORCH_PROFILER_DIR: "/mnt"
runtime: habana
cap_add:
- SYS_NICE
ipc: host
command: --model ${LLM_MODEL_ID} --tensor-parallel-size ${NUM_CARDS} --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
command: --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
chatqna-gaudi-backend-server:
image: ${REGISTRY:-opea}/chatqna-without-rerank:${TAG:-latest}
container_name: chatqna-gaudi-backend-server

View File

@@ -11,7 +11,6 @@ export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
export INDEX_NAME="rag-redis"
export NUM_CARDS=1
# Set it as a non-null string, such as true, if you want to enable logging facility,
# otherwise, keep it as "" to disable it.
export LOGFLAG=""

View File

@@ -29,6 +29,7 @@ function build_docker_images() {
cd $WORKPATH/docker_image_build
git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
git clone --depth 1 --branch v0.6.4.post2+Gaudi-1.19.0 https://github.com/HabanaAI/vllm-fork.git
sed -i 's/triton/triton==3.1.0/g' vllm-fork/requirements-hpu.txt
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="chatqna-guardrails chatqna-ui dataprep retriever vllm-gaudi guardrails nginx"
@@ -46,7 +47,6 @@ function start_services() {
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
export NUM_CARDS=1
export INDEX_NAME="rag-redis"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export GURADRAILS_MODEL_ID="meta-llama/Meta-Llama-Guard-2-8B"

View File

@@ -29,6 +29,7 @@ function build_docker_images() {
cd $WORKPATH/docker_image_build
git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
git clone --depth 1 --branch v0.6.4.post2+Gaudi-1.19.0 https://github.com/HabanaAI/vllm-fork.git
sed -i 's/triton/triton==3.1.0/g' vllm-fork/requirements-hpu.txt
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="chatqna chatqna-ui dataprep retriever vllm-gaudi nginx"
@@ -44,16 +45,12 @@ function start_services() {
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
export NUM_CARDS=1
export INDEX_NAME="rag-redis"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export host_ip=${ip_address}
export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+')
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://$JAEGER_IP:4317
export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces
# Start Docker Containers
docker compose -f compose.yaml -f compose.telemetry.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
n=0
until [[ "$n" -ge 160 ]]; do
echo "n=$n"
@@ -174,7 +171,7 @@ function validate_frontend() {
function stop_docker() {
cd $WORKPATH/docker_compose/intel/hpu/gaudi
docker compose -f compose.yaml -f compose.telemetry.yaml down
docker compose -f compose.yaml down
}
function main() {

View File

@@ -49,12 +49,9 @@ function start_services() {
export INDEX_NAME="rag-redis"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export host_ip=${ip_address}
export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+')
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://$JAEGER_IP:4317
export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces
# Start Docker Containers
docker compose -f compose.yaml -f compose.telemetry.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
n=0
until [[ "$n" -ge 100 ]]; do
docker logs vllm-service > ${LOG_PATH}/vllm_service_start.log 2>&1
@@ -175,7 +172,7 @@ function validate_frontend() {
function stop_docker() {
cd $WORKPATH/docker_compose/intel/cpu/xeon
docker compose -f compose.yaml -f compose.telemetry.yaml down
docker compose -f compose.yaml down
}
function main() {

View File

@@ -46,7 +46,6 @@ function start_services() {
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
export NUM_CARDS=1
export INDEX_NAME="rag-redis"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+')
@@ -54,7 +53,7 @@ function start_services() {
export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces
# Start Docker Containers
docker compose -f compose_tgi.yaml -f compose_tgi.telemetry.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
docker compose -f compose_tgi.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
n=0
until [[ "$n" -ge 500 ]]; do
@@ -218,7 +217,7 @@ function validate_frontend() {
function stop_docker() {
cd $WORKPATH/docker_compose/intel/hpu/gaudi
docker compose -f compose_tgi.yaml -f compose_tgi.telemetry.yaml down
docker compose -f compose_tgi.yaml down
}
function main() {

View File

@@ -48,12 +48,9 @@ function start_services() {
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
export INDEX_NAME="rag-redis"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+')
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://$JAEGER_IP:4317
export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces
# Start Docker Containers
docker compose -f compose_tgi.yaml -f compose_tgi.telemetry.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
docker compose -f compose_tgi.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
n=0
until [[ "$n" -ge 100 ]]; do
@@ -219,7 +216,7 @@ function validate_frontend() {
function stop_docker() {
cd $WORKPATH/docker_compose/intel/cpu/xeon
docker compose -f compose_tgi.yaml -f compose_tgi.telemetry.yaml down
docker compose -f compose_tgi.yaml down
}
function main() {

View File

@@ -29,6 +29,7 @@ function build_docker_images() {
cd $WORKPATH/docker_image_build
git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
git clone --depth 1 --branch v0.6.4.post2+Gaudi-1.19.0 https://github.com/HabanaAI/vllm-fork.git
sed -i 's/triton/triton==3.1.0/g' vllm-fork/requirements-hpu.txt
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="chatqna-without-rerank chatqna-ui dataprep retriever vllm-gaudi nginx"
@@ -44,7 +45,6 @@ function start_services() {
cd $WORKPATH/docker_compose/intel/hpu/gaudi
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
export NUM_CARDS=1
export INDEX_NAME="rag-redis"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}

View File

@@ -1,126 +0,0 @@
# Deploy on AMD GPU
This document outlines the deployment process for the DBQnA application, which helps generate a SQL query and its output given an NLP question, utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on an AMD GPU. The steps include Docker image creation, container deployment via Docker Compose, and service execution to integrate microservices. We will publish the Docker images to Docker Hub soon, which will simplify the deployment process for this service.
## 🚀 Build Docker Images
First of all, you need to build the Docker images locally. This step can be skipped once the Docker images are published to Docker Hub.
### 1.1 Build Text to SQL service Image
```bash
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
docker build --no-cache -t opea/texttosql:latest -f comps/text2sql/src/Dockerfile .
```
### 1.2 Build react UI Docker Image
Build the frontend Docker image based on the React framework using the command below:
```bash
cd GenAIExamples/DBQnA/ui
docker build --no-cache -t opea/dbqna-react-ui:latest --build-arg texttosql_url=$textToSql_host:$textToSql_port/v1 -f docker/Dockerfile.react .
```
Attention! Replace $textToSql_host and $textToSql_port with your own values.
Then run the command `docker images`; you will have the following Docker images:
1. `opea/texttosql:latest`
2. `opea/dbqna-react-ui:latest`
## 🚀 Start Microservices
### Required Models
We set the default model to "mistralai/Mistral-7B-Instruct-v0.3"; change "LLM_MODEL_ID" in the following environment variable settings if you want to use other models.
If you use gated models, you also need to provide a [huggingface token](https://huggingface.co/docs/hub/security-tokens) via the "HUGGINGFACEHUB_API_TOKEN" environment variable (see the sketch below).
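A minimal sketch, assuming the variable names used in section 2.1 below (the token value is a placeholder):
```bash
# Sketch: pick the LLM and supply a Hugging Face token for gated models;
# both values are consumed by compose.yaml through the exports in section 2.1.
export DBQNA_LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"
export DBQNA_HUGGINGFACEHUB_API_TOKEN="<your_hf_token>"
```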
### 2.1 Setup Environment Variables
Since the `compose.yaml` consumes some environment variables, you need to set them up in advance as below.
```bash
export host_ip="host_ip_address_or_dns_name"
export DBQNA_HUGGINGFACEHUB_API_TOKEN=""
export DBQNA_TGI_SERVICE_PORT=8008
export DBQNA_TGI_LLM_ENDPOINT="http://${host_ip}:${DBQNA_TGI_SERVICE_PORT}"
export DBQNA_LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"
export MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"
export POSTGRES_USER="postgres"
export POSTGRES_PASSWORD="testpwd"
export POSTGRES_DB="chinook"
export DBQNA_TEXT_TO_SQL_PORT=18142
export DBQNA_UI_PORT=18143
```
Note: Please replace `host_ip_address_or_dns_name` with your external IP address or DNS name; do not use localhost.
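One way to fill it in automatically on a Linux host (a sketch; it assumes the first address reported by `hostname -I` is the externally reachable one, as in other examples in this repository):
```bash
# Sketch: auto-detect the host's primary IP instead of hard-coding it.
export host_ip=$(hostname -I | awk '{print $1}')
echo "host_ip=${host_ip}"
```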
### 2.2 Start Microservice Docker Containers
There are 2 options to start the microservice
#### 2.2.1 Start the microservice using docker compose
```bash
cd GenAIExamples/DBQnA/docker_compose/amd/gpu/rocm
docker compose up -d
```
## 🚀 Validate Microservices
### 3.1 TGI Service
```bash
curl http://${host_ip}:$DBQNA_TGI_SERVICE_PORT/generate \
-X POST \
-d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
-H 'Content-Type: application/json'
```
### 3.2 Postgres Microservice
Once the Text-to-SQL microservice is started, users can use the commands below.
#### 3.2.1 Test the Database connection
```bash
curl --location http://${host_ip}:${DBQNA_TEXT_TO_SQL_PORT}/v1/postgres/health \
--header 'Content-Type: application/json' \
--data '{"user": "'${POSTGRES_USER}'","password": "'${POSTGRES_PASSWORD}'","host": "'${host_ip}'", "port": "5442", "database": "'${POSTGRES_DB}'"}'
```
#### 3.2.2 Invoke the microservice.
```bash
curl http://${host_ip}:${DBQNA_TEXT_TO_SQL_PORT}/v1/texttosql \
-X POST \
-d '{"input_text": "Find the total number of Albums.","conn_str": {"user": "'${POSTGRES_USER}'","password": "'${POSTGRES_PASSWORD}'","host": "'${host_ip}'", "port": "5442", "database": "'${POSTGRES_DB}'"}}' \
-H 'Content-Type: application/json'
```
### 3.3 Frontend validation
Frontend validation tests the API to check that it returns HTTP status 200 and verifies that the API response contains the SQL query and its output.
The test is present in App.test.tsx under the React root folder ui/react/.
Command to run the test:
```bash
npm run test
```
## 🚀 Launch the React UI
Open this URL `http://${host_ip}:${DBQNA_UI_PORT}` in your browser to access the frontend.
![project-screenshot](../../../../assets/img/dbQnA_ui_init.png)
Test DB Connection
![project-screenshot](../../../../assets/img/dbQnA_ui_successful_db_connection.png)
Create SQL query and output for given NLP question
![project-screenshot](../../../../assets/img/dbQnA_ui_succesful_sql_output_generation.png)

File diff suppressed because it is too large

View File

@@ -1,75 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
version: "3.8"
services:
dbqna-tgi-service:
image: ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
container_name: dbqna-tgi-service
ports:
- "${DBQNA_TGI_SERVICE_PORT:-8008}:80"
volumes:
- "./data:/data"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_SERVICE_PORT: ${DBQNA_TGI_SERVICE_PORT}
MODEL_ID: ${DBQNA_LLM_MODEL_ID}
HUGGING_FACE_HUB_TOKEN: ${DBQNA_HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${DBQNA_HUGGINGFACEHUB_API_TOKEN}
shm_size: 1g
devices:
- /dev/kfd:/dev/kfd
- /dev/dri/:/dev/dri/
cap_add:
- SYS_PTRACE
group_add:
- video
security_opt:
- seccomp:unconfined
ipc: host
command: --model-id ${MODEL_ID} --max-input-length 2048 --max-total-tokens 4096
postgres:
image: postgres:latest
container_name: postgres-container
restart: always
environment:
POSTGRES_USER: ${POSTGRES_USER}
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
POSTGRES_DB: ${POSTGRES_DB}
ports:
- '5442:5432'
volumes:
- ./chinook.sql:/docker-entrypoint-initdb.d/chinook.sql
text2sql:
image: opea/text2sql:latest
container_name: text2sql
ports:
- "${DBQNA_TEXT_TO_SQL_PORT:-9090}:8080"
environment:
TGI_LLM_ENDPOINT: ${DBQNA_TGI_LLM_ENDPOINT}
text2sql-react-ui:
image: opea/text2sql-react-ui:latest
container_name: text2sql-react-ui
depends_on:
- text2sql
ports:
- "${DBQNA_UI_PORT:-5174}:80"
environment:
no_proxy: ${no_proxy}
https_proxy: ${https_proxy}
http_proxy: ${http_proxy}
texttosql_port: ${texttosql_port}
ipc: host
restart: always
networks:
default:
driver: bridge

View File

@@ -1,16 +0,0 @@
#!/usr/bin/env bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
export host_ip=""
export DBQNA_HUGGINGFACEHUB_API_TOKEN=""
export DBQNA_TGI_SERVICE_PORT=8008
export DBQNA_TGI_LLM_ENDPOINT="http://${host_ip}:${DBQNA_TGI_SERVICE_PORT}"
export DBQNA_LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"
export MODEL_ID=${DBQNA_LLM_MODEL_ID}
export POSTGRES_USER="postgres"
export POSTGRES_PASSWORD="testpwd"
export POSTGRES_DB="chinook"
export DBQNA_TEXT_TO_SQL_PORT=9090
export DBQNA_UI_PORT=5174

View File

@@ -19,5 +19,4 @@ services:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
no_proxy: ${no_proxy}
texttosql_url: ${build_texttosql_url}
image: ${REGISTRY:-opea}/text2sql-react-ui:${TAG:-latest}

View File

@@ -1,120 +0,0 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -xe
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')
tgi_port=8008
tgi_volume=$WORKPATH/data
export host_ip=${ip_address}
export DBQNA_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export DBQNA_TGI_SERVICE_PORT=8008
export DBQNA_TGI_LLM_ENDPOINT="http://${host_ip}:${DBQNA_TGI_SERVICE_PORT}"
export DBQNA_LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"
export MODEL_ID=${DBQNA_LLM_MODEL_ID}
export POSTGRES_USER="postgres"
export POSTGRES_PASSWORD="testpwd"
export POSTGRES_DB="chinook"
export DBQNA_TEXT_TO_SQL_PORT=9090
export DBQNA_UI_PORT=5174
export build_texttosql_url="${ip_address}:${DBQNA_TEXT_TO_SQL_PORT}/v1"
function build_docker_images() {
cd "$WORKPATH"/docker_image_build
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="text2sql text2sql-react-ui"
docker compose -f build.yaml build ${service_list} --no-cache > "${LOG_PATH}"/docker_image_build.log
docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
docker images && sleep 1s
}
function start_service() {
cd "$WORKPATH"/docker_compose/amd/gpu/rocm
# Start Docker Containers
docker compose up -d > "${LOG_PATH}"/start_services_with_compose.log
n=0
until [[ "$n" -ge 100 ]]; do
docker logs dbqna-tgi-service > "${LOG_PATH}"/tgi_service_start.log
if grep -q Connected "${LOG_PATH}"/tgi_service_start.log; then
break
fi
sleep 5s
n=$((n+1))
done
}
function validate_microservice() {
result=$(http_proxy="" curl --connect-timeout 5 --max-time 120000 http://${ip_address}:${DBQNA_TEXT_TO_SQL_PORT}/v1/text2sql \
-X POST \
-d '{"input_text": "Find the total number of Albums.","conn_str": {"user": "'${POSTGRES_USER}'","password": "'${POSTGRES_PASSWORD}'","host": "'${ip_address}'", "port": "5442", "database": "'${POSTGRES_DB}'" }}' \
-H 'Content-Type: application/json')
if [[ $result == *"output"* ]]; then
echo $result
echo "Result correct."
else
echo "Result wrong. Received was $result"
docker logs text2sql > ${LOG_PATH}/text2sql.log
docker logs dbqna-tgi-service > ${LOG_PATH}/tgi.log
exit 1
fi
}
function validate_frontend() {
echo "[ TEST INFO ]: --------- frontend test started ---------"
cd $WORKPATH/ui/react
local conda_env_name="OPEA_e2e"
export PATH=${HOME}/miniconda3/bin/:$PATH
if conda info --envs | grep -q "$conda_env_name"; then
echo "$conda_env_name exist!"
else
conda create -n ${conda_env_name} python=3.12 -y
fi
source activate ${conda_env_name}
echo "[ TEST INFO ]: --------- conda env activated ---------"
conda install -c conda-forge nodejs=22.6.0 -y
npm install && npm ci
node -v && npm -v && pip list
exit_status=0
npm run test || exit_status=$?
if [ $exit_status -ne 0 ]; then
echo "[TEST INFO]: ---------frontend test failed---------"
exit $exit_status
else
echo "[TEST INFO]: ---------frontend test passed---------"
fi
}
function stop_docker() {
cd $WORKPATH/docker_compose/amd/gpu/rocm/
docker compose stop && docker compose rm -f
}
function main() {
stop_docker
build_docker_images
start_service
sleep 10s
validate_microservice
validate_frontend
stop_docker
echo y | docker system prune
}
main

View File

@@ -3,13 +3,8 @@
# Stage 1: Build the React application using Node.js
# Use Node 20.11.1 as the base image for the build step
FROM node:20.11.1 AS vite-app
ARG texttosql_url
ENV TEXT_TO_SQL_URL=$texttosql_url
WORKDIR /usr/app/react
COPY react /usr/app/react
@@ -21,10 +16,6 @@ RUN ["npm", "run", "build"]
FROM nginx:alpine
ARG texttosql_url
ENV TEXT_TO_SQL_URL=$texttosql_url
EXPOSE 80
COPY --from=vite-app /usr/app/react/dist /usr/share/nginx/html

View File

@@ -1 +1 @@
VITE_TEXT_TO_SQL_URL=${TEXT_TO_SQL_URL}
VITE_TEXT_TO_SQL_URL=http://${HOSTNAME}:9090/v1

View File

@@ -26,7 +26,7 @@ test('testing api with dynamic host', async () => {
const formData = {
user: 'postgres',
database: 'chinook',
host: host,
host: host, // Dynamic IP
password: 'testpwd',
port: '5442',
};

View File

@@ -24,7 +24,7 @@ export default defineConfig({
},
define: {
// Dynamically set the hostname for the VITE_TEXT_TO_SQL_URL
"import.meta.env.VITE_TEXT_TO_SQL_URL": JSON.stringify(`http://${process.env.TEXT_TO_SQL_URL}`),
"import.meta.env.VITE_TEXT_TO_SQL_URL": JSON.stringify(`http://${os.hostname()}:9090/v1`),
"import.meta.env": process.env,
},
});

View File

@@ -8,7 +8,7 @@ services:
ports:
- ${LLM_ENDPOINT_PORT:-8008}:80
volumes:
- "${DATA_PATH:-./data}:/data"
- "${DATA_PATH:-data}:/data"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}

View File

@@ -296,8 +296,8 @@ class DocSumUI:
audio_ui.render()
with gr.TabItem("Upload Video"):
video_ui.render()
# with gr.TabItem("Enter URL"):
# url_ui.render()
with gr.TabItem("Enter URL"):
url_ui.render()
return self.demo
@@ -313,8 +313,6 @@ app = gr.mount_gradio_app(app, demo, path="/")
if __name__ == "__main__":
import argparse
import nltk
parser = argparse.ArgumentParser()
parser.add_argument("--host", type=str, default="0.0.0.0")
parser.add_argument("--port", type=int, default=5173)
@@ -322,8 +320,4 @@ if __name__ == "__main__":
args = parser.parse_args()
logger.info(">>> Starting server at %s:%d", args.host, args.port)
# Needed for UnstructuredURLLoader when reading content from a URL
nltk.download("punkt_tab")
nltk.download("averaged_perceptron_tagger_eng")
uvicorn.run(app, host=args.host, port=args.port)

View File

@@ -8,7 +8,7 @@ services:
ports:
- ${LLM_ENDPOINT_PORT:-8008}:80
volumes:
- "${DATA_PATH:-./data}:/data"
- "${DATA_PATH:-data}:/data"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}

View File

View File

@@ -52,7 +52,6 @@ export async function fetchTextStream(query: string | Blob, params: string, file
}
const reader = postResponse.body.getReader();
const decoder = new TextDecoder("utf-8");
let done, value;
let buffer = ""; // Initialize a buffer
@@ -62,7 +61,6 @@ export async function fetchTextStream(query: string | Blob, params: string, file
// Decode chunk and append to buffer
const chunk = decoder.decode(value, { stream: true });
buffer += chunk;
// Use regex to clean and extract data
@@ -74,21 +72,6 @@ export async function fetchTextStream(query: string | Blob, params: string, file
})
.filter((line) => line); // Remove empty lines
const validJsonChunks = cleanedChunks.filter((item) => {
if (item === "[DONE]") {
return true;
}
try {
JSON.parse(item);
return true;
} catch (e) {
return false;
}
});
cleanedChunks.length = 0;
cleanedChunks.push(...validJsonChunks);
for (const cleanedChunk of cleanedChunks) {
// Further clean to ensure all unnecessary parts are removed
yield cleanedChunk.replace(/^b'|['"]$/g, ""); // Again clean 'b' and other single or double quotes

View File

@@ -36,7 +36,6 @@
urlSuffix: string,
params: string
) => {
messages = "";
// Fetch the stream
const eventStream = await fetchTextStream(
query,

View File

@@ -50,7 +50,7 @@ Deployment are based on released docker images by default, check [docker image l
| CodeTrans | [Xeon Instructions](CodeTrans/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](CodeTrans/docker_compose/intel/hpu/gaudi/README.md) | [ROCm Instructions](CodeTrans/docker_compose/amd/gpu/rocm/README.md) | [CodeTrans with Helm Charts](CodeTrans/kubernetes/helm/README.md) | [CodeTrans with GMC](CodeTrans/kubernetes/gmc/README.md) |
| DocSum | [Xeon Instructions](DocSum/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](DocSum/docker_compose/intel/hpu/gaudi/README.md) | [ROCm Instructions](DocSum/docker_compose/amd/gpu/rocm/README.md) | [DocSum with Helm Charts](DocSum/kubernetes/helm/README.md) | [DocSum with GMC](DocSum/kubernetes/gmc/README.md) |
| SearchQnA | [Xeon Instructions](SearchQnA/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](SearchQnA/docker_compose/intel/hpu/gaudi/README.md) | Not Supported | [SearchQnA with Helm Charts](SearchQnA/kubernetes/helm/README.md) | [SearchQnA with GMC](SearchQnA/kubernetes/gmc/README.md) |
| FaqGen | [Xeon Instructions](FaqGen/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](FaqGen/docker_compose/intel/hpu/gaudi/README.md) | [ROCm Instructions](FaqGen/docker_compose/amd/gpu/rocm/README.md) | [FaqGen with Helm Charts](FaqGen/kubernetes/helm/README.md) | Not supported |
| FaqGen | [Xeon Instructions](FaqGen/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](FaqGen/docker_compose/intel/hpu/gaudi/README.md) | [ROCm Instructions](FaqGen/docker_compose/amd/gpu/rocm/README.md) | [FaqGen with Helm Charts](FaqGen/kubernetes/helm/README.md) | [FaqGen with GMC](FaqGen/kubernetes/gmc/README.md) |
| Translation | [Xeon Instructions](Translation/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](Translation/docker_compose/intel/hpu/gaudi/README.md) | [ROCm Instructions](Translation/docker_compose/amd/gpu/rocm/README.md) | Not Supported | [Translation with GMC](Translation/kubernetes/gmc/README.md) |
| AudioQnA | [Xeon Instructions](AudioQnA/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](AudioQnA/docker_compose/intel/hpu/gaudi/README.md) | [ROCm Instructions](AudioQnA/docker_compose/amd/gpu/rocm/README.md) | [AudioQnA with Helm Charts](AudioQnA/kubernetes/helm/README.md) | [AudioQnA with GMC](AudioQnA/kubernetes/gmc/README.md) |
| VisualQnA | [Xeon Instructions](VisualQnA/docker_compose/intel/cpu/xeon/README.md) | [Gaudi Instructions](VisualQnA/docker_compose/intel/hpu/gaudi/README.md) | [ROCm Instructions](VisualQnA/docker_compose/amd/gpu/rocm/README.md) | [VisualQnA with Helm Charts](VisualQnA/kubernetes/helm/README.md) | [VisualQnA with GMC](VisualQnA/kubernetes/gmc/README.md) |

View File

@@ -11,12 +11,3 @@ services:
context: GenAIComps
dockerfile: comps/finetuning/src/Dockerfile
image: ${REGISTRY:-opea}/finetuning:${TAG:-latest}
finetuning-gaudi:
build:
args:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
no_proxy: ${no_proxy}
context: GenAIComps
dockerfile: comps/finetuning/src/Dockerfile.intel_hpu
image: ${REGISTRY:-opea}/finetuning-gaudi:${TAG:-latest}

View File

@@ -1,131 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -x
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')
finetuning_service_port=8015
ray_port=8265
service_name=finetuning-gaudi
function build_docker_images() {
cd $WORKPATH/docker_image_build
if [ ! -d "GenAIComps" ] ; then
git clone --depth 1 --branch ${opea_branch:-"main"} https://github.com/opea-project/GenAIComps.git
fi
docker compose -f build.yaml build ${service_name} --no-cache > ${LOG_PATH}/docker_image_build.log
}
function start_service() {
export no_proxy="localhost,127.0.0.1,"${ip_address}
docker run -d --name="finetuning-server" -p $finetuning_service_port:$finetuning_service_port -p $ray_port:$ray_port --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy ${IMAGE_REPO}/finetuning-gaudi:${IMAGE_TAG}
sleep 1m
}
function validate_microservice() {
cd $LOG_PATH
export no_proxy="localhost,127.0.0.1,"${ip_address}
# test /v1/dataprep upload file
URL="http://${ip_address}:$finetuning_service_port/v1/files"
cat <<EOF > test_data.json
{"query": "Five women walk along a beach wearing flip-flops.", "pos": ["Some women with flip-flops on, are walking along the beach"], "neg": ["The 4 women are sitting on the beach.", "There was a reform in 1996.", "She's not going to court to clear her record.", "The man is talking about hawaii.", "A woman is standing outside.", "The battle was over. ", "A group of people plays volleyball."]}
{"query": "A woman standing on a high cliff on one leg looking over a river.", "pos": ["A woman is standing on a cliff."], "neg": ["A woman sits on a chair.", "George Bush told the Republicans there was no way he would let them even consider this foolish idea, against his top advisors advice.", "The family was falling apart.", "no one showed up to the meeting", "A boy is sitting outside playing in the sand.", "Ended as soon as I received the wire.", "A child is reading in her bedroom."]}
{"query": "Two woman are playing instruments; one a clarinet, the other a violin.", "pos": ["Some people are playing a tune."], "neg": ["Two women are playing a guitar and drums.", "A man is skiing down a mountain.", "The fatal dose was not taken when the murderer thought it would be.", "Person on bike", "The girl is standing, leaning against the archway.", "A group of women watch soap operas.", "No matter how old people get they never forget. "]}
{"query": "A girl with a blue tank top sitting watching three dogs.", "pos": ["A girl is wearing blue."], "neg": ["A girl is with three cats.", "The people are watching a funeral procession.", "The child is wearing black.", "Financing is an issue for us in public schools.", "Kids at a pool.", "It is calming to be assaulted.", "I face a serious problem at eighteen years old. "]}
{"query": "A yellow dog running along a forest path.", "pos": ["a dog is running"], "neg": ["a cat is running", "Steele did not keep her original story.", "The rule discourages people to pay their child support.", "A man in a vest sits in a car.", "Person in black clothing, with white bandanna and sunglasses waits at a bus stop.", "Neither the Globe or Mail had comments on the current state of Canada's road system. ", "The Spring Creek facility is old and outdated."]}
{"query": "It sets out essential activities in each phase along with critical factors related to those activities.", "pos": ["Critical factors for essential activities are set out."], "neg": ["It lays out critical activities but makes no provision for critical factors related to those activities.", "People are assembled in protest.", "The state would prefer for you to do that.", "A girl sits beside a boy.", "Two males are performing.", "Nobody is jumping", "Conrad was being plotted against, to be hit on the head."]}
EOF
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F 'file=@./test_data.json' -F purpose="fine-tune" -H 'Content-Type: multipart/form-data' "$URL")
HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://')
RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g')
SERVICE_NAME="finetuning-server - upload - file"
# Parse the JSON response
purpose=$(echo "$RESPONSE_BODY" | jq -r '.purpose')
filename=$(echo "$RESPONSE_BODY" | jq -r '.filename')
# Define expected values
expected_purpose="fine-tune"
expected_filename="test_data.json"
if [ "$HTTP_STATUS" -ne "200" ]; then
echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
docker logs finetuning-server >> ${LOG_PATH}/finetuning-server_upload_file.log
exit 1
else
echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
fi
# Check if the parsed values match the expected values
if [[ "$purpose" != "$expected_purpose" || "$filename" != "$expected_filename" ]]; then
echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY"
docker logs finetuning-server >> ${LOG_PATH}/finetuning-server_upload_file.log
exit 1
else
echo "[ $SERVICE_NAME ] Content is as expected."
fi
# test /v1/fine_tuning/jobs
URL="http://${ip_address}:$finetuning_service_port/v1/fine_tuning/jobs"
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -H 'Content-Type: application/json' -d '{"training_file": "test_data.json","model": "BAAI/bge-reranker-base","General":{"task":"rerank","lora_config":null}}' "$URL")
HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://')
RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g')
SERVICE_NAME="finetuning-server - create finetuning job"
if [ "$HTTP_STATUS" -ne "200" ]; then
echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
docker logs finetuning-server >> ${LOG_PATH}/finetuning-server_create.log
exit 1
else
echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
fi
if [[ "$RESPONSE_BODY" != *'{"id":"ft-job'* ]]; then
echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY"
docker logs finetuning-server >> ${LOG_PATH}/finetuning-server_create.log
exit 1
else
echo "[ $SERVICE_NAME ] Content is as expected."
fi
sleep 3m
docker logs finetuning-server 2>&1 | tee ${LOG_PATH}/finetuning-server_create.log
FINETUNING_LOG=$(grep "succeeded" ${LOG_PATH}/finetuning-server_create.log)
if [[ "$FINETUNING_LOG" != *'succeeded'* ]]; then
echo "Finetuning failed."
RAY_JOBID=$(grep "Submitted Ray job" ${LOG_PATH}/finetuning-server_create.log | sed 's/.*raysubmit/raysubmit/' | cut -d' ' -f 1)
docker exec finetuning-server python -c "import os;os.environ['RAY_ADDRESS']='http://localhost:8265';from ray.job_submission import JobSubmissionClient;client = JobSubmissionClient();print(client.get_job_logs('${RAY_JOBID}'))" 2>&1 | tee ${LOG_PATH}/finetuning.log
exit 1
else
echo "Finetuning succeeded."
fi
}
function stop_docker() {
cid=$(docker ps -aq --filter "name=finetuning-server*")
if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
}
function main() {
stop_docker
build_docker_images
start_service
validate_microservice
stop_docker
echo y | docker system prune
}
main

View File

@@ -14,14 +14,13 @@ LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')
finetuning_service_port=8015
ray_port=8265
service_name=finetuning
function build_docker_images() {
cd $WORKPATH/docker_image_build
if [ ! -d "GenAIComps" ] ; then
git clone --depth 1 --branch ${opea_branch:-"main"} https://github.com/opea-project/GenAIComps.git
fi
docker compose -f build.yaml build ${service_name} --no-cache > ${LOG_PATH}/docker_image_build.log
docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log
}
function start_service() {
@@ -95,18 +94,7 @@ EOF
echo "[ $SERVICE_NAME ] Content is as expected."
fi
sleep 3m
docker logs finetuning-server 2>&1 | tee ${LOG_PATH}/finetuning-server_create.log
FINETUNING_LOG=$(grep "succeeded" ${LOG_PATH}/finetuning-server_create.log)
if [[ "$FINETUNING_LOG" != *'succeeded'* ]]; then
echo "Finetuning failed."
RAY_JOBID=$(grep "Submitted Ray job" ${LOG_PATH}/finetuning-server_create.log | sed 's/.*raysubmit/raysubmit/' | cut -d' ' -f 1)
docker exec finetuning-server python -c "import os;os.environ['RAY_ADDRESS']='http://localhost:8265';from ray.job_submission import JobSubmissionClient;client = JobSubmissionClient();print(client.get_job_logs('${RAY_JOBID}'))" 2>&1 | tee ${LOG_PATH}/finetuning.log
exit 1
else
echo "Finetuning succeeded."
fi
sleep 1s
}
function stop_docker() {

View File

@@ -1,179 +0,0 @@
# Build and deploy SearchQnA Application on AMD GPU (ROCm)
## Build images
### Build Embedding Image
```bash
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
docker build --no-cache -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/src/Dockerfile .
```
### Build Retriever Image
```bash
docker build --no-cache -t opea/web-retriever-chroma:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/web_retrievers/src/Dockerfile .
```
### Build Rerank Image
```bash
docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/rerankings/src/Dockerfile .
```
### Build the LLM Docker Image
```bash
docker build -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/text-generation/Dockerfile .
```
### Build the MegaService Docker Image
```bash
git clone https://github.com/opea-project/GenAIExamples.git
cd GenAIExamples/SearchQnA
docker build --no-cache -t opea/searchqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
```
### Build the UI Docker Image
```bash
cd GenAIExamples/SearchQnA/ui
docker build --no-cache -t opea/searchqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile .
```
## Deploy SearchQnA Application
### Features of Docker compose for AMD GPUs
1. GPU devices are forwarded to the TGI service container with the following instructions:
```yaml
shm_size: 1g
devices:
- /dev/kfd:/dev/kfd
- /dev/dri/:/dev/dri/
cap_add:
- SYS_PTRACE
group_add:
- video
security_opt:
- seccomp:unconfined
```
In this case, all GPUs are passed through to the container. To pass through only a specific GPU, use its device names cardN and renderN.
For example:
```yaml
shm_size: 1g
devices:
- /dev/kfd:/dev/kfd
- /dev/dri/card0:/dev/dri/card0
- /dev/dri/render128:/dev/dri/render128
cap_add:
- SYS_PTRACE
group_add:
- video
security_opt:
- seccomp:unconfined
```
To find out which cardN and renderN device IDs correspond to the same GPU, use a GPU driver utility.
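A minimal sketch of that check, assuming the amdgpu driver is loaded and the ROCm `rocm-smi` utility is installed:
```bash
# List the card/render nodes together with their PCI bus paths (amdgpu driver assumed)
ls -l /dev/dri/by-path/
# Show the PCI bus of each GPU known to ROCm, to match against the paths above
rocm-smi --showbus
```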
### Go to the directory with the Docker compose file
```bash
cd GenAIExamples/SearchQnA/docker_compose/amd/gpu/rocm
```
### Set environments
In the file "GenAIExamples/SearchQnA/docker_compose/amd/gpu/rocm/set_env.sh " it is necessary to set the required values. Parameter assignments are specified in the comments for each variable setting command
```bash
chmod +x set_env.sh
. set_env.sh
```
### Run services
```bash
docker compose up -d
```
# Validate the MicroServices and MegaService
## Validate TEI service
```bash
curl http://${SEARCH_HOST_IP}:3001/embed \
-X POST \
-d '{"inputs":"What is Deep Learning?"}' \
-H 'Content-Type: application/json'
```
## Validate Embedding service
```bash
curl http://${SEARCH_HOST_IP}:3002/v1/embeddings\
-X POST \
-d '{"text":"hello"}' \
-H 'Content-Type: application/json'
```
## Validate Web Retriever service
```bash
export your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
curl http://${SEARCH_HOST_IP}:3003/v1/web_retrieval \
-X POST \
-d "{\"text\":\"What is the 2024 holiday schedule?\",\"embedding\":${your_embedding}}" \
-H 'Content-Type: application/json'
```
## Validate TEI Reranking service
```bash
curl http://${SEARCH_HOST_IP}:3004/rerank \
-X POST \
-d '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' \
-H 'Content-Type: application/json'
```
## Validate Reranking service
```bash
curl http://${SEARCH_HOST_IP}:3005/v1/reranking\
-X POST \
-d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
-H 'Content-Type: application/json'
```
## Validate TGI service
```bash
curl http://${SEARCH_HOST_IP}:3006/generate \
-X POST \
-d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
-H 'Content-Type: application/json'
```
## Validate LLM service
```bash
curl http://${SEARCH_HOST_IP}:3007/v1/chat/completions\
-X POST \
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
-H 'Content-Type: application/json'
```
## Validate MegaService
```bash
curl http://${SEARCH_HOST_IP}:3008/v1/searchqna -H "Content-Type: application/json" -d '{
"messages": "What is the latest news? Give me also the source link.",
"stream": "True"
}'
```

View File

@@ -1,173 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
services:
search-tei-embedding-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: search-tei-embedding-server
ports:
- "3001:80"
volumes:
- "./data:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
HUGGING_FACE_HUB_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
command: --model-id ${SEARCH_EMBEDDING_MODEL_ID} --auto-truncate
search-embedding:
image: ${REGISTRY:-opea}/embedding:${TAG:-latest}
container_name: search-embedding-server
depends_on:
- search-tei-embedding-service
ports:
- "3002:6000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_EMBEDDING_HOST_IP: ${SEARCH_HOST_IP}
TEI_EMBEDDING_ENDPOINT: ${SEARCH_TEI_EMBEDDING_ENDPOINT}
HF_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
search-web-retriever:
image: ${REGISTRY:-opea}/web-retriever:${TAG:-latest}
container_name: search-web-retriever-server
ports:
- "3003:7077"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_EMBEDDING_ENDPOINT: ${SEARCH_TEI_EMBEDDING_ENDPOINT}
GOOGLE_API_KEY: ${SEARCH_GOOGLE_API_KEY}
GOOGLE_CSE_ID: ${SEARCH_GOOGLE_CSE_ID}
restart: unless-stopped
search-tei-reranking-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: search-tei-reranking-server
ports:
- "3004:80"
volumes:
- "./data:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
command: --model-id ${SEARCH_RERANK_MODEL_ID} --auto-truncate
search-reranking:
image: ${REGISTRY:-opea}/reranking:${TAG:-latest}
container_name: search-reranking-server
depends_on:
- search-tei-reranking-service
ports:
- "3005:8000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_RERANKING_ENDPOINT: ${SEARCH_TEI_RERANKING_ENDPOINT}
HF_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
HUGGING_FACE_HUB_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
search-tgi-service:
image: ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
container_name: search-tgi-service
ports:
- "3006:80"
volumes:
- "./data:/data"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGING_FACE_HUB_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
shm_size: 1g
devices:
- /dev/kfd:/dev/kfd
- /dev/dri/:/dev/dri/
cap_add:
- SYS_PTRACE
group_add:
- video
security_opt:
- seccomp:unconfined
ipc: host
command: --model-id ${SEARCH_LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048
search-llm:
image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest}
container_name: search-llm-server
depends_on:
- search-tgi-service
ports:
- "3007:9000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: ${SEARCH_TGI_LLM_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
LLM_ENDPOINT: ${SEARCH_TGI_LLM_ENDPOINT}
LLM_MODEL_ID: ${SEARCH_LLM_MODEL_ID}
LLM_MODEL: ${SEARCH_LLM_MODEL_ID}
HF_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
OPENAI_API_KEY: ${SEARCH_OPENAI_API_KEY}
restart: unless-stopped
search-backend-server:
image: ${REGISTRY:-opea}/searchqna:${TAG:-latest}
container_name: search-backend-server
depends_on:
- search-tei-embedding-service
- search-embedding
- search-web-retriever
- search-tei-reranking-service
- search-reranking
- search-tgi-service
- search-llm
ports:
- "${SEARCH_BACKEND_SERVICE_PORT:-3008}:8888"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=${SEARCH_MEGA_SERVICE_HOST_IP}
- EMBEDDING_SERVICE_HOST_IP=${SEARCH_EMBEDDING_SERVICE_HOST_IP}
- WEB_RETRIEVER_SERVICE_HOST_IP=${SEARCH_WEB_RETRIEVER_SERVICE_HOST_IP}
- RERANK_SERVICE_HOST_IP=${SEARCH_RERANK_SERVICE_HOST_IP}
- LLM_SERVICE_HOST_IP=${SEARCH_LLM_SERVICE_HOST_IP}
- EMBEDDING_SERVICE_PORT=${SEARCH_EMBEDDING_SERVICE_PORT}
- WEB_RETRIEVER_SERVICE_PORT=${SEARCH_WEB_RETRIEVER_SERVICE_PORT}
- RERANK_SERVICE_PORT=${SEARCH_RERANK_SERVICE_PORT}
- LLM_SERVICE_PORT=${SEARCH_LLM_SERVICE_PORT}
ipc: host
restart: always
search-ui-server:
image: ${REGISTRY:-opea}/searchqna-ui:${TAG:-latest}
container_name: search-ui-server
depends_on:
- search-backend-server
ports:
- "${SEARCH_FRONTEND_SERVICE_PORT:-5173}:5173"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- BACKEND_BASE_URL=${SEARCH_BACKEND_SERVICE_ENDPOINT}
ipc: host
restart: always
networks:
default:
driver: bridge

View File

@@ -1,36 +0,0 @@
#!/usr/bin/env bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
export SEARCH_HOST_IP=10.53.22.29
export SEARCH_EXTERNAL_HOST_IP=68.69.180.77
export SEARCH_EMBEDDING_MODEL_ID='BAAI/bge-base-en-v1.5'
export SEARCH_TEI_EMBEDDING_ENDPOINT=http://${SEARCH_HOST_IP}:3001
export SEARCH_RERANK_MODEL_ID='BAAI/bge-reranker-base'
export SEARCH_TEI_RERANKING_ENDPOINT=http://${SEARCH_HOST_IP}:3004
export SEARCH_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export SEARCH_OPENAI_API_KEY=${OPENAI_API_KEY}
export SEARCH_TGI_LLM_ENDPOINT=http://${SEARCH_HOST_IP}:3006
export SEARCH_LLM_MODEL_ID='Intel/neural-chat-7b-v3-3'
export SEARCH_MEGA_SERVICE_HOST_IP=${SEARCH_EXTERNAL_HOST_IP}
export SEARCH_EMBEDDING_SERVICE_HOST_IP=${SEARCH_HOST_IP}
export SEARCH_WEB_RETRIEVER_SERVICE_HOST_IP=${SEARCH_HOST_IP}
export SEARCH_RERANK_SERVICE_HOST_IP=${SEARCH_HOST_IP}
export SEARCH_LLM_SERVICE_HOST_IP=${SEARCH_HOST_IP}
export SEARCH_EMBEDDING_SERVICE_PORT=3002
export SEARCH_WEB_RETRIEVER_SERVICE_PORT=3003
export SEARCH_RERANK_SERVICE_PORT=3005
export SEARCH_LLM_SERVICE_PORT=3007
export SEARCH_FRONTEND_SERVICE_PORT=18143
export SEARCH_BACKEND_SERVICE_PORT=18142
export SEARCH_BACKEND_SERVICE_ENDPOINT=http://${SEARCH_EXTERNAL_HOST_IP}:${SEARCH_BACKEND_SERVICE_PORT}/v1/searchqna
export SEARCH_GOOGLE_API_KEY=${GOOGLE_API_KEY}
export SEARCH_GOOGLE_CSE_ID=${GOOGLE_CSE_ID}

View File

@@ -1,137 +0,0 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -xe
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')
function build_docker_images() {
cd $WORKPATH/docker_image_build
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="searchqna searchqna-ui embedding web-retriever reranking llm-textgen"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
docker images && sleep 1s
}
function start_services() {
cd $WORKPATH/docker_compose/amd/gpu/rocm/
export SEARCH_HOST_IP=${ip_address}
export SEARCH_EXTERNAL_HOST_IP=${ip_address}
export SEARCH_EMBEDDING_MODEL_ID='BAAI/bge-base-en-v1.5'
export SEARCH_TEI_EMBEDDING_ENDPOINT=http://${SEARCH_HOST_IP}:3001
export SEARCH_RERANK_MODEL_ID='BAAI/bge-reranker-base'
export SEARCH_TEI_RERANKING_ENDPOINT=http://${SEARCH_HOST_IP}:3004
export SEARCH_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export SEARCH_OPENAI_API_KEY=${OPENAI_API_KEY}
export SEARCH_TGI_LLM_ENDPOINT=http://${SEARCH_HOST_IP}:3006
export SEARCH_LLM_MODEL_ID='Intel/neural-chat-7b-v3-3'
export SEARCH_MEGA_SERVICE_HOST_IP=${SEARCH_EXTERNAL_HOST_IP}
export SEARCH_EMBEDDING_SERVICE_HOST_IP=${SEARCH_HOST_IP}
export SEARCH_WEB_RETRIEVER_SERVICE_HOST_IP=${SEARCH_HOST_IP}
export SEARCH_RERANK_SERVICE_HOST_IP=${SEARCH_HOST_IP}
export SEARCH_LLM_SERVICE_HOST_IP=${SEARCH_HOST_IP}
export SEARCH_EMBEDDING_SERVICE_PORT=3002
export SEARCH_WEB_RETRIEVER_SERVICE_PORT=3003
export SEARCH_RERANK_SERVICE_PORT=3005
export SEARCH_LLM_SERVICE_PORT=3007
export SEARCH_FRONTEND_SERVICE_PORT=5173
export SEARCH_BACKEND_SERVICE_PORT=3008
export SEARCH_BACKEND_SERVICE_ENDPOINT=http://${SEARCH_HOST_IP}:${SEARCH_BACKEND_SERVICE_PORT}/v1/searchqna
export SEARCH_GOOGLE_API_KEY=${GOOGLE_API_KEY}
export SEARCH_GOOGLE_CSE_ID=${GOOGLE_CSE_ID}
sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
# Start Docker Containers
docker compose up -d > ${LOG_PATH}/start_services_with_compose.log
n=0
until [[ "$n" -ge 100 ]]; do
docker logs search-tgi-service > $LOG_PATH/search-tgi-service_start.log
if grep -q Connected $LOG_PATH/search-tgi-service_start.log; then
break
fi
sleep 5s
n=$((n+1))
done
}
function validate_megaservice() {
result=$(http_proxy="" curl http://${ip_address}:3008/v1/searchqna -XPOST -d '{"messages": "What is black myth wukong?", "stream": "False"}' -H 'Content-Type: application/json')
echo $result
if [[ $result == *"the"* ]]; then
docker logs search-web-retriever-server
docker logs search-backend-server
echo "Result correct."
else
docker logs search-web-retriever-server
docker logs search-backend-server
echo "Result wrong."
exit 1
fi
}
function validate_frontend() {
cd $WORKPATH/ui/svelte
local conda_env_name="OPEA_e2e"
export PATH=${HOME}/miniconda3/bin/:$PATH
if conda info --envs | grep -q "$conda_env_name"; then
echo "$conda_env_name exist!"
else
conda create -n ${conda_env_name} python=3.12 -y
fi
source activate ${conda_env_name}
sed -i "s/localhost/$ip_address/g" playwright.config.ts
conda install -c conda-forge nodejs=22.6.0 -y
npm install && npm ci && npx playwright install --with-deps
node -v && npm -v && pip list
exit_status=0
npx playwright test || exit_status=$?
if [ $exit_status -ne 0 ]; then
echo "[TEST INFO]: ---------frontend test failed---------"
exit $exit_status
else
echo "[TEST INFO]: ---------frontend test passed---------"
fi
}
function stop_docker() {
cd $WORKPATH/docker_compose/amd/gpu/rocm/
docker compose stop && docker compose rm -f
}
function main() {
stop_docker
if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
start_services
validate_megaservice
validate_frontend
stop_docker
echo y | docker system prune
}
main

View File

@@ -23,7 +23,6 @@
"@tailwindcss/typography": "0.5.7",
"@types/debug": "4.1.7",
"@types/node": "^20.12.13",
"@types/pica": "^9.0.5",
"@typescript-eslint/eslint-plugin": "^5.27.0",
"@typescript-eslint/parser": "^5.27.0",
"autoprefixer": "^10.4.16",
@@ -52,7 +51,6 @@
"flowbite-svelte-icons": "^1.4.0",
"fuse.js": "^6.6.2",
"lodash": "^4.17.21",
"pica": "^9.0.1",
"playwright": "^1.44.0",
"ramda": "^0.29.0",
"sse.js": "^0.6.1",

Binary file not shown. (After: 61 KiB)

Binary file not shown. (Before: 303 KiB)

Binary file not shown. (After: 93 KiB)

Binary file not shown. (Before: 230 KiB)
View File

@@ -31,7 +31,9 @@
class={msg.role === 0
? "flex w-full gap-3"
: "flex w-full items-center gap-3"}
data-testid={msg.role === 0 ? "display-answer" : "display-question"}
data-testid={msg.role === 0
? "display-answer"
: "display-question"}
>
<div
class={msg.role === 0
@@ -42,15 +44,10 @@
</div>
<div class="group relative flex items-start">
<div class="flex flex-col items-start">
{#if msg.imgSrc}
<img
src={msg.imgSrc}
alt="Uploaded Image"
class="max-w-28 m-2 max-h-28"
/>
{/if}
<img src={msg.imgSrc} alt="Uploaded Image" class="m-2 max-w-28 max-h-28" />
<p
class="max-w-[60vw] items-start whitespace-pre-line break-keep text-[0.8rem] leading-5 sm:max-w-[50rem] xl:max-w-[65vw]"
class="xl:max-w-[65vw] max-w-[60vw] items-start whitespace-pre-line break-keep text-[0.8rem] leading-5 sm:max-w-[50rem]"
>
{@html msg.content}
</p>

View File

@@ -5,98 +5,93 @@
<script>
import { createEventDispatcher } from "svelte";
import extreme_ironing from "$lib/assets/imageData/extreme_ironing.png";
import waterview from "$lib/assets/imageData/waterview.png";
import extreme_ironing from '$lib/assets/imageData/extreme_ironing.jpg';
import waterview from '$lib/assets/imageData/waterview.jpg';
import { base64ImageStore } from "$lib/shared/stores/common/Store";
let dispatch = createEventDispatcher();
let dispatch = createEventDispatcher();
let images = [
{
id: 1,
alt: "Waterview",
imgurl: waterview,
prompt:
"What are the things I should be cautious about when I visit here?",
},
{
id: 0,
alt: "Extreme Ironing",
imgurl: extreme_ironing,
prompt: "What is unusual about this image?",
},
];
let images = [
{
id: 1,
alt: 'Waterview',
imgurl: waterview,
prompt: 'What are the things I should be cautious about when I visit here?'
},
{
id: 0,
alt: 'Extreme Ironing',
imgurl: extreme_ironing,
prompt: 'What is unusual about this image?'
}
];
let currentIndex = 0;
let currentIndex = 0;
function nextImage() {
currentIndex = (currentIndex + 1) % images.length;
}
function nextImage() {
currentIndex = (currentIndex + 1) % images.length;
}
function prevImage() {
currentIndex = (currentIndex - 1 + images.length) % images.length;
}
function prevImage() {
currentIndex = (currentIndex - 1 + images.length) % images.length;
}
async function handleImageClick() {
const imgUrl = images[currentIndex].imgurl;
const base64Data = await convertImageToBase64(imgUrl);
async function handleImageClick() {
const imgUrl = images[currentIndex].imgurl;
const base64Data = await convertImageToBase64(imgUrl);
const currentPrompt = images[currentIndex].prompt;
dispatch("imagePrompt", { content: currentPrompt });
base64ImageStore.set(base64Data);
}
base64ImageStore.set(base64Data);
const currentPrompt = images[currentIndex].prompt;
dispatch("imagePrompt", { content: currentPrompt });
}
async function convertImageToBase64(url) {
const response = await fetch(url);
const blob = await response.blob();
return new Promise((resolve, reject) => {
const reader = new FileReader();
reader.onloadend = () => resolve(reader.result);
reader.onerror = reject;
reader.readAsDataURL(blob);
});
}
async function convertImageToBase64(url) {
const response = await fetch(url);
const blob = await response.blob();
return new Promise((resolve, reject) => {
const reader = new FileReader();
reader.onloadend = () => resolve(reader.result);
reader.onerror = reject;
reader.readAsDataURL(blob);
});
}
</script>
<div class="my-2 flex w-full flex-col gap-3 rounded-xl bg-white p-5">
<p>Example</p>
<div class="relative mx-auto w-full max-w-4xl">
<button
class="absolute left-0 top-1/2 z-10 h-8 w-8 -translate-y-1/2 transform rounded-full bg-white/30 group-hover:bg-white/50 group-focus:outline-none group-focus:ring-4 group-focus:ring-white dark:bg-gray-800/30 dark:group-hover:bg-gray-800/60 dark:group-focus:ring-gray-800/70 sm:h-10 sm:w-10"
on:click={prevImage}
aria-label="Previous image"
>
&#10094;
</button>
<div class="flex w-full flex-col gap-3 rounded-xl bg-white p-5 my-2">
<p>Example</p>
<div class="relative w-full max-w-4xl mx-auto">
<button
class="absolute left-0 top-1/2 transform -translate-y-1/2 z-10 w-8 h-8 rounded-full sm:w-10 sm:h-10 bg-white/30 dark:bg-gray-800/30 group-hover:bg-white/50 dark:group-hover:bg-gray-800/60 group-focus:ring-4 group-focus:ring-white dark:group-focus:ring-gray-800/70 group-focus:outline-none"
on:click={prevImage}
aria-label="Previous image"
>
&#10094;
</button>
<div class="relative">
<img
src={images[currentIndex].imgurl}
alt={images[currentIndex].alt}
class="carousel-image h-auto w-full cursor-pointer"
on:click={handleImageClick}
/>
<div
class="absolute bottom-0 left-0 w-full bg-black bg-opacity-55 p-3 text-white"
>
<p>{images[currentIndex].prompt}</p>
</div>
</div>
<div class="relative">
<img
src={images[currentIndex].imgurl}
alt={images[currentIndex].alt}
class="carousel-image w-full h-auto cursor-pointer"
on:click={handleImageClick}
/>
<div class="absolute bottom-0 left-0 bg-opacity-55 bg-black text-white p-3 w-full">
<p>{images[currentIndex].prompt}</p>
</div>
</div>
<button
class="absolute right-0 top-1/2 z-10 h-8 w-8 -translate-y-1/2 transform rounded-full bg-white/30 group-hover:bg-white/50 group-focus:outline-none group-focus:ring-4 group-focus:ring-white dark:bg-gray-800/30 dark:group-hover:bg-gray-800/60 dark:group-focus:ring-gray-800/70 sm:h-10 sm:w-10"
on:click={nextImage}
aria-label="Next image"
>
&#10095;
</button>
</div>
<button
class="absolute right-0 top-1/2 transform -translate-y-1/2 z-10 w-8 h-8 rounded-full sm:w-10 sm:h-10 bg-white/30 dark:bg-gray-800/30 group-hover:bg-white/50 dark:group-hover:bg-gray-800/60 group-focus:ring-4 group-focus:ring-white dark:group-focus:ring-gray-800/70 group-focus:outline-none"
on:click={nextImage}
aria-label="Next image"
>
&#10095;
</button>
</div>
</div>
<style>
.relative img {
object-fit: cover;
}
.relative img {
object-fit: cover;
}
</style>

View File

@@ -6,12 +6,11 @@
<script lang="ts">
import { base64ImageStore } from "$lib/shared/stores/common/Store";
import { Dropzone } from "flowbite-svelte";
import Pica from 'pica';
let value = [];
export let imageUrl = "";
$: if (imageUrl !== "") {
$: if (imageUrl) {
uploadImage();
}
@@ -48,7 +47,7 @@
};
const handleChange = (event) => {
const files = event.target.files;
const files = event.target.files;
if (files.length > 0) {
value = [files[0].name]; // Allow only one file selection
readFileAsBase64(files[0]); // Convert to Base64
@@ -56,73 +55,14 @@
};
const readFileAsBase64 = (file) => {
const reader = new FileReader();
reader.onload = () => {
const base64Data = reader.result;
const fileType = file.type;
if (!fileType.includes("png")) {
convertImageToPNG(base64Data); // Convert if not PNG
} else {
base64ImageStore.set(base64Data); // Store Base64
}
imageUrl = URL.createObjectURL(file); // Create URL for preview
};
reader.readAsDataURL(file); // Read file as Data URL
};
const convertImageToPNG = async (base64Data) => {
if (!base64Data || !base64Data.startsWith("data:image/")) {
console.error("Invalid Base64 data");
return;
}
console.log("Starting image conversion...");
const img = new Image();
img.src = base64Data;
img.onload = async () => {
const canvas = document.createElement("canvas");
const ctx = canvas.getContext("2d");
let width = img.width;
let height = img.height;
// Set resize factor to 1 (no scaling) to keep the original size
const scaleFactor = 0.1; // Resize factor (keep original size)
width = Math.floor(width * scaleFactor);
height = Math.floor(height * scaleFactor);
canvas.width = width;
canvas.height = height;
ctx.drawImage(img, 0, 0, width, height); // Draw the original image (no resizing)
const outputCanvas = document.createElement("canvas");
outputCanvas.width = width;
outputCanvas.height = height;
const pica = new Pica();
try {
// Resize and compress the image using Pica
await pica.resize(canvas, outputCanvas);
// Convert canvas to PNG format with data URL
const pngDataUrl = outputCanvas.toDataURL("image/png", 0.8); // Adjust quality (0.9 is high, between 0-1)
// Store the Base64 PNG image
base64ImageStore.set(pngDataUrl);
} catch (err) {
console.error("Error during image processing:", err);
}
};
img.onerror = (err) => {
console.error("Error loading image:", err);
};
};
const reader = new FileReader();
reader.onload = () => {
const base64Data = reader.result; // Get Base64 data
base64ImageStore.set(base64Data); // Store the Base64 string in the store
imageUrl = URL.createObjectURL(file); // Keep the object URL for preview
};
reader.readAsDataURL(file); // Read the file as a Data URL
};
const showFiles = (files) => {
if (files.length === 1) return files[0];

View File

@@ -2,44 +2,34 @@
A list of released OPEA docker images on https://hub.docker.com/, containing all relevant images from the GenAIExamples, GenAIComps and GenAIInfra projects. Please expect more publicly available images in future releases.
Take ChatQnA for example. ChatQnA is a chatbot application service based on the Retrieval Augmented Generation (RAG) architecture. It consists of [opea/embedding](https://hub.docker.com/r/opea/embedding), [opea/retriever](https://hub.docker.com/r/opea/retriever-redis), [opea/reranking-tei](https://hub.docker.com/r/opea/reranking-tei), [opea/llm-textgen](https://hub.docker.com/r/opea/llm-textgen), [opea/dataprep](https://hub.docker.com/r/opea/dataprep), [opea/chatqna](https://hub.docker.com/r/opea/chatqna), [opea/chatqna-ui](https://hub.docker.com/r/opea/chatqna-ui) and [opea/chatqna-conversation-ui](https://hub.docker.com/r/opea/chatqna-conversation-ui) (Optional) multiple microservices. Other services are similar, see the corresponding README for details.
Take ChatQnA for example. ChatQnA is a chatbot application service based on the Retrieval Augmented Generation (RAG) architecture. It consists of [opea/embedding](https://hub.docker.com/r/opea/embedding), [opea/retriever](https://hub.docker.com/r/opea/retriever-redis), [opea/reranking-tei](https://hub.docker.com/r/opea/reranking-tei), [opea/llm-textgen](https://hub.docker.com/r/opea/llm-textgen), [opea/dataprep](), [opea/chatqna](https://hub.docker.com/r/opea/chatqna), [opea/chatqna-ui](https://hub.docker.com/r/opea/chatqna-ui) and [opea/chatqna-conversation-ui](https://hub.docker.com/r/opea/chatqna-conversation-ui) (Optional) multiple microservices. Other services are similar, see the corresponding README for details.
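Each image listed below is published on Docker Hub and can be pulled directly. A minimal sketch, assuming the `latest` tag (check the linked hub pages for the tags actually published):
```bash
# Pull the ChatQnA gateway and UI images from Docker Hub (tag assumed)
docker pull opea/chatqna:latest
docker pull opea/chatqna-ui:latest
```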
## Example images
| Example Images | Dockerfile | Description |
| ----------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| [opea/audioqna](https://hub.docker.com/r/opea/audioqna) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/AudioQnA/Dockerfile) | The docker image served as an audioqna gateway and using language modeling to generate answers to user queries by converting audio input to text, and then use text-to-speech (TTS) to convert those answers back to speech for interaction. |
| [opea/audioqna-ui](https://hub.docker.com/r/opea/audioqna-ui) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/AudioQnA/ui/docker/Dockerfile) | The docker image acted as the audioqna UI entry for enabling seamless interaction with users |
| [opea/audioqna-multilang](https://hub.docker.com/r/opea/audioqna-multilang) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/AudioQnA/Dockerfile.multilang) | The docker image served as an audioqna gateway and using language modeling to generate answers to user queries by converting multilingual audio input to text, and then using multilingual text-to-speech (TTS) to convert those answers back to speech for interaction. |
| [opea/avatarchatbot](https://hub.docker.com/r/opea/avatarchatbot) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/AvatarChatbot/Dockerfile) | The docker image served as an avatarchatbot gateway and interacted with users by understanding their questions and providing relevant answers. |
| [opea/chatqna](https://hub.docker.com/r/opea/chatqna) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/ChatQnA/Dockerfile) | The  docker image served as a chatqna gateway and interacted with users by understanding their questions and providing relevant answers. |
| [opea/chatqna-without-rerank](https://hub.docker.com/r/opea/chatqna-without-rerank) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/ChatQnA/Dockerfile.without_rerank) | The purpose of the docker image is to provide a user interface for chat-based Q&A using React. It allows for interaction with users and supports continuing conversations with a history that is stored in the browser's local storage. |
| [opea/audioqna](https://hub.docker.com/r/opea/audioqna) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/AudioQnA/Dockerfile) | The docker image served as a chatqna gateway and using language modeling to generate answers to user queries by converting audio input to text, and then using text-to-speech (TTS) to convert those answers back to speech for interaction. |
| [opea/chatqna](https://hub.docker.com/r/opea/chatqna) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/ChatQnA/Dockerfile) | The docker image served as a chatqna gateway and interacted with users by understanding their questions and providing relevant answers. |
| [opea/chatqna-ui](https://hub.docker.com/r/opea/chatqna-ui) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/ChatQnA/ui/docker/Dockerfile) | The docker image acted as the chatqna UI entry for facilitating interaction with users for question answering |
| [opea/chatqna-conversation-ui](https://hub.docker.com/r/opea/chatqna-conversation-ui) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/ChatQnA/ui/docker/Dockerfile.react) | The purpose of the docker image is to provide a user interface for chat-based Q&A using React. It allows for interaction with users and supports continuing conversations with a history that is stored in the browser's local storage. |
| [opea/chatqna-guardrails](https://hub.docker.com/r/opea/chatqna-guardrails) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/ChatQnA/Dockerfile.guardrails) | This docker image is used to encapsulate chatqna's LLM service to secure model inputs and outputs. Guardrails proactively prevents models from interacting with insecure content and signals in time to stop insecure behavior. |
| [opea/chatqna-without-rerank](https://hub.docker.com/r/opea/chatqna-without-rerank) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/ChatQnA/Dockerfile.without_rerank) | This docker image is used as a chatqna-without-rerank gateway to provide the chatqna service without rerank to Xeon customers who cannot run the Rerank service on HPUs, but need high performance and accuracy. |
| [opea/codegen](https://hub.docker.com/r/opea/codegen) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/CodeGen/Dockerfile) | The docker image served as the codegen gateway to provide service of the automatic creation of source code from a higher-level representation |
| [opea/codegen-ui](https://hub.docker.com/r/opea/codegen-ui) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/CodeGen/ui/docker/Dockerfile) | The docker image acted as the codegen UI entry for facilitating interaction with users for automatically generating code from user's description |
| [opea/codegen-react-ui](https://hub.docker.com/r/opea/codegen-react-ui) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/CodeGen/ui/docker/Dockerfile.react) | The purpose of the docker image is to provide a user interface for Codegen using React. It allows generating the appropriate code based on the current user input. |
| [opea/codetrans](https://hub.docker.com/r/opea/codetrans) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/CodeTrans/Dockerfile) | The docker image served as a codetrans gateway to provide service of converting source code written in one programming language into an equivalent version in another programming language |
| [opea/codetrans-ui](https://hub.docker.com/r/opea/codetrans-ui) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/CodeTrans/ui/docker/Dockerfile) | The docker image acted as the codetrans UI entry for facilitating interaction with users for translating one programming language to another one |
| [opea/doc-index-retriever](https://hub.docker.com/r/opea/doc-index-retriever) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/DocIndexRetriever/Dockerfile) | The docker image acts as a DocRetriever gateway, It uses different methods to match user queries with a set of free text records. |
| [opea/doc-index-retriever](https://hub.docker.com/r/opea/doc-index-retriever) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/DocIndexRetriever/Dockerfile) | The docker image acts as a DocRetriever gateway, It uses different methods to match user queries with a set of free text records. |
| [opea/docsum](https://hub.docker.com/r/opea/docsum) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/DocSum/Dockerfile) | The docker image served as a docsum gateway to provide service of capturing the main points and essential details of the original text |
| [opea/docsum-ui](https://hub.docker.com/r/opea/docsum-ui) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/DocSum/ui/docker/Dockerfile) | The docker image acted as the docsum UI entry for facilitating interaction with users for document summarization |
| [opea/docsum-react-ui](https://hub.docker.com/r/opea/docsum-react-ui) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/DocSum/ui/docker/Dockerfile.react) | The purpose of the docker image is to provide a user interface for document summary using React. It allows uploading a file or pasting text and then clicking “Generate Summary” to get a condensed summary of the content, automatically scrolling to the bottom of the summary. |
| [opea/docsum-gradio-ui](https://hub.docker.com/r/opea/docsum-gradio-ui) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/DocSum/ui/docker/Dockerfile.gradio) | The purpose of the docker image is to provide a user interface for summarizing documents and text using a Dockerized frontend application. Users can upload files or paste text to generate summaries. |
| [opea/edgecraftrag](https://hub.docker.com/r/opea/edgecraftrag) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/EdgeCraftRAG/Dockerfile) | The docker image served as an Edge Craft RAG (EC-RAG) gateway, delivering a customizable and production-ready Retrieval-Augmented Generation system optimized for edge solutions. |
| [opea/edgecraftrag-ui](https://hub.docker.com/r/opea/edgecraftrag-ui) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/EdgeCraftRAG/ui/docker/Dockerfile.ui) | The docker image acted as the Edge Craft RAG (EC-RAG) UI entry. It ensures high-quality, performant interactions tailored for edge environments. |
| [opea/edgecraftrag-server](https://hub.docker.com/r/opea/edgecraftrag-server) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/EdgeCraftRAG/Dockerfile.server) | The docker image served as an Edge Craft RAG (EC-RAG) server, delivering a customizable and production-ready Retrieval-Augmented Generation system optimized for edge solutions. |
| [opea/faqgen](https://hub.docker.com/r/opea/faqgen) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/FaqGen/Dockerfile) | The docker image served as a faqgen gateway, automatically generating comprehensive, natural-sounding Frequently Asked Questions (FAQs) from documents, legal texts, customer inquiries and other sources. |
| [opea/faqgen-ui](https://hub.docker.com/r/opea/faqgen-ui) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/FaqGen/ui/docker/Dockerfile) | The docker image serves as the faqgen UI entry point for easy interaction with users, generating FAQs by pasting in question text. |
| [opea/faqgen-ui](https://hub.docker.com/r/opea/faqgen-ui) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/FaqGen/ui/docker/Dockerfile) | The docker image serves as the docsum UI entry point for easy interaction with users, generating FAQs by pasting in question text. |
| [opea/faqgen-react-ui](https://hub.docker.com/r/opea/faqgen-react-ui) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/FaqGen/ui/docker/Dockerfile.react) | The purpose of the docker image is to provide a user interface for Generate FAQs using React. It allows generating FAQs by uploading files or pasting text. |
| [opea/graphrag](https://hub.docker.com/r/opea/graphrag) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/GraphRAG/Dockerfile) | The docker image served as a GraphRAG gateway, leveraging a knowledge graph derived from source documents to address both local and global queries. |
| [opea/graphrag-ui](https://hub.docker.com/r/opea/graphrag-ui) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/GraphRAG/ui/docker/Dockerfile) | The docker image acted as the GraphRAG UI entry for facilitating interaction with users |
| [opea/graphrag-react-ui](https://hub.docker.com/r/opea/graphrag-react-ui) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/GraphRAG/ui/docker/Dockerfile.react) | The purpose of the docker image is to provide a user interface for GraphRAG using React. |
| [opea/multimodalqna](https://hub.docker.com/r/opea/multimodalqna) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/MultimodalQnA/Dockerfile) | The docker image served as a multimodalqna gateway and dynamically fetches the most relevant multimodal information (frames, transcripts, and/or subtitles) from the user's video collection to solve the problem. |
| [opea/multimodalqna-ui](https://hub.docker.com/r/opea/multimodalqna-ui) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/MultimodalQnA/ui/docker/Dockerfile) | The docker image serves as the multimodalqna UI entry point for easy interaction with users. Answers to questions are generated from videos uploaded by users. |
| [opea/multimodalqna](https://hub.docker.com/r/opea/multimodalqna) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/MultimodalQnA/Dockerfile) | The docker image served as a multimodalqna gateway and dynamically fetches the most relevant multimodal information (frames, transcripts, and/or subtitles) from the user's video, image, or audio collection to solve the problem. |
| [opea/multimodalqna-ui](https://hub.docker.com/r/opea/multimodalqna-ui) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/MultimodalQnA/ui/docker/Dockerfile) | The docker image serves as the multimodalqna UI entry point for easy interaction with users. Answers to questions are generated from content uploaded by users. |
| [opea/productivity-suite-react-ui-server](https://hub.docker.com/r/opea/productivity-suite-react-ui-server) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/ProductivitySuite/ui/docker/Dockerfile.react) | The purpose of the docker image is to provide a user interface for Productivity Suite Application using React. It allows interaction by uploading documents and inputs. |
| [opea/searchqna](https://hub.docker.com/r/opea/searchqna) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/SearchQnA/Dockerfile) | The docker image served as the searchqna gateway, providing the service of retrieving accurate and relevant answers to user queries from a knowledge base or dataset |
| [opea/searchqna](https://hub.docker.com/r/opea/searchqna/tags) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/SearchQnA/Dockerfile) | The docker image served as the searchqna gateway, providing the service of retrieving accurate and relevant answers to user queries from a knowledge base or dataset |
| [opea/searchqna-ui](https://hub.docker.com/r/opea/searchqna-ui) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/SearchQnA/ui/docker/Dockerfile) | The docker image acted as the searchqna UI entry, facilitating question-answering interaction with users |
| [opea/translation](https://hub.docker.com/r/opea/translation) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/Translation/Dockerfile) | The docker image served as the translation gateway, providing language translation services |
| [opea/translation-ui](https://hub.docker.com/r/opea/translation-ui) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/Translation/ui/docker/Dockerfile) | The docker image acted as the translation UI entry, facilitating language translation interaction with users |
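
All of the gateway and UI images above are published on Docker Hub and can be pulled and started directly with the Docker CLI. A minimal sketch (the `latest` tag, container name, and 8888 port mapping are illustrative assumptions, not values taken from this table; real deployments use the ports defined in each example's compose files):

```bash
# Pull one of the published gateway images from Docker Hub
docker pull opea/searchqna:latest

# Start the gateway container; the port mapping below is an assumption for illustration
docker run -d --name searchqna-gateway -p 8888:8888 opea/searchqna:latest

# Confirm the container is up
docker ps --filter name=searchqna-gateway
```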
@@ -50,65 +40,46 @@ Take ChatQnA for example. ChatQnA is a chatbot application service based on the
## Microservice images
| Microservice Images | Dockerfile | Description |
| ------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| [opea/agent](https://hub.docker.com/r/opea/agent) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/agent/src/Dockerfile) | The docker image exposed the OPEA agent microservice for GenAI application use |
| [opea/agent-ui](https://hub.docker.com/r/opea/agent-ui) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/AgentQnA/ui/docker/Dockerfile) | The docker image exposed the OPEA agent microservice UI entry for GenAI application use |
| [opea/asr](https://hub.docker.com/r/opea/asr) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/asr/src/Dockerfile) | The docker image exposed the OPEA Audio-Speech-Recognition microservice for GenAI application use |
| [opea/animation](https://hub.docker.com/r/opea/animation) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/animation/src/Dockerfile) | The purpose of the Docker image is to expose the OPEA Avatar Animation microservice for GenAI application use. |
| [opea/chathistory-mongo-server](https://hub.docker.com/r/opea/chathistory-mongo-server) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/chathistory/src/Dockerfile) | The docker image exposes the OPEA Chat History microservice, which is based on a MongoDB database and designed to allow users to store, retrieve and manage chat conversations |
| [opea/dataprep](https://hub.docker.com/r/opea/dataprep) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/dataprep/src/Dockerfile) | The docker image exposed the OPEA dataprep microservice for GenAI application use |
| [opea/embedding](https://hub.docker.com/r/opea/embedding) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/embeddings/src/Dockerfile) | The docker image exposed the OPEA embedding microservice for GenAI application use |
| [opea/embedding-multimodal-clip](https://hub.docker.com/r/opea/embedding-multimodal-clip) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/third_parties/clip/src/Dockerfile) | The docker image exposed the OPEA multimodal CLIP-based embedding microservice for GenAI application use |
| [opea/embedding-multimodal-bridgetower](https://hub.docker.com/r/opea/embedding-multimodal-bridgetower) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/third_parties/bridgetower/src/Dockerfile) | The docker image exposes the OPEA multimodal embedding microservice based on BridgeTower for use by GenAI applications |
| [opea/embedding-multimodal-bridgetower-gaudi](https://hub.docker.com/r/opea/embedding-multimodal-bridgetower-gaudi) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/third_parties/bridgetower/src/Dockerfile.intel_hpu) | The docker image exposes the OPEA multimodal embedding microservice based on BridgeTower for use by GenAI applications on the Gaudi |
| [opea/feedbackmanagement-mongo](https://hub.docker.com/r/opea/feedbackmanagement-mongo) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/feedback_management/src/Dockerfile) | The docker image exposes the OPEA feedback management microservice, which uses a MongoDB database, for GenAI applications. |
| [opea/finetuning](https://hub.docker.com/r/opea/finetuning) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/finetuning/src/Dockerfile) | The docker image exposed the OPEA Fine-tuning microservice for GenAI application use |
| [opea/finetuning-gaudi](https://hub.docker.com/r/opea/finetuning-gaudi) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/finetuning/src/Dockerfile.intel_hpu) | The docker image exposed the OPEA Fine-tuning microservice for GenAI application use on the Gaudi |
| [opea/guardrails](https://hub.docker.com/r/opea/guardrails) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/guardrails/src/guardrails/Dockerfile) | The docker image exposed the OPEA guardrail microservice for GenAI application use |
| [opea/guardrails-toxicity-predictionguard](https://hub.docker.com/r/opea/guardrails-toxicity-predictionguard) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/guardrails/src/toxicity_detection/Dockerfile) | The docker image exposed the OPEA guardrail microservice to provide toxicity detection for GenAI application use |
| [opea/guardrails-pii-predictionguard](https://hub.docker.com/r/opea/guardrails-pii-predictionguard) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/guardrails/src/pii_detection/Dockerfile) | The docker image exposed the OPEA guardrail microservice to provide PII detection for GenAI application use |
| [opea/guardrails-injection-predictionguard](https://hub.docker.com/r/opea/guardrails-injection-predictionguard) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/guardrails/src/prompt_injection/Dockerfile) | The docker image exposed the OPEA guardrail microservice to provide prompt injection detection via Prediction Guard for GenAI application use |
| [opea/guardrails-hallucination-detection](https://hub.docker.com/r/opea/guardrails-hallucination-detection) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/guardrails/src/hallucination_detection/Dockerfile) | The docker image exposed the OPEA guardrail microservice to provide hallucination detection for GenAI application use |
| [opea/guardrails-factuality-predictionguard](https://hub.docker.com/r/opea/guardrails-factuality-predictionguard) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/guardrails/src/factuality_alignment/Dockerfile) | The docker image exposed the OPEA guardrail microservice to provide factuality checking via Prediction Guard for GenAI application use |
| [opea/guardrails-bias-detection](https://hub.docker.com/r/opea/guardrails-bias-detection) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/guardrails/src/bias_detection/Dockerfile) | The docker image exposed the OPEA guardrail microservice to provide bias detection for GenAI application use |
| [opea/image2image-gaudi](https://hub.docker.com/r/opea/image2image-gaudi) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/image2image/src/Dockerfile.intel_hpu) | The purpose of the Docker image is to expose the OPEA Image-to-Image microservice for GenAI application use on the Gaudi. |
| [opea/image2image](https://hub.docker.com/r/opea/image2image) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/image2image/src/Dockerfile) | The purpose of the Docker image is to expose the OPEA Image-to-Image microservice for GenAI application use. |
| [opea/image2video-gaudi](https://hub.docker.com/r/opea/image2video-gaudi) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/image2video/src/Dockerfile.intel_hpu) | The purpose of the Docker image is to expose the OPEA image-to-video microservice for GenAI application use on the Gaudi. |
| [opea/image2video](https://hub.docker.com/r/opea/image2video) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/image2video/src/Dockerfile) | The purpose of the Docker image is to expose the OPEA image-to-video microservice for GenAI application use. |
| [opea/llm-textgen](https://hub.docker.com/r/opea/llm-textgen) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/llms/src/text-generation/Dockerfile) | The docker image exposed the OPEA LLM text generation microservice for GenAI application use |
| [opea/llm-textgen-gaudi](https://hub.docker.com/r/opea/llm-textgen-gaudi) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/llms/src/text-generation/Dockerfile.intel_hpu) | The docker image exposed the OPEA LLM text generation microservice for GenAI application use on the Gaudi2 |
| [opea/llm-eval](https://hub.docker.com/r/opea/llm-eval) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/llms/utils/lm-eval/Dockerfile) | The docker image exposed the OPEA LLM evaluation (lm-eval) microservice for GenAI application use |
| [opea/llm-docsum](https://hub.docker.com/r/opea/llm-docsum) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/llms/src/doc-summarization/Dockerfile) | The docker image exposed the OPEA LLM document summarization microservice for GenAI application use |
| [opea/llm-faqgen](https://hub.docker.com/r/opea/llm-faqgen) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/llms/src/faq-generation/Dockerfile) | This docker image is designed to build a frequently asked questions microservice using the HuggingFace Text Generation Inference(TGI) framework. The microservice accepts document input and generates a FAQ. |
| [opea/lvm](https://hub.docker.com/r/opea/lvm) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/lvms/src/Dockerfile) | The docker image exposed the OPEA large visual model (LVM) microservice for GenAI application use |
| [opea/lvm-llava](https://hub.docker.com/r/opea/lvm-llava) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/lvms/src/integrations/dependency/llava/Dockerfile) | The docker image exposed the OPEA microservice running LLaVA as a large visual model (LVM) server for GenAI application use |
| [opea/lvm-video-llama](https://hub.docker.com/r/opea/lvm-video-llama) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/lvms/src/integrations/dependency/video-llama/Dockerfile) | The docker image exposed the OPEA microservice running Video-Llama as a large visual model (LVM) for GenAI application use |
| [opea/lvm-predictionguard](https://hub.docker.com/r/opea/lvm-predictionguard) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/lvms/src/integrations/dependency/predictionguard/Dockerfile) | The docker image exposed the OPEA microservice running predictionguard as a large visual model (LVM) server for GenAI application use |
| [opea/llava-gaudi](https://hub.docker.com/r/opea/llava-gaudi) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/lvms/src/integrations/dependency/llava/Dockerfile.intel_hpu) | The docker image exposed the OPEA microservice running LLaVA as a large visual model (LVM) service for GenAI application use on the Gaudi2 |
| [opea/lvm-llama-vision](https://hub.docker.com/r/opea/lvm-llama-vision) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/lvms/src/integrations/dependency/llama-vision/Dockerfile) | The docker image exposed the OPEA microservice running Llama Vision as the base large visual model service for GenAI application use |
| [opea/lvm-llama-vision-tp](https://hub.docker.com/r/opea/lvm-llama-vision-tp) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/lvms/src/integrations/dependency/llama-vision/Dockerfile.tp) | The docker image exposed the OPEA microservice running Llama Vision with deepspeed as the base large visual model service for GenAI application use |
| [opea/lvm-llama-vision-guard](https://hub.docker.com/r/opea/lvm-llama-vision-guard) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/lvms/src/integrations/dependency/llama-vision/Dockerfile.guard) | The docker image exposed the OPEA microservice running Llama Vision Guard as the base large visual model service for GenAI application use |
| [opea/promptregistry-mongo](https://hub.docker.com/r/opea/promptregistry-mongo) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/prompt_registry/src/Dockerfile) | The docker image exposes the OPEA Prompt Registry microservice, which is based on a MongoDB database and designed to store and retrieve users' preferred prompts |
| [opea/reranking](https://hub.docker.com/r/opea/reranking) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/rerankings/src/Dockerfile) | The docker image exposed the OPEA reranking microservice for GenAI application use |
| [opea/retriever](https://hub.docker.com/r/opea/retriever) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/retrievers/src/Dockerfile) | The docker image exposed the OPEA retrieval microservice for GenAI application use |
| [opea/text2image](https://hub.docker.com/r/opea/text2image) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/text2image/src/Dockerfile) | The docker image exposed the OPEA text-to-image microservice for GenAI application use |
| [opea/text2image-gaudi](https://hub.docker.com/r/opea/text2image-gaudi) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/text2image/src/Dockerfile.intel_hpu) | The docker image exposed the OPEA text-to-image microservice for GenAI application use on the Gaudi |
| [opea/text2image-ui](https://hub.docker.com/r/opea/text2image-ui) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/Text2Image/ui/docker/Dockerfile) | The docker image exposed the OPEA text-to-image microservice UI entry for GenAI application use |
| [opea/text2sql](https://hub.docker.com/r/opea/text2sql) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/text2sql/src/Dockerfile) | The docker image exposed the OPEA text to Structured Query Language microservice for GenAI application use |
| [opea/text2sql-react-ui](https://hub.docker.com/r/opea/text2sql-react-ui) | [Link](https://github.com/opea-project/GenAIExamples/blob/main/DBQnA/ui/docker/Dockerfile.react) | The docker image exposed the OPEA text to Structured Query Language microservice react UI entry for GenAI application use |
| [opea/tts](https://hub.docker.com/r/opea/tts) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/tts/src/Dockerfile) | The docker image exposed the OPEA Text-To-Speech microservice for GenAI application use |
| [opea/speecht5](https://hub.docker.com/r/opea/speecht5) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/tts/src/integrations/dependency/speecht5/Dockerfile) | The docker image exposed the OPEA SpeechT5 service for GenAI application use |
| [opea/speecht5-gaudi](https://hub.docker.com/r/opea/speecht5-gaudi) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/tts/src/integrations/dependency/speecht5/Dockerfile.intel_hpu) | The docker image exposed the OPEA SpeechT5 service on Gaudi2 for GenAI application use |
| [opea/gpt-sovits](https://hub.docker.com/r/opea/gpt-sovits) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/tts/src/integrations/dependency/gpt-sovits/Dockerfile) | The docker image exposed the OPEA gpt-sovits service for GenAI application use |
| [opea/nginx](https://hub.docker.com/r/opea/nginx) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/third_parties/nginx/src/Dockerfile) | The docker image exposed the OPEA nginx microservice for GenAI application use |
| [opea/vectorstore-pathway](https://hub.docker.com/r/opea/vectorstore-pathway) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/third_parties/pathway/src/Dockerfile) | The docker image exposed the OPEA Vectorstores microservice with Pathway for GenAI application use |
| [opea/wav2lip](https://hub.docker.com/r/opea/wav2lip) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/third_parties/wav2lip/src/Dockerfile) | The docker image exposed the OPEA Wav2Lip microservice, which generates lip movements from audio files, for GenAI application use |
| [opea/wav2lip-gaudi](https://hub.docker.com/r/opea/wav2lip-gaudi) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/third_parties/wav2lip/src/Dockerfile.intel_hpu) | The docker image exposed the OPEA Wav2Lip microservice, which generates lip movements from audio files, for GenAI application use on the Gaudi2 |
| [opea/vllm-arc](https://hub.docker.com/r/opea/vllm-arc) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/third_parties/vllm/src/Dockerfile.intel_gpu) | The docker image powered by vllm-project for deploying and serving vllm Models on Arc |
| [opea/vllm-openvino](https://hub.docker.com/r/opea/vllm-openvino) | [Link](https://github.com/vllm-project/vllm/blob/main/Dockerfile.openvino) | The docker image powered by vllm-project for deploying and serving vllm Models of the Openvino Framework |
| [opea/vllm-gaudi](https://hub.docker.com/r/opea/vllm-gaudi) | [Link](https://github.com/HabanaAI/vllm-fork/blob/habana_main/Dockerfile.hpu) | The docker image powered by vllm-project for deploying and serving vllm Models on Gaudi2 |
| [opea/vllm](https://hub.docker.com/r/opea/vllm) | [Link](https://github.com/HabanaAI/vllm-fork/blob/habana_main/Dockerfile.cpu) | The docker image powered by vllm-project for deploying and serving vllm Models |
| [opea/whisper-gaudi](https://hub.docker.com/r/opea/whisper-gaudi) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/asr/src/integrations/dependency/whisper/Dockerfile.intel_hpu) | The docker image exposed the OPEA Whisper service on Gaudi2 for GenAI application use |
| [opea/whisper](https://hub.docker.com/r/opea/whisper) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/asr/src/integrations/dependency/whisper/Dockerfile) | The docker image exposed the OPEA Whisper service for GenAI application use |
| [opea/web-retriever](https://hub.docker.com/r/opea/web-retriever) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/web_retrievers/src/Dockerfile) | The docker image exposed the OPEA web retrieval microservice, based on a search engine and vector DB, for GenAI application use |
| Microservice Images | Dockerfile | Description |
| ------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| [opea/agent]() | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/agent/src/Dockerfile) | The docker image exposed the OPEA agent microservice for GenAI application use |
| [opea/asr](https://hub.docker.com/r/opea/asr) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/asr/src/Dockerfile) | The docker image exposed the OPEA Audio-Speech-Recognition microservice for GenAI application use |
| [opea/chathistory-mongo-server](https://hub.docker.com/r/opea/chathistory-mongo-server) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/chathistory/src/Dockerfile) | The docker image exposes the OPEA Chat History microservice, which is based on a MongoDB database and designed to allow users to store, retrieve and manage chat conversations |
| [opea/dataprep]() | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/dataprep/src/Dockerfile) | The docker image exposed the OPEA dataprep microservice based on many vectordbs for GenAI application use |
| [opea/embedding-multimodal-clip](https://hub.docker.com/r/opea/embedding-multimodal-clip) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/third_parties/clip/src/Dockerfile) | The docker image exposes the OPEA multimodal CLIP-based embedding microservice for use by GenAI applications |
| [opea/embedding](https://hub.docker.com/r/opea/embedding) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/embeddings/src/Dockerfile) | The docker image exposes the OPEA multimodal embedding microservice for use by GenAI applications |
| [opea/embedding-multimodal-bridgetower](https://hub.docker.com/r/opea/embedding-multimodal-bridgetower) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/third_parties/bridgetower/src/Dockerfile) | The docker image exposes the OPEA multimodal embedding microservice based on BridgeTower for use by GenAI applications |
| [opea/embedding-multimodal-bridgetower-gaudi](https://hub.docker.com/r/opea/embedding-multimodal-bridgetower-gaudi) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/third_parties/bridgetower/src/Dockerfile.intel_hpu) | The docker image exposes the OPEA multimodal embedding microservice based on BridgeTower for use by GenAI applications on the Gaudi |
| [opea/feedbackmanagement](https://hub.docker.com/r/opea/feedbackmanagement) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/feedback_management/src/Dockerfile) | The docker image exposes the OPEA feedback management microservice, which uses a MongoDB database, for GenAI applications. |
| [opea/finetuning](https://hub.docker.com/r/opea/finetuning) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/finetuning/src/Dockerfile) | The docker image exposed the OPEA Fine-tuning microservice for GenAI application use |
| [opea/finetuning-gaudi](https://hub.docker.com/r/opea/finetuning-gaudi) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/finetuning/src/Dockerfile.intel_hpu) | The docker image exposed the OPEA Fine-tuning microservice for GenAI application use on the Gaudi |
| [opea/gmcrouter](https://hub.docker.com/r/opea/gmcrouter) | [Link](https://github.com/opea-project/GenAIInfra/blob/main/microservices-connector/Dockerfile.manager) | The docker image served as one of the key parts of the OPEA GenAI Microservice Connector (GMC), routing traffic among the microservices defined in GMC |
| [opea/gmcmanager](https://hub.docker.com/r/opea/gmcmanager) | [Link](https://github.com/opea-project/GenAIInfra/blob/main/microservices-connector/Dockerfile.router) | The docker image served as one of the key parts of the OPEA GenAI Microservice Connector (GMC), acting as the controller manager that handles the GMC CRD |
| [opea/guardrails]() | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/guardrails/src/guardrails/Dockerfile) | The docker image exposed the OPEA guardrail microservice to provide content review for GenAI application use |
| [opea/guardrails-toxicity-detection](https://hub.docker.com/r/opea/guardrails-toxicity-detection) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/guardrails/src/toxicity_detection/Dockerfile) | The docker image exposed the OPEA guardrail microservice to provide toxicity detection for GenAI application use |
| [opea/guardrails-pii-detection](https://hub.docker.com/r/opea/guardrails-pii-detection) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/guardrails/src/pii_detection/Dockerfile) | The docker image exposed the OPEA guardrail microservice to provide PII detection for GenAI application use |
| [opea/llm-docsum]() | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/llms/src/doc-summarization/Dockerfile) | This docker image is designed to build a document summarization microservice using the HuggingFace Text Generation Inference(TGI) framework. The microservice accepts document input and generates a document summary. |
| [opea/llm-faqgen]() | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/llms/src/faq-generation/Dockerfile) | This docker image is designed to build a frequently asked questions microservice using the HuggingFace Text Generation Inference(TGI) framework. The microservice accepts document input and generates a FAQ. |
| [opea/llm-textgen](https://hub.docker.com/r/opea/llm-textgen) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/llms/src/text-generation/Dockerfile) | The docker image exposed the OPEA LLM microservice, built upon the TGI docker image, for GenAI application use |
| [opea/llava-gaudi](https://hub.docker.com/r/opea/llava-hpu) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/lvms/src/integrations/dependency/llava/Dockerfile.intel_hpu) | The docker image exposed the OPEA microservice running LLaVA as a large visual model (LVM) service for GenAI application use on the Gaudi |
| [opea/lvm]() | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/lvms/src/Dockerfile) | This docker image is designed to build a large visual model (LVM) unified wrapper service |
| [opea/lvm-llava](https://hub.docker.com/r/opea/lvm-llava) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/lvms/src/integrations/dependency/llava/Dockerfile) | The docker image exposed the OPEA microservice running LLaVA as a large visual model (LVM) server for GenAI application use |
| [opea/lvm-video-llama](https://hub.docker.com/r/opea/lvm-video-llama) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/lvms/src/integrations/dependency/video-llama/Dockerfile) | The docker image exposed the OPEA microservice running Video LLaMA as a large visual model (LVM) server for GenAI application use |
| [opea/lvm-llama-vision]() | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/lvms/src/integrations/dependency/llama-vision/Dockerfile) | The docker image exposed the OPEA microservice running LLaMA Vision as a large visual model (LVM) server for GenAI application use |
| [opea/lvm-predictionguard](https://hub.docker.com/r/opea/lvm-predictionguard) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/lvms/src/integrations/dependency/predictionguard/Dockerfile) | The docker image exposed the OPEA microservice running PredictionGuard as a large visual model (LVM) server for GenAI application use |
| [opea/nginx](https://hub.docker.com/r/opea/nginx) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/third_parties/nginx/src/Dockerfile) | The docker image exposed the OPEA nginx microservice for GenAI application use |
| [opea/pathway](https://hub.docker.com/r/opea/vectorstore-pathway) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/third_parties/pathway/src/Dockerfile) | The docker image exposed the OPEA Vectorstores microservice with Pathway for GenAI application use |
| [opea/promptregistry-mongo-server](https://hub.docker.com/r/opea/promptregistry-mongo-server) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/prompt_registry/src/Dockerfile) | The docker image exposes the OPEA Prompt Registry microservice, which is based on a MongoDB database and designed to store and retrieve users' preferred prompts |
| [opea/reranking]() | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/rerankings/src/Dockerfile) | The docker image exposed the OPEA reranking microservice based on tei docker image for GenAI application use |
| [opea/retriever]() | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/retrievers/src/Dockerfile) | The docker image exposed the OPEA retrieval microservice based on milvus vectordb for GenAI application use |
| [opea/speecht5](https://hub.docker.com/r/opea/speecht5) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/tts/src/integrations/dependency/speecht5/Dockerfile) | The docker image exposed the OPEA SpeechT5 service for GenAI application use |
| [opea/speecht5-gaudi](https://hub.docker.com/r/opea/speecht5-gaudi) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/tts/src/integrations/dependency/speecht5/Dockerfile.intel_hpu) | The docker image exposed the OPEA SpeechT5 service on Gaudi2 for GenAI application use |
| [opea/tei-gaudi](https://hub.docker.com/r/opea/tei-gaudi/tags) | [Link](https://github.com/huggingface/tei-gaudi/blob/habana-main/Dockerfile-hpu) | The docker image powered by HuggingFace Text Embedding Inference (TEI) on Gaudi2 for deploying and serving Embedding Models |
| [opea/lvm-video-llama](https://hub.docker.com/r/opea/lvm-video-llama) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/lvms/src/integrations/dependency/video-llama/Dockerfile) | The docker image exposed the OPEA microservice running Video-Llama as a large visual model (LVM) server for GenAI application use |
| [opea/tts](https://hub.docker.com/r/opea/tts) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/tts/src/Dockerfile) | The docker image exposed the OPEA Text-To-Speech microservice for GenAI application use |
| [opea/vllm](https://hub.docker.com/r/opea/vllm) | [Link](https://github.com/vllm-project/vllm/blob/main/Dockerfile.cpu) | The docker image powered by vllm-project for deploying and serving vllm Models |
| [opea/vllm-gaudi]() | [Link](https://github.com/HabanaAI/vllm-fork/blob/habana_main/Dockerfile.hpu) | The docker image powered by vllm-fork for deploying and serving vllm-gaudi Models |
| [opea/vllm-openvino](https://hub.docker.com/r/opea/vllm-openvino) | [Link](https://github.com/vllm-project/vllm/blob/main/Dockerfile.openvino) | The docker image powered by vllm-project for deploying and serving vllm Models of the Openvino Framework |
| [opea/web-retriever]() | [Link](https://github.com/opea-project/GenAIComps/tree/main/comps/web_retrievers/src/Dockerfile) | The docker image exposed the OPEA web retrieval microservice based on a search engine and vector DB |
| [opea/whisper](https://hub.docker.com/r/opea/whisper) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/asr/src/integrations/dependency/whisper/Dockerfile) | The docker image exposed the OPEA Whisper service for GenAI application use |
| [opea/whisper-gaudi](https://hub.docker.com/r/opea/whisper-gaudi) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/asr/src/integrations/dependency/whisper/Dockerfile.intel_hpu) | The docker image exposed the OPEA Whisper service on Gaudi2 for GenAI application use |
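
As with the example images above, these microservice images can be pulled individually; in production deployments it is preferable to pin a released tag rather than `latest`. A minimal sketch (the `1.2` tag is an assumption derived from the version.txt shown below, and the 7066 port mapping is illustrative only):

```bash
# Pin a released tag when pulling a microservice image (tag format is an assumption)
docker pull opea/whisper:1.2

# Start the Whisper ASR dependency service; the port mapping is an assumption for illustration
docker run -d --name whisper-service -p 7066:7066 opea/whisper:1.2
```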
3
version.txt Normal file
View File
@@ -0,0 +1,3 @@
VERSION_MAJOR 1
VERSION_MINOR 2
VERSION_PATCH 0
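
The three fields above compose the release version (1.2.0 here). A minimal shell sketch of how such a file could be turned into an image tag (the parsing and the tag format are assumptions for illustration, not taken from the repository's CI scripts):

```bash
# Join the second column of each VERSION_* line with dots -> 1.2.0
VERSION=$(awk '{print $2}' version.txt | paste -sd. -)
echo "$VERSION"

# Hypothetical use of the composed version as an image tag
docker pull "opea/chatqna:${VERSION}"
```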