Refactor AudioQnA/MultiModalQnA/AvatarChatbot (#1310)
Signed-off-by: chensuyue <suyue.chen@intel.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: chensuyue <suyue.chen@intel.com>
This commit is contained in:
@@ -14,27 +14,20 @@ cd GenAIComps
|
||||
### 2. Build ASR Image
|
||||
|
||||
```bash
|
||||
docker build -t opea/whisper-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/dependency/Dockerfile.intel_hpu .
|
||||
|
||||
|
||||
docker build -t opea/asr:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/Dockerfile .
|
||||
docker build -t opea/whisper-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/integrations/dependency/whisper/Dockerfile.intel_hpu .
|
||||
```
|
||||
|
||||
### 3. Build LLM Image
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile .
|
||||
```
|
||||
Intel Xeon optimized image hosted in huggingface repo will be used for TGI service: ghcr.io/huggingface/tgi-gaudi:2.0.6 (https://github.com/huggingface/tgi-gaudi)
|
||||
|
||||
### 4. Build TTS Image
|
||||
|
||||
```bash
|
||||
docker build -t opea/speecht5-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/speecht5/dependency/Dockerfile.intel_hpu .
|
||||
|
||||
docker build -t opea/tts:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/speecht5/Dockerfile .
|
||||
docker build -t opea/speecht5-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/src/integrations/dependency/speecht5/Dockerfile.intel_hpu .
|
||||
```
|
||||
|
||||
### 6. Build MegaService Docker Image
|
||||
### 5. Build MegaService Docker Image
|
||||
|
||||
To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `audioqna.py` Python script. Build the MegaService Docker image using the command below:
|
||||
|
||||
@@ -47,11 +40,8 @@ docker build --no-cache -t opea/audioqna:latest --build-arg https_proxy=$https_p
|
||||
Then run the command `docker images`, you will have following images ready:
|
||||
|
||||
1. `opea/whisper-gaudi:latest`
|
||||
2. `opea/asr:latest`
|
||||
3. `opea/llm-tgi:latest`
|
||||
4. `opea/speecht5-gaudi:latest`
|
||||
5. `opea/tts:latest`
|
||||
6. `opea/audioqna:latest`
|
||||
2. `opea/speecht5-gaudi:latest`
|
||||
3. `opea/audioqna:latest`
|
||||
|
||||
## 🚀 Set the environment variables
|
||||
|
||||
@@ -61,20 +51,18 @@ Before starting the services with `docker compose`, you have to recheck the foll
|
||||
export host_ip=<your External Public IP> # export host_ip=$(hostname -I | awk '{print $1}')
|
||||
export HUGGINGFACEHUB_API_TOKEN=<your HF token>
|
||||
|
||||
export TGI_LLM_ENDPOINT=http://$host_ip:3006
|
||||
export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
|
||||
|
||||
export ASR_ENDPOINT=http://$host_ip:7066
|
||||
export TTS_ENDPOINT=http://$host_ip:7055
|
||||
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export ASR_SERVICE_HOST_IP=${host_ip}
|
||||
export TTS_SERVICE_HOST_IP=${host_ip}
|
||||
export LLM_SERVICE_HOST_IP=${host_ip}
|
||||
export WHISPER_SERVER_HOST_IP=${host_ip}
|
||||
export SPEECHT5_SERVER_HOST_IP=${host_ip}
|
||||
export LLM_SERVER_HOST_IP=${host_ip}
|
||||
|
||||
export ASR_SERVICE_PORT=3001
|
||||
export TTS_SERVICE_PORT=3002
|
||||
export LLM_SERVICE_PORT=3007
|
||||
export WHISPER_SERVER_PORT=7066
|
||||
export SPEECHT5_SERVER_PORT=7055
|
||||
export LLM_SERVER_PORT=3006
|
||||
|
||||
export BACKEND_SERVICE_ENDPOINT=http://${host_ip}:3008/v1/audioqna
|
||||
```
|
||||
|
||||
## 🚀 Start the MegaService
|
||||
@@ -95,36 +83,18 @@ curl http://${host_ip}:7066/v1/asr \
|
||||
-d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
# asr microservice
|
||||
curl http://${host_ip}:3001/v1/audio/transcriptions \
|
||||
-X POST \
|
||||
-d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
# tgi service
|
||||
curl http://${host_ip}:3006/generate \
|
||||
-X POST \
|
||||
-d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
# llm microservice
|
||||
curl http://${host_ip}:3007/v1/chat/completions\
|
||||
-X POST \
|
||||
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
# speecht5 service
|
||||
curl http://${host_ip}:7055/v1/tts \
|
||||
-X POST \
|
||||
-d '{"text": "Who are you?"}' \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
# tts microservice
|
||||
curl http://${host_ip}:3002/v1/audio/speech \
|
||||
-X POST \
|
||||
-d '{"text": "Who are you?"}' \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
```
|
||||
|
||||
## 🚀 Test MegaService
|
||||
@@ -134,8 +104,9 @@ base64 string to the megaservice endpoint. The megaservice will return a spoken
|
||||
to the response, decode the base64 string and save it as a .wav file.
|
||||
|
||||
```bash
|
||||
# voice can be "default" or "male"
|
||||
curl http://${host_ip}:3008/v1/audioqna \
|
||||
-X POST \
|
||||
-d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64}' \
|
||||
-d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64, "voice":"default"}' \
|
||||
-H 'Content-Type: application/json' | sed 's/^"//;s/"$//' | base64 -d > output.wav
|
||||
```
|
||||
|
||||
@@ -18,14 +18,6 @@ services:
|
||||
cap_add:
|
||||
- SYS_NICE
|
||||
restart: unless-stopped
|
||||
asr:
|
||||
image: ${REGISTRY:-opea}/asr:${TAG:-latest}
|
||||
container_name: asr-service
|
||||
ports:
|
||||
- "3001:9099"
|
||||
ipc: host
|
||||
environment:
|
||||
ASR_ENDPOINT: ${ASR_ENDPOINT}
|
||||
speecht5-service:
|
||||
image: ${REGISTRY:-opea}/speecht5-gaudi:${TAG:-latest}
|
||||
container_name: speecht5-service
|
||||
@@ -42,14 +34,6 @@ services:
|
||||
cap_add:
|
||||
- SYS_NICE
|
||||
restart: unless-stopped
|
||||
tts:
|
||||
image: ${REGISTRY:-opea}/tts:${TAG:-latest}
|
||||
container_name: tts-service
|
||||
ports:
|
||||
- "3002:9088"
|
||||
ipc: host
|
||||
environment:
|
||||
TTS_ENDPOINT: ${TTS_ENDPOINT}
|
||||
tgi-service:
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.6
|
||||
container_name: tgi-gaudi-server
|
||||
@@ -75,28 +59,13 @@ services:
|
||||
- SYS_NICE
|
||||
ipc: host
|
||||
command: --model-id ${LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048
|
||||
llm:
|
||||
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
|
||||
container_name: llm-tgi-gaudi-server
|
||||
depends_on:
|
||||
- tgi-service
|
||||
ports:
|
||||
- "3007:9000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
restart: unless-stopped
|
||||
audioqna-gaudi-backend-server:
|
||||
image: ${REGISTRY:-opea}/audioqna:${TAG:-latest}
|
||||
container_name: audioqna-gaudi-backend-server
|
||||
depends_on:
|
||||
- asr
|
||||
- llm
|
||||
- tts
|
||||
- whisper-service
|
||||
- tgi-service
|
||||
- speecht5-service
|
||||
ports:
|
||||
- "3008:8888"
|
||||
environment:
|
||||
@@ -104,12 +73,12 @@ services:
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
|
||||
- ASR_SERVICE_HOST_IP=${ASR_SERVICE_HOST_IP}
|
||||
- ASR_SERVICE_PORT=${ASR_SERVICE_PORT}
|
||||
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
|
||||
- LLM_SERVICE_PORT=${LLM_SERVICE_PORT}
|
||||
- TTS_SERVICE_HOST_IP=${TTS_SERVICE_HOST_IP}
|
||||
- TTS_SERVICE_PORT=${TTS_SERVICE_PORT}
|
||||
- WHISPER_SERVER_HOST_IP=${WHISPER_SERVER_HOST_IP}
|
||||
- WHISPER_SERVER_PORT=${WHISPER_SERVER_PORT}
|
||||
- LLM_SERVER_HOST_IP=${LLM_SERVER_HOST_IP}
|
||||
- LLM_SERVER_PORT=${LLM_SERVER_PORT}
|
||||
- SPEECHT5_SERVER_HOST_IP=${SPEECHT5_SERVER_HOST_IP}
|
||||
- SPEECHT5_SERVER_PORT=${SPEECHT5_SERVER_PORT}
|
||||
ipc: host
|
||||
restart: always
|
||||
audioqna-gaudi-ui-server:
|
||||
|
||||
@@ -2,6 +2,21 @@
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
pushd "../../../../../" > /dev/null
|
||||
source .set_env.sh
|
||||
popd > /dev/null
|
||||
|
||||
# export host_ip=<your External Public IP>
|
||||
export host_ip=$(hostname -I | awk '{print $1}')
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
# <token>
|
||||
|
||||
export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
|
||||
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export WHISPER_SERVER_HOST_IP=${host_ip}
|
||||
export SPEECHT5_SERVER_HOST_IP=${host_ip}
|
||||
export LLM_SERVER_HOST_IP=${host_ip}
|
||||
|
||||
export WHISPER_SERVER_PORT=7066
|
||||
export SPEECHT5_SERVER_PORT=7055
|
||||
export LLM_SERVER_PORT=3006
|
||||
|
||||
export BACKEND_SERVICE_ENDPOINT=http://${host_ip}:3008/v1/audioqna
|
||||
|
||||
Reference in New Issue
Block a user