[AudioQnA] Enable vLLM and set it as default LLM serving (#1657)
Signed-off-by: Wang, Kai Lawrence <kai.lawrence.wang@intel.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
parent 35c5cf5de8
commit 8fe19291c8
@@ -16,7 +16,7 @@ SPEECHT5_SERVER_HOST_IP = os.getenv("SPEECHT5_SERVER_HOST_IP", "0.0.0.0")
SPEECHT5_SERVER_PORT = int(os.getenv("SPEECHT5_SERVER_PORT", 7055))
LLM_SERVER_HOST_IP = os.getenv("LLM_SERVER_HOST_IP", "0.0.0.0")
LLM_SERVER_PORT = int(os.getenv("LLM_SERVER_PORT", 3006))
LLM_MODEL_ID = os.getenv("LLM_MODEL_ID", "Intel/neural-chat-7b-v3-3")
LLM_MODEL_ID = os.getenv("LLM_MODEL_ID", "meta-llama/Meta-Llama-3-8B-Instruct")

def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
@@ -17,7 +17,7 @@ GPT_SOVITS_SERVER_HOST_IP = os.getenv("GPT_SOVITS_SERVER_HOST_IP", "0.0.0.0")
GPT_SOVITS_SERVER_PORT = int(os.getenv("GPT_SOVITS_SERVER_PORT", 9088))
LLM_SERVER_HOST_IP = os.getenv("LLM_SERVER_HOST_IP", "0.0.0.0")
LLM_SERVER_PORT = int(os.getenv("LLM_SERVER_PORT", 8888))
LLM_MODEL_ID = os.getenv("LLM_MODEL_ID", "Intel/neural-chat-7b-v3-3")
LLM_MODEL_ID = os.getenv("LLM_MODEL_ID", "meta-llama/Meta-Llama-3-8B-Instruct")

def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
@@ -2,6 +2,10 @@

This document outlines the deployment process for an AudioQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on an Intel Xeon server.

The default pipeline deploys with vLLM as the LLM serving component. TGI can be used as an alternative LLM serving backend; refer to the [Start the MegaService](#-start-the-megaservice) section of this page.

Note: The default LLM is `meta-llama/Meta-Llama-3-8B-Instruct`. Before deploying the application, please make sure you have either requested and been granted access to it on [Huggingface](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) or downloaded the model locally from [ModelScope](https://www.modelscope.cn/models).
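If you want to confirm up front that your token actually has access to the gated model, a quick check is sketched below (an illustrative snippet, not part of the official guide; it assumes `HUGGINGFACEHUB_API_TOKEN` is already exported in your shell).

```bash
# A 200 or 302 status code means the token can fetch the gated model files;
# 401/403 means access has not been granted yet.
curl -s -o /dev/null -w "%{http_code}\n" \
  -H "Authorization: Bearer ${HUGGINGFACEHUB_API_TOKEN}" \
  https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct/resolve/main/config.json
```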
## 🚀 Build Docker images

### 1. Source Code install GenAIComps

@@ -17,9 +21,15 @@ cd GenAIComps
docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/integrations/dependency/whisper/Dockerfile .
```

### 3. Build LLM Image
### 3. Build vLLM Image

Intel Xeon optimized image hosted in huggingface repo will be used for TGI service: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu (https://github.com/huggingface/text-generation-inference)
```bash
git clone https://github.com/vllm-project/vllm.git
cd ./vllm/
VLLM_VER="$(git describe --tags "$(git rev-list --tags --max-count=1)" )"
git checkout ${VLLM_VER}
docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile.cpu -t opea/vllm:latest --shm-size=128g .
```

### 4. Build TTS Image

@@ -43,9 +53,10 @@ docker build --no-cache -t opea/audioqna:latest --build-arg https_proxy=$https_p
Then run the command `docker images`; you will have the following images ready (see the quick check after this list):

1. `opea/whisper:latest`
2. `opea/speecht5:latest`
3. `opea/audioqna:latest`
4. `opea/gpt-sovits:latest` (optional)
2. `opea/vllm:latest`
3. `opea/speecht5:latest`
4. `opea/audioqna:latest`
5. `opea/gpt-sovits:latest` (optional)
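A quick scripted check of the list above (a sketch assuming the default `opea/` image names and the `latest` tag; the gpt-sovits image only appears if you built the optional multilang TTS image):

```bash
# Print only the AudioQnA-related images; any name missing from the output
# means the corresponding build step above needs to be re-run.
docker images --format '{{.Repository}}:{{.Tag}}' \
  | grep -E '^opea/(whisper|vllm|speecht5|audioqna|gpt-sovits):latest$'
```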
## 🚀 Set the environment variables

@@ -55,7 +66,7 @@ Before starting the services with `docker compose`, you have to recheck the foll
export host_ip=<your External Public IP> # export host_ip=$(hostname -I | awk '{print $1}')
export HUGGINGFACEHUB_API_TOKEN=<your HF token>

export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"

export MEGA_SERVICE_HOST_IP=${host_ip}
export WHISPER_SERVER_HOST_IP=${host_ip}

@@ -73,40 +84,90 @@ export BACKEND_SERVICE_ENDPOINT=http://${host_ip}:3008/v1/audioqna

Or use the set_env.sh file to set up the environment variables.

Note: Please replace with host_ip with your external IP address, do not use localhost.
Note:

- Please replace `host_ip` with your external IP address; do not use localhost.
- If you are in a proxy environment, also set the proxy-related environment variables:

```
export http_proxy="Your_HTTP_Proxy"
export https_proxy="Your_HTTPs_Proxy"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy",${host_ip},whisper-service,speecht5-service,gpt-sovits-service,tgi-service,vllm-service,audioqna-xeon-backend-server,audioqna-xeon-ui-server
```

## 🚀 Start the MegaService

```bash
cd GenAIExamples/AudioQnA/docker_compose/intel/cpu/xeon/
```

If using vLLM as the LLM serving backend:

```
docker compose up -d

# multilang tts (optional)
docker compose -f compose_multilang.yaml up -d
```

If using TGI as the LLM serving backend:

```
docker compose -f compose_tgi.yaml up -d
```
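Only one LLM serving backend should be running at a time; if you switch between vLLM and TGI, bring the current stack down first. A minimal sketch, assuming the default compose files shown above:

```bash
# Stop and remove the vLLM-based stack before starting the TGI one (or vice versa),
# so the two backends do not compete for the same host ports.
docker compose down
# or, if the TGI variant is currently running:
docker compose -f compose_tgi.yaml down
```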
## 🚀 Test MicroServices

```bash
# whisper service
wget https://github.com/intel/intel-extension-for-transformers/raw/main/intel_extension_for_transformers/neural_chat/assets/audio/sample.wav
curl http://${host_ip}:7066/v1/audio/transcriptions \
  -H "Content-Type: multipart/form-data" \
  -F file="@./sample.wav" \
  -F model="openai/whisper-small"
1. Whisper Service

# tgi service
curl http://${host_ip}:3006/generate \
  -X POST \
  -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
  -H 'Content-Type: application/json'
```bash
wget https://github.com/intel/intel-extension-for-transformers/raw/main/intel_extension_for_transformers/neural_chat/assets/audio/sample.wav
curl http://${host_ip}:${WHISPER_SERVER_PORT}/v1/audio/transcriptions \
  -H "Content-Type: multipart/form-data" \
  -F file="@./sample.wav" \
  -F model="openai/whisper-small"
```

# speecht5 service
curl http://${host_ip}:7055/v1/audio/speech -XPOST -d '{"input": "Who are you?"}' -H 'Content-Type: application/json' --output speech.mp3
2. LLM backend Service

# gpt-sovits service (optional)
curl http://${host_ip}:9880/v1/audio/speech -XPOST -d '{"input": "Who are you?"}' -H 'Content-Type: application/json' --output speech.mp3
```
On the first startup, this service will take more time to download, load, and warm up the model. Once finished, the service will be ready and the container (`vllm-service` or `tgi-service`) status shown via `docker ps` will be `healthy`. Before that, the status will be `health: starting`.

Or run the command below to check whether the LLM serving backend is ready.

```bash
# vLLM service
docker logs vllm-service 2>&1 | grep complete
# If the service is ready, you will get a response like the one below.
INFO: Application startup complete.
```

```bash
# TGI service
docker logs tgi-service | grep Connected
# If the service is ready, you will get a response like the one below.
2024-09-03T02:47:53.402023Z INFO text_generation_router::server: router/src/server.rs:2311: Connected
```
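If you prefer to script this wait instead of re-running the checks by hand, a minimal sketch is shown below (it assumes the default container name `vllm-service`; substitute `tgi-service` when using the TGI backend):

```bash
# Poll the health status that Docker derives from the compose healthcheck
# until the LLM serving container reports "healthy".
until [ "$(docker inspect -f '{{.State.Health.Status}}' vllm-service 2>/dev/null)" = "healthy" ]; do
  echo "waiting for vllm-service to become healthy..."
  sleep 10
done
echo "vllm-service is ready."
```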
Then try the `curl` command below to validate the services.

```bash
# either vLLM or TGI service
curl http://${host_ip}:${LLM_SERVER_PORT}/v1/chat/completions \
  -X POST \
  -d '{"model": "meta-llama/Meta-Llama-3-8B-Instruct", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens":17}' \
  -H 'Content-Type: application/json'
```

3. TTS Service

```
# speecht5 service
curl http://${host_ip}:${SPEECHT5_SERVER_PORT}/v1/audio/speech -XPOST -d '{"input": "Who are you?"}' -H 'Content-Type: application/json' --output speech.mp3

# gpt-sovits service (optional)
curl http://${host_ip}:${GPT_SOVITS_SERVER_PORT}/v1/audio/speech -XPOST -d '{"input": "Who are you?"}' -H 'Content-Type: application/json' --output speech.mp3
```

## 🚀 Test MegaService
@@ -6,7 +6,7 @@ services:
    image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
    container_name: whisper-service
    ports:
      - "7066:7066"
      - ${WHISPER_SERVER_PORT:-7066}:7066
    ipc: host
    environment:
      no_proxy: ${no_proxy}

@@ -17,38 +17,41 @@ services:
    image: ${REGISTRY:-opea}/speecht5:${TAG:-latest}
    container_name: speecht5-service
    ports:
      - "7055:7055"
      - ${SPEECHT5_SERVER_PORT:-7055}:7055
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
    restart: unless-stopped
  tgi-service:
    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
    container_name: tgi-service
  vllm-service:
    image: ${REGISTRY:-opea}/vllm:${TAG:-latest}
    container_name: vllm-service
    ports:
      - "3006:80"
      - ${LLM_SERVER_PORT:-3006}:80
    volumes:
      - "${MODEL_CACHE:-./data}:/data"
    shm_size: 1g
      - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub"
    shm_size: 128g
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      LLM_MODEL_ID: ${LLM_MODEL_ID}
      VLLM_TORCH_PROFILER_DIR: "/mnt"
      LLM_SERVER_PORT: ${LLM_SERVER_PORT}
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://$host_ip:3006/health || exit 1"]
      test: ["CMD-SHELL", "curl -f http://$host_ip:${LLM_SERVER_PORT}/health || exit 1"]
      interval: 10s
      timeout: 10s
      retries: 100
    command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
    command: --model ${LLM_MODEL_ID} --host 0.0.0.0 --port 80
  audioqna-xeon-backend-server:
    image: ${REGISTRY:-opea}/audioqna:${TAG:-latest}
    container_name: audioqna-xeon-backend-server
    depends_on:
      - whisper-service
      - tgi-service
      - vllm-service
      - speecht5-service
    ports:
      - "3008:8888"
@@ -6,7 +6,7 @@ services:
    image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
    container_name: whisper-service
    ports:
      - "7066:7066"
      - ${WHISPER_SERVER_PORT:-7066}:7066
    ipc: host
    environment:
      no_proxy: ${no_proxy}

@@ -18,27 +18,35 @@ services:
    image: ${REGISTRY:-opea}/gpt-sovits:${TAG:-latest}
    container_name: gpt-sovits-service
    ports:
      - "9880:9880"
      - ${GPT_SOVITS_SERVER_PORT:-9880}:9880
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
    restart: unless-stopped
  tgi-service:
    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
    container_name: tgi-service
  vllm-service:
    image: ${REGISTRY:-opea}/vllm:${TAG:-latest}
    container_name: vllm-service
    ports:
      - "3006:80"
      - ${LLM_SERVER_PORT:-3006}:80
    volumes:
      - "${MODEL_CACHE:-./data}:/data"
    shm_size: 1g
      - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub"
    shm_size: 128g
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
    command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
      LLM_MODEL_ID: ${LLM_MODEL_ID}
      VLLM_TORCH_PROFILER_DIR: "/mnt"
      LLM_SERVER_PORT: ${LLM_SERVER_PORT}
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://$host_ip:${LLM_SERVER_PORT}/health || exit 1"]
      interval: 10s
      timeout: 10s
      retries: 100
    command: --model ${LLM_MODEL_ID} --host 0.0.0.0 --port 80
  audioqna-xeon-backend-server:
    image: ${REGISTRY:-opea}/audioqna-multilang:${TAG:-latest}
    container_name: audioqna-xeon-backend-server
AudioQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml (new file, 87 lines)

@@ -0,0 +1,87 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

services:
  whisper-service:
    image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
    container_name: whisper-service
    ports:
      - ${WHISPER_SERVER_PORT:-7066}:7066
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
    restart: unless-stopped
  speecht5-service:
    image: ${REGISTRY:-opea}/speecht5:${TAG:-latest}
    container_name: speecht5-service
    ports:
      - ${SPEECHT5_SERVER_PORT:-7055}:7055
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
    restart: unless-stopped
  tgi-service:
    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
    container_name: tgi-service
    ports:
      - ${LLM_SERVER_PORT:-3006}:80
    volumes:
      - "${MODEL_CACHE:-./data}:/data"
    shm_size: 1g
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      LLM_SERVER_PORT: ${LLM_SERVER_PORT}
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://$host_ip:${LLM_SERVER_PORT}/health || exit 1"]
      interval: 10s
      timeout: 10s
      retries: 100
    command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
  audioqna-xeon-backend-server:
    image: ${REGISTRY:-opea}/audioqna:${TAG:-latest}
    container_name: audioqna-xeon-backend-server
    depends_on:
      - whisper-service
      - tgi-service
      - speecht5-service
    ports:
      - "3008:8888"
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
      - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
      - WHISPER_SERVER_HOST_IP=${WHISPER_SERVER_HOST_IP}
      - WHISPER_SERVER_PORT=${WHISPER_SERVER_PORT}
      - LLM_SERVER_HOST_IP=${LLM_SERVER_HOST_IP}
      - LLM_SERVER_PORT=${LLM_SERVER_PORT}
      - LLM_MODEL_ID=${LLM_MODEL_ID}
      - SPEECHT5_SERVER_HOST_IP=${SPEECHT5_SERVER_HOST_IP}
      - SPEECHT5_SERVER_PORT=${SPEECHT5_SERVER_PORT}
    ipc: host
    restart: always
  audioqna-xeon-ui-server:
    image: ${REGISTRY:-opea}/audioqna-ui:${TAG:-latest}
    container_name: audioqna-xeon-ui-server
    depends_on:
      - audioqna-xeon-backend-server
    ports:
      - "5173:5173"
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
      - CHAT_URL=${BACKEND_SERVICE_ENDPOINT}
    ipc: host
    restart: always

networks:
  default:
    driver: bridge
@@ -8,7 +8,7 @@ export host_ip=$(hostname -I | awk '{print $1}')
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
# <token>

export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"

export MEGA_SERVICE_HOST_IP=${host_ip}
export WHISPER_SERVER_HOST_IP=${host_ip}
@@ -2,6 +2,10 @@

This document outlines the deployment process for an AudioQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on an Intel Gaudi server.

The default pipeline deploys with vLLM as the LLM serving component. TGI can be used as an alternative LLM serving backend; refer to the [Start the MegaService](#-start-the-megaservice) section of this page.

Note: The default LLM is `meta-llama/Meta-Llama-3-8B-Instruct`. Before deploying the application, please make sure you have either requested and been granted access to it on [Huggingface](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) or downloaded the model locally from [ModelScope](https://www.modelscope.cn/models).

## 🚀 Build Docker images

### 1. Source Code install GenAIComps

@@ -17,9 +21,13 @@ cd GenAIComps
docker build -t opea/whisper-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/integrations/dependency/whisper/Dockerfile.intel_hpu .
```

### 3. Build LLM Image
### 3. Build vLLM Image

Intel Xeon optimized image hosted in huggingface repo will be used for TGI service: ghcr.io/huggingface/tgi-gaudi:2.0.6 (https://github.com/huggingface/tgi-gaudi)
git clone https://github.com/HabanaAI/vllm-fork.git
cd vllm-fork/
VLLM_VER=$(git describe --tags "$(git rev-list --tags --max-count=1)")
git checkout ${VLLM_VER}
docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile.hpu -t opea/vllm-gaudi:latest --shm-size=128g .

### 4. Build TTS Image

@@ -40,8 +48,9 @@ docker build --no-cache -t opea/audioqna:latest --build-arg https_proxy=$https_p
Then run the command `docker images`; you will have the following images ready:

1. `opea/whisper-gaudi:latest`
2. `opea/speecht5-gaudi:latest`
3. `opea/audioqna:latest`
2. `opea/vllm-gaudi:latest`
3. `opea/speecht5-gaudi:latest`
4. `opea/audioqna:latest`
## 🚀 Set the environment variables

@@ -51,7 +60,12 @@ Before starting the services with `docker compose`, you have to recheck the foll
export host_ip=<your External Public IP> # export host_ip=$(hostname -I | awk '{print $1}')
export HUGGINGFACEHUB_API_TOKEN=<your HF token>

export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
# set vLLM parameters
export NUM_CARDS=1
export BLOCK_SIZE=128
export MAX_NUM_SEQS=256
export MAX_SEQ_LEN_TO_CAPTURE=2048

export MEGA_SERVICE_HOST_IP=${host_ip}
export WHISPER_SERVER_HOST_IP=${host_ip}

@@ -65,37 +79,90 @@ export LLM_SERVER_PORT=3006
export BACKEND_SERVICE_ENDPOINT=http://${host_ip}:3008/v1/audioqna
```

Or use the set_env.sh file to set up the environment variables.

Note:

- Please replace `host_ip` with your external IP address; do not use localhost.
- If you are in a proxy environment, also set the proxy-related environment variables:

```
export http_proxy="Your_HTTP_Proxy"
export https_proxy="Your_HTTPs_Proxy"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy",${host_ip},whisper-service,speecht5-service,tgi-service,vllm-service,audioqna-gaudi-backend-server,audioqna-gaudi-ui-server
```

## 🚀 Start the MegaService

> **_NOTE:_** Users will need at least three Gaudi cards for AudioQnA.
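Before bringing the stack up, you can optionally confirm that enough Gaudi cards are visible on the host. A minimal sketch, assuming the Habana driver stack (which provides the `hl-smi` utility) is installed:

```bash
# Inspect the output and make sure at least three Gaudi cards are listed and idle.
hl-smi
```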
```bash
cd GenAIExamples/AudioQnA/docker_compose/intel/hpu/gaudi/
```

If using vLLM as the LLM serving backend:

```
docker compose up -d
```

If using TGI as the LLM serving backend:

```
docker compose -f compose_tgi.yaml up -d
```

## 🚀 Test MicroServices

```bash
# whisper service
curl http://${host_ip}:7066/v1/asr \
  -X POST \
  -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
  -H 'Content-Type: application/json'
1. Whisper Service

# tgi service
curl http://${host_ip}:3006/generate \
  -X POST \
  -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
  -H 'Content-Type: application/json'
```bash
curl http://${host_ip}:${WHISPER_SERVER_PORT}/v1/asr \
  -X POST \
  -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
  -H 'Content-Type: application/json'
```

# speecht5 service
curl http://${host_ip}:7055/v1/tts \
  -X POST \
  -d '{"text": "Who are you?"}' \
  -H 'Content-Type: application/json'
2. LLM backend Service

```
On the first startup, this service will take more time to download, load, and warm up the model. Once finished, the service will be ready and the container (`vllm-gaudi-service` or `tgi-gaudi-service`) status shown via `docker ps` will be `healthy`. Before that, the status will be `health: starting`.

Or run the command below to check whether the LLM serving backend is ready.

```bash
# vLLM service
docker logs vllm-gaudi-service 2>&1 | grep complete
# If the service is ready, you will get a response like the one below.
INFO: Application startup complete.
```

```bash
# TGI service
docker logs tgi-gaudi-service | grep Connected
# If the service is ready, you will get a response like the one below.
2024-09-03T02:47:53.402023Z INFO text_generation_router::server: router/src/server.rs:2311: Connected
```
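As on Xeon, this wait can be scripted; a minimal sketch assuming the default container name `vllm-gaudi-service` (use `tgi-gaudi-service` for the TGI variant):

```bash
# Block until Docker reports the LLM serving container as healthy.
until [ "$(docker inspect -f '{{.State.Health.Status}}' vllm-gaudi-service 2>/dev/null)" = "healthy" ]; do
  sleep 10
done
```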
Then try the `curl` command below to validate the services.

```bash
# either vLLM or TGI service
curl http://${host_ip}:${LLM_SERVER_PORT}/v1/chat/completions \
  -X POST \
  -d '{"model": "meta-llama/Meta-Llama-3-8B-Instruct", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens":17}' \
  -H 'Content-Type: application/json'
```

3. TTS Service

```
# speecht5 service
curl http://${host_ip}:${SPEECHT5_SERVER_PORT}/v1/tts \
  -X POST \
  -d '{"text": "Who are you?"}' \
  -H 'Content-Type: application/json'
```

## 🚀 Test MegaService
@@ -6,7 +6,7 @@ services:
    image: ${REGISTRY:-opea}/whisper-gaudi:${TAG:-latest}
    container_name: whisper-service
    ports:
      - "7066:7066"
      - ${WHISPER_SERVER_PORT:-7066}:7066
    ipc: host
    environment:
      no_proxy: ${no_proxy}

@@ -22,7 +22,7 @@ services:
    image: ${REGISTRY:-opea}/speecht5-gaudi:${TAG:-latest}
    container_name: speecht5-service
    ports:
      - "7055:7055"
      - ${SPEECHT5_SERVER_PORT:-7055}:7055
    ipc: host
    environment:
      no_proxy: ${no_proxy}

@@ -34,28 +34,27 @@ services:
    cap_add:
      - SYS_NICE
    restart: unless-stopped
  tgi-service:
    image: ghcr.io/huggingface/tgi-gaudi:2.3.1
    container_name: tgi-gaudi-server
  vllm-service:
    image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest}
    container_name: vllm-gaudi-service
    ports:
      - "3006:80"
      - ${LLM_SERVER_PORT:-3006}:80
    volumes:
      - "${MODEL_CACHE:-./data}:/data"
      - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
      ENABLE_HPU_GRAPH: true
      LIMIT_HPU_GRAPH: true
      USE_FLASH_ATTENTION: true
      FLASH_ATTENTION_RECOMPUTE: true
      LLM_MODEL_ID: ${LLM_MODEL_ID}
      VLLM_TORCH_PROFILER_DIR: "/mnt"
      LLM_SERVER_PORT: ${LLM_SERVER_PORT}
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://$host_ip:3006/health || exit 1"]
      test: ["CMD-SHELL", "curl -f http://$host_ip:${LLM_SERVER_PORT}/health || exit 1"]
      interval: 10s
      timeout: 10s
      retries: 100

@@ -63,13 +62,13 @@ services:
    cap_add:
      - SYS_NICE
    ipc: host
    command: --model-id ${LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048
    command: --model ${LLM_MODEL_ID} --tensor-parallel-size ${NUM_CARDS} --host 0.0.0.0 --port 80 --block-size ${BLOCK_SIZE} --max-num-seqs ${MAX_NUM_SEQS} --max-seq_len-to-capture ${MAX_SEQ_LEN_TO_CAPTURE}
  audioqna-gaudi-backend-server:
    image: ${REGISTRY:-opea}/audioqna:${TAG:-latest}
    container_name: audioqna-gaudi-backend-server
    depends_on:
      - whisper-service
      - tgi-service
      - vllm-service
      - speecht5-service
    ports:
      - "3008:8888"
AudioQnA/docker_compose/intel/hpu/gaudi/compose_tgi.yaml (new file, 108 lines)

@@ -0,0 +1,108 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

services:
  whisper-service:
    image: ${REGISTRY:-opea}/whisper-gaudi:${TAG:-latest}
    container_name: whisper-service
    ports:
      - ${WHISPER_SERVER_PORT:-7066}:7066
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
    runtime: habana
    cap_add:
      - SYS_NICE
    restart: unless-stopped
  speecht5-service:
    image: ${REGISTRY:-opea}/speecht5-gaudi:${TAG:-latest}
    container_name: speecht5-service
    ports:
      - ${SPEECHT5_SERVER_PORT:-7055}:7055
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
    runtime: habana
    cap_add:
      - SYS_NICE
    restart: unless-stopped
  tgi-service:
    image: ghcr.io/huggingface/tgi-gaudi:2.3.1
    container_name: tgi-gaudi-service
    ports:
      - ${LLM_SERVER_PORT:-3006}:80
    volumes:
      - "${MODEL_CACHE:-./data}:/data"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
      ENABLE_HPU_GRAPH: true
      LIMIT_HPU_GRAPH: true
      USE_FLASH_ATTENTION: true
      FLASH_ATTENTION_RECOMPUTE: true
      LLM_SERVER_PORT: ${LLM_SERVER_PORT}
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://$host_ip:${LLM_SERVER_PORT}/health || exit 1"]
      interval: 10s
      timeout: 10s
      retries: 100
    runtime: habana
    cap_add:
      - SYS_NICE
    ipc: host
    command: --model-id ${LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048
  audioqna-gaudi-backend-server:
    image: ${REGISTRY:-opea}/audioqna:${TAG:-latest}
    container_name: audioqna-gaudi-backend-server
    depends_on:
      - whisper-service
      - tgi-service
      - speecht5-service
    ports:
      - "3008:8888"
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
      - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
      - WHISPER_SERVER_HOST_IP=${WHISPER_SERVER_HOST_IP}
      - WHISPER_SERVER_PORT=${WHISPER_SERVER_PORT}
      - LLM_SERVER_HOST_IP=${LLM_SERVER_HOST_IP}
      - LLM_SERVER_PORT=${LLM_SERVER_PORT}
      - LLM_MODEL_ID=${LLM_MODEL_ID}
      - SPEECHT5_SERVER_HOST_IP=${SPEECHT5_SERVER_HOST_IP}
      - SPEECHT5_SERVER_PORT=${SPEECHT5_SERVER_PORT}
    ipc: host
    restart: always
  audioqna-gaudi-ui-server:
    image: ${REGISTRY:-opea}/audioqna-ui:${TAG:-latest}
    container_name: audioqna-gaudi-ui-server
    depends_on:
      - audioqna-gaudi-backend-server
    ports:
      - "5173:5173"
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
      - CHAT_URL=${BACKEND_SERVICE_ENDPOINT}
    ipc: host
    restart: always

networks:
  default:
    driver: bridge
@@ -8,7 +8,13 @@ export host_ip=$(hostname -I | awk '{print $1}')
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
# <token>

export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"

# set vLLM parameters
export NUM_CARDS=1
export BLOCK_SIZE=128
export MAX_NUM_SEQS=256
export MAX_SEQ_LEN_TO_CAPTURE=2048

export MEGA_SERVICE_HOST_IP=${host_ip}
export WHISPER_SERVER_HOST_IP=${host_ip}
@@ -71,3 +71,15 @@ services:
      dockerfile: comps/tts/src/integrations/dependency/gpt-sovits/Dockerfile
    extends: audioqna
    image: ${REGISTRY:-opea}/gpt-sovits:${TAG:-latest}
  vllm:
    build:
      context: vllm
      dockerfile: Dockerfile.cpu
    extends: audioqna
    image: ${REGISTRY:-opea}/vllm:${TAG:-latest}
  vllm-gaudi:
    build:
      context: vllm-fork
      dockerfile: Dockerfile.hpu
    extends: audioqna
    image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest}
@@ -31,18 +31,27 @@ function build_docker_images() {
    cd $WORKPATH/docker_image_build
    git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git

    git clone https://github.com/HabanaAI/vllm-fork.git
    cd vllm-fork/
    VLLM_VER=$(git describe --tags "$(git rev-list --tags --max-count=1)")
    echo "Check out vLLM tag ${VLLM_VER}"
    git checkout ${VLLM_VER} &> /dev/null && cd ../

    echo "Build all the images with --no-cache, check docker_image_build.log for details..."
    service_list="audioqna audioqna-ui whisper-gaudi speecht5-gaudi"
    service_list="audioqna audioqna-ui whisper-gaudi speecht5-gaudi vllm-gaudi"
    docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
    docker images && sleep 1s
}

function start_services() {
    cd $WORKPATH/docker_compose/intel/hpu/gaudi
    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
    export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
    export LLM_MODEL_ID=meta-llama/Meta-Llama-3-8B-Instruct
    export NUM_CARDS=1
    export BLOCK_SIZE=128
    export MAX_NUM_SEQS=256
    export MAX_SEQ_LEN_TO_CAPTURE=2048

    export MEGA_SERVICE_HOST_IP=${ip_address}
    export WHISPER_SERVER_HOST_IP=${ip_address}

@@ -61,8 +70,8 @@ function start_services() {
    docker compose up -d > ${LOG_PATH}/start_services_with_compose.log
    n=0
    until [[ "$n" -ge 200 ]]; do
        docker logs tgi-gaudi-server > $LOG_PATH/tgi_service_start.log
        if grep -q Connected $LOG_PATH/tgi_service_start.log; then
        docker logs vllm-gaudi-service > $LOG_PATH/vllm_service_start.log 2>&1
        if grep -q complete $LOG_PATH/vllm_service_start.log; then
            break
        fi
        sleep 5s

@@ -86,7 +95,7 @@ function validate_megaservice() {
    # always print the log
    docker logs whisper-service > $LOG_PATH/whisper-service.log
    docker logs speecht5-service > $LOG_PATH/tts-service.log
    docker logs tgi-gaudi-server > $LOG_PATH/tgi-gaudi-server.log
    docker logs vllm-gaudi-service > $LOG_PATH/vllm-gaudi-service.log
    docker logs audioqna-gaudi-backend-server > $LOG_PATH/audioqna-gaudi-backend-server.log
    echo "$response" | sed 's/^"//;s/"$//' | base64 -d > speech.mp3

@@ -126,7 +135,7 @@ function validate_megaservice() {

function stop_docker() {
    cd $WORKPATH/docker_compose/intel/hpu/gaudi
    docker compose stop && docker compose rm -f
    docker compose -f compose.yaml stop && docker compose rm -f
}

function main() {
@@ -31,18 +31,23 @@ function build_docker_images() {
    cd $WORKPATH/docker_image_build
    git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git

    git clone https://github.com/vllm-project/vllm.git
    cd ./vllm/
    VLLM_VER="$(git describe --tags "$(git rev-list --tags --max-count=1)" )"
    echo "Check out vLLM tag ${VLLM_VER}"
    git checkout ${VLLM_VER} &> /dev/null && cd ../

    echo "Build all the images with --no-cache, check docker_image_build.log for details..."
    service_list="audioqna audioqna-ui whisper speecht5"
    service_list="audioqna audioqna-ui whisper speecht5 vllm"
    docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
    docker images && sleep 1s
}

function start_services() {
    cd $WORKPATH/docker_compose/intel/cpu/xeon/
    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
    export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
    export LLM_MODEL_ID=meta-llama/Meta-Llama-3-8B-Instruct

    export MEGA_SERVICE_HOST_IP=${ip_address}
    export WHISPER_SERVER_HOST_IP=${ip_address}

@@ -62,8 +67,8 @@ function start_services() {
    docker compose up -d > ${LOG_PATH}/start_services_with_compose.log
    n=0
    until [[ "$n" -ge 200 ]]; do
        docker logs tgi-service > $LOG_PATH/tgi_service_start.log
        if grep -q Connected $LOG_PATH/tgi_service_start.log; then
        docker logs vllm-service > $LOG_PATH/vllm_service_start.log 2>&1
        if grep -q complete $LOG_PATH/vllm_service_start.log; then
            break
        fi
        sleep 5s

@@ -77,7 +82,7 @@ function validate_megaservice() {
    # always print the log
    docker logs whisper-service > $LOG_PATH/whisper-service.log
    docker logs speecht5-service > $LOG_PATH/tts-service.log
    docker logs tgi-service > $LOG_PATH/tgi-service.log
    docker logs vllm-service > $LOG_PATH/vllm-service.log
    docker logs audioqna-xeon-backend-server > $LOG_PATH/audioqna-xeon-backend-server.log
    echo "$response" | sed 's/^"//;s/"$//' | base64 -d > speech.mp3

@@ -117,7 +122,7 @@ function validate_megaservice() {

function stop_docker() {
    cd $WORKPATH/docker_compose/intel/cpu/xeon/
    docker compose stop && docker compose rm -f
    docker compose -f compose.yaml stop && docker compose rm -f
}

function main() {
AudioQnA/tests/test_compose_tgi_on_gaudi.sh (new file, 146 lines)

@@ -0,0 +1,146 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

set -e
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
export MODEL_CACHE=${model_cache:-"./data"}

WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')

function build_docker_images() {
    opea_branch=${opea_branch:-"main"}
    # If the opea_branch isn't main, replace the git clone branch in Dockerfile.
    if [[ "${opea_branch}" != "main" ]]; then
        cd $WORKPATH
        OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git"
        NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git"
        find . -type f -name "Dockerfile*" | while read -r file; do
            echo "Processing file: $file"
            sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file"
        done
    fi

    cd $WORKPATH/docker_image_build
    git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git

    echo "Build all the images with --no-cache, check docker_image_build.log for details..."
    service_list="audioqna audioqna-ui whisper-gaudi speecht5-gaudi"
    docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
    docker images && sleep 1s
}

function start_services() {
    cd $WORKPATH/docker_compose/intel/hpu/gaudi
    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
    export LLM_MODEL_ID=meta-llama/Meta-Llama-3-8B-Instruct

    export MEGA_SERVICE_HOST_IP=${ip_address}
    export WHISPER_SERVER_HOST_IP=${ip_address}
    export SPEECHT5_SERVER_HOST_IP=${ip_address}
    export LLM_SERVER_HOST_IP=${ip_address}

    export WHISPER_SERVER_PORT=7066
    export SPEECHT5_SERVER_PORT=7055
    export LLM_SERVER_PORT=3006

    export BACKEND_SERVICE_ENDPOINT=http://${ip_address}:3008/v1/audioqna
    export host_ip=${ip_address}
    # sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env

    # Start Docker Containers
    docker compose -f compose_tgi.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
    n=0
    until [[ "$n" -ge 200 ]]; do
        docker logs tgi-gaudi-service > $LOG_PATH/tgi_service_start.log
        if grep -q Connected $LOG_PATH/tgi_service_start.log; then
            break
        fi
        sleep 5s
        n=$((n+1))
    done

    n=0
    until [[ "$n" -ge 100 ]]; do
        docker logs whisper-service > $LOG_PATH/whisper_service_start.log
        if grep -q "Uvicorn server setup on port" $LOG_PATH/whisper_service_start.log; then
            break
        fi
        sleep 5s
        n=$((n+1))
    done
}

function validate_megaservice() {
    response=$(http_proxy="" curl http://${ip_address}:3008/v1/audioqna -XPOST -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64}' -H 'Content-Type: application/json')
    # always print the log
    docker logs whisper-service > $LOG_PATH/whisper-service.log
    docker logs speecht5-service > $LOG_PATH/tts-service.log
    docker logs tgi-gaudi-service > $LOG_PATH/tgi-gaudi-service.log
    docker logs audioqna-gaudi-backend-server > $LOG_PATH/audioqna-gaudi-backend-server.log
    echo "$response" | sed 's/^"//;s/"$//' | base64 -d > speech.mp3

    if [[ $(file speech.mp3) == *"RIFF"* ]]; then
        echo "Result correct."
    else
        echo "Result wrong."
        exit 1
    fi
}

#function validate_frontend() {
# cd $WORKPATH/ui/svelte
# local conda_env_name="OPEA_e2e"
# export PATH=${HOME}/miniforge3/bin/:$PATH
## conda remove -n ${conda_env_name} --all -y
## conda create -n ${conda_env_name} python=3.12 -y
# source activate ${conda_env_name}
#
# sed -i "s/localhost/$ip_address/g" playwright.config.ts
#
## conda install -c conda-forge nodejs=22.6.0 -y
# npm install && npm ci && npx playwright install --with-deps
# node -v && npm -v && pip list
#
# exit_status=0
# npx playwright test || exit_status=$?
#
# if [ $exit_status -ne 0 ]; then
# echo "[TEST INFO]: ---------frontend test failed---------"
# exit $exit_status
# else
# echo "[TEST INFO]: ---------frontend test passed---------"
# fi
#}

function stop_docker() {
    cd $WORKPATH/docker_compose/intel/hpu/gaudi
    docker compose -f compose_tgi.yaml stop && docker compose rm -f
}

function main() {

    stop_docker
    if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
    start_services

    validate_megaservice
    # validate_frontend

    stop_docker
    echo y | docker system prune

}

main
AudioQnA/tests/test_compose_tgi_on_xeon.sh (new file, 137 lines)

@@ -0,0 +1,137 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

set -e
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
export MODEL_CACHE=${model_cache:-"./data"}

WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')

function build_docker_images() {
    opea_branch=${opea_branch:-"main"}
    # If the opea_branch isn't main, replace the git clone branch in Dockerfile.
    if [[ "${opea_branch}" != "main" ]]; then
        cd $WORKPATH
        OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git"
        NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git"
        find . -type f -name "Dockerfile*" | while read -r file; do
            echo "Processing file: $file"
            sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file"
        done
    fi

    cd $WORKPATH/docker_image_build
    git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git

    echo "Build all the images with --no-cache, check docker_image_build.log for details..."
    service_list="audioqna audioqna-ui whisper speecht5"
    docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

    docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
    docker images && sleep 1s
}

function start_services() {
    cd $WORKPATH/docker_compose/intel/cpu/xeon/
    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
    export LLM_MODEL_ID=meta-llama/Meta-Llama-3-8B-Instruct

    export MEGA_SERVICE_HOST_IP=${ip_address}
    export WHISPER_SERVER_HOST_IP=${ip_address}
    export SPEECHT5_SERVER_HOST_IP=${ip_address}
    export LLM_SERVER_HOST_IP=${ip_address}

    export WHISPER_SERVER_PORT=7066
    export SPEECHT5_SERVER_PORT=7055
    export LLM_SERVER_PORT=3006

    export BACKEND_SERVICE_ENDPOINT=http://${ip_address}:3008/v1/audioqna
    export host_ip=${ip_address}

    # sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env

    # Start Docker Containers
    docker compose -f compose_tgi.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
    n=0
    until [[ "$n" -ge 200 ]]; do
        docker logs tgi-service > $LOG_PATH/tgi_service_start.log
        if grep -q Connected $LOG_PATH/tgi_service_start.log; then
            break
        fi
        sleep 5s
        n=$((n+1))
    done
}

function validate_megaservice() {
    response=$(http_proxy="" curl http://${ip_address}:3008/v1/audioqna -XPOST -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64}' -H 'Content-Type: application/json')
    # always print the log
    docker logs whisper-service > $LOG_PATH/whisper-service.log
    docker logs speecht5-service > $LOG_PATH/tts-service.log
    docker logs tgi-service > $LOG_PATH/tgi-service.log
    docker logs audioqna-xeon-backend-server > $LOG_PATH/audioqna-xeon-backend-server.log
    echo "$response" | sed 's/^"//;s/"$//' | base64 -d > speech.mp3

    if [[ $(file speech.mp3) == *"RIFF"* ]]; then
        echo "Result correct."
    else
        echo "Result wrong."
        exit 1
    fi
}

#function validate_frontend() {
# cd $WORKPATH/ui/svelte
# local conda_env_name="OPEA_e2e"
# export PATH=${HOME}/miniforge3/bin/:$PATH
## conda remove -n ${conda_env_name} --all -y
## conda create -n ${conda_env_name} python=3.12 -y
# source activate ${conda_env_name}
#
# sed -i "s/localhost/$ip_address/g" playwright.config.ts
#
## conda install -c conda-forge nodejs=22.6.0 -y
# npm install && npm ci && npx playwright install --with-deps
# node -v && npm -v && pip list
#
# exit_status=0
# npx playwright test || exit_status=$?
#
# if [ $exit_status -ne 0 ]; then
# echo "[TEST INFO]: ---------frontend test failed---------"
# exit $exit_status
# else
# echo "[TEST INFO]: ---------frontend test passed---------"
# fi
#}

function stop_docker() {
    cd $WORKPATH/docker_compose/intel/cpu/xeon/
    docker compose -f compose_tgi.yaml stop && docker compose rm -f
}

function main() {

    stop_docker
    if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
    start_services

    validate_megaservice
    # validate_frontend

    stop_docker
    echo y | docker system prune

}

main