[AudioQnA] Enable vLLM and set it as default LLM serving (#1657)
Signed-off-by: Wang, Kai Lawrence <kai.lawrence.wang@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>

Committed by: GitHub
Parent: 35c5cf5de8
Commit: 8fe19291c8
@@ -16,7 +16,7 @@ SPEECHT5_SERVER_HOST_IP = os.getenv("SPEECHT5_SERVER_HOST_IP", "0.0.0.0")
 SPEECHT5_SERVER_PORT = int(os.getenv("SPEECHT5_SERVER_PORT", 7055))
 LLM_SERVER_HOST_IP = os.getenv("LLM_SERVER_HOST_IP", "0.0.0.0")
 LLM_SERVER_PORT = int(os.getenv("LLM_SERVER_PORT", 3006))
-LLM_MODEL_ID = os.getenv("LLM_MODEL_ID", "Intel/neural-chat-7b-v3-3")
+LLM_MODEL_ID = os.getenv("LLM_MODEL_ID", "meta-llama/Meta-Llama-3-8B-Instruct")


 def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
@@ -17,7 +17,7 @@ GPT_SOVITS_SERVER_HOST_IP = os.getenv("GPT_SOVITS_SERVER_HOST_IP", "0.0.0.0")
 GPT_SOVITS_SERVER_PORT = int(os.getenv("GPT_SOVITS_SERVER_PORT", 9088))
 LLM_SERVER_HOST_IP = os.getenv("LLM_SERVER_HOST_IP", "0.0.0.0")
 LLM_SERVER_PORT = int(os.getenv("LLM_SERVER_PORT", 8888))
-LLM_MODEL_ID = os.getenv("LLM_MODEL_ID", "Intel/neural-chat-7b-v3-3")
+LLM_MODEL_ID = os.getenv("LLM_MODEL_ID", "meta-llama/Meta-Llama-3-8B-Instruct")


 def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
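Both entry files above read their serving endpoints and the model ID from the environment, so the new defaults can be overridden without touching code. A minimal sketch (the address below is purely illustrative, not a project default):

```bash
# Point both pipelines at an LLM serving instance on another host; the entry
# files pick these values up via os.getenv at startup.
export LLM_SERVER_HOST_IP=10.0.0.42   # illustrative address, replace with yours
export LLM_SERVER_PORT=3006
export LLM_MODEL_ID=meta-llama/Meta-Llama-3-8B-Instruct
```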
@@ -2,6 +2,10 @@
 
 This document outlines the deployment process for a AudioQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on Intel Xeon server.
 
+The default pipeline deploys with vLLM as the LLM serving component. It also provides options of using TGI backend for LLM microservice, please refer to [Start the MegaService](#-start-the-megaservice) section in this page.
+
+Note: The default LLM is `meta-llama/Meta-Llama-3-8B-Instruct`. Before deploying the application, please make sure either you've requested and been granted the access to it on [Huggingface](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) or you've downloaded the model locally from [ModelScope](https://www.modelscope.cn/models).
+
 ## 🚀 Build Docker images
 
 ### 1. Source Code install GenAIComps
@@ -17,9 +21,15 @@ cd GenAIComps
 docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/integrations/dependency/whisper/Dockerfile .
 ```
 
-### 3. Build LLM Image
+### 3. Build vLLM Image
 
-Intel Xeon optimized image hosted in huggingface repo will be used for TGI service: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu (https://github.com/huggingface/text-generation-inference)
+```bash
+git clone https://github.com/vllm-project/vllm.git
+cd ./vllm/
+VLLM_VER="$(git describe --tags "$(git rev-list --tags --max-count=1)" )"
+git checkout ${VLLM_VER}
+docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile.cpu -t opea/vllm:latest --shm-size=128g .
+```
 
 ### 4. Build TTS Image
 
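As a quick sanity check after the build step above, a small sketch (assuming the clone and build ran in the current shell) to record which vLLM tag was used and confirm the CPU image landed locally:

```bash
# VLLM_VER is set by the build snippet above; echo it for the record.
echo "Built opea/vllm:latest from vLLM tag ${VLLM_VER}"

# Verify the image exists in the local image cache.
docker images opea/vllm:latest
```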
@@ -43,9 +53,10 @@ docker build --no-cache -t opea/audioqna:latest --build-arg https_proxy=$https_p
 Then run the command `docker images`, you will have following images ready:
 
 1. `opea/whisper:latest`
-2. `opea/speecht5:latest`
-3. `opea/audioqna:latest`
-4. `opea/gpt-sovits:latest` (optional)
+2. `opea/vllm:latest`
+3. `opea/speecht5:latest`
+4. `opea/audioqna:latest`
+5. `opea/gpt-sovits:latest` (optional)
 
 ## 🚀 Set the environment variables
 
@@ -55,7 +66,7 @@ Before starting the services with `docker compose`, you have to recheck the foll
 export host_ip=<your External Public IP> # export host_ip=$(hostname -I | awk '{print $1}')
 export HUGGINGFACEHUB_API_TOKEN=<your HF token>
 
-export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
+export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
 
 export MEGA_SERVICE_HOST_IP=${host_ip}
 export WHISPER_SERVER_HOST_IP=${host_ip}
@@ -73,40 +84,90 @@ export BACKEND_SERVICE_ENDPOINT=http://${host_ip}:3008/v1/audioqna
 
 or use set_env.sh file to setup environment variables.
 
-Note: Please replace with host_ip with your external IP address, do not use localhost.
+Note:
 
+- Please replace with host_ip with your external IP address, do not use localhost.
+- If you are in a proxy environment, also set the proxy-related environment variables:
+
+```
+export http_proxy="Your_HTTP_Proxy"
+export https_proxy="Your_HTTPs_Proxy"
+# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
+export no_proxy="Your_No_Proxy",${host_ip},whisper-service,speecht5-service,gpt-sovits-service,tgi-service,vllm-service,audioqna-xeon-backend-server,audioqna-xeon-ui-server
+```
+
 ## 🚀 Start the MegaService
 
 ```bash
 cd GenAIExamples/AudioQnA/docker_compose/intel/cpu/xeon/
+```
+
+If use vLLM as the LLM serving backend:
+
+```
 docker compose up -d
 
 # multilang tts (optional)
 docker compose -f compose_multilang.yaml up -d
 ```
 
+If use TGI as the LLM serving backend:
+
+```
+docker compose -f compose_tgi.yaml up -d
+```
+
 ## 🚀 Test MicroServices
 
-```bash
-# whisper service
-wget https://github.com/intel/intel-extension-for-transformers/raw/main/intel_extension_for_transformers/neural_chat/assets/audio/sample.wav
-curl http://${host_ip}:7066/v1/audio/transcriptions \
-  -H "Content-Type: multipart/form-data" \
-  -F file="@./sample.wav" \
-  -F model="openai/whisper-small"
-
-# tgi service
-curl http://${host_ip}:3006/generate \
-  -X POST \
-  -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
-  -H 'Content-Type: application/json'
-
-# speecht5 service
-curl http://${host_ip}:7055/v1/audio/speech -XPOST -d '{"input": "Who are you?"}' -H 'Content-Type: application/json' --output speech.mp3
-
-# gpt-sovits service (optional)
-curl http://${host_ip}:9880/v1/audio/speech -XPOST -d '{"input": "Who are you?"}' -H 'Content-Type: application/json' --output speech.mp3
-```
+1. Whisper Service
+
+```bash
+wget https://github.com/intel/intel-extension-for-transformers/raw/main/intel_extension_for_transformers/neural_chat/assets/audio/sample.wav
+curl http://${host_ip}:${WHISPER_SERVER_PORT}/v1/audio/transcriptions \
+  -H "Content-Type: multipart/form-data" \
+  -F file="@./sample.wav" \
+  -F model="openai/whisper-small"
+```
+
+2. LLM backend Service
+
+In the first startup, this service will take more time to download, load and warm up the model. After it's finished, the service will be ready and the container (`vllm-service` or `tgi-service`) status shown via `docker ps` will be `healthy`. Before that, the status will be `health: starting`.
+
+Or try the command below to check whether the LLM serving is ready.
+
+```bash
+# vLLM service
+docker logs vllm-service 2>&1 | grep complete
+# If the service is ready, you will get the response like below.
+INFO: Application startup complete.
+```
+
+```bash
+# TGI service
+docker logs tgi-service | grep Connected
+# If the service is ready, you will get the response like below.
+2024-09-03T02:47:53.402023Z INFO text_generation_router::server: router/src/server.rs:2311: Connected
+```
+
+Then try the `cURL` command below to validate services.
+
+```bash
+# either vLLM or TGI service
+curl http://${host_ip}:${LLM_SERVER_PORT}/v1/chat/completions \
+  -X POST \
+  -d '{"model": "meta-llama/Meta-Llama-3-8B-Instruct", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens":17}' \
+  -H 'Content-Type: application/json'
+```
+
+3. TTS Service
+
+```
+# speecht5 service
+curl http://${host_ip}:${SPEECHT5_SERVER_PORT}/v1/audio/speech -XPOST -d '{"input": "Who are you?"}' -H 'Content-Type: application/json' --output speech.mp3
+
+# gpt-sovits service (optional)
+curl http://${host_ip}:${GPT_SOVITS_SERVER_PORT}/v1/audio/speech -XPOST -d '{"input": "Who are you?"}' -H 'Content-Type: application/json' --output speech.mp3
+```
 
 ## 🚀 Test MegaService
 
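The readiness checks above grep the container logs; an equivalent sketch that polls the `/health` endpoint the compose healthcheck already uses (works for either backend, assuming `host_ip` and `LLM_SERVER_PORT` are exported as shown earlier):

```bash
# Wait until the LLM serving endpoint answers on /health before sending requests.
until curl -sf "http://${host_ip}:${LLM_SERVER_PORT}/health" > /dev/null; do
  echo "LLM serving not ready yet, retrying in 5s..."
  sleep 5
done
echo "LLM serving is ready."
```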
@@ -6,7 +6,7 @@ services:
     image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
     container_name: whisper-service
     ports:
-      - "7066:7066"
+      - ${WHISPER_SERVER_PORT:-7066}:7066
     ipc: host
     environment:
       no_proxy: ${no_proxy}
@@ -17,38 +17,41 @@ services:
     image: ${REGISTRY:-opea}/speecht5:${TAG:-latest}
     container_name: speecht5-service
     ports:
-      - "7055:7055"
+      - ${SPEECHT5_SERVER_PORT:-7055}:7055
     ipc: host
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
     restart: unless-stopped
-  tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
-    container_name: tgi-service
+  vllm-service:
+    image: ${REGISTRY:-opea}/vllm:${TAG:-latest}
+    container_name: vllm-service
     ports:
-      - "3006:80"
+      - ${LLM_SERVER_PORT:-3006}:80
     volumes:
-      - "${MODEL_CACHE:-./data}:/data"
-    shm_size: 1g
+      - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub"
+    shm_size: 128g
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
       HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      LLM_MODEL_ID: ${LLM_MODEL_ID}
+      VLLM_TORCH_PROFILER_DIR: "/mnt"
+      LLM_SERVER_PORT: ${LLM_SERVER_PORT}
     healthcheck:
-      test: ["CMD-SHELL", "curl -f http://$host_ip:3006/health || exit 1"]
+      test: ["CMD-SHELL", "curl -f http://$host_ip:${LLM_SERVER_PORT}/health || exit 1"]
       interval: 10s
       timeout: 10s
       retries: 100
-    command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
+    command: --model ${LLM_MODEL_ID} --host 0.0.0.0 --port 80
   audioqna-xeon-backend-server:
     image: ${REGISTRY:-opea}/audioqna:${TAG:-latest}
     container_name: audioqna-xeon-backend-server
     depends_on:
       - whisper-service
-      - tgi-service
+      - vllm-service
       - speecht5-service
     ports:
       - "3008:8888"
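With the healthcheck block added above, readiness can also be read straight from Docker rather than the logs; a small sketch using the default container name from this compose file:

```bash
# Prints "starting" while the model is downloading/loading and "healthy"
# once the /health probe in the compose healthcheck succeeds.
docker inspect --format '{{.State.Health.Status}}' vllm-service
```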
@@ -6,7 +6,7 @@ services:
     image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
     container_name: whisper-service
     ports:
-      - "7066:7066"
+      - ${WHISPER_SERVER_PORT:-7066}:7066
    ipc: host
     environment:
       no_proxy: ${no_proxy}
@@ -18,27 +18,35 @@ services:
     image: ${REGISTRY:-opea}/gpt-sovits:${TAG:-latest}
     container_name: gpt-sovits-service
     ports:
-      - "9880:9880"
+      - ${GPT_SOVITS_SERVER_PORT:-9880}:9880
     ipc: host
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
     restart: unless-stopped
-  tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
-    container_name: tgi-service
+  vllm-service:
+    image: ${REGISTRY:-opea}/vllm:${TAG:-latest}
+    container_name: vllm-service
     ports:
-      - "3006:80"
+      - ${LLM_SERVER_PORT:-3006}:80
     volumes:
-      - "${MODEL_CACHE:-./data}:/data"
-    shm_size: 1g
+      - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub"
+    shm_size: 128g
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
       HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-    command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
+      LLM_MODEL_ID: ${LLM_MODEL_ID}
+      VLLM_TORCH_PROFILER_DIR: "/mnt"
+      LLM_SERVER_PORT: ${LLM_SERVER_PORT}
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://$host_ip:${LLM_SERVER_PORT}/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
+    command: --model ${LLM_MODEL_ID} --host 0.0.0.0 --port 80
   audioqna-xeon-backend-server:
     image: ${REGISTRY:-opea}/audioqna-multilang:${TAG:-latest}
     container_name: audioqna-xeon-backend-server
AudioQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml (new file)
@@ -0,0 +1,87 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

services:
  whisper-service:
    image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
    container_name: whisper-service
    ports:
      - ${WHISPER_SERVER_PORT:-7066}:7066
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
    restart: unless-stopped
  speecht5-service:
    image: ${REGISTRY:-opea}/speecht5:${TAG:-latest}
    container_name: speecht5-service
    ports:
      - ${SPEECHT5_SERVER_PORT:-7055}:7055
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
    restart: unless-stopped
  tgi-service:
    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
    container_name: tgi-service
    ports:
      - ${LLM_SERVER_PORT:-3006}:80
    volumes:
      - "${MODEL_CACHE:-./data}:/data"
    shm_size: 1g
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      LLM_SERVER_PORT: ${LLM_SERVER_PORT}
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://$host_ip:${LLM_SERVER_PORT}/health || exit 1"]
      interval: 10s
      timeout: 10s
      retries: 100
    command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
  audioqna-xeon-backend-server:
    image: ${REGISTRY:-opea}/audioqna:${TAG:-latest}
    container_name: audioqna-xeon-backend-server
    depends_on:
      - whisper-service
      - tgi-service
      - speecht5-service
    ports:
      - "3008:8888"
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
      - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
      - WHISPER_SERVER_HOST_IP=${WHISPER_SERVER_HOST_IP}
      - WHISPER_SERVER_PORT=${WHISPER_SERVER_PORT}
      - LLM_SERVER_HOST_IP=${LLM_SERVER_HOST_IP}
      - LLM_SERVER_PORT=${LLM_SERVER_PORT}
      - LLM_MODEL_ID=${LLM_MODEL_ID}
      - SPEECHT5_SERVER_HOST_IP=${SPEECHT5_SERVER_HOST_IP}
      - SPEECHT5_SERVER_PORT=${SPEECHT5_SERVER_PORT}
    ipc: host
    restart: always
  audioqna-xeon-ui-server:
    image: ${REGISTRY:-opea}/audioqna-ui:${TAG:-latest}
    container_name: audioqna-xeon-ui-server
    depends_on:
      - audioqna-xeon-backend-server
    ports:
      - "5173:5173"
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
      - CHAT_URL=${BACKEND_SERVICE_ENDPOINT}
    ipc: host
    restart: always

networks:
  default:
    driver: bridge
@@ -8,7 +8,7 @@ export host_ip=$(hostname -I | awk '{print $1}')
 export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
 # <token>
 
-export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
+export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
 
 export MEGA_SERVICE_HOST_IP=${host_ip}
 export WHISPER_SERVER_HOST_IP=${host_ip}
@@ -2,6 +2,10 @@
 
 This document outlines the deployment process for a AudioQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on Intel Gaudi server.
 
+The default pipeline deploys with vLLM as the LLM serving component. It also provides options of using TGI backend for LLM microservice, please refer to [Start the MegaService](#-start-the-megaservice) section in this page.
+
+Note: The default LLM is `meta-llama/Meta-Llama-3-8B-Instruct`. Before deploying the application, please make sure either you've requested and been granted the access to it on [Huggingface](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) or you've downloaded the model locally from [ModelScope](https://www.modelscope.cn/models).
+
 ## 🚀 Build Docker images
 
 ### 1. Source Code install GenAIComps
@@ -17,9 +21,13 @@ cd GenAIComps
 docker build -t opea/whisper-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/integrations/dependency/whisper/Dockerfile.intel_hpu .
 ```
 
-### 3. Build LLM Image
+### 3. Build vLLM Image
 
-Intel Xeon optimized image hosted in huggingface repo will be used for TGI service: ghcr.io/huggingface/tgi-gaudi:2.0.6 (https://github.com/huggingface/tgi-gaudi)
+git clone https://github.com/HabanaAI/vllm-fork.git
+cd vllm-fork/
+VLLM_VER=$(git describe --tags "$(git rev-list --tags --max-count=1)")
+git checkout ${VLLM_VER}
+docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile.hpu -t opea/vllm-gaudi:latest --shm-size=128g .
 
 ### 4. Build TTS Image
 
@@ -40,8 +48,9 @@ docker build --no-cache -t opea/audioqna:latest --build-arg https_proxy=$https_p
 Then run the command `docker images`, you will have following images ready:
 
 1. `opea/whisper-gaudi:latest`
-2. `opea/speecht5-gaudi:latest`
-3. `opea/audioqna:latest`
+2. `opea/vllm-gaudi:latest`
+3. `opea/speecht5-gaudi:latest`
+4. `opea/audioqna:latest`
 
 ## 🚀 Set the environment variables
 
@@ -51,7 +60,12 @@ Before starting the services with `docker compose`, you have to recheck the foll
 export host_ip=<your External Public IP> # export host_ip=$(hostname -I | awk '{print $1}')
 export HUGGINGFACEHUB_API_TOKEN=<your HF token>
 
-export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
+export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
+# set vLLM parameters
+export NUM_CARDS=1
+export BLOCK_SIZE=128
+export MAX_NUM_SEQS=256
+export MAX_SEQ_LEN_TO_CAPTURE=2048
 
 export MEGA_SERVICE_HOST_IP=${host_ip}
 export WHISPER_SERVER_HOST_IP=${host_ip}
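The four new exports are forwarded to the vLLM Gaudi container by the compose file (`NUM_CARDS` becomes `--tensor-parallel-size`, `BLOCK_SIZE` becomes `--block-size`, and so on). A sketch of overriding them for a multi-card run; the values are illustrative, not tuned recommendations:

```bash
# Spread the model across two Gaudi cards and allow longer captured sequences,
# then (re)create the serving service with the new settings.
export NUM_CARDS=2
export MAX_SEQ_LEN_TO_CAPTURE=4096
docker compose up -d vllm-service
```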
@@ -65,37 +79,90 @@ export LLM_SERVER_PORT=3006
 export BACKEND_SERVICE_ENDPOINT=http://${host_ip}:3008/v1/audioqna
 ```
 
+or use set_env.sh file to setup environment variables.
+
+Note:
+
+- Please replace with host_ip with your external IP address, do not use localhost.
+- If you are in a proxy environment, also set the proxy-related environment variables:
+
+```
+export http_proxy="Your_HTTP_Proxy"
+export https_proxy="Your_HTTPs_Proxy"
+# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
+export no_proxy="Your_No_Proxy",${host_ip},whisper-service,speecht5-service,tgi-service,vllm-service,audioqna-gaudi-backend-server,audioqna-gaudi-ui-server
+```
+
 ## 🚀 Start the MegaService
 
 > **_NOTE:_** Users will need at least three Gaudi cards for AudioQnA.
 
 ```bash
 cd GenAIExamples/AudioQnA/docker_compose/intel/hpu/gaudi/
+```
+
+If use vLLM as the LLM serving backend:
+
+```
 docker compose up -d
 ```
 
+If use TGI as the LLM serving backend:
+
+```
+docker compose -f compose_tgi.yaml up -d
+```
+
 ## 🚀 Test MicroServices
 
-```bash
-# whisper service
-curl http://${host_ip}:7066/v1/asr \
-  -X POST \
-  -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
-  -H 'Content-Type: application/json'
-
-# tgi service
-curl http://${host_ip}:3006/generate \
-  -X POST \
-  -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
-  -H 'Content-Type: application/json'
-
-# speecht5 service
-curl http://${host_ip}:7055/v1/tts \
-  -X POST \
-  -d '{"text": "Who are you?"}' \
-  -H 'Content-Type: application/json'
-
-```
+1. Whisper Service
+
+```bash
+curl http://${host_ip}:${WHISPER_SERVER_PORT}/v1/asr \
+  -X POST \
+  -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
+  -H 'Content-Type: application/json'
+```
+
+2. LLM backend Service
+
+In the first startup, this service will take more time to download, load and warm up the model. After it's finished, the service will be ready and the container (`vllm-gaudi-service` or `tgi-gaudi-service`) status shown via `docker ps` will be `healthy`. Before that, the status will be `health: starting`.
+
+Or try the command below to check whether the LLM serving is ready.
+
+```bash
+# vLLM service
+docker logs vllm-gaudi-service 2>&1 | grep complete
+# If the service is ready, you will get the response like below.
+INFO: Application startup complete.
+```
+
+```bash
+# TGI service
+docker logs tgi-gaudi-service | grep Connected
+# If the service is ready, you will get the response like below.
+2024-09-03T02:47:53.402023Z INFO text_generation_router::server: router/src/server.rs:2311: Connected
+```
+
+Then try the `cURL` command below to validate services.
+
+```bash
+# either vLLM or TGI service
+curl http://${host_ip}:${LLM_SERVER_PORT}/v1/chat/completions \
+  -X POST \
+  -d '{"model": "meta-llama/Meta-Llama-3-8B-Instruct", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens":17}' \
+  -H 'Content-Type: application/json'
+```
+
+3. TTS Service
+
+```
+# speecht5 service
+curl http://${host_ip}:${SPEECHT5_SERVER_PORT}/v1/tts \
+  -X POST \
+  -d '{"text": "Who are you?"}' \
+  -H 'Content-Type: application/json'
+```
 
 ## 🚀 Test MegaService
 
@@ -6,7 +6,7 @@ services:
     image: ${REGISTRY:-opea}/whisper-gaudi:${TAG:-latest}
     container_name: whisper-service
     ports:
-      - "7066:7066"
+      - ${WHISPER_SERVER_PORT:-7066}:7066
     ipc: host
     environment:
       no_proxy: ${no_proxy}
@@ -22,7 +22,7 @@ services:
     image: ${REGISTRY:-opea}/speecht5-gaudi:${TAG:-latest}
     container_name: speecht5-service
     ports:
-      - "7055:7055"
+      - ${SPEECHT5_SERVER_PORT:-7055}:7055
     ipc: host
     environment:
       no_proxy: ${no_proxy}
@@ -34,28 +34,27 @@ services:
     cap_add:
       - SYS_NICE
     restart: unless-stopped
-  tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.3.1
-    container_name: tgi-gaudi-server
+  vllm-service:
+    image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest}
+    container_name: vllm-gaudi-service
     ports:
-      - "3006:80"
+      - ${LLM_SERVER_PORT:-3006}:80
     volumes:
-      - "${MODEL_CACHE:-./data}:/data"
+      - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub"
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
       HF_HUB_DISABLE_PROGRESS_BARS: 1
       HF_HUB_ENABLE_HF_TRANSFER: 0
       HABANA_VISIBLE_DEVICES: all
       OMPI_MCA_btl_vader_single_copy_mechanism: none
-      ENABLE_HPU_GRAPH: true
-      LIMIT_HPU_GRAPH: true
-      USE_FLASH_ATTENTION: true
-      FLASH_ATTENTION_RECOMPUTE: true
+      LLM_MODEL_ID: ${LLM_MODEL_ID}
+      VLLM_TORCH_PROFILER_DIR: "/mnt"
+      LLM_SERVER_PORT: ${LLM_SERVER_PORT}
     healthcheck:
-      test: ["CMD-SHELL", "curl -f http://$host_ip:3006/health || exit 1"]
+      test: ["CMD-SHELL", "curl -f http://$host_ip:${LLM_SERVER_PORT}/health || exit 1"]
       interval: 10s
       timeout: 10s
       retries: 100
@@ -63,13 +62,13 @@ services:
     cap_add:
       - SYS_NICE
     ipc: host
-    command: --model-id ${LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048
+    command: --model ${LLM_MODEL_ID} --tensor-parallel-size ${NUM_CARDS} --host 0.0.0.0 --port 80 --block-size ${BLOCK_SIZE} --max-num-seqs ${MAX_NUM_SEQS} --max-seq_len-to-capture ${MAX_SEQ_LEN_TO_CAPTURE}
   audioqna-gaudi-backend-server:
     image: ${REGISTRY:-opea}/audioqna:${TAG:-latest}
     container_name: audioqna-gaudi-backend-server
     depends_on:
       - whisper-service
-      - tgi-service
+      - vllm-service
       - speecht5-service
     ports:
       - "3008:8888"
AudioQnA/docker_compose/intel/hpu/gaudi/compose_tgi.yaml (new file)
@@ -0,0 +1,108 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

services:
  whisper-service:
    image: ${REGISTRY:-opea}/whisper-gaudi:${TAG:-latest}
    container_name: whisper-service
    ports:
      - ${WHISPER_SERVER_PORT:-7066}:7066
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
    runtime: habana
    cap_add:
      - SYS_NICE
    restart: unless-stopped
  speecht5-service:
    image: ${REGISTRY:-opea}/speecht5-gaudi:${TAG:-latest}
    container_name: speecht5-service
    ports:
      - ${SPEECHT5_SERVER_PORT:-7055}:7055
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
    runtime: habana
    cap_add:
      - SYS_NICE
    restart: unless-stopped
  tgi-service:
    image: ghcr.io/huggingface/tgi-gaudi:2.3.1
    container_name: tgi-gaudi-service
    ports:
      - ${LLM_SERVER_PORT:-3006}:80
    volumes:
      - "${MODEL_CACHE:-./data}:/data"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
      ENABLE_HPU_GRAPH: true
      LIMIT_HPU_GRAPH: true
      USE_FLASH_ATTENTION: true
      FLASH_ATTENTION_RECOMPUTE: true
      LLM_SERVER_PORT: ${LLM_SERVER_PORT}
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://$host_ip:${LLM_SERVER_PORT}/health || exit 1"]
      interval: 10s
      timeout: 10s
      retries: 100
    runtime: habana
    cap_add:
      - SYS_NICE
    ipc: host
    command: --model-id ${LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048
  audioqna-gaudi-backend-server:
    image: ${REGISTRY:-opea}/audioqna:${TAG:-latest}
    container_name: audioqna-gaudi-backend-server
    depends_on:
      - whisper-service
      - tgi-service
      - speecht5-service
    ports:
      - "3008:8888"
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
      - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
      - WHISPER_SERVER_HOST_IP=${WHISPER_SERVER_HOST_IP}
      - WHISPER_SERVER_PORT=${WHISPER_SERVER_PORT}
      - LLM_SERVER_HOST_IP=${LLM_SERVER_HOST_IP}
      - LLM_SERVER_PORT=${LLM_SERVER_PORT}
      - LLM_MODEL_ID=${LLM_MODEL_ID}
      - SPEECHT5_SERVER_HOST_IP=${SPEECHT5_SERVER_HOST_IP}
      - SPEECHT5_SERVER_PORT=${SPEECHT5_SERVER_PORT}
    ipc: host
    restart: always
  audioqna-gaudi-ui-server:
    image: ${REGISTRY:-opea}/audioqna-ui:${TAG:-latest}
    container_name: audioqna-gaudi-ui-server
    depends_on:
      - audioqna-gaudi-backend-server
    ports:
      - "5173:5173"
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
      - CHAT_URL=${BACKEND_SERVICE_ENDPOINT}
    ipc: host
    restart: always

networks:
  default:
    driver: bridge
@@ -8,7 +8,13 @@ export host_ip=$(hostname -I | awk '{print $1}')
 export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
 # <token>
 
-export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
+export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
+
+# set vLLM parameters
+export NUM_CARDS=1
+export BLOCK_SIZE=128
+export MAX_NUM_SEQS=256
+export MAX_SEQ_LEN_TO_CAPTURE=2048
 
 export MEGA_SERVICE_HOST_IP=${host_ip}
 export WHISPER_SERVER_HOST_IP=${host_ip}
@@ -71,3 +71,15 @@ services:
       dockerfile: comps/tts/src/integrations/dependency/gpt-sovits/Dockerfile
     extends: audioqna
     image: ${REGISTRY:-opea}/gpt-sovits:${TAG:-latest}
+  vllm:
+    build:
+      context: vllm
+      dockerfile: Dockerfile.cpu
+    extends: audioqna
+    image: ${REGISTRY:-opea}/vllm:${TAG:-latest}
+  vllm-gaudi:
+    build:
+      context: vllm-fork
+      dockerfile: Dockerfile.hpu
+    extends: audioqna
+    image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest}
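With the two build targets registered above, the images can also be produced directly from `build.yaml`; a sketch assuming the vLLM sources have already been cloned into `docker_image_build` under the `vllm` and `vllm-fork` directories named in the build contexts:

```bash
cd GenAIExamples/AudioQnA/docker_image_build
docker compose -f build.yaml build vllm --no-cache         # CPU image (opea/vllm)
docker compose -f build.yaml build vllm-gaudi --no-cache   # Gaudi image (opea/vllm-gaudi)
```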
@@ -31,18 +31,27 @@ function build_docker_images() {
     cd $WORKPATH/docker_image_build
     git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
 
+    git clone https://github.com/HabanaAI/vllm-fork.git
+    cd vllm-fork/
+    VLLM_VER=$(git describe --tags "$(git rev-list --tags --max-count=1)")
+    echo "Check out vLLM tag ${VLLM_VER}"
+    git checkout ${VLLM_VER} &> /dev/null && cd ../
+
     echo "Build all the images with --no-cache, check docker_image_build.log for details..."
-    service_list="audioqna audioqna-ui whisper-gaudi speecht5-gaudi"
+    service_list="audioqna audioqna-ui whisper-gaudi speecht5-gaudi vllm-gaudi"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
 
-    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
     docker images && sleep 1s
 }
 
 function start_services() {
     cd $WORKPATH/docker_compose/intel/hpu/gaudi
     export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
-    export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
+    export LLM_MODEL_ID=meta-llama/Meta-Llama-3-8B-Instruct
+    export NUM_CARDS=1
+    export BLOCK_SIZE=128
+    export MAX_NUM_SEQS=256
+    export MAX_SEQ_LEN_TO_CAPTURE=2048
 
     export MEGA_SERVICE_HOST_IP=${ip_address}
     export WHISPER_SERVER_HOST_IP=${ip_address}
@@ -61,8 +70,8 @@ function start_services() {
     docker compose up -d > ${LOG_PATH}/start_services_with_compose.log
     n=0
     until [[ "$n" -ge 200 ]]; do
-        docker logs tgi-gaudi-server > $LOG_PATH/tgi_service_start.log
-        if grep -q Connected $LOG_PATH/tgi_service_start.log; then
+        docker logs vllm-gaudi-service > $LOG_PATH/vllm_service_start.log 2>&1
+        if grep -q complete $LOG_PATH/vllm_service_start.log; then
             break
         fi
         sleep 5s
@@ -86,7 +95,7 @@ function validate_megaservice() {
     # always print the log
     docker logs whisper-service > $LOG_PATH/whisper-service.log
     docker logs speecht5-service > $LOG_PATH/tts-service.log
-    docker logs tgi-gaudi-server > $LOG_PATH/tgi-gaudi-server.log
+    docker logs vllm-gaudi-service > $LOG_PATH/vllm-gaudi-service.log
     docker logs audioqna-gaudi-backend-server > $LOG_PATH/audioqna-gaudi-backend-server.log
     echo "$response" | sed 's/^"//;s/"$//' | base64 -d > speech.mp3
 
@@ -126,7 +135,7 @@ function validate_megaservice() {
 
 function stop_docker() {
     cd $WORKPATH/docker_compose/intel/hpu/gaudi
-    docker compose stop && docker compose rm -f
+    docker compose -f compose.yaml stop && docker compose rm -f
 }
 
 function main() {
@@ -31,18 +31,23 @@ function build_docker_images() {
     cd $WORKPATH/docker_image_build
     git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
 
+    git clone https://github.com/vllm-project/vllm.git
+    cd ./vllm/
+    VLLM_VER="$(git describe --tags "$(git rev-list --tags --max-count=1)" )"
+    echo "Check out vLLM tag ${VLLM_VER}"
+    git checkout ${VLLM_VER} &> /dev/null && cd ../
+
     echo "Build all the images with --no-cache, check docker_image_build.log for details..."
-    service_list="audioqna audioqna-ui whisper speecht5"
+    service_list="audioqna audioqna-ui whisper speecht5 vllm"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
 
-    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
     docker images && sleep 1s
 }
 
 function start_services() {
     cd $WORKPATH/docker_compose/intel/cpu/xeon/
     export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
-    export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
+    export LLM_MODEL_ID=meta-llama/Meta-Llama-3-8B-Instruct
 
     export MEGA_SERVICE_HOST_IP=${ip_address}
     export WHISPER_SERVER_HOST_IP=${ip_address}
@@ -62,8 +67,8 @@ function start_services() {
     docker compose up -d > ${LOG_PATH}/start_services_with_compose.log
     n=0
     until [[ "$n" -ge 200 ]]; do
-        docker logs tgi-service > $LOG_PATH/tgi_service_start.log
-        if grep -q Connected $LOG_PATH/tgi_service_start.log; then
+        docker logs vllm-service > $LOG_PATH/vllm_service_start.log 2>&1
+        if grep -q complete $LOG_PATH/vllm_service_start.log; then
             break
         fi
         sleep 5s
@@ -77,7 +82,7 @@ function validate_megaservice() {
     # always print the log
     docker logs whisper-service > $LOG_PATH/whisper-service.log
     docker logs speecht5-service > $LOG_PATH/tts-service.log
-    docker logs tgi-service > $LOG_PATH/tgi-service.log
+    docker logs vllm-service > $LOG_PATH/vllm-service.log
     docker logs audioqna-xeon-backend-server > $LOG_PATH/audioqna-xeon-backend-server.log
     echo "$response" | sed 's/^"//;s/"$//' | base64 -d > speech.mp3
 
@@ -117,7 +122,7 @@ function validate_megaservice() {
 
 function stop_docker() {
     cd $WORKPATH/docker_compose/intel/cpu/xeon/
-    docker compose stop && docker compose rm -f
+    docker compose -f compose.yaml stop && docker compose rm -f
 }
 
 function main() {
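The readiness loops above now grep the vLLM log for `complete`; an alternative sketch that keys off the compose healthcheck state instead, which would work unchanged for either backend:

```bash
# Wait (up to ~10 minutes) for the serving container to report healthy.
n=0
until [[ "$n" -ge 120 ]]; do
    status=$(docker inspect --format '{{.State.Health.Status}}' vllm-service 2>/dev/null)
    [[ "$status" == "healthy" ]] && break
    sleep 5
    n=$((n+1))
done
```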
AudioQnA/tests/test_compose_tgi_on_gaudi.sh (new file)
@@ -0,0 +1,146 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

set -e
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
export MODEL_CACHE=${model_cache:-"./data"}

WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')

function build_docker_images() {
    opea_branch=${opea_branch:-"main"}
    # If the opea_branch isn't main, replace the git clone branch in Dockerfile.
    if [[ "${opea_branch}" != "main" ]]; then
        cd $WORKPATH
        OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git"
        NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git"
        find . -type f -name "Dockerfile*" | while read -r file; do
            echo "Processing file: $file"
            sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file"
        done
    fi

    cd $WORKPATH/docker_image_build
    git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git

    echo "Build all the images with --no-cache, check docker_image_build.log for details..."
    service_list="audioqna audioqna-ui whisper-gaudi speecht5-gaudi"
    docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
    docker images && sleep 1s
}

function start_services() {
    cd $WORKPATH/docker_compose/intel/hpu/gaudi
    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
    export LLM_MODEL_ID=meta-llama/Meta-Llama-3-8B-Instruct

    export MEGA_SERVICE_HOST_IP=${ip_address}
    export WHISPER_SERVER_HOST_IP=${ip_address}
    export SPEECHT5_SERVER_HOST_IP=${ip_address}
    export LLM_SERVER_HOST_IP=${ip_address}

    export WHISPER_SERVER_PORT=7066
    export SPEECHT5_SERVER_PORT=7055
    export LLM_SERVER_PORT=3006

    export BACKEND_SERVICE_ENDPOINT=http://${ip_address}:3008/v1/audioqna
    export host_ip=${ip_address}
    # sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env

    # Start Docker Containers
    docker compose -f compose_tgi.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
    n=0
    until [[ "$n" -ge 200 ]]; do
        docker logs tgi-gaudi-service > $LOG_PATH/tgi_service_start.log
        if grep -q Connected $LOG_PATH/tgi_service_start.log; then
            break
        fi
        sleep 5s
        n=$((n+1))
    done

    n=0
    until [[ "$n" -ge 100 ]]; do
        docker logs whisper-service > $LOG_PATH/whisper_service_start.log
        if grep -q "Uvicorn server setup on port" $LOG_PATH/whisper_service_start.log; then
            break
        fi
        sleep 5s
        n=$((n+1))
    done
}


function validate_megaservice() {
    response=$(http_proxy="" curl http://${ip_address}:3008/v1/audioqna -XPOST -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64}' -H 'Content-Type: application/json')
    # always print the log
    docker logs whisper-service > $LOG_PATH/whisper-service.log
    docker logs speecht5-service > $LOG_PATH/tts-service.log
    docker logs tgi-gaudi-service > $LOG_PATH/tgi-gaudi-service.log
    docker logs audioqna-gaudi-backend-server > $LOG_PATH/audioqna-gaudi-backend-server.log
    echo "$response" | sed 's/^"//;s/"$//' | base64 -d > speech.mp3

    if [[ $(file speech.mp3) == *"RIFF"* ]]; then
        echo "Result correct."
    else
        echo "Result wrong."
        exit 1
    fi

}

#function validate_frontend() {
#    cd $WORKPATH/ui/svelte
#    local conda_env_name="OPEA_e2e"
#    export PATH=${HOME}/miniforge3/bin/:$PATH
##    conda remove -n ${conda_env_name} --all -y
##    conda create -n ${conda_env_name} python=3.12 -y
#    source activate ${conda_env_name}
#
#    sed -i "s/localhost/$ip_address/g" playwright.config.ts
#
##    conda install -c conda-forge nodejs=22.6.0 -y
#    npm install && npm ci && npx playwright install --with-deps
#    node -v && npm -v && pip list
#
#    exit_status=0
#    npx playwright test || exit_status=$?
#
#    if [ $exit_status -ne 0 ]; then
#        echo "[TEST INFO]: ---------frontend test failed---------"
#        exit $exit_status
#    else
#        echo "[TEST INFO]: ---------frontend test passed---------"
#    fi
#}

function stop_docker() {
    cd $WORKPATH/docker_compose/intel/hpu/gaudi
    docker compose -f compose_tgi.yaml stop && docker compose rm -f
}

function main() {

    stop_docker
    if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
    start_services

    validate_megaservice
    # validate_frontend

    stop_docker
    echo y | docker system prune

}

main
AudioQnA/tests/test_compose_tgi_on_xeon.sh (new file)
@@ -0,0 +1,137 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

set -e
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
export MODEL_CACHE=${model_cache:-"./data"}

WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')

function build_docker_images() {
    opea_branch=${opea_branch:-"main"}
    # If the opea_branch isn't main, replace the git clone branch in Dockerfile.
    if [[ "${opea_branch}" != "main" ]]; then
        cd $WORKPATH
        OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git"
        NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git"
        find . -type f -name "Dockerfile*" | while read -r file; do
            echo "Processing file: $file"
            sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file"
        done
    fi

    cd $WORKPATH/docker_image_build
    git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git

    echo "Build all the images with --no-cache, check docker_image_build.log for details..."
    service_list="audioqna audioqna-ui whisper speecht5"
    docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

    docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
    docker images && sleep 1s
}

function start_services() {
    cd $WORKPATH/docker_compose/intel/cpu/xeon/
    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
    export LLM_MODEL_ID=meta-llama/Meta-Llama-3-8B-Instruct

    export MEGA_SERVICE_HOST_IP=${ip_address}
    export WHISPER_SERVER_HOST_IP=${ip_address}
    export SPEECHT5_SERVER_HOST_IP=${ip_address}
    export LLM_SERVER_HOST_IP=${ip_address}

    export WHISPER_SERVER_PORT=7066
    export SPEECHT5_SERVER_PORT=7055
    export LLM_SERVER_PORT=3006

    export BACKEND_SERVICE_ENDPOINT=http://${ip_address}:3008/v1/audioqna
    export host_ip=${ip_address}

    # sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env

    # Start Docker Containers
    docker compose -f compose_tgi.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
    n=0
    until [[ "$n" -ge 200 ]]; do
        docker logs tgi-service > $LOG_PATH/tgi_service_start.log
        if grep -q Connected $LOG_PATH/tgi_service_start.log; then
            break
        fi
        sleep 5s
        n=$((n+1))
    done
}


function validate_megaservice() {
    response=$(http_proxy="" curl http://${ip_address}:3008/v1/audioqna -XPOST -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64}' -H 'Content-Type: application/json')
    # always print the log
    docker logs whisper-service > $LOG_PATH/whisper-service.log
    docker logs speecht5-service > $LOG_PATH/tts-service.log
    docker logs tgi-service > $LOG_PATH/tgi-service.log
    docker logs audioqna-xeon-backend-server > $LOG_PATH/audioqna-xeon-backend-server.log
    echo "$response" | sed 's/^"//;s/"$//' | base64 -d > speech.mp3

    if [[ $(file speech.mp3) == *"RIFF"* ]]; then
        echo "Result correct."
    else
        echo "Result wrong."
        exit 1
    fi

}

#function validate_frontend() {
#    cd $WORKPATH/ui/svelte
#    local conda_env_name="OPEA_e2e"
#    export PATH=${HOME}/miniforge3/bin/:$PATH
##    conda remove -n ${conda_env_name} --all -y
##    conda create -n ${conda_env_name} python=3.12 -y
#    source activate ${conda_env_name}
#
#    sed -i "s/localhost/$ip_address/g" playwright.config.ts
#
##    conda install -c conda-forge nodejs=22.6.0 -y
#    npm install && npm ci && npx playwright install --with-deps
#    node -v && npm -v && pip list
#
#    exit_status=0
#    npx playwright test || exit_status=$?
#
#    if [ $exit_status -ne 0 ]; then
#        echo "[TEST INFO]: ---------frontend test failed---------"
#        exit $exit_status
#    else
#        echo "[TEST INFO]: ---------frontend test passed---------"
#    fi
#}

function stop_docker() {
    cd $WORKPATH/docker_compose/intel/cpu/xeon/
    docker compose -f compose_tgi.yaml stop && docker compose rm -f
}

function main() {

    stop_docker
    if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
    start_services

    validate_megaservice
    # validate_frontend

    stop_docker
    echo y | docker system prune

}

main