Refactor AudioQnA/MultiModalQnA/AvatarChatbot (#1310)
Signed-off-by: chensuyue <suyue.chen@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: chensuyue <suyue.chen@intel.com>
@@ -16,7 +16,6 @@ RUN useradd -m -s /bin/bash user && \

WORKDIR /home/user/
RUN git clone https://github.com/opea-project/GenAIComps.git

WORKDIR /home/user/GenAIComps
RUN pip install --no-cache-dir --upgrade pip setuptools && \
    pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt
@@ -1,7 +1,6 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import asyncio
import os

from comps import MegaServiceEndpoint, MicroService, ServiceOrchestrator, ServiceRoleType, ServiceType
@@ -10,43 +9,68 @@ from comps.cores.proto.docarray import LLMParams
from fastapi import Request

MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))
ASR_SERVICE_HOST_IP = os.getenv("ASR_SERVICE_HOST_IP", "0.0.0.0")
ASR_SERVICE_PORT = int(os.getenv("ASR_SERVICE_PORT", 9099))
LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0")
LLM_SERVICE_PORT = int(os.getenv("LLM_SERVICE_PORT", 9000))
TTS_SERVICE_HOST_IP = os.getenv("TTS_SERVICE_HOST_IP", "0.0.0.0")
TTS_SERVICE_PORT = int(os.getenv("TTS_SERVICE_PORT", 9088))

WHISPER_SERVER_HOST_IP = os.getenv("WHISPER_SERVER_HOST_IP", "0.0.0.0")
WHISPER_SERVER_PORT = int(os.getenv("WHISPER_SERVER_PORT", 7066))
SPEECHT5_SERVER_HOST_IP = os.getenv("SPEECHT5_SERVER_HOST_IP", "0.0.0.0")
SPEECHT5_SERVER_PORT = int(os.getenv("SPEECHT5_SERVER_PORT", 7055))
LLM_SERVER_HOST_IP = os.getenv("LLM_SERVER_HOST_IP", "0.0.0.0")
LLM_SERVER_PORT = int(os.getenv("LLM_SERVER_PORT", 3006))


def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
    if self.services[cur_node].service_type == ServiceType.LLM:
        # convert TGI/vLLM to unified OpenAI /v1/chat/completions format
        next_inputs = {}
        next_inputs["model"] = "tgi"  # specifically clarify the fake model to make the format unified
        next_inputs["messages"] = [{"role": "user", "content": inputs["asr_result"]}]
        next_inputs["max_tokens"] = llm_parameters_dict["max_tokens"]
        next_inputs["top_p"] = llm_parameters_dict["top_p"]
        next_inputs["stream"] = inputs["streaming"]  # False as default
        next_inputs["frequency_penalty"] = inputs["frequency_penalty"]
        # next_inputs["presence_penalty"] = inputs["presence_penalty"]
        # next_inputs["repetition_penalty"] = inputs["repetition_penalty"]
        next_inputs["temperature"] = inputs["temperature"]
        inputs = next_inputs
    elif self.services[cur_node].service_type == ServiceType.TTS:
        next_inputs = {}
        next_inputs["text"] = inputs["choices"][0]["message"]["content"]
        next_inputs["voice"] = kwargs["voice"]
        inputs = next_inputs
    return inputs


class AudioQnAService:
    def __init__(self, host="0.0.0.0", port=8000):
        self.host = host
        self.port = port
        ServiceOrchestrator.align_inputs = align_inputs
        self.megaservice = ServiceOrchestrator()

        self.endpoint = str(MegaServiceEndpoint.AUDIO_QNA)

    def add_remote_service(self):
        asr = MicroService(
            name="asr",
            host=ASR_SERVICE_HOST_IP,
            port=ASR_SERVICE_PORT,
            endpoint="/v1/audio/transcriptions",
            host=WHISPER_SERVER_HOST_IP,
            port=WHISPER_SERVER_PORT,
            endpoint="/v1/asr",
            use_remote_service=True,
            service_type=ServiceType.ASR,
        )
        llm = MicroService(
            name="llm",
            host=LLM_SERVICE_HOST_IP,
            port=LLM_SERVICE_PORT,
            host=LLM_SERVER_HOST_IP,
            port=LLM_SERVER_PORT,
            endpoint="/v1/chat/completions",
            use_remote_service=True,
            service_type=ServiceType.LLM,
        )
        tts = MicroService(
            name="tts",
            host=TTS_SERVICE_HOST_IP,
            port=TTS_SERVICE_PORT,
            endpoint="/v1/audio/speech",
            host=SPEECHT5_SERVER_HOST_IP,
            port=SPEECHT5_SERVER_PORT,
            endpoint="/v1/tts",
            use_remote_service=True,
            service_type=ServiceType.TTS,
        )
@@ -70,11 +94,13 @@ class AudioQnAService:
            streaming=False,  # TODO add streaming LLM output as input to TTS
        )
        result_dict, runtime_graph = await self.megaservice.schedule(
            initial_inputs={"byte_str": chat_request.audio}, llm_parameters=parameters
            initial_inputs={"audio": chat_request.audio},
            llm_parameters=parameters,
            voice=chat_request.voice if hasattr(chat_request, "voice") else "default",
        )

        last_node = runtime_graph.all_leaves()[-1]
        response = result_dict[last_node]["byte_str"]
        response = result_dict[last_node]["tts_result"]

        return response
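For context, `align_inputs` only reshapes the payload that flows between hops of the pipeline. A minimal sketch of the LLM-hop conversion it performs is below; the sample values are hypothetical and the snippet does not depend on the OPEA classes.

```python
# Illustrative only: the shape change align_inputs applies on the LLM hop.
# Sample values are hypothetical; this does not import the OPEA classes.
asr_output = {
    "asr_result": "what is deep learning",
    "streaming": False,
    "frequency_penalty": 0.0,
    "temperature": 0.01,
}
llm_parameters = {"max_tokens": 64, "top_p": 0.95}

llm_request = {
    "model": "tgi",  # placeholder model name so the OpenAI-style payload is accepted
    "messages": [{"role": "user", "content": asr_output["asr_result"]}],
    "max_tokens": llm_parameters["max_tokens"],
    "top_p": llm_parameters["top_p"],
    "stream": asr_output["streaming"],
    "frequency_penalty": asr_output["frequency_penalty"],
    "temperature": asr_output["temperature"],
}
# llm_request is what gets POSTed to the LLM server's /v1/chat/completions endpoint.
print(llm_request)
```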
@@ -1,7 +1,6 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import asyncio
import base64
import os

@@ -21,12 +20,8 @@ LLM_SERVER_PORT = int(os.getenv("LLM_SERVER_PORT", 8888))


def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
    print(inputs)
    if self.services[cur_node].service_type == ServiceType.ASR:
        # {'byte_str': 'UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA'}
        inputs["audio"] = inputs["byte_str"]
        del inputs["byte_str"]
    elif self.services[cur_node].service_type == ServiceType.LLM:

    if self.services[cur_node].service_type == ServiceType.LLM:
        # convert TGI/vLLM to unified OpenAI /v1/chat/completions format
        next_inputs = {}
        next_inputs["model"] = "tgi"  # specifically clarify the fake model to make the format unified
@@ -111,7 +106,7 @@ class AudioQnAService:
            streaming=False,  # TODO add streaming LLM output as input to TTS
        )
        result_dict, runtime_graph = await self.megaservice.schedule(
            initial_inputs={"byte_str": chat_request.audio}, llm_parameters=parameters
            initial_inputs={"audio": chat_request.audio}, llm_parameters=parameters
        )

        last_node = runtime_graph.all_leaves()[-1]
@@ -14,12 +14,12 @@ We evaluate the WER (Word Error Rate) metric of the ASR microservice.

### Launch ASR microservice

Launch the ASR microservice with the following commands. For more details please refer to [doc](https://github.com/opea-project/GenAIComps/tree/main/comps/asr/whisper/README.md).
Launch the ASR microservice with the following commands. For more details please refer to [doc](https://github.com/opea-project/GenAIComps/tree/main/comps/asr/src/README.md).

```bash
git clone https://github.com/opea-project/GenAIComps
cd GenAIComps
docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/Dockerfile .
docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/Dockerfile .
# change the model to evaluate by editing model_name_or_path
docker run -p 7066:7066 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/whisper:latest --model_name_or_path "openai/whisper-tiny"
```
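Before running the evaluation, a quick request can confirm the launched service responds. A minimal sketch follows; the `/v1/asr` endpoint on port 7066 and the tiny sample WAV payload are the ones used in the AudioQnA READMEs, while `localhost` is an assumption for a local run.

```python
# Quick check that the Whisper ASR service started above answers on port 7066.
import requests

SAMPLE_AUDIO_B64 = "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"

resp = requests.post(
    "http://localhost:7066/v1/asr",  # assumes a local run; replace localhost with your host IP
    json={"audio": SAMPLE_AUDIO_B64},
    timeout=60,
)
print(resp.status_code, resp.text)  # the body should contain the transcription
```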
@@ -15,30 +15,20 @@ cd GenAIComps
### 2. Build ASR Image

```bash
docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/dependency/Dockerfile .

docker build -t opea/asr:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/Dockerfile .
docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/integrations/dependency/whisper/Dockerfile .
```

### 3. Build LLM Image

```bash
docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile .
```

Note:
For the ROCm compose example, an AMD optimized image hosted in the Hugging Face repo will be used for the TGI service: ghcr.io/huggingface/text-generation-inference:2.3.1-rocm (https://github.com/huggingface/text-generation-inference)

### 4. Build TTS Image

```bash
docker build -t opea/speecht5:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/speecht5/dependency/Dockerfile .

docker build -t opea/tts:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/speecht5/Dockerfile .
docker build -t opea/speecht5:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/src/integrations/dependency/speecht5/Dockerfile .
```

### 6. Build MegaService Docker Image
### 5. Build MegaService Docker Image

To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `audioqna.py` Python script. Build the MegaService Docker image using the command below:

@@ -51,11 +41,8 @@ docker build --no-cache -t opea/audioqna:latest --build-arg https_proxy=$https_p
Then run the command `docker images`; you will have the following images ready:

1. `opea/whisper:latest`
2. `opea/asr:latest`
3. `opea/llm-tgi:latest`
4. `opea/speecht5:latest`
5. `opea/tts:latest`
6. `opea/audioqna:latest`
2. `opea/speecht5:latest`
3. `opea/audioqna:latest`

## 🚀 Set the environment variables
@@ -65,20 +52,18 @@ Before starting the services with `docker compose`, you have to recheck the foll
export host_ip=<your External Public IP> # export host_ip=$(hostname -I | awk '{print $1}')
export HUGGINGFACEHUB_API_TOKEN=<your HF token>

export TGI_LLM_ENDPOINT=http://$host_ip:3006
export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3

export ASR_ENDPOINT=http://$host_ip:7066
export TTS_ENDPOINT=http://$host_ip:7055

export MEGA_SERVICE_HOST_IP=${host_ip}
export ASR_SERVICE_HOST_IP=${host_ip}
export TTS_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export WHISPER_SERVER_HOST_IP=${host_ip}
export SPEECHT5_SERVER_HOST_IP=${host_ip}
export LLM_SERVER_HOST_IP=${host_ip}

export ASR_SERVICE_PORT=3001
export TTS_SERVICE_PORT=3002
export LLM_SERVICE_PORT=3007
export WHISPER_SERVER_PORT=7066
export SPEECHT5_SERVER_PORT=7055
export LLM_SERVER_PORT=3006

export BACKEND_SERVICE_ENDPOINT=http://${host_ip}:3008/v1/audioqna
```

Or use the set_env.sh file to set up the environment variables.
@@ -122,9 +107,10 @@ base64 string to the megaservice endpoint. The megaservice will return a spoken
to the response, decode the base64 string and save it as a .wav file.

```bash
# voice can be "default" or "male"
curl http://${host_ip}:3008/v1/audioqna \
  -X POST \
  -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64}' \
  -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64, "voice":"default"}' \
  -H 'Content-Type: application/json' | sed 's/^"//;s/"$//' | base64 -d > output.wav
```
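The same check can be scripted. A minimal Python sketch of the request above is shown here; the endpoint, payload fields, and quote-stripping step mirror the curl pipeline, while the `requests` dependency and `localhost` host are assumptions.

```python
# Minimal sketch: send a base64-encoded WAV to the AudioQnA megaservice and save
# the spoken answer, mirroring the curl | sed | base64 -d pipeline above.
import base64
import requests

host_ip = "localhost"  # replace with your host_ip
SAMPLE_AUDIO_B64 = "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"

resp = requests.post(
    f"http://{host_ip}:3008/v1/audioqna",
    json={"audio": SAMPLE_AUDIO_B64, "max_tokens": 64, "voice": "default"},
    timeout=300,
)
resp.raise_for_status()

# The service returns the generated speech as a (JSON-quoted) base64 string.
audio_b64 = resp.text.strip().strip('"')
with open("output.wav", "wb") as f:
    f.write(base64.b64decode(audio_b64))
```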
@@ -137,34 +123,15 @@ curl http://${host_ip}:7066/v1/asr \
  -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
  -H 'Content-Type: application/json'

# asr microservice
curl http://${host_ip}:3001/v1/audio/transcriptions \
  -X POST \
  -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
  -H 'Content-Type: application/json'

# tgi service
curl http://${host_ip}:3006/generate \
  -X POST \
  -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
  -H 'Content-Type: application/json'

# llm microservice
curl http://${host_ip}:3007/v1/chat/completions\
  -X POST \
  -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \
  -H 'Content-Type: application/json'

# speecht5 service
curl http://${host_ip}:7055/v1/tts \
  -X POST \
  -d '{"text": "Who are you?"}' \
  -H 'Content-Type: application/json'

# tts microservice
curl http://${host_ip}:3002/v1/audio/speech \
  -X POST \
  -d '{"text": "Who are you?"}' \
  -H 'Content-Type: application/json'

```
@@ -13,14 +13,6 @@ services:
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
restart: unless-stopped
|
||||
asr:
|
||||
image: ${REGISTRY:-opea}/asr:${TAG:-latest}
|
||||
container_name: asr-service
|
||||
ports:
|
||||
- "3001:9099"
|
||||
ipc: host
|
||||
environment:
|
||||
ASR_ENDPOINT: ${ASR_ENDPOINT}
|
||||
speecht5-service:
|
||||
image: ${REGISTRY:-opea}/speecht5:${TAG:-latest}
|
||||
container_name: speecht5-service
|
||||
@@ -32,14 +24,6 @@ services:
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
restart: unless-stopped
|
||||
tts:
|
||||
image: ${REGISTRY:-opea}/tts:${TAG:-latest}
|
||||
container_name: tts-service
|
||||
ports:
|
||||
- "3002:9088"
|
||||
ipc: host
|
||||
environment:
|
||||
TTS_ENDPOINT: ${TTS_ENDPOINT}
|
||||
tgi-service:
|
||||
image: ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
|
||||
container_name: tgi-service
|
||||
@@ -67,28 +51,13 @@ services:
|
||||
security_opt:
|
||||
- seccomp:unconfined
|
||||
ipc: host
|
||||
llm:
|
||||
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
|
||||
container_name: llm-tgi-server
|
||||
depends_on:
|
||||
- tgi-service
|
||||
ports:
|
||||
- "3007:9000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
restart: unless-stopped
|
||||
audioqna-backend-server:
|
||||
image: ${REGISTRY:-opea}/audioqna:${TAG:-latest}
|
||||
container_name: audioqna-xeon-backend-server
|
||||
depends_on:
|
||||
- asr
|
||||
- llm
|
||||
- tts
|
||||
- whisper-service
|
||||
- tgi-service
|
||||
- speecht5-service
|
||||
ports:
|
||||
- "3008:8888"
|
||||
environment:
|
||||
@@ -96,12 +65,12 @@ services:
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
|
||||
- ASR_SERVICE_HOST_IP=${ASR_SERVICE_HOST_IP}
|
||||
- ASR_SERVICE_PORT=${ASR_SERVICE_PORT}
|
||||
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
|
||||
- LLM_SERVICE_PORT=${LLM_SERVICE_PORT}
|
||||
- TTS_SERVICE_HOST_IP=${TTS_SERVICE_HOST_IP}
|
||||
- TTS_SERVICE_PORT=${TTS_SERVICE_PORT}
|
||||
- WHISPER_SERVER_HOST_IP=${WHISPER_SERVER_HOST_IP}
|
||||
- WHISPER_SERVER_PORT=${WHISPER_SERVER_PORT}
|
||||
- LLM_SERVER_HOST_IP=${LLM_SERVER_HOST_IP}
|
||||
- LLM_SERVER_PORT=${LLM_SERVER_PORT}
|
||||
- SPEECHT5_SERVER_HOST_IP=${SPEECHT5_SERVER_HOST_IP}
|
||||
- SPEECHT5_SERVER_PORT=${SPEECHT5_SERVER_PORT}
|
||||
ipc: host
|
||||
restart: always
|
||||
|
||||
|
||||
@@ -10,17 +10,15 @@ export host_ip="192.165.1.21"
|
||||
export HUGGINGFACEHUB_API_TOKEN=${YOUR_HUGGINGFACEHUB_API_TOKEN}
|
||||
# <token>
|
||||
|
||||
export TGI_LLM_ENDPOINT=http://$host_ip:3006
|
||||
export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
|
||||
|
||||
export ASR_ENDPOINT=http://$host_ip:7066
|
||||
export TTS_ENDPOINT=http://$host_ip:7055
|
||||
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export ASR_SERVICE_HOST_IP=${host_ip}
|
||||
export TTS_SERVICE_HOST_IP=${host_ip}
|
||||
export LLM_SERVICE_HOST_IP=${host_ip}
|
||||
export WHISPER_SERVER_HOST_IP=${host_ip}
|
||||
export SPEECHT5_SERVER_HOST_IP=${host_ip}
|
||||
export LLM_SERVER_HOST_IP=${host_ip}
|
||||
|
||||
export ASR_SERVICE_PORT=3001
|
||||
export TTS_SERVICE_PORT=3002
|
||||
export LLM_SERVICE_PORT=3007
|
||||
export WHISPER_SERVER_PORT=7066
|
||||
export SPEECHT5_SERVER_PORT=7055
|
||||
export LLM_SERVER_PORT=3006
|
||||
|
||||
export BACKEND_SERVICE_ENDPOINT=http://${host_ip}:3008/v1/audioqna
|
||||
|
||||
@@ -14,27 +14,20 @@ cd GenAIComps
|
||||
### 2. Build ASR Image
|
||||
|
||||
```bash
|
||||
docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/dependency/Dockerfile .
|
||||
|
||||
|
||||
docker build -t opea/asr:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/Dockerfile .
|
||||
docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/integrations/dependency/whisper/Dockerfile .
|
||||
```
|
||||
|
||||
### 3. Build LLM Image
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile .
|
||||
```
|
||||
An Intel Xeon optimized image hosted in the Hugging Face repo will be used for the TGI service: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu (https://github.com/huggingface/text-generation-inference)
|
||||
|
||||
### 4. Build TTS Image
|
||||
|
||||
```bash
|
||||
docker build -t opea/speecht5:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/speecht5/dependency/Dockerfile .
|
||||
|
||||
docker build -t opea/tts:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/speecht5/Dockerfile .
|
||||
docker build -t opea/speecht5:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/src/integrations/dependency/speecht5/Dockerfile .
|
||||
```
|
||||
|
||||
### 6. Build MegaService Docker Image
|
||||
### 5. Build MegaService Docker Image
|
||||
|
||||
To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `audioqna.py` Python script. Build the MegaService Docker image using the command below:
|
||||
|
||||
@@ -47,11 +40,8 @@ docker build --no-cache -t opea/audioqna:latest --build-arg https_proxy=$https_p
|
||||
Then run the command `docker images`; you will have the following images ready:
|
||||
|
||||
1. `opea/whisper:latest`
|
||||
2. `opea/asr:latest`
|
||||
3. `opea/llm-tgi:latest`
|
||||
4. `opea/speecht5:latest`
|
||||
5. `opea/tts:latest`
|
||||
6. `opea/audioqna:latest`
|
||||
2. `opea/speecht5:latest`
|
||||
3. `opea/audioqna:latest`
|
||||
|
||||
## 🚀 Set the environment variables
|
||||
|
||||
@@ -61,22 +51,24 @@ Before starting the services with `docker compose`, you have to recheck the foll
|
||||
export host_ip=<your External Public IP> # export host_ip=$(hostname -I | awk '{print $1}')
|
||||
export HUGGINGFACEHUB_API_TOKEN=<your HF token>
|
||||
|
||||
export TGI_LLM_ENDPOINT=http://$host_ip:3006
|
||||
export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
|
||||
|
||||
export ASR_ENDPOINT=http://$host_ip:7066
|
||||
export TTS_ENDPOINT=http://$host_ip:7055
|
||||
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export ASR_SERVICE_HOST_IP=${host_ip}
|
||||
export TTS_SERVICE_HOST_IP=${host_ip}
|
||||
export LLM_SERVICE_HOST_IP=${host_ip}
|
||||
export WHISPER_SERVER_HOST_IP=${host_ip}
|
||||
export SPEECHT5_SERVER_HOST_IP=${host_ip}
|
||||
export LLM_SERVER_HOST_IP=${host_ip}
|
||||
|
||||
export ASR_SERVICE_PORT=3001
|
||||
export TTS_SERVICE_PORT=3002
|
||||
export LLM_SERVICE_PORT=3007
|
||||
export WHISPER_SERVER_PORT=7066
|
||||
export SPEECHT5_SERVER_PORT=7055
|
||||
export LLM_SERVER_PORT=3006
|
||||
|
||||
export BACKEND_SERVICE_ENDPOINT=http://${host_ip}:3008/v1/audioqna
|
||||
```
|
||||
|
||||
Or use the set_env.sh file to set up the environment variables.
|
||||
|
||||
Note: Please replace host_ip with your external IP address; do not use localhost.
|
||||
|
||||
## 🚀 Start the MegaService
|
||||
|
||||
```bash
|
||||
@@ -93,36 +85,18 @@ curl http://${host_ip}:7066/v1/asr \
|
||||
-d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
# asr microservice
|
||||
curl http://${host_ip}:3001/v1/audio/transcriptions \
|
||||
-X POST \
|
||||
-d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
# tgi service
|
||||
curl http://${host_ip}:3006/generate \
|
||||
-X POST \
|
||||
-d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
# llm microservice
|
||||
curl http://${host_ip}:3007/v1/chat/completions\
|
||||
-X POST \
|
||||
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
# speecht5 service
|
||||
curl http://${host_ip}:7055/v1/tts \
|
||||
-X POST \
|
||||
-d '{"text": "Who are you?"}' \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
# tts microservice
|
||||
curl http://${host_ip}:3002/v1/audio/speech \
|
||||
-X POST \
|
||||
-d '{"text": "Who are you?"}' \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
```
|
||||
|
||||
## 🚀 Test MegaService
|
||||
@@ -132,8 +106,9 @@ base64 string to the megaservice endpoint. The megaservice will return a spoken
|
||||
to the response, decode the base64 string and save it as a .wav file.
|
||||
|
||||
```bash
|
||||
# voice can be "default" or "male"
|
||||
curl http://${host_ip}:3008/v1/audioqna \
|
||||
-X POST \
|
||||
-d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64}' \
|
||||
-d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64, "voice":"default"}' \
|
||||
-H 'Content-Type: application/json' | sed 's/^"//;s/"$//' | base64 -d > output.wav
|
||||
```
|
||||
|
||||
@@ -13,14 +13,6 @@ services:
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
restart: unless-stopped
|
||||
asr:
|
||||
image: ${REGISTRY:-opea}/asr:${TAG:-latest}
|
||||
container_name: asr-service
|
||||
ports:
|
||||
- "3001:9099"
|
||||
ipc: host
|
||||
environment:
|
||||
ASR_ENDPOINT: ${ASR_ENDPOINT}
|
||||
speecht5-service:
|
||||
image: ${REGISTRY:-opea}/speecht5:${TAG:-latest}
|
||||
container_name: speecht5-service
|
||||
@@ -32,14 +24,6 @@ services:
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
restart: unless-stopped
|
||||
tts:
|
||||
image: ${REGISTRY:-opea}/tts:${TAG:-latest}
|
||||
container_name: tts-service
|
||||
ports:
|
||||
- "3002:9088"
|
||||
ipc: host
|
||||
environment:
|
||||
TTS_ENDPOINT: ${TTS_ENDPOINT}
|
||||
tgi-service:
|
||||
image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
|
||||
container_name: tgi-service
|
||||
@@ -54,28 +38,13 @@ services:
|
||||
https_proxy: ${https_proxy}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
|
||||
llm:
|
||||
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
|
||||
container_name: llm-tgi-server
|
||||
depends_on:
|
||||
- tgi-service
|
||||
ports:
|
||||
- "3007:9000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
restart: unless-stopped
|
||||
audioqna-xeon-backend-server:
|
||||
image: ${REGISTRY:-opea}/audioqna:${TAG:-latest}
|
||||
container_name: audioqna-xeon-backend-server
|
||||
depends_on:
|
||||
- asr
|
||||
- llm
|
||||
- tts
|
||||
- whisper-service
|
||||
- tgi-service
|
||||
- speecht5-service
|
||||
ports:
|
||||
- "3008:8888"
|
||||
environment:
|
||||
@@ -83,12 +52,12 @@ services:
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
|
||||
- ASR_SERVICE_HOST_IP=${ASR_SERVICE_HOST_IP}
|
||||
- ASR_SERVICE_PORT=${ASR_SERVICE_PORT}
|
||||
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
|
||||
- LLM_SERVICE_PORT=${LLM_SERVICE_PORT}
|
||||
- TTS_SERVICE_HOST_IP=${TTS_SERVICE_HOST_IP}
|
||||
- TTS_SERVICE_PORT=${TTS_SERVICE_PORT}
|
||||
- WHISPER_SERVER_HOST_IP=${WHISPER_SERVER_HOST_IP}
|
||||
- WHISPER_SERVER_PORT=${WHISPER_SERVER_PORT}
|
||||
- LLM_SERVER_HOST_IP=${LLM_SERVER_HOST_IP}
|
||||
- LLM_SERVER_PORT=${LLM_SERVER_PORT}
|
||||
- SPEECHT5_SERVER_HOST_IP=${SPEECHT5_SERVER_HOST_IP}
|
||||
- SPEECHT5_SERVER_PORT=${SPEECHT5_SERVER_PORT}
|
||||
ipc: host
|
||||
restart: always
|
||||
audioqna-xeon-ui-server:
|
||||
|
||||
@@ -2,6 +2,21 @@
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
pushd "../../../../../" > /dev/null
|
||||
source .set_env.sh
|
||||
popd > /dev/null
|
||||
|
||||
# export host_ip=<your External Public IP>
|
||||
export host_ip=$(hostname -I | awk '{print $1}')
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
# <token>
|
||||
|
||||
export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
|
||||
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export WHISPER_SERVER_HOST_IP=${host_ip}
|
||||
export SPEECHT5_SERVER_HOST_IP=${host_ip}
|
||||
export LLM_SERVER_HOST_IP=${host_ip}
|
||||
|
||||
export WHISPER_SERVER_PORT=7066
|
||||
export SPEECHT5_SERVER_PORT=7055
|
||||
export LLM_SERVER_PORT=3006
|
||||
|
||||
export BACKEND_SERVICE_ENDPOINT=http://${host_ip}:3008/v1/audioqna
|
||||
|
||||
@@ -14,27 +14,20 @@ cd GenAIComps
|
||||
### 2. Build ASR Image
|
||||
|
||||
```bash
|
||||
docker build -t opea/whisper-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/dependency/Dockerfile.intel_hpu .
|
||||
|
||||
|
||||
docker build -t opea/asr:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/Dockerfile .
|
||||
docker build -t opea/whisper-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/integrations/dependency/whisper/Dockerfile.intel_hpu .
|
||||
```
|
||||
|
||||
### 3. Build LLM Image
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile .
|
||||
```
|
||||
An Intel Gaudi optimized image hosted in the Hugging Face repo will be used for the TGI service: ghcr.io/huggingface/tgi-gaudi:2.0.6 (https://github.com/huggingface/tgi-gaudi)
|
||||
|
||||
### 4. Build TTS Image
|
||||
|
||||
```bash
|
||||
docker build -t opea/speecht5-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/speecht5/dependency/Dockerfile.intel_hpu .
|
||||
|
||||
docker build -t opea/tts:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/speecht5/Dockerfile .
|
||||
docker build -t opea/speecht5-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/src/integrations/dependency/speecht5/Dockerfile.intel_hpu .
|
||||
```
|
||||
|
||||
### 6. Build MegaService Docker Image
|
||||
### 5. Build MegaService Docker Image
|
||||
|
||||
To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `audioqna.py` Python script. Build the MegaService Docker image using the command below:
|
||||
|
||||
@@ -47,11 +40,8 @@ docker build --no-cache -t opea/audioqna:latest --build-arg https_proxy=$https_p
|
||||
Then run the command `docker images`; you will have the following images ready:
|
||||
|
||||
1. `opea/whisper-gaudi:latest`
|
||||
2. `opea/asr:latest`
|
||||
3. `opea/llm-tgi:latest`
|
||||
4. `opea/speecht5-gaudi:latest`
|
||||
5. `opea/tts:latest`
|
||||
6. `opea/audioqna:latest`
|
||||
2. `opea/speecht5-gaudi:latest`
|
||||
3. `opea/audioqna:latest`
|
||||
|
||||
## 🚀 Set the environment variables
|
||||
|
||||
@@ -61,20 +51,18 @@ Before starting the services with `docker compose`, you have to recheck the foll
|
||||
export host_ip=<your External Public IP> # export host_ip=$(hostname -I | awk '{print $1}')
|
||||
export HUGGINGFACEHUB_API_TOKEN=<your HF token>
|
||||
|
||||
export TGI_LLM_ENDPOINT=http://$host_ip:3006
|
||||
export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
|
||||
|
||||
export ASR_ENDPOINT=http://$host_ip:7066
|
||||
export TTS_ENDPOINT=http://$host_ip:7055
|
||||
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export ASR_SERVICE_HOST_IP=${host_ip}
|
||||
export TTS_SERVICE_HOST_IP=${host_ip}
|
||||
export LLM_SERVICE_HOST_IP=${host_ip}
|
||||
export WHISPER_SERVER_HOST_IP=${host_ip}
|
||||
export SPEECHT5_SERVER_HOST_IP=${host_ip}
|
||||
export LLM_SERVER_HOST_IP=${host_ip}
|
||||
|
||||
export ASR_SERVICE_PORT=3001
|
||||
export TTS_SERVICE_PORT=3002
|
||||
export LLM_SERVICE_PORT=3007
|
||||
export WHISPER_SERVER_PORT=7066
|
||||
export SPEECHT5_SERVER_PORT=7055
|
||||
export LLM_SERVER_PORT=3006
|
||||
|
||||
export BACKEND_SERVICE_ENDPOINT=http://${host_ip}:3008/v1/audioqna
|
||||
```
|
||||
|
||||
## 🚀 Start the MegaService
|
||||
@@ -95,36 +83,18 @@ curl http://${host_ip}:7066/v1/asr \
|
||||
-d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
# asr microservice
|
||||
curl http://${host_ip}:3001/v1/audio/transcriptions \
|
||||
-X POST \
|
||||
-d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
# tgi service
|
||||
curl http://${host_ip}:3006/generate \
|
||||
-X POST \
|
||||
-d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
# llm microservice
|
||||
curl http://${host_ip}:3007/v1/chat/completions\
|
||||
-X POST \
|
||||
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
# speecht5 service
|
||||
curl http://${host_ip}:7055/v1/tts \
|
||||
-X POST \
|
||||
-d '{"text": "Who are you?"}' \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
# tts microservice
|
||||
curl http://${host_ip}:3002/v1/audio/speech \
|
||||
-X POST \
|
||||
-d '{"text": "Who are you?"}' \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
```
|
||||
|
||||
## 🚀 Test MegaService
|
||||
@@ -134,8 +104,9 @@ base64 string to the megaservice endpoint. The megaservice will return a spoken
|
||||
to the response, decode the base64 string and save it as a .wav file.
|
||||
|
||||
```bash
|
||||
# voice can be "default" or "male"
|
||||
curl http://${host_ip}:3008/v1/audioqna \
|
||||
-X POST \
|
||||
-d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64}' \
|
||||
-d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64, "voice":"default"}' \
|
||||
-H 'Content-Type: application/json' | sed 's/^"//;s/"$//' | base64 -d > output.wav
|
||||
```
|
||||
|
||||
@@ -18,14 +18,6 @@ services:
|
||||
cap_add:
|
||||
- SYS_NICE
|
||||
restart: unless-stopped
|
||||
asr:
|
||||
image: ${REGISTRY:-opea}/asr:${TAG:-latest}
|
||||
container_name: asr-service
|
||||
ports:
|
||||
- "3001:9099"
|
||||
ipc: host
|
||||
environment:
|
||||
ASR_ENDPOINT: ${ASR_ENDPOINT}
|
||||
speecht5-service:
|
||||
image: ${REGISTRY:-opea}/speecht5-gaudi:${TAG:-latest}
|
||||
container_name: speecht5-service
|
||||
@@ -42,14 +34,6 @@ services:
|
||||
cap_add:
|
||||
- SYS_NICE
|
||||
restart: unless-stopped
|
||||
tts:
|
||||
image: ${REGISTRY:-opea}/tts:${TAG:-latest}
|
||||
container_name: tts-service
|
||||
ports:
|
||||
- "3002:9088"
|
||||
ipc: host
|
||||
environment:
|
||||
TTS_ENDPOINT: ${TTS_ENDPOINT}
|
||||
tgi-service:
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.6
|
||||
container_name: tgi-gaudi-server
|
||||
@@ -75,28 +59,13 @@ services:
|
||||
- SYS_NICE
|
||||
ipc: host
|
||||
command: --model-id ${LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048
|
||||
llm:
|
||||
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
|
||||
container_name: llm-tgi-gaudi-server
|
||||
depends_on:
|
||||
- tgi-service
|
||||
ports:
|
||||
- "3007:9000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
restart: unless-stopped
|
||||
audioqna-gaudi-backend-server:
|
||||
image: ${REGISTRY:-opea}/audioqna:${TAG:-latest}
|
||||
container_name: audioqna-gaudi-backend-server
|
||||
depends_on:
|
||||
- asr
|
||||
- llm
|
||||
- tts
|
||||
- whisper-service
|
||||
- tgi-service
|
||||
- speecht5-service
|
||||
ports:
|
||||
- "3008:8888"
|
||||
environment:
|
||||
@@ -104,12 +73,12 @@ services:
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
|
||||
- ASR_SERVICE_HOST_IP=${ASR_SERVICE_HOST_IP}
|
||||
- ASR_SERVICE_PORT=${ASR_SERVICE_PORT}
|
||||
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
|
||||
- LLM_SERVICE_PORT=${LLM_SERVICE_PORT}
|
||||
- TTS_SERVICE_HOST_IP=${TTS_SERVICE_HOST_IP}
|
||||
- TTS_SERVICE_PORT=${TTS_SERVICE_PORT}
|
||||
- WHISPER_SERVER_HOST_IP=${WHISPER_SERVER_HOST_IP}
|
||||
- WHISPER_SERVER_PORT=${WHISPER_SERVER_PORT}
|
||||
- LLM_SERVER_HOST_IP=${LLM_SERVER_HOST_IP}
|
||||
- LLM_SERVER_PORT=${LLM_SERVER_PORT}
|
||||
- SPEECHT5_SERVER_HOST_IP=${SPEECHT5_SERVER_HOST_IP}
|
||||
- SPEECHT5_SERVER_PORT=${SPEECHT5_SERVER_PORT}
|
||||
ipc: host
|
||||
restart: always
|
||||
audioqna-gaudi-ui-server:
|
||||
|
||||
@@ -2,6 +2,21 @@
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
pushd "../../../../../" > /dev/null
|
||||
source .set_env.sh
|
||||
popd > /dev/null
|
||||
|
||||
# export host_ip=<your External Public IP>
|
||||
export host_ip=$(hostname -I | awk '{print $1}')
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
# <token>
|
||||
|
||||
export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
|
||||
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export WHISPER_SERVER_HOST_IP=${host_ip}
|
||||
export SPEECHT5_SERVER_HOST_IP=${host_ip}
|
||||
export LLM_SERVER_HOST_IP=${host_ip}
|
||||
|
||||
export WHISPER_SERVER_PORT=7066
|
||||
export SPEECHT5_SERVER_PORT=7055
|
||||
export LLM_SERVER_PORT=3006
|
||||
|
||||
export BACKEND_SERVICE_ENDPOINT=http://${host_ip}:3008/v1/audioqna
|
||||
|
||||
@@ -26,19 +26,19 @@ services:
|
||||
whisper-gaudi:
|
||||
build:
|
||||
context: GenAIComps
|
||||
dockerfile: comps/asr/whisper/dependency/Dockerfile.intel_hpu
|
||||
dockerfile: comps/asr/src/integrations/dependency/whisper/Dockerfile.intel_hpu
|
||||
extends: audioqna
|
||||
image: ${REGISTRY:-opea}/whisper-gaudi:${TAG:-latest}
|
||||
whisper:
|
||||
build:
|
||||
context: GenAIComps
|
||||
dockerfile: comps/asr/whisper/dependency/Dockerfile
|
||||
dockerfile: comps/asr/src/integrations/dependency/whisper/Dockerfile
|
||||
extends: audioqna
|
||||
image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
|
||||
asr:
|
||||
build:
|
||||
context: GenAIComps
|
||||
dockerfile: comps/asr/whisper/Dockerfile
|
||||
dockerfile: comps/asr/src/Dockerfile
|
||||
extends: audioqna
|
||||
image: ${REGISTRY:-opea}/asr:${TAG:-latest}
|
||||
llm-tgi:
|
||||
@@ -50,24 +50,24 @@ services:
|
||||
speecht5-gaudi:
|
||||
build:
|
||||
context: GenAIComps
|
||||
dockerfile: comps/tts/speecht5/dependency/Dockerfile.intel_hpu
|
||||
dockerfile: comps/tts/src/integrations/dependency/speecht5/Dockerfile.intel_hpu
|
||||
extends: audioqna
|
||||
image: ${REGISTRY:-opea}/speecht5-gaudi:${TAG:-latest}
|
||||
speecht5:
|
||||
build:
|
||||
context: GenAIComps
|
||||
dockerfile: comps/tts/speecht5/dependency/Dockerfile
|
||||
dockerfile: comps/tts/src/integrations/dependency/speecht5/Dockerfile
|
||||
extends: audioqna
|
||||
image: ${REGISTRY:-opea}/speecht5:${TAG:-latest}
|
||||
tts:
|
||||
build:
|
||||
context: GenAIComps
|
||||
dockerfile: comps/tts/speecht5/Dockerfile
|
||||
dockerfile: comps/tts/src/Dockerfile
|
||||
extends: audioqna
|
||||
image: ${REGISTRY:-opea}/tts:${TAG:-latest}
|
||||
gpt-sovits:
|
||||
build:
|
||||
context: GenAIComps
|
||||
dockerfile: comps/tts/gpt-sovits/Dockerfile
|
||||
dockerfile: comps/tts/src/integrations/dependency/gpt-sovits/Dockerfile
|
||||
extends: audioqna
|
||||
image: ${REGISTRY:-opea}/gpt-sovits:${TAG:-latest}
|
||||
|
||||
@@ -7,69 +7,17 @@ metadata:
|
||||
name: audio-qna-config
|
||||
namespace: default
|
||||
data:
|
||||
ASR_ENDPOINT: http://whisper-svc.default.svc.cluster.local:7066
|
||||
TTS_ENDPOINT: http://speecht5-svc.default.svc.cluster.local:7055
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
|
||||
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:3006
|
||||
MEGA_SERVICE_HOST_IP: audioqna-backend-server-svc
|
||||
ASR_SERVICE_HOST_IP: asr-svc
|
||||
ASR_SERVICE_PORT: "3001"
|
||||
LLM_SERVICE_HOST_IP: llm-svc
|
||||
LLM_SERVICE_PORT: "3007"
|
||||
TTS_SERVICE_HOST_IP: tts-svc
|
||||
TTS_SERVICE_PORT: "3002"
|
||||
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: asr-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: asr-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: asr-deploy
|
||||
spec:
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: asr-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: audio-qna-config
|
||||
image: opea/asr:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: asr-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 9099
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: asr-svc
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: asr-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 3001
|
||||
targetPort: 9099
|
||||
WHISPER_SERVER_HOST_IP: whisper-svc
|
||||
WHISPER_SERVER_PORT: 7066
|
||||
SPEECHT5_SERVER_HOST_IP: speecht5-svc
|
||||
SPEECHT5_SERVER_PORT: 7055
|
||||
LLM_SERVER_HOST_IP: llm-svc
|
||||
LLM_SERVER_PORT: 3006
|
||||
|
||||
---
|
||||
|
||||
apiVersion: apps/v1
|
||||
@@ -122,57 +70,6 @@ spec:
|
||||
port: 7066
|
||||
targetPort: 7066
|
||||
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: tts-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: tts-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: tts-deploy
|
||||
spec:
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: tts-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: audio-qna-config
|
||||
image: opea/tts:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: tts-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 9088
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: tts-svc
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: tts-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 3002
|
||||
targetPort: 9088
|
||||
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
@@ -291,57 +188,6 @@ spec:
|
||||
port: 3006
|
||||
targetPort: 80
|
||||
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-deploy
|
||||
spec:
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: audio-qna-config
|
||||
image: opea/llm-tgi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 9000
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: llm-svc
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: llm-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 3007
|
||||
targetPort: 9000
|
||||
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
|
||||
@@ -7,69 +7,17 @@ metadata:
|
||||
name: audio-qna-config
|
||||
namespace: default
|
||||
data:
|
||||
ASR_ENDPOINT: http://whisper-svc.default.svc.cluster.local:7066
|
||||
TTS_ENDPOINT: http://speecht5-svc.default.svc.cluster.local:7055
|
||||
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
|
||||
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:3006
|
||||
MEGA_SERVICE_HOST_IP: audioqna-backend-server-svc
|
||||
ASR_SERVICE_HOST_IP: asr-svc
|
||||
ASR_SERVICE_PORT: "3001"
|
||||
LLM_SERVICE_HOST_IP: llm-svc
|
||||
LLM_SERVICE_PORT: "3007"
|
||||
TTS_SERVICE_HOST_IP: tts-svc
|
||||
TTS_SERVICE_PORT: "3002"
|
||||
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: asr-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: asr-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: asr-deploy
|
||||
spec:
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: asr-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: audio-qna-config
|
||||
image: opea/asr:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: asr-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 9099
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: asr-svc
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: asr-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 3001
|
||||
targetPort: 9099
|
||||
WHISPER_SERVER_HOST_IP: whisper-svc
|
||||
WHISPER_SERVER_PORT: 7066
|
||||
SPEECHT5_SERVER_HOST_IP: speecht5-svc
|
||||
SPEECHT5_SERVER_PORT: 7055
|
||||
LLM_SERVER_HOST_IP: llm-svc
|
||||
LLM_SERVER_PORT: 3006
|
||||
|
||||
---
|
||||
|
||||
apiVersion: apps/v1
|
||||
@@ -134,57 +82,6 @@ spec:
|
||||
port: 7066
|
||||
targetPort: 7066
|
||||
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: tts-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: tts-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: tts-deploy
|
||||
spec:
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: tts-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: audio-qna-config
|
||||
image: opea/tts:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: tts-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 9088
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: tts-svc
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: tts-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 3002
|
||||
targetPort: 9088
|
||||
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
@@ -343,57 +240,6 @@ spec:
|
||||
port: 3006
|
||||
targetPort: 80
|
||||
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llm-deploy
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
|
||||
labels:
|
||||
app: llm-deploy
|
||||
spec:
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app: llm-deploy
|
||||
hostIPC: true
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: audio-qna-config
|
||||
image: opea/llm-tgi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-deploy
|
||||
args: null
|
||||
ports:
|
||||
- containerPort: 9000
|
||||
serviceAccountName: default
|
||||
---
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: llm-svc
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: llm-deploy
|
||||
ports:
|
||||
- name: service
|
||||
port: 3007
|
||||
targetPort: 9000
|
||||
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
|
||||
@@ -19,7 +19,7 @@ function build_docker_images() {
|
||||
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
|
||||
|
||||
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
||||
service_list="audioqna audioqna-ui whisper-gaudi asr llm-tgi speecht5-gaudi tts"
|
||||
service_list="audioqna audioqna-ui whisper-gaudi speecht5-gaudi"
|
||||
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
|
||||
|
||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
|
||||
@@ -29,28 +29,24 @@ function build_docker_images() {
|
||||
function start_services() {
|
||||
cd $WORKPATH/docker_compose/intel/hpu/gaudi
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
|
||||
export TGI_LLM_ENDPOINT=http://$ip_address:3006
|
||||
export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
|
||||
|
||||
export ASR_ENDPOINT=http://$ip_address:7066
|
||||
export TTS_ENDPOINT=http://$ip_address:7055
|
||||
|
||||
export MEGA_SERVICE_HOST_IP=${ip_address}
|
||||
export ASR_SERVICE_HOST_IP=${ip_address}
|
||||
export TTS_SERVICE_HOST_IP=${ip_address}
|
||||
export LLM_SERVICE_HOST_IP=${ip_address}
|
||||
export WHISPER_SERVER_HOST_IP=${ip_address}
|
||||
export SPEECHT5_SERVER_HOST_IP=${ip_address}
|
||||
export LLM_SERVER_HOST_IP=${ip_address}
|
||||
|
||||
export ASR_SERVICE_PORT=3001
|
||||
export TTS_SERVICE_PORT=3002
|
||||
export LLM_SERVICE_PORT=3007
|
||||
export WHISPER_SERVER_PORT=7066
|
||||
export SPEECHT5_SERVER_PORT=7055
|
||||
export LLM_SERVER_PORT=3006
|
||||
|
||||
export BACKEND_SERVICE_ENDPOINT=http://${ip_address}:3008/v1/audioqna
|
||||
# sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
|
||||
|
||||
# Start Docker Containers
|
||||
docker compose up -d > ${LOG_PATH}/start_services_with_compose.log
|
||||
n=0
|
||||
until [[ "$n" -ge 100 ]]; do
|
||||
until [[ "$n" -ge 200 ]]; do
|
||||
docker logs tgi-gaudi-server > $LOG_PATH/tgi_service_start.log
|
||||
if grep -q Connected $LOG_PATH/tgi_service_start.log; then
|
||||
break
|
||||
@@ -72,18 +68,17 @@ function start_services() {
|
||||
|
||||
|
||||
function validate_megaservice() {
|
||||
result=$(http_proxy="" curl http://${ip_address}:3008/v1/audioqna -XPOST -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64}' -H 'Content-Type: application/json')
|
||||
echo "result is === $result"
|
||||
if [[ $result == *"AAA"* ]]; then
|
||||
response=$(http_proxy="" curl http://${ip_address}:3008/v1/audioqna -XPOST -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64}' -H 'Content-Type: application/json')
|
||||
# always print the log
|
||||
docker logs whisper-service > $LOG_PATH/whisper-service.log
|
||||
docker logs speecht5-service > $LOG_PATH/tts-service.log
|
||||
docker logs tgi-gaudi-server > $LOG_PATH/tgi-gaudi-server.log
|
||||
docker logs audioqna-gaudi-backend-server > $LOG_PATH/audioqna-gaudi-backend-server.log
|
||||
echo "$response" | sed 's/^"//;s/"$//' | base64 -d > speech.mp3
|
||||
|
||||
if [[ $(file speech.mp3) == *"RIFF"* ]]; then
|
||||
echo "Result correct."
|
||||
else
|
||||
docker logs whisper-service > $LOG_PATH/whisper-service.log
|
||||
docker logs asr-service > $LOG_PATH/asr-service.log
|
||||
docker logs speecht5-service > $LOG_PATH/tts-service.log
|
||||
docker logs tts-service > $LOG_PATH/tts-service.log
|
||||
docker logs tgi-gaudi-server > $LOG_PATH/tgi-gaudi-server.log
|
||||
docker logs llm-tgi-gaudi-server > $LOG_PATH/llm-tgi-gaudi-server.log
|
||||
|
||||
echo "Result wrong."
|
||||
exit 1
|
||||
fi
|
||||
@@ -126,7 +121,6 @@ function main() {
|
||||
if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
|
||||
start_services
|
||||
|
||||
# validate_microservices
|
||||
validate_megaservice
|
||||
# validate_frontend
|
||||
|
||||
|
||||
@@ -20,7 +20,7 @@ function build_docker_images() {
|
||||
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
|
||||
|
||||
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
||||
service_list="audioqna whisper asr llm-tgi speecht5 tts"
|
||||
service_list="audioqna audioqna-ui whisper speecht5"
|
||||
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
|
||||
echo "docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm"
|
||||
docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
|
||||
@@ -30,27 +30,25 @@ function build_docker_images() {
|
||||
function start_services() {
|
||||
cd $WORKPATH/docker_compose/amd/gpu/rocm/
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export TGI_LLM_ENDPOINT=http://$ip_address:3006
|
||||
export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
|
||||
|
||||
export ASR_ENDPOINT=http://$ip_address:7066
|
||||
export TTS_ENDPOINT=http://$ip_address:7055
|
||||
|
||||
export MEGA_SERVICE_HOST_IP=${ip_address}
|
||||
export ASR_SERVICE_HOST_IP=${ip_address}
|
||||
export TTS_SERVICE_HOST_IP=${ip_address}
|
||||
export LLM_SERVICE_HOST_IP=${ip_address}
|
||||
export WHISPER_SERVER_HOST_IP=${ip_address}
|
||||
export SPEECHT5_SERVER_HOST_IP=${ip_address}
|
||||
export LLM_SERVER_HOST_IP=${ip_address}
|
||||
|
||||
export ASR_SERVICE_PORT=3001
|
||||
export TTS_SERVICE_PORT=3002
|
||||
export LLM_SERVICE_PORT=3007
|
||||
export WHISPER_SERVER_PORT=7066
|
||||
export SPEECHT5_SERVER_PORT=7055
|
||||
export LLM_SERVER_PORT=3006
|
||||
|
||||
export BACKEND_SERVICE_ENDPOINT=http://${ip_address}:3008/v1/audioqna
|
||||
|
||||
# sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
|
||||
|
||||
# Start Docker Containers
|
||||
docker compose up -d > ${LOG_PATH}/start_services_with_compose.log
|
||||
n=0
|
||||
until [[ "$n" -ge 100 ]]; do
|
||||
until [[ "$n" -ge 200 ]]; do
|
||||
docker logs tgi-service > $LOG_PATH/tgi_service_start.log
|
||||
if grep -q Connected $LOG_PATH/tgi_service_start.log; then
|
||||
break
|
||||
@@ -60,19 +58,17 @@ function start_services() {
|
||||
done
|
||||
}
|
||||
function validate_megaservice() {
|
||||
result=$(http_proxy="" curl http://${ip_address}:3008/v1/audioqna -XPOST -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64}' -H 'Content-Type: application/json')
|
||||
echo $result
|
||||
if [[ $result == *"AAA"* ]]; then
|
||||
response=$(http_proxy="" curl http://${ip_address}:3008/v1/audioqna -XPOST -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64}' -H 'Content-Type: application/json')
|
||||
# always print the log
|
||||
docker logs whisper-service > $LOG_PATH/whisper-service.log
|
||||
docker logs speecht5-service > $LOG_PATH/tts-service.log
|
||||
docker logs tgi-service > $LOG_PATH/tgi-service.log
|
||||
docker logs audioqna-xeon-backend-server > $LOG_PATH/audioqna-xeon-backend-server.log
|
||||
echo "$response" | sed 's/^"//;s/"$//' | base64 -d > speech.mp3
|
||||
|
||||
if [[ $(file speech.mp3) == *"RIFF"* ]]; then
|
||||
echo "Result correct."
|
||||
else
|
||||
docker logs whisper-service > $LOG_PATH/whisper-service.log
|
||||
docker logs asr-service > $LOG_PATH/asr-service.log
|
||||
docker logs speecht5-service > $LOG_PATH/tts-service.log
|
||||
docker logs tts-service > $LOG_PATH/tts-service.log
|
||||
docker logs tgi-service > $LOG_PATH/tgi-service.log
|
||||
docker logs llm-tgi-server > $LOG_PATH/llm-tgi-server.log
|
||||
docker logs audioqna-xeon-backend-server > $LOG_PATH/audioqna-xeon-backend-server.log
|
||||
|
||||
echo "Result wrong."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
@@ -19,7 +19,7 @@ function build_docker_images() {
|
||||
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
|
||||
|
||||
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
||||
service_list="audioqna audioqna-ui whisper asr llm-tgi speecht5 tts"
|
||||
service_list="audioqna audioqna-ui whisper speecht5"
|
||||
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
|
||||
|
||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
|
||||
@@ -29,27 +29,25 @@ function build_docker_images() {
|
||||
function start_services() {
|
||||
cd $WORKPATH/docker_compose/intel/cpu/xeon/
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export TGI_LLM_ENDPOINT=http://$ip_address:3006
|
||||
export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
|
||||
|
||||
export ASR_ENDPOINT=http://$ip_address:7066
|
||||
export TTS_ENDPOINT=http://$ip_address:7055
|
||||
|
||||
export MEGA_SERVICE_HOST_IP=${ip_address}
|
||||
export ASR_SERVICE_HOST_IP=${ip_address}
|
||||
export TTS_SERVICE_HOST_IP=${ip_address}
|
||||
export LLM_SERVICE_HOST_IP=${ip_address}
|
||||
export WHISPER_SERVER_HOST_IP=${ip_address}
|
||||
export SPEECHT5_SERVER_HOST_IP=${ip_address}
|
||||
export LLM_SERVER_HOST_IP=${ip_address}
|
||||
|
||||
export ASR_SERVICE_PORT=3001
|
||||
export TTS_SERVICE_PORT=3002
|
||||
export LLM_SERVICE_PORT=3007
|
||||
export WHISPER_SERVER_PORT=7066
|
||||
export SPEECHT5_SERVER_PORT=7055
|
||||
export LLM_SERVER_PORT=3006
|
||||
|
||||
export BACKEND_SERVICE_ENDPOINT=http://${ip_address}:3008/v1/audioqna
|
||||
|
||||
# sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
|
||||
|
||||
# Start Docker Containers
|
||||
docker compose up -d > ${LOG_PATH}/start_services_with_compose.log
|
||||
n=0
|
||||
until [[ "$n" -ge 100 ]]; do
|
||||
until [[ "$n" -ge 200 ]]; do
|
||||
docker logs tgi-service > $LOG_PATH/tgi_service_start.log
|
||||
if grep -q Connected $LOG_PATH/tgi_service_start.log; then
|
||||
break
|
||||
@@ -61,19 +59,17 @@ function start_services() {
|
||||
|
||||
|
||||
function validate_megaservice() {
|
||||
result=$(http_proxy="" curl http://${ip_address}:3008/v1/audioqna -XPOST -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64}' -H 'Content-Type: application/json')
|
||||
echo $result
|
||||
if [[ $result == *"AAA"* ]]; then
|
||||
response=$(http_proxy="" curl http://${ip_address}:3008/v1/audioqna -XPOST -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64}' -H 'Content-Type: application/json')
|
||||
# always print the log
|
||||
docker logs whisper-service > $LOG_PATH/whisper-service.log
|
||||
docker logs speecht5-service > $LOG_PATH/tts-service.log
|
||||
docker logs tgi-service > $LOG_PATH/tgi-service.log
|
||||
docker logs audioqna-xeon-backend-server > $LOG_PATH/audioqna-xeon-backend-server.log
|
||||
echo "$response" | sed 's/^"//;s/"$//' | base64 -d > speech.mp3
|
||||
|
||||
if [[ $(file speech.mp3) == *"RIFF"* ]]; then
|
||||
echo "Result correct."
|
||||
else
|
||||
docker logs whisper-service > $LOG_PATH/whisper-service.log
|
||||
docker logs asr-service > $LOG_PATH/asr-service.log
|
||||
docker logs speecht5-service > $LOG_PATH/tts-service.log
|
||||
docker logs tts-service > $LOG_PATH/tts-service.log
|
||||
docker logs tgi-service > $LOG_PATH/tgi-service.log
|
||||
docker logs llm-tgi-server > $LOG_PATH/llm-tgi-server.log
|
||||
docker logs audioqna-xeon-backend-server > $LOG_PATH/audioqna-xeon-backend-server.log
|
||||
|
||||
echo "Result wrong."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
@@ -11,16 +11,42 @@ from comps.cores.proto.docarray import LLMParams
|
||||
from fastapi import Request
|
||||
|
||||
MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))
|
||||
ASR_SERVICE_HOST_IP = os.getenv("ASR_SERVICE_HOST_IP", "0.0.0.0")
|
||||
ASR_SERVICE_PORT = int(os.getenv("ASR_SERVICE_PORT", 9099))
|
||||
LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0")
|
||||
LLM_SERVICE_PORT = int(os.getenv("LLM_SERVICE_PORT", 9000))
|
||||
TTS_SERVICE_HOST_IP = os.getenv("TTS_SERVICE_HOST_IP", "0.0.0.0")
|
||||
TTS_SERVICE_PORT = int(os.getenv("TTS_SERVICE_PORT", 9088))
|
||||
WHISPER_SERVER_HOST_IP = os.getenv("WHISPER_SERVER_HOST_IP", "0.0.0.0")
|
||||
WHISPER_SERVER_PORT = int(os.getenv("WHISPER_SERVER_PORT", 7066))
|
||||
LLM_SERVER_HOST_IP = os.getenv("LLM_SERVER_HOST_IP", "0.0.0.0")
|
||||
LLM_SERVER_PORT = int(os.getenv("LLM_SERVER_PORT", 3006))
|
||||
SPEECHT5_SERVER_HOST_IP = os.getenv("SPEECHT5_SERVER_HOST_IP", "0.0.0.0")
|
||||
SPEECHT5_SERVER_PORT = int(os.getenv("SPEECHT5_SERVER_PORT", 7055))
|
||||
ANIMATION_SERVICE_HOST_IP = os.getenv("ANIMATION_SERVICE_HOST_IP", "0.0.0.0")
|
||||
ANIMATION_SERVICE_PORT = int(os.getenv("ANIMATION_SERVICE_PORT", 9066))
|
||||
|
||||
|
||||
def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
|
||||
if self.services[cur_node].service_type == ServiceType.LLM:
|
||||
# convert TGI/vLLM to unified OpenAI /v1/chat/completions format
|
||||
next_inputs = {}
|
||||
next_inputs["model"] = "tgi" # specifically clarify the fake model to make the format unified
|
||||
next_inputs["messages"] = [{"role": "user", "content": inputs["asr_result"]}]
|
||||
next_inputs["max_tokens"] = llm_parameters_dict["max_tokens"]
|
||||
next_inputs["top_p"] = llm_parameters_dict["top_p"]
|
||||
next_inputs["stream"] = inputs["streaming"] # False as default
|
||||
next_inputs["frequency_penalty"] = inputs["frequency_penalty"]
|
||||
# next_inputs["presence_penalty"] = inputs["presence_penalty"]
|
||||
# next_inputs["repetition_penalty"] = inputs["repetition_penalty"]
|
||||
next_inputs["temperature"] = inputs["temperature"]
|
||||
inputs = next_inputs
|
||||
elif self.services[cur_node].service_type == ServiceType.TTS:
|
||||
next_inputs = {}
|
||||
next_inputs["text"] = inputs["choices"][0]["message"]["content"]
|
||||
next_inputs["voice"] = kwargs["voice"]
|
||||
inputs = next_inputs
|
||||
elif self.services[cur_node].service_type == ServiceType.ANIMATION:
|
||||
next_inputs = {}
|
||||
next_inputs["byte_str"] = inputs["tts_result"]
|
||||
inputs = next_inputs
|
||||
return inputs
|
||||
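To make the reshaping above concrete, here is an illustrative walkthrough of the payload keys at each hop; the values are invented, only the key names mirror align_inputs:

```python
# Illustrative only: shows how align_inputs reshapes data between graph nodes.
asr_output = {"asr_result": "What is deep learning?", "streaming": False,
              "frequency_penalty": 0.0, "temperature": 0.01}
llm_parameters_dict = {"max_tokens": 64, "top_p": 0.95}

# ASR -> LLM: the transcript becomes an OpenAI-style /v1/chat/completions request.
llm_request = {
    "model": "tgi",
    "messages": [{"role": "user", "content": asr_output["asr_result"]}],
    "max_tokens": llm_parameters_dict["max_tokens"],
    "top_p": llm_parameters_dict["top_p"],
    "stream": asr_output["streaming"],
    "frequency_penalty": asr_output["frequency_penalty"],
    "temperature": asr_output["temperature"],
}

# LLM -> TTS: only the generated text (plus the requested voice) is forwarded.
llm_output = {"choices": [{"message": {"content": "Deep learning is ..."}}]}
tts_request = {"text": llm_output["choices"][0]["message"]["content"], "voice": "default"}

# TTS -> animation: the synthesized audio (base64 string) is forwarded as byte_str.
tts_output = {"tts_result": "<base64-encoded wav>"}
animation_request = {"byte_str": tts_output["tts_result"]}
print(list(llm_request), list(tts_request), list(animation_request))
```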
|
||||
|
||||
def check_env_vars(env_var_list):
|
||||
for var in env_var_list:
|
||||
if os.getenv(var) is None:
|
||||
@@ -33,31 +59,32 @@ class AvatarChatbotService:
|
||||
def __init__(self, host="0.0.0.0", port=8000):
|
||||
self.host = host
|
||||
self.port = port
|
||||
ServiceOrchestrator.align_inputs = align_inputs
|
||||
self.megaservice = ServiceOrchestrator()
|
||||
self.endpoint = str(MegaServiceEndpoint.AVATAR_CHATBOT)
|
||||
|
||||
def add_remote_service(self):
|
||||
asr = MicroService(
|
||||
name="asr",
|
||||
host=ASR_SERVICE_HOST_IP,
|
||||
port=ASR_SERVICE_PORT,
|
||||
endpoint="/v1/audio/transcriptions",
|
||||
host=WHISPER_SERVER_HOST_IP,
|
||||
port=WHISPER_SERVER_PORT,
|
||||
endpoint="/v1/asr",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.ASR,
|
||||
)
|
||||
llm = MicroService(
|
||||
name="llm",
|
||||
host=LLM_SERVICE_HOST_IP,
|
||||
port=LLM_SERVICE_PORT,
|
||||
host=LLM_SERVER_HOST_IP,
|
||||
port=LLM_SERVER_PORT,
|
||||
endpoint="/v1/chat/completions",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.LLM,
|
||||
)
|
||||
tts = MicroService(
|
||||
name="tts",
|
||||
host=TTS_SERVICE_HOST_IP,
|
||||
port=TTS_SERVICE_PORT,
|
||||
endpoint="/v1/audio/speech",
|
||||
host=SPEECHT5_SERVER_HOST_IP,
|
||||
port=SPEECHT5_SERVER_PORT,
|
||||
endpoint="/v1/tts",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.TTS,
|
||||
)
|
||||
@@ -90,7 +117,9 @@ class AvatarChatbotService:
|
||||
# print(parameters)
|
||||
|
||||
result_dict, runtime_graph = await self.megaservice.schedule(
|
||||
initial_inputs={"byte_str": chat_request.audio}, llm_parameters=parameters
|
||||
initial_inputs={"audio": chat_request.audio},
|
||||
llm_parameters=parameters,
|
||||
voice=chat_request.voice if hasattr(chat_request, "voice") else "default",
|
||||
)
|
||||
|
||||
last_node = runtime_graph.all_leaves()[-1]
|
||||
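As a usage sketch for the updated handle_request, a hypothetical client call could look like the following (the 3009 host port and the /v1/avatarchatbot path are assumptions based on the compose mapping and endpoint name in this change):

```python
# Hypothetical end-to-end request against the AvatarChatbot megaservice.
import requests

req = {
    "audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA",
    "max_tokens": 64,
    "voice": "default",  # optional; forwarded to the TTS node via the voice kwarg
}
resp = requests.post("http://localhost:3009/v1/avatarchatbot", json=req, timeout=600)
print(resp.status_code)
```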
@@ -116,12 +145,12 @@ if __name__ == "__main__":
|
||||
[
|
||||
"MEGA_SERVICE_HOST_IP",
|
||||
"MEGA_SERVICE_PORT",
|
||||
"ASR_SERVICE_HOST_IP",
|
||||
"ASR_SERVICE_PORT",
|
||||
"LLM_SERVICE_HOST_IP",
|
||||
"LLM_SERVICE_PORT",
|
||||
"TTS_SERVICE_HOST_IP",
|
||||
"TTS_SERVICE_PORT",
|
||||
"WHISPER_SERVER_HOST_IP",
|
||||
"WHISPER_SERVER_PORT",
|
||||
"LLM_SERVER_HOST_IP",
|
||||
"LLM_SERVER_PORT",
|
||||
"SPEECHT5_SERVER_HOST_IP",
|
||||
"SPEECHT5_SERVER_PORT",
|
||||
"ANIMATION_SERVICE_HOST_IP",
|
||||
"ANIMATION_SERVICE_PORT",
|
||||
]
|
||||
|
||||
@@ -14,24 +14,17 @@ cd GenAIComps
|
||||
### 2. Build ASR Image
|
||||
|
||||
```bash
|
||||
docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/dependency/Dockerfile .
|
||||
|
||||
|
||||
docker build -t opea/asr:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/Dockerfile .
|
||||
docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/integrations/dependency/whisper/Dockerfile .
|
||||
```
|
||||
|
||||
### 3. Build LLM Image
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile .
|
||||
```
|
||||
The Intel Xeon optimized image hosted in the Hugging Face repo will be used for the TGI service: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu (https://github.com/huggingface/text-generation-inference)

|
||||
|
||||
### 4. Build TTS Image
|
||||
|
||||
```bash
|
||||
docker build -t opea/speecht5:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/speecht5/dependency/Dockerfile .
|
||||
|
||||
docker build -t opea/tts:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/speecht5/Dockerfile .
|
||||
docker build -t opea/speecht5:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/src/integrations/dependency/speecht5/Dockerfile .
|
||||
```
|
||||
|
||||
### 5. Build Animation Image
|
||||
@@ -55,13 +48,10 @@ docker build --no-cache -t opea/avatarchatbot:latest --build-arg https_proxy=$ht
|
||||
Then run the command `docker images`; you will have the following images ready:
|
||||
|
||||
1. `opea/whisper:latest`
|
||||
2. `opea/asr:latest`
|
||||
3. `opea/llm-tgi:latest`
|
||||
4. `opea/speecht5:latest`
|
||||
5. `opea/tts:latest`
|
||||
6. `opea/wav2lip:latest`
|
||||
7. `opea/animation:latest`
|
||||
8. `opea/avatarchatbot:latest`
|
||||
2. `opea/speecht5:latest`
|
||||
3. `opea/wav2lip:latest`
|
||||
4. `opea/animation:latest`
|
||||
5. `opea/avatarchatbot:latest`
|
||||
|
||||
## 🚀 Set the environment variables
|
||||
|
||||
@@ -71,24 +61,21 @@ Before starting the services with `docker compose`, you have to recheck the foll
|
||||
export HUGGINGFACEHUB_API_TOKEN=<your_hf_token>
|
||||
export host_ip=$(hostname -I | awk '{print $1}')
|
||||
|
||||
export TGI_LLM_ENDPOINT=http://$host_ip:3006
|
||||
export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
|
||||
|
||||
export ASR_ENDPOINT=http://$host_ip:7066
|
||||
export TTS_ENDPOINT=http://$host_ip:7055
|
||||
export WAV2LIP_ENDPOINT=http://$host_ip:7860
|
||||
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export ASR_SERVICE_HOST_IP=${host_ip}
|
||||
export TTS_SERVICE_HOST_IP=${host_ip}
|
||||
export LLM_SERVICE_HOST_IP=${host_ip}
|
||||
export WHISPER_SERVER_HOST_IP=${host_ip}
|
||||
export WHISPER_SERVER_PORT=7066
|
||||
export SPEECHT5_SERVER_HOST_IP=${host_ip}
|
||||
export SPEECHT5_SERVER_PORT=7055
|
||||
export LLM_SERVER_HOST_IP=${host_ip}
|
||||
export LLM_SERVER_PORT=3006
|
||||
export ANIMATION_SERVICE_HOST_IP=${host_ip}
|
||||
export ANIMATION_SERVICE_PORT=3008
|
||||
|
||||
export MEGA_SERVICE_PORT=8888
|
||||
export ASR_SERVICE_PORT=3001
|
||||
export TTS_SERVICE_PORT=3002
|
||||
export LLM_SERVICE_PORT=3007
|
||||
export ANIMATION_SERVICE_PORT=3008
|
||||
```
|
||||
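Optionally, before `docker compose up`, you can mirror the gateway's check_env_vars to confirm the exports above are visible; a minimal sketch (the variable list matches the one the backend validates):

```python
# Minimal sanity check of the environment before starting the containers.
import os

required = [
    "MEGA_SERVICE_HOST_IP", "MEGA_SERVICE_PORT",
    "WHISPER_SERVER_HOST_IP", "WHISPER_SERVER_PORT",
    "LLM_SERVER_HOST_IP", "LLM_SERVER_PORT",
    "SPEECHT5_SERVER_HOST_IP", "SPEECHT5_SERVER_PORT",
    "ANIMATION_SERVICE_HOST_IP", "ANIMATION_SERVICE_PORT",
]
missing = [name for name in required if os.getenv(name) is None]
print("Missing:", missing if missing else "none")
```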
|
||||
- Xeon CPU
|
||||
@@ -124,36 +111,18 @@ curl http://${host_ip}:7066/v1/asr \
|
||||
-d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
# asr microservice
|
||||
curl http://${host_ip}:3001/v1/audio/transcriptions \
|
||||
-X POST \
|
||||
-d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
# tgi service
|
||||
curl http://${host_ip}:3006/generate \
|
||||
-X POST \
|
||||
-d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
# llm microservice
|
||||
curl http://${host_ip}:3007/v1/chat/completions\
|
||||
-X POST \
|
||||
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
# speecht5 service
|
||||
curl http://${host_ip}:7055/v1/tts \
|
||||
-X POST \
|
||||
-d '{"text": "Who are you?"}' \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
# tts microservice
|
||||
curl http://${host_ip}:3002/v1/audio/speech \
|
||||
-X POST \
|
||||
-d '{"text": "Who are you?"}' \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
# wav2lip service
|
||||
cd ../../../..
|
||||
curl http://${host_ip}:7860/v1/wav2lip \
|
||||
|
||||
@@ -14,14 +14,6 @@ services:
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
restart: unless-stopped
|
||||
asr:
|
||||
image: ${REGISTRY:-opea}/asr:${TAG:-latest}
|
||||
container_name: asr-service
|
||||
ports:
|
||||
- "3001:9099"
|
||||
ipc: host
|
||||
environment:
|
||||
ASR_ENDPOINT: ${ASR_ENDPOINT}
|
||||
speecht5-service:
|
||||
image: ${REGISTRY:-opea}/speecht5:${TAG:-latest}
|
||||
container_name: speecht5-service
|
||||
@@ -33,14 +25,6 @@ services:
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
restart: unless-stopped
|
||||
tts:
|
||||
image: ${REGISTRY:-opea}/tts:${TAG:-latest}
|
||||
container_name: tts-service
|
||||
ports:
|
||||
- "3002:9088"
|
||||
ipc: host
|
||||
environment:
|
||||
TTS_ENDPOINT: ${TTS_ENDPOINT}
|
||||
tgi-service:
|
||||
image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
|
||||
container_name: tgi-service
|
||||
@@ -55,21 +39,6 @@ services:
|
||||
https_proxy: ${https_proxy}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
|
||||
llm:
|
||||
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
|
||||
container_name: llm-tgi-server
|
||||
depends_on:
|
||||
- tgi-service
|
||||
ports:
|
||||
- "3007:9000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
restart: unless-stopped
|
||||
wav2lip-service:
|
||||
image: ${REGISTRY:-opea}/wav2lip:${TAG:-latest}
|
||||
container_name: wav2lip-service
|
||||
@@ -110,9 +79,6 @@ services:
|
||||
image: ${REGISTRY:-opea}/avatarchatbot:${TAG:-latest}
|
||||
container_name: avatarchatbot-xeon-backend-server
|
||||
depends_on:
|
||||
- asr
|
||||
- llm
|
||||
- tts
|
||||
- animation
|
||||
ports:
|
||||
- "3009:8888"
|
||||
@@ -122,12 +88,12 @@ services:
|
||||
- http_proxy=${http_proxy}
|
||||
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
|
||||
- MEGA_SERVICE_PORT=${MEGA_SERVICE_PORT}
|
||||
- ASR_SERVICE_HOST_IP=${ASR_SERVICE_HOST_IP}
|
||||
- ASR_SERVICE_PORT=${ASR_SERVICE_PORT}
|
||||
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
|
||||
- LLM_SERVICE_PORT=${LLM_SERVICE_PORT}
|
||||
- TTS_SERVICE_HOST_IP=${TTS_SERVICE_HOST_IP}
|
||||
- TTS_SERVICE_PORT=${TTS_SERVICE_PORT}
|
||||
- WHISPER_SERVER_HOST_IP=${WHISPER_SERVER_HOST_IP}
|
||||
- WHISPER_SERVER_PORT=${WHISPER_SERVER_PORT}
|
||||
- LLM_SERVER_HOST_IP=${LLM_SERVER_HOST_IP}
|
||||
- LLM_SERVER_PORT=${LLM_SERVER_PORT}
|
||||
- SPEECHT5_SERVER_HOST_IP=${SPEECHT5_SERVER_HOST_IP}
|
||||
- SPEECHT5_SERVER_PORT=${SPEECHT5_SERVER_PORT}
|
||||
- ANIMATION_SERVICE_HOST_IP=${ANIMATION_SERVICE_HOST_IP}
|
||||
- ANIMATION_SERVICE_PORT=${ANIMATION_SERVICE_PORT}
|
||||
ipc: host
|
||||
|
||||
@@ -14,24 +14,17 @@ cd GenAIComps
|
||||
### 2. Build ASR Image
|
||||
|
||||
```bash
|
||||
docker build -t opea/whisper-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/dependency/Dockerfile.intel_hpu .
|
||||
|
||||
|
||||
docker build -t opea/asr:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/Dockerfile .
|
||||
docker build -t opea/whisper-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/integrations/dependency/whisper/Dockerfile.intel_hpu .
|
||||
```
|
||||
|
||||
### 3. Build LLM Image
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile .
|
||||
```
|
||||
The Gaudi optimized image hosted in the Hugging Face repo will be used for the TGI service: ghcr.io/huggingface/tgi-gaudi:2.0.6 (https://github.com/huggingface/tgi-gaudi)
|
||||
|
||||
### 4. Build TTS Image
|
||||
|
||||
```bash
|
||||
docker build -t opea/speecht5-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/speecht5/dependency/Dockerfile.intel_hpu .
|
||||
|
||||
docker build -t opea/tts:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/speecht5/Dockerfile .
|
||||
docker build -t opea/speecht5-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/src/integrations/dependency/speecht5/Dockerfile.intel_hpu .
|
||||
```
|
||||
|
||||
### 5. Build Animation Image
|
||||
@@ -55,13 +48,10 @@ docker build --no-cache -t opea/avatarchatbot:latest --build-arg https_proxy=$ht
|
||||
Then run the command `docker images`; you will have the following images ready:
|
||||
|
||||
1. `opea/whisper-gaudi:latest`
|
||||
2. `opea/asr:latest`
|
||||
3. `opea/llm-tgi:latest`
|
||||
4. `opea/speecht5-gaudi:latest`
|
||||
5. `opea/tts:latest`
|
||||
6. `opea/wav2lip-gaudi:latest`
|
||||
7. `opea/animation:latest`
|
||||
8. `opea/avatarchatbot:latest`
|
||||
2. `opea/speecht5-gaudi:latest`
|
||||
3. `opea/wav2lip-gaudi:latest`
|
||||
4. `opea/animation:latest`
|
||||
5. `opea/avatarchatbot:latest`
|
||||
|
||||
## 🚀 Set the environment variables
|
||||
|
||||
@@ -71,24 +61,21 @@ Before starting the services with `docker compose`, you have to recheck the foll
|
||||
export HUGGINGFACEHUB_API_TOKEN=<your_hf_token>
|
||||
export host_ip=$(hostname -I | awk '{print $1}')
|
||||
|
||||
export TGI_LLM_ENDPOINT=http://$host_ip:3006
|
||||
export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
|
||||
|
||||
export ASR_ENDPOINT=http://$host_ip:7066
|
||||
export TTS_ENDPOINT=http://$host_ip:7055
|
||||
export WAV2LIP_ENDPOINT=http://$host_ip:7860
|
||||
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export ASR_SERVICE_HOST_IP=${host_ip}
|
||||
export TTS_SERVICE_HOST_IP=${host_ip}
|
||||
export LLM_SERVICE_HOST_IP=${host_ip}
|
||||
export WHISPER_SERVER_HOST_IP=${host_ip}
|
||||
export WHISPER_SERVER_PORT=7066
|
||||
export SPEECHT5_SERVER_HOST_IP=${host_ip}
|
||||
export SPEECHT5_SERVER_PORT=7055
|
||||
export LLM_SERVER_HOST_IP=${host_ip}
|
||||
export LLM_SERVER_PORT=3006
|
||||
export ANIMATION_SERVICE_HOST_IP=${host_ip}
|
||||
export ANIMATION_SERVICE_PORT=3008
|
||||
|
||||
export MEGA_SERVICE_PORT=8888
|
||||
export ASR_SERVICE_PORT=3001
|
||||
export TTS_SERVICE_PORT=3002
|
||||
export LLM_SERVICE_PORT=3007
|
||||
export ANIMATION_SERVICE_PORT=3008
|
||||
```
|
||||
|
||||
- Gaudi2 HPU
|
||||
@@ -124,36 +111,18 @@ curl http://${host_ip}:7066/v1/asr \
|
||||
-d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
# asr microservice
|
||||
curl http://${host_ip}:3001/v1/audio/transcriptions \
|
||||
-X POST \
|
||||
-d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
# tgi service
|
||||
curl http://${host_ip}:3006/generate \
|
||||
-X POST \
|
||||
-d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
# llm microservice
|
||||
curl http://${host_ip}:3007/v1/chat/completions\
|
||||
-X POST \
|
||||
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
# speecht5 service
|
||||
curl http://${host_ip}:7055/v1/tts \
|
||||
-X POST \
|
||||
-d '{"text": "Who are you?"}' \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
# tts microservice
|
||||
curl http://${host_ip}:3002/v1/audio/speech \
|
||||
-X POST \
|
||||
-d '{"text": "Who are you?"}' \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
# wav2lip service
|
||||
cd ../../../..
|
||||
curl http://${host_ip}:7860/v1/wav2lip \
|
||||
|
||||
@@ -21,14 +21,6 @@ services:
|
||||
cap_add:
|
||||
- SYS_NICE
|
||||
restart: unless-stopped
|
||||
asr:
|
||||
image: ${REGISTRY:-opea}/asr:${TAG:-latest}
|
||||
container_name: asr-service
|
||||
ports:
|
||||
- "3001:9099"
|
||||
ipc: host
|
||||
environment:
|
||||
ASR_ENDPOINT: ${ASR_ENDPOINT}
|
||||
speecht5-service:
|
||||
image: ${REGISTRY:-opea}/speecht5-gaudi:${TAG:-latest}
|
||||
container_name: speecht5-service
|
||||
@@ -45,14 +37,6 @@ services:
|
||||
cap_add:
|
||||
- SYS_NICE
|
||||
restart: unless-stopped
|
||||
tts:
|
||||
image: ${REGISTRY:-opea}/tts:${TAG:-latest}
|
||||
container_name: tts-service
|
||||
ports:
|
||||
- "3002:9088"
|
||||
ipc: host
|
||||
environment:
|
||||
TTS_ENDPOINT: ${TTS_ENDPOINT}
|
||||
tgi-service:
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.6
|
||||
container_name: tgi-gaudi-server
|
||||
@@ -78,21 +62,6 @@ services:
|
||||
- SYS_NICE
|
||||
ipc: host
|
||||
command: --model-id ${LLM_MODEL_ID} --max-input-length 128 --max-total-tokens 256
|
||||
llm:
|
||||
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
|
||||
container_name: llm-tgi-gaudi-server
|
||||
depends_on:
|
||||
- tgi-service
|
||||
ports:
|
||||
- "3007:9000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
restart: unless-stopped
|
||||
wav2lip-service:
|
||||
image: ${REGISTRY:-opea}/wav2lip-gaudi:${TAG:-latest}
|
||||
container_name: wav2lip-service
|
||||
@@ -143,9 +112,6 @@ services:
|
||||
image: ${REGISTRY:-opea}/avatarchatbot:${TAG:-latest}
|
||||
container_name: avatarchatbot-gaudi-backend-server
|
||||
depends_on:
|
||||
- asr
|
||||
- llm
|
||||
- tts
|
||||
- animation
|
||||
ports:
|
||||
- "3009:8888"
|
||||
@@ -155,12 +121,12 @@ services:
|
||||
- http_proxy=${http_proxy}
|
||||
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
|
||||
- MEGA_SERVICE_PORT=${MEGA_SERVICE_PORT}
|
||||
- ASR_SERVICE_HOST_IP=${ASR_SERVICE_HOST_IP}
|
||||
- ASR_SERVICE_PORT=${ASR_SERVICE_PORT}
|
||||
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
|
||||
- LLM_SERVICE_PORT=${LLM_SERVICE_PORT}
|
||||
- TTS_SERVICE_HOST_IP=${TTS_SERVICE_HOST_IP}
|
||||
- TTS_SERVICE_PORT=${TTS_SERVICE_PORT}
|
||||
- WHISPER_SERVER_HOST_IP=${WHISPER_SERVER_HOST_IP}
|
||||
- WHISPER_SERVER_PORT=${WHISPER_SERVER_PORT}
|
||||
- LLM_SERVER_HOST_IP=${LLM_SERVER_HOST_IP}
|
||||
- LLM_SERVER_PORT=${LLM_SERVER_PORT}
|
||||
- SPEECHT5_SERVER_HOST_IP=${SPEECHT5_SERVER_HOST_IP}
|
||||
- SPEECHT5_SERVER_PORT=${SPEECHT5_SERVER_PORT}
|
||||
- ANIMATION_SERVICE_HOST_IP=${ANIMATION_SERVICE_HOST_IP}
|
||||
- ANIMATION_SERVICE_PORT=${ANIMATION_SERVICE_PORT}
|
||||
ipc: host
|
||||
|
||||
@@ -14,19 +14,19 @@ services:
|
||||
whisper-gaudi:
|
||||
build:
|
||||
context: GenAIComps
|
||||
dockerfile: comps/asr/whisper/dependency/Dockerfile.intel_hpu
|
||||
dockerfile: comps/asr/src/integrations/dependency/whisper/Dockerfile.intel_hpu
|
||||
extends: avatarchatbot
|
||||
image: ${REGISTRY:-opea}/whisper-gaudi:${TAG:-latest}
|
||||
whisper:
|
||||
build:
|
||||
context: GenAIComps
|
||||
dockerfile: comps/asr/whisper/dependency/Dockerfile
|
||||
dockerfile: comps/asr/src/integrations/dependency/whisper/Dockerfile
|
||||
extends: avatarchatbot
|
||||
image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
|
||||
asr:
|
||||
build:
|
||||
context: GenAIComps
|
||||
dockerfile: comps/asr/whisper/Dockerfile
|
||||
dockerfile: comps/asr/src/Dockerfile
|
||||
extends: avatarchatbot
|
||||
image: ${REGISTRY:-opea}/asr:${TAG:-latest}
|
||||
llm-tgi:
|
||||
@@ -38,19 +38,19 @@ services:
|
||||
speecht5-gaudi:
|
||||
build:
|
||||
context: GenAIComps
|
||||
dockerfile: comps/tts/speecht5/dependency/Dockerfile.intel_hpu
|
||||
dockerfile: comps/tts/src/integrations/dependency/speecht5/Dockerfile.intel_hpu
|
||||
extends: avatarchatbot
|
||||
image: ${REGISTRY:-opea}/speecht5-gaudi:${TAG:-latest}
|
||||
speecht5:
|
||||
build:
|
||||
context: GenAIComps
|
||||
dockerfile: comps/tts/speecht5/dependency/Dockerfile
|
||||
dockerfile: comps/tts/src/integrations/dependency/speecht5/Dockerfile
|
||||
extends: avatarchatbot
|
||||
image: ${REGISTRY:-opea}/speecht5:${TAG:-latest}
|
||||
tts:
|
||||
build:
|
||||
context: GenAIComps
|
||||
dockerfile: comps/tts/speecht5/Dockerfile
|
||||
dockerfile: comps/tts/src/Dockerfile
|
||||
extends: avatarchatbot
|
||||
image: ${REGISTRY:-opea}/tts:${TAG:-latest}
|
||||
wav2lip-gaudi:
|
||||
|
||||
@@ -26,7 +26,7 @@ function build_docker_images() {
|
||||
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
|
||||
|
||||
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
||||
service_list="avatarchatbot whisper-gaudi asr llm-tgi speecht5-gaudi tts wav2lip-gaudi animation"
|
||||
service_list="avatarchatbot whisper-gaudi speecht5-gaudi wav2lip-gaudi animation"
|
||||
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
|
||||
|
||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
|
||||
@@ -41,24 +41,21 @@ function start_services() {
|
||||
export HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN
|
||||
export host_ip=$(hostname -I | awk '{print $1}')
|
||||
|
||||
export TGI_LLM_ENDPOINT=http://$host_ip:3006
|
||||
export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
|
||||
|
||||
export ASR_ENDPOINT=http://$host_ip:7066
|
||||
export TTS_ENDPOINT=http://$host_ip:7055
|
||||
export WAV2LIP_ENDPOINT=http://$host_ip:7860
|
||||
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export ASR_SERVICE_HOST_IP=${host_ip}
|
||||
export TTS_SERVICE_HOST_IP=${host_ip}
|
||||
export LLM_SERVICE_HOST_IP=${host_ip}
|
||||
export WHISPER_SERVER_HOST_IP=${host_ip}
|
||||
export WHISPER_SERVER_PORT=7066
|
||||
export SPEECHT5_SERVER_HOST_IP=${host_ip}
|
||||
export SPEECHT5_SERVER_PORT=7055
|
||||
export LLM_SERVER_HOST_IP=${host_ip}
|
||||
export LLM_SERVER_PORT=3006
|
||||
export ANIMATION_SERVICE_HOST_IP=${host_ip}
|
||||
export ANIMATION_SERVICE_PORT=3008
|
||||
|
||||
export MEGA_SERVICE_PORT=8888
|
||||
export ASR_SERVICE_PORT=3001
|
||||
export TTS_SERVICE_PORT=3002
|
||||
export LLM_SERVICE_PORT=3007
|
||||
export ANIMATION_SERVICE_PORT=3008
|
||||
|
||||
export DEVICE="hpu"
|
||||
export WAV2LIP_PORT=7860
|
||||
@@ -77,7 +74,7 @@ function start_services() {
|
||||
docker compose up -d > ${LOG_PATH}/start_services_with_compose.log
|
||||
|
||||
n=0
|
||||
until [[ "$n" -ge 100 ]]; do
|
||||
until [[ "$n" -ge 200 ]]; do
|
||||
docker logs tgi-gaudi-server > $LOG_PATH/tgi_service_start.log
|
||||
if grep -q Connected $LOG_PATH/tgi_service_start.log; then
|
||||
break
|
||||
@@ -100,14 +97,12 @@ function validate_megaservice() {
|
||||
else
|
||||
echo "Result wrong, print docker logs."
|
||||
docker logs whisper-service > $LOG_PATH/whisper-service.log
|
||||
docker logs asr-service > $LOG_PATH/asr-service.log
|
||||
docker logs speecht5-service > $LOG_PATH/speecht5-service.log
|
||||
docker logs tts-service > $LOG_PATH/tts-service.log
|
||||
docker logs tgi-gaudi-server > $LOG_PATH/tgi-gaudi-server.log
|
||||
docker logs llm-tgi-gaudi-server > $LOG_PATH/llm-tgi-gaudi-server.log
|
||||
docker logs wav2lip-service > $LOG_PATH/wav2lip-service.log
|
||||
docker logs animation-gaudi-server > $LOG_PATH/animation-gaudi-server.log
|
||||
echo "Exit test."
|
||||
|
||||
echo "Result wrong."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
||||
@@ -26,10 +26,10 @@ function build_docker_images() {
|
||||
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
|
||||
|
||||
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
||||
service_list="avatarchatbot whisper asr llm-tgi speecht5 tts wav2lip animation"
|
||||
service_list="avatarchatbot whisper speecht5 wav2lip animation"
|
||||
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
|
||||
|
||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
|
||||
docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
|
||||
|
||||
docker images && sleep 1s
|
||||
}
|
||||
@@ -41,24 +41,21 @@ function start_services() {
|
||||
export HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN
|
||||
export host_ip=$(hostname -I | awk '{print $1}')
|
||||
|
||||
export TGI_LLM_ENDPOINT=http://$host_ip:3006
|
||||
export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
|
||||
|
||||
export ASR_ENDPOINT=http://$host_ip:7066
|
||||
export TTS_ENDPOINT=http://$host_ip:7055
|
||||
export WAV2LIP_ENDPOINT=http://$host_ip:7860
|
||||
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export ASR_SERVICE_HOST_IP=${host_ip}
|
||||
export TTS_SERVICE_HOST_IP=${host_ip}
|
||||
export LLM_SERVICE_HOST_IP=${host_ip}
|
||||
export WHISPER_SERVER_HOST_IP=${host_ip}
|
||||
export WHISPER_SERVER_PORT=7066
|
||||
export SPEECHT5_SERVER_HOST_IP=${host_ip}
|
||||
export SPEECHT5_SERVER_PORT=7055
|
||||
export LLM_SERVER_HOST_IP=${host_ip}
|
||||
export LLM_SERVER_PORT=3006
|
||||
export ANIMATION_SERVICE_HOST_IP=${host_ip}
|
||||
export ANIMATION_SERVICE_PORT=3008
|
||||
|
||||
export MEGA_SERVICE_PORT=8888
|
||||
export ASR_SERVICE_PORT=3001
|
||||
export TTS_SERVICE_PORT=3002
|
||||
export LLM_SERVICE_PORT=3007
|
||||
export ANIMATION_SERVICE_PORT=3008
|
||||
|
||||
export DEVICE="cpu"
|
||||
export WAV2LIP_PORT=7860
|
||||
@@ -97,11 +94,8 @@ function validate_megaservice() {
|
||||
echo "Result correct."
|
||||
else
|
||||
docker logs whisper-service > $LOG_PATH/whisper-service.log
|
||||
docker logs asr-service > $LOG_PATH/asr-service.log
|
||||
docker logs speecht5-service > $LOG_PATH/speecht5-service.log
|
||||
docker logs tts-service > $LOG_PATH/tts-service.log
|
||||
docker logs tgi-service > $LOG_PATH/tgi-service.log
|
||||
docker logs llm-tgi-server > $LOG_PATH/llm-tgi-server.log
|
||||
docker logs wav2lip-service > $LOG_PATH/wav2lip-service.log
|
||||
docker logs animation-server > $LOG_PATH/animation-server.log
|
||||
|
||||
|
||||
@@ -26,7 +26,7 @@ cd GenAIComps
|
||||
The Whisper Service converts audio files to text. Follow these steps to build and run the service:
|
||||
|
||||
```bash
|
||||
docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/dependency/Dockerfile .
|
||||
docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/integrations/dependency/whisper/Dockerfile .
|
||||
```
|
||||
|
||||
### 2. Build MegaService Docker Image
|
||||
|
||||
@@ -18,7 +18,7 @@ cd GenAIComps
|
||||
The Whisper Service converts audio files to text. Follow these steps to build and run the service:
|
||||
|
||||
```bash
|
||||
docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/dependency/Dockerfile .
|
||||
docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/integrations/dependency/whisper/Dockerfile .
|
||||
```
|
||||
|
||||
### 2. Build MegaService Docker Image
|
||||
|
||||
@@ -38,7 +38,7 @@ services:
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
context: GenAIComps
|
||||
dockerfile: comps/asr/whisper/dependency/Dockerfile
|
||||
dockerfile: comps/asr/src/integrations/dependency/whisper/Dockerfile
|
||||
extends: docsum
|
||||
image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
|
||||
llm-docsum-tgi:
|
||||
|
||||
@@ -13,16 +13,6 @@ services:
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
restart: unless-stopped
|
||||
asr:
|
||||
image: ${REGISTRY:-opea}/asr:${TAG:-latest}
|
||||
container_name: asr-service
|
||||
ports:
|
||||
- "${ASR_SERVICE_PORT}:9099"
|
||||
ipc: host
|
||||
environment:
|
||||
ASR_ENDPOINT: ${ASR_ENDPOINT}
|
||||
ASR_SERVICE_PORT: ${ASR_SERVICE_PORT}
|
||||
ASR_SERVICE_ENDPOINT: ${ASR_SERVICE_ENDPOINT}
|
||||
redis-vector-db:
|
||||
image: redis/redis-stack:7.2.0-v9
|
||||
container_name: redis-vector-db
|
||||
@@ -153,8 +143,8 @@ services:
|
||||
MM_EMBEDDING_PORT_MICROSERVICE: ${MM_EMBEDDING_PORT_MICROSERVICE}
|
||||
MM_RETRIEVER_SERVICE_HOST_IP: ${MM_RETRIEVER_SERVICE_HOST_IP}
|
||||
LVM_SERVICE_HOST_IP: ${LVM_SERVICE_HOST_IP}
|
||||
ASR_SERVICE_PORT: ${ASR_SERVICE_PORT}
|
||||
ASR_SERVICE_ENDPOINT: ${ASR_SERVICE_ENDPOINT}
|
||||
WHISPER_SERVER_PORT: ${WHISPER_SERVER_PORT}
|
||||
WHISPER_SERVER_ENDPOINT: ${WHISPER_SERVER_ENDPOINT}
|
||||
ipc: host
|
||||
restart: always
|
||||
multimodalqna-ui:
|
||||
|
||||
@@ -78,9 +78,8 @@ export https_proxy=${your_http_proxy}
|
||||
export EMBEDDER_PORT=6006
|
||||
export MMEI_EMBEDDING_ENDPOINT="http://${host_ip}:$EMBEDDER_PORT/v1/encode"
|
||||
export MM_EMBEDDING_PORT_MICROSERVICE=6000
|
||||
export ASR_ENDPOINT=http://$host_ip:7066
|
||||
export ASR_SERVICE_PORT=3001
|
||||
export ASR_SERVICE_ENDPOINT="http://${host_ip}:${ASR_SERVICE_PORT}/v1/audio/transcriptions"
|
||||
export WHISPER_SERVER_PORT=7066
|
||||
export WHISPER_SERVER_ENDPOINT="http://${host_ip}:${WHISPER_SERVER_PORT}/v1/asr"
|
||||
export REDIS_URL="redis://${host_ip}:6379"
|
||||
export REDIS_HOST=${host_ip}
|
||||
export INDEX_NAME="mm-rag-redis"
|
||||
@@ -153,13 +152,7 @@ docker build --no-cache -t opea/dataprep-multimodal-redis:latest --build-arg htt
|
||||
Build whisper server image
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/dependency/Dockerfile .
|
||||
```
|
||||
|
||||
Build asr image
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/asr:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/Dockerfile .
|
||||
docker build --no-cache -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/integrations/dependency/whisper/Dockerfile .
|
||||
```
|
||||
|
||||
### 6. Build MegaService Docker Image
|
||||
@@ -190,12 +183,11 @@ Then run the command `docker images`, you will have the following 11 Docker Imag
|
||||
3. `opea/lvm-llava:latest`
|
||||
4. `opea/retriever-multimodal-redis:latest`
|
||||
5. `opea/whisper:latest`
|
||||
6. `opea/asr:latest`
|
||||
7. `opea/redis-vector-db`
|
||||
8. `opea/embedding-multimodal:latest`
|
||||
9. `opea/embedding-multimodal-bridgetower:latest`
|
||||
10. `opea/multimodalqna:latest`
|
||||
11. `opea/multimodalqna-ui:latest`
|
||||
6. `opea/redis-vector-db`
|
||||
7. `opea/embedding-multimodal:latest`
|
||||
8. `opea/embedding-multimodal-bridgetower:latest`
|
||||
9. `opea/multimodalqna:latest`
|
||||
10. `opea/multimodalqna-ui:latest`
|
||||
|
||||
## 🚀 Start Microservices
|
||||
|
||||
@@ -264,10 +256,10 @@ curl http://${host_ip}:7000/v1/multimodal_retrieval \
|
||||
4. asr
|
||||
|
||||
```bash
|
||||
curl ${ASR_SERVICE_ENDPOINT} \
|
||||
curl ${WHISPER_SERVER_ENDPOINT} \
|
||||
-X POST \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"byte_str" : "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}'
|
||||
-d '{"audio" : "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}'
|
||||
```
|
||||
|
||||
5. lvm-llava
|
||||
|
||||
@@ -13,16 +13,6 @@ services:
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
restart: unless-stopped
|
||||
asr:
|
||||
image: ${REGISTRY:-opea}/asr:${TAG:-latest}
|
||||
container_name: asr-service
|
||||
ports:
|
||||
- "${ASR_SERVICE_PORT}:9099"
|
||||
ipc: host
|
||||
environment:
|
||||
ASR_ENDPOINT: ${ASR_ENDPOINT}
|
||||
ASR_SERVICE_PORT: ${ASR_SERVICE_PORT}
|
||||
ASR_SERVICE_ENDPOINT: ${ASR_SERVICE_ENDPOINT}
|
||||
redis-vector-db:
|
||||
image: redis/redis-stack:7.2.0-v9
|
||||
container_name: redis-vector-db
|
||||
@@ -124,7 +114,6 @@ services:
|
||||
- embedding-multimodal
|
||||
- retriever-redis
|
||||
- lvm-llava-svc
|
||||
- asr
|
||||
ports:
|
||||
- "8888:8888"
|
||||
environment:
|
||||
@@ -136,8 +125,8 @@ services:
|
||||
MM_EMBEDDING_PORT_MICROSERVICE: ${MM_EMBEDDING_PORT_MICROSERVICE}
|
||||
MM_RETRIEVER_SERVICE_HOST_IP: ${MM_RETRIEVER_SERVICE_HOST_IP}
|
||||
LVM_SERVICE_HOST_IP: ${LVM_SERVICE_HOST_IP}
|
||||
ASR_SERVICE_PORT: ${ASR_SERVICE_PORT}
|
||||
ASR_SERVICE_ENDPOINT: ${ASR_SERVICE_ENDPOINT}
|
||||
WHISPER_SERVER_PORT: ${WHISPER_SERVER_PORT}
|
||||
WHISPER_SERVER_ENDPOINT: ${WHISPER_SERVER_ENDPOINT}
|
||||
ipc: host
|
||||
restart: always
|
||||
multimodalqna-ui:
|
||||
|
||||
@@ -12,9 +12,8 @@ export https_proxy=${your_http_proxy}
|
||||
export EMBEDDER_PORT=6006
|
||||
export MMEI_EMBEDDING_ENDPOINT="http://${host_ip}:$EMBEDDER_PORT/v1/encode"
|
||||
export MM_EMBEDDING_PORT_MICROSERVICE=6000
|
||||
export ASR_ENDPOINT=http://$host_ip:7066
|
||||
export ASR_SERVICE_PORT=3001
|
||||
export ASR_SERVICE_ENDPOINT="http://${host_ip}:${ASR_SERVICE_PORT}/v1/audio/transcriptions"
|
||||
export WHISPER_SERVER_PORT=7066
|
||||
export WHISPER_SERVER_ENDPOINT="http://${host_ip}:${WHISPER_SERVER_PORT}/v1/asr"
|
||||
export REDIS_URL="redis://${host_ip}:6379"
|
||||
export REDIS_HOST=${host_ip}
|
||||
export INDEX_NAME="mm-rag-redis"
|
||||
|
||||
@@ -38,9 +38,8 @@ export LVM_MODEL_ID="llava-hf/llava-v1.6-vicuna-13b-hf"
|
||||
export WHISPER_MODEL="base"
|
||||
export MM_EMBEDDING_SERVICE_HOST_IP=${host_ip}
|
||||
export MM_RETRIEVER_SERVICE_HOST_IP=${host_ip}
|
||||
export ASR_ENDPOINT=http://$host_ip:7066
|
||||
export ASR_SERVICE_PORT=3001
|
||||
export ASR_SERVICE_ENDPOINT="http://${host_ip}:${ASR_SERVICE_PORT}/v1/audio/transcriptions"
|
||||
export WHISPER_SERVER_PORT=7066
|
||||
export WHISPER_SERVER_ENDPOINT="http://${host_ip}:${WHISPER_SERVER_PORT}/v1/asr"
|
||||
export LVM_SERVICE_HOST_IP=${host_ip}
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/multimodalqna"
|
||||
@@ -104,13 +103,7 @@ docker build --no-cache -t opea/dataprep-multimodal-redis:latest --build-arg htt
|
||||
Build whisper server image
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/dependency/Dockerfile .
|
||||
```
|
||||
|
||||
Build asr image
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/asr:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/Dockerfile .
|
||||
docker build --no-cache -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/integrations/dependency/whisper/Dockerfile .
|
||||
```
|
||||
|
||||
### 6. Build MegaService Docker Image
|
||||
@@ -139,12 +132,11 @@ Then run the command `docker images`, you will have the following 11 Docker Imag
|
||||
3. `ghcr.io/huggingface/tgi-gaudi:2.0.6`
|
||||
4. `opea/retriever-multimodal-redis:latest`
|
||||
5. `opea/whisper:latest`
|
||||
6. `opea/asr:latest`
|
||||
7. `opea/redis-vector-db`
|
||||
8. `opea/embedding-multimodal:latest`
|
||||
9. `opea/embedding-multimodal-bridgetower:latest`
|
||||
10. `opea/multimodalqna:latest`
|
||||
11. `opea/multimodalqna-ui:latest`
|
||||
6. `opea/redis-vector-db`
|
||||
7. `opea/embedding-multimodal:latest`
|
||||
8. `opea/embedding-multimodal-bridgetower:latest`
|
||||
9. `opea/multimodalqna:latest`
|
||||
10. `opea/multimodalqna-ui:latest`
|
||||
|
||||
## 🚀 Start Microservices
|
||||
|
||||
@@ -213,10 +205,10 @@ curl http://${host_ip}:7000/v1/multimodal_retrieval \
|
||||
4. asr
|
||||
|
||||
```bash
|
||||
curl ${ASR_SERVICE_ENDPOINT} \
|
||||
curl ${WHISPER_SERVER_ENDPOINT} \
|
||||
-X POST \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"byte_str" : "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}'
|
||||
-d '{"audio" : "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}'
|
||||
```
|
||||
|
||||
5. TGI LLaVA Gaudi Server
|
||||
|
||||
@@ -19,16 +19,6 @@ services:
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
restart: unless-stopped
|
||||
asr:
|
||||
image: ${REGISTRY:-opea}/asr:${TAG:-latest}
|
||||
container_name: asr-service
|
||||
ports:
|
||||
- "${ASR_SERVICE_PORT}:9099"
|
||||
ipc: host
|
||||
environment:
|
||||
ASR_ENDPOINT: ${ASR_ENDPOINT}
|
||||
ASR_SERVICE_PORT: ${ASR_SERVICE_PORT}
|
||||
ASR_SERVICE_ENDPOINT: ${ASR_SERVICE_ENDPOINT}
|
||||
dataprep-multimodal-redis:
|
||||
image: ${REGISTRY:-opea}/dataprep-multimodal-redis:${TAG:-latest}
|
||||
container_name: dataprep-multimodal-redis
|
||||
@@ -141,7 +131,6 @@ services:
|
||||
- embedding-multimodal
|
||||
- retriever-redis
|
||||
- lvm-tgi
|
||||
- asr
|
||||
ports:
|
||||
- "8888:8888"
|
||||
environment:
|
||||
@@ -153,8 +142,8 @@ services:
|
||||
MM_EMBEDDING_PORT_MICROSERVICE: ${MM_EMBEDDING_PORT_MICROSERVICE}
|
||||
MM_RETRIEVER_SERVICE_HOST_IP: ${MM_RETRIEVER_SERVICE_HOST_IP}
|
||||
LVM_SERVICE_HOST_IP: ${LVM_SERVICE_HOST_IP}
|
||||
ASR_SERVICE_PORT: ${ASR_SERVICE_PORT}
|
||||
ASR_SERVICE_ENDPOINT: ${ASR_SERVICE_ENDPOINT}
|
||||
WHISPER_SERVER_PORT: ${WHISPER_SERVER_PORT}
|
||||
WHISPER_SERVER_ENDPOINT: ${WHISPER_SERVER_ENDPOINT}
|
||||
ipc: host
|
||||
restart: always
|
||||
multimodalqna-ui:
|
||||
|
||||
@@ -12,9 +12,8 @@ export https_proxy=${your_http_proxy}
|
||||
export EMBEDDER_PORT=6006
|
||||
export MMEI_EMBEDDING_ENDPOINT="http://${host_ip}:$EMBEDDER_PORT/v1/encode"
|
||||
export MM_EMBEDDING_PORT_MICROSERVICE=6000
|
||||
export ASR_ENDPOINT=http://$host_ip:7066
|
||||
export ASR_SERVICE_PORT=3001
|
||||
export ASR_SERVICE_ENDPOINT="http://${host_ip}:${ASR_SERVICE_PORT}/v1/audio/transcriptions"
|
||||
export WHISPER_SERVER_PORT=7066
|
||||
export WHISPER_SERVER_ENDPOINT="http://${host_ip}:${WHISPER_SERVER_PORT}/v1/asr"
|
||||
export REDIS_URL="redis://${host_ip}:6379"
|
||||
export REDIS_HOST=${host_ip}
|
||||
export INDEX_NAME="mm-rag-redis"
|
||||
|
||||
@@ -62,12 +62,12 @@ services:
|
||||
whisper:
|
||||
build:
|
||||
context: GenAIComps
|
||||
dockerfile: comps/asr/whisper/dependency/Dockerfile
|
||||
dockerfile: comps/asr/src/integrations/dependency/whisper/Dockerfile
|
||||
extends: multimodalqna
|
||||
image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
|
||||
asr:
|
||||
build:
|
||||
context: GenAIComps
|
||||
dockerfile: comps/asr/whisper/Dockerfile
|
||||
dockerfile: comps/asr/src/Dockerfile
|
||||
extends: multimodalqna
|
||||
image: ${REGISTRY:-opea}/asr:${TAG:-latest}
|
||||
|
||||
@@ -27,11 +27,10 @@ MM_RETRIEVER_SERVICE_HOST_IP = os.getenv("MM_RETRIEVER_SERVICE_HOST_IP", "0.0.0.
|
||||
MM_RETRIEVER_SERVICE_PORT = int(os.getenv("MM_RETRIEVER_SERVICE_PORT", 7000))
|
||||
LVM_SERVICE_HOST_IP = os.getenv("LVM_SERVICE_HOST_IP", "0.0.0.0")
|
||||
LVM_SERVICE_PORT = int(os.getenv("LVM_SERVICE_PORT", 9399))
|
||||
WHISPER_SERVER_ENDPOINT = os.getenv("WHISPER_SERVER_ENDPOINT", "http://0.0.0.0:7066/v1/asr")
|
||||
|
||||
|
||||
class MultimodalQnAService:
|
||||
asr_port = int(os.getenv("ASR_SERVICE_PORT", 3001))
|
||||
asr_endpoint = os.getenv("ASR_SERVICE_ENDPOINT", "http://0.0.0.0:{}/v1/audio/transcriptions".format(asr_port))
|
||||
|
||||
def __init__(self, host="0.0.0.0", port=8000):
|
||||
self.host = host
|
||||
@@ -189,11 +188,11 @@ class MultimodalQnAService:
|
||||
def convert_audio_to_text(self, audio):
|
||||
# translate audio to text by passing in base64 encoded audio to ASR
|
||||
if isinstance(audio, dict):
|
||||
input_dict = {"byte_str": audio["audio"][0]}
|
||||
input_dict = {"audio": audio["audio"][0]}
|
||||
else:
|
||||
input_dict = {"byte_str": audio[0]}
|
||||
input_dict = {"audio": audio[0]}
|
||||
|
||||
response = requests.post(self.asr_endpoint, data=json.dumps(input_dict))
|
||||
response = requests.post(WHISPER_SERVER_ENDPOINT, data=json.dumps(input_dict))
|
||||
|
||||
if response.status_code != 200:
|
||||
return JSONResponse(
|
||||
@@ -201,7 +200,7 @@ class MultimodalQnAService:
|
||||
)
|
||||
|
||||
response = response.json()
|
||||
return response["query"]
|
||||
return response["asr_result"]
|
||||
|
||||
async def handle_request(self, request: Request):
|
||||
data = await request.json()
|
||||
|
||||
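A minimal sketch of the new direct call that convert_audio_to_text now makes (endpoint default and payload/response keys as shown above; the base64 audio is the sample used elsewhere in this change):

```python
# Direct call to the whisper server: request key is "audio", result key is "asr_result".
import json
import requests

WHISPER_SERVER_ENDPOINT = "http://localhost:7066/v1/asr"  # assumed local default
audio_b64 = "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"

resp = requests.post(WHISPER_SERVER_ENDPOINT, data=json.dumps({"audio": audio_b64}))
resp.raise_for_status()
print(resp.json()["asr_result"])
```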
@@ -22,7 +22,7 @@ function build_docker_images() {
|
||||
cd $WORKPATH/docker_image_build
|
||||
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
|
||||
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
||||
service_list="multimodalqna multimodalqna-ui embedding-multimodal-bridgetower embedding-multimodal retriever-redis lvm-tgi dataprep-multimodal-redis whisper asr"
|
||||
service_list="multimodalqna multimodalqna-ui embedding-multimodal-bridgetower embedding-multimodal retriever-redis lvm-tgi dataprep-multimodal-redis whisper"
|
||||
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
|
||||
|
||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
|
||||
@@ -35,9 +35,8 @@ function setup_env() {
|
||||
export EMBEDDER_PORT=6006
|
||||
export MMEI_EMBEDDING_ENDPOINT="http://${host_ip}:$EMBEDDER_PORT/v1/encode"
|
||||
export MM_EMBEDDING_PORT_MICROSERVICE=6000
|
||||
export ASR_ENDPOINT=http://$host_ip:7066
|
||||
export ASR_SERVICE_PORT=3001
|
||||
export ASR_SERVICE_ENDPOINT="http://${host_ip}:${ASR_SERVICE_PORT}/v1/audio/transcriptions"
|
||||
export WHISPER_SERVER_PORT=7066
|
||||
export WHISPER_SERVER_ENDPOINT="http://${host_ip}:${WHISPER_SERVER_PORT}/v1/asr"
|
||||
export REDIS_URL="redis://${host_ip}:6379"
|
||||
export REDIS_HOST=${host_ip}
|
||||
export INDEX_NAME="mm-rag-redis"
|
||||
|
||||
@@ -23,7 +23,7 @@ function build_docker_images() {
|
||||
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
|
||||
|
||||
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
||||
service_list="multimodalqna multimodalqna-ui embedding-multimodal-bridgetower embedding-multimodal retriever-redis lvm-tgi lvm-llava-svc dataprep-multimodal-redis whisper asr"
|
||||
service_list="multimodalqna multimodalqna-ui embedding-multimodal-bridgetower embedding-multimodal retriever-redis lvm-tgi lvm-llava-svc dataprep-multimodal-redis whisper"
|
||||
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
|
||||
|
||||
docker images && sleep 1m
|
||||
@@ -41,9 +41,8 @@ function setup_env() {
|
||||
export EMBEDDER_PORT=6006
|
||||
export MMEI_EMBEDDING_ENDPOINT="http://${HOST_IP}:$EMBEDDER_PORT/v1/encode"
|
||||
export MM_EMBEDDING_PORT_MICROSERVICE=6000
|
||||
export ASR_ENDPOINT=http://$host_ip:7066
|
||||
export ASR_SERVICE_PORT=3001
|
||||
export ASR_SERVICE_ENDPOINT="http://${host_ip}:${ASR_SERVICE_PORT}/v1/audio/transcriptions"
|
||||
export WHISPER_SERVER_PORT=7066
|
||||
export WHISPER_SERVER_ENDPOINT="http://${HOST_IP}:${WHISPER_SERVER_PORT}/v1/asr"
|
||||
export REDIS_URL="redis://${HOST_IP}:6379"
|
||||
export REDIS_HOST=${HOST_IP}
|
||||
export INDEX_NAME="mm-rag-redis"
|
||||
|
||||
@@ -22,7 +22,7 @@ function build_docker_images() {
|
||||
cd $WORKPATH/docker_image_build
|
||||
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
|
||||
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
||||
service_list="multimodalqna multimodalqna-ui embedding-multimodal-bridgetower embedding-multimodal retriever-redis lvm-llava lvm-llava-svc dataprep-multimodal-redis whisper asr"
|
||||
service_list="multimodalqna multimodalqna-ui embedding-multimodal-bridgetower embedding-multimodal retriever-redis lvm-llava lvm-llava-svc dataprep-multimodal-redis whisper"
|
||||
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
|
||||
|
||||
docker images && sleep 1m
|
||||
@@ -33,9 +33,8 @@ function setup_env() {
|
||||
export EMBEDDER_PORT=6006
|
||||
export MMEI_EMBEDDING_ENDPOINT="http://${host_ip}:$EMBEDDER_PORT/v1/encode"
|
||||
export MM_EMBEDDING_PORT_MICROSERVICE=6000
|
||||
export ASR_ENDPOINT=http://$host_ip:7066
|
||||
export ASR_SERVICE_PORT=3001
|
||||
export ASR_SERVICE_ENDPOINT="http://${host_ip}:${ASR_SERVICE_PORT}/v1/audio/transcriptions"
|
||||
export WHISPER_SERVER_PORT=7066
|
||||
export WHISPER_SERVER_ENDPOINT="http://${host_ip}:${WHISPER_SERVER_PORT}/v1/asr"
|
||||
export REDIS_URL="redis://${host_ip}:6379"
|
||||
export REDIS_HOST=${host_ip}
|
||||
export INDEX_NAME="mm-rag-redis"
|
||||
|
||||
@@ -41,9 +41,9 @@ Take ChatQnA for example. ChatQnA is a chatbot application service based on the
|
||||
## Microservice images
|
||||
|
||||
| Microservice Images | Dockerfile | Description |
|
||||
| ------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| ------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| [opea/agent-langchain](https://hub.docker.com/r/opea/comps-agent-langchain) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/agent/langchain/Dockerfile) | The docker image exposed the OPEA agent microservice for GenAI application use |
|
||||
| [opea/asr](https://hub.docker.com/r/opea/asr) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/asr/whisper/Dockerfile) | The docker image exposed the OPEA Audio-Speech-Recognition microservice for GenAI application use |
|
||||
| [opea/asr](https://hub.docker.com/r/opea/asr) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/asr/src/Dockerfile) | The docker image exposed the OPEA Audio-Speech-Recognition microservice for GenAI application use |
|
||||
| [opea/chathistory-mongo-server](https://hub.docker.com/r/opea/chathistory-mongo-server) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/chathistory/mongo/Dockerfile) | The docker image exposes OPEA Chat History microservice which based on MongoDB database, designed to allow user to store, retrieve and manage chat conversations |
|
||||
| [opea/dataprep-milvus](https://hub.docker.com/r/opea/dataprep-milvus) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/dataprep/milvus/langchain/Dockerfile) | The docker image exposed the OPEA dataprep microservice based on milvus vectordb for GenAI application use |
|
||||
| [opea/dataprep-multimodal-vdms](https://hub.docker.com/r/opea/dataprep-multimodal-vdms) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/dataprep/vdms/multimodal_langchain/Dockerfile) | This docker image exposes an OPEA dataprep microservice based on a multi-modal VDMS for use by GenAI applications. |
|
||||
@@ -98,15 +98,15 @@ Take ChatQnA for example. ChatQnA is a chatbot application service based on the
|
||||
| [opea/retriever-redis](https://hub.docker.com/r/opea/retriever-redis) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/retrievers/redis/langchain/Dockerfile) | The docker image exposed the OPEA retrieval microservice based on redis vectordb for GenAI application use |
|
||||
| [opea/retriever-redis-llamaindex](https://hub.docker.com/r/opea/retriever-redis-llamaindex) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/retrievers/redis/llama_index/Dockerfile) | The docker image exposed the OPEA retriever service based on LlamaIndex for GenAI application use |
|
||||
| [opea/retriever-vdms](https://hub.docker.com/r/opea/retriever-vdms) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/retrievers/vdms/langchain/Dockerfile) | The docker image exposed the OPEA retriever service based on Visual Data Management System for GenAI application use |
|
||||
| [opea/speecht5](https://hub.docker.com/r/opea/speecht5) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/tts/speecht5/dependency/Dockerfile) | The docker image exposed the OPEA SpeechT5 service for GenAI application use |
|
||||
| [opea/speecht5-gaudi](https://hub.docker.com/r/opea/speecht5-gaudi) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/tts/speecht5/dependency/Dockerfile.intel_hpu) | The docker image exposed the OPEA SpeechT5 service on Gaudi2 for GenAI application use |
|
||||
| [opea/speecht5](https://hub.docker.com/r/opea/speecht5) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/tts/src/integrations/dependency/speecht5/Dockerfile) | The docker image exposed the OPEA SpeechT5 service for GenAI application use |
|
||||
| [opea/speecht5-gaudi](https://hub.docker.com/r/opea/speecht5-gaudi) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/tts/src/integrations/dependency/speecht5/Dockerfile.intel_hpu) | The docker image exposed the OPEA SpeechT5 service on Gaudi2 for GenAI application use |
|
||||
| [opea/tei-gaudi](https://hub.docker.com/r/opea/tei-gaudi/tags) | [Link](https://github.com/huggingface/tei-gaudi/blob/habana-main/Dockerfile-hpu) | The docker image powered by HuggingFace Text Embedding Inference (TEI) on Gaudi2 for deploying and serving Embedding Models |
|
||||
| [opea/vectorstore-pathway](https://hub.docker.com/r/opea/vectorstore-pathway) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/vectorstores/pathway/Dockerfile) | The docker image exposed the OPEA Vectorstores microservice with Pathway for GenAI application use |
|
||||
| [opea/video-llama-lvm-server](https://hub.docker.com/r/opea/video-llama-lvm-server) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/lvms/video-llama/dependency/Dockerfile) | The docker image exposed the OPEA microservice running Video-Llama as a large visual model (LVM) server for GenAI application use |
|
||||
| [opea/tts](https://hub.docker.com/r/opea/tts) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/tts/speecht5/Dockerfile) | The docker image exposed the OPEA Text-To-Speech microservice for GenAI application use |
|
||||
| [opea/tts](https://hub.docker.com/r/opea/tts) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/tts/src/Dockerfile) | The docker image exposed the OPEA Text-To-Speech microservice for GenAI application use |
|
||||
| [opea/vllm](https://hub.docker.com/r/opea/vllm) | [Link](https://github.com/vllm-project/vllm/blob/main/Dockerfile.cpu) | The docker image powered by vllm-project for deploying and serving vllm Models |
|
||||
| [opea/vllm-gaudi]() | [Link](https://github.com/HabanaAI/vllm-fork/blob/habana_main/Dockerfile.hpu) | The docker image powered by vllm-fork for deploying and serving vllm-gaudi Models |
|
||||
| [opea/vllm-openvino](https://hub.docker.com/r/opea/vllm-openvino) | [Link](https://github.com/vllm-project/vllm/blob/main/Dockerfile.openvino) | The docker image powered by vllm-project for deploying and serving vllm Models of the Openvino Framework |
|
||||
| [opea/web-retriever-chroma](https://hub.docker.com/r/opea/web-retriever-chroma) | [Link](https://github.com/opea-project/GenAIComps/tree/main/comps/web_retrievers/chroma/langchain/Dockerfile) | The docker image exposed the OPEA retrieval microservice based on chroma vectordb for GenAI application use |
|
||||
| [opea/whisper](https://hub.docker.com/r/opea/whisper) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/asr/whisper/dependency/Dockerfile) | The docker image exposed the OPEA Whisper service for GenAI application use |
|
||||
| [opea/whisper-gaudi](https://hub.docker.com/r/opea/whisper-gaudi) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/asr/whisper/dependency/Dockerfile.intel_hpu) | The docker image exposed the OPEA Whisper service on Gaudi2 for GenAI application use |
|
||||
| [opea/whisper](https://hub.docker.com/r/opea/whisper) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/asr/src/integrations/dependency/whisper/Dockerfile) | The docker image exposed the OPEA Whisper service for GenAI application use |
|
||||
| [opea/whisper-gaudi](https://hub.docker.com/r/opea/whisper-gaudi) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/asr/src/integrations/dependency/whisper/Dockerfile.intel_hpu) | The docker image exposed the OPEA Whisper service on Gaudi2 for GenAI application use |
|