Refactor asr/tts components (#1083)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Sihan Chen
2024-12-31 12:03:10 +08:00
committed by GitHub
parent 1040875055
commit a19c222636
46 changed files with 759 additions and 268 deletions

View File

@@ -0,0 +1,35 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
services:
whisper-service:
image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
container_name: whisper-service
ports:
- "7066:7066"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
restart: unless-stopped
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:7066/health"]
interval: 10s
timeout: 6s
retries: 18
asr:
image: ${REGISTRY:-opea}/asr:${TAG:-latest}
container_name: asr-service
ports:
- "9099:9099"
ipc: host
environment:
ASR_ENDPOINT: ${ASR_ENDPOINT}
depends_on:
whisper-service:
condition: service_healthy
networks:
default:
driver: bridge
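A quick way to exercise this file (a minimal sketch, assuming it is saved as compose.yaml and docker compose v2 is available; the host-IP lookup mirrors the README below):

```bash
# Point the ASR wrapper at the whisper dependency (host IP is illustrative).
ip_address=$(hostname -I | awk '{print $1}')
export ASR_ENDPOINT=http://$ip_address:7066

# Start whisper-service and asr; depends_on waits on the healthcheck above.
docker compose -f compose.yaml up -d

# Confirm the whisper dependency is healthy before calling the ASR service.
curl http://localhost:7066/health
```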

View File

@@ -0,0 +1,40 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
services:
whisper-service:
image: ${REGISTRY:-opea}/whisper-gaudi:${TAG:-latest}
container_name: whisper-service
ports:
- "7066:7066"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
runtime: habana
cap_add:
- SYS_NICE
restart: unless-stopped
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:7066/health"]
interval: 10s
timeout: 6s
retries: 18
asr:
image: ${REGISTRY:-opea}/asr:${TAG:-latest}
container_name: asr-service
ports:
- "3001:9099"
ipc: host
environment:
ASR_ENDPOINT: ${ASR_ENDPOINT}
depends_on:
whisper-service:
condition: service_healthy
networks:
default:
driver: bridge

View File

@@ -16,13 +16,13 @@ COPY comps /home/user/comps
RUN pip install --no-cache-dir --upgrade pip setuptools && \
if [ "${ARCH}" = "cpu" ]; then \
pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu ; \
pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r /home/user/comps/asr/whisper/requirements.txt ; \
pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r /home/user/comps/asr/src/requirements.txt ; \
else \
pip install --no-cache-dir -r /home/user/comps/asr/whisper/requirements.txt ; \
pip install --no-cache-dir -r /home/user/comps/asr/src/requirements.txt ; \
fi
ENV PYTHONPATH=$PYTHONPATH:/home/user
WORKDIR /home/user/comps/asr/whisper
WORKDIR /home/user/comps/asr/src
ENTRYPOINT ["python", "asr.py"]
ENTRYPOINT ["python", "opea_asr_microservice.py"]

View File

@@ -17,7 +17,7 @@ pip install -r requirements.txt
- Xeon CPU
```bash
cd dependency/
cd integrations/dependency/whisper
nohup python whisper_server.py --device=cpu &
python check_whisper_server.py
```
@@ -51,15 +51,15 @@ curl http://localhost:7066/v1/audio/transcriptions \
### 1.3 Start ASR Service/Test
```bash
cd ../
python asr.py
cd ../../..
python opea_asr_microservice.py
python check_asr_server.py
```
While the Whisper service is running, you can start the ASR service. If the ASR service is running properly, you should see output similar to the following:
```bash
{'id': '0e686efd33175ce0ebcf7e0ed7431673', 'text': 'who is pat gelsinger'}
{'text': 'who is pat gelsinger'}
```
## 🚀2. Start Microservice with Docker (Option 2)
@@ -74,20 +74,20 @@ Alternatively, you can also start the ASR microservice with Docker.
```bash
cd ../..
docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/dependency/Dockerfile .
docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/integrations/dependency/Dockerfile .
```
- Gaudi2 HPU
```bash
cd ../..
docker build -t opea/whisper-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/dependency/Dockerfile.intel_hpu .
docker build -t opea/whisper-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/integrations/dependency/Dockerfile.intel_hpu .
```
#### 2.1.2 ASR Service Image
```bash
docker build -t opea/asr:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/Dockerfile .
docker build -t opea/asr:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/Dockerfile .
```
### 2.2 Start Whisper and ASR Service
@@ -97,13 +97,13 @@ docker build -t opea/asr:latest --build-arg https_proxy=$https_proxy --build-arg
- Xeon
```bash
docker run -p 7066:7066 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/whisper:latest
docker run -p 7066:7066 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy opea/whisper:latest
```
- Gaudi2 HPU
```bash
docker run -p 7066:7066 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/whisper-gaudi:latest
docker run -p 7066:7066 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy opea/whisper-gaudi:latest
```
#### 2.2.2 Start ASR service
@@ -111,7 +111,7 @@ docker run -p 7066:7066 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_M
```bash
ip_address=$(hostname -I | awk '{print $1}')
docker run -d -p 9099:9099 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e ASR_ENDPOINT=http://$ip_address:7066 opea/asr:latest
docker run -d -p 9099:9099 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e ASR_ENDPOINT=http://$ip_address:7066 opea/asr:latest
```
#### 2.2.3 Test
@@ -120,8 +120,11 @@ docker run -d -p 9099:9099 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$
# Use curl or python
# curl
http_proxy="" curl http://localhost:9099/v1/audio/transcriptions -XPOST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' -H 'Content-Type: application/json'
wget https://github.com/intel/intel-extension-for-transformers/raw/main/intel_extension_for_transformers/neural_chat/assets/audio/sample.wav
curl http://localhost:9099/v1/audio/transcriptions \
-H "Content-Type: multipart/form-data" \
-F file="@./sample.wav" \
-F model="openai/whisper-small"
# python
python check_asr_server.py

View File

@@ -20,11 +20,24 @@ urllib.request.urlretrieve(
file_name,
)
with open(file_name, "rb") as f:
test_audio_base64_str = base64.b64encode(f.read()).decode("utf-8")
os.remove(file_name)
endpoint = "http://localhost:9099/v1/audio/transcriptions"
inputs = {"byte_str": test_audio_base64_str}
response = requests.post(url=endpoint, data=json.dumps(inputs), proxies={"http": None})
print(response.json())
headers = {"accept": "application/json"}
# Prepare the data and files
data = {
"model": "openai/whisper-small",
"language": "english",
}
try:
with open(file_name, "rb") as audio_file:
files = {"file": (file_name, audio_file)}
response = requests.post(endpoint, headers=headers, data=data, files=files)
if response.status_code != 200:
print(f"Failure with {response.reason}!")
else:
print(response.json())
except Exception as e:
print(f"Failure with {e}!")
os.remove(file_name)

View File

@@ -20,16 +20,16 @@ COPY --chown=user:user comps /home/user/comps
USER user
RUN pip install --no-cache-dir --upgrade pip setuptools && \
pip install --no-cache-dir -r /home/user/comps/asr/whisper/requirements.txt && \
pip install --no-cache-dir -r /home/user/comps/asr/src/requirements.txt && \
if [ "${ARCH}" = "cpu" ]; then \
pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu ; \
pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r /home/user/comps/asr/whisper/requirements.txt ; \
pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r /home/user/comps/asr/src/requirements.txt ; \
else \
pip install --no-cache-dir -r /home/user/comps/asr/whisper/requirements.txt ; \
pip install --no-cache-dir -r /home/user/comps/asr/src/requirements.txt ; \
fi
ENV PYTHONPATH=$PYTHONPATH:/home/user
WORKDIR /home/user/comps/asr/whisper/dependency
WORKDIR /home/user/comps/asr/src/integrations/dependency/whisper
ENTRYPOINT ["python", "whisper_server.py", "--device", "cpu"]

View File

@@ -23,11 +23,11 @@ USER user
# Install requirements and optimum habana
RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir -r /home/user/comps/asr/whisper/requirements.txt && \
pip install --no-cache-dir -r /home/user/comps/asr/src/requirements.txt && \
pip install --no-cache-dir optimum[habana]
ENV PYTHONPATH=$PYTHONPATH:/home/user
WORKDIR /home/user/comps/asr/whisper/dependency
WORKDIR /home/user/comps/asr/src/integrations/dependency/whisper
ENTRYPOINT ["python", "whisper_server.py", "--device", "hpu"]

View File

@@ -5,7 +5,7 @@ import argparse
import base64
import os
import uuid
from typing import List, Optional, Union
from typing import List
import uvicorn
from fastapi import FastAPI, File, Form, Request, UploadFile
@@ -28,7 +28,7 @@ app.add_middleware(
)
@app.get("/v1/health")
@app.get("/health")
async def health() -> Response:
"""Health check."""
return Response(status_code=200)

View File

@@ -0,0 +1,76 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import os
import time
from typing import List
import requests
from fastapi import File, Form, UploadFile
from comps import CustomLogger, OpeaComponent, ServiceType
from comps.cores.proto.api_protocol import AudioTranscriptionResponse
logger = CustomLogger("opea_whisper")
logflag = os.getenv("LOGFLAG", False)
class OpeaWhisperAsr(OpeaComponent):
"""A specialized ASR (Automatic Speech Recognition) component derived from OpeaComponent for Whisper ASR services.
Attributes:
model_name (str): The name of the ASR model used.
"""
def __init__(self, name: str, description: str, config: dict = None):
super().__init__(name, ServiceType.ASR.name.lower(), description, config)
self.base_url = os.getenv("ASR_ENDPOINT", "http://localhost:7066")
async def invoke(
self,
file: UploadFile = File(...), # Handling the uploaded file directly
model: str = Form("openai/whisper-small"),
language: str = Form("english"),
prompt: str = Form(None),
response_format: str = Form("json"),
temperature: float = Form(0),
timestamp_granularities: List[str] = Form(None),
) -> AudioTranscriptionResponse:
"""Invoke the ASR service to generate transcription for the provided input."""
# Read the uploaded file
file_contents = await file.read()
# Prepare the files and data for requests.post
files = {
"file": (file.filename, file_contents, file.content_type),
}
data = {
"model": model,
"language": language,
"prompt": prompt,
"response_format": response_format,
"temperature": temperature,
"timestamp_granularities": timestamp_granularities,
}
# Send the file and model to the server
response = requests.post(f"{self.base_url}/v1/audio/transcriptions", files=files, data=data)
res = response.json()["text"]
return AudioTranscriptionResponse(text=res)
def check_health(self) -> bool:
"""Checks the health of the ASR service.
Returns:
bool: True if the service is reachable and healthy, False otherwise.
"""
try:
response = requests.get(f"{self.base_url}/health")
if response.status_code == 200:
return True
else:
return False
except Exception as e:
# Handle connection errors, timeouts, etc.
logger.error(f"Health check failed: {e}")
return False
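For reference, this component just forwards a multipart request to the Whisper server at ASR_ENDPOINT and polls its /health route; a hedged sketch of the equivalent raw calls (the localhost endpoint and sample.wav are assumptions):

```bash
# What check_health() queries (default base_url shown).
curl http://localhost:7066/health

# What invoke() forwards, assuming a local sample.wav exists.
curl http://localhost:7066/v1/audio/transcriptions \
  -H "Content-Type: multipart/form-data" \
  -F file="@./sample.wav" \
  -F model="openai/whisper-small" \
  -F language="english"
```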

View File

@@ -0,0 +1,94 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import os
import time
from typing import List
from fastapi import File, Form, UploadFile
from integrations.opea_whisper import OpeaWhisperAsr
from comps import (
Base64ByteStrDoc,
CustomLogger,
LLMParamsDoc,
OpeaComponentController,
ServiceType,
opea_microservices,
register_microservice,
register_statistics,
statistics_dict,
)
from comps.cores.proto.api_protocol import AudioTranscriptionResponse
logger = CustomLogger("opea_asr_microservice")
logflag = os.getenv("LOGFLAG", False)
# Initialize OpeaComponentController
controller = OpeaComponentController()
# Register components
try:
# Instantiate ASR components
opea_whisper = OpeaWhisperAsr(
name="OpeaWhisperAsr",
description="OPEA Whisper ASR Service",
)
# Register components with the controller
controller.register(opea_whisper)
# Discover and activate a healthy component
controller.discover_and_activate()
except Exception as e:
logger.error(f"Failed to initialize components: {e}")
@register_microservice(
name="opea_service@asr",
service_type=ServiceType.ASR,
endpoint="/v1/audio/transcriptions",
host="0.0.0.0",
port=9099,
input_datatype=Base64ByteStrDoc,
output_datatype=LLMParamsDoc,
)
@register_statistics(names=["opea_service@asr"])
async def audio_to_text(
file: UploadFile = File(...), # Handling the uploaded file directly
model: str = Form("openai/whisper-small"),
language: str = Form("english"),
prompt: str = Form(None),
response_format: str = Form("json"),
temperature: float = Form(0),
timestamp_granularities: List[str] = Form(None),
) -> AudioTranscriptionResponse:
start = time.time()
if logflag:
logger.info("ASR file uploaded.")
try:
# Use the controller to invoke the active component
asr_response = await controller.invoke(
file=file,
model=model,
language=language,
prompt=prompt,
response_format=response_format,
temperature=temperature,
timestamp_granularities=timestamp_granularities,
)
if logflag:
logger.info(asr_response)
statistics_dict["opea_service@asr"].append_latency(time.time() - start, None)
return asr_response
except Exception as e:
logger.error(f"Error during asr invocation: {e}")
raise
if __name__ == "__main__":
logger.info("OPEA ASR Microservice is starting....")
opea_microservices["opea_service@asr"].start()

View File

@@ -1,53 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import json
import os
import time
import requests
from comps import CustomLogger
logger = CustomLogger("asr")
logflag = os.getenv("LOGFLAG", False)
from comps import (
Base64ByteStrDoc,
LLMParamsDoc,
ServiceType,
opea_microservices,
register_microservice,
register_statistics,
statistics_dict,
)
@register_microservice(
name="opea_service@asr",
service_type=ServiceType.ASR,
endpoint="/v1/audio/transcriptions",
host="0.0.0.0",
port=9099,
input_datatype=Base64ByteStrDoc,
output_datatype=LLMParamsDoc,
)
@register_statistics(names=["opea_service@asr"])
async def audio_to_text(audio: Base64ByteStrDoc):
start = time.time()
byte_str = audio.byte_str
inputs = {"audio": byte_str}
if logflag:
logger.info(inputs)
response = requests.post(url=f"{asr_endpoint}/v1/asr", data=json.dumps(inputs), proxies={"http": None})
if logflag:
logger.info(response)
statistics_dict["opea_service@asr"].append_latency(time.time() - start, None)
return LLMParamsDoc(query=response.json()["asr_result"])
if __name__ == "__main__":
asr_endpoint = os.getenv("ASR_ENDPOINT", "http://localhost:7066")
logger.info("[asr - router] ASR initialized.")
opea_microservices["opea_service@asr"].start()

View File

@@ -279,6 +279,7 @@ class DocSumChatCompletionRequest(BaseModel):
class AudioChatCompletionRequest(BaseModel):
audio: str
voice: str = "default"
messages: Optional[
Union[
str,

View File

@@ -0,0 +1,35 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
services:
gpt-sovits-service:
image: ${REGISTRY:-opea}/gpt-sovits:${TAG:-latest}
container_name: gpt-sovits-service
ports:
- "9880:9880"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
restart: unless-stopped
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:9880/health"]
interval: 10s
timeout: 6s
retries: 18
tts:
image: ${REGISTRY:-opea}/tts:${TAG:-latest}
container_name: tts-service
ports:
- "3002:9088"
ipc: host
environment:
TTS_ENDPOINT: ${TTS_ENDPOINT}
depends_on:
gpt-sovits-service:
condition: service_healthy
networks:
default:
driver: bridge

View File

@@ -0,0 +1,35 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
services:
speecht5-service:
image: ${REGISTRY:-opea}/speecht5:${TAG:-latest}
container_name: speecht5-service
ports:
- "7055:7055"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
restart: unless-stopped
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:7055/health"]
interval: 10s
timeout: 6s
retries: 18
tts:
image: ${REGISTRY:-opea}/tts:${TAG:-latest}
container_name: tts-service
ports:
- "3002:9088"
ipc: host
environment:
TTS_ENDPOINT: ${TTS_ENDPOINT}
depends_on:
speecht5-service:
condition: service_healthy
networks:
default:
driver: bridge
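As with the ASR stack, a minimal sketch of bringing up this pair (the compose.yaml name and host IP are assumptions):

```bash
# Point the tts wrapper at the speecht5 dependency (host IP is illustrative).
ip_address=$(hostname -I | awk '{print $1}')
export TTS_ENDPOINT=http://$ip_address:7055

docker compose -f compose.yaml up -d

# speecht5-service exposes the /health route used by the healthcheck.
curl http://localhost:7055/health
```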

View File

@@ -0,0 +1,40 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
services:
speecht5-service:
image: ${REGISTRY:-opea}/speecht5-gaudi:${TAG:-latest}
container_name: speecht5-service
ports:
- "7055:7055"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
runtime: habana
cap_add:
- SYS_NICE
restart: unless-stopped
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:7055/health"]
interval: 10s
timeout: 6s
retries: 18
tts:
image: ${REGISTRY:-opea}/tts:${TAG:-latest}
container_name: tts-service
ports:
- "3002:9088"
ipc: host
environment:
TTS_ENDPOINT: ${TTS_ENDPOINT}
depends_on:
speecht5-service:
condition: service_healthy
networks:
default:
driver: bridge

View File

@@ -1,2 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

View File

@@ -1,2 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

View File

@@ -1,2 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

View File

@@ -1,53 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import json
import os
import time
import requests
from comps import (
Base64ByteStrDoc,
CustomLogger,
ServiceType,
TextDoc,
opea_microservices,
register_microservice,
register_statistics,
statistics_dict,
)
logger = CustomLogger("tts")
logflag = os.getenv("LOGFLAG", False)
@register_microservice(
name="opea_service@tts",
service_type=ServiceType.TTS,
endpoint="/v1/audio/speech",
host="0.0.0.0",
port=9088,
input_datatype=TextDoc,
output_datatype=Base64ByteStrDoc,
)
@register_statistics(names=["opea_service@tts"])
async def text_to_audio(input: TextDoc):
if logflag:
logger.info(input)
start = time.time()
text = input.text
inputs = {"text": text}
response = requests.post(url=f"{tts_endpoint}/v1/tts", data=json.dumps(inputs), proxies={"http": None})
statistics_dict["opea_service@tts"].append_latency(time.time() - start, None)
result = Base64ByteStrDoc(byte_str=response.json()["tts_result"])
if logflag:
logger.info(result)
return result
if __name__ == "__main__":
tts_endpoint = os.getenv("TTS_ENDPOINT", "http://localhost:7055")
logger.info("[tts - router] TTS initialized.")
opea_microservices["opea_service@tts"].start()

View File

@@ -14,13 +14,13 @@ COPY comps /home/user/comps
RUN pip install --no-cache-dir --upgrade pip setuptools && \
if [ "${ARCH}" = "cpu" ]; then \
pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu ; \
pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r /home/user/comps/tts/speecht5/requirements.txt ; \
pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r /home/user/comps/tts/src/requirements.txt ; \
else \
pip install --no-cache-dir -r /home/user/comps/tts/speecht5/requirements.txt ; \
pip install --no-cache-dir -r /home/user/comps/tts/src/requirements.txt ; \
fi
ENV PYTHONPATH=$PYTHONPATH:/home/user
WORKDIR /home/user/comps/tts/speecht5
WORKDIR /home/user/comps/tts/src
ENTRYPOINT ["python", "tts.py"]
ENTRYPOINT ["python", "opea_tts_microservice.py"]

View File

@@ -7,7 +7,7 @@ TTS (Text-To-Speech) microservice helps users convert text to speech. When build
- Xeon CPU
```bash
cd dependency/
cd integrations/dependency/speecht5
nohup python speecht5_server.py --device=cpu &
curl http://localhost:7055/v1/tts -XPOST -d '{"text": "Who are you?"}' -H 'Content-Type: application/json'
```
@@ -17,7 +17,7 @@ curl http://localhost:7055/v1/tts -XPOST -d '{"text": "Who are you?"}' -H 'Conte
```bash
pip install optimum[habana]
cd dependency/
cd integrations/dependency/speecht5
nohup python speecht5_server.py --device=hpu &
curl http://localhost:7055/v1/tts -XPOST -d '{"text": "Who are you?"}' -H 'Content-Type: application/json'
```
@@ -25,9 +25,9 @@ curl http://localhost:7055/v1/tts -XPOST -d '{"text": "Who are you?"}' -H 'Conte
## 1.3 Start TTS Service/Test
```bash
python tts.py
python opea_tts_microservice.py
curl http://localhost:9088/v1/audio/speech -XPOST -d '{"text": "Who are you?"}' -H 'Content-Type: application/json'
curl http://localhost:9088/v1/audio/speech -XPOST -d '{"input": "Who are you?"}' -H 'Content-Type: application/json' --output speech.mp3
```
## 🚀2. Start Microservice with Docker (Option 2)
@@ -42,20 +42,20 @@ Alternatively, you can start the TTS microservice with Docker.
```bash
cd ../../../
docker build -t opea/speecht5:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/speecht5/dependency/Dockerfile .
docker build -t opea/speecht5:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/src/integrations/dependency/speecht5/Dockerfile .
```
- Gaudi2 HPU
```bash
cd ../../../
docker build -t opea/speecht5-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/speecht5/dependency/Dockerfile.intel_hpu .
docker build -t opea/speecht5-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/src/integrations/dependency/speecht5/Dockerfile.intel_hpu .
```
#### 2.1.2 TTS Service Image
```bash
docker build -t opea/tts:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/speecht5/Dockerfile .
docker build -t opea/tts:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/src/Dockerfile .
```
### 2.2 Start SpeechT5 and TTS Service
@@ -89,7 +89,5 @@ curl http://localhost:7055/v1/tts -XPOST -d '{"text": "Who are you?"}' -H 'Conte
# openai protocol compatible
# voice can be 'male' or 'default'
curl http://localhost:7055/v1/audio/speech -XPOST -d '{"input":"Who are you?", "voice": "male"}' -H 'Content-Type: application/json' --output speech.wav
curl http://localhost:9088/v1/audio/speech -XPOST -d '{"text": "Who are you?"}' -H 'Content-Type: application/json'
curl http://localhost:9088/v1/audio/speech -XPOST -d '{"input":"Who are you?", "voice": "male"}' -H 'Content-Type: application/json' --output speech.wav
```

View File

@@ -7,7 +7,7 @@ This microservice is validated on Xeon/CUDA. HPU support is under development.
## Build the Image
```bash
docker build -t opea/gpt-sovits:latest --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy -f comps/tts/gpt-sovits/Dockerfile .
docker build -t opea/gpt-sovits:latest --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy -f comps/tts/src/integrations/dependency/gpt-sovits/Dockerfile .
```
## Start the Service

View File

@@ -23,13 +23,13 @@ USER user
RUN pip install --no-cache-dir --upgrade pip setuptools && \
if [ "${ARCH}" = "cpu" ]; then \
pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu ; \
pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r /home/user/comps/tts/speecht5/requirements.txt ; \
pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r /home/user/comps/tts/src/integrations/dependency/speecht5/requirements.txt ; \
else \
pip install --no-cache-dir -r /home/user/comps/tts/speecht5/requirements.txt ; \
pip install --no-cache-dir -r /home/user/comps/tts/src/integrations/dependency/speecht5/requirements.txt ; \
fi
ENV PYTHONPATH=$PYTHONPATH:/home/user
WORKDIR /home/user/comps/tts/speecht5/dependency
WORKDIR /home/user/comps/tts/src/integrations/dependency/speecht5
ENTRYPOINT ["python", "speecht5_server.py", "--device", "cpu"]

View File

@@ -24,11 +24,11 @@ USER user
# Install requirements and optimum habana
RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir -r /home/user/comps/tts/speecht5/requirements.txt && \
pip install --no-cache-dir -r /home/user/comps/tts/src/integrations/dependency/speecht5/requirements.txt && \
pip install --no-cache-dir optimum[habana]
ENV PYTHONPATH=$PYTHONPATH:/home/user
WORKDIR /home/user/comps/tts/speecht5/dependency
WORKDIR /home/user/comps/tts/src/integrations/dependency/speecht5
ENTRYPOINT ["python", "speecht5_server.py", "--device", "hpu"]

View File

@@ -24,6 +24,7 @@ class SpeechT5Model:
self.processor = SpeechT5Processor.from_pretrained(self.model_name_or_path, normalize=True)
self.vocoder = SpeechT5HifiGan.from_pretrained(vocoder_model_name_or_path).to(device)
self.vocoder.eval()
self.voice = "default"
# fetch default speaker embedding
try:
@@ -89,8 +90,13 @@ class SpeechT5Model:
)
def t2s(self, text, voice="default"):
if voice == "male":
self.default_speaker_embedding = torch.load("spk_embed_male.pt")
if self.voice != voice:
try:
print(f"Loading spk embedding with voice: {voice}.")
self.default_speaker_embedding = torch.load(f"spk_embed_{voice}.pt")
self.voice = voice
except Exception as e:
print(e)
if self.device == "hpu":
# See https://github.com/huggingface/optimum-habana/pull/824
from optimum.habana.utils import set_seed

View File

@@ -26,7 +26,7 @@ app.add_middleware(
)
@app.get("/v1/health")
@app.get("/health")
async def health() -> Response:
"""Health check."""
return Response(status_code=200)
@@ -37,8 +37,9 @@ async def text_to_speech(request: Request):
logger.info("SpeechT5 generation begin.")
request_dict = await request.json()
text = request_dict.pop("text")
voice = request_dict.pop("voice", "default")
speech = tts.t2s(text)
speech = tts.t2s(text, voice)
sf.write("tmp.wav", speech, samplerate=16000)
with open("tmp.wav", "rb") as f:
bytes = f.read()
@@ -48,13 +49,8 @@ async def text_to_speech(request: Request):
@app.post("/v1/audio/speech")
async def audio_speech(request: AudioSpeechRequest):
async def audio_speech(request: AudioSpeechRequest) -> StreamingResponse:
logger.info("SpeechT5 generation begin.")
# validate the request parameters
if request.model != tts.model_name_or_path:
raise Exception("TTS model mismatch! Currently only support model: microsoft/speecht5_tts")
if request.voice not in ["default", "male"] or request.speed != 1.0:
logger.warning("Currently parameter 'speed' can only be 1.0 and 'voice' can only be default or male!")
speech = tts.t2s(request.input, voice=request.voice)

View File

@@ -0,0 +1,55 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import os
import time
import requests
from fastapi.responses import StreamingResponse
from comps import CustomLogger, OpeaComponent, ServiceType
from comps.cores.proto.api_protocol import AudioSpeechRequest
logger = CustomLogger("opea_gptsovits")
logflag = os.getenv("LOGFLAG", False)
class OpeaGptsovitsTts(OpeaComponent):
"""A specialized TTS (Text To Speech) component derived from OpeaComponent for GPTSoVITS TTS services.
Attributes:
model_name (str): The name of the TTS model used.
"""
def __init__(self, name: str, description: str, config: dict = None):
super().__init__(name, ServiceType.TTS.name.lower(), description, config)
self.base_url = os.getenv("TTS_ENDPOINT", "http://localhost:9880")
async def invoke(
self,
request: AudioSpeechRequest,
) -> requests.models.Response:
"""Invoke the TTS service to generate speech for the provided input."""
# see https://github.com/Spycsh/GPT-SoVITS/blob/openai_compat/api.py#L948 for usage
# make sure you change the refer_wav_path locally
request.voice = None
response = requests.post(f"{self.base_url}/v1/audio/speech", data=request.json())
return response
def check_health(self) -> bool:
"""Checks the health of the TTS service.
Returns:
bool: True if the service is reachable and healthy, False otherwise.
"""
try:
response = requests.get(f"{self.base_url}/health")
if response.status_code == 200:
return True
else:
return False
except Exception as e:
# Handle connection errors, timeouts, etc.
logger.error(f"Health check failed: {e}")
return False
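Since invoke() clears request.voice and posts the serialized AudioSpeechRequest to the GPT-SoVITS server, a rough equivalent of the forwarded call looks like this (the field set and output name are assumptions; see the linked api.py for the authoritative parameters):

```bash
# Sketch of the request OpeaGptsovitsTts forwards; 9880 is the default base_url port.
curl http://localhost:9880/v1/audio/speech \
  -XPOST \
  -d '{"input": "Who are you?"}' \
  -H 'Content-Type: application/json' \
  --output speech.wav
```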

View File

@@ -0,0 +1,57 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import os
import time
import requests
from fastapi.responses import StreamingResponse
from comps import CustomLogger, OpeaComponent, ServiceType
from comps.cores.proto.api_protocol import AudioSpeechRequest
logger = CustomLogger("opea_speecht5")
logflag = os.getenv("LOGFLAG", False)
class OpeaSpeecht5Tts(OpeaComponent):
"""A specialized TTS (Text To Speech) component derived from OpeaComponent for SpeechT5 TTS services.
Attributes:
model_name (str): The name of the TTS model used.
"""
def __init__(self, name: str, description: str, config: dict = None):
super().__init__(name, ServiceType.TTS.name.lower(), description, config)
self.base_url = os.getenv("TTS_ENDPOINT", "http://localhost:7055")
def invoke(
self,
request: AudioSpeechRequest,
) -> requests.models.Response:
"""Invoke the TTS service to generate speech for the provided input."""
# validate the request parameters
if request.model not in ["microsoft/speecht5_tts"]:
raise Exception("TTS model mismatch! Currently only support model: microsoft/speecht5_tts")
if request.voice not in ["default", "male"] or request.speed != 1.0:
logger.warning("Currently parameter 'speed' can only be 1.0 and 'voice' can only be default or male!")
response = requests.post(f"{self.base_url}/v1/audio/speech", data=request.json())
return response
def check_health(self) -> bool:
"""Checks the health of the TTS service.
Returns:
bool: True if the service is reachable and healthy, False otherwise.
"""
try:
response = requests.get(f"{self.base_url}/health")
if response.status_code == 200:
return True
else:
return False
except Exception as e:
# Handle connection errors, timeouts, etc.
logger.error(f"Health check failed: {e}")
return False
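invoke() here validates the model and voice, then forwards the JSON body to the SpeechT5 server's OpenAI-style route; a hedged equivalent of that forwarded call against the default base_url (the output filename is illustrative):

```bash
# Mirrors the payload OpeaSpeecht5Tts forwards to the speecht5 server.
curl http://localhost:7055/v1/audio/speech \
  -XPOST \
  -d '{"input": "Who are you?", "model": "microsoft/speecht5_tts", "voice": "male"}' \
  -H 'Content-Type: application/json' \
  --output speech.wav
```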

View File

@@ -0,0 +1,88 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import os
import time
from fastapi.responses import StreamingResponse
from integrations.opea_gptsovits import OpeaGptsovitsTts
from integrations.opea_speecht5 import OpeaSpeecht5Tts
from comps import (
CustomLogger,
OpeaComponentController,
ServiceType,
opea_microservices,
register_microservice,
register_statistics,
statistics_dict,
)
from comps.cores.proto.api_protocol import AudioSpeechRequest
logger = CustomLogger("opea_tts_microservice")
logflag = os.getenv("LOGFLAG", False)
# Initialize OpeaComponentController
controller = OpeaComponentController()
# Register components
try:
# Instantiate TTS components
opea_speecht5 = OpeaSpeecht5Tts(
name="OpeaSpeecht5Tts",
description="OPEA SpeechT5 TTS Service",
)
opea_gptsovits = OpeaGptsovitsTts(
name="OpeaGptsovitsTts",
description="OPEA GPTSoVITS TTS Service",
)
# Register components with the controller
controller.register(opea_speecht5)
controller.register(opea_gptsovits)
# Discover and activate a healthy component
controller.discover_and_activate()
except Exception as e:
logger.error(f"Failed to initialize components: {e}")
async def stream_forwarder(response):
"""Forward the stream chunks to the client using iter_content."""
for chunk in response.iter_content(chunk_size=1024):
yield chunk
@register_microservice(
name="opea_service@tts",
service_type=ServiceType.TTS,
endpoint="/v1/audio/speech",
host="0.0.0.0",
port=9088,
input_datatype=AudioSpeechRequest,
output_datatype=StreamingResponse,
)
@register_statistics(names=["opea_service@tts"])
async def text_to_speech(request: AudioSpeechRequest) -> StreamingResponse:
start = time.time()
if logflag:
logger.info(f"Input received: {request}")
try:
# Use the controller to invoke the active component
tts_response = controller.invoke(request)
if logflag:
logger.info(tts_response)
statistics_dict["opea_service@tts"].append_latency(time.time() - start, None)
return StreamingResponse(stream_forwarder(tts_response))
except Exception as e:
logger.error(f"Error during tts invocation: {e}")
raise
if __name__ == "__main__":
logger.info("OPEA TTS Microservice is starting....")
opea_microservices["opea_service@tts"].start()

View File

@@ -0,0 +1,11 @@
aiohttp
docarray[full]
fastapi
opentelemetry-api
opentelemetry-exporter-otlp
opentelemetry-sdk
prometheus-fastapi-instrumentator
pydantic==2.9.1
pyyaml
shortuuid
uvicorn