Refactor DocSum example (#1286)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
This commit is contained in:
@@ -6,7 +6,8 @@ FROM python:3.11-slim
|
|||||||
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
|
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
|
||||||
libgl1-mesa-glx \
|
libgl1-mesa-glx \
|
||||||
libjemalloc-dev \
|
libjemalloc-dev \
|
||||||
git
|
git \
|
||||||
|
ffmpeg
|
||||||
|
|
||||||
RUN useradd -m -s /bin/bash user && \
|
RUN useradd -m -s /bin/bash user && \
|
||||||
mkdir -p /home/user && \
|
mkdir -p /home/user && \
|
||||||
|
|||||||
@@ -70,34 +70,6 @@ services:
|
|||||||
https_proxy: ${https_proxy}
|
https_proxy: ${https_proxy}
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
|
||||||
dataprep-audio2text:
|
|
||||||
image: ${REGISTRY:-opea}/dataprep-audio2text:${TAG:-latest}
|
|
||||||
container_name: dataprep-audio2text-service
|
|
||||||
ports:
|
|
||||||
- "9099:9099"
|
|
||||||
ipc: host
|
|
||||||
environment:
|
|
||||||
A2T_ENDPOINT: ${A2T_ENDPOINT}
|
|
||||||
|
|
||||||
dataprep-video2audio:
|
|
||||||
image: ${REGISTRY:-opea}/dataprep-video2audio:${TAG:-latest}
|
|
||||||
container_name: dataprep-video2audio-service
|
|
||||||
ports:
|
|
||||||
- "7078:7078"
|
|
||||||
ipc: host
|
|
||||||
environment:
|
|
||||||
V2A_ENDPOINT: ${V2A_ENDPOINT}
|
|
||||||
|
|
||||||
dataprep-multimedia2text:
|
|
||||||
image: ${REGISTRY:-opea}/dataprep-multimedia2text:${TAG:-latest}
|
|
||||||
container_name: dataprep-multimedia2text
|
|
||||||
ports:
|
|
||||||
- "7079:7079"
|
|
||||||
ipc: host
|
|
||||||
environment:
|
|
||||||
V2A_ENDPOINT: ${V2A_ENDPOINT}
|
|
||||||
A2T_ENDPOINT: ${A2T_ENDPOINT}
|
|
||||||
|
|
||||||
docsum-backend-server:
|
docsum-backend-server:
|
||||||
image: ${REGISTRY:-opea}/docsum:${TAG:-latest}
|
image: ${REGISTRY:-opea}/docsum:${TAG:-latest}
|
||||||
container_name: docsum-backend-server
|
container_name: docsum-backend-server
|
||||||
@@ -111,8 +83,9 @@ services:
|
|||||||
- https_proxy=${https_proxy}
|
- https_proxy=${https_proxy}
|
||||||
- http_proxy=${http_proxy}
|
- http_proxy=${http_proxy}
|
||||||
- MEGA_SERVICE_HOST_IP=${HOST_IP}
|
- MEGA_SERVICE_HOST_IP=${HOST_IP}
|
||||||
- DATA_SERVICE_HOST_IP=${DATA_SERVICE_HOST_IP}
|
|
||||||
- LLM_SERVICE_HOST_IP=${HOST_IP}
|
- LLM_SERVICE_HOST_IP=${HOST_IP}
|
||||||
|
- ASR_SERVICE_HOST_IP=${ASR_SERVICE_HOST_IP}
|
||||||
|
|
||||||
ipc: host
|
ipc: host
|
||||||
restart: always
|
restart: always
|
||||||
|
|
||||||
|
|||||||
@@ -15,11 +15,3 @@ export DOCSUM_LLM_SERVER_PORT="9000"
|
|||||||
export DOCSUM_BACKEND_SERVER_PORT="8888"
|
export DOCSUM_BACKEND_SERVER_PORT="8888"
|
||||||
export DOCSUM_FRONTEND_PORT="5173"
|
export DOCSUM_FRONTEND_PORT="5173"
|
||||||
export BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum"
|
export BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum"
|
||||||
export V2A_SERVICE_HOST_IP=${host_ip}
|
|
||||||
export V2A_ENDPOINT=http://$host_ip:7078
|
|
||||||
export A2T_ENDPOINT=http://$host_ip:7066
|
|
||||||
export A2T_SERVICE_HOST_IP=${host_ip}
|
|
||||||
export A2T_SERVICE_PORT=9099
|
|
||||||
export DATA_ENDPOINT=http://$host_ip:7079
|
|
||||||
export DATA_SERVICE_HOST_IP=${host_ip}
|
|
||||||
export DATA_SERVICE_PORT=7079
|
|
||||||
|
|||||||
@@ -29,30 +29,6 @@ The Whisper Service converts audio files to text. Follow these steps to build an
|
|||||||
docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/dependency/Dockerfile .
|
docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/dependency/Dockerfile .
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Audio to text Service
|
|
||||||
|
|
||||||
The Audio to text Service is another service for converting audio to text. Follow these steps to build and run the service:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
docker build -t opea/dataprep-audio2text:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/multimedia2text/audio2text/Dockerfile .
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Video to Audio Service
|
|
||||||
|
|
||||||
The Video to Audio Service extracts audio from video files. Follow these steps to build and run the service:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
docker build -t opea/dataprep-video2audio:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/multimedia2text/video2audio/Dockerfile .
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Multimedia to Text Service
|
|
||||||
|
|
||||||
The Multimedia to Text Service transforms multimedia data to text data. Follow these steps to build and run the service:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
docker build -t opea/dataprep-multimedia2text:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/multimedia2text/Dockerfile .
|
|
||||||
```
|
|
||||||
|
|
||||||
### 2. Build MegaService Docker Image
|
### 2. Build MegaService Docker Image
|
||||||
|
|
||||||
To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `docsum.py` Python script. Build the MegaService Docker image using the command below:
|
To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `docsum.py` Python script. Build the MegaService Docker image using the command below:
|
||||||
@@ -149,9 +125,6 @@ You will have the following Docker Images:
|
|||||||
2. `opea/docsum:latest`
|
2. `opea/docsum:latest`
|
||||||
3. `opea/llm-docsum-tgi:latest`
|
3. `opea/llm-docsum-tgi:latest`
|
||||||
4. `opea/whisper:latest`
|
4. `opea/whisper:latest`
|
||||||
5. `opea/dataprep-audio2text:latest`
|
|
||||||
6. `opea/dataprep-multimedia2text:latest`
|
|
||||||
7. `opea/dataprep-video2audio:latest`
|
|
||||||
|
|
||||||
### Validate Microservices
|
### Validate Microservices
|
||||||
|
|
||||||
@@ -188,37 +161,7 @@ You will have the following Docker Images:
|
|||||||
{"asr_result":"you"}
|
{"asr_result":"you"}
|
||||||
```
|
```
|
||||||
|
|
||||||
4. Audio2Text Microservice
|
4. MegaService
|
||||||
|
|
||||||
```bash
|
|
||||||
curl http://${host_ip}:9099/v1/audio/transcriptions \
|
|
||||||
-X POST \
|
|
||||||
-d '{"byte_str":"UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
|
|
||||||
-H 'Content-Type: application/json'
|
|
||||||
```
|
|
||||||
|
|
||||||
Expected output:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
{"downstream_black_list":[],"id":"--> this will be different id number for each run <--","query":"you"}
|
|
||||||
```
|
|
||||||
|
|
||||||
5. Multimedia to text Microservice
|
|
||||||
|
|
||||||
```bash
|
|
||||||
curl http://${host_ip}:7079/v1/multimedia2text \
|
|
||||||
-X POST \
|
|
||||||
-d '{"audio":"UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
|
|
||||||
-H 'Content-Type: application/json'
|
|
||||||
```
|
|
||||||
|
|
||||||
Expected output:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
{"downstream_black_list":[],"id":"--> this will be different id number for each run <--","query":"you"}
|
|
||||||
```
|
|
||||||
|
|
||||||
6. MegaService
|
|
||||||
|
|
||||||
Text:
|
Text:
|
||||||
|
|
||||||
@@ -257,7 +200,7 @@ You will have the following Docker Images:
|
|||||||
-F "stream=true"
|
-F "stream=true"
|
||||||
```
|
```
|
||||||
|
|
||||||
> Audio and Video file uploads are not supported in docsum with curl request, please use the Gradio-UI.
|
> Audio and video file uploads are not supported in DocSum via curl requests; please use the Gradio UI. You can still pass a base64 string of the audio or video file as follows:
|
||||||
|
|
||||||
Audio:
|
Audio:
|
||||||
|
|
||||||
@@ -291,7 +234,7 @@ You will have the following Docker Images:
|
|||||||
-F "stream=true"
|
-F "stream=true"
|
||||||
```
|
```
|
||||||
|
|
||||||
7. MegaService with long context
|
5. MegaService with long context
|
||||||
|
|
||||||
If you want to deal with long context, you can set the following parameters and select a suitable summary type.
|
If you want to deal with long context, you can set the following parameters and select a suitable summary type.
|
||||||
|
|
||||||
|
|||||||
@@ -50,43 +50,12 @@ services:
|
|||||||
https_proxy: ${https_proxy}
|
https_proxy: ${https_proxy}
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
|
||||||
dataprep-audio2text:
|
|
||||||
image: ${REGISTRY:-opea}/dataprep-audio2text:${TAG:-latest}
|
|
||||||
container_name: dataprep-audio2text-server
|
|
||||||
ports:
|
|
||||||
- "9099:9099"
|
|
||||||
ipc: host
|
|
||||||
environment:
|
|
||||||
A2T_ENDPOINT: ${A2T_ENDPOINT}
|
|
||||||
|
|
||||||
dataprep-video2audio:
|
|
||||||
image: ${REGISTRY:-opea}/dataprep-video2audio:${TAG:-latest}
|
|
||||||
container_name: dataprep-video2audio-server
|
|
||||||
ports:
|
|
||||||
- "7078:7078"
|
|
||||||
ipc: host
|
|
||||||
environment:
|
|
||||||
V2A_ENDPOINT: ${V2A_ENDPOINT}
|
|
||||||
|
|
||||||
dataprep-multimedia2text:
|
|
||||||
image: ${REGISTRY:-opea}/dataprep-multimedia2text:${TAG:-latest}
|
|
||||||
container_name: dataprep-multimedia2text
|
|
||||||
ports:
|
|
||||||
- "7079:7079"
|
|
||||||
ipc: host
|
|
||||||
environment:
|
|
||||||
V2A_ENDPOINT: ${V2A_ENDPOINT}
|
|
||||||
A2T_ENDPOINT: ${A2T_ENDPOINT}
|
|
||||||
|
|
||||||
docsum-xeon-backend-server:
|
docsum-xeon-backend-server:
|
||||||
image: ${REGISTRY:-opea}/docsum:${TAG:-latest}
|
image: ${REGISTRY:-opea}/docsum:${TAG:-latest}
|
||||||
container_name: docsum-xeon-backend-server
|
container_name: docsum-xeon-backend-server
|
||||||
depends_on:
|
depends_on:
|
||||||
- tgi-server
|
- tgi-server
|
||||||
- llm-docsum-tgi
|
- llm-docsum-tgi
|
||||||
- dataprep-multimedia2text
|
|
||||||
- dataprep-video2audio
|
|
||||||
- dataprep-audio2text
|
|
||||||
ports:
|
ports:
|
||||||
- "8888:8888"
|
- "8888:8888"
|
||||||
environment:
|
environment:
|
||||||
@@ -94,8 +63,8 @@ services:
|
|||||||
- https_proxy=${https_proxy}
|
- https_proxy=${https_proxy}
|
||||||
- http_proxy=${http_proxy}
|
- http_proxy=${http_proxy}
|
||||||
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
|
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
|
||||||
- DATA_SERVICE_HOST_IP=${DATA_SERVICE_HOST_IP}
|
|
||||||
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
|
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
|
||||||
|
- ASR_SERVICE_HOST_IP=${ASR_SERVICE_HOST_IP}
|
||||||
ipc: host
|
ipc: host
|
||||||
restart: always
|
restart: always
|
||||||
|
|
||||||
|
|||||||
@@ -13,28 +13,12 @@ git clone https://github.com/opea-project/GenAIComps.git
|
|||||||
cd GenAIComps
|
cd GenAIComps
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Audio to text Service
|
#### Whisper Service
|
||||||
|
|
||||||
The Audio to text Service is another service for converting audio to text. Follow these steps to build and run the service:
|
The Whisper Service converts audio files to text. Follow these steps to build and run the service:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker build -t opea/dataprep-audio2text:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/multimedia2text/audio2text/Dockerfile .
|
docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/dependency/Dockerfile .
|
||||||
```
|
|
||||||
|
|
||||||
#### Video to Audio Service
|
|
||||||
|
|
||||||
The Video to Audio Service extracts audio from video files. Follow these steps to build and run the service:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
docker build -t opea/dataprep-video2audio:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/multimedia2text/video2audio/Dockerfile .
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Multimedia to Text Service
|
|
||||||
|
|
||||||
The Multimedia to Text Service transforms multimedia data to text data. Follow these steps to build and run the service:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
docker build -t opea/dataprep-multimedia2text:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/multimedia2text/Dockerfile .
|
|
||||||
```
|
```
|
||||||
|
|
||||||
### 2. Build MegaService Docker Image
|
### 2. Build MegaService Docker Image
|
||||||
@@ -133,9 +117,6 @@ You will have the following Docker Images:
|
|||||||
2. `opea/docsum:latest`
|
2. `opea/docsum:latest`
|
||||||
3. `opea/llm-docsum-tgi:latest`
|
3. `opea/llm-docsum-tgi:latest`
|
||||||
4. `opea/whisper:latest`
|
4. `opea/whisper:latest`
|
||||||
5. `opea/dataprep-audio2text:latest`
|
|
||||||
6. `opea/dataprep-multimedia2text:latest`
|
|
||||||
7. `opea/dataprep-video2audio:latest`
|
|
||||||
|
|
||||||
### Validate Microservices
|
### Validate Microservices
|
||||||
|
|
||||||
@@ -172,37 +153,7 @@ You will have the following Docker Images:
|
|||||||
{"asr_result":"you"}
|
{"asr_result":"you"}
|
||||||
```
|
```
|
||||||
|
|
||||||
4. Audio2Text Microservice
|
4. MegaService
|
||||||
|
|
||||||
```bash
|
|
||||||
curl http://${host_ip}:9199/v1/audio/transcriptions \
|
|
||||||
-X POST \
|
|
||||||
-d '{"byte_str":"UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
|
|
||||||
-H 'Content-Type: application/json'
|
|
||||||
```
|
|
||||||
|
|
||||||
Expected output:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
{"downstream_black_list":[],"id":"--> this will be different id number for each run <--","query":"you"}
|
|
||||||
```
|
|
||||||
|
|
||||||
5. Multimedia to text Microservice
|
|
||||||
|
|
||||||
```bash
|
|
||||||
curl http://${host_ip}:7079/v1/multimedia2text \
|
|
||||||
-X POST \
|
|
||||||
-d '{"audio":"UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
|
|
||||||
-H 'Content-Type: application/json'
|
|
||||||
```
|
|
||||||
|
|
||||||
Expected output:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
{"downstream_black_list":[],"id":"--> this will be different id number for each run <--","query":"you"}
|
|
||||||
```
|
|
||||||
|
|
||||||
6. MegaService
|
|
||||||
|
|
||||||
Text:
|
Text:
|
||||||
|
|
||||||
@@ -274,7 +225,7 @@ You will have the following Docker Images:
|
|||||||
-F "stream=True"
|
-F "stream=True"
|
||||||
```
|
```
|
||||||
|
|
||||||
7. MegaService with long context
|
5. MegaService with long context
|
||||||
|
|
||||||
If you want to deal with long context, you can set the following parameters and select a suitable summary type.
|
If you want to deal with long context, you can set the following parameters and select a suitable summary type.
|
||||||
|
|
||||||
|
|||||||
@@ -62,43 +62,12 @@ services:
|
|||||||
- SYS_NICE
|
- SYS_NICE
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
|
||||||
dataprep-audio2text:
|
|
||||||
image: ${REGISTRY:-opea}/dataprep-audio2text:${TAG:-latest}
|
|
||||||
container_name: dataprep-audio2text-server
|
|
||||||
ports:
|
|
||||||
- "9199:9099"
|
|
||||||
ipc: host
|
|
||||||
environment:
|
|
||||||
A2T_ENDPOINT: ${A2T_ENDPOINT}
|
|
||||||
|
|
||||||
dataprep-video2audio:
|
|
||||||
image: ${REGISTRY:-opea}/dataprep-video2audio:${TAG:-latest}
|
|
||||||
container_name: dataprep-video2audio-server
|
|
||||||
ports:
|
|
||||||
- "7078:7078"
|
|
||||||
ipc: host
|
|
||||||
environment:
|
|
||||||
V2A_ENDPOINT: ${V2A_ENDPOINT}
|
|
||||||
|
|
||||||
dataprep-multimedia2text:
|
|
||||||
image: ${REGISTRY:-opea}/dataprep-multimedia2text:${TAG:-latest}
|
|
||||||
container_name: dataprep-multimedia2text
|
|
||||||
ports:
|
|
||||||
- "7079:7079"
|
|
||||||
ipc: host
|
|
||||||
environment:
|
|
||||||
V2A_ENDPOINT: ${V2A_ENDPOINT}
|
|
||||||
A2T_ENDPOINT: ${A2T_ENDPOINT}
|
|
||||||
|
|
||||||
docsum-gaudi-backend-server:
|
docsum-gaudi-backend-server:
|
||||||
image: ${REGISTRY:-opea}/docsum:${TAG:-latest}
|
image: ${REGISTRY:-opea}/docsum:${TAG:-latest}
|
||||||
container_name: docsum-gaudi-backend-server
|
container_name: docsum-gaudi-backend-server
|
||||||
depends_on:
|
depends_on:
|
||||||
- tgi-server
|
- tgi-server
|
||||||
- llm-docsum-tgi
|
- llm-docsum-tgi
|
||||||
- dataprep-multimedia2text
|
|
||||||
- dataprep-video2audio
|
|
||||||
- dataprep-audio2text
|
|
||||||
ports:
|
ports:
|
||||||
- "8888:8888"
|
- "8888:8888"
|
||||||
environment:
|
environment:
|
||||||
@@ -106,8 +75,8 @@ services:
|
|||||||
- https_proxy=${https_proxy}
|
- https_proxy=${https_proxy}
|
||||||
- http_proxy=${http_proxy}
|
- http_proxy=${http_proxy}
|
||||||
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
|
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
|
||||||
- DATA_SERVICE_HOST_IP=${DATA_SERVICE_HOST_IP}
|
|
||||||
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
|
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
|
||||||
|
- ASR_SERVICE_HOST_IP=${ASR_SERVICE_HOST_IP}
|
||||||
|
|
||||||
ipc: host
|
ipc: host
|
||||||
restart: always
|
restart: always
|
||||||
|
|||||||
@@ -13,15 +13,7 @@ export no_proxy="${no_proxy},${host_ip}"
|
|||||||
export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
|
export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
|
||||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||||
export LLM_SERVICE_HOST_IP=${host_ip}
|
export LLM_SERVICE_HOST_IP=${host_ip}
|
||||||
|
export ASR_SERVICE_HOST_IP=${host_ip}
|
||||||
|
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
|
||||||
|
|
||||||
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum"
|
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum"
|
||||||
|
|
||||||
export V2A_SERVICE_HOST_IP=${host_ip}
|
|
||||||
export V2A_ENDPOINT=http://$host_ip:7078
|
|
||||||
|
|
||||||
export A2T_ENDPOINT=http://$host_ip:7066
|
|
||||||
export A2T_SERVICE_HOST_IP=${host_ip}
|
|
||||||
export A2T_SERVICE_PORT=9099
|
|
||||||
|
|
||||||
export DATA_ENDPOINT=http://$host_ip:7079
|
|
||||||
export DATA_SERVICE_HOST_IP=${host_ip}
|
|
||||||
export DATA_SERVICE_PORT=7079
|
|
||||||
|
|||||||
@@ -41,33 +41,6 @@ services:
|
|||||||
dockerfile: comps/asr/whisper/dependency/Dockerfile
|
dockerfile: comps/asr/whisper/dependency/Dockerfile
|
||||||
extends: docsum
|
extends: docsum
|
||||||
image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
|
image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
|
||||||
dataprep-multimedia2text:
|
|
||||||
build:
|
|
||||||
args:
|
|
||||||
http_proxy: ${http_proxy}
|
|
||||||
https_proxy: ${https_proxy}
|
|
||||||
context: GenAIComps
|
|
||||||
dockerfile: comps/dataprep/multimedia2text/Dockerfile
|
|
||||||
extends: docsum
|
|
||||||
image: ${REGISTRY:-opea}/dataprep-multimedia2text:${TAG:-latest}
|
|
||||||
dataprep-audio2text:
|
|
||||||
build:
|
|
||||||
args:
|
|
||||||
http_proxy: ${http_proxy}
|
|
||||||
https_proxy: ${https_proxy}
|
|
||||||
context: GenAIComps
|
|
||||||
dockerfile: comps/dataprep/multimedia2text/audio2text/Dockerfile
|
|
||||||
extends: docsum
|
|
||||||
image: ${REGISTRY:-opea}/dataprep-audio2text:${TAG:-latest}
|
|
||||||
dataprep-video2audio:
|
|
||||||
build:
|
|
||||||
args:
|
|
||||||
http_proxy: ${http_proxy}
|
|
||||||
https_proxy: ${https_proxy}
|
|
||||||
context: GenAIComps
|
|
||||||
dockerfile: comps/dataprep/multimedia2text/video2audio/Dockerfile
|
|
||||||
extends: docsum
|
|
||||||
image: ${REGISTRY:-opea}/dataprep-video2audio:${TAG:-latest}
|
|
||||||
llm-docsum-tgi:
|
llm-docsum-tgi:
|
||||||
build:
|
build:
|
||||||
context: GenAIComps
|
context: GenAIComps
|
||||||
|
|||||||
@@ -2,7 +2,10 @@
|
|||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import base64
|
||||||
import os
|
import os
|
||||||
|
import subprocess
|
||||||
|
import uuid
|
||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
from comps import MegaServiceEndpoint, MicroService, ServiceOrchestrator, ServiceRoleType, ServiceType
|
from comps import MegaServiceEndpoint, MicroService, ServiceOrchestrator, ServiceRoleType, ServiceType
|
||||||
@@ -20,8 +23,8 @@ from fastapi.responses import StreamingResponse
|
|||||||
|
|
||||||
MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))
|
MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))
|
||||||
|
|
||||||
DATA_SERVICE_HOST_IP = os.getenv("DATA_SERVICE_HOST_IP", "0.0.0.0")
|
ASR_SERVICE_HOST_IP = os.getenv("ASR_SERVICE_HOST_IP", "0.0.0.0")
|
||||||
DATA_SERVICE_PORT = int(os.getenv("DATA_SERVICE_PORT", 7079))
|
ASR_SERVICE_PORT = int(os.getenv("ASR_SERVICE_PORT", 7066))
|
||||||
|
|
||||||
LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0")
|
LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0")
|
||||||
LLM_SERVICE_PORT = int(os.getenv("LLM_SERVICE_PORT", 9000))
|
LLM_SERVICE_PORT = int(os.getenv("LLM_SERVICE_PORT", 9000))
|
||||||
@@ -29,11 +32,20 @@ LLM_SERVICE_PORT = int(os.getenv("LLM_SERVICE_PORT", 9000))
|
|||||||
|
|
||||||
def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
|
def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
|
||||||
if self.services[cur_node].service_type == ServiceType.LLM:
|
if self.services[cur_node].service_type == ServiceType.LLM:
|
||||||
|
for key_to_replace in ["text", "asr_result"]:
|
||||||
|
if key_to_replace in inputs:
|
||||||
|
inputs["query"] = inputs[key_to_replace]
|
||||||
|
del inputs[key_to_replace]
|
||||||
|
|
||||||
docsum_parameters = kwargs.get("docsum_parameters", None)
|
docsum_parameters = kwargs.get("docsum_parameters", None)
|
||||||
if docsum_parameters:
|
if docsum_parameters:
|
||||||
docsum_parameters = docsum_parameters.model_dump()
|
docsum_parameters = docsum_parameters.model_dump()
|
||||||
del docsum_parameters["query"]
|
del docsum_parameters["query"]
|
||||||
inputs.update(docsum_parameters)
|
inputs.update(docsum_parameters)
|
||||||
|
elif self.services[cur_node].service_type == ServiceType.ASR:
|
||||||
|
if "video" in inputs:
|
||||||
|
audio_base64 = video2audio(inputs["video"])
|
||||||
|
inputs["audio"] = audio_base64
|
||||||
return inputs
|
return inputs
|
||||||
|
|
||||||
|
|
||||||
@@ -45,6 +57,44 @@ def read_pdf(file):
|
|||||||
return docs
|
return docs
|
||||||
|
|
||||||
|
|
||||||
|
def video2audio(
    video_base64: str,
) -> str:
    """Convert a base64 video string to a base64 audio string using ffmpeg.

    The decoded video is written into a private temporary directory, ffmpeg
    extracts its audio stream into an MP3 file in the same directory, and the
    MP3 bytes are returned base64 encoded. The directory and everything in it
    is removed when the function exits, whether it succeeds or fails.

    Args:
        video_base64 (str): Base64 encoded video string.

    Returns:
        str: Base64 encoded audio string.

    Raises:
        binascii.Error: If ``video_base64`` is not correctly padded base64.
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status
            (for example when the input is not a decodable video).
    """
    # Function-scope import keeps this block self-contained; the rest of the
    # module only needs base64/os/subprocess.
    import tempfile

    video_data = base64.b64decode(video_base64)

    # A private directory avoids name clashes between concurrent requests and
    # does not depend on the current working directory being writable. Letting
    # the context manager do the cleanup also means a missing output file can
    # never mask the real ffmpeg error, as an unconditional os.remove() would.
    with tempfile.TemporaryDirectory() as work_dir:
        temp_video_path = os.path.join(work_dir, "input.mp4")
        temp_audio_path = os.path.join(work_dir, "output.mp3")

        with open(temp_video_path, "wb") as video_file:
            video_file.write(video_data)

        # "-map a" selects only the audio streams; "-q:a 0" asks for the best
        # variable-bitrate quality. ffmpeg output is discarded.
        subprocess.run(
            ["ffmpeg", "-i", temp_video_path, "-q:a", "0", "-map", "a", temp_audio_path],
            check=True,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.STDOUT,
        )

        # Read the extracted audio file and encode it to base64
        with open(temp_audio_path, "rb") as audio_file:
            audio_base64 = base64.b64encode(audio_file.read()).decode("utf-8")

    return audio_base64
|
||||||
|
|
||||||
|
|
||||||
def read_text_from_file(file, save_file_name):
|
def read_text_from_file(file, save_file_name):
|
||||||
import docx2txt
|
import docx2txt
|
||||||
from langchain.text_splitter import CharacterTextSplitter
|
from langchain.text_splitter import CharacterTextSplitter
|
||||||
@@ -78,17 +128,18 @@ class DocSumService:
|
|||||||
self.port = port
|
self.port = port
|
||||||
ServiceOrchestrator.align_inputs = align_inputs
|
ServiceOrchestrator.align_inputs = align_inputs
|
||||||
self.megaservice = ServiceOrchestrator()
|
self.megaservice = ServiceOrchestrator()
|
||||||
|
self.megaservice_text_only = ServiceOrchestrator()
|
||||||
self.endpoint = str(MegaServiceEndpoint.DOC_SUMMARY)
|
self.endpoint = str(MegaServiceEndpoint.DOC_SUMMARY)
|
||||||
|
|
||||||
def add_remote_service(self):
|
def add_remote_service(self):
|
||||||
|
|
||||||
data = MicroService(
|
asr = MicroService(
|
||||||
name="multimedia2text",
|
name="asr",
|
||||||
host=DATA_SERVICE_HOST_IP,
|
host=ASR_SERVICE_HOST_IP,
|
||||||
port=DATA_SERVICE_PORT,
|
port=ASR_SERVICE_PORT,
|
||||||
endpoint="/v1/multimedia2text",
|
endpoint="/v1/asr",
|
||||||
use_remote_service=True,
|
use_remote_service=True,
|
||||||
service_type=ServiceType.DATAPREP,
|
service_type=ServiceType.ASR,
|
||||||
)
|
)
|
||||||
|
|
||||||
llm = MicroService(
|
llm = MicroService(
|
||||||
@@ -100,10 +151,12 @@ class DocSumService:
|
|||||||
service_type=ServiceType.LLM,
|
service_type=ServiceType.LLM,
|
||||||
)
|
)
|
||||||
|
|
||||||
self.megaservice.add(data).add(llm)
|
self.megaservice.add(asr).add(llm)
|
||||||
self.megaservice.flow_to(data, llm)
|
self.megaservice.flow_to(asr, llm)
|
||||||
|
self.megaservice_text_only.add(llm)
|
||||||
|
|
||||||
async def handle_request(self, request: Request, files: List[UploadFile] = File(default=None)):
|
async def handle_request(self, request: Request, files: List[UploadFile] = File(default=None)):
|
||||||
|
"""Accept pure text, or files .txt/.pdf.docx, audio/video base64 string."""
|
||||||
|
|
||||||
if "application/json" in request.headers.get("content-type"):
|
if "application/json" in request.headers.get("content-type"):
|
||||||
data = await request.json()
|
data = await request.json()
|
||||||
@@ -129,11 +182,15 @@ class DocSumService:
|
|||||||
file_summaries = []
|
file_summaries = []
|
||||||
if files:
|
if files:
|
||||||
for file in files:
|
for file in files:
|
||||||
file_path = f"/tmp/{file.filename}"
|
# Fix concurrency issue with the same file name
|
||||||
|
# https://github.com/opea-project/GenAIExamples/issues/1279
|
||||||
|
uid = str(uuid.uuid4())
|
||||||
|
file_path = f"/tmp/{uid}"
|
||||||
|
|
||||||
if data_type is not None and data_type in ["audio", "video"]:
|
if data_type is not None and data_type in ["audio", "video"]:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"Audio and Video file uploads are not supported in docsum with curl request, please use the UI."
|
"Audio and Video file uploads are not supported in docsum with curl request, \
|
||||||
|
please use the UI or pass base64 string of the content directly."
|
||||||
)
|
)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
@@ -181,7 +238,8 @@ class DocSumService:
|
|||||||
chunk_overlap=chunk_overlap,
|
chunk_overlap=chunk_overlap,
|
||||||
chunk_size=chunk_size,
|
chunk_size=chunk_size,
|
||||||
)
|
)
|
||||||
|
text_only = "text" in initial_inputs_data
|
||||||
|
if not text_only:
|
||||||
result_dict, runtime_graph = await self.megaservice.schedule(
|
result_dict, runtime_graph = await self.megaservice.schedule(
|
||||||
initial_inputs=initial_inputs_data, docsum_parameters=docsum_parameters
|
initial_inputs=initial_inputs_data, docsum_parameters=docsum_parameters
|
||||||
)
|
)
|
||||||
@@ -194,6 +252,20 @@ class DocSumService:
|
|||||||
and self.megaservice.services[node].service_type == ServiceType.LLM
|
and self.megaservice.services[node].service_type == ServiceType.LLM
|
||||||
):
|
):
|
||||||
return response
|
return response
|
||||||
|
else:
|
||||||
|
result_dict, runtime_graph = await self.megaservice_text_only.schedule(
|
||||||
|
initial_inputs=initial_inputs_data, docsum_parameters=docsum_parameters
|
||||||
|
)
|
||||||
|
|
||||||
|
for node, response in result_dict.items():
|
||||||
|
# Here it suppose the last microservice in the megaservice is LLM.
|
||||||
|
if (
|
||||||
|
isinstance(response, StreamingResponse)
|
||||||
|
and node == list(self.megaservice.services.keys())[-1]
|
||||||
|
and self.megaservice.services[node].service_type == ServiceType.LLM
|
||||||
|
):
|
||||||
|
return response
|
||||||
|
|
||||||
last_node = runtime_graph.all_leaves()[-1]
|
last_node = runtime_graph.all_leaves()[-1]
|
||||||
response = result_dict[last_node]["text"]
|
response = result_dict[last_node]["text"]
|
||||||
choices = []
|
choices = []
|
||||||
|
|||||||
@@ -135,67 +135,6 @@ data:
|
|||||||
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
|
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
|
||||||
HF_HOME: "/tmp/.cache/huggingface"
|
HF_HOME: "/tmp/.cache/huggingface"
|
||||||
---
|
---
|
||||||
# Source: docsum/charts/tgi/templates/configmap.yaml
|
|
||||||
# Copyright (C) 2024 Intel Corporation
|
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
|
||||||
|
|
||||||
apiVersion: v1
|
|
||||||
kind: ConfigMap
|
|
||||||
metadata:
|
|
||||||
name: docsum-audio2text-config
|
|
||||||
labels:
|
|
||||||
helm.sh/chart: audio2text-1.0.0
|
|
||||||
app.kubernetes.io/name: audio2text
|
|
||||||
app.kubernetes.io/instance: docsum
|
|
||||||
app.kubernetes.io/version: "2.1.0"
|
|
||||||
app.kubernetes.io/managed-by: Helm
|
|
||||||
data:
|
|
||||||
http_proxy: ""
|
|
||||||
https_proxy: ""
|
|
||||||
no_proxy: ""
|
|
||||||
A2T_ENDPOINT: "http://docsum-whisper"
|
|
||||||
---
|
|
||||||
# Source: docsum/charts/tgi/templates/configmap.yaml
|
|
||||||
# Copyright (C) 2024 Intel Corporation
|
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
|
||||||
|
|
||||||
apiVersion: v1
|
|
||||||
kind: ConfigMap
|
|
||||||
metadata:
|
|
||||||
name: docsum-video2audio-config
|
|
||||||
labels:
|
|
||||||
helm.sh/chart: video2audio-1.0.0
|
|
||||||
app.kubernetes.io/name: video2audio
|
|
||||||
app.kubernetes.io/instance: docsum
|
|
||||||
app.kubernetes.io/version: "2.1.0"
|
|
||||||
app.kubernetes.io/managed-by: Helm
|
|
||||||
data:
|
|
||||||
http_proxy: ""
|
|
||||||
https_proxy: ""
|
|
||||||
no_proxy: ""
|
|
||||||
V2A_ENDPOINT: "http://docsum-video2audio"
|
|
||||||
---
|
|
||||||
# Source: docsum/charts/tgi/templates/configmap.yaml
|
|
||||||
# Copyright (C) 2024 Intel Corporation
|
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
|
||||||
|
|
||||||
apiVersion: v1
|
|
||||||
kind: ConfigMap
|
|
||||||
metadata:
|
|
||||||
name: docsum-multimedia2text-config
|
|
||||||
labels:
|
|
||||||
helm.sh/chart: multimedia2text-1.0.0
|
|
||||||
app.kubernetes.io/name: multimedia2text
|
|
||||||
app.kubernetes.io/instance: docsum
|
|
||||||
app.kubernetes.io/version: "2.1.0"
|
|
||||||
app.kubernetes.io/managed-by: Helm
|
|
||||||
data:
|
|
||||||
http_proxy: ""
|
|
||||||
https_proxy: ""
|
|
||||||
no_proxy: ""
|
|
||||||
V2A_ENDPOINT: "http://docsum-video2audio"
|
|
||||||
A2T_ENDPOINT: "http://docsum-whisper"
|
|
||||||
---
|
|
||||||
# Source: docsum/charts/tgi/templates/service.yaml
|
# Source: docsum/charts/tgi/templates/service.yaml
|
||||||
# Copyright (C) 2024 Intel Corporation
|
# Copyright (C) 2024 Intel Corporation
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
@@ -221,81 +160,6 @@ spec:
|
|||||||
app.kubernetes.io/name: whisper
|
app.kubernetes.io/name: whisper
|
||||||
app.kubernetes.io/instance: docsum
|
app.kubernetes.io/instance: docsum
|
||||||
---
|
---
|
||||||
# Source: docsum/charts/tgi/templates/service.yaml
|
|
||||||
# Copyright (C) 2024 Intel Corporation
|
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
|
||||||
|
|
||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
name: docsum-audio2text
|
|
||||||
labels:
|
|
||||||
helm.sh/chart: audio2text-1.0.0
|
|
||||||
app.kubernetes.io/name: audio2text
|
|
||||||
app.kubernetes.io/instance: docsum
|
|
||||||
app.kubernetes.io/version: "2.1.0"
|
|
||||||
app.kubernetes.io/managed-by: Helm
|
|
||||||
spec:
|
|
||||||
type: ClusterIP
|
|
||||||
ports:
|
|
||||||
- port: 9099
|
|
||||||
targetPort: 9199
|
|
||||||
protocol: TCP
|
|
||||||
name: audio2text
|
|
||||||
selector:
|
|
||||||
app.kubernetes.io/name: audio2text
|
|
||||||
app.kubernetes.io/instance: docsum
|
|
||||||
---
|
|
||||||
# Source: docsum/charts/tgi/templates/service.yaml
|
|
||||||
# Copyright (C) 2024 Intel Corporation
|
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
|
||||||
|
|
||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
name: docsum-video2audio
|
|
||||||
labels:
|
|
||||||
helm.sh/chart: video2audio-1.0.0
|
|
||||||
app.kubernetes.io/name: video2audio
|
|
||||||
app.kubernetes.io/instance: docsum
|
|
||||||
app.kubernetes.io/version: "2.1.0"
|
|
||||||
app.kubernetes.io/managed-by: Helm
|
|
||||||
spec:
|
|
||||||
type: ClusterIP
|
|
||||||
ports:
|
|
||||||
- port: 7078
|
|
||||||
targetPort: 7078
|
|
||||||
protocol: TCP
|
|
||||||
name: video2audio
|
|
||||||
selector:
|
|
||||||
app.kubernetes.io/name: video2audio
|
|
||||||
app.kubernetes.io/instance: docsum
|
|
||||||
---
|
|
||||||
# Source: docsum/charts/tgi/templates/service.yaml
|
|
||||||
# Copyright (C) 2024 Intel Corporation
|
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
|
||||||
|
|
||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
name: docsum-multimedia2text
|
|
||||||
labels:
|
|
||||||
helm.sh/chart: multimedia2text-1.0.0
|
|
||||||
app.kubernetes.io/name: multimedia2text
|
|
||||||
app.kubernetes.io/instance: docsum
|
|
||||||
app.kubernetes.io/version: "2.1.0"
|
|
||||||
app.kubernetes.io/managed-by: Helm
|
|
||||||
spec:
|
|
||||||
type: ClusterIP
|
|
||||||
ports:
|
|
||||||
- port: 7079
|
|
||||||
targetPort: 7079
|
|
||||||
protocol: TCP
|
|
||||||
name: multimedia2text
|
|
||||||
selector:
|
|
||||||
app.kubernetes.io/name: multimedia2text
|
|
||||||
app.kubernetes.io/instance: docsum
|
|
||||||
---
|
|
||||||
# Source: docsum/charts/docsum-ui/templates/service.yaml
|
# Source: docsum/charts/docsum-ui/templates/service.yaml
|
||||||
# Copyright (C) 2024 Intel Corporation
|
# Copyright (C) 2024 Intel Corporation
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
@@ -801,214 +665,3 @@ spec:
|
|||||||
volumes:
|
volumes:
|
||||||
- name: tmp
|
- name: tmp
|
||||||
emptyDir: {}
|
emptyDir: {}
|
||||||
---
|
|
||||||
# Source: docsum/charts/audio2text/templates/deployment.yaml
|
|
||||||
# Copyright (C) 2024 Intel Corporation
|
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
|
||||||
apiVersion: apps/v1
|
|
||||||
kind: Deployment
|
|
||||||
metadata:
|
|
||||||
name: docsum-audio2text
|
|
||||||
labels:
|
|
||||||
helm.sh/chart: audio2text-1.0.0
|
|
||||||
app.kubernetes.io/name: audio2text
|
|
||||||
app.kubernetes.io/instance: docsum
|
|
||||||
app.kubernetes.io/version: "v1.0"
|
|
||||||
app.kubernetes.io/managed-by: Helm
|
|
||||||
spec:
|
|
||||||
replicas: 1
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
app.kubernetes.io/name: audio2text
|
|
||||||
app.kubernetes.io/instance: docsum
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/name: audio2text
|
|
||||||
app.kubernetes.io/instance: docsum
|
|
||||||
spec:
|
|
||||||
securityContext:
|
|
||||||
{}
|
|
||||||
containers:
|
|
||||||
- name: docsum
|
|
||||||
envFrom:
|
|
||||||
- configMapRef:
|
|
||||||
name: docsum-audio2text-config
|
|
||||||
securityContext:
|
|
||||||
allowPrivilegeEscalation: false
|
|
||||||
capabilities:
|
|
||||||
drop:
|
|
||||||
- ALL
|
|
||||||
readOnlyRootFilesystem: false
|
|
||||||
runAsNonRoot: true
|
|
||||||
runAsUser: 1000
|
|
||||||
seccompProfile:
|
|
||||||
type: RuntimeDefault
|
|
||||||
image: "opea/dataprep-audio2text:latest"
|
|
||||||
imagePullPolicy: IfNotPresent
|
|
||||||
ports:
|
|
||||||
- name: audio2text
|
|
||||||
containerPort: 9199
|
|
||||||
protocol: TCP
|
|
||||||
volumeMounts:
|
|
||||||
- mountPath: /tmp
|
|
||||||
name: tmp
|
|
||||||
resources:
|
|
||||||
{}
|
|
||||||
volumes:
|
|
||||||
- name: tmp
|
|
||||||
emptyDir: {}
|
|
||||||
---
|
|
||||||
# Source: docsum/charts/video2audio/templates/deployment.yaml
|
|
||||||
# Copyright (C) 2024 Intel Corporation
|
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
|
||||||
apiVersion: apps/v1
|
|
||||||
kind: Deployment
|
|
||||||
metadata:
|
|
||||||
name: docsum-video2audio
|
|
||||||
labels:
|
|
||||||
helm.sh/chart: video2audio-1.0.0
|
|
||||||
app.kubernetes.io/name: video2audio
|
|
||||||
app.kubernetes.io/instance: docsum
|
|
||||||
app.kubernetes.io/version: "v1.0"
|
|
||||||
app.kubernetes.io/managed-by: Helm
|
|
||||||
spec:
|
|
||||||
replicas: 1
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
app.kubernetes.io/name: video2audio
|
|
||||||
app.kubernetes.io/instance: docsum
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/name: video2audio
|
|
||||||
app.kubernetes.io/instance: docsum
|
|
||||||
spec:
|
|
||||||
securityContext:
|
|
||||||
{}
|
|
||||||
containers:
|
|
||||||
- name: docsum
|
|
||||||
envFrom:
|
|
||||||
- configMapRef:
|
|
||||||
name: docsum-video2audio-config
|
|
||||||
securityContext:
|
|
||||||
allowPrivilegeEscalation: false
|
|
||||||
capabilities:
|
|
||||||
drop:
|
|
||||||
- ALL
|
|
||||||
readOnlyRootFilesystem: false
|
|
||||||
runAsNonRoot: true
|
|
||||||
runAsUser: 1000
|
|
||||||
seccompProfile:
|
|
||||||
type: RuntimeDefault
|
|
||||||
image: "opea/dataprep-video2audio:latest"
|
|
||||||
imagePullPolicy: IfNotPresent
|
|
||||||
ports:
|
|
||||||
- name: video2audio
|
|
||||||
containerPort: 7078
|
|
||||||
protocol: TCP
|
|
||||||
volumeMounts:
|
|
||||||
- mountPath: /tmp
|
|
||||||
name: tmp
|
|
||||||
livenessProbe:
|
|
||||||
failureThreshold: 24
|
|
||||||
httpGet:
|
|
||||||
path: v1/health_check
|
|
||||||
port: video2audio
|
|
||||||
initialDelaySeconds: 5
|
|
||||||
periodSeconds: 5
|
|
||||||
readinessProbe:
|
|
||||||
httpGet:
|
|
||||||
path: v1/health_check
|
|
||||||
port: video2audio
|
|
||||||
initialDelaySeconds: 5
|
|
||||||
periodSeconds: 5
|
|
||||||
startupProbe:
|
|
||||||
failureThreshold: 120
|
|
||||||
httpGet:
|
|
||||||
path: v1/health_check
|
|
||||||
port: video2audio
|
|
||||||
initialDelaySeconds: 5
|
|
||||||
periodSeconds: 5
|
|
||||||
resources:
|
|
||||||
{}
|
|
||||||
volumes:
|
|
||||||
- name: tmp
|
|
||||||
emptyDir: {}
|
|
||||||
---
|
|
||||||
# Source: docsum/charts/multimedia2text/templates/deployment.yaml
|
|
||||||
# Copyright (C) 2024 Intel Corporation
|
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
|
||||||
apiVersion: apps/v1
|
|
||||||
kind: Deployment
|
|
||||||
metadata:
|
|
||||||
name: docsum-multimedia2text
|
|
||||||
labels:
|
|
||||||
helm.sh/chart: multimedia2text-1.0.0
|
|
||||||
app.kubernetes.io/name: multimedia2text
|
|
||||||
app.kubernetes.io/instance: docsum
|
|
||||||
app.kubernetes.io/version: "v1.0"
|
|
||||||
app.kubernetes.io/managed-by: Helm
|
|
||||||
spec:
|
|
||||||
replicas: 1
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
app.kubernetes.io/name: multimedia2text
|
|
||||||
app.kubernetes.io/instance: docsum
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/name: multimedia2text
|
|
||||||
app.kubernetes.io/instance: docsum
|
|
||||||
spec:
|
|
||||||
securityContext:
|
|
||||||
{}
|
|
||||||
containers:
|
|
||||||
- name: docsum
|
|
||||||
envFrom:
|
|
||||||
- configMapRef:
|
|
||||||
name: docsum-multimedia2text-config
|
|
||||||
securityContext:
|
|
||||||
allowPrivilegeEscalation: false
|
|
||||||
capabilities:
|
|
||||||
drop:
|
|
||||||
- ALL
|
|
||||||
readOnlyRootFilesystem: false
|
|
||||||
runAsNonRoot: true
|
|
||||||
runAsUser: 1000
|
|
||||||
seccompProfile:
|
|
||||||
type: RuntimeDefault
|
|
||||||
image: "opea/dataprep-multimedia2text:latest"
|
|
||||||
imagePullPolicy: IfNotPresent
|
|
||||||
ports:
|
|
||||||
- name: multimedia2text
|
|
||||||
containerPort: 7079
|
|
||||||
protocol: TCP
|
|
||||||
volumeMounts:
|
|
||||||
- mountPath: /tmp
|
|
||||||
name: tmp
|
|
||||||
livenessProbe:
|
|
||||||
failureThreshold: 24
|
|
||||||
httpGet:
|
|
||||||
path: v1/health_check
|
|
||||||
port: multimedia2text
|
|
||||||
initialDelaySeconds: 5
|
|
||||||
periodSeconds: 5
|
|
||||||
readinessProbe:
|
|
||||||
httpGet:
|
|
||||||
path: v1/health_check
|
|
||||||
port: multimedia2text
|
|
||||||
initialDelaySeconds: 5
|
|
||||||
periodSeconds: 5
|
|
||||||
startupProbe:
|
|
||||||
failureThreshold: 120
|
|
||||||
httpGet:
|
|
||||||
path: v1/health_check
|
|
||||||
port: multimedia2text
|
|
||||||
initialDelaySeconds: 5
|
|
||||||
periodSeconds: 5
|
|
||||||
resources:
|
|
||||||
{}
|
|
||||||
volumes:
|
|
||||||
- name: tmp
|
|
||||||
emptyDir: {}
|
|
||||||
|
|||||||
@@ -136,66 +136,30 @@ data:
|
|||||||
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
|
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
|
||||||
HF_HOME: "/tmp/.cache/huggingface"
|
HF_HOME: "/tmp/.cache/huggingface"
|
||||||
---
|
---
|
||||||
# Source: docsum/charts/tgi/templates/configmap.yaml
|
# Source: docsum/charts/tgi/templates/service.yaml
|
||||||
# Copyright (C) 2024 Intel Corporation
|
# Copyright (C) 2024 Intel Corporation
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
kind: ConfigMap
|
kind: Service
|
||||||
metadata:
|
metadata:
|
||||||
name: docsum-audio2text-config
|
name: docsum-whisper
|
||||||
labels:
|
labels:
|
||||||
helm.sh/chart: audio2text-1.0.0
|
helm.sh/chart: whisper-1.0.0
|
||||||
app.kubernetes.io/name: audio2text
|
app.kubernetes.io/name: whisper
|
||||||
app.kubernetes.io/instance: docsum
|
app.kubernetes.io/instance: docsum
|
||||||
app.kubernetes.io/version: "2.1.0"
|
app.kubernetes.io/version: "2.1.0"
|
||||||
app.kubernetes.io/managed-by: Helm
|
app.kubernetes.io/managed-by: Helm
|
||||||
data:
|
spec:
|
||||||
http_proxy: ""
|
type: ClusterIP
|
||||||
https_proxy: ""
|
ports:
|
||||||
no_proxy: ""
|
- port: 7066
|
||||||
A2T_ENDPOINT: "http://docsum-whisper"
|
targetPort: 7066
|
||||||
---
|
protocol: TCP
|
||||||
# Source: docsum/charts/tgi/templates/configmap.yaml
|
name: whisper
|
||||||
# Copyright (C) 2024 Intel Corporation
|
selector:
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
app.kubernetes.io/name: whisper
|
||||||
|
|
||||||
apiVersion: v1
|
|
||||||
kind: ConfigMap
|
|
||||||
metadata:
|
|
||||||
name: docsum-video2audio-config
|
|
||||||
labels:
|
|
||||||
helm.sh/chart: video2audio-1.0.0
|
|
||||||
app.kubernetes.io/name: video2audio
|
|
||||||
app.kubernetes.io/instance: docsum
|
app.kubernetes.io/instance: docsum
|
||||||
app.kubernetes.io/version: "2.1.0"
|
|
||||||
app.kubernetes.io/managed-by: Helm
|
|
||||||
data:
|
|
||||||
http_proxy: ""
|
|
||||||
https_proxy: ""
|
|
||||||
no_proxy: ""
|
|
||||||
V2A_ENDPOINT: "http://docsum-video2audio"
|
|
||||||
---
|
|
||||||
# Source: docsum/charts/tgi/templates/configmap.yaml
|
|
||||||
# Copyright (C) 2024 Intel Corporation
|
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
|
||||||
|
|
||||||
apiVersion: v1
|
|
||||||
kind: ConfigMap
|
|
||||||
metadata:
|
|
||||||
name: docsum-multimedia2text-config
|
|
||||||
labels:
|
|
||||||
helm.sh/chart: multimedia2text-1.0.0
|
|
||||||
app.kubernetes.io/name: multimedia2text
|
|
||||||
app.kubernetes.io/instance: docsum
|
|
||||||
app.kubernetes.io/version: "2.1.0"
|
|
||||||
app.kubernetes.io/managed-by: Helm
|
|
||||||
data:
|
|
||||||
http_proxy: ""
|
|
||||||
https_proxy: ""
|
|
||||||
no_proxy: ""
|
|
||||||
V2A_ENDPOINT: "http://docsum-video2audio"
|
|
||||||
A2T_ENDPOINT: "http://docsum-whisper"
|
|
||||||
---
|
---
|
||||||
# Source: docsum/charts/docsum-ui/templates/service.yaml
|
# Source: docsum/charts/docsum-ui/templates/service.yaml
|
||||||
# Copyright (C) 2024 Intel Corporation
|
# Copyright (C) 2024 Intel Corporation
|
||||||
@@ -288,106 +252,6 @@ spec:
|
|||||||
app: docsum-nginx
|
app: docsum-nginx
|
||||||
type: NodePort
|
type: NodePort
|
||||||
---
|
---
|
||||||
# Source: docsum/charts/tgi/templates/service.yaml
|
|
||||||
# Copyright (C) 2024 Intel Corporation
|
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
|
||||||
|
|
||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
name: docsum-whisper
|
|
||||||
labels:
|
|
||||||
helm.sh/chart: whisper-1.0.0
|
|
||||||
app.kubernetes.io/name: whisper
|
|
||||||
app.kubernetes.io/instance: docsum
|
|
||||||
app.kubernetes.io/version: "2.1.0"
|
|
||||||
app.kubernetes.io/managed-by: Helm
|
|
||||||
spec:
|
|
||||||
type: ClusterIP
|
|
||||||
ports:
|
|
||||||
- port: 7066
|
|
||||||
targetPort: 7066
|
|
||||||
protocol: TCP
|
|
||||||
name: whisper
|
|
||||||
selector:
|
|
||||||
app.kubernetes.io/name: whisper
|
|
||||||
app.kubernetes.io/instance: docsum
|
|
||||||
---
|
|
||||||
# Source: docsum/charts/tgi/templates/service.yaml
|
|
||||||
# Copyright (C) 2024 Intel Corporation
|
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
|
||||||
|
|
||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
name: docsum-audio2text
|
|
||||||
labels:
|
|
||||||
helm.sh/chart: audio2text-1.0.0
|
|
||||||
app.kubernetes.io/name: audio2text
|
|
||||||
app.kubernetes.io/instance: docsum
|
|
||||||
app.kubernetes.io/version: "2.1.0"
|
|
||||||
app.kubernetes.io/managed-by: Helm
|
|
||||||
spec:
|
|
||||||
type: ClusterIP
|
|
||||||
ports:
|
|
||||||
- port: 9099
|
|
||||||
targetPort: 9199
|
|
||||||
protocol: TCP
|
|
||||||
name: audio2text
|
|
||||||
selector:
|
|
||||||
app.kubernetes.io/name: audio2text
|
|
||||||
app.kubernetes.io/instance: docsum
|
|
||||||
---
|
|
||||||
# Source: docsum/charts/tgi/templates/service.yaml
|
|
||||||
# Copyright (C) 2024 Intel Corporation
|
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
|
||||||
|
|
||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
name: docsum-video2audio
|
|
||||||
labels:
|
|
||||||
helm.sh/chart: video2audio-1.0.0
|
|
||||||
app.kubernetes.io/name: video2audio
|
|
||||||
app.kubernetes.io/instance: docsum
|
|
||||||
app.kubernetes.io/version: "2.1.0"
|
|
||||||
app.kubernetes.io/managed-by: Helm
|
|
||||||
spec:
|
|
||||||
type: ClusterIP
|
|
||||||
ports:
|
|
||||||
- port: 7078
|
|
||||||
targetPort: 7078
|
|
||||||
protocol: TCP
|
|
||||||
name: video2audio
|
|
||||||
selector:
|
|
||||||
app.kubernetes.io/name: video2audio
|
|
||||||
app.kubernetes.io/instance: docsum
|
|
||||||
---
|
|
||||||
# Source: docsum/charts/tgi/templates/service.yaml
|
|
||||||
# Copyright (C) 2024 Intel Corporation
|
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
|
||||||
|
|
||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
name: docsum-multimedia2text
|
|
||||||
labels:
|
|
||||||
helm.sh/chart: multimedia2text-1.0.0
|
|
||||||
app.kubernetes.io/name: multimedia2text
|
|
||||||
app.kubernetes.io/instance: docsum
|
|
||||||
app.kubernetes.io/version: "2.1.0"
|
|
||||||
app.kubernetes.io/managed-by: Helm
|
|
||||||
spec:
|
|
||||||
type: ClusterIP
|
|
||||||
ports:
|
|
||||||
- port: 7079
|
|
||||||
targetPort: 7079
|
|
||||||
protocol: TCP
|
|
||||||
name: multimedia2text
|
|
||||||
selector:
|
|
||||||
app.kubernetes.io/name: multimedia2text
|
|
||||||
app.kubernetes.io/instance: docsum
|
|
||||||
---
|
|
||||||
# Source: docsum/templates/service.yaml
|
# Source: docsum/templates/service.yaml
|
||||||
# Copyright (C) 2024 Intel Corporation
|
# Copyright (C) 2024 Intel Corporation
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
@@ -805,214 +669,3 @@ spec:
|
|||||||
volumes:
|
volumes:
|
||||||
- name: tmp
|
- name: tmp
|
||||||
emptyDir: {}
|
emptyDir: {}
|
||||||
---
|
|
||||||
# Source: docsum/charts/audio2text/templates/deployment.yaml
|
|
||||||
# Copyright (C) 2024 Intel Corporation
|
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
|
||||||
apiVersion: apps/v1
|
|
||||||
kind: Deployment
|
|
||||||
metadata:
|
|
||||||
name: docsum-audio2text
|
|
||||||
labels:
|
|
||||||
helm.sh/chart: audio2text-1.0.0
|
|
||||||
app.kubernetes.io/name: audio2text
|
|
||||||
app.kubernetes.io/instance: docsum
|
|
||||||
app.kubernetes.io/version: "v1.0"
|
|
||||||
app.kubernetes.io/managed-by: Helm
|
|
||||||
spec:
|
|
||||||
replicas: 1
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
app.kubernetes.io/name: audio2text
|
|
||||||
app.kubernetes.io/instance: docsum
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/name: audio2text
|
|
||||||
app.kubernetes.io/instance: docsum
|
|
||||||
spec:
|
|
||||||
securityContext:
|
|
||||||
{}
|
|
||||||
containers:
|
|
||||||
- name: docsum
|
|
||||||
envFrom:
|
|
||||||
- configMapRef:
|
|
||||||
name: docsum-audio2text-config
|
|
||||||
securityContext:
|
|
||||||
allowPrivilegeEscalation: false
|
|
||||||
capabilities:
|
|
||||||
drop:
|
|
||||||
- ALL
|
|
||||||
readOnlyRootFilesystem: false
|
|
||||||
runAsNonRoot: true
|
|
||||||
runAsUser: 1000
|
|
||||||
seccompProfile:
|
|
||||||
type: RuntimeDefault
|
|
||||||
image: "opea/dataprep-audio2text:latest"
|
|
||||||
imagePullPolicy: IfNotPresent
|
|
||||||
ports:
|
|
||||||
- name: audio2text
|
|
||||||
containerPort: 9199
|
|
||||||
protocol: TCP
|
|
||||||
volumeMounts:
|
|
||||||
- mountPath: /tmp
|
|
||||||
name: tmp
|
|
||||||
resources:
|
|
||||||
{}
|
|
||||||
volumes:
|
|
||||||
- name: tmp
|
|
||||||
emptyDir: {}
|
|
||||||
---
|
|
||||||
# Source: docsum/charts/video2audio/templates/deployment.yaml
|
|
||||||
# Copyright (C) 2024 Intel Corporation
|
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
|
||||||
apiVersion: apps/v1
|
|
||||||
kind: Deployment
|
|
||||||
metadata:
|
|
||||||
name: docsum-video2audio
|
|
||||||
labels:
|
|
||||||
helm.sh/chart: video2audio-1.0.0
|
|
||||||
app.kubernetes.io/name: video2audio
|
|
||||||
app.kubernetes.io/instance: docsum
|
|
||||||
app.kubernetes.io/version: "v1.0"
|
|
||||||
app.kubernetes.io/managed-by: Helm
|
|
||||||
spec:
|
|
||||||
replicas: 1
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
app.kubernetes.io/name: video2audio
|
|
||||||
app.kubernetes.io/instance: docsum
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/name: video2audio
|
|
||||||
app.kubernetes.io/instance: docsum
|
|
||||||
spec:
|
|
||||||
securityContext:
|
|
||||||
{}
|
|
||||||
containers:
|
|
||||||
- name: docsum
|
|
||||||
envFrom:
|
|
||||||
- configMapRef:
|
|
||||||
name: docsum-video2audio-config
|
|
||||||
securityContext:
|
|
||||||
allowPrivilegeEscalation: false
|
|
||||||
capabilities:
|
|
||||||
drop:
|
|
||||||
- ALL
|
|
||||||
readOnlyRootFilesystem: false
|
|
||||||
runAsNonRoot: true
|
|
||||||
runAsUser: 1000
|
|
||||||
seccompProfile:
|
|
||||||
type: RuntimeDefault
|
|
||||||
image: "opea/dataprep-video2audio:latest"
|
|
||||||
imagePullPolicy: IfNotPresent
|
|
||||||
ports:
|
|
||||||
- name: video2audio
|
|
||||||
containerPort: 7078
|
|
||||||
protocol: TCP
|
|
||||||
volumeMounts:
|
|
||||||
- mountPath: /tmp
|
|
||||||
name: tmp
|
|
||||||
livenessProbe:
|
|
||||||
failureThreshold: 24
|
|
||||||
httpGet:
|
|
||||||
path: v1/health_check
|
|
||||||
port: video2audio
|
|
||||||
initialDelaySeconds: 5
|
|
||||||
periodSeconds: 5
|
|
||||||
readinessProbe:
|
|
||||||
httpGet:
|
|
||||||
path: v1/health_check
|
|
||||||
port: video2audio
|
|
||||||
initialDelaySeconds: 5
|
|
||||||
periodSeconds: 5
|
|
||||||
startupProbe:
|
|
||||||
failureThreshold: 120
|
|
||||||
httpGet:
|
|
||||||
path: v1/health_check
|
|
||||||
port: video2audio
|
|
||||||
initialDelaySeconds: 5
|
|
||||||
periodSeconds: 5
|
|
||||||
resources:
|
|
||||||
{}
|
|
||||||
volumes:
|
|
||||||
- name: tmp
|
|
||||||
emptyDir: {}
|
|
||||||
---
|
|
||||||
# Source: docsum/charts/multimedia2text/templates/deployment.yaml
|
|
||||||
# Copyright (C) 2024 Intel Corporation
|
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
|
||||||
apiVersion: apps/v1
|
|
||||||
kind: Deployment
|
|
||||||
metadata:
|
|
||||||
name: docsum-multimedia2text
|
|
||||||
labels:
|
|
||||||
helm.sh/chart: multimedia2text-1.0.0
|
|
||||||
app.kubernetes.io/name: multimedia2text
|
|
||||||
app.kubernetes.io/instance: docsum
|
|
||||||
app.kubernetes.io/version: "v1.0"
|
|
||||||
app.kubernetes.io/managed-by: Helm
|
|
||||||
spec:
|
|
||||||
replicas: 1
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
app.kubernetes.io/name: multimedia2text
|
|
||||||
app.kubernetes.io/instance: docsum
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/name: multimedia2text
|
|
||||||
app.kubernetes.io/instance: docsum
|
|
||||||
spec:
|
|
||||||
securityContext:
|
|
||||||
{}
|
|
||||||
containers:
|
|
||||||
- name: docsum
|
|
||||||
envFrom:
|
|
||||||
- configMapRef:
|
|
||||||
name: docsum-multimedia2text-config
|
|
||||||
securityContext:
|
|
||||||
allowPrivilegeEscalation: false
|
|
||||||
capabilities:
|
|
||||||
drop:
|
|
||||||
- ALL
|
|
||||||
readOnlyRootFilesystem: false
|
|
||||||
runAsNonRoot: true
|
|
||||||
runAsUser: 1000
|
|
||||||
seccompProfile:
|
|
||||||
type: RuntimeDefault
|
|
||||||
image: "opea/dataprep-multimedia2text:latest"
|
|
||||||
imagePullPolicy: IfNotPresent
|
|
||||||
ports:
|
|
||||||
- name: multimedia2text
|
|
||||||
containerPort: 7079
|
|
||||||
protocol: TCP
|
|
||||||
volumeMounts:
|
|
||||||
- mountPath: /tmp
|
|
||||||
name: tmp
|
|
||||||
livenessProbe:
|
|
||||||
failureThreshold: 24
|
|
||||||
httpGet:
|
|
||||||
path: v1/health_check
|
|
||||||
port: multimedia2text
|
|
||||||
initialDelaySeconds: 5
|
|
||||||
periodSeconds: 5
|
|
||||||
readinessProbe:
|
|
||||||
httpGet:
|
|
||||||
path: v1/health_check
|
|
||||||
port: multimedia2text
|
|
||||||
initialDelaySeconds: 5
|
|
||||||
periodSeconds: 5
|
|
||||||
startupProbe:
|
|
||||||
failureThreshold: 120
|
|
||||||
httpGet:
|
|
||||||
path: v1/health_check
|
|
||||||
port: multimedia2text
|
|
||||||
initialDelaySeconds: 5
|
|
||||||
periodSeconds: 5
|
|
||||||
resources:
|
|
||||||
{}
|
|
||||||
volumes:
|
|
||||||
- name: tmp
|
|
||||||
emptyDir: {}
|
|
||||||
|
|||||||
@@ -21,20 +21,10 @@ export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
|
|||||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||||
export LLM_SERVICE_HOST_IP=${host_ip}
|
export LLM_SERVICE_HOST_IP=${host_ip}
|
||||||
|
export ASR_SERVICE_HOST_IP=${host_ip}
|
||||||
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum"
|
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum"
|
||||||
export no_proxy="${no_proxy},${host_ip}"
|
export no_proxy="${no_proxy},${host_ip}"
|
||||||
|
|
||||||
export V2A_SERVICE_HOST_IP=${host_ip}
|
|
||||||
export V2A_ENDPOINT=http://$host_ip:7078
|
|
||||||
|
|
||||||
export A2T_ENDPOINT=http://$host_ip:7066
|
|
||||||
export A2T_SERVICE_HOST_IP=${host_ip}
|
|
||||||
export A2T_SERVICE_PORT=9199
|
|
||||||
|
|
||||||
export DATA_ENDPOINT=http://$host_ip:7079
|
|
||||||
export DATA_SERVICE_HOST_IP=${host_ip}
|
|
||||||
export DATA_SERVICE_PORT=7079
|
|
||||||
|
|
||||||
WORKPATH=$(dirname "$PWD")
|
WORKPATH=$(dirname "$PWD")
|
||||||
LOG_PATH="$WORKPATH/tests"
|
LOG_PATH="$WORKPATH/tests"
|
||||||
|
|
||||||
@@ -47,7 +37,7 @@ function build_docker_images() {
|
|||||||
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
|
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
|
||||||
|
|
||||||
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
||||||
service_list="docsum docsum-gradio-ui whisper dataprep-multimedia2text dataprep-audio2text dataprep-video2audio llm-docsum-tgi"
|
service_list="docsum docsum-gradio-ui whisper llm-docsum-tgi"
|
||||||
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
|
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
|
||||||
|
|
||||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
|
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
|
||||||
@@ -187,46 +177,6 @@ function validate_microservices() {
|
|||||||
"whisper-server" \
|
"whisper-server" \
|
||||||
"{\"audio\": \"$(input_data_for_test "audio")\"}"
|
"{\"audio\": \"$(input_data_for_test "audio")\"}"
|
||||||
|
|
||||||
# Audio2Text service
|
|
||||||
validate_services_json \
|
|
||||||
"${host_ip}:9199/v1/audio/transcriptions" \
|
|
||||||
'"query":"well"' \
|
|
||||||
"dataprep-audio2text" \
|
|
||||||
"dataprep-audio2text-server" \
|
|
||||||
"{\"byte_str\": \"$(input_data_for_test "audio")\"}"
|
|
||||||
|
|
||||||
# Video2Audio service
|
|
||||||
validate_services_json \
|
|
||||||
"${host_ip}:7078/v1/video2audio" \
|
|
||||||
"SUQzBAAAAAAAI1RTU0UAAAAPAAADTGF2ZjU4LjI5LjEwMAAAAAAAAAAAAAAA//tQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAASW5mbwAAAA8AAAAIAAAN3wAtLS0tLS0tLS0tLS1LS0tLS0tLS0tLS0tpaWlpaWlpaWlpaWlph4eHh4eHh4eHh4eHpaWlpaWlpaWlpaWlpcPDw8PDw8PDw8PDw+Hh4eHh4eHh4eHh4eH///////////////8AAAAATGF2YzU4LjU0AAAAAAAAAAAAAAAAJAYwAAAAAAAADd95t4qPAAAAAAAAAAAAAAAAAAAAAP/7kGQAAAMhClSVMEACMOAabaCMAREA" \
|
|
||||||
"dataprep-video2audio" \
|
|
||||||
"dataprep-video2audio-server" \
|
|
||||||
"{\"byte_str\": \"$(input_data_for_test "video")\"}"
|
|
||||||
|
|
||||||
# Docsum Data service - video
|
|
||||||
validate_services_json \
|
|
||||||
"${host_ip}:7079/v1/multimedia2text" \
|
|
||||||
"well" \
|
|
||||||
"dataprep-multimedia2text" \
|
|
||||||
"dataprep-multimedia2text" \
|
|
||||||
"{\"video\": \"$(input_data_for_test "video")\"}"
|
|
||||||
|
|
||||||
# Docsum Data service - audio
|
|
||||||
validate_services_json \
|
|
||||||
"${host_ip}:7079/v1/multimedia2text" \
|
|
||||||
"well" \
|
|
||||||
"dataprep-multimedia2text" \
|
|
||||||
"dataprep-multimedia2text" \
|
|
||||||
"{\"audio\": \"$(input_data_for_test "audio")\"}"
|
|
||||||
|
|
||||||
# Docsum Data service - text
|
|
||||||
validate_services_json \
|
|
||||||
"${host_ip}:7079/v1/multimedia2text" \
|
|
||||||
"THIS IS A TEST >>>> and a number of states are starting to adopt them voluntarily special correspondent john delenco" \
|
|
||||||
"dataprep-multimedia2text" \
|
|
||||||
"dataprep-multimedia2text" \
|
|
||||||
"{\"text\": \"$(input_data_for_test "text")\"}"
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function validate_megaservice_text() {
|
function validate_megaservice_text() {
|
||||||
|
|||||||
@@ -29,24 +29,17 @@ export DOCSUM_BACKEND_SERVER_PORT="8888"
|
|||||||
export DOCSUM_FRONTEND_PORT="5552"
|
export DOCSUM_FRONTEND_PORT="5552"
|
||||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||||
export LLM_SERVICE_HOST_IP=${host_ip}
|
export LLM_SERVICE_HOST_IP=${host_ip}
|
||||||
|
export ASR_SERVICE_HOST_IP=${host_ip}
|
||||||
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/docsum"
|
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/docsum"
|
||||||
export DOCSUM_CARD_ID="card1"
|
export DOCSUM_CARD_ID="card1"
|
||||||
export DOCSUM_RENDER_ID="renderD136"
|
export DOCSUM_RENDER_ID="renderD136"
|
||||||
export V2A_SERVICE_HOST_IP=${host_ip}
|
|
||||||
export V2A_ENDPOINT=http://${host_ip}:7078
|
|
||||||
export A2T_ENDPOINT=http://${host_ip}:7066
|
|
||||||
export A2T_SERVICE_HOST_IP=${host_ip}
|
|
||||||
export A2T_SERVICE_PORT=9099
|
|
||||||
export DATA_ENDPOINT=http://${host_ip}:7079
|
|
||||||
export DATA_SERVICE_HOST_IP=${host_ip}
|
|
||||||
export DATA_SERVICE_PORT=7079
|
|
||||||
|
|
||||||
function build_docker_images() {
|
function build_docker_images() {
|
||||||
cd $WORKPATH/docker_image_build
|
cd $WORKPATH/docker_image_build
|
||||||
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
|
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
|
||||||
|
|
||||||
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
||||||
service_list="docsum docsum-gradio-ui whisper dataprep-multimedia2text dataprep-audio2text dataprep-video2audio llm-docsum-tgi"
|
service_list="docsum docsum-gradio-ui whisper llm-docsum-tgi"
|
||||||
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
|
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
|
||||||
|
|
||||||
docker pull ghcr.io/huggingface/text-generation-inference:1.4
|
docker pull ghcr.io/huggingface/text-generation-inference:1.4
|
||||||
@@ -141,46 +134,6 @@ function validate_microservices() {
|
|||||||
"whisper-service" \
|
"whisper-service" \
|
||||||
"{\"audio\": \"$(input_data_for_test "audio")\"}"
|
"{\"audio\": \"$(input_data_for_test "audio")\"}"
|
||||||
|
|
||||||
# Audio2Text service
|
|
||||||
validate_services \
|
|
||||||
"${host_ip}:9099/v1/audio/transcriptions" \
|
|
||||||
'"query":"well"' \
|
|
||||||
"dataprep-audio2text" \
|
|
||||||
"dataprep-audio2text-service" \
|
|
||||||
"{\"byte_str\": \"$(input_data_for_test "audio")\"}"
|
|
||||||
|
|
||||||
# Video2Audio service
|
|
||||||
validate_services \
|
|
||||||
"${host_ip}:7078/v1/video2audio" \
|
|
||||||
"SUQzBAAAAAAAI1RTU0UAAAAPAAADTGF2ZjU4LjI5LjEwMAAAAAAAAAAAAAAA//tQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAASW5mbwAAAA8AAAAIAAAN3wAtLS0tLS0tLS0tLS1LS0tLS0tLS0tLS0tpaWlpaWlpaWlpaWlph4eHh4eHh4eHh4eHpaWlpaWlpaWlpaWlpcPDw8PDw8PDw8PDw+Hh4eHh4eHh4eHh4eH///////////////8AAAAATGF2YzU4LjU0AAAAAAAAAAAAAAAAJAYwAAAAAAAADd95t4qPAAAAAAAAAAAAAAAAAAAAAP/7kGQAAAMhClSVMEACMOAabaCMAREA" \
|
|
||||||
"dataprep-video2audio" \
|
|
||||||
"dataprep-video2audio-service" \
|
|
||||||
"{\"byte_str\": \"$(input_data_for_test "video")\"}"
|
|
||||||
|
|
||||||
# Docsum Data service - video
|
|
||||||
validate_services \
|
|
||||||
"${host_ip}:7079/v1/multimedia2text" \
|
|
||||||
"well" \
|
|
||||||
"dataprep-multimedia2text-service" \
|
|
||||||
"dataprep-multimedia2text" \
|
|
||||||
"{\"video\": \"$(input_data_for_test "video")\"}"
|
|
||||||
|
|
||||||
# Docsum Data service - audio
|
|
||||||
validate_services \
|
|
||||||
"${host_ip}:7079/v1/multimedia2text" \
|
|
||||||
"well" \
|
|
||||||
"dataprep-multimedia2text-service" \
|
|
||||||
"dataprep-multimedia2text" \
|
|
||||||
"{\"audio\": \"$(input_data_for_test "audio")\"}"
|
|
||||||
|
|
||||||
# Docsum Data service - text
|
|
||||||
validate_services \
|
|
||||||
"${host_ip}:7079/v1/multimedia2text" \
|
|
||||||
"THIS IS A TEST >>>> and a number of states are starting to adopt them voluntarily special correspondent john delenco" \
|
|
||||||
"dataprep-multimedia2text-service" \
|
|
||||||
"dataprep-multimedia2text" \
|
|
||||||
"{\"text\": \"$(input_data_for_test "text")\"}"
|
|
||||||
|
|
||||||
# tgi for llm service
|
# tgi for llm service
|
||||||
validate_services \
|
validate_services \
|
||||||
"${host_ip}:8008/generate" \
|
"${host_ip}:8008/generate" \
|
||||||
|
|||||||
@@ -21,20 +21,10 @@ export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
|
|||||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||||
export LLM_SERVICE_HOST_IP=${host_ip}
|
export LLM_SERVICE_HOST_IP=${host_ip}
|
||||||
|
export ASR_SERVICE_HOST_IP=${host_ip}
|
||||||
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum"
|
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum"
|
||||||
export no_proxy="${no_proxy},${host_ip}"
|
export no_proxy="${no_proxy},${host_ip}"
|
||||||
|
|
||||||
export V2A_SERVICE_HOST_IP=${host_ip}
|
|
||||||
export V2A_ENDPOINT=http://$host_ip:7078
|
|
||||||
|
|
||||||
export A2T_ENDPOINT=http://$host_ip:7066
|
|
||||||
export A2T_SERVICE_HOST_IP=${host_ip}
|
|
||||||
export A2T_SERVICE_PORT=9099
|
|
||||||
|
|
||||||
export DATA_ENDPOINT=http://$host_ip:7079
|
|
||||||
export DATA_SERVICE_HOST_IP=${host_ip}
|
|
||||||
export DATA_SERVICE_PORT=7079
|
|
||||||
|
|
||||||
WORKPATH=$(dirname "$PWD")
|
WORKPATH=$(dirname "$PWD")
|
||||||
LOG_PATH="$WORKPATH/tests"
|
LOG_PATH="$WORKPATH/tests"
|
||||||
|
|
||||||
@@ -46,7 +36,7 @@ function build_docker_images() {
|
|||||||
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
|
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
|
||||||
|
|
||||||
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
||||||
service_list="docsum docsum-gradio-ui whisper dataprep-multimedia2text dataprep-audio2text dataprep-video2audio llm-docsum-tgi"
|
service_list="docsum docsum-gradio-ui whisper llm-docsum-tgi"
|
||||||
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
|
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
|
||||||
|
|
||||||
docker pull ghcr.io/huggingface/text-generation-inference:1.4
|
docker pull ghcr.io/huggingface/text-generation-inference:1.4
|
||||||
@@ -189,46 +179,6 @@ function validate_microservices() {
|
|||||||
"whisper-server" \
|
"whisper-server" \
|
||||||
"{\"audio\": \"$(input_data_for_test "audio")\"}"
|
"{\"audio\": \"$(input_data_for_test "audio")\"}"
|
||||||
|
|
||||||
# Audio2Text service
|
|
||||||
validate_services_json \
|
|
||||||
"${host_ip}:9099/v1/audio/transcriptions" \
|
|
||||||
'"query":"well"' \
|
|
||||||
"dataprep-audio2text" \
|
|
||||||
"dataprep-audio2text-server" \
|
|
||||||
"{\"byte_str\": \"$(input_data_for_test "audio")\"}"
|
|
||||||
|
|
||||||
# Video2Audio service
|
|
||||||
validate_services_json \
|
|
||||||
"${host_ip}:7078/v1/video2audio" \
|
|
||||||
"SUQzBAAAAAAAI1RTU0UAAAAPAAADTGF2ZjU4LjI5LjEwMAAAAAAAAAAAAAAA//tQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAASW5mbwAAAA8AAAAIAAAN3wAtLS0tLS0tLS0tLS1LS0tLS0tLS0tLS0tpaWlpaWlpaWlpaWlph4eHh4eHh4eHh4eHpaWlpaWlpaWlpaWlpcPDw8PDw8PDw8PDw+Hh4eHh4eHh4eHh4eH///////////////8AAAAATGF2YzU4LjU0AAAAAAAAAAAAAAAAJAYwAAAAAAAADd95t4qPAAAAAAAAAAAAAAAAAAAAAP/7kGQAAAMhClSVMEACMOAabaCMAREA" \
|
|
||||||
"dataprep-video2audio" \
|
|
||||||
"dataprep-video2audio-server" \
|
|
||||||
"{\"byte_str\": \"$(input_data_for_test "video")\"}"
|
|
||||||
|
|
||||||
# Docsum Data service - video
|
|
||||||
validate_services_json \
|
|
||||||
"${host_ip}:7079/v1/multimedia2text" \
|
|
||||||
"well" \
|
|
||||||
"dataprep-multimedia2text" \
|
|
||||||
"dataprep-multimedia2text" \
|
|
||||||
"{\"video\": \"$(input_data_for_test "video")\"}"
|
|
||||||
|
|
||||||
# Docsum Data service - audio
|
|
||||||
validate_services_json \
|
|
||||||
"${host_ip}:7079/v1/multimedia2text" \
|
|
||||||
"well" \
|
|
||||||
"dataprep-multimedia2text" \
|
|
||||||
"dataprep-multimedia2text" \
|
|
||||||
"{\"audio\": \"$(input_data_for_test "audio")\"}"
|
|
||||||
|
|
||||||
# Docsum Data service - text
|
|
||||||
validate_services_json \
|
|
||||||
"${host_ip}:7079/v1/multimedia2text" \
|
|
||||||
"THIS IS A TEST >>>> and a number of states are starting to adopt them voluntarily special correspondent john delenco" \
|
|
||||||
"dataprep-multimedia2text" \
|
|
||||||
"dataprep-multimedia2text" \
|
|
||||||
"{\"text\": \"$(input_data_for_test "text")\"}"
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function validate_megaservice_text() {
|
function validate_megaservice_text() {
|
||||||
|
|||||||
@@ -77,7 +77,7 @@ class DocSumUI:
|
|||||||
"""
|
"""
|
||||||
logger.info(">>> Reading audio file: %s", file.name)
|
logger.info(">>> Reading audio file: %s", file.name)
|
||||||
base64_str = self.encode_file_to_base64(file)
|
base64_str = self.encode_file_to_base64(file)
|
||||||
return self.generate_summary(base64_str, document_type="audio")
|
return base64_str
|
||||||
|
|
||||||
def read_video_file(self, file):
|
def read_video_file(self, file):
|
||||||
"""Read and process the content of a video file.
|
"""Read and process the content of a video file.
|
||||||
@@ -90,7 +90,7 @@ class DocSumUI:
|
|||||||
"""
|
"""
|
||||||
logger.info(">>> Reading video file: %s", file.name)
|
logger.info(">>> Reading video file: %s", file.name)
|
||||||
base64_str = self.encode_file_to_base64(file)
|
base64_str = self.encode_file_to_base64(file)
|
||||||
return self.generate_summary(base64_str, document_type="video")
|
return base64_str
|
||||||
|
|
||||||
def is_valid_url(self, url):
|
def is_valid_url(self, url):
|
||||||
try:
|
try:
|
||||||
@@ -193,7 +193,7 @@ class DocSumUI:
|
|||||||
|
|
||||||
return str(response.status_code)
|
return str(response.status_code)
|
||||||
|
|
||||||
def create_upload_ui(self, label, file_types, process_function):
|
def create_upload_ui(self, label, file_types, process_function, document_type="text"):
|
||||||
"""Create a Gradio UI for file uploads.
|
"""Create a Gradio UI for file uploads.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
@@ -213,7 +213,11 @@ class DocSumUI:
|
|||||||
generated_text = gr.TextArea(
|
generated_text = gr.TextArea(
|
||||||
label="Text Summary", placeholder="Summarized text will be displayed here"
|
label="Text Summary", placeholder="Summarized text will be displayed here"
|
||||||
)
|
)
|
||||||
upload_btn.upload(lambda file: self.generate_summary(process_function(file)), upload_btn, generated_text)
|
upload_btn.upload(
|
||||||
|
lambda file: self.generate_summary(process_function(file), document_type=document_type),
|
||||||
|
upload_btn,
|
||||||
|
generated_text,
|
||||||
|
)
|
||||||
return upload_ui
|
return upload_ui
|
||||||
|
|
||||||
def render(self):
|
def render(self):
|
||||||
@@ -269,11 +273,15 @@ class DocSumUI:
|
|||||||
label="Please upload audio file (.wav, .mp3)",
|
label="Please upload audio file (.wav, .mp3)",
|
||||||
file_types=[".wav", ".mp3"],
|
file_types=[".wav", ".mp3"],
|
||||||
process_function=self.read_audio_file,
|
process_function=self.read_audio_file,
|
||||||
|
document_type="audio",
|
||||||
)
|
)
|
||||||
|
|
||||||
# Video Upload UI
|
# Video Upload UI
|
||||||
video_ui = self.create_upload_ui(
|
video_ui = self.create_upload_ui(
|
||||||
label="Please upload Video file (.mp4)", file_types=[".mp4"], process_function=self.read_video_file
|
label="Please upload Video file (.mp4)",
|
||||||
|
file_types=[".mp4"],
|
||||||
|
process_function=self.read_video_file,
|
||||||
|
document_type="video",
|
||||||
)
|
)
|
||||||
|
|
||||||
# Render all the UI in separate tabs
|
# Render all the UI in separate tabs
|
||||||
|
|||||||
Reference in New Issue
Block a user