From a01729a5c2722d0eebb2ce1987e922bf4dcf4549 Mon Sep 17 00:00:00 2001 From: Sihan Chen <39623753+Spycsh@users.noreply.github.com> Date: Thu, 26 Dec 2024 14:45:17 +0800 Subject: [PATCH] Refactor DocSum example (#1286) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- DocSum/Dockerfile | 3 +- .../docker_compose/amd/gpu/rocm/compose.yaml | 31 +- DocSum/docker_compose/amd/gpu/rocm/set_env.sh | 8 - .../docker_compose/intel/cpu/xeon/README.md | 63 +-- .../intel/cpu/xeon/compose.yaml | 33 +- .../docker_compose/intel/hpu/gaudi/README.md | 59 +-- .../intel/hpu/gaudi/compose.yaml | 33 +- DocSum/docker_compose/set_env.sh | 14 +- DocSum/docker_image_build/build.yaml | 27 -- DocSum/docsum.py | 118 ++++-- .../intel/cpu/xeon/manifest/docsum.yaml | 347 ---------------- .../intel/hpu/gaudi/manifest/docsum.yaml | 375 +----------------- DocSum/tests/test_compose_on_gaudi.sh | 54 +-- DocSum/tests/test_compose_on_rocm.sh | 51 +-- DocSum/tests/test_compose_on_xeon.sh | 54 +-- DocSum/ui/gradio/docsum_ui_gradio.py | 18 +- 16 files changed, 145 insertions(+), 1143 deletions(-) diff --git a/DocSum/Dockerfile b/DocSum/Dockerfile index 183aff49d..27e08ee7a 100644 --- a/DocSum/Dockerfile +++ b/DocSum/Dockerfile @@ -6,7 +6,8 @@ FROM python:3.11-slim RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ libgl1-mesa-glx \ libjemalloc-dev \ - git + git \ + ffmpeg RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ diff --git a/DocSum/docker_compose/amd/gpu/rocm/compose.yaml b/DocSum/docker_compose/amd/gpu/rocm/compose.yaml index 933317001..fa36310ad 100644 --- a/DocSum/docker_compose/amd/gpu/rocm/compose.yaml +++ b/DocSum/docker_compose/amd/gpu/rocm/compose.yaml @@ -70,34 +70,6 @@ services: https_proxy: ${https_proxy} restart: unless-stopped - dataprep-audio2text: - image: ${REGISTRY:-opea}/dataprep-audio2text:${TAG:-latest} - container_name: dataprep-audio2text-service - ports: - - "9099:9099" - ipc: host - environment: - A2T_ENDPOINT: ${A2T_ENDPOINT} - - dataprep-video2audio: - image: ${REGISTRY:-opea}/dataprep-video2audio:${TAG:-latest} - container_name: dataprep-video2audio-service - ports: - - "7078:7078" - ipc: host - environment: - V2A_ENDPOINT: ${V2A_ENDPOINT} - - dataprep-multimedia2text: - image: ${REGISTRY:-opea}/dataprep-multimedia2text:${TAG:-latest} - container_name: dataprep-multimedia2text - ports: - - "7079:7079" - ipc: host - environment: - V2A_ENDPOINT: ${V2A_ENDPOINT} - A2T_ENDPOINT: ${A2T_ENDPOINT} - docsum-backend-server: image: ${REGISTRY:-opea}/docsum:${TAG:-latest} container_name: docsum-backend-server @@ -111,8 +83,9 @@ services: - https_proxy=${https_proxy} - http_proxy=${http_proxy} - MEGA_SERVICE_HOST_IP=${HOST_IP} - - DATA_SERVICE_HOST_IP=${DATA_SERVICE_HOST_IP} - LLM_SERVICE_HOST_IP=${HOST_IP} + - ASR_SERVICE_HOST_IP=${ASR_SERVICE_HOST_IP} + ipc: host restart: always diff --git a/DocSum/docker_compose/amd/gpu/rocm/set_env.sh b/DocSum/docker_compose/amd/gpu/rocm/set_env.sh index 0b48a19fb..797c6b8a4 100644 --- a/DocSum/docker_compose/amd/gpu/rocm/set_env.sh +++ b/DocSum/docker_compose/amd/gpu/rocm/set_env.sh @@ -15,11 +15,3 @@ export DOCSUM_LLM_SERVER_PORT="9000" export DOCSUM_BACKEND_SERVER_PORT="8888" export DOCSUM_FRONTEND_PORT="5173" export BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" -export V2A_SERVICE_HOST_IP=${host_ip} -export V2A_ENDPOINT=http://$host_ip:7078 -export A2T_ENDPOINT=http://$host_ip:7066 -export A2T_SERVICE_HOST_IP=${host_ip} -export A2T_SERVICE_PORT=9099 -export DATA_ENDPOINT=http://$host_ip:7079 -export DATA_SERVICE_HOST_IP=${host_ip} -export DATA_SERVICE_PORT=7079 diff --git a/DocSum/docker_compose/intel/cpu/xeon/README.md b/DocSum/docker_compose/intel/cpu/xeon/README.md index 3a3828bf2..5c579e82c 100644 --- a/DocSum/docker_compose/intel/cpu/xeon/README.md +++ b/DocSum/docker_compose/intel/cpu/xeon/README.md @@ -29,30 +29,6 @@ The Whisper Service converts audio files to text. Follow these steps to build an docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/dependency/Dockerfile . ``` -#### Audio to text Service - -The Audio to text Service is another service for converting audio to text. Follow these steps to build and run the service: - -```bash -docker build -t opea/dataprep-audio2text:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/multimedia2text/audio2text/Dockerfile . -``` - -#### Video to Audio Service - -The Video to Audio Service extracts audio from video files. Follow these steps to build and run the service: - -```bash -docker build -t opea/dataprep-video2audio:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/multimedia2text/video2audio/Dockerfile . -``` - -#### Multimedia to Text Service - -The Multimedia to Text Service transforms multimedia data to text data. Follow these steps to build and run the service: - -```bash -docker build -t opea/dataprep-multimedia2text:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/multimedia2text/Dockerfile . -``` - ### 2. Build MegaService Docker Image To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `docsum.py` Python script. Build the MegaService Docker image via below command: @@ -149,9 +125,6 @@ You will have the following Docker Images: 2. `opea/docsum:latest` 3. `opea/llm-docsum-tgi:latest` 4. `opea/whisper:latest` -5. `opea/dataprep-audio2text:latest` -6. `opea/dataprep-multimedia2text:latest` -7. `opea/dataprep-video2audio:latest` ### Validate Microservices @@ -188,37 +161,7 @@ You will have the following Docker Images: {"asr_result":"you"} ``` -4. Audio2Text Microservice - - ```bash - curl http://${host_ip}:9099/v1/audio/transcriptions \ - -X POST \ - -d '{"byte_str":"UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \ - -H 'Content-Type: application/json' - ``` - - Expected output: - - ```bash - {"downstream_black_list":[],"id":"--> this will be different id number for each run <--","query":"you"} - ``` - -5. Multimedia to text Microservice - - ```bash - curl http://${host_ip}:7079/v1/multimedia2text \ - -X POST \ - -d '{"audio":"UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \ - -H 'Content-Type: application/json' - ``` - - Expected output: - - ```bash - {"downstream_black_list":[],"id":"--> this will be different id number for each run <--","query":"you"} - ``` - -6. MegaService +4. MegaService Text: @@ -257,7 +200,7 @@ You will have the following Docker Images: -F "stream=true" ``` - > Audio and Video file uploads are not supported in docsum with curl request, please use the Gradio-UI. + > Audio and Video file uploads are not supported in docsum with curl request, please use the Gradio-UI. You can still pass base64 string of the audio or video file as follows: Audio: @@ -291,7 +234,7 @@ You will have the following Docker Images: -F "stream=true" ``` -7. MegaService with long context +5. MegaService with long context If you want to deal with long context, can set following parameters and select suitable summary type. diff --git a/DocSum/docker_compose/intel/cpu/xeon/compose.yaml b/DocSum/docker_compose/intel/cpu/xeon/compose.yaml index a0285d9ce..42e89ee25 100644 --- a/DocSum/docker_compose/intel/cpu/xeon/compose.yaml +++ b/DocSum/docker_compose/intel/cpu/xeon/compose.yaml @@ -50,43 +50,12 @@ services: https_proxy: ${https_proxy} restart: unless-stopped - dataprep-audio2text: - image: ${REGISTRY:-opea}/dataprep-audio2text:${TAG:-latest} - container_name: dataprep-audio2text-server - ports: - - "9099:9099" - ipc: host - environment: - A2T_ENDPOINT: ${A2T_ENDPOINT} - - dataprep-video2audio: - image: ${REGISTRY:-opea}/dataprep-video2audio:${TAG:-latest} - container_name: dataprep-video2audio-server - ports: - - "7078:7078" - ipc: host - environment: - V2A_ENDPOINT: ${V2A_ENDPOINT} - - dataprep-multimedia2text: - image: ${REGISTRY:-opea}/dataprep-multimedia2text:${TAG:-latest} - container_name: dataprep-multimedia2text - ports: - - "7079:7079" - ipc: host - environment: - V2A_ENDPOINT: ${V2A_ENDPOINT} - A2T_ENDPOINT: ${A2T_ENDPOINT} - docsum-xeon-backend-server: image: ${REGISTRY:-opea}/docsum:${TAG:-latest} container_name: docsum-xeon-backend-server depends_on: - tgi-server - llm-docsum-tgi - - dataprep-multimedia2text - - dataprep-video2audio - - dataprep-audio2text ports: - "8888:8888" environment: @@ -94,8 +63,8 @@ services: - https_proxy=${https_proxy} - http_proxy=${http_proxy} - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP} - - DATA_SERVICE_HOST_IP=${DATA_SERVICE_HOST_IP} - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP} + - ASR_SERVICE_HOST_IP=${ASR_SERVICE_HOST_IP} ipc: host restart: always diff --git a/DocSum/docker_compose/intel/hpu/gaudi/README.md b/DocSum/docker_compose/intel/hpu/gaudi/README.md index e47ed2f43..5a9deec17 100644 --- a/DocSum/docker_compose/intel/hpu/gaudi/README.md +++ b/DocSum/docker_compose/intel/hpu/gaudi/README.md @@ -13,28 +13,12 @@ git clone https://github.com/opea-project/GenAIComps.git cd GenAIComps ``` -#### Audio to text Service +#### Whisper Service -The Audio to text Service is another service for converting audio to text. Follow these steps to build and run the service: +The Whisper Service converts audio files to text. Follow these steps to build and run the service: ```bash -docker build -t opea/dataprep-audio2text:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/multimedia2text/audio2text/Dockerfile . -``` - -#### Video to Audio Service - -The Video to Audio Service extracts audio from video files. Follow these steps to build and run the service: - -```bash -docker build -t opea/dataprep-video2audio:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/multimedia2text/video2audio/Dockerfile . -``` - -#### Multimedia to Text Service - -The Multimedia to Text Service transforms multimedia data to text data. Follow these steps to build and run the service: - -```bash -docker build -t opea/dataprep-multimedia2text:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/multimedia2text/Dockerfile . +docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/dependency/Dockerfile . ``` ### 2. Build MegaService Docker Image @@ -133,9 +117,6 @@ You will have the following Docker Images: 2. `opea/docsum:latest` 3. `opea/llm-docsum-tgi:latest` 4. `opea/whisper:latest` -5. `opea/dataprep-audio2text:latest` -6. `opea/dataprep-multimedia2text:latest` -7. `opea/dataprep-video2audio:latest` ### Validate Microservices @@ -172,37 +153,7 @@ You will have the following Docker Images: {"asr_result":"you"} ``` -4. Audio2Text Microservice - - ```bash - curl http://${host_ip}:9199/v1/audio/transcriptions \ - -X POST \ - -d '{"byte_str":"UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \ - -H 'Content-Type: application/json' - ``` - - Expected output: - - ```bash - {"downstream_black_list":[],"id":"--> this will be different id number for each run <--","query":"you"} - ``` - -5. Multimedia to text Microservice - - ```bash - curl http://${host_ip}:7079/v1/multimedia2text \ - -X POST \ - -d '{"audio":"UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \ - -H 'Content-Type: application/json' - ``` - - Expected output: - - ```bash - {"downstream_black_list":[],"id":"--> this will be different id number for each run <--","query":"you"} - ``` - -6. MegaService +4. MegaService Text: @@ -274,7 +225,7 @@ You will have the following Docker Images: -F "stream=True" ``` -7. MegaService with long context +5. MegaService with long context If you want to deal with long context, can set following parameters and select suitable summary type. diff --git a/DocSum/docker_compose/intel/hpu/gaudi/compose.yaml b/DocSum/docker_compose/intel/hpu/gaudi/compose.yaml index 78d2dba5a..e9ab3e163 100644 --- a/DocSum/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/DocSum/docker_compose/intel/hpu/gaudi/compose.yaml @@ -62,43 +62,12 @@ services: - SYS_NICE restart: unless-stopped - dataprep-audio2text: - image: ${REGISTRY:-opea}/dataprep-audio2text:${TAG:-latest} - container_name: dataprep-audio2text-server - ports: - - "9199:9099" - ipc: host - environment: - A2T_ENDPOINT: ${A2T_ENDPOINT} - - dataprep-video2audio: - image: ${REGISTRY:-opea}/dataprep-video2audio:${TAG:-latest} - container_name: dataprep-video2audio-server - ports: - - "7078:7078" - ipc: host - environment: - V2A_ENDPOINT: ${V2A_ENDPOINT} - - dataprep-multimedia2text: - image: ${REGISTRY:-opea}/dataprep-multimedia2text:${TAG:-latest} - container_name: dataprep-multimedia2text - ports: - - "7079:7079" - ipc: host - environment: - V2A_ENDPOINT: ${V2A_ENDPOINT} - A2T_ENDPOINT: ${A2T_ENDPOINT} - docsum-gaudi-backend-server: image: ${REGISTRY:-opea}/docsum:${TAG:-latest} container_name: docsum-gaudi-backend-server depends_on: - tgi-server - llm-docsum-tgi - - dataprep-multimedia2text - - dataprep-video2audio - - dataprep-audio2text ports: - "8888:8888" environment: @@ -106,8 +75,8 @@ services: - https_proxy=${https_proxy} - http_proxy=${http_proxy} - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP} - - DATA_SERVICE_HOST_IP=${DATA_SERVICE_HOST_IP} - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP} + - ASR_SERVICE_HOST_IP=${ASR_SERVICE_HOST_IP} ipc: host restart: always diff --git a/DocSum/docker_compose/set_env.sh b/DocSum/docker_compose/set_env.sh index f48a48243..ffe52a04f 100644 --- a/DocSum/docker_compose/set_env.sh +++ b/DocSum/docker_compose/set_env.sh @@ -13,15 +13,7 @@ export no_proxy="${no_proxy},${host_ip}" export TGI_LLM_ENDPOINT="http://${host_ip}:8008" export MEGA_SERVICE_HOST_IP=${host_ip} export LLM_SERVICE_HOST_IP=${host_ip} +export ASR_SERVICE_HOST_IP=${host_ip} +export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" + export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum" - -export V2A_SERVICE_HOST_IP=${host_ip} -export V2A_ENDPOINT=http://$host_ip:7078 - -export A2T_ENDPOINT=http://$host_ip:7066 -export A2T_SERVICE_HOST_IP=${host_ip} -export A2T_SERVICE_PORT=9099 - -export DATA_ENDPOINT=http://$host_ip:7079 -export DATA_SERVICE_HOST_IP=${host_ip} -export DATA_SERVICE_PORT=7079 diff --git a/DocSum/docker_image_build/build.yaml b/DocSum/docker_image_build/build.yaml index da777ebb7..9701c86d1 100644 --- a/DocSum/docker_image_build/build.yaml +++ b/DocSum/docker_image_build/build.yaml @@ -41,33 +41,6 @@ services: dockerfile: comps/asr/whisper/dependency/Dockerfile extends: docsum image: ${REGISTRY:-opea}/whisper:${TAG:-latest} - dataprep-multimedia2text: - build: - args: - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - context: GenAIComps - dockerfile: comps/dataprep/multimedia2text/Dockerfile - extends: docsum - image: ${REGISTRY:-opea}/dataprep-multimedia2text:${TAG:-latest} - dataprep-audio2text: - build: - args: - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - context: GenAIComps - dockerfile: comps/dataprep/multimedia2text/audio2text/Dockerfile - extends: docsum - image: ${REGISTRY:-opea}/dataprep-audio2text:${TAG:-latest} - dataprep-video2audio: - build: - args: - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - context: GenAIComps - dockerfile: comps/dataprep/multimedia2text/video2audio/Dockerfile - extends: docsum - image: ${REGISTRY:-opea}/dataprep-video2audio:${TAG:-latest} llm-docsum-tgi: build: context: GenAIComps diff --git a/DocSum/docsum.py b/DocSum/docsum.py index 86ecf6979..a640c0f08 100644 --- a/DocSum/docsum.py +++ b/DocSum/docsum.py @@ -2,7 +2,10 @@ # SPDX-License-Identifier: Apache-2.0 import asyncio +import base64 import os +import subprocess +import uuid from typing import List from comps import MegaServiceEndpoint, MicroService, ServiceOrchestrator, ServiceRoleType, ServiceType @@ -20,8 +23,8 @@ from fastapi.responses import StreamingResponse MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888)) -DATA_SERVICE_HOST_IP = os.getenv("DATA_SERVICE_HOST_IP", "0.0.0.0") -DATA_SERVICE_PORT = int(os.getenv("DATA_SERVICE_PORT", 7079)) +ASR_SERVICE_HOST_IP = os.getenv("ASR_SERVICE_HOST_IP", "0.0.0.0") +ASR_SERVICE_PORT = int(os.getenv("ASR_SERVICE_PORT", 7066)) LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0") LLM_SERVICE_PORT = int(os.getenv("LLM_SERVICE_PORT", 9000)) @@ -29,11 +32,20 @@ LLM_SERVICE_PORT = int(os.getenv("LLM_SERVICE_PORT", 9000)) def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs): if self.services[cur_node].service_type == ServiceType.LLM: + for key_to_replace in ["text", "asr_result"]: + if key_to_replace in inputs: + inputs["query"] = inputs[key_to_replace] + del inputs[key_to_replace] + docsum_parameters = kwargs.get("docsum_parameters", None) if docsum_parameters: docsum_parameters = docsum_parameters.model_dump() del docsum_parameters["query"] inputs.update(docsum_parameters) + elif self.services[cur_node].service_type == ServiceType.ASR: + if "video" in inputs: + audio_base64 = video2audio(inputs["video"]) + inputs["audio"] = audio_base64 return inputs @@ -45,6 +57,44 @@ def read_pdf(file): return docs +def video2audio( + video_base64: str, +) -> str: + """Convert a base64 video string to a base64 audio string using ffmpeg. + + Args: + video_base64 (str): Base64 encoded video string. + + Returns: + str: Base64 encoded audio string. + """ + video_data = base64.b64decode(video_base64) + + uid = str(uuid.uuid4()) + temp_video_path = f"{uid}.mp4" + temp_audio_path = f"{uid}.mp3" + with open(temp_video_path, "wb") as video_file: + video_file.write(video_data) + + try: + subprocess.run( + ["ffmpeg", "-i", temp_video_path, "-q:a", "0", "-map", "a", temp_audio_path], + check=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.STDOUT, + ) + # Read the extracted audio file and encode it to base64 + with open(temp_audio_path, "rb") as audio_file: + audio_base64 = base64.b64encode(audio_file.read()).decode("utf-8") + + finally: + # Clean up the temporary video file + os.remove(temp_video_path) + os.remove(temp_audio_path) + + return audio_base64 + + def read_text_from_file(file, save_file_name): import docx2txt from langchain.text_splitter import CharacterTextSplitter @@ -78,17 +128,18 @@ class DocSumService: self.port = port ServiceOrchestrator.align_inputs = align_inputs self.megaservice = ServiceOrchestrator() + self.megaservice_text_only = ServiceOrchestrator() self.endpoint = str(MegaServiceEndpoint.DOC_SUMMARY) def add_remote_service(self): - data = MicroService( - name="multimedia2text", - host=DATA_SERVICE_HOST_IP, - port=DATA_SERVICE_PORT, - endpoint="/v1/multimedia2text", + asr = MicroService( + name="asr", + host=ASR_SERVICE_HOST_IP, + port=ASR_SERVICE_PORT, + endpoint="/v1/asr", use_remote_service=True, - service_type=ServiceType.DATAPREP, + service_type=ServiceType.ASR, ) llm = MicroService( @@ -100,10 +151,12 @@ class DocSumService: service_type=ServiceType.LLM, ) - self.megaservice.add(data).add(llm) - self.megaservice.flow_to(data, llm) + self.megaservice.add(asr).add(llm) + self.megaservice.flow_to(asr, llm) + self.megaservice_text_only.add(llm) async def handle_request(self, request: Request, files: List[UploadFile] = File(default=None)): + """Accept pure text, or files .txt/.pdf.docx, audio/video base64 string.""" if "application/json" in request.headers.get("content-type"): data = await request.json() @@ -129,11 +182,15 @@ class DocSumService: file_summaries = [] if files: for file in files: - file_path = f"/tmp/{file.filename}" + # Fix concurrency issue with the same file name + # https://github.com/opea-project/GenAIExamples/issues/1279 + uid = str(uuid.uuid4()) + file_path = f"/tmp/{uid}" if data_type is not None and data_type in ["audio", "video"]: raise ValueError( - "Audio and Video file uploads are not supported in docsum with curl request, please use the UI." + "Audio and Video file uploads are not supported in docsum with curl request, \ + please use the UI or pass base64 string of the content directly." ) else: @@ -181,19 +238,34 @@ class DocSumService: chunk_overlap=chunk_overlap, chunk_size=chunk_size, ) + text_only = "text" in initial_inputs_data + if not text_only: + result_dict, runtime_graph = await self.megaservice.schedule( + initial_inputs=initial_inputs_data, docsum_parameters=docsum_parameters + ) - result_dict, runtime_graph = await self.megaservice.schedule( - initial_inputs=initial_inputs_data, docsum_parameters=docsum_parameters - ) + for node, response in result_dict.items(): + # Here it suppose the last microservice in the megaservice is LLM. + if ( + isinstance(response, StreamingResponse) + and node == list(self.megaservice.services.keys())[-1] + and self.megaservice.services[node].service_type == ServiceType.LLM + ): + return response + else: + result_dict, runtime_graph = await self.megaservice_text_only.schedule( + initial_inputs=initial_inputs_data, docsum_parameters=docsum_parameters + ) + + for node, response in result_dict.items(): + # Here it suppose the last microservice in the megaservice is LLM. + if ( + isinstance(response, StreamingResponse) + and node == list(self.megaservice.services.keys())[-1] + and self.megaservice.services[node].service_type == ServiceType.LLM + ): + return response - for node, response in result_dict.items(): - # Here it suppose the last microservice in the megaservice is LLM. - if ( - isinstance(response, StreamingResponse) - and node == list(self.megaservice.services.keys())[-1] - and self.megaservice.services[node].service_type == ServiceType.LLM - ): - return response last_node = runtime_graph.all_leaves()[-1] response = result_dict[last_node]["text"] choices = [] diff --git a/DocSum/kubernetes/intel/cpu/xeon/manifest/docsum.yaml b/DocSum/kubernetes/intel/cpu/xeon/manifest/docsum.yaml index f51c01f2a..fe708a77e 100644 --- a/DocSum/kubernetes/intel/cpu/xeon/manifest/docsum.yaml +++ b/DocSum/kubernetes/intel/cpu/xeon/manifest/docsum.yaml @@ -135,67 +135,6 @@ data: HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" HF_HOME: "/tmp/.cache/huggingface" --- -# Source: docsum/charts/tgi/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: docsum-audio2text-config - labels: - helm.sh/chart: audio2text-1.0.0 - app.kubernetes.io/name: audio2text - app.kubernetes.io/instance: docsum - app.kubernetes.io/version: "2.1.0" - app.kubernetes.io/managed-by: Helm -data: - http_proxy: "" - https_proxy: "" - no_proxy: "" - A2T_ENDPOINT: "http://docsum-whisper" ---- -# Source: docsum/charts/tgi/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: docsum-video2audio-config - labels: - helm.sh/chart: video2audio-1.0.0 - app.kubernetes.io/name: video2audio - app.kubernetes.io/instance: docsum - app.kubernetes.io/version: "2.1.0" - app.kubernetes.io/managed-by: Helm -data: - http_proxy: "" - https_proxy: "" - no_proxy: "" - V2A_ENDPOINT: "http://docsum-video2audio" ---- -# Source: docsum/charts/tgi/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: docsum-multimedia2text-config - labels: - helm.sh/chart: multimedia2text-1.0.0 - app.kubernetes.io/name: multimedia2text - app.kubernetes.io/instance: docsum - app.kubernetes.io/version: "2.1.0" - app.kubernetes.io/managed-by: Helm -data: - http_proxy: "" - https_proxy: "" - no_proxy: "" - V2A_ENDPOINT: "http://docsum-video2audio" - A2T_ENDPOINT: "http://docsum-whisper" ---- # Source: docsum/charts/tgi/templates/service.yaml # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 @@ -221,81 +160,6 @@ spec: app.kubernetes.io/name: whisper app.kubernetes.io/instance: docsum --- -# Source: docsum/charts/tgi/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: docsum-audio2text - labels: - helm.sh/chart: audio2text-1.0.0 - app.kubernetes.io/name: audio2text - app.kubernetes.io/instance: docsum - app.kubernetes.io/version: "2.1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 9099 - targetPort: 9199 - protocol: TCP - name: audio2text - selector: - app.kubernetes.io/name: audio2text - app.kubernetes.io/instance: docsum ---- -# Source: docsum/charts/tgi/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: docsum-video2audio - labels: - helm.sh/chart: video2audio-1.0.0 - app.kubernetes.io/name: video2audio - app.kubernetes.io/instance: docsum - app.kubernetes.io/version: "2.1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 7078 - targetPort: 7078 - protocol: TCP - name: video2audio - selector: - app.kubernetes.io/name: video2audio - app.kubernetes.io/instance: docsum ---- -# Source: docsum/charts/tgi/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: docsum-multimedia2text - labels: - helm.sh/chart: multimedia2text-1.0.0 - app.kubernetes.io/name: multimedia2text - app.kubernetes.io/instance: docsum - app.kubernetes.io/version: "2.1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 7079 - targetPort: 7079 - protocol: TCP - name: multimedia2text - selector: - app.kubernetes.io/name: multimedia2text - app.kubernetes.io/instance: docsum ---- # Source: docsum/charts/docsum-ui/templates/service.yaml # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 @@ -801,214 +665,3 @@ spec: volumes: - name: tmp emptyDir: {} ---- -# Source: docsum/charts/audio2text/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -apiVersion: apps/v1 -kind: Deployment -metadata: - name: docsum-audio2text - labels: - helm.sh/chart: audio2text-1.0.0 - app.kubernetes.io/name: audio2text - app.kubernetes.io/instance: docsum - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: audio2text - app.kubernetes.io/instance: docsum - template: - metadata: - labels: - app.kubernetes.io/name: audio2text - app.kubernetes.io/instance: docsum - spec: - securityContext: - {} - containers: - - name: docsum - envFrom: - - configMapRef: - name: docsum-audio2text-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: false - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "opea/dataprep-audio2text:latest" - imagePullPolicy: IfNotPresent - ports: - - name: audio2text - containerPort: 9199 - protocol: TCP - volumeMounts: - - mountPath: /tmp - name: tmp - resources: - {} - volumes: - - name: tmp - emptyDir: {} ---- -# Source: docsum/charts/video2audio/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -apiVersion: apps/v1 -kind: Deployment -metadata: - name: docsum-video2audio - labels: - helm.sh/chart: video2audio-1.0.0 - app.kubernetes.io/name: video2audio - app.kubernetes.io/instance: docsum - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: video2audio - app.kubernetes.io/instance: docsum - template: - metadata: - labels: - app.kubernetes.io/name: video2audio - app.kubernetes.io/instance: docsum - spec: - securityContext: - {} - containers: - - name: docsum - envFrom: - - configMapRef: - name: docsum-video2audio-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: false - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "opea/dataprep-video2audio:latest" - imagePullPolicy: IfNotPresent - ports: - - name: video2audio - containerPort: 7078 - protocol: TCP - volumeMounts: - - mountPath: /tmp - name: tmp - livenessProbe: - failureThreshold: 24 - httpGet: - path: v1/health_check - port: video2audio - initialDelaySeconds: 5 - periodSeconds: 5 - readinessProbe: - httpGet: - path: v1/health_check - port: video2audio - initialDelaySeconds: 5 - periodSeconds: 5 - startupProbe: - failureThreshold: 120 - httpGet: - path: v1/health_check - port: video2audio - initialDelaySeconds: 5 - periodSeconds: 5 - resources: - {} - volumes: - - name: tmp - emptyDir: {} ---- -# Source: docsum/charts/multimedia2text/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -apiVersion: apps/v1 -kind: Deployment -metadata: - name: docsum-multimedia2text - labels: - helm.sh/chart: multimedia2text-1.0.0 - app.kubernetes.io/name: multimedia2text - app.kubernetes.io/instance: docsum - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: multimedia2text - app.kubernetes.io/instance: docsum - template: - metadata: - labels: - app.kubernetes.io/name: multimedia2text - app.kubernetes.io/instance: docsum - spec: - securityContext: - {} - containers: - - name: docsum - envFrom: - - configMapRef: - name: docsum-multimedia2text-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: false - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "opea/dataprep-multimedia2text:latest" - imagePullPolicy: IfNotPresent - ports: - - name: multimedia2text - containerPort: 7079 - protocol: TCP - volumeMounts: - - mountPath: /tmp - name: tmp - livenessProbe: - failureThreshold: 24 - httpGet: - path: v1/health_check - port: multimedia2text - initialDelaySeconds: 5 - periodSeconds: 5 - readinessProbe: - httpGet: - path: v1/health_check - port: multimedia2text - initialDelaySeconds: 5 - periodSeconds: 5 - startupProbe: - failureThreshold: 120 - httpGet: - path: v1/health_check - port: multimedia2text - initialDelaySeconds: 5 - periodSeconds: 5 - resources: - {} - volumes: - - name: tmp - emptyDir: {} diff --git a/DocSum/kubernetes/intel/hpu/gaudi/manifest/docsum.yaml b/DocSum/kubernetes/intel/hpu/gaudi/manifest/docsum.yaml index 9eae01e68..c3d1128ab 100644 --- a/DocSum/kubernetes/intel/hpu/gaudi/manifest/docsum.yaml +++ b/DocSum/kubernetes/intel/hpu/gaudi/manifest/docsum.yaml @@ -136,66 +136,30 @@ data: HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" HF_HOME: "/tmp/.cache/huggingface" --- -# Source: docsum/charts/tgi/templates/configmap.yaml +# Source: docsum/charts/tgi/templates/service.yaml # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 apiVersion: v1 -kind: ConfigMap +kind: Service metadata: - name: docsum-audio2text-config + name: docsum-whisper labels: - helm.sh/chart: audio2text-1.0.0 - app.kubernetes.io/name: audio2text + helm.sh/chart: whisper-1.0.0 + app.kubernetes.io/name: whisper app.kubernetes.io/instance: docsum app.kubernetes.io/version: "2.1.0" app.kubernetes.io/managed-by: Helm -data: - http_proxy: "" - https_proxy: "" - no_proxy: "" - A2T_ENDPOINT: "http://docsum-whisper" ---- -# Source: docsum/charts/tgi/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: docsum-video2audio-config - labels: - helm.sh/chart: video2audio-1.0.0 - app.kubernetes.io/name: video2audio +spec: + type: ClusterIP + ports: + - port: 7066 + targetPort: 7066 + protocol: TCP + name: whisper + selector: + app.kubernetes.io/name: whisper app.kubernetes.io/instance: docsum - app.kubernetes.io/version: "2.1.0" - app.kubernetes.io/managed-by: Helm -data: - http_proxy: "" - https_proxy: "" - no_proxy: "" - V2A_ENDPOINT: "http://docsum-video2audio" ---- -# Source: docsum/charts/tgi/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: docsum-multimedia2text-config - labels: - helm.sh/chart: multimedia2text-1.0.0 - app.kubernetes.io/name: multimedia2text - app.kubernetes.io/instance: docsum - app.kubernetes.io/version: "2.1.0" - app.kubernetes.io/managed-by: Helm -data: - http_proxy: "" - https_proxy: "" - no_proxy: "" - V2A_ENDPOINT: "http://docsum-video2audio" - A2T_ENDPOINT: "http://docsum-whisper" --- # Source: docsum/charts/docsum-ui/templates/service.yaml # Copyright (C) 2024 Intel Corporation @@ -288,106 +252,6 @@ spec: app: docsum-nginx type: NodePort --- -# Source: docsum/charts/tgi/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: docsum-whisper - labels: - helm.sh/chart: whisper-1.0.0 - app.kubernetes.io/name: whisper - app.kubernetes.io/instance: docsum - app.kubernetes.io/version: "2.1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 7066 - targetPort: 7066 - protocol: TCP - name: whisper - selector: - app.kubernetes.io/name: whisper - app.kubernetes.io/instance: docsum ---- -# Source: docsum/charts/tgi/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: docsum-audio2text - labels: - helm.sh/chart: audio2text-1.0.0 - app.kubernetes.io/name: audio2text - app.kubernetes.io/instance: docsum - app.kubernetes.io/version: "2.1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 9099 - targetPort: 9199 - protocol: TCP - name: audio2text - selector: - app.kubernetes.io/name: audio2text - app.kubernetes.io/instance: docsum ---- -# Source: docsum/charts/tgi/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: docsum-video2audio - labels: - helm.sh/chart: video2audio-1.0.0 - app.kubernetes.io/name: video2audio - app.kubernetes.io/instance: docsum - app.kubernetes.io/version: "2.1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 7078 - targetPort: 7078 - protocol: TCP - name: video2audio - selector: - app.kubernetes.io/name: video2audio - app.kubernetes.io/instance: docsum ---- -# Source: docsum/charts/tgi/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: docsum-multimedia2text - labels: - helm.sh/chart: multimedia2text-1.0.0 - app.kubernetes.io/name: multimedia2text - app.kubernetes.io/instance: docsum - app.kubernetes.io/version: "2.1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 7079 - targetPort: 7079 - protocol: TCP - name: multimedia2text - selector: - app.kubernetes.io/name: multimedia2text - app.kubernetes.io/instance: docsum ---- # Source: docsum/templates/service.yaml # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 @@ -805,214 +669,3 @@ spec: volumes: - name: tmp emptyDir: {} ---- -# Source: docsum/charts/audio2text/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -apiVersion: apps/v1 -kind: Deployment -metadata: - name: docsum-audio2text - labels: - helm.sh/chart: audio2text-1.0.0 - app.kubernetes.io/name: audio2text - app.kubernetes.io/instance: docsum - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: audio2text - app.kubernetes.io/instance: docsum - template: - metadata: - labels: - app.kubernetes.io/name: audio2text - app.kubernetes.io/instance: docsum - spec: - securityContext: - {} - containers: - - name: docsum - envFrom: - - configMapRef: - name: docsum-audio2text-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: false - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "opea/dataprep-audio2text:latest" - imagePullPolicy: IfNotPresent - ports: - - name: audio2text - containerPort: 9199 - protocol: TCP - volumeMounts: - - mountPath: /tmp - name: tmp - resources: - {} - volumes: - - name: tmp - emptyDir: {} ---- -# Source: docsum/charts/video2audio/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -apiVersion: apps/v1 -kind: Deployment -metadata: - name: docsum-video2audio - labels: - helm.sh/chart: video2audio-1.0.0 - app.kubernetes.io/name: video2audio - app.kubernetes.io/instance: docsum - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: video2audio - app.kubernetes.io/instance: docsum - template: - metadata: - labels: - app.kubernetes.io/name: video2audio - app.kubernetes.io/instance: docsum - spec: - securityContext: - {} - containers: - - name: docsum - envFrom: - - configMapRef: - name: docsum-video2audio-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: false - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "opea/dataprep-video2audio:latest" - imagePullPolicy: IfNotPresent - ports: - - name: video2audio - containerPort: 7078 - protocol: TCP - volumeMounts: - - mountPath: /tmp - name: tmp - livenessProbe: - failureThreshold: 24 - httpGet: - path: v1/health_check - port: video2audio - initialDelaySeconds: 5 - periodSeconds: 5 - readinessProbe: - httpGet: - path: v1/health_check - port: video2audio - initialDelaySeconds: 5 - periodSeconds: 5 - startupProbe: - failureThreshold: 120 - httpGet: - path: v1/health_check - port: video2audio - initialDelaySeconds: 5 - periodSeconds: 5 - resources: - {} - volumes: - - name: tmp - emptyDir: {} ---- -# Source: docsum/charts/multimedia2text/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -apiVersion: apps/v1 -kind: Deployment -metadata: - name: docsum-multimedia2text - labels: - helm.sh/chart: multimedia2text-1.0.0 - app.kubernetes.io/name: multimedia2text - app.kubernetes.io/instance: docsum - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: multimedia2text - app.kubernetes.io/instance: docsum - template: - metadata: - labels: - app.kubernetes.io/name: multimedia2text - app.kubernetes.io/instance: docsum - spec: - securityContext: - {} - containers: - - name: docsum - envFrom: - - configMapRef: - name: docsum-multimedia2text-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: false - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "opea/dataprep-multimedia2text:latest" - imagePullPolicy: IfNotPresent - ports: - - name: multimedia2text - containerPort: 7079 - protocol: TCP - volumeMounts: - - mountPath: /tmp - name: tmp - livenessProbe: - failureThreshold: 24 - httpGet: - path: v1/health_check - port: multimedia2text - initialDelaySeconds: 5 - periodSeconds: 5 - readinessProbe: - httpGet: - path: v1/health_check - port: multimedia2text - initialDelaySeconds: 5 - periodSeconds: 5 - startupProbe: - failureThreshold: 120 - httpGet: - path: v1/health_check - port: multimedia2text - initialDelaySeconds: 5 - periodSeconds: 5 - resources: - {} - volumes: - - name: tmp - emptyDir: {} diff --git a/DocSum/tests/test_compose_on_gaudi.sh b/DocSum/tests/test_compose_on_gaudi.sh index e554f7268..6287ade8c 100644 --- a/DocSum/tests/test_compose_on_gaudi.sh +++ b/DocSum/tests/test_compose_on_gaudi.sh @@ -21,20 +21,10 @@ export TGI_LLM_ENDPOINT="http://${host_ip}:8008" export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} export MEGA_SERVICE_HOST_IP=${host_ip} export LLM_SERVICE_HOST_IP=${host_ip} +export ASR_SERVICE_HOST_IP=${host_ip} export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum" export no_proxy="${no_proxy},${host_ip}" -export V2A_SERVICE_HOST_IP=${host_ip} -export V2A_ENDPOINT=http://$host_ip:7078 - -export A2T_ENDPOINT=http://$host_ip:7066 -export A2T_SERVICE_HOST_IP=${host_ip} -export A2T_SERVICE_PORT=9199 - -export DATA_ENDPOINT=http://$host_ip:7079 -export DATA_SERVICE_HOST_IP=${host_ip} -export DATA_SERVICE_PORT=7079 - WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" @@ -47,7 +37,7 @@ function build_docker_images() { git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../ echo "Build all the images with --no-cache, check docker_image_build.log for details..." - service_list="docsum docsum-gradio-ui whisper dataprep-multimedia2text dataprep-audio2text dataprep-video2audio llm-docsum-tgi" + service_list="docsum docsum-gradio-ui whisper llm-docsum-tgi" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 @@ -187,46 +177,6 @@ function validate_microservices() { "whisper-server" \ "{\"audio\": \"$(input_data_for_test "audio")\"}" - # Audio2Text service - validate_services_json \ - "${host_ip}:9199/v1/audio/transcriptions" \ - '"query":"well"' \ - "dataprep-audio2text" \ - "dataprep-audio2text-server" \ - "{\"byte_str\": \"$(input_data_for_test "audio")\"}" - - # Video2Audio service - validate_services_json \ - "${host_ip}:7078/v1/video2audio" \ - "SUQzBAAAAAAAI1RTU0UAAAAPAAADTGF2ZjU4LjI5LjEwMAAAAAAAAAAAAAAA//tQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAASW5mbwAAAA8AAAAIAAAN3wAtLS0tLS0tLS0tLS1LS0tLS0tLS0tLS0tpaWlpaWlpaWlpaWlph4eHh4eHh4eHh4eHpaWlpaWlpaWlpaWlpcPDw8PDw8PDw8PDw+Hh4eHh4eHh4eHh4eH///////////////8AAAAATGF2YzU4LjU0AAAAAAAAAAAAAAAAJAYwAAAAAAAADd95t4qPAAAAAAAAAAAAAAAAAAAAAP/7kGQAAAMhClSVMEACMOAabaCMAREA" \ - "dataprep-video2audio" \ - "dataprep-video2audio-server" \ - "{\"byte_str\": \"$(input_data_for_test "video")\"}" - - # Docsum Data service - video - validate_services_json \ - "${host_ip}:7079/v1/multimedia2text" \ - "well" \ - "dataprep-multimedia2text" \ - "dataprep-multimedia2text" \ - "{\"video\": \"$(input_data_for_test "video")\"}" - - # Docsum Data service - audio - validate_services_json \ - "${host_ip}:7079/v1/multimedia2text" \ - "well" \ - "dataprep-multimedia2text" \ - "dataprep-multimedia2text" \ - "{\"audio\": \"$(input_data_for_test "audio")\"}" - - # Docsum Data service - text - validate_services_json \ - "${host_ip}:7079/v1/multimedia2text" \ - "THIS IS A TEST >>>> and a number of states are starting to adopt them voluntarily special correspondent john delenco" \ - "dataprep-multimedia2text" \ - "dataprep-multimedia2text" \ - "{\"text\": \"$(input_data_for_test "text")\"}" - } function validate_megaservice_text() { diff --git a/DocSum/tests/test_compose_on_rocm.sh b/DocSum/tests/test_compose_on_rocm.sh index 0045f1064..5f3083d8f 100644 --- a/DocSum/tests/test_compose_on_rocm.sh +++ b/DocSum/tests/test_compose_on_rocm.sh @@ -29,24 +29,17 @@ export DOCSUM_BACKEND_SERVER_PORT="8888" export DOCSUM_FRONTEND_PORT="5552" export MEGA_SERVICE_HOST_IP=${host_ip} export LLM_SERVICE_HOST_IP=${host_ip} +export ASR_SERVICE_HOST_IP=${host_ip} export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/docsum" export DOCSUM_CARD_ID="card1" export DOCSUM_RENDER_ID="renderD136" -export V2A_SERVICE_HOST_IP=${host_ip} -export V2A_ENDPOINT=http://${host_ip}:7078 -export A2T_ENDPOINT=http://${host_ip}:7066 -export A2T_SERVICE_HOST_IP=${host_ip} -export A2T_SERVICE_PORT=9099 -export DATA_ENDPOINT=http://${host_ip}:7079 -export DATA_SERVICE_HOST_IP=${host_ip} -export DATA_SERVICE_PORT=7079 function build_docker_images() { cd $WORKPATH/docker_image_build git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../ echo "Build all the images with --no-cache, check docker_image_build.log for details..." - service_list="docsum docsum-gradio-ui whisper dataprep-multimedia2text dataprep-audio2text dataprep-video2audio llm-docsum-tgi" + service_list="docsum docsum-gradio-ui whisper llm-docsum-tgi" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker pull ghcr.io/huggingface/text-generation-inference:1.4 @@ -141,46 +134,6 @@ function validate_microservices() { "whisper-service" \ "{\"audio\": \"$(input_data_for_test "audio")\"}" - # Audio2Text service - validate_services \ - "${host_ip}:9099/v1/audio/transcriptions" \ - '"query":"well"' \ - "dataprep-audio2text" \ - "dataprep-audio2text-service" \ - "{\"byte_str\": \"$(input_data_for_test "audio")\"}" - - # Video2Audio service - validate_services \ - "${host_ip}:7078/v1/video2audio" \ - "SUQzBAAAAAAAI1RTU0UAAAAPAAADTGF2ZjU4LjI5LjEwMAAAAAAAAAAAAAAA//tQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAASW5mbwAAAA8AAAAIAAAN3wAtLS0tLS0tLS0tLS1LS0tLS0tLS0tLS0tpaWlpaWlpaWlpaWlph4eHh4eHh4eHh4eHpaWlpaWlpaWlpaWlpcPDw8PDw8PDw8PDw+Hh4eHh4eHh4eHh4eH///////////////8AAAAATGF2YzU4LjU0AAAAAAAAAAAAAAAAJAYwAAAAAAAADd95t4qPAAAAAAAAAAAAAAAAAAAAAP/7kGQAAAMhClSVMEACMOAabaCMAREA" \ - "dataprep-video2audio" \ - "dataprep-video2audio-service" \ - "{\"byte_str\": \"$(input_data_for_test "video")\"}" - - # Docsum Data service - video - validate_services \ - "${host_ip}:7079/v1/multimedia2text" \ - "well" \ - "dataprep-multimedia2text-service" \ - "dataprep-multimedia2text" \ - "{\"video\": \"$(input_data_for_test "video")\"}" - - # Docsum Data service - audio - validate_services \ - "${host_ip}:7079/v1/multimedia2text" \ - "well" \ - "dataprep-multimedia2text-service" \ - "dataprep-multimedia2text" \ - "{\"audio\": \"$(input_data_for_test "audio")\"}" - - # Docsum Data service - text - validate_services \ - "${host_ip}:7079/v1/multimedia2text" \ - "THIS IS A TEST >>>> and a number of states are starting to adopt them voluntarily special correspondent john delenco" \ - "dataprep-multimedia2text-service" \ - "dataprep-multimedia2text" \ - "{\"text\": \"$(input_data_for_test "text")\"}" - # tgi for llm service validate_services \ "${host_ip}:8008/generate" \ diff --git a/DocSum/tests/test_compose_on_xeon.sh b/DocSum/tests/test_compose_on_xeon.sh index da664a775..91d5ece1b 100644 --- a/DocSum/tests/test_compose_on_xeon.sh +++ b/DocSum/tests/test_compose_on_xeon.sh @@ -21,20 +21,10 @@ export TGI_LLM_ENDPOINT="http://${host_ip}:8008" export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} export MEGA_SERVICE_HOST_IP=${host_ip} export LLM_SERVICE_HOST_IP=${host_ip} +export ASR_SERVICE_HOST_IP=${host_ip} export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum" export no_proxy="${no_proxy},${host_ip}" -export V2A_SERVICE_HOST_IP=${host_ip} -export V2A_ENDPOINT=http://$host_ip:7078 - -export A2T_ENDPOINT=http://$host_ip:7066 -export A2T_SERVICE_HOST_IP=${host_ip} -export A2T_SERVICE_PORT=9099 - -export DATA_ENDPOINT=http://$host_ip:7079 -export DATA_SERVICE_HOST_IP=${host_ip} -export DATA_SERVICE_PORT=7079 - WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" @@ -46,7 +36,7 @@ function build_docker_images() { git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../ echo "Build all the images with --no-cache, check docker_image_build.log for details..." - service_list="docsum docsum-gradio-ui whisper dataprep-multimedia2text dataprep-audio2text dataprep-video2audio llm-docsum-tgi" + service_list="docsum docsum-gradio-ui whisper llm-docsum-tgi" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker pull ghcr.io/huggingface/text-generation-inference:1.4 @@ -189,46 +179,6 @@ function validate_microservices() { "whisper-server" \ "{\"audio\": \"$(input_data_for_test "audio")\"}" - # Audio2Text service - validate_services_json \ - "${host_ip}:9099/v1/audio/transcriptions" \ - '"query":"well"' \ - "dataprep-audio2text" \ - "dataprep-audio2text-server" \ - "{\"byte_str\": \"$(input_data_for_test "audio")\"}" - - # Video2Audio service - validate_services_json \ - "${host_ip}:7078/v1/video2audio" \ - "SUQzBAAAAAAAI1RTU0UAAAAPAAADTGF2ZjU4LjI5LjEwMAAAAAAAAAAAAAAA//tQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAASW5mbwAAAA8AAAAIAAAN3wAtLS0tLS0tLS0tLS1LS0tLS0tLS0tLS0tpaWlpaWlpaWlpaWlph4eHh4eHh4eHh4eHpaWlpaWlpaWlpaWlpcPDw8PDw8PDw8PDw+Hh4eHh4eHh4eHh4eH///////////////8AAAAATGF2YzU4LjU0AAAAAAAAAAAAAAAAJAYwAAAAAAAADd95t4qPAAAAAAAAAAAAAAAAAAAAAP/7kGQAAAMhClSVMEACMOAabaCMAREA" \ - "dataprep-video2audio" \ - "dataprep-video2audio-server" \ - "{\"byte_str\": \"$(input_data_for_test "video")\"}" - - # Docsum Data service - video - validate_services_json \ - "${host_ip}:7079/v1/multimedia2text" \ - "well" \ - "dataprep-multimedia2text" \ - "dataprep-multimedia2text" \ - "{\"video\": \"$(input_data_for_test "video")\"}" - - # Docsum Data service - audio - validate_services_json \ - "${host_ip}:7079/v1/multimedia2text" \ - "well" \ - "dataprep-multimedia2text" \ - "dataprep-multimedia2text" \ - "{\"audio\": \"$(input_data_for_test "audio")\"}" - - # Docsum Data service - text - validate_services_json \ - "${host_ip}:7079/v1/multimedia2text" \ - "THIS IS A TEST >>>> and a number of states are starting to adopt them voluntarily special correspondent john delenco" \ - "dataprep-multimedia2text" \ - "dataprep-multimedia2text" \ - "{\"text\": \"$(input_data_for_test "text")\"}" - } function validate_megaservice_text() { diff --git a/DocSum/ui/gradio/docsum_ui_gradio.py b/DocSum/ui/gradio/docsum_ui_gradio.py index 01e1c3121..fa2b78cea 100644 --- a/DocSum/ui/gradio/docsum_ui_gradio.py +++ b/DocSum/ui/gradio/docsum_ui_gradio.py @@ -77,7 +77,7 @@ class DocSumUI: """ logger.info(">>> Reading audio file: %s", file.name) base64_str = self.encode_file_to_base64(file) - return self.generate_summary(base64_str, document_type="audio") + return base64_str def read_video_file(self, file): """Read and process the content of a video file. @@ -90,7 +90,7 @@ class DocSumUI: """ logger.info(">>> Reading video file: %s", file.name) base64_str = self.encode_file_to_base64(file) - return self.generate_summary(base64_str, document_type="video") + return base64_str def is_valid_url(self, url): try: @@ -193,7 +193,7 @@ class DocSumUI: return str(response.status_code) - def create_upload_ui(self, label, file_types, process_function): + def create_upload_ui(self, label, file_types, process_function, document_type="text"): """Create a Gradio UI for file uploads. Args: @@ -213,7 +213,11 @@ class DocSumUI: generated_text = gr.TextArea( label="Text Summary", placeholder="Summarized text will be displayed here" ) - upload_btn.upload(lambda file: self.generate_summary(process_function(file)), upload_btn, generated_text) + upload_btn.upload( + lambda file: self.generate_summary(process_function(file), document_type=document_type), + upload_btn, + generated_text, + ) return upload_ui def render(self): @@ -269,11 +273,15 @@ class DocSumUI: label="Please upload audio file (.wav, .mp3)", file_types=[".wav", ".mp3"], process_function=self.read_audio_file, + document_type="audio", ) # Video Upload UI video_ui = self.create_upload_ui( - label="Please upload Video file (.mp4)", file_types=[".mp4"], process_function=self.read_video_file + label="Please upload Video file (.mp4)", + file_types=[".mp4"], + process_function=self.read_video_file, + document_type="video", ) # Render all the UI in separate tabs