Move file processing from UI to DocSum backend service (#1899)

Signed-off-by: Melanie Buehler <melanie.h.buehler@intel.com>
2025-05-07 18:05:30 -07:00
parent f6013b8679
commit 7bb05585b6
9 changed files with 299 additions and 147 deletions
--- a/DocSum/docker_compose/amd/gpu/rocm/README.md
+++ b/DocSum/docker_compose/amd/gpu/rocm/README.md
@@ -239,13 +239,16 @@ curl http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum \
   -F "language=en"
 ```

+Note that the `-F "messages="` form field is required even when uploading files (it may be left empty). To upload several files in a single call, repeat the `-F "files=@/path"` option once per file.
+
 ### Query with audio and video

-> Audio and Video file uploads are not supported in docsum with curl request, please use the Gradio-UI.
+> Audio and video can be passed as base64 strings or uploaded by providing a local file path.

 Audio:

 ```bash
+# Send base64 string
 curl -X POST http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum \
   -H "Content-Type: application/json" \
   -d '{"type": "audio", "messages": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}'
@@ -257,11 +260,21 @@ curl http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum \
   -F "max_tokens=32" \
   -F "language=en" \
   -F "stream=True"
+
+# Upload file
+curl http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum \
+   -H "Content-Type: multipart/form-data" \
+   -F "type=audio" \
+   -F "messages=" \
+   -F "files=@/path to your file (.mp3, .wav)" \
+   -F "max_tokens=32" \
+   -F "language=en"
 ```

 Video:

 ```bash
+# Send base64 string
 curl -X POST http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum \
   -H "Content-Type: application/json" \
   -d '{"type": "video", "messages": "convert your video to base64 data type"}'
@@ -273,6 +286,15 @@ curl http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum \
   -F "max_tokens=32" \
   -F "language=en" \
   -F "stream=True"
+
+# Upload file
+curl http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum \
+   -H "Content-Type: multipart/form-data" \
+   -F "type=video" \
+   -F "messages=" \
+   -F "files=@/path to your file (.mp4)" \
+   -F "max_tokens=32" \
+   -F "language=en"
 ```

 ### Query with long context
--- a/DocSum/docker_compose/intel/cpu/xeon/README.md
+++ b/DocSum/docker_compose/intel/cpu/xeon/README.md
@@ -156,16 +156,19 @@ curl http://${host_ip}:8888/v1/docsum \
   -F "messages=" \
   -F "files=@/path to your file (.txt, .docx, .pdf)" \
   -F "max_tokens=32" \
-   -F "language=en" \
+   -F "language=en"
 ```

+Note that the `-F "messages="` form field is required even when uploading files (it may be left empty). To upload several files in a single call, repeat the `-F "files=@/path"` option once per file.
+
 ### Query with audio and video

-> Audio and Video file uploads are not supported in docsum with curl request, please use the Gradio-UI.
+> Audio and video can be passed as base64 strings or uploaded by providing a local file path.

 Audio:

 ```bash
+# Send base64 string
 curl -X POST http://${host_ip}:8888/v1/docsum \
   -H "Content-Type: application/json" \
   -d '{"type": "audio", "messages": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}'
@@ -177,11 +180,21 @@ curl http://${host_ip}:8888/v1/docsum \
   -F "max_tokens=32" \
   -F "language=en" \
   -F "stream=True"
+
+# Upload file
+curl http://${host_ip}:8888/v1/docsum \
+   -H "Content-Type: multipart/form-data" \
+   -F "type=audio" \
+   -F "messages=" \
+   -F "files=@/path to your file (.mp3, .wav)" \
+   -F "max_tokens=32" \
+   -F "language=en"
 ```

 Video:

 ```bash
+# Send base64 string
 curl -X POST http://${host_ip}:8888/v1/docsum \
   -H "Content-Type: application/json" \
   -d '{"type": "video", "messages": "convert your video to base64 data type"}'
@@ -193,6 +206,15 @@ curl http://${host_ip}:8888/v1/docsum \
   -F "max_tokens=32" \
   -F "language=en" \
   -F "stream=True"
+
+# Upload file
+curl http://${host_ip}:8888/v1/docsum \
+   -H "Content-Type: multipart/form-data" \
+   -F "type=video" \
+   -F "messages=" \
+   -F "files=@/path to your file (.mp4)" \
+   -F "max_tokens=32" \
+   -F "language=en"
 ```

 ### Query with long context
--- a/DocSum/docker_compose/intel/hpu/gaudi/README.md
+++ b/DocSum/docker_compose/intel/hpu/gaudi/README.md
@@ -161,13 +161,16 @@ curl http://${host_ip}:8888/v1/docsum \
   -F "language=en"
 ```

+Note that the `-F "messages="` form field is required even when uploading files (it may be left empty). To upload several files in a single call, repeat the `-F "files=@/path"` option once per file.
+
 ### Query with audio and video

-> Audio and Video file uploads are not supported in docsum with curl request, please use the Gradio-UI.
+> Audio and video can be passed as base64 strings or uploaded by providing a local file path.

 Audio:

 ```bash
+# Send base64 string
 curl -X POST http://${host_ip}:8888/v1/docsum \
   -H "Content-Type: application/json" \
   -d '{"type": "audio", "messages": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}'
@@ -179,11 +182,21 @@ curl http://${host_ip}:8888/v1/docsum \
   -F "max_tokens=32" \
   -F "language=en" \
   -F "stream=True"
+
+# Upload file
+curl http://${host_ip}:8888/v1/docsum \
+   -H "Content-Type: multipart/form-data" \
+   -F "type=audio" \
+   -F "messages=" \
+   -F "files=@/path to your file (.mp3, .wav)" \
+   -F "max_tokens=32" \
+   -F "language=en"
 ```

 Video:

 ```bash
+# Send base64 string
 curl -X POST http://${host_ip}:8888/v1/docsum \
   -H "Content-Type: application/json" \
   -d '{"type": "video", "messages": "convert your video to base64 data type"}'
@@ -195,6 +208,15 @@ curl http://${host_ip}:8888/v1/docsum \
   -F "max_tokens=32" \
   -F "language=en" \
   -F "stream=True"
+
+# Upload file
+curl http://${host_ip}:8888/v1/docsum \
+   -H "Content-Type: multipart/form-data" \
+   -F "type=video" \
+   -F "messages=" \
+   -F "files=@/path to your file (.mp4)" \
+   -F "max_tokens=32" \
+   -F "language=en"
 ```

 ### Query with long context
--- a/DocSum/docsum.py
+++ b/DocSum/docsum.py
@@ -63,6 +63,20 @@ def read_pdf(file):
    return docs


+def encode_file_to_base64(file_path):
+    """Encode the content of a file to a base64 string.
+
+    Args:
+        file_path (str): The path to the file to be encoded.
+
+    Returns:
+        str: The base64 encoded string of the file content.
+    """
+    with open(file_path, "rb") as f:
+        base64_str = base64.b64encode(f.read()).decode("utf-8")
+    return base64_str
+
+
 def video2audio(
    video_base64: str,
 ) -> str:
@@ -163,7 +177,6 @@ class DocSumService:

    async def handle_request(self, request: Request, files: List[UploadFile] = File(default=None)):
        """Accept pure text, uploaded files (.txt/.pdf/.docx, audio/video), or an audio/video base64 string."""
-
        if "application/json" in request.headers.get("content-type"):
            data = await request.json()
            stream_opt = data.get("stream", True)
@@ -193,25 +206,24 @@ class DocSumService:
                    uid = str(uuid.uuid4())
                    file_path = f"/tmp/{uid}"

-                    if data_type is not None and data_type in ["audio", "video"]:
-                        raise ValueError(
-                            "Audio and Video file uploads are not supported in docsum with curl request, \
-                                please use the UI or pass base64 string of the content directly."
-                        )
+                    import aiofiles

-                    else:
-                        import aiofiles
-
-                        async with aiofiles.open(file_path, "wb") as f:
-                            await f.write(await file.read())
+                    async with aiofiles.open(file_path, "wb") as f:
+                        await f.write(await file.read())

+                    if data_type == "text":
                        docs = read_text_from_file(file, file_path)
-                        os.remove(file_path)
+                    elif data_type in ["audio", "video"]:
+                        docs = encode_file_to_base64(file_path)
+                    else:
+                        raise ValueError(f"Data type not recognized: {data_type}")

-                        if isinstance(docs, list):
-                            file_summaries.extend(docs)
-                        else:
-                            file_summaries.append(docs)
+                    os.remove(file_path)
+
+                    if isinstance(docs, list):
+                        file_summaries.extend(docs)
+                    else:
+                        file_summaries.append(docs)

            if file_summaries:
                prompt = handle_message(chat_request.messages) + "\n".join(file_summaries)
--- a/DocSum/tests/test_compose_on_gaudi.sh
+++ b/DocSum/tests/test_compose_on_gaudi.sh
@@ -237,6 +237,20 @@ function validate_megaservice_multimedia() {
        "language=en" \
        "stream=False"

+    echo ">>> Checking audio data in form format, upload file"
+    validate_service \
+        "${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
+        "well" \
+        "docsum-gaudi-backend-server" \
+        "docsum-gaudi-backend-server" \
+        "media" "" \
+        "type=audio" \
+        "messages=" \
+        "files=@$ROOT_FOLDER/data/test.wav" \
+        "max_tokens=32" \
+        "language=en" \
+        "stream=False"
+
    echo ">>> Checking video data in json format"
    validate_service \
        "${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
@@ -258,6 +272,20 @@ function validate_megaservice_multimedia() {
        "max_tokens=32" \
        "language=en" \
        "stream=False"
+
+    echo ">>> Checking video data in form format, upload file"
+    validate_service \
+        "${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
+        "bye" \
+        "docsum-gaudi-backend-server" \
+        "docsum-gaudi-backend-server" \
+        "media" "" \
+        "type=video" \
+        "messages=" \
+        "files=@$ROOT_FOLDER/data/test.mp4" \
+        "max_tokens=32" \
+        "language=en" \
+        "stream=False"
 }

 function validate_megaservice_long_text() {
--- a/DocSum/tests/test_compose_on_xeon.sh
+++ b/DocSum/tests/test_compose_on_xeon.sh
@@ -237,6 +237,20 @@ function validate_megaservice_multimedia() {
        "language=en" \
        "stream=False"

+    echo ">>> Checking audio data in form format, upload file"
+    validate_service \
+        "${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
+        "well" \
+        "docsum-xeon-backend-server" \
+        "docsum-xeon-backend-server" \
+        "media" "" \
+        "type=audio" \
+        "messages=" \
+        "files=@$ROOT_FOLDER/data/test.wav" \
+        "max_tokens=32" \
+        "language=en" \
+        "stream=False"
+
    echo ">>> Checking video data in json format"
    validate_service \
        "${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
@@ -258,6 +272,20 @@ function validate_megaservice_multimedia() {
        "max_tokens=32" \
        "language=en" \
        "stream=False"
+
+    echo ">>> Checking video data in form format, upload file"
+    validate_service \
+        "${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
+        "bye" \
+        "docsum-xeon-backend-server" \
+        "docsum-xeon-backend-server" \
+        "media" "" \
+        "type=video" \
+        "messages=" \
+        "files=@$ROOT_FOLDER/data/test.mp4" \
+        "max_tokens=32" \
+        "language=en" \
+        "stream=False"
 }

 function validate_megaservice_long_text() {
--- a/DocSum/tests/test_compose_tgi_on_gaudi.sh
+++ b/DocSum/tests/test_compose_tgi_on_gaudi.sh
@@ -229,6 +229,20 @@ function validate_megaservice_multimedia() {
        "language=en" \
        "stream=False"

+    echo ">>> Checking audio data in form format, upload file"
+    validate_service \
+        "${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
+        "well" \
+        "docsum-gaudi-backend-server" \
+        "docsum-gaudi-backend-server" \
+        "media" "" \
+        "type=audio" \
+        "messages=" \
+        "files=@$ROOT_FOLDER/data/test.wav" \
+        "max_tokens=32" \
+        "language=en" \
+        "stream=False"
+
    echo ">>> Checking video data in json format"
    validate_service \
        "${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
@@ -250,6 +264,20 @@ function validate_megaservice_multimedia() {
        "max_tokens=32" \
        "language=en" \
        "stream=False"
+
+    echo ">>> Checking video data in form format, upload file"
+    validate_service \
+        "${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
+        "bye" \
+        "docsum-gaudi-backend-server" \
+        "docsum-gaudi-backend-server" \
+        "media" "" \
+        "type=video" \
+        "messages=" \
+        "files=@$ROOT_FOLDER/data/test.mp4" \
+        "max_tokens=32" \
+        "language=en" \
+        "stream=False"
 }

 function validate_megaservice_long_text() {
--- a/DocSum/tests/test_compose_tgi_on_xeon.sh
+++ b/DocSum/tests/test_compose_tgi_on_xeon.sh
@@ -229,6 +229,20 @@ function validate_megaservice_multimedia() {
        "language=en" \
        "stream=False"

+    echo ">>> Checking audio data in form format, upload file"
+    validate_service \
+        "${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
+        "well" \
+        "docsum-xeon-backend-server" \
+        "docsum-xeon-backend-server" \
+        "media" "" \
+        "type=audio" \
+        "messages=" \
+        "files=@$ROOT_FOLDER/data/test.wav" \
+        "max_tokens=32" \
+        "language=en" \
+        "stream=False"
+
    echo ">>> Checking video data in json format"
    validate_service \
        "${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
@@ -250,6 +264,20 @@ function validate_megaservice_multimedia() {
        "max_tokens=32" \
        "language=en" \
        "stream=False"
+
+    echo ">>> Checking video data in form format, upload file"
+    validate_service \
+        "${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
+        "bye" \
+        "docsum-xeon-backend-server" \
+        "docsum-xeon-backend-server" \
+        "media" "" \
+        "type=video" \
+        "messages=" \
+        "files=@$ROOT_FOLDER/data/test.mp4" \
+        "max_tokens=32" \
+        "language=en" \
+        "stream=False"
 }

 function validate_megaservice_long_text() {
--- a/DocSum/ui/gradio/docsum_ui_gradio.py
+++ b/DocSum/ui/gradio/docsum_ui_gradio.py
@@ -22,76 +22,12 @@ logger = logging.getLogger(__name__)
 class DocSumUI:
    def __init__(self):
        """Initialize the DocSumUI class with accepted file types, headers, and backend service endpoint."""
-        self.ACCEPTED_FILE_TYPES = ["pdf", "doc", "docx"]
+        self.ACCEPTED_TEXT_FILE_TYPES = [".pdf", ".doc", ".docx"]
+        self.ACCEPTED_AUDIO_FILE_TYPES = [".mp3", ".wav"]
+        self.ACCEPTED_VIDEO_FILE_TYPES = [".mp4"]
        self.HEADERS = {"Content-Type": "application/json"}
        self.BACKEND_SERVICE_ENDPOINT = os.getenv("BACKEND_SERVICE_ENDPOINT", "http://localhost:8888/v1/docsum")

-    def encode_file_to_base64(self, file_path):
-        """Encode the content of a file to a base64 string.
-
-        Args:
-            file_path (str): The path to the file to be encoded.
-
-        Returns:
-            str: The base64 encoded string of the file content.
-        """
-        logger.info(">>> Encoding file to base64: %s", file_path)
-        with open(file_path, "rb") as f:
-            base64_str = base64.b64encode(f.read()).decode("utf-8")
-        return base64_str
-
-    def read_file(self, file):
-        """Read and process the content of a file.
-
-        Args:
-            file (file-like object): The file to be read.
-
-        Returns:
-            str: The content of the file or an error message if the file type is unsupported.
-        """
-        self.page_content = ""
-        self.pages = []
-
-        if file.name.endswith(".pdf"):
-            loader = PyPDFLoader(file)
-        elif file.name.endswith((".doc", ".docx")):
-            loader = Docx2txtLoader(file)
-        else:
-            msg = f"Unsupported file type '{file.name}'. Choose from {self.ACCEPTED_FILE_TYPES}"
-            logger.error(msg)
-            return msg
-
-        for page in loader.lazy_load():
-            self.page_content += page.page_content
-
-        return self.page_content
-
-    def read_audio_file(self, file):
-        """Read and process the content of an audio file.
-
-        Args:
-            file (file-like object): The audio file to be read.
-
-        Returns:
-            str: The base64 encoded content of the audio file.
-        """
-        logger.info(">>> Reading audio file: %s", file.name)
-        base64_str = self.encode_file_to_base64(file)
-        return base64_str
-
-    def read_video_file(self, file):
-        """Read and process the content of a video file.
-
-        Args:
-            file (file-like object): The video file to be read.
-
-        Returns:
-            str: The base64 encoded content of the video file.
-        """
-        logger.info(">>> Reading video file: %s", file.name)
-        base64_str = self.encode_file_to_base64(file)
-        return base64_str
-
    def is_valid_url(self, url):
        try:
            result = urlparse(url)
@@ -128,78 +64,107 @@ class DocSumUI:

        return self.page_content

-    def generate_summary(self, doc_content, document_type="text"):
+    def process_response(self, response):
+        if response.status_code == 200:
+            try:
+                # Check if the specific log path is in the response text
+                if "/logs/LLMChain/final_output" in response.text:
+                    # Extract the relevant part of the response
+                    temp = ast.literal_eval(
+                        [
+                            i.split("data: ")[1]
+                            for i in response.text.split("\n\n")
+                            if "/logs/LLMChain/final_output" in i
+                        ][0]
+                    )["ops"]
+
+                    # Find the final output value
+                    final_output = [i["value"] for i in temp if i["path"] == "/logs/LLMChain/final_output"][0]
+                    return final_output["text"]
+                else:
+                    # Perform string replacements to clean the response text
+                    cleaned_text = response.text
+                    replacements = [
+                        ("'\n\ndata: b'", ""),
+                        ("data: b' ", ""),
+                        ("</s>'\n\ndata: [DONE]\n\n", ""),
+                        ("\n\ndata: b", ""),
+                        ("'\n\n", ""),
+                        ("'\n", ""),
+                        ('''\'"''', ""),
+                    ]
+                    for old, new in replacements:
+                        cleaned_text = cleaned_text.replace(old, new)
+                    return cleaned_text
+            except (IndexError, KeyError, ValueError) as e:
+                # Handle potential errors during parsing
+                logger.error("Error parsing response: %s", e)
+                return response.text
+
+    def generate_summary(self, document, document_type="text"):
        """Generate a summary for the given document content.

        Args:
-            doc_content (str): The content of the document.
+            document (str): The content or path of the document.
            document_type (str): The type of the document (default is "text").

        Returns:
            str: The generated summary or an error message.
        """
-
        logger.info(">>> BACKEND_SERVICE_ENDPOINT - %s", self.BACKEND_SERVICE_ENDPOINT)

-        data = {"max_tokens": 256, "type": document_type, "messages": doc_content}
+        data = {"max_tokens": 256, "type": document_type, "messages": ""}

-        try:
-            response = requests.post(
-                url=self.BACKEND_SERVICE_ENDPOINT,
-                headers=self.HEADERS,
-                data=json.dumps(data),
-                proxies={"http_proxy": os.environ["http_proxy"], "https_proxy": os.environ["https_proxy"]},
-            )
+        if os.path.exists(document):
+            file_header = "text/plain"
+            file_ext = os.path.splitext(document)[-1]
+            if file_ext == ".pdf":
+                file_header = "application/pdf"
+            elif file_ext in [".doc", ".docx"]:
+                file_header = "application/octet-stream"
+            elif file_ext in self.ACCEPTED_AUDIO_FILE_TYPES + self.ACCEPTED_VIDEO_FILE_TYPES:
+                file_header = f"{document_type}/{file_ext[-3:]}"
+            files = {"files": (os.path.basename(document), open(document, "rb"), file_header)}
+            try:
+                response = requests.post(
+                    url=self.BACKEND_SERVICE_ENDPOINT,
+                    headers={},
+                    files=files,
+                    data=data,
+                    proxies={"http_proxy": os.environ["http_proxy"], "https_proxy": os.environ["https_proxy"]},
+                )

-            if response.status_code == 200:
-                try:
-                    # Check if the specific log path is in the response text
-                    if "/logs/LLMChain/final_output" in response.text:
-                        # Extract the relevant part of the response
-                        temp = ast.literal_eval(
-                            [
-                                i.split("data: ")[1]
-                                for i in response.text.split("\n\n")
-                                if "/logs/LLMChain/final_output" in i
-                            ][0]
-                        )["ops"]
+                return self.process_response(response)

-                        # Find the final output value
-                        final_output = [i["value"] for i in temp if i["path"] == "/logs/LLMChain/final_output"][0]
-                        return final_output["text"]
-                    else:
-                        # Perform string replacements to clean the response text
-                        cleaned_text = response.text
-                        replacements = [
-                            ("'\n\ndata: b'", ""),
-                            ("data: b' ", ""),
-                            ("</s>'\n\ndata: [DONE]\n\n", ""),
-                            ("\n\ndata: b", ""),
-                            ("'\n\n", ""),
-                            ("'\n", ""),
-                            ('''\'"''', ""),
-                        ]
-                        for old, new in replacements:
-                            cleaned_text = cleaned_text.replace(old, new)
-                        return cleaned_text
-                except (IndexError, KeyError, ValueError) as e:
-                    # Handle potential errors during parsing
-                    logger.error("Error parsing response: %s", e)
-                    return response.text
+            except requests.exceptions.RequestException as e:
+                logger.error("Request exception: %s", e)
+                return str(e)

-        except requests.exceptions.RequestException as e:
-            logger.error("Request exception: %s", e)
-            return str(e)
+        else:
+            data["messages"] = document
+            try:
+                response = requests.post(
+                    url=self.BACKEND_SERVICE_ENDPOINT,
+                    headers=self.HEADERS,
+                    data=json.dumps(data),
+                    proxies={"http_proxy": os.environ["http_proxy"], "https_proxy": os.environ["https_proxy"]},
+                )
+
+                return self.process_response(response)
+
+            except requests.exceptions.RequestException as e:
+                logger.error("Request exception: %s", e)
+                return str(e)

        return str(response.status_code)

-    def create_upload_ui(self, label, file_types, process_function, document_type="text"):
+    def create_upload_ui(self, label, file_types, document_type="text"):
        """Create a Gradio UI for file uploads.

        Args:
            label (str): The label for the upload button.
            file_types (list): The list of accepted file types.
-            process_function (function): The function to process the uploaded file.
+            document_type (str): The document type (text, audio, or video). Default is text.

        Returns:
            gr.Blocks: The Gradio Blocks object representing the upload UI.
@@ -214,7 +179,7 @@ class DocSumUI:
                        label="Text Summary", placeholder="Summarized text will be displayed here"
                    )
            upload_btn.upload(
-                lambda file: self.generate_summary(process_function(file), document_type=document_type),
+                lambda file: self.generate_summary(file, document_type=document_type),
                upload_btn,
                generated_text,
            )
@@ -263,24 +228,21 @@ class DocSumUI:

        # File Upload UI
        file_ui = self.create_upload_ui(
-            label="Please upload a document (.pdf, .doc, .docx)",
-            file_types=[".pdf", ".doc", ".docx"],
-            process_function=self.read_file,
+            label=f"Please upload a document ({', '.join(self.ACCEPTED_TEXT_FILE_TYPES)})",
+            file_types=self.ACCEPTED_TEXT_FILE_TYPES,
        )

        # Audio Upload UI
        audio_ui = self.create_upload_ui(
-            label="Please upload audio file (.wav, .mp3)",
-            file_types=[".wav", ".mp3"],
-            process_function=self.read_audio_file,
+            label=f"Please upload audio file ({', '.join(self.ACCEPTED_AUDIO_FILE_TYPES)})",
+            file_types=self.ACCEPTED_AUDIO_FILE_TYPES,
            document_type="audio",
        )

        # Video Upload UI
        video_ui = self.create_upload_ui(
-            label="Please upload Video file (.mp4)",
-            file_types=[".mp4"],
-            process_function=self.read_video_file,
+            label=f"Please upload video file ({', '.join(self.ACCEPTED_VIDEO_FILE_TYPES)})",
+            file_types=self.ACCEPTED_VIDEO_FILE_TYPES,
            document_type="video",
        )