Add MultimodalQnA as MMRAG use case in Example (#751)

Signed-off-by: Tiep Le <tiep.le@intel.com>
Signed-off-by: siddhivelankar23 <siddhi.velankar@intel.com>
Signed-off-by: sjagtap1803 <siddhant.jagtap@intel.com>
Author: Tiep Le
Date: 2024-09-14 01:55:29 -07:00
Committed by: GitHub
Parent: 06696c8e58
Commit: b6cce35a93
21 changed files with 2558 additions and 0 deletions

View File: Dockerfile

@@ -0,0 +1,35 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
FROM python:3.11-slim
ENV LANG=C.UTF-8
ARG ARCH="cpu"
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
build-essential \
libgl1-mesa-glx \
libjemalloc-dev \
default-jre \
wget \
vim
# Install ffmpeg static build
WORKDIR /root
RUN wget https://johnvansickle.com/ffmpeg/builds/ffmpeg-git-amd64-static.tar.xz && \
mkdir ffmpeg-git-amd64-static && tar -xvf ffmpeg-git-amd64-static.tar.xz -C ffmpeg-git-amd64-static --strip-components 1 && \
export PATH=/root/ffmpeg-git-amd64-static:$PATH && \
cp /root/ffmpeg-git-amd64-static/ffmpeg /usr/local/bin/ && \
cp /root/ffmpeg-git-amd64-static/ffprobe /usr/local/bin/
RUN mkdir -p /home/user
COPY gradio /home/user/gradio
RUN pip install --no-cache-dir --upgrade pip setuptools && \
pip install --no-cache-dir -r /home/user/gradio/requirements.txt
WORKDIR /home/user/gradio
ENTRYPOINT ["python", "multimodalqna_ui_gradio.py"]
# ENTRYPOINT ["/usr/bin/sleep", "infinity"]
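# Example build/run (a sketch; the image tag and host port mapping are
# assumptions, not part of this commit; the UI listens on 5173 by default):
#   docker build -t opea/multimodalqna-ui:latest .
#   docker run -p 5173:5173 \
#     -e BACKEND_SERVICE_ENDPOINT=http://<gateway-host>:8888/v1/multimodalqna \
#     opea/multimodalqna-ui:latest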

View File: gradio/conversation.py

@@ -0,0 +1,155 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import dataclasses
from enum import Enum, auto
from typing import List
from utils import get_b64_frame_from_timestamp
class SeparatorStyle(Enum):
"""Different separator style."""
SINGLE = auto()
@dataclasses.dataclass
class Conversation:
"""A class that keeps all conversation history."""
system: str
roles: List[str]
messages: List[List[str]]
offset: int
sep_style: SeparatorStyle = SeparatorStyle.SINGLE
sep: str = "\n"
video_file: str = None
caption: str = None
time_of_frame_ms: str = None
base64_frame: str = None
skip_next: bool = False
split_video: str = None
def _template_caption(self):
out = ""
if self.caption is not None:
out = f"The caption associated with the image is '{self.caption}'. "
return out
def get_prompt(self):
messages = self.messages
        if len(messages) > 1 and messages[1][1] is None:
            # Need to do RAG; the prompt is the query text only
            ret = messages[0][1]
        else:
            # No need to do RAG; build the prompt in chat-completion format
conv_dict = []
if self.sep_style == SeparatorStyle.SINGLE:
for i, (role, message) in enumerate(messages):
if message:
if i != 0:
dic = {"role": role, "content": message}
else:
dic = {"role": role}
if self.time_of_frame_ms and self.video_file:
content = [{"type": "text", "text": message}]
if self.base64_frame:
base64_frame = self.base64_frame
else:
base64_frame = get_b64_frame_from_timestamp(self.video_file, self.time_of_frame_ms)
self.base64_frame = base64_frame
content.append({"type": "image_url", "image_url": {"url": base64_frame}})
else:
content = message
dic["content"] = content
conv_dict.append(dic)
else:
raise ValueError(f"Invalid style: {self.sep_style}")
ret = conv_dict
return ret
def append_message(self, role, message):
self.messages.append([role, message])
def get_b64_image(self):
b64_img = None
if self.time_of_frame_ms and self.video_file:
time_of_frame_ms = self.time_of_frame_ms
video_file = self.video_file
b64_img = get_b64_frame_from_timestamp(video_file, time_of_frame_ms)
return b64_img
def to_gradio_chatbot(self):
ret = []
for i, (role, msg) in enumerate(self.messages[self.offset :]):
if i % 2 == 0:
if type(msg) is tuple:
import base64
from io import BytesIO
msg, image, image_process_mode = msg
max_hw, min_hw = max(image.size), min(image.size)
aspect_ratio = max_hw / min_hw
max_len, min_len = 800, 400
shortest_edge = int(min(max_len / aspect_ratio, min_len, min_hw))
longest_edge = int(shortest_edge * aspect_ratio)
W, H = image.size
if H > W:
H, W = longest_edge, shortest_edge
else:
H, W = shortest_edge, longest_edge
image = image.resize((W, H))
buffered = BytesIO()
image.save(buffered, format="JPEG")
img_b64_str = base64.b64encode(buffered.getvalue()).decode()
                    img_str = f'<img src="data:image/jpeg;base64,{img_b64_str}" alt="user upload image" />'
msg = img_str + msg.replace("<image>", "").strip()
ret.append([msg, None])
else:
ret.append([msg, None])
else:
ret[-1][-1] = msg
return ret
def copy(self):
return Conversation(
system=self.system,
roles=self.roles,
messages=[[x, y] for x, y in self.messages],
offset=self.offset,
sep_style=self.sep_style,
sep=self.sep,
video_file=self.video_file,
caption=self.caption,
base64_frame=self.base64_frame,
)
def dict(self):
return {
"system": self.system,
"roles": self.roles,
"messages": self.messages,
"offset": self.offset,
"sep": self.sep,
"time_of_frame_ms": self.time_of_frame_ms,
"video_file": self.video_file,
"caption": self.caption,
"base64_frame": self.base64_frame,
"split_video": self.split_video,
}
multimodalqna_conv = Conversation(
system="",
roles=("user", "assistant"),
messages=(),
offset=0,
sep_style=SeparatorStyle.SINGLE,
sep="\n",
video_file=None,
caption=None,
time_of_frame_ms=None,
base64_frame=None,
split_video=None,
)
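A minimal sketch (not shipped with this commit) of the two prompt shapes get_prompt() returns: a bare query string while the last assistant slot is still None (the RAG path), and a chat-completion-style list of role/content dicts once an answer is filled in:

from conversation import multimodalqna_conv

conv = multimodalqna_conv.copy()
conv.append_message(conv.roles[0], "What is shown on the whiteboard?")
conv.append_message(conv.roles[1], None)
print(conv.get_prompt())  # -> "What is shown on the whiteboard?" (query only, RAG path)

conv.messages[-1][-1] = "An architecture diagram."  # fill in the assistant's answer
conv.append_message(conv.roles[0], "Summarize it in one sentence.")
conv.append_message(conv.roles[1], None)
print(conv.get_prompt())  # -> [{"role": "user", ...}, {"role": "assistant", ...}, ...]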

View File: gradio/multimodalqna_ui_gradio.py

@@ -0,0 +1,337 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import argparse
import os
import shutil
import time
from pathlib import Path
import gradio as gr
import requests
import uvicorn
from conversation import multimodalqna_conv
from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles
from utils import build_logger, moderation_msg, server_error_msg, split_video
logger = build_logger("gradio_web_server", "gradio_web_server.log")
headers = {"Content-Type": "application/json"}
css = """
h1 {
text-align: center;
display:block;
}
"""
# create a FastAPI app
app = FastAPI()
cur_dir = os.getcwd()
static_dir = Path(os.path.join(cur_dir, "static/"))
tmp_dir = Path(os.path.join(cur_dir, "split_tmp_videos/"))
Path(static_dir).mkdir(parents=True, exist_ok=True)
app.mount("/static", StaticFiles(directory=static_dir), name="static")
description = "This Space lets you engage with MultimodalQnA on a video through a chat box."
no_change_btn = gr.Button()
enable_btn = gr.Button(interactive=True)
disable_btn = gr.Button(interactive=False)
def clear_history(state, request: gr.Request):
logger.info(f"clear_history. ip: {request.client.host}")
if state.split_video and os.path.exists(state.split_video):
os.remove(state.split_video)
state = multimodalqna_conv.copy()
return (state, state.to_gradio_chatbot(), "", None) + (disable_btn,) * 1
def add_text(state, text, request: gr.Request):
logger.info(f"add_text. ip: {request.client.host}. len: {len(text)}")
if len(text) <= 0:
state.skip_next = True
        # match the four outputs wired to submit_btn (state, chatbot, textbox, clear button)
        return (state, state.to_gradio_chatbot(), "") + (no_change_btn,) * 1
text = text[:2000] # Hard cut-off
state.append_message(state.roles[0], text)
state.append_message(state.roles[1], None)
state.skip_next = False
return (state, state.to_gradio_chatbot(), "") + (disable_btn,) * 1
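# http_bot is a generator: Gradio applies each yield as an incremental UI update,
# first an empty assistant slot with the Clear button disabled, then the final
# answer together with the clipped video segment.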
def http_bot(state, request: gr.Request):
global gateway_addr
logger.info(f"http_bot. ip: {request.client.host}")
url = gateway_addr
is_very_first_query = False
if state.skip_next:
# This generate call is skipped due to invalid inputs
        # Conversation defines no get_path_to_subvideos(); use the stored clip path
        path_to_sub_videos = state.split_video
        yield (state, state.to_gradio_chatbot(), path_to_sub_videos) + (no_change_btn,) * 1
return
if len(state.messages) == state.offset + 2:
# First round of conversation
is_very_first_query = True
new_state = multimodalqna_conv.copy()
new_state.append_message(new_state.roles[0], state.messages[-2][1])
new_state.append_message(new_state.roles[1], None)
state = new_state
# Construct prompt
prompt = state.get_prompt()
# Make requests
pload = {
"messages": prompt,
}
logger.info(f"==== request ====\n{pload}")
logger.info(f"==== url request ====\n{gateway_addr}")
state.messages[-1][-1] = ""
yield (state, state.to_gradio_chatbot(), state.split_video) + (disable_btn,) * 1
try:
response = requests.post(
url,
headers=headers,
json=pload,
timeout=100,
)
print(response.status_code)
print(response.json())
if response.status_code == 200:
response = response.json()
choice = response["choices"][-1]
metadata = choice["metadata"]
message = choice["message"]["content"]
if (
is_very_first_query
and not state.video_file
and "source_video" in metadata
and not state.time_of_frame_ms
and "time_of_frame_ms" in metadata
):
video_file = metadata["source_video"]
state.video_file = os.path.join(static_dir, metadata["source_video"])
state.time_of_frame_ms = metadata["time_of_frame_ms"]
                split_video_path = split_video(
                    state.video_file, state.time_of_frame_ms, tmp_dir, f"{state.time_of_frame_ms}__{video_file}"
                )
                state.split_video = split_video_path
                print(split_video_path)
else:
raise requests.exceptions.RequestException
except requests.exceptions.RequestException as e:
state.messages[-1][-1] = server_error_msg
yield (state, state.to_gradio_chatbot(), None) + (enable_btn,)
return
state.messages[-1][-1] = message
yield (state, state.to_gradio_chatbot(), state.split_video) + (enable_btn,) * 1
logger.info(f"{state.messages[-1][-1]}")
return
def ingest_video_gen_transcript(filepath, request: gr.Request):
    yield (gr.Textbox(visible=True, value="Please wait while your uploaded video is ingested into the database..."))
    basename = os.path.basename(filepath)
    dest = os.path.join(static_dir, basename)
    shutil.copy(filepath, dest)
    print("Done copying uploaded file to the static folder!")
    # let requests set the multipart Content-Type header; close the file when done
    with open(dest, "rb") as f:
        files = {"files": f}
        response = requests.post(dataprep_gen_transcript_addr, files=files)
print(response.status_code)
if response.status_code == 200:
response = response.json()
print(response)
yield (gr.Textbox(visible=True, value="Video ingestion is done. Saving your uploaded video..."))
time.sleep(2)
fn_no_ext = Path(dest).stem
if "video_id_maps" in response and fn_no_ext in response["video_id_maps"]:
new_dst = os.path.join(static_dir, response["video_id_maps"][fn_no_ext])
print(response["video_id_maps"][fn_no_ext])
os.rename(dest, new_dst)
yield (
gr.Textbox(
visible=True,
value="Congratulation! Your upload is done!\nClick the X button on the top right of the video upload box to upload another video.",
)
)
return
else:
yield (
gr.Textbox(
visible=True,
value="Something wrong!\nPlease click the X button on the top right of the video upload boxreupload your video!",
)
)
time.sleep(2)
return
def ingest_video_gen_caption(filepath, request: gr.Request):
    yield (gr.Textbox(visible=True, value="Please wait while your uploaded video is ingested into the database..."))
    basename = os.path.basename(filepath)
    dest = os.path.join(static_dir, basename)
    shutil.copy(filepath, dest)
    print("Done copying uploaded file to the static folder!")
    # let requests set the multipart Content-Type header; close the file when done
    with open(dest, "rb") as f:
        files = {"files": f}
        response = requests.post(dataprep_gen_caption_addr, files=files)
print(response.status_code)
if response.status_code == 200:
response = response.json()
print(response)
yield (gr.Textbox(visible=True, value="Video ingestion is done. Saving your uploaded video..."))
time.sleep(2)
fn_no_ext = Path(dest).stem
if "video_id_maps" in response and fn_no_ext in response["video_id_maps"]:
new_dst = os.path.join(static_dir, response["video_id_maps"][fn_no_ext])
print(response["video_id_maps"][fn_no_ext])
os.rename(dest, new_dst)
yield (
gr.Textbox(
visible=True,
value="Congratulation! Your upload is done!\nClick the X button on the top right of the video upload box to upload another video.",
)
)
return
else:
yield (
gr.Textbox(
visible=True,
value="Something wrong!\nPlease click the X button on the top right of the video upload boxreupload your video!",
)
)
time.sleep(2)
return
def clear_uploaded_video(request: gr.Request):
return gr.Textbox(visible=False)
with gr.Blocks() as upload_gen_trans:
gr.Markdown("# Ingest Your Own Video - Utilizing Generated Transcripts")
gr.Markdown(
"Please use this interface to ingest your own video if the video has meaningful audio (e.g., announcements, discussions, etc...)"
)
with gr.Row():
with gr.Column(scale=6):
video_upload = gr.Video(sources="upload", height=512, width=512, elem_id="video_upload")
with gr.Column(scale=3):
text_upload_result = gr.Textbox(visible=False, interactive=False, label="Upload Status")
video_upload.upload(ingest_video_gen_transcript, [video_upload], [text_upload_result])
video_upload.clear(clear_uploaded_video, [], [text_upload_result])
with gr.Blocks() as upload_gen_captions:
gr.Markdown("# Ingest Your Own Video - Utilizing Generated Captions")
gr.Markdown(
"Please use this interface to ingest your own video if the video has meaningless audio (e.g., background musics, etc...)"
)
with gr.Row():
with gr.Column(scale=6):
video_upload_cap = gr.Video(sources="upload", height=512, width=512, elem_id="video_upload_cap")
with gr.Column(scale=3):
text_upload_result_cap = gr.Textbox(visible=False, interactive=False, label="Upload Status")
    video_upload_cap.upload(ingest_video_gen_caption, [video_upload_cap], [text_upload_result_cap])
video_upload_cap.clear(clear_uploaded_video, [], [text_upload_result_cap])
with gr.Blocks() as qna:
state = gr.State(multimodalqna_conv.copy())
with gr.Row():
with gr.Column(scale=4):
video = gr.Video(height=512, width=512, elem_id="video")
with gr.Column(scale=7):
chatbot = gr.Chatbot(elem_id="chatbot", label="MultimodalQnA Chatbot", height=390)
with gr.Row():
with gr.Column(scale=6):
# textbox.render()
textbox = gr.Textbox(
# show_label=False,
# container=False,
label="Query",
info="Enter your query here!",
)
with gr.Column(scale=1, min_width=100):
with gr.Row():
submit_btn = gr.Button(value="Send", variant="primary", interactive=True)
with gr.Row(elem_id="buttons") as button_row:
clear_btn = gr.Button(value="🗑️ Clear", interactive=False)
clear_btn.click(
clear_history,
[
state,
],
[state, chatbot, textbox, video, clear_btn],
)
submit_btn.click(
add_text,
[state, textbox],
[state, chatbot, textbox, clear_btn],
).then(
http_bot,
[
state,
],
[state, chatbot, video, clear_btn],
)
with gr.Blocks(css=css) as demo:
gr.Markdown("# MultimodalQnA")
with gr.Tabs():
with gr.TabItem("MultimodalQnA With Your Videos"):
qna.render()
with gr.TabItem("Upload Your Own Videos"):
upload_gen_trans.render()
with gr.TabItem("Upload Your Own Videos"):
upload_gen_captions.render()
demo.queue()
app = gr.mount_gradio_app(app, demo, path="/")
share = False
enable_queue = True
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--host", type=str, default="0.0.0.0")
parser.add_argument("--port", type=int, default=5173)
parser.add_argument("--concurrency-count", type=int, default=20)
parser.add_argument("--share", action="store_true")
backend_service_endpoint = os.getenv("BACKEND_SERVICE_ENDPOINT", "http://localhost:8888/v1/multimodalqna")
dataprep_gen_transcript_endpoint = os.getenv(
"DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT", "http://localhost:6007/v1/generate_transcripts"
)
dataprep_gen_caption_endpoint = os.getenv(
"DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT", "http://localhost:6007/v1/generate_captions"
)
args = parser.parse_args()
logger.info(f"args: {args}")
    # module-level names read by the handlers above; "global" is a no-op
    # outside a function, so plain assignment is sufficient here
    gateway_addr = backend_service_endpoint
    dataprep_gen_transcript_addr = dataprep_gen_transcript_endpoint
    dataprep_gen_caption_addr = dataprep_gen_caption_endpoint
uvicorn.run(app, host=args.host, port=args.port)
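For reference, a minimal sketch (assuming the gateway runs at the default BACKEND_SERVICE_ENDPOINT and answers in the ChatCompletion-style shape that http_bot() parses) of querying the MultimodalQnA gateway directly, without the UI:

import requests

url = "http://localhost:8888/v1/multimodalqna"  # default BACKEND_SERVICE_ENDPOINT
payload = {"messages": "What did the speaker say about latency?"}  # example query text is an assumption
response = requests.post(url, headers={"Content-Type": "application/json"}, json=payload, timeout=100)
response.raise_for_status()
choice = response.json()["choices"][-1]
print(choice["message"]["content"])  # answer text
print(choice.get("metadata", {}))  # may carry source_video and time_of_frame_ms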

View File: gradio/requirements.txt

@@ -0,0 +1,5 @@
gradio==4.44.0
moviepy==1.0.3
numpy==1.26.4
opencv-python==4.10.0.82
Pillow==10.3.0

View File: gradio/utils.py

@@ -0,0 +1,169 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import base64
import logging
import logging.handlers
import os
import sys
from pathlib import Path
import cv2
from moviepy.video.io.VideoFileClip import VideoFileClip
LOGDIR = "."
server_error_msg = "**NETWORK ERROR DUE TO HIGH TRAFFIC. PLEASE REGENERATE OR REFRESH THIS PAGE.**"
moderation_msg = "YOUR INPUT VIOLATES OUR CONTENT MODERATION GUIDELINES. PLEASE TRY AGAIN."
handler = None
save_log = False
def build_logger(logger_name, logger_filename):
global handler
formatter = logging.Formatter(
fmt="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
)
# Set the format of root handlers
if not logging.getLogger().handlers:
logging.basicConfig(level=logging.INFO)
logging.getLogger().handlers[0].setFormatter(formatter)
# Redirect stdout and stderr to loggers
stdout_logger = logging.getLogger("stdout")
stdout_logger.setLevel(logging.INFO)
sl = StreamToLogger(stdout_logger, logging.INFO)
sys.stdout = sl
stderr_logger = logging.getLogger("stderr")
stderr_logger.setLevel(logging.ERROR)
sl = StreamToLogger(stderr_logger, logging.ERROR)
sys.stderr = sl
# Get logger
logger = logging.getLogger(logger_name)
logger.setLevel(logging.INFO)
# Add a file handler for all loggers
if save_log and handler is None:
os.makedirs(LOGDIR, exist_ok=True)
filename = os.path.join(LOGDIR, logger_filename)
handler = logging.handlers.TimedRotatingFileHandler(filename, when="D", utc=True)
handler.setFormatter(formatter)
for name, item in logging.root.manager.loggerDict.items():
if isinstance(item, logging.Logger):
item.addHandler(handler)
return logger
class StreamToLogger(object):
"""Fake file-like stream object that redirects writes to a logger instance."""
def __init__(self, logger, log_level=logging.INFO):
self.terminal = sys.stdout
self.logger = logger
self.log_level = log_level
self.linebuf = ""
def __getattr__(self, attr):
return getattr(self.terminal, attr)
def write(self, buf):
temp_linebuf = self.linebuf + buf
self.linebuf = ""
for line in temp_linebuf.splitlines(True):
# From the io.TextIOWrapper docs:
# On output, if newline is None, any '\n' characters written
# are translated to the system default line separator.
# By default sys.stdout.write() expects '\n' newlines and then
# translates them so this is still cross platform.
if line[-1] == "\n":
self.logger.log(self.log_level, line.rstrip())
else:
self.linebuf += line
def flush(self):
if self.linebuf != "":
self.logger.log(self.log_level, self.linebuf.rstrip())
self.linebuf = ""
def maintain_aspect_ratio_resize(image, width=None, height=None, inter=cv2.INTER_AREA):
# Grab the image size and initialize dimensions
dim = None
(h, w) = image.shape[:2]
# Return original image if no need to resize
if width is None and height is None:
return image
# We are resizing height if width is none
if width is None:
# Calculate the ratio of the height and construct the dimensions
r = height / float(h)
dim = (int(w * r), height)
# We are resizing width if height is none
else:
# Calculate the ratio of the width and construct the dimensions
r = width / float(w)
dim = (width, int(h * r))
# Return the resized image
return cv2.resize(image, dim, interpolation=inter)
# function to split video at a timestamp
def split_video(
video_path,
timestamp_in_ms,
output_video_path: str = "./public/splitted_videos",
output_video_name: str = "video_tmp.mp4",
play_before_sec: int = 5,
play_after_sec: int = 5,
):
timestamp_in_sec = int(timestamp_in_ms) / 1000
    # create the output folder if it does not exist
    Path(output_video_path).mkdir(parents=True, exist_ok=True)
output_video = os.path.join(output_video_path, output_video_name)
with VideoFileClip(video_path) as video:
duration = video.duration
start_time = max(timestamp_in_sec - play_before_sec, 0)
end_time = min(timestamp_in_sec + play_after_sec, duration)
new = video.subclip(start_time, end_time)
new.write_videofile(output_video, audio_codec="aac")
return output_video
def delete_split_video(video_path):
if os.path.exists(video_path):
os.remove(video_path)
return True
else:
print("The file does not exist")
return False
def convert_img_to_base64(image):
"Convert image to base64 string"
_, buffer = cv2.imencode(".png", image)
encoded_string = base64.b64encode(buffer)
return encoded_string.decode("utf-8")
def get_b64_frame_from_timestamp(video_path, timestamp_in_ms, maintain_aspect_ratio: bool = False):
print(f"video path: {video_path}")
    vidcap = cv2.VideoCapture(video_path)
    vidcap.set(cv2.CAP_PROP_POS_MSEC, int(timestamp_in_ms))
    success, frame = vidcap.read()
    vidcap.release()
    if success:
        if maintain_aspect_ratio:
            frame = maintain_aspect_ratio_resize(frame, height=350)
        b64_img_str = convert_img_to_base64(frame)
        return b64_img_str
    return None
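A minimal usage sketch for the two helpers the UI relies on (sample.mp4 and the 12 s timestamp are assumptions, not files shipped with this commit):

# clip roughly 5 s on either side of a retrieved timestamp, then grab the
# matching frame as a base64 string
clip_path = split_video(
    "sample.mp4",
    12000,  # timestamp in milliseconds
    output_video_path="./split_tmp_videos",
    output_video_name="sample_clip.mp4",
)
b64_frame = get_b64_frame_from_timestamp("sample.mp4", 12000)
print(clip_path, (b64_frame or "")[:32])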