@@ -8,6 +8,7 @@ import re
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import FastAPI
|
||||
from fastapi.responses import Response
|
||||
from prometheus_fastapi_instrumentator import Instrumentator
|
||||
from uvicorn import Config, Server
|
||||
|
||||
@@ -73,6 +74,11 @@ class HTTPService(BaseService):
|
||||
"""Get the health status of this GenAI microservice."""
|
||||
return {"Service Title": self.title, "Service Description": self.description}
|
||||
|
||||
@app.get("/health")
|
||||
async def _health() -> Response:
|
||||
"""Health check."""
|
||||
return Response(status_code=200)
|
||||
|
||||
@app.get(
|
||||
path="/v1/statistics",
|
||||
summary="Get the statistics of GenAI services",
|
||||
|
||||
@@ -11,7 +11,7 @@ apt-get install libreoffice

## Use LVM (Large Vision Model) for Summarizing Image Data

Occasionally unstructured data will contain image data, to convert the image data to the text data, LVM can be used to summarize the image. To leverage LVM, please refer to this [readme](../lvms/llava/README.md) to start the LVM microservice first and then set the below environment variable, before starting any dataprep microservice.
Occasionally, unstructured data will contain image data. To convert image data to text, an LVM can be used to summarize the image. To leverage an LVM, please refer to this [readme](../lvms/src/README.md) to start the LVM microservice first, then set the environment variable below before starting any dataprep microservice.

```bash
export SUMMARIZE_IMAGE_VIA_LVM=1

@@ -38,7 +38,7 @@ export PYTHONPATH=${path_to_comps}

This is required only if you are going to consume the _generate_captions_ API of this microservice as in [Section 4.3](#43-consume-generate_captions-api).

Please refer to this [readme](../../../../lvms/llava/README.md) to start the LVM microservice.
Please refer to this [readme](../../../../lvms/src/README.md) to start the LVM microservice.
After LVM is up, set up environment variables.

```bash
@@ -64,7 +64,7 @@ Please refer to this [readme](../../../../vectorstores/redis/README.md).

This is required only if you are going to consume the _generate_captions_ API of this microservice as described [here](#43-consume-generate_captions-api).

Please refer to this [readme](../../../../lvms/llava/README.md) to start the LVM microservice.
Please refer to this [readme](../../../../lvms/src/README.md) to start the LVM microservice.
After LVM is up, set up environment variables.

```bash

@@ -11,7 +11,7 @@ apt-get install libreoffice

## Use LVM (Large Vision Model) for Summarizing Image Data

Occasionally unstructured data will contain image data, to convert the image data to the text data, LVM can be used to summarize the image. To leverage LVM, please refer to this [readme](../../lvms/llava/README.md) to start the LVM microservice first and then set the below environment variable, before starting any dataprep microservice.
Occasionally, unstructured data will contain image data. To convert image data to text, an LVM can be used to summarize the image. To leverage an LVM, please refer to this [readme](../../lvms/src/README.md) to start the LVM microservice first, then set the environment variable below before starting any dataprep microservice.

```bash
export SUMMARIZE_IMAGE_VIA_LVM=1
@@ -1,29 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
FROM python:3.11-slim
|
||||
|
||||
ARG ARCH="cpu" # Set this to "cpu" or "gpu"
|
||||
|
||||
# Set environment variables
|
||||
ENV LANG=en_US.UTF-8
|
||||
|
||||
RUN useradd -m -s /bin/bash user && \
|
||||
mkdir -p /home/user && \
|
||||
chown -R user /home/user/
|
||||
USER user
|
||||
|
||||
COPY comps /home/user/comps
|
||||
|
||||
RUN pip install --no-cache-dir --upgrade pip setuptools && \
|
||||
if [ ${ARCH} = "cpu" ]; then \
|
||||
pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r /home/user/comps/lvms/llava/requirements.txt; \
|
||||
else \
|
||||
pip install --no-cache-dir -r /home/user/comps/lvms/llava/requirements.txt; \
|
||||
fi
|
||||
|
||||
ENV PYTHONPATH=$PYTHONPATH:/home/user
|
||||
|
||||
WORKDIR /home/user/comps/lvms/llava
|
||||
|
||||
ENTRYPOINT ["python", "lvm.py"]
|
||||
@@ -1,2 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
@@ -1,22 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
|
||||
import base64
|
||||
import json
|
||||
from io import BytesIO
|
||||
|
||||
import PIL.Image
|
||||
import requests
|
||||
|
||||
image_path = "https://avatars.githubusercontent.com/u/39623753?v=4"
|
||||
|
||||
image = PIL.Image.open(requests.get(image_path, stream=True, timeout=3000).raw)
|
||||
buffered = BytesIO()
|
||||
image.save(buffered, format="PNG")
|
||||
img_b64_str = base64.b64encode(buffered.getvalue()).decode()
|
||||
|
||||
endpoint = "http://localhost:9399/v1/lvm"
|
||||
inputs = {"image": img_b64_str, "prompt": "What is this?", "max_new_tokens": 32}
|
||||
response = requests.post(url=endpoint, data=json.dumps(inputs), proxies={"http": None})
|
||||
print(response.json())
|
||||
@@ -1,103 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
from typing import Union
|
||||
|
||||
import requests
|
||||
from fastapi import HTTPException
|
||||
from langchain_core.prompts import PromptTemplate
|
||||
from template import ChatTemplate
|
||||
|
||||
from comps import (
|
||||
CustomLogger,
|
||||
LVMDoc,
|
||||
LVMSearchedMultimodalDoc,
|
||||
MetadataTextDoc,
|
||||
ServiceType,
|
||||
TextDoc,
|
||||
opea_microservices,
|
||||
register_microservice,
|
||||
register_statistics,
|
||||
statistics_dict,
|
||||
)
|
||||
|
||||
logger = CustomLogger("lvm")
|
||||
logflag = os.getenv("LOGFLAG", False)
|
||||
|
||||
|
||||
@register_microservice(
|
||||
name="opea_service@lvm",
|
||||
service_type=ServiceType.LVM,
|
||||
endpoint="/v1/lvm",
|
||||
host="0.0.0.0",
|
||||
port=9399,
|
||||
)
|
||||
@register_statistics(names=["opea_service@lvm"])
|
||||
async def lvm(request: Union[LVMDoc, LVMSearchedMultimodalDoc]) -> Union[TextDoc, MetadataTextDoc]:
|
||||
if logflag:
|
||||
logger.info(request)
|
||||
start = time.time()
|
||||
if isinstance(request, LVMSearchedMultimodalDoc):
|
||||
if logflag:
|
||||
logger.info("[LVMSearchedMultimodalDoc ] input from retriever microservice")
|
||||
retrieved_metadatas = request.metadata
|
||||
if retrieved_metadatas is None or len(retrieved_metadatas) == 0:
|
||||
# there is no video segments retrieved.
|
||||
# Raise HTTPException status_code 204
|
||||
# due to llava-tgi-gaudi should receive image as input; Otherwise, the generated text is bad.
|
||||
raise HTTPException(status_code=500, detail="There is no video segments retrieved given the query!")
|
||||
|
||||
img_b64_str = retrieved_metadatas[0]["b64_img_str"]
|
||||
has_image = img_b64_str != ""
|
||||
initial_query = request.initial_query
|
||||
context = retrieved_metadatas[0]["transcript_for_inference"]
|
||||
prompt = initial_query
|
||||
if request.chat_template is None:
|
||||
prompt = ChatTemplate.generate_multimodal_rag_on_videos_prompt(initial_query, context, has_image)
|
||||
else:
|
||||
prompt_template = PromptTemplate.from_template(request.chat_template)
|
||||
input_variables = prompt_template.input_variables
|
||||
if sorted(input_variables) == ["context", "question"]:
|
||||
prompt = prompt_template.format(question=initial_query, context=context)
|
||||
else:
|
||||
logger.info(
|
||||
f"[ LVMSearchedMultimodalDoc ] {prompt_template} not used, we only support 2 input variables ['question', 'context']"
|
||||
)
|
||||
max_new_tokens = request.max_new_tokens
|
||||
if logflag:
|
||||
logger.info(f"prompt generated for [LVMSearchedMultimodalDoc ] input from retriever microservice: {prompt}")
|
||||
|
||||
else:
|
||||
img_b64_str = request.image
|
||||
prompt = request.prompt
|
||||
max_new_tokens = request.max_new_tokens
|
||||
|
||||
inputs = {"img_b64_str": img_b64_str, "prompt": prompt, "max_new_tokens": max_new_tokens}
|
||||
# forward to the LLaVA server
|
||||
response = requests.post(url=f"{lvm_endpoint}/generate", data=json.dumps(inputs), proxies={"http": None})
|
||||
|
||||
statistics_dict["opea_service@lvm"].append_latency(time.time() - start, None)
|
||||
result = response.json()["text"]
|
||||
if logflag:
|
||||
logger.info(result)
|
||||
if isinstance(request, LVMSearchedMultimodalDoc):
|
||||
retrieved_metadata = request.metadata[0]
|
||||
return_metadata = {} # this metadata will be used to construct proof for generated text
|
||||
return_metadata["video_id"] = retrieved_metadata["video_id"]
|
||||
return_metadata["source_video"] = retrieved_metadata["source_video"]
|
||||
return_metadata["time_of_frame_ms"] = retrieved_metadata["time_of_frame_ms"]
|
||||
return_metadata["transcript_for_inference"] = retrieved_metadata["transcript_for_inference"]
|
||||
return MetadataTextDoc(text=result, metadata=return_metadata)
|
||||
else:
|
||||
return TextDoc(text=result)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
lvm_endpoint = os.getenv("LVM_ENDPOINT", "http://localhost:8399")
|
||||
|
||||
logger.info("[LVM] LVM initialized.")
|
||||
opea_microservices["opea_service@lvm"].start()
|
||||
@@ -1,17 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
|
||||
class ChatTemplate:
|
||||
|
||||
@staticmethod
|
||||
def generate_multimodal_rag_on_videos_prompt(question: str, context: str, has_image: bool = False):
|
||||
|
||||
if has_image:
|
||||
template = """The transcript associated with the image is '{context}'. {question}"""
|
||||
else:
|
||||
template = (
|
||||
"""Refer to the following results obtained from the local knowledge base: '{context}'. {question}"""
|
||||
)
|
||||
|
||||
return template.format(context=context, question=question)
|
||||
comps/lvms/src/Dockerfile (new file, 23 lines)
@@ -0,0 +1,23 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
FROM python:3.11-slim
|
||||
|
||||
RUN useradd -m -s /bin/bash user && \
|
||||
mkdir -p /home/user && \
|
||||
chown -R user /home/user/
|
||||
|
||||
USER user
|
||||
|
||||
ENV LANG=C.UTF-8
|
||||
|
||||
COPY comps /home/user/comps
|
||||
|
||||
RUN pip install --no-cache-dir --upgrade pip setuptools && \
|
||||
pip install --no-cache-dir -r /home/user/comps/lvms/src/requirements.txt
|
||||
|
||||
ENV PYTHONPATH=$PYTHONPATH:/home/user
|
||||
|
||||
WORKDIR /home/user/comps/lvms/src
|
||||
|
||||
ENTRYPOINT ["python", "opea_lvm_microservice.py"]
|
||||
comps/lvms/src/README.md (new file, 31 lines)
@@ -0,0 +1,31 @@
# LVM Microservice

Visual Question Answering is one of the multimodal tasks empowered by LVMs (Large Vision Models). This microservice supports visual Q&A by using LLaVA as the base large vision model. It accepts two inputs: a prompt and an image. It outputs the answer to the prompt about the image.

## Build Image & Run

Before building, start the backend [dependency](./integrations/dependency/) service that matches your deployment.

```bash
docker build --no-cache -t opea/lvm:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/src/Dockerfile .
# Change LVM_ENDPOINT to your dependency service endpoint
docker run -d --name="test-comps-lvm" -e LVM_ENDPOINT=http://localhost:8399 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 9399:9399 --ipc=host opea/lvm:comps
```

## Test

- LLaVA & llama-vision & PredictionGuard & TGI LLaVA

```bash
# curl with an image and a prompt
http_proxy="" curl http://localhost:9399/v1/lvm -XPOST -d '{"image": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "prompt":"What is this?"}' -H 'Content-Type: application/json'

# curl with only the prompt
http_proxy="" curl http://localhost:9399/v1/lvm --silent --write-out "HTTPSTATUS:%{http_code}" -XPOST -d '{"image": "", "prompt":"What is deep learning?"}' -H 'Content-Type: application/json'
```

- video-llama

```bash
http_proxy="" curl -X POST http://localhost:9399/v1/lvm -d '{"video_url":"https://github.com/DAMO-NLP-SG/Video-LLaMA/raw/main/examples/silence_girl.mp4","chunk_start": 0,"chunk_duration": 9,"prompt":"What is the person doing?","max_new_tokens": 150}' -H 'Content-Type: application/json'
```
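For readers who prefer a script over curl, here is a minimal Python sketch of the same image-and-prompt request. It is not part of this PR's diff; it assumes the LVM container started above is listening on localhost:9399 and mirrors the payload used by the repository's check_lvm.py helper.

```python
# Rough Python equivalent of the first curl test above (illustrative sketch only).
# Assumes the unified LVM microservice is listening on localhost:9399.
import json

import requests

# The same tiny base64-encoded PNG used in the curl example.
img_b64_str = "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC"

endpoint = "http://localhost:9399/v1/lvm"
inputs = {"image": img_b64_str, "prompt": "What is this?", "max_new_tokens": 32}

# proxies={"http": None} mirrors the http_proxy="" prefix in the curl examples.
response = requests.post(url=endpoint, data=json.dumps(inputs), proxies={"http": None}, timeout=120)
response.raise_for_status()
print(response.json())
```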
@@ -21,12 +21,12 @@ RUN git lfs install
|
||||
|
||||
COPY comps /home/user/comps
|
||||
|
||||
RUN cd /home/user/comps/lvms/llama-vision/ && \
|
||||
RUN cd /home/user/comps/lvms/src/integrations/dependency/llama-vision/ && \
|
||||
pip install --no-cache-dir -r requirements.txt && \
|
||||
pip install --no-cache-dir --upgrade Pillow
|
||||
|
||||
ENV PYTHONPATH=/root:/home/user
|
||||
|
||||
WORKDIR /home/user/comps/lvms/llama-vision/
|
||||
WORKDIR /home/user/comps/lvms/src/integrations/dependency/llama-vision/
|
||||
|
||||
ENTRYPOINT ["python", "lvm.py"]
|
||||
@@ -21,12 +21,12 @@ RUN git lfs install
|
||||
|
||||
COPY comps /home/user/comps
|
||||
|
||||
RUN cd /home/user/comps/lvms/llama-vision/ && \
|
||||
RUN cd /home/user/comps/lvms/src/integrations/dependency/llama-vision/ && \
|
||||
pip install --no-cache-dir -r requirements.txt && \
|
||||
pip install --no-cache-dir --upgrade Pillow
|
||||
|
||||
ENV PYTHONPATH=/root:/home/user
|
||||
|
||||
WORKDIR /home/user/comps/lvms/llama-vision/
|
||||
WORKDIR /home/user/comps/lvms/src/integrations/dependency/llama-vision/
|
||||
|
||||
ENTRYPOINT ["python", "lvm_guard.py"]
|
||||
@@ -22,13 +22,13 @@ COPY comps /home/user/comps
|
||||
RUN pip install --no-cache-dir git+https://github.com/HabanaAI/DeepSpeed.git@1.17.1
|
||||
RUN pip install --no-cache-dir git+https://github.com/huggingface/optimum-habana@v1.13.2
|
||||
|
||||
RUN cd /home/user/comps/lvms/llama-vision/ \
|
||||
RUN cd /home/user/comps/lvms/src/integrations/dependency/llama-vision/ \
|
||||
pip install --no-cache-dir --upgrade pip && \
|
||||
bash update && \
|
||||
pip install --no-cache-dir -r /home/user/comps/lvms/llama-vision/requirements_tp.txt
|
||||
pip install --no-cache-dir -r /home/user/comps/lvms/src/integrations/dependency/llama-vision/requirements_tp.txt
|
||||
|
||||
ENV PYTHONPATH=/root:/home/user
|
||||
|
||||
WORKDIR /home/user/comps/lvms/llama-vision/
|
||||
WORKDIR /home/user/comps/lvms/src/integrations/dependency/llama-vision/
|
||||
|
||||
ENTRYPOINT ["bash", "run_tp.sh"]
|
||||
@@ -10,7 +10,7 @@ Visual Question and Answering is one of the multimodal tasks empowered by LVMs (

```bash
cd ../../../
docker build -t opea/lvm-llama-vision:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/llama-vision/Dockerfile .
docker build -t opea/lvm-llama-vision:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/src/integrations/dependency/llama-vision/Dockerfile .
```

#### Build Llama Vision Model with deepspeed
@@ -18,14 +18,14 @@ docker build -t opea/lvm-llama-vision:latest --build-arg https_proxy=$https_prox
If you need to build the image for 90B models, use the following command:

```bash
docker build -t opea/lvm-llama-vision-tp:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/llama-vision/Dockerfile_tp .
docker build -t opea/lvm-llama-vision-tp:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/src/integrations/dependency/llama-vision/Dockerfile_tp .
```

#### Build Llama Vision Guard Model

```bash
cd ../../../
docker build -t opea/lvm-llama-vision-guard:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/llama-vision/Dockerfile_guard .
docker build -t opea/lvm-llama-vision-guard:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/src/integrations/dependency/llama-vision/Dockerfile_guard .
```

### Start Llama LVM Service
@@ -13,10 +13,10 @@ ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/optimum-habana
|
||||
COPY comps /home/user/comps
|
||||
|
||||
RUN pip install --no-cache-dir --upgrade pip setuptools && \
|
||||
pip install --no-cache-dir -r /home/user/comps/lvms/llava/requirements.txt
|
||||
pip install --no-cache-dir -r /home/user/comps/lvms/src/integrations/dependency/llava/requirements.txt
|
||||
|
||||
ENV PYTHONPATH=$PYTHONPATH:/home/user
|
||||
|
||||
WORKDIR /home/user/comps/lvms/llava/dependency
|
||||
WORKDIR /home/user/comps/lvms/src/integrations/dependency/llava
|
||||
|
||||
ENTRYPOINT ["python", "llava_server.py", "--device", "cpu"]
|
||||
@@ -17,11 +17,11 @@ COPY comps /home/user/comps
|
||||
|
||||
# Install requirements and optimum habana
|
||||
RUN pip install --no-cache-dir --upgrade pip && \
|
||||
pip install --no-cache-dir -r /home/user/comps/lvms/llava/requirements.txt && \
|
||||
pip install --no-cache-dir -r /home/user/comps/lvms/src/integrations/dependency/llava/requirements.txt && \
|
||||
pip install --no-cache-dir optimum[habana]
|
||||
|
||||
ENV PYTHONPATH=$PYTHONPATH:/home/user
|
||||
|
||||
WORKDIR /home/user/comps/lvms/llava/dependency
|
||||
WORKDIR /home/user/comps/lvms/src/integrations/dependency/llava/
|
||||
|
||||
ENTRYPOINT ["python", "llava_server.py"]
|
||||
@@ -16,7 +16,6 @@ pip install -r requirements.txt

```bash
# Start LLaVA service
cd dependency/
nohup python llava_server.py --device=cpu &
# Wait until the server is up
# Test
@@ -30,23 +29,12 @@ pip install optimum[habana]
```

```bash
cd dependency/
# Start LLaVA service
nohup python llava_server.py &
# Test
python check_llava_server.py
```

### 1.3 Start Image To Text Service/Test

```bash
cd ..
# Start the OPEA Microservice
python lvm.py
# Test
python check_lvm.py
```

## 🚀2. Start Microservice with Docker (Option 2)

### 2.1 Build Images
@@ -57,21 +45,14 @@ python check_lvm.py

```bash
cd ../../../
docker build -t opea/lvm-llava:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/llava/dependency/Dockerfile .
docker build -t opea/lvm-llava:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/src/integrations/dependency/llava/Dockerfile .
```

- Gaudi2 HPU

```bash
cd ../../../
docker build -t opea/lvm-llava:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/llava/dependency/Dockerfile.intel_hpu .
```

#### 2.1.2 LVM Service Image

```bash
cd ../../../
docker build -t opea/lvm-llava-svc:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/llava/Dockerfile .
docker build -t opea/lvm-llava:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/src/integrations/dependency/llava/Dockerfile.intel_hpu .
```

### 2.2 Start LLaVA and LVM Service
@@ -90,25 +71,9 @@ docker run -p 8399:8399 -e http_proxy=$http_proxy --ipc=host -e https_proxy=$htt
docker run -p 8399:8399 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/lvm-llava:latest
```

#### 2.2.2 Start LVM service
#### 2.2.2 Test

```bash
ip_address=$(hostname -I | awk '{print $1}')

docker run -p 9399:9399 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e LVM_ENDPOINT=http://$ip_address:8399 opea/lvm-llava-svc:latest
```

#### 2.2.3 Test

```bash
# Use curl/python

# curl with an image and a prompt
http_proxy="" curl http://localhost:9399/v1/lvm -XPOST -d '{"image": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "prompt":"What is this?"}' -H 'Content-Type: application/json'

# curl with a prompt only (no image)
http_proxy="" curl http://localhost:9399/v1/lvm -XPOST -d '{"image": "", "prompt":"What is deep learning?"}' -H 'Content-Type: application/json'

# python
python check_lvm.py
# Test
python check_llava_server.py
```
@@ -9,10 +9,10 @@ ENV LANG=en_US.UTF-8
|
||||
COPY comps /home/comps
|
||||
|
||||
RUN pip install --no-cache-dir --upgrade pip setuptools && \
|
||||
pip install --no-cache-dir -r /home/comps/lvms/predictionguard/requirements.txt
|
||||
pip install --no-cache-dir -r /home/comps/lvms/src/integrations/dependency/predictionguard/requirements.txt
|
||||
|
||||
ENV PYTHONPATH=$PYTHONPATH:/home
|
||||
|
||||
WORKDIR /home/comps/lvms/predictionguard
|
||||
WORKDIR /home/comps/lvms/src/integrations/dependency/predictionguard
|
||||
|
||||
ENTRYPOINT ["python", "lvm.py"]
|
||||
@@ -32,7 +32,7 @@ export PREDICTIONGUARD_API_KEY=${your_predictionguard_api_key}

```bash
cd ../../..
docker build -t opea/lvm-predictionguard:latest -f comps/lvms/predictionguard/Dockerfile .
docker build -t opea/lvm-predictionguard:latest -f comps/lvms/src/integrations/dependency/predictionguard/Dockerfile .
```

### 2.2 Start Service
@@ -17,10 +17,10 @@ RUN mkdir /home/user/model && chown user:user -R /home/user/model
|
||||
USER user
|
||||
|
||||
COPY --chown=user:user comps /home/user/comps
|
||||
WORKDIR /home/user/comps/lvms/video-llama/dependency
|
||||
WORKDIR /home/user/comps/lvms/src/integrations/dependency/video-llama/
|
||||
|
||||
RUN pip install --no-cache-dir --upgrade pip && \
|
||||
pip install --no-cache-dir -r /home/user/comps/lvms/video-llama/dependency/requirements.txt
|
||||
pip install --no-cache-dir -r /home/user/comps/lvms/src/integrations/dependency/video-llama/requirements.txt
|
||||
|
||||
ARG VIDEO_LLAMA_REPO=https://github.com/DAMO-NLP-SG/Video-LLaMA.git
|
||||
ARG VIDEO_LLAMA_COMMIT=0adb19e
|
||||
@@ -9,9 +9,9 @@ This is a Docker-based microservice that runs Video-Llama as a Large Vision Mode
```bash
cd GenAIComps
# Video-Llama Server Image
docker build --no-cache -t opea/video-llama-lvm-server:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/video-llama/dependency/Dockerfile .
docker build --no-cache -t opea/lvm-video-llama:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/src/integrations/dependency/video-llama/Dockerfile .
# LVM Service Image
docker build --no-cache -t opea/lvm-video-llama:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/video-llama/Dockerfile .
docker build --no-cache -t opea/lvm:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/src/integrations/dependency/video-llama/Dockerfile .
```

### 1.2 Start Video-Llama and LVM Services
@@ -24,9 +24,9 @@ export ip_address=$(hostname -I | awk '{print $1}')
export no_proxy=$no_proxy,${ip_address}
export LVM_ENDPOINT=http://${ip_address}:9009
# Start service
docker compose -f comps/lvms/video-llama/docker_compose.yaml up -d
docker compose -f comps/lvms/src/integrations/dependency/video-llama/docker_compose.yaml up -d
# it should take about 1.5 hours for the model to download in the video-llama server, assuming a maximum download speed of 100 Mbps
until docker logs video-llama-lvm-server 2>&1 | grep -q "Uvicorn running on"; do
until docker logs lvm-video-llama 2>&1 | grep -q "Uvicorn running on"; do
  sleep 5m
done
```
@@ -34,7 +34,7 @@ done
If you've run the microservice before, it's recommended to keep the downloaded model so it won't be redownloaded each time you run it. To achieve this, you need to modify the following configuration:

```yaml
# comps/lvms/video-llama/docker_compose.yaml
# comps/lvms/src/integrations/dependency/video-llama/docker_compose.yaml
services:
  lvm-video-llama:
    ...
@@ -49,13 +49,6 @@ services:
export ip_address=$(hostname -I | awk '{print $1}')
## check video-llama
http_proxy="" curl -X POST "http://${ip_address}:9009/generate?video_url=https%3A%2F%2Fgithub.com%2FDAMO-NLP-SG%2FVideo-LLaMA%2Fraw%2Fmain%2Fexamples%2Fsilence_girl.mp4&start=0.0&duration=9&prompt=What%20is%20the%20person%20doing%3F&max_new_tokens=150" -H "accept: */*" -d ''

## check lvm
http_proxy="" curl -X POST http://${ip_address}:9000/v1/lvm -d '{"video_url":"https://github.com/DAMO-NLP-SG/Video-LLaMA/raw/main/examples/silence_girl.mp4","chunk_start": 0,"chunk_duration": 9,"prompt":"What is the person doing?","max_new_tokens": 150}' -H 'Content-Type: application/json'

# or use python
export ip_address=$(hostname -I | awk '{print $1}')
python comps/lvms/video-llama/check_lvm.py
```

## ♻️ 3. Clean
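Since this hunk removes the old `check_lvm.py` helper invocation, here is a minimal Python sketch equivalent to the `## check lvm` curl above. It is illustrative only; it assumes the LVM service is reachable on port 9000 of the local host, as in that command, and simply prints whatever the service streams back.

```python
# Rough Python equivalent of the "## check lvm" curl above (illustrative sketch only).
# Assumes the LVM service is reachable at http://localhost:9000, as in that command.
import requests

endpoint = "http://localhost:9000/v1/lvm"
payload = {
    "video_url": "https://github.com/DAMO-NLP-SG/Video-LLaMA/raw/main/examples/silence_girl.mp4",
    "chunk_start": 0,
    "chunk_duration": 9,
    "prompt": "What is the person doing?",
    "max_new_tokens": 150,
}

# The video-llama LVM streams its answer, so read and print the response chunk by chunk.
with requests.post(endpoint, json=payload, proxies={"http": None}, stream=True, timeout=300) as response:
    response.raise_for_status()
    for chunk in response.iter_content(chunk_size=8192):
        if chunk:
            print(chunk.decode("utf-8", errors="replace"), end="")
```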
@@ -4,8 +4,8 @@
|
||||
version: "3"
|
||||
services:
|
||||
lvm-video-llama:
|
||||
image: opea/video-llama-lvm-server:latest
|
||||
container_name: video-llama-lvm-server
|
||||
image: opea/lvm-video-llama:latest
|
||||
container_name: lvm-video-llama
|
||||
ports:
|
||||
- "9009:9009"
|
||||
ipc: host
|
||||
@@ -29,7 +29,7 @@ timm
|
||||
torch==1.13.1 --index-url https://download.pytorch.org/whl/cpu
|
||||
torchaudio==0.13.1 --index-url https://download.pytorch.org/whl/cpu
|
||||
torchvision==0.14.1 --index-url https://download.pytorch.org/whl/cpu
|
||||
transformers
|
||||
transformers==4.47.1
|
||||
uvicorn
|
||||
validators
|
||||
webdataset
|
||||
@@ -37,7 +37,7 @@ context_db = None
|
||||
streamer = None
|
||||
chat = None
|
||||
|
||||
VIDEO_DIR = "/home/user/comps/lvms/video-llama/dependency/data"
|
||||
VIDEO_DIR = "/home/user/comps/lvms/src/integrations/dependency/video-llama/data"
|
||||
|
||||
CFG_PATH = "video_llama_config/video_llama_eval_only_vl.yaml"
|
||||
MODEL_TYPE = "llama_v2"
|
||||
comps/lvms/src/integrations/opea_llama_vision.py (new file, 60 lines)
@@ -0,0 +1,60 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import json
|
||||
import os
|
||||
from typing import Union
|
||||
|
||||
import requests
|
||||
|
||||
from comps import CustomLogger, LVMDoc, OpeaComponent, OpeaComponentRegistry, ServiceType, TextDoc
|
||||
|
||||
logger = CustomLogger("opea_llama_vision")
|
||||
logflag = os.getenv("LOGFLAG", False)
|
||||
|
||||
|
||||
@OpeaComponentRegistry.register("OPEA_LLAVA_VISION_LVM")
|
||||
class OpeaLlamaVisionLvm(OpeaComponent):
|
||||
"""A specialized LVM component derived from OpeaComponent for LLaMA-Vision services."""
|
||||
|
||||
def __init__(self, name: str, description: str, config: dict = None):
|
||||
super().__init__(name, ServiceType.LVM.name.lower(), description, config)
|
||||
self.base_url = os.getenv("LVM_ENDPOINT", "http://localhost:9399")
|
||||
health_status = self.check_health()
|
||||
if not health_status:
|
||||
logger.error("specialized health check failed.")
|
||||
|
||||
async def invoke(
|
||||
self,
|
||||
request: Union[LVMDoc],
|
||||
) -> Union[TextDoc]:
|
||||
"""Involve the LVM service to generate answer for the provided input."""
|
||||
if logflag:
|
||||
logger.info(request)
|
||||
|
||||
inputs = {"image": request.image, "prompt": request.prompt, "max_new_tokens": request.max_new_tokens}
|
||||
# forward to the LLaMA Vision server
|
||||
response = requests.post(url=f"{self.base_url}/v1/lvm", data=json.dumps(inputs), proxies={"http": None})
|
||||
|
||||
result = response.json()["text"]
|
||||
if logflag:
|
||||
logger.info(result)
|
||||
|
||||
return TextDoc(text=result)
|
||||
|
||||
def check_health(self) -> bool:
|
||||
"""Checks the health of the embedding service.
|
||||
|
||||
Returns:
|
||||
bool: True if the service is reachable and healthy, False otherwise.
|
||||
"""
|
||||
try:
|
||||
response = requests.get(f"{self.base_url}/health")
|
||||
if response.status_code == 200:
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
except Exception as e:
|
||||
# Handle connection errors, timeouts, etc.
|
||||
logger.error(f"Health check failed: {e}")
|
||||
return False
|
||||
comps/lvms/src/integrations/opea_llava.py (new file, 130 lines)
@@ -0,0 +1,130 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import json
|
||||
import os
|
||||
from typing import Union
|
||||
|
||||
import requests
|
||||
from fastapi import HTTPException
|
||||
from langchain_core.prompts import PromptTemplate
|
||||
|
||||
from comps import (
|
||||
CustomLogger,
|
||||
LVMDoc,
|
||||
LVMSearchedMultimodalDoc,
|
||||
MetadataTextDoc,
|
||||
OpeaComponent,
|
||||
OpeaComponentRegistry,
|
||||
ServiceType,
|
||||
TextDoc,
|
||||
)
|
||||
|
||||
logger = CustomLogger("opea_llava")
|
||||
logflag = os.getenv("LOGFLAG", False)
|
||||
|
||||
|
||||
class ChatTemplate:
|
||||
|
||||
@staticmethod
|
||||
def generate_multimodal_rag_on_videos_prompt(question: str, context: str, has_image: bool = False):
|
||||
|
||||
if has_image:
|
||||
template = """The transcript associated with the image is '{context}'. {question}"""
|
||||
else:
|
||||
template = (
|
||||
"""Refer to the following results obtained from the local knowledge base: '{context}'. {question}"""
|
||||
)
|
||||
|
||||
return template.format(context=context, question=question)
|
||||
|
||||
|
||||
@OpeaComponentRegistry.register("OPEA_LLAVA_LVM")
|
||||
class OpeaLlavaLvm(OpeaComponent):
|
||||
"""A specialized LVM component derived from OpeaComponent for LLaVA LVM services."""
|
||||
|
||||
def __init__(self, name: str, description: str, config: dict = None):
|
||||
super().__init__(name, ServiceType.LVM.name.lower(), description, config)
|
||||
self.base_url = os.getenv("LVM_ENDPOINT", "http://localhost:8399")
|
||||
health_status = self.check_health()
|
||||
if not health_status:
|
||||
logger.error("OpeaLlavaLvm health check failed.")
|
||||
|
||||
async def invoke(
|
||||
self,
|
||||
request: Union[LVMDoc, LVMSearchedMultimodalDoc],
|
||||
) -> Union[TextDoc, MetadataTextDoc]:
|
||||
"""Involve the LVM service to generate answer for the provided input."""
|
||||
if logflag:
|
||||
logger.info(request)
|
||||
if isinstance(request, LVMSearchedMultimodalDoc):
|
||||
if logflag:
|
||||
logger.info("[LVMSearchedMultimodalDoc ] input from retriever microservice")
|
||||
retrieved_metadatas = request.metadata
|
||||
if retrieved_metadatas is None or len(retrieved_metadatas) == 0:
|
||||
# there is no video segments retrieved.
|
||||
# Raise HTTPException status_code 204
|
||||
# due to llava-tgi-gaudi should receive image as input; Otherwise, the generated text is bad.
|
||||
raise HTTPException(status_code=500, detail="There is no video segments retrieved given the query!")
|
||||
|
||||
img_b64_str = retrieved_metadatas[0]["b64_img_str"]
|
||||
has_image = img_b64_str != ""
|
||||
initial_query = request.initial_query
|
||||
context = retrieved_metadatas[0]["transcript_for_inference"]
|
||||
prompt = initial_query
|
||||
if request.chat_template is None:
|
||||
prompt = ChatTemplate.generate_multimodal_rag_on_videos_prompt(initial_query, context, has_image)
|
||||
else:
|
||||
prompt_template = PromptTemplate.from_template(request.chat_template)
|
||||
input_variables = prompt_template.input_variables
|
||||
if sorted(input_variables) == ["context", "question"]:
|
||||
prompt = prompt_template.format(question=initial_query, context=context)
|
||||
else:
|
||||
logger.info(
|
||||
f"[ LVMSearchedMultimodalDoc ] {prompt_template} not used, we only support 2 input variables ['question', 'context']"
|
||||
)
|
||||
max_new_tokens = request.max_new_tokens
|
||||
if logflag:
|
||||
logger.info(
|
||||
f"prompt generated for [LVMSearchedMultimodalDoc ] input from retriever microservice: {prompt}"
|
||||
)
|
||||
|
||||
else:
|
||||
img_b64_str = request.image
|
||||
prompt = request.prompt
|
||||
max_new_tokens = request.max_new_tokens
|
||||
|
||||
inputs = {"img_b64_str": img_b64_str, "prompt": prompt, "max_new_tokens": max_new_tokens}
|
||||
# forward to the LLaVA server
|
||||
response = requests.post(url=f"{self.base_url}/generate", data=json.dumps(inputs), proxies={"http": None})
|
||||
|
||||
result = response.json()["text"]
|
||||
if logflag:
|
||||
logger.info(result)
|
||||
if isinstance(request, LVMSearchedMultimodalDoc):
|
||||
retrieved_metadata = request.metadata[0]
|
||||
return_metadata = {} # this metadata will be used to construct proof for generated text
|
||||
return_metadata["video_id"] = retrieved_metadata["video_id"]
|
||||
return_metadata["source_video"] = retrieved_metadata["source_video"]
|
||||
return_metadata["time_of_frame_ms"] = retrieved_metadata["time_of_frame_ms"]
|
||||
return_metadata["transcript_for_inference"] = retrieved_metadata["transcript_for_inference"]
|
||||
return MetadataTextDoc(text=result, metadata=return_metadata)
|
||||
else:
|
||||
return TextDoc(text=result)
|
||||
|
||||
def check_health(self) -> bool:
|
||||
"""Checks the health of the embedding service.
|
||||
|
||||
Returns:
|
||||
bool: True if the service is reachable and healthy, False otherwise.
|
||||
"""
|
||||
try:
|
||||
response = requests.get(f"{self.base_url}/health")
|
||||
if response.status_code == 200:
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
except Exception as e:
|
||||
# Handle connection errors, timeouts, etc.
|
||||
logger.error(f"Health check failed: {e}")
|
||||
return False
|
||||
comps/lvms/src/integrations/opea_predictionguard.py (new file, 60 lines)
@@ -0,0 +1,60 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import json
|
||||
import os
|
||||
from typing import Union
|
||||
|
||||
import requests
|
||||
|
||||
from comps import CustomLogger, LVMDoc, OpeaComponent, OpeaComponentRegistry, ServiceType, TextDoc
|
||||
|
||||
logger = CustomLogger("opea_predictionguard")
|
||||
logflag = os.getenv("LOGFLAG", False)
|
||||
|
||||
|
||||
@OpeaComponentRegistry.register("OPEA_PREDICTION_GUARD_LVM")
|
||||
class OpeaPredictionguardLvm(OpeaComponent):
|
||||
"""A specialized LVM component derived from OpeaComponent for Predictionguard services."""
|
||||
|
||||
def __init__(self, name: str, description: str, config: dict = None):
|
||||
super().__init__(name, ServiceType.LVM.name.lower(), description, config)
|
||||
self.base_url = os.getenv("LVM_ENDPOINT", "http://localhost:9399")
|
||||
health_status = self.check_health()
|
||||
if not health_status:
|
||||
logger.error("OpeaPredictionguardLvm health check failed.")
|
||||
|
||||
async def invoke(
|
||||
self,
|
||||
request: Union[LVMDoc],
|
||||
) -> Union[TextDoc]:
|
||||
"""Involve the LVM service to generate answer for the provided input."""
|
||||
if logflag:
|
||||
logger.info(request)
|
||||
|
||||
inputs = {"image": request.image, "prompt": request.prompt, "max_new_tokens": request.max_new_tokens}
|
||||
# forward to the PredictionGuard server
|
||||
response = requests.post(url=f"{self.base_url}/v1/lvm", data=json.dumps(inputs), proxies={"http": None})
|
||||
|
||||
result = response.json()["text"]
|
||||
if logflag:
|
||||
logger.info(result)
|
||||
|
||||
return TextDoc(text=result)
|
||||
|
||||
def check_health(self) -> bool:
|
||||
"""Checks the health of the embedding service.
|
||||
|
||||
Returns:
|
||||
bool: True if the service is reachable and healthy, False otherwise.
|
||||
"""
|
||||
try:
|
||||
response = requests.get(f"{self.base_url}/health")
|
||||
if response.status_code == 200:
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
except Exception as e:
|
||||
# Handle connection errors, timeouts, etc.
|
||||
logger.error(f"Health check failed: {e}")
|
||||
return False
|
||||
comps/lvms/src/integrations/opea_tgi_llava.py (new file, 187 lines)
@@ -0,0 +1,187 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import os
|
||||
import time
|
||||
from typing import Union
|
||||
|
||||
import requests
|
||||
from fastapi import HTTPException
|
||||
from fastapi.responses import StreamingResponse
|
||||
from huggingface_hub import AsyncInferenceClient
|
||||
from langchain_core.prompts import PromptTemplate
|
||||
|
||||
from comps import (
|
||||
CustomLogger,
|
||||
LVMDoc,
|
||||
LVMSearchedMultimodalDoc,
|
||||
MetadataTextDoc,
|
||||
OpeaComponent,
|
||||
OpeaComponentRegistry,
|
||||
ServiceType,
|
||||
TextDoc,
|
||||
statistics_dict,
|
||||
)
|
||||
|
||||
logger = CustomLogger("opea_tgi_llava")
|
||||
logflag = os.getenv("LOGFLAG", False)
|
||||
|
||||
|
||||
class ChatTemplate:
|
||||
|
||||
@staticmethod
|
||||
def generate_multimodal_rag_on_videos_prompt(question: str, context: str, has_image: bool = False):
|
||||
|
||||
if has_image:
|
||||
template = """The transcript associated with the image is '{context}'. {question}"""
|
||||
else:
|
||||
template = (
|
||||
"""Refer to the following results obtained from the local knowledge base: '{context}'. {question}"""
|
||||
)
|
||||
|
||||
return template.format(context=context, question=question)
|
||||
|
||||
|
||||
@OpeaComponentRegistry.register("OPEA_TGI_LLAVA_LVM")
|
||||
class OpeaTgiLlavaLvm(OpeaComponent):
|
||||
"""A specialized TGI LVM component derived from OpeaComponent for LLaVA LVM services."""
|
||||
|
||||
def __init__(self, name: str, description: str, config: dict = None):
|
||||
super().__init__(name, ServiceType.LVM.name.lower(), description, config)
|
||||
self.base_url = os.getenv("LVM_ENDPOINT", "http://localhost:8399")
|
||||
self.lvm_client = AsyncInferenceClient(self.base_url)
|
||||
health_status = self.check_health()
|
||||
if not health_status:
|
||||
logger.error("OpeaTgiLlavaLvm health check failed.")
|
||||
|
||||
async def invoke(
|
||||
self,
|
||||
request: Union[LVMDoc, LVMSearchedMultimodalDoc],
|
||||
) -> Union[TextDoc, MetadataTextDoc]:
|
||||
"""Involve the LVM service to generate answer for the provided input."""
|
||||
if logflag:
|
||||
logger.info(request)
|
||||
if isinstance(request, LVMSearchedMultimodalDoc):
|
||||
if logflag:
|
||||
logger.info("[LVMSearchedMultimodalDoc ] input from retriever microservice")
|
||||
retrieved_metadatas = request.metadata
|
||||
if retrieved_metadatas is None or len(retrieved_metadatas) == 0:
|
||||
# there is no video segments retrieved.
|
||||
# Raise HTTPException status_code 204
|
||||
# due to llava-tgi-gaudi should receive image as input; Otherwise, the generated text is bad.
|
||||
raise HTTPException(status_code=500, detail="There is no video segments retrieved given the query!")
|
||||
|
||||
img_b64_str = retrieved_metadatas[0]["b64_img_str"]
|
||||
has_image = img_b64_str != ""
|
||||
initial_query = request.initial_query
|
||||
context = retrieved_metadatas[0]["transcript_for_inference"]
|
||||
prompt = initial_query
|
||||
if request.chat_template is None:
|
||||
prompt = ChatTemplate.generate_multimodal_rag_on_videos_prompt(initial_query, context, has_image)
|
||||
else:
|
||||
prompt_template = PromptTemplate.from_template(request.chat_template)
|
||||
input_variables = prompt_template.input_variables
|
||||
if sorted(input_variables) == ["context", "question"]:
|
||||
prompt = prompt_template.format(question=initial_query, context=context)
|
||||
else:
|
||||
logger.info(
|
||||
f"[ LVMSearchedMultimodalDoc ] {prompt_template} not used, we only support 2 input variables ['question', 'context']"
|
||||
)
|
||||
max_new_tokens = request.max_new_tokens
|
||||
stream = request.stream
|
||||
repetition_penalty = request.repetition_penalty
|
||||
temperature = request.temperature
|
||||
top_k = request.top_k
|
||||
top_p = request.top_p
|
||||
if logflag:
|
||||
logger.info(
|
||||
f"prompt generated for [LVMSearchedMultimodalDoc ] input from retriever microservice: {prompt}"
|
||||
)
|
||||
|
||||
else:
|
||||
img_b64_str = request.image
|
||||
prompt = request.prompt
|
||||
max_new_tokens = request.max_new_tokens
|
||||
stream = request.stream
|
||||
repetition_penalty = request.repetition_penalty
|
||||
temperature = request.temperature
|
||||
top_k = request.top_k
|
||||
top_p = request.top_p
|
||||
|
||||
if not img_b64_str:
|
||||
# Work around an issue where LLaVA-NeXT is not providing good responses when prompted without an image.
|
||||
# Provide an image and then instruct the model to ignore the image. The base64 string below is the encoded png:
|
||||
# https://raw.githubusercontent.com/opea-project/GenAIExamples/refs/tags/v1.0/AudioQnA/ui/svelte/src/lib/assets/icons/png/audio1.png
|
||||
img_b64_str = "iVBORw0KGgoAAAANSUhEUgAAADUAAAAlCAYAAADiMKHrAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAKPSURBVHgB7Zl/btowFMefnUTqf+MAHYMTjN4gvcGOABpM+8E0doLSE4xpsE3rKuAG3KC5Ad0J6MYOkP07YnvvhR9y0lVzupTIVT5SwDjB9fd97WfsMkCef1rUXM8dY9HHK4hWUevzi/oVWAqnF8fzLmAtiPA3Aq0lFsVA1fRKxlgNLIbDPaQUZQuu6YO98aIipHOiFGtIqaYfn1UnUCDds6WPyeANlTFbv9WztbFTK+HNUVAPiz7nbPzq7HsPCoKWIBREGfsJXZit5xT07X0jp6iRdIbEHOnjyyD97OvzH00lVS2K5OS2ax11cBXxJgYxlEIE6XZclzdTX6n8XjkkcEIfbj2nMO0/SNd1vy4vsCNjYPyEovfyy88GZIQCSKOCMf6ORgStoboLJuSWKDYCfK2q4jjrMZ+GOh7Pib/gek5DHxVUJtcgA7mJ4kwZRbN7viQXFzQn0Nl52gXG4Fo7DKAYp0yI3VHQ16oaWV0wYa+iGE8nG+wAdx5DzpS/KGyhFGULpShbKEXZQinqLlBK/IKc2asoh4sZvoXJWhlAzuxV1KBVD3HrfYTFAK8ZHgu0hu36DHLG+Izinw250WUkXHJht02QUnxLP7fZxR7f1I6S7Ir2GgmYvIQM5OYUuYBdainATq2ZjTqPBlnbGXYeBrg9Od18DKmc1U0jpw4OIIwEJFxQSl2b4MN2lf74fw8nFNbHt/5N9xWKTZvJ2S6YZk6RC3j2cKpVhSIShZ0mea6caCOCAjyNHd5gPPxGncMBTvI6hunYdaJ6kf8VoSCP2odxX6RkR6NOtanfj13EswKVqEQrPzzFL1lK+YvCFraiEqs8TrwQLGYraqpX4kr/Hixml+63Z+CoM9DTo438AUmP+KyMWT+tAAAAAElFTkSuQmCC"
|
||||
prompt = f"Please disregard the image and answer the question. {prompt}"
|
||||
|
||||
image = f"data:image/png;base64,{img_b64_str}"
|
||||
image_prompt = f"\n{prompt}\nASSISTANT:"
|
||||
|
||||
if stream:
|
||||
t_start = time.time()
|
||||
|
||||
async def stream_generator(time_start):
|
||||
first_token_latency = None
|
||||
chat_response = ""
|
||||
text_generation = await self.lvm_client.text_generation(
|
||||
prompt=image_prompt,
|
||||
stream=stream,
|
||||
max_new_tokens=max_new_tokens,
|
||||
repetition_penalty=repetition_penalty,
|
||||
temperature=temperature,
|
||||
top_k=top_k,
|
||||
top_p=top_p,
|
||||
)
|
||||
async for text in text_generation:
|
||||
if first_token_latency is None:
|
||||
first_token_latency = time.time() - time_start
|
||||
chat_response += text
|
||||
chunk_repr = repr(text.encode("utf-8"))
|
||||
if logflag:
|
||||
logger.info(f"[llm - chat_stream] chunk:{chunk_repr}")
|
||||
yield f"data: {chunk_repr}\n\n"
|
||||
if logflag:
|
||||
logger.info(f"[llm - chat_stream] stream response: {chat_response}")
|
||||
statistics_dict["opea_service@lvm"].append_latency(time.time() - time_start, first_token_latency)
|
||||
yield "data: [DONE]\n\n"
|
||||
|
||||
return StreamingResponse(stream_generator(t_start), media_type="text/event-stream")
|
||||
else:
|
||||
generated_str = await self.lvm_client.text_generation(
|
||||
image_prompt,
|
||||
max_new_tokens=max_new_tokens,
|
||||
repetition_penalty=repetition_penalty,
|
||||
temperature=temperature,
|
||||
top_k=top_k,
|
||||
top_p=top_p,
|
||||
)
|
||||
if logflag:
|
||||
logger.info(generated_str)
|
||||
if isinstance(request, LVMSearchedMultimodalDoc):
|
||||
retrieved_metadata = request.metadata[0]
|
||||
return_metadata = {} # this metadata will be used to construct proof for generated text
|
||||
return_metadata["video_id"] = retrieved_metadata["video_id"]
|
||||
return_metadata["source_video"] = retrieved_metadata["source_video"]
|
||||
return_metadata["time_of_frame_ms"] = retrieved_metadata["time_of_frame_ms"]
|
||||
return_metadata["transcript_for_inference"] = retrieved_metadata["transcript_for_inference"]
|
||||
return MetadataTextDoc(text=generated_str, metadata=return_metadata)
|
||||
else:
|
||||
return TextDoc(text=generated_str)
|
||||
|
||||
def check_health(self) -> bool:
|
||||
"""Checks the health of the embedding service.
|
||||
|
||||
Returns:
|
||||
bool: True if the service is reachable and healthy, False otherwise.
|
||||
"""
|
||||
try:
|
||||
response = requests.get(f"{self.base_url}/health")
|
||||
if response.status_code == 200:
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
except Exception as e:
|
||||
# Handle connection errors, timeouts, etc.
|
||||
logger.error(f"Health check failed: {e}")
|
||||
return False
|
||||
comps/lvms/src/integrations/opea_video_llama.py (new file, 100 lines)
@@ -0,0 +1,100 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import os
|
||||
import time
|
||||
from typing import Union
|
||||
|
||||
import requests
|
||||
from fastapi import HTTPException
|
||||
from fastapi.responses import StreamingResponse
|
||||
|
||||
from comps import CustomLogger, LVMVideoDoc, OpeaComponent, OpeaComponentRegistry, ServiceType, statistics_dict
|
||||
|
||||
logger = CustomLogger("opea_video_llama")
|
||||
logflag = os.getenv("LOGFLAG", False)
|
||||
|
||||
|
||||
@OpeaComponentRegistry.register("OPEA_VIDEO_LLAMA_LVM")
|
||||
class OpeaVideoLlamaLvm(OpeaComponent):
|
||||
"""A specialized LVM component derived from OpeaComponent for Video-LLaMA services."""
|
||||
|
||||
def __init__(self, name: str, description: str, config: dict = None):
|
||||
super().__init__(name, ServiceType.LVM.name.lower(), description, config)
|
||||
self.base_url = os.getenv("LVM_ENDPOINT", "http://localhost:9099")
|
||||
health_status = self.check_health()
|
||||
if not health_status:
|
||||
logger.error("OpeaVideoLlamaLvm health check failed.")
|
||||
|
||||
async def invoke(
|
||||
self,
|
||||
request: Union[LVMVideoDoc],
|
||||
) -> Union[StreamingResponse]:
|
||||
"""Involve the LVM service to generate answer for the provided request.
|
||||
|
||||
Parameters:
|
||||
request (LVMVideoDoc): The request containing the video URL, start time, duration, prompt, and maximum new tokens.
|
||||
|
||||
Returns:
|
||||
StreamingResponse: A streaming response containing the generated text in text/event-stream format, or a JSON error response if the upstream API responds with an error.
|
||||
"""
|
||||
if logflag:
|
||||
logger.info("[lvm] Received input")
|
||||
logger.info(request)
|
||||
|
||||
video_url = request.video_url
|
||||
chunk_start = request.chunk_start
|
||||
chunk_duration = request.chunk_duration
|
||||
prompt = request.prompt
|
||||
max_new_tokens = request.max_new_tokens
|
||||
|
||||
params = {
|
||||
"video_url": video_url,
|
||||
"start": chunk_start,
|
||||
"duration": chunk_duration,
|
||||
"prompt": prompt,
|
||||
"max_new_tokens": max_new_tokens,
|
||||
}
|
||||
logger.info(f"[lvm] Params: {params}")
|
||||
|
||||
t_start = time.time()
|
||||
|
||||
response = requests.post(url=f"{self.base_url}/generate", params=params, proxies={"http": None}, stream=True)
|
||||
logger.info(f"[lvm] Response status code: {response.status_code}")
|
||||
if response.status_code == 200:
|
||||
|
||||
def streamer(time_start):
|
||||
first_token_latency = None
|
||||
yield f"{{'video_url': '{video_url}', 'chunk_start': {chunk_start}, 'chunk_duration': {chunk_duration}}}\n".encode(
|
||||
"utf-8"
|
||||
)
|
||||
for chunk in response.iter_content(chunk_size=8192):
|
||||
if chunk:
|
||||
if first_token_latency is None:
|
||||
first_token_latency = time.time() - time_start
|
||||
yield chunk
|
||||
logger.info(f"[lvm - chat_stream] Streaming chunk of size {len(chunk)}")
|
||||
logger.info("[lvm - chat_stream] stream response finished")
|
||||
statistics_dict["opea_service@lvm"].append_latency(time.time() - time_start, first_token_latency)
|
||||
|
||||
return StreamingResponse(streamer(t_start), media_type="text/event-stream")
|
||||
else:
|
||||
logger.error(f"[lvm] Error: {response.text}")
|
||||
raise HTTPException(status_code=500, detail="The upstream API responded with an error.")
|
||||
|
||||
def check_health(self) -> bool:
|
||||
"""Checks the health of the embedding service.
|
||||
|
||||
Returns:
|
||||
bool: True if the service is reachable and healthy, False otherwise.
|
||||
"""
|
||||
try:
|
||||
response = requests.get(f"{self.base_url}/health")
|
||||
if response.status_code == 200:
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
except Exception as e:
|
||||
# Handle connection errors, timeouts, etc.
|
||||
logger.error(f"Health check failed: {e}")
|
||||
return False
|
||||
comps/lvms/src/opea_lvm_microservice.py (new file, 72 lines)
@@ -0,0 +1,72 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import os
|
||||
import time
|
||||
from typing import Union
|
||||
|
||||
from integrations.opea_llama_vision import OpeaLlamaVisionLvm
|
||||
from integrations.opea_llava import OpeaLlavaLvm
|
||||
from integrations.opea_predictionguard import OpeaPredictionguardLvm
|
||||
from integrations.opea_tgi_llava import OpeaTgiLlavaLvm
|
||||
from integrations.opea_video_llama import OpeaVideoLlamaLvm
|
||||
|
||||
from comps import (
|
||||
CustomLogger,
|
||||
LVMDoc,
|
||||
LVMSearchedMultimodalDoc,
|
||||
LVMVideoDoc,
|
||||
MetadataTextDoc,
|
||||
OpeaComponentLoader,
|
||||
ServiceType,
|
||||
TextDoc,
|
||||
opea_microservices,
|
||||
register_microservice,
|
||||
register_statistics,
|
||||
statistics_dict,
|
||||
)
|
||||
|
||||
logger = CustomLogger("opea_lvm_microservice")
|
||||
logflag = os.getenv("LOGFLAG", False)
|
||||
|
||||
lvm_component_name = os.getenv("LVM_COMPONENT_NAME", "OPEA_LLAVA_LVM")
|
||||
# Initialize the OpeaComponentLoader for the selected LVM component
|
||||
loader = OpeaComponentLoader(lvm_component_name, description=f"OPEA LVM Component: {lvm_component_name}")
|
||||
|
||||
|
||||
@register_microservice(
|
||||
name="opea_service@lvm",
|
||||
service_type=ServiceType.LVM,
|
||||
endpoint="/v1/lvm",
|
||||
host="0.0.0.0",
|
||||
port=9399,
|
||||
)
|
||||
@register_statistics(names=["opea_service@lvm"])
|
||||
async def lvm(
|
||||
request: Union[LVMDoc, LVMSearchedMultimodalDoc, LVMVideoDoc]
|
||||
) -> Union[TextDoc, MetadataTextDoc]: # can also return a StreamingResponse but omit it in annotation for FastAPI
|
||||
start = time.time()
|
||||
|
||||
try:
|
||||
# Use the controller to invoke the active component
|
||||
lvm_response = await loader.invoke(request)
|
||||
if logflag:
|
||||
logger.info(lvm_response)
|
||||
|
||||
if loader.component.name in ["OpeaVideoLlamaLvm"] or (
|
||||
loader.component.name in ["OpeaTgiLlavaLvm"] and request.stream
|
||||
):
|
||||
# statistics for StreamingResponse are handled inside the integrations
|
||||
# here directly return the response
|
||||
return lvm_response
|
||||
statistics_dict["opea_service@lvm"].append_latency(time.time() - start, None)
|
||||
return lvm_response
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error during lvm invocation: {e}")
|
||||
raise
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
logger.info("OPEA LVM Microservice is starting....")
|
||||
opea_microservices["opea_service@lvm"].start()
|
||||
@@ -1,25 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
FROM python:3.11-slim
|
||||
|
||||
# Set environment variables
|
||||
ENV LANG=en_US.UTF-8
|
||||
|
||||
ARG ARCH="cpu"
|
||||
|
||||
COPY comps /home/comps
|
||||
|
||||
RUN pip install --no-cache-dir --upgrade pip setuptools && \
|
||||
if [ ${ARCH} = "cpu" ]; then \
|
||||
pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r /home/comps/lvms/tgi-llava/requirements.txt; \
|
||||
else \
|
||||
pip install --no-cache-dir -r /home/comps/lvms/tgi-llava/requirements.txt; \
|
||||
fi;
|
||||
|
||||
ENV PYTHONPATH=$PYTHONPATH:/home
|
||||
|
||||
WORKDIR /home/comps/lvms/tgi-llava
|
||||
|
||||
ENTRYPOINT ["python", "lvm_tgi.py"]
|
||||
|
||||
@@ -1,2 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
@@ -1,155 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import os
import time
from typing import Union

from fastapi import HTTPException
from fastapi.responses import StreamingResponse
from huggingface_hub import AsyncInferenceClient
from langchain_core.prompts import PromptTemplate
from template import ChatTemplate

from comps import (
    CustomLogger,
    LVMDoc,
    LVMSearchedMultimodalDoc,
    MetadataTextDoc,
    ServiceType,
    TextDoc,
    opea_microservices,
    register_microservice,
    register_statistics,
    statistics_dict,
)

logger = CustomLogger("lvm_tgi")
logflag = os.getenv("LOGFLAG", False)


@register_microservice(
    name="opea_service@lvm_tgi",
    service_type=ServiceType.LVM,
    endpoint="/v1/lvm",
    host="0.0.0.0",
    port=9399,
    input_datatype=LVMDoc,
    output_datatype=TextDoc,
)
@register_statistics(names=["opea_service@lvm_tgi"])
async def lvm(request: Union[LVMDoc, LVMSearchedMultimodalDoc]) -> Union[TextDoc, MetadataTextDoc]:
    if logflag:
        logger.info(request)
    start = time.time()
    stream_gen_time = []

    if isinstance(request, LVMSearchedMultimodalDoc):
        if logflag:
            logger.info("[LVMSearchedMultimodalDoc ] input from retriever microservice")
        retrieved_metadatas = request.metadata
        if not retrieved_metadatas or len(retrieved_metadatas) == 0:
            # there is no video segments retrieved.
            # Raise HTTPException status_code 204
            # due to llava-tgi-gaudi should receive image as input; Otherwise, the generated text is bad.
            raise HTTPException(status_code=500, detail="There is no video segments retrieved given the query!")
        img_b64_str = retrieved_metadatas[0]["b64_img_str"]
        has_image = img_b64_str != ""
        initial_query = request.initial_query
        context = retrieved_metadatas[0]["transcript_for_inference"]
        prompt = initial_query
        if request.chat_template is None:
            prompt = ChatTemplate.generate_multimodal_rag_on_videos_prompt(initial_query, context, has_image)
        else:
            prompt_template = PromptTemplate.from_template(request.chat_template)
            input_variables = prompt_template.input_variables
            if sorted(input_variables) == ["context", "question"]:
                prompt = prompt_template.format(question=initial_query, context=context)
            else:
                logger.info(
                    f"[ LVMSearchedMultimodalDoc ] {prompt_template} not used, we only support 2 input variables ['question', 'context']"
                )
        max_new_tokens = request.max_new_tokens
        stream = request.stream
        repetition_penalty = request.repetition_penalty
        temperature = request.temperature
        top_k = request.top_k
        top_p = request.top_p
        if logflag:
            logger.info(f"prompt generated for [LVMSearchedMultimodalDoc ] input from retriever microservice: {prompt}")

    else:
        img_b64_str = request.image
        prompt = request.prompt
        max_new_tokens = request.max_new_tokens
        stream = request.stream
        repetition_penalty = request.repetition_penalty
        temperature = request.temperature
        top_k = request.top_k
        top_p = request.top_p

    if not img_b64_str:
        # Work around an issue where LLaVA-NeXT is not providing good responses when prompted without an image.
        # Provide an image and then instruct the model to ignore the image. The base64 string below is the encoded png:
        # https://raw.githubusercontent.com/opea-project/GenAIExamples/refs/tags/v1.0/AudioQnA/ui/svelte/src/lib/assets/icons/png/audio1.png
        img_b64_str = "iVBORw0KGgoAAAANSUhEUgAAADUAAAAlCAYAAADiMKHrAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAKPSURBVHgB7Zl/btowFMefnUTqf+MAHYMTjN4gvcGOABpM+8E0doLSE4xpsE3rKuAG3KC5Ad0J6MYOkP07YnvvhR9y0lVzupTIVT5SwDjB9fd97WfsMkCef1rUXM8dY9HHK4hWUevzi/oVWAqnF8fzLmAtiPA3Aq0lFsVA1fRKxlgNLIbDPaQUZQuu6YO98aIipHOiFGtIqaYfn1UnUCDds6WPyeANlTFbv9WztbFTK+HNUVAPiz7nbPzq7HsPCoKWIBREGfsJXZit5xT07X0jp6iRdIbEHOnjyyD97OvzH00lVS2K5OS2ax11cBXxJgYxlEIE6XZclzdTX6n8XjkkcEIfbj2nMO0/SNd1vy4vsCNjYPyEovfyy88GZIQCSKOCMf6ORgStoboLJuSWKDYCfK2q4jjrMZ+GOh7Pib/gek5DHxVUJtcgA7mJ4kwZRbN7viQXFzQn0Nl52gXG4Fo7DKAYp0yI3VHQ16oaWV0wYa+iGE8nG+wAdx5DzpS/KGyhFGULpShbKEXZQinqLlBK/IKc2asoh4sZvoXJWhlAzuxV1KBVD3HrfYTFAK8ZHgu0hu36DHLG+Izinw250WUkXHJht02QUnxLP7fZxR7f1I6S7Ir2GgmYvIQM5OYUuYBdainATq2ZjTqPBlnbGXYeBrg9Od18DKmc1U0jpw4OIIwEJFxQSl2b4MN2lf74fw8nFNbHt/5N9xWKTZvJ2S6YZk6RC3j2cKpVhSIShZ0mea6caCOCAjyNHd5gPPxGncMBTvI6hunYdaJ6kf8VoSCP2odxX6RkR6NOtanfj13EswKVqEQrPzzFL1lK+YvCFraiEqs8TrwQLGYraqpX4kr/Hixml+63Z+CoM9DTo438AUmP+KyMWT+tAAAAAElFTkSuQmCC"
        prompt = f"Please disregard the image and answer the question. {prompt}"

    image = f"data:image/png;base64,{img_b64_str}"
    image_prompt = f"![]({image})\n{prompt}\nASSISTANT:"

    if stream:

        async def stream_generator():
            chat_response = ""
            text_generation = await lvm_client.text_generation(
                prompt=image_prompt,
                stream=stream,
                max_new_tokens=max_new_tokens,
                repetition_penalty=repetition_penalty,
                temperature=temperature,
                top_k=top_k,
                top_p=top_p,
            )
            async for text in text_generation:
                stream_gen_time.append(time.time() - start)
                chat_response += text
                chunk_repr = repr(text.encode("utf-8"))
                if logflag:
                    logger.info(f"[llm - chat_stream] chunk:{chunk_repr}")
                yield f"data: {chunk_repr}\n\n"
            if logflag:
                logger.info(f"[llm - chat_stream] stream response: {chat_response}")
            statistics_dict["opea_service@lvm_tgi"].append_latency(stream_gen_time[-1], stream_gen_time[0])
            yield "data: [DONE]\n\n"

        return StreamingResponse(stream_generator(), media_type="text/event-stream")
    else:
        generated_str = await lvm_client.text_generation(
            image_prompt,
            max_new_tokens=max_new_tokens,
            repetition_penalty=repetition_penalty,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
        )
        statistics_dict["opea_service@lvm_tgi"].append_latency(time.time() - start, None)
        if logflag:
            logger.info(generated_str)
        if isinstance(request, LVMSearchedMultimodalDoc):
            retrieved_metadata = request.metadata[0]
            return_metadata = {}  # this metadata will be used to construct proof for generated text
            return_metadata["video_id"] = retrieved_metadata["video_id"]
            return_metadata["source_video"] = retrieved_metadata["source_video"]
            return_metadata["time_of_frame_ms"] = retrieved_metadata["time_of_frame_ms"]
            return_metadata["transcript_for_inference"] = retrieved_metadata["transcript_for_inference"]
            return MetadataTextDoc(text=generated_str, metadata=return_metadata)
        else:
            return TextDoc(text=generated_str)


if __name__ == "__main__":
    lvm_endpoint = os.getenv("LVM_ENDPOINT", "http://localhost:8399")
    lvm_client = AsyncInferenceClient(lvm_endpoint)
    logger.info("[LVM] LVM initialized.")
    opea_microservices["opea_service@lvm_tgi"].start()
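For orientation, here is a minimal sketch of how the `opea_service@lvm_tgi` endpoint removed above could be exercised. It is illustrative only and not part of the diff: the host, the example image path, and the parameter values are assumptions; the field names (`image`, `prompt`, `max_new_tokens`, `stream`), the `/v1/lvm` path, and port 9399 come from the handler and registration shown above.

```python
# Illustrative request against the /v1/lvm endpoint registered above (port 9399).
# "example.png" and localhost are placeholder assumptions, not values from the diff.
import base64

import requests

with open("example.png", "rb") as f:
    img_b64_str = base64.b64encode(f.read()).decode("utf-8")

payload = {
    "image": img_b64_str,  # raw base64; the handler prepends "data:image/png;base64," itself
    "prompt": "What is shown in this image?",
    "max_new_tokens": 128,
    "stream": False,  # True returns a text/event-stream of "data: ..." chunks instead
}

resp = requests.post("http://localhost:9399/v1/lvm", json=payload, timeout=120)
resp.raise_for_status()
print(resp.json().get("text"))  # TextDoc payload when streaming is disabled
```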
@@ -1,17 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0


class ChatTemplate:

    @staticmethod
    def generate_multimodal_rag_on_videos_prompt(question: str, context: str, has_image: bool = False):

        if has_image:
            template = """The transcript associated with the image is '{context}'. {question}"""
        else:
            template = (
                """Refer to the following results obtained from the local knowledge base: '{context}'. {question}"""
            )

        return template.format(context=context, question=question)
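To make the two branches above concrete, here is a short illustrative call to the template. The transcript and question are invented; the expected outputs in the comments follow directly from the format strings above.

```python
# Illustrative only: what the two template branches above produce for a made-up query.
from template import ChatTemplate  # the module laid out in the deleted file above

context = "A girl is silently packing a suitcase."
question = "What is the person doing?"

# With an image/frame available, the transcript is attached to the image.
print(ChatTemplate.generate_multimodal_rag_on_videos_prompt(question, context, has_image=True))
# -> The transcript associated with the image is 'A girl is silently packing a suitcase.' What is the person doing?

# Without an image, the transcript is treated as retrieved knowledge-base text.
print(ChatTemplate.generate_multimodal_rag_on_videos_prompt(question, context, has_image=False))
# -> Refer to the following results obtained from the local knowledge base: 'A girl is silently packing a suitcase.' What is the person doing?
```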
@@ -1,18 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

FROM python:3.11-slim

# Set environment variables
ENV LANG=en_US.UTF-8

COPY comps /home/comps

RUN pip install --no-cache-dir --upgrade pip setuptools && \
    pip install --no-cache-dir -r /home/comps/lvms/video-llama/requirements.txt

ENV PYTHONPATH=$PYTHONPATH:/home

WORKDIR /home/comps/lvms/video-llama

ENTRYPOINT ["python", "lvm.py"]
@@ -1,50 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import datetime
import json
import os

import requests

ip_address = os.getenv("ip_address")
####### video-llama request ########
print("video-llama request")
api_url = f"http://${ip_address}:9009/generate"
content = {
    "video_url": "https://github.com/DAMO-NLP-SG/Video-LLaMA/raw/main/examples/silence_girl.mp4",
    "start": 0.0,
    "duration": 9,
    "prompt": "What is the person doing?",
    "max_new_tokens": 150,
}

start = datetime.datetime.now()
with requests.post(api_url, params=content, stream=True) as response:
    for chunk in response.iter_content(chunk_size=8192):
        if chunk:
            print(chunk.decode("utf-8"), end="", flush=True)  # Flush to ensure immediate output

end = datetime.datetime.now()
print(f"\nTotal time: {end - start}")

####### lvm request ########
print("lvm request")
api_url = f"http://${ip_address}:9000/v1/lvm"
headers = {"Content-Type": "application/json"}
data = {
    "video_url": "https://github.com/DAMO-NLP-SG/Video-LLaMA/raw/main/examples/silence_girl.mp4",
    "chunk_start": 0,
    "chunk_duration": 9,
    "prompt": "what is the person doing",
    "max_new_tokens": 150,
}

start = datetime.datetime.now()
with requests.post(api_url, headers=headers, data=json.dumps(data), stream=True) as response:
    for chunk in response.iter_content(chunk_size=8192):
        if chunk:
            print(chunk.decode("utf-8"), end="", flush=True)  # Flush to ensure immediate output

end = datetime.datetime.now()
print(f"\nTotal time: {end - start}")
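One detail worth flagging in the script above: because the `$` sits inside an f-string, the resolved URLs contain a literal dollar sign (for example `http://$192.168.1.1:9009/generate`). The file is being removed in this diff, so it is left as-is here, but if the snippet is reused elsewhere, a corrected sketch of the URL construction (same ports and paths as above; the `localhost` fallback is an assumption) would drop the `$`:

```python
# Editorial sketch: f-string URL construction without the stray "$".
import os

ip_address = os.getenv("ip_address", "localhost")  # fallback added for local testing
video_llama_url = f"http://{ip_address}:9009/generate"  # video-llama server
lvm_url = f"http://{ip_address}:9000/v1/lvm"  # LVM wrapper microservice
```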
@@ -1,40 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

version: "3"
services:
  lvm-video-llama:
    image: opea/video-llama-lvm-server:latest
    container_name: video-llama-lvm-server
    ports:
      - "9009:9009"
    ipc: host
    environment:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      no_proxy: ${no_proxy}
      llm_download: "True"
    volumes:
      - "/home/$USER/.cache:/home/user/.cache" # RECOMMENDED: use local cache to avoid download
      - video-llama-model:/home/user/model
    restart: unless-stopped

  lvm:
    image: opea/lvm-video-llama:latest
    container_name: lvm-video-llama
    ports:
      - "9000:9000"
    ipc: host
    environment:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      no_proxy: ${no_proxy}
      LVM_ENDPOINT: ${LVM_ENDPOINT}
    restart: unless-stopped
    depends_on:
      - lvm-video-llama
networks:
  default:
    driver: bridge
volumes:
  video-llama-model:
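Once the two containers above are up, a quick reachability check could look like the following sketch. It is illustrative only: the ports come from the compose file, while `localhost` and the idea of treating any HTTP answer as "reachable" are simplifying assumptions.

```python
# Illustrative probe of the two services defined in the compose file above.
import requests

services = {
    "video-llama-lvm-server": "http://localhost:9009",
    "lvm-video-llama wrapper": "http://localhost:9000",
}

for name, url in services.items():
    try:
        requests.get(url, timeout=5)  # any HTTP response means the port is open
        print(f"{name}: reachable at {url}")
    except requests.RequestException:
        print(f"{name}: not reachable yet at {url}")
```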
@@ -1,80 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0


# import json
import logging
import os

import requests
from fastapi import HTTPException
from fastapi.responses import StreamingResponse

from comps import LVMVideoDoc, ServiceType, opea_microservices, register_microservice, register_statistics

# import time


logging.basicConfig(level=logging.INFO)


@register_microservice(
    name="opea_service@lvm",
    service_type=ServiceType.LVM,
    endpoint="/v1/lvm",
    host="0.0.0.0",
    port=9000,
    input_datatype=LVMVideoDoc,
    output_datatype=StreamingResponse,
)
@register_statistics(names=["opea_service@lvm"])
async def lvm(input: LVMVideoDoc):
    """This function handles the LVM microservice, which generates text based on a video URL, start time, duration, prompt, and maximum new tokens.

    Parameters:
    input (LVMVideoDoc): The input containing the video URL, start time, duration, prompt, and maximum new tokens.

    Returns:
    StreamingResponse: A streaming response containing the generated text in text/event-stream format, or a JSON error response if the upstream API responds with an error.
    """
    logging.info("[lvm] Received input")

    video_url = input.video_url
    chunk_start = input.chunk_start
    chunk_duration = input.chunk_duration
    prompt = input.prompt
    max_new_tokens = input.max_new_tokens

    params = {
        "video_url": video_url,
        "start": chunk_start,
        "duration": chunk_duration,
        "prompt": prompt,
        "max_new_tokens": max_new_tokens,
    }
    logging.info(f"[lvm] Params: {params}")

    response = requests.post(url=f"{lvm_endpoint}/generate", params=params, proxies={"http": None}, stream=True)
    logging.info(f"[lvm] Response status code: {response.status_code}")
    if response.status_code == 200:

        def streamer():
            yield f"{{'video_url': '{video_url}', 'chunk_start': {chunk_start}, 'chunk_duration': {chunk_duration}}}\n".encode(
                "utf-8"
            )
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    yield chunk
                    logging.info(f"[llm - chat_stream] Streaming: {chunk}")
            logging.info("[llm - chat_stream] stream response finished")

        return StreamingResponse(streamer(), media_type="text/event-stream")
    else:
        logging.error(f"[lvm] Error: {response.text}")
        raise HTTPException(status_code=500, detail="The upstream API responded with an error.")


if __name__ == "__main__":
    lvm_endpoint = os.getenv("LVM_ENDPOINT")

    opea_microservices["opea_service@lvm"].start()
@@ -1,11 +0,0 @@
datasets
docarray
fastapi
opentelemetry-api
opentelemetry-exporter-otlp
opentelemetry-sdk
Pillow
prometheus-fastapi-instrumentator
pydub
shortuuid
uvicorn