Clean deprecated VisualQnA code (#634)

* Clean deprecated VisualQnA code

Signed-off-by: lvliang-intel <liang1.lv@intel.com>

* update tgi repo

Signed-off-by: lvliang-intel <liang1.lv@intel.com>

---------

Signed-off-by: lvliang-intel <liang1.lv@intel.com>
lvliang-intel
2024-08-20 22:21:18 +08:00
committed by GitHub
parent db2d2bd1a1
commit 87617e761c
8 changed files with 1 addition and 456 deletions

@@ -24,9 +24,8 @@ docker build --no-cache -t opea/lvm-tgi:latest --build-arg https_proxy=$https_pr
Since TGI Gaudi does not yet support llava-next in its main branch, we'll need to build it from a PR branch for now.
```bash
git clone https://github.com/yuanwu2017/tgi-gaudi.git
git clone https://github.com/huggingface/tgi-gaudi.git
cd tgi-gaudi/
git checkout v2.0.4
docker build -t opea/llava-tgi:latest .
cd ../
```
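Once the image is built, it can be started like any other TGI Gaudi container. The following is a minimal sketch only: the container name, host port, model id, and token limits are assumptions and should be adapted to your deployment.

```bash
# Hedged sketch: run the freshly built TGI Gaudi image (container name, host
# port, model id, and token limits are assumptions, not part of this change).
docker run -d --name llava-tgi-service \
    -p 8399:80 \
    --runtime=habana \
    -e HABANA_VISIBLE_DEVICES=all \
    -e HF_TOKEN=${HF_TOKEN} \
    --cap-add=sys_nice \
    --ipc=host \
    opea/llava-tgi:latest \
    --model-id llava-hf/llava-v1.6-mistral-7b-hf \
    --max-input-tokens 4096 \
    --max-total-tokens 8192
```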

@@ -1,44 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# SCRIPT USAGE NOTICE: By downloading and using any script file included
# with the associated software package (such as files with .bat, .cmd, or
# .JS extensions, Docker files, or any other type of file that, when executed,
# automatically downloads and/or installs files onto your system) (the “Script File”),
# it is your obligation to review the Script File to understand what files (e.g.,
# other software, AI models, AI Datasets) the Script File will download to your system
# (“Downloaded Files”). Furthermore, by downloading and using the Downloaded Files,
# even if they are installed through a silent install, you agree to any and all
# terms and conditions associated with such files, including but not limited to,
# license terms, notices, or disclaimers.
# HABANA environment
FROM vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1 AS hpu
RUN rm -rf /etc/ssh/ssh_host*
# Set environment variables
ENV LANG=en_US.UTF-8
ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/optimum-habana
# Install required branch
RUN git clone https://github.com/lkk12014402/optimum-habana.git /optimum-habana -b enable_llava_generation
RUN useradd -m -s /bin/bash user && \
mkdir -p /home/user && \
chown -R user /home/user/
USER user
COPY requirements.txt /tmp/requirements.txt
# Install dependency
RUN pip install --no-cache-dir -U -r /tmp/requirements.txt
# work dir should contain the server
WORKDIR /llava_server
COPY llava_server /llava_server
ENTRYPOINT ["python", "llava_server.py"]
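For reference, the removed Dockerfile above was built and run directly on Gaudi. A minimal sketch, assuming a hypothetical image tag and host port:

```bash
# Hedged sketch: build and run the removed LLaVA serving image on Gaudi
# (the image tag opea/llava-gaudi:latest and host port 8399 are assumptions).
docker build -t opea/llava-gaudi:latest .
docker run -d --name llava-gaudi-service \
    -p 8399:8000 \
    --runtime=habana \
    -e HABANA_VISIBLE_DEVICES=all \
    --cap-add=sys_nice \
    --ipc=host \
    opea/llava-gaudi:latest
```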

@@ -1,113 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import argparse
import base64
import time
from io import BytesIO
import PIL.Image
import requests
import torch
import uvicorn
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse, Response, StreamingResponse
from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi
from transformers import pipeline
model_name_or_path = None
model_dtype = None
use_hpu_graphs = True
generator = None
app = FastAPI()

@app.get("/health")
async def health() -> Response:
    """Health check."""
    return Response(status_code=200)

@app.post("/generate")
async def generate(request: Request) -> Response:  # FIXME batch_size=1 for now, only accept single image
    request_dict = await request.json()
    prompt = request_dict.pop("prompt")
    # image_path = request_dict.pop("image_path")
    img_b64_str = request_dict.pop("image")  # image is an encoded base64 string
    max_new_tokens = request_dict.pop("max_new_tokens", 100)

    # image = PIL.Image.open(requests.get(image_path, stream=True, timeout=3000).raw)
    image = PIL.Image.open(BytesIO(base64.b64decode(img_b64_str)))
    generate_kwargs = {
        "lazy_mode": True,
        "hpu_graphs": use_hpu_graphs,
        "max_new_tokens": max_new_tokens,
        "ignore_eos": False,
    }
    start = time.time()
    result = generator(image, prompt=prompt, batch_size=1, generate_kwargs=generate_kwargs)
    end = time.time()
    result = result[0]["generated_text"].split("ASSISTANT: ")[-1]
    print(f"result = {result}, time = {(end - start) * 1000}ms")
    ret = {"text": result}
    return JSONResponse(ret)

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--host", type=str, default="0.0.0.0")
    parser.add_argument("--port", type=int, default=8000)
    parser.add_argument("--model_name_or_path", type=str, default="llava-hf/llava-1.5-7b-hf")
    parser.add_argument("--use_hpu_graphs", default=True, action="store_true")
    parser.add_argument("--warmup", type=int, default=3, help="Number of warmup iterations for benchmarking.")
    parser.add_argument("--bf16", default=True, action="store_true")
    args = parser.parse_args()

    adapt_transformers_to_gaudi()

    if args.bf16:
        model_dtype = torch.bfloat16
    else:
        model_dtype = torch.float32
    model_name_or_path = args.model_name_or_path

    generator = pipeline(
        "image-to-text",
        model=args.model_name_or_path,
        torch_dtype=model_dtype,
        device="hpu",
    )

    # warmup
    generate_kwargs = {
        "lazy_mode": True,
        "hpu_graphs": args.use_hpu_graphs,
        "max_new_tokens": 100,
        "ignore_eos": False,
    }
    if args.use_hpu_graphs:
        from habana_frameworks.torch.hpu import wrap_in_hpu_graph

        generator.model = wrap_in_hpu_graph(generator.model)

    image_paths = ["https://llava-vl.github.io/static/images/view.jpg"]
    images = []
    for image_path in image_paths:
        images.append(PIL.Image.open(requests.get(image_path, stream=True, timeout=3000).raw))
    for i in range(args.warmup):
        generator(
            images,
            prompt="<image>\nUSER: What's the content of the image?\nASSISTANT:",
            batch_size=1,
            generate_kwargs=generate_kwargs,
        )

    uvicorn.run(
        app,
        host=args.host,
        port=args.port,
        log_level="debug",
    )
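For reference, the `/generate` endpoint of the server removed above expected a JSON body with a `prompt`, a base64-encoded `image`, and an optional `max_new_tokens`. A minimal request sketch, assuming the server is running on its default port 8000 and `./view.jpg` is a local image file:

```bash
# Hedged sketch: query the removed LLaVA server (default port 8000 and the
# local file ./view.jpg are assumptions).
IMG_B64=$(base64 -w 0 ./view.jpg)
curl -s -X POST http://localhost:8000/generate \
    -H "Content-Type: application/json" \
    -d '{"prompt": "<image>\nUSER: What is shown in this image?\nASSISTANT:", "image": "'"${IMG_B64}"'", "max_new_tokens": 100}'
```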

@@ -1,4 +0,0 @@
eager
fastapi
optimum[habana]
uvicorn

@@ -1,290 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# Some code adapted from https://github.com/haotian-liu/LLaVA/blob/main/llava/serve/gradio_web_server.py
# and https://github.com/haotian-liu/LLaVA/blob/main/llava/conversation.py
import argparse
import base64
import os
from io import BytesIO
import gradio as gr
import requests
title_markdown = """
# 🌋 LLaVA demo on Gaudi2
"""
tos_markdown = """
### Terms of use
By using this service, users are required to agree to the following terms:
The service is a research preview intended for non-commercial use only. It only provides limited safety measures and may generate offensive content. It must not be used for any illegal, harmful, violent, racist, or sexual purposes. The service may collect user dialogue data for future research.
Please click the "Flag" button if you get any inappropriate answer! We will collect those to keep improving our moderator.
For an optimal experience, please use desktop computers for this demo, as mobile devices may compromise its quality.
"""
title_markdown_cn = """
# 🌋 在 Gaudi2 上展示 LLaVA
"""
tos_markdown_cn = """
### 使用条款
使用本服务即表示用户同意以下条款:
本服务为研究预览版,仅供非商业用途。它仅提供有限的安全措施,可能会生成冒犯性内容。严禁将本服务用于任何非法、有害、暴力、种族歧视或色情的目的。本服务可能会收集用户对话数据以用于未来研究。
为获得最佳体验,请使用台式电脑访问本演示,因为移动设备可能会影响其质量。
"""

block_css = """
#buttons button {
    min-width: min(120px,100%);
}
.upload-container .wrap,
.upload-container .wrap .or {
    color: #1f2937;
}
.upload-container .wrap .icon-wrap {
    color: #e5e7eb;
    margin-top: 4rem;
    width: 4rem;
    height: 3rem;
}
"""
no_change_btn = gr.Button()
enable_btn = gr.Button(interactive=True)
disable_btn = gr.Button(interactive=False)

def process_image(image, return_pil=False, image_format="PNG", max_len=1344, min_len=672):
    if max(image.size) > max_len:
        max_hw, min_hw = max(image.size), min(image.size)
        aspect_ratio = max_hw / min_hw
        shortest_edge = int(min(max_len / aspect_ratio, min_len, min_hw))
        longest_edge = int(shortest_edge * aspect_ratio)
        W, H = image.size
        if H > W:
            H, W = longest_edge, shortest_edge
        else:
            H, W = shortest_edge, longest_edge
        image = image.resize((W, H))
    if return_pil:
        return image
    else:
        buffered = BytesIO()
        image.save(buffered, format=image_format)
        img_b64_str = base64.b64encode(buffered.getvalue()).decode()
        return img_b64_str

def handle_llava_request(text, image, max_new_tokens, chat_history):
    print(f"text: {text}, image: {image}, max_new_tokens: {max_new_tokens}\n")
    img_b64_str = process_image(image, return_pil=False, image_format="JPEG")
    img_str = f'<img src="data:image/jpeg;base64,{img_b64_str}" alt="user upload image" />'
    # skip embedding the image in later messages
    if len(chat_history) < 1:
        msg = img_str + text.replace("<image>", "").strip()
    else:
        msg = text.replace("<image>", "").strip()

    req_dict = {"prompt": f"<image>\nUSER: {text}\nASSISTANT:", "image": img_b64_str, "max_new_tokens": max_new_tokens}
    result = requests.post(f"{args.worker_addr}/generate", json=req_dict, proxies={"http": None})
    answer = result.json()["text"]
    chat_history.append([msg, answer])
    return [chat_history] + [enable_btn]

def clear_history(chat_history, image, text):
    chat_history = []
    image = None
    text = None
    return [chat_history, image, text] + [disable_btn]

def build_demo_cn(embed_mode, cur_dir=None, concurrency_count=10):
    textbox = gr.Textbox(show_label=False, placeholder="输入文字并按回车键", container=False)
    with gr.Blocks(title="LLaVA", theme=gr.themes.Default(), css=block_css) as demo:
        # demo.add(custom_html)
        state = gr.State()

        if not embed_mode:
            gr.Markdown(title_markdown_cn)

        with gr.Row():
            with gr.Column(scale=3):
                imagebox = gr.Image(type="pil", label="图片", interactive=True, elem_id="my_imagebox")

                if cur_dir is None:
                    cur_dir = os.path.dirname(os.path.abspath(__file__))
                gr.Examples(
                    examples=[
                        [f"{cur_dir}/resources/extreme_ironing.jpg", "这张图片有什么不寻常之处?"],
                        [
                            f"{cur_dir}/resources/waterview.jpg",
                            "当我去那里访问时,我应该注意哪些事情?",
                        ],
                    ],
                    label="请选择一个示例",
                    inputs=[imagebox, textbox],
                )

                with gr.Accordion("参数", open=False) as parameter_row:
                    max_output_tokens = gr.Slider(
                        minimum=0,
                        maximum=1024,
                        value=512,
                        step=64,
                        interactive=True,
                        label="最大输出标记数",
                    )

            with gr.Column(scale=8):
                chatbot = gr.Chatbot(
                    elem_id="chatbot",
                    label="LLaVA聊天机器人",
                    height=650,
                    layout="panel",
                )
                with gr.Row():
                    with gr.Column(scale=8):
                        textbox.render()
                    with gr.Column(scale=1, min_width=50):
                        submit_btn = gr.Button(value="发送", variant="primary")
                with gr.Row(elem_id="buttons") as button_row:
                    clear_btn = gr.Button(value="🗑️ 清除", interactive=False)

        if not embed_mode:
            gr.Markdown(tos_markdown_cn)

        btn_list = [clear_btn]
        clear_btn.click(
            clear_history,
            [chatbot, imagebox, textbox],
            [chatbot, imagebox, textbox] + btn_list,
        )
        textbox.submit(
            handle_llava_request,
            [textbox, imagebox, max_output_tokens, chatbot],
            [chatbot] + btn_list,
        )
        submit_btn.click(
            handle_llava_request,
            [textbox, imagebox, max_output_tokens, chatbot],
            [chatbot] + btn_list,
        )
    return demo

def build_demo(embed_mode, cur_dir=None, concurrency_count=10):
    textbox = gr.Textbox(show_label=False, placeholder="Enter text and press ENTER", container=False)
    with gr.Blocks(title="LLaVA", theme=gr.themes.Default(), css=block_css) as demo:
        state = gr.State()

        if not embed_mode:
            gr.Markdown(title_markdown)

        with gr.Row():
            with gr.Column(scale=3):
                imagebox = gr.Image(type="pil")

                if cur_dir is None:
                    cur_dir = os.path.dirname(os.path.abspath(__file__))
                gr.Examples(
                    examples=[
                        [f"{cur_dir}/resources/extreme_ironing.jpg", "What is unusual about this image?"],
                        [
                            f"{cur_dir}/resources/waterview.jpg",
                            "What are the things I should be cautious about when I visit here?",
                        ],
                    ],
                    inputs=[imagebox, textbox],
                )

                with gr.Accordion("Parameters", open=False) as parameter_row:
                    max_output_tokens = gr.Slider(
                        minimum=0,
                        maximum=1024,
                        value=512,
                        step=64,
                        interactive=True,
                        label="Max output tokens",
                    )

            with gr.Column(scale=8):
                chatbot = gr.Chatbot(
                    elem_id="chatbot",
                    label="LLaVA Chatbot",
                    height=650,
                    layout="panel",
                )
                with gr.Row():
                    with gr.Column(scale=8):
                        textbox.render()
                    with gr.Column(scale=1, min_width=50):
                        submit_btn = gr.Button(value="Send", variant="primary")
                with gr.Row(elem_id="buttons") as button_row:
                    clear_btn = gr.Button(value="🗑️ Clear", interactive=False)

        if not embed_mode:
            gr.Markdown(tos_markdown)

        btn_list = [clear_btn]
        clear_btn.click(
            clear_history,
            [chatbot, imagebox, textbox],
            [chatbot, imagebox, textbox] + btn_list,
        )
        textbox.submit(
            handle_llava_request,
            [textbox, imagebox, max_output_tokens, chatbot],
            [chatbot] + btn_list,
        )
        submit_btn.click(
            handle_llava_request,
            [textbox, imagebox, max_output_tokens, chatbot],
            [chatbot] + btn_list,
        )
    return demo

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # frontend host and port
    parser.add_argument("--host", type=str, default="0.0.0.0")
    parser.add_argument("--port", type=int)
    parser.add_argument("--lang", type=str, default="En")
    # backend worker address
    parser.add_argument(
        "--worker-addr", type=str, default="http://localhost:8085", help="The worker address of the LLaVA server."
    )
    parser.add_argument("--share", action="store_true")
    parser.add_argument("--embed", action="store_true")
    parser.add_argument("--concurrency-count", type=int, default=16)
    args = parser.parse_args()
    print(args)

    selectedLang = args.lang
    if selectedLang == "CN":
        demo = build_demo_cn(args.embed, concurrency_count=args.concurrency_count)
    else:
        demo = build_demo(args.embed, concurrency_count=args.concurrency_count)
    demo.queue(api_open=False).launch(server_name=args.host, server_port=args.port, share=args.share)
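For reference, the removed UI was a stand-alone Gradio app. A hedged launch sketch (the script name `llava_ui.py` and the port are placeholders, and `--worker-addr` must point at a running LLaVA server):

```bash
# Hedged sketch: launch the removed Gradio front end (script name and port
# are placeholders; --worker-addr must point at a running LLaVA server).
python llava_ui.py --host 0.0.0.0 --port 7860 \
    --worker-addr http://localhost:8085 --lang En
```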

@@ -1,3 +0,0 @@
gradio
gradio_client
requests

Two binary image files (the example images referenced by the Gradio UI above; 61 KiB and 93 KiB) were deleted and are not shown.