Fix VideoQnA (#1696)
This PR fixes the VideoQnA example. Fixes Issues #1476 #1478 #1477 Signed-off-by: zhanmyz <yazhan.ma@intel.com> Signed-off-by: Lacewell, Chaunte W <chaunte.w.lacewell@intel.com>
This commit is contained in:
committed by
GitHub
parent
58b47c15c6
commit
c4763434b8
@@ -17,7 +17,7 @@ Port 8001 - Open to 0.0.0.0/0
|
||||
|
||||
embedding
|
||||
=========
|
||||
Port 6000 - Open to 0.0.0.0/0
|
||||
Port 6990 - Open to 0.0.0.0/0
|
||||
|
||||
retriever
|
||||
=========
|
||||
@@ -33,13 +33,13 @@ Port 9009 - Open to 0.0.0.0/0
|
||||
|
||||
lvm
|
||||
===
|
||||
Port 9000 - Open to 0.0.0.0/0
|
||||
Port 9399 - Open to 0.0.0.0/0
|
||||
|
||||
chaqna-xeon-backend-server
|
||||
videoqna-xeon-backend-server
|
||||
==========================
|
||||
Port 8888 - Open to 0.0.0.0/0
|
||||
|
||||
chaqna-xeon-ui-server
|
||||
videoqna-xeon-ui-server
|
||||
=====================
|
||||
Port 5173 - Open to 0.0.0.0/0
|
||||
```
|
||||
@@ -106,17 +106,14 @@ docker build -t opea/videoqna-ui:latest --build-arg https_proxy=$https_proxy --b
|
||||
|
||||
Then run the command `docker images`, you will have the following 8 Docker Images:
|
||||
|
||||
1. `opea/embedding-multimodal-clip:latest`
|
||||
1. `opea/retriever:latest`
|
||||
1. `opea/reranking:latest`
|
||||
1. `opea/lvm-video-llama:latest`
|
||||
1. `opea/lvm:latest`
|
||||
1. `opea/dataprep:latest`
|
||||
2. `opea/embedding-multimodal-clip:latest`
|
||||
3. `opea/retriever:latest`
|
||||
4. `opea/reranking:latest`
|
||||
5. `opea/video-llama-lvm-server:latest`
|
||||
6. # `opea/lvm-video-llama:latest`
|
||||
7. `opea/reranking-tei:latest`
|
||||
8. `opea/lvm-video-llama:latest`
|
||||
9. `opea/lvm:latest`
|
||||
10. `opea/videoqna:latest`
|
||||
11. `opea/videoqna-ui:latest`
|
||||
1. `opea/videoqna:latest`
|
||||
1. `opea/videoqna-ui:latest`
|
||||
|
||||
## 🚀 Start Microservices
|
||||
|
||||
@@ -132,18 +129,18 @@ Since the `compose.yaml` will consume some environment variables, you need to se
|
||||
export host_ip="External_Public_IP"
|
||||
```
|
||||
|
||||
**Export the value of your Huggingface API token to the `your_hf_api_token` environment variable**
|
||||
**Export the value of your Huggingface API token to the `HF_TOKEN` environment variable**
|
||||
|
||||
> Change the `Your_Huggingface_API_Token` below with your actual Huggingface API Token value
|
||||
|
||||
```
|
||||
export your_hf_api_token="Your_Huggingface_API_Token"
|
||||
export HF_TOKEN="Your_Huggingface_API_Token"
|
||||
```
|
||||
|
||||
**Append the value of the public IP address to the no_proxy list**
|
||||
|
||||
```
|
||||
export your_no_proxy="${your_no_proxy},${host_ip}"
|
||||
export no_proxy="${your_no_proxy},${host_ip}"
|
||||
```
|
||||
|
||||
Then you can run below commands or `source set_env.sh` to set all the variables
|
||||
@@ -152,26 +149,52 @@ Then you can run below commands or `source set_env.sh` to set all the variables
|
||||
export no_proxy=${your_no_proxy}
|
||||
export http_proxy=${your_http_proxy}
|
||||
export https_proxy=${your_http_proxy}
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
|
||||
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
|
||||
export RERANK_SERVICE_HOST_IP=${host_ip}
|
||||
export LVM_SERVICE_HOST_IP=${host_ip}
|
||||
|
||||
export LVM_ENDPOINT="http://${host_ip}:9009"
|
||||
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/videoqna"
|
||||
export BACKEND_HEALTH_CHECK_ENDPOINT="http://${host_ip}:8888/v1/health_check"
|
||||
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest"
|
||||
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get"
|
||||
export DATAPREP_GET_VIDEO_LIST_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_videos"
|
||||
export HF_TOKEN=${HF_TOKEN}
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
|
||||
|
||||
export VDMS_HOST=${host_ip}
|
||||
export VDMS_PORT=8001
|
||||
export INDEX_NAME="mega-videoqna"
|
||||
export LLM_DOWNLOAD="True"
|
||||
export LLM_DOWNLOAD="True" # Set to "False" before redeploy LVM server to avoid model download
|
||||
export RERANK_COMPONENT_NAME="OPEA_VIDEO_RERANKING"
|
||||
export LVM_COMPONENT_NAME="OPEA_VIDEO_LLAMA_LVM"
|
||||
export EMBEDDING_COMPONENT_NAME="OPEA_CLIP_EMBEDDING"
|
||||
export USECLIP=1
|
||||
export LOGFLAG=True
|
||||
|
||||
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
|
||||
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
|
||||
export LVM_SERVICE_HOST_IP=${host_ip}
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export RERANK_SERVICE_HOST_IP=${host_ip}
|
||||
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
|
||||
export VDMS_HOST=${host_ip}
|
||||
|
||||
export BACKEND_PORT=8888
|
||||
export DATAPREP_PORT=6007
|
||||
export EMBEDDER_PORT=6990
|
||||
export MULTIMODAL_CLIP_EMBEDDER_PORT=6991
|
||||
export LVM_PORT=9399
|
||||
export RERANKING_PORT=8000
|
||||
export RETRIEVER_PORT=7000
|
||||
export UI_PORT=5173
|
||||
export VDMS_PORT=8001
|
||||
export VIDEO_LLAMA_PORT=9009
|
||||
|
||||
export BACKEND_HEALTH_CHECK_ENDPOINT="http://${host_ip}:${BACKEND_PORT}/v1/health_check"
|
||||
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:${BACKEND_PORT}/v1/videoqna"
|
||||
export CLIP_EMBEDDING_ENDPOINT="http://${host_ip}:${MULTIMODAL_CLIP_EMBEDDER_PORT}"
|
||||
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:${DATAPREP_PORT}/v1/dataprep/get"
|
||||
export DATAPREP_GET_VIDEO_LIST_ENDPOINT="http://${host_ip}:${DATAPREP_PORT}/v1/dataprep/get_videos"
|
||||
export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${host_ip}:${DATAPREP_PORT}/v1/dataprep/ingest"
|
||||
export EMBEDDING_ENDPOINT="http://${host_ip}:${EMBEDDER_PORT}/v1/embeddings"
|
||||
export FRONTEND_ENDPOINT="http://${host_ip}:${UI_PORT}/_stcore/health"
|
||||
export LVM_ENDPOINT="http://${host_ip}:${VIDEO_LLAMA_PORT}"
|
||||
export LVM_VIDEO_ENDPOINT="http://${host_ip}:${VIDEO_LLAMA_PORT}/generate"
|
||||
export RERANKING_ENDPOINT="http://${host_ip}:${RERANKING_PORT}/v1/reranking"
|
||||
export RETRIEVER_ENDPOINT="http://${host_ip}:${RETRIEVER_PORT}/v1/retrieval"
|
||||
export TEI_RERANKING_ENDPOINT="http://${host_ip}:${TEI_RERANKING_PORT}"
|
||||
export UI_ENDPOINT="http://${host_ip}:${UI_PORT}/_stcore/health"
|
||||
|
||||
export no_proxy="${NO_PROXY},${host_ip},vdms-vector-db,dataprep-vdms-server,clip-embedding-server,reranking-tei-server,retriever-vdms-server,lvm-video-llama,lvm,videoqna-xeon-backend-server,videoqna-xeon-ui-server"
|
||||
```
|
||||
|
||||
Note: Replace `host_ip` with your external IP address; do not use localhost.
|
||||
@@ -190,12 +213,13 @@ In the deploy steps, you need to start the VDMS DB and dataprep firstly, then in
|
||||
```bash
|
||||
cd GenAIExamples/VideoQnA/docker_compose/intel/cpu/xeon/
|
||||
|
||||
docker volume create video-llama-model
|
||||
docker volume create video-llama-model
|
||||
docker volume create videoqna-cache
|
||||
docker compose up vdms-vector-db dataprep -d
|
||||
sleep 1m # wait for the services ready
|
||||
sleep 30s
|
||||
|
||||
# Insert some sample data to the DB
|
||||
curl -X POST http://${host_ip}:6007/v1/dataprep/ingest \
|
||||
curl -X POST ${DATAPREP_INGEST_SERVICE_ENDPOINT} \
|
||||
-H "Content-Type: multipart/form-data" \
|
||||
-F "files=@./data/op_1_0320241830.mp4"
|
||||
|
||||
@@ -212,11 +236,12 @@ docker compose up -d
|
||||
|
||||
```bash
|
||||
# Single file upload
|
||||
curl -X POST ${DATAPREP_SERVICE_ENDPOINT} \
|
||||
curl -X POST ${DATAPREP_INGEST_SERVICE_ENDPOINT} \
|
||||
-H "Content-Type: multipart/form-data" \
|
||||
-F "files=@./file1.mp4"
|
||||
|
||||
# Multiple file upload
|
||||
curl -X POST ${DATAPREP_SERVICE_ENDPOINT} \
|
||||
curl -X POST ${DATAPREP_INGEST_SERVICE_ENDPOINT} \
|
||||
-H "Content-Type: multipart/form-data" \
|
||||
-F "files=@./file1.mp4" \
|
||||
-F "files=@./file2.mp4" \
|
||||
@@ -228,6 +253,7 @@ docker compose up -d
|
||||
```bash
|
||||
# List available videos
|
||||
curl -X 'GET' ${DATAPREP_GET_VIDEO_LIST_ENDPOINT} -H 'accept: application/json'
|
||||
|
||||
# Download available video
|
||||
curl -X 'GET' ${DATAPREP_GET_FILE_ENDPOINT}/video_name.mp4 -H 'accept: application/json'
|
||||
```
|
||||
@@ -235,9 +261,9 @@ docker compose up -d
|
||||
2. Embedding Microservice
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:6000/v1/embeddings \
|
||||
curl ${EMBEDDING_ENDPOINT} \
|
||||
-X POST \
|
||||
-d '{"text":"Sample text"}' \
|
||||
-d '{"input":"What is the man doing?"}' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
@@ -251,16 +277,16 @@ docker compose up -d
|
||||
|
||||
```bash
|
||||
export your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(512)]; print(embedding)")
|
||||
curl http://${host_ip}:7000/v1/retrieval \
|
||||
curl ${RETRIEVER_ENDPOINT} \
|
||||
-X POST \
|
||||
-d "{\"text\":\"test\",\"embedding\":${your_embedding}}" \
|
||||
-d "{\"text\":\"What is the man doing?\",\"embedding\":${your_embedding},\"search_type\":\"mmr\", \"k\":4}" \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
4. Reranking Microservice
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:8000/v1/reranking \
|
||||
curl ${RERANKING_ENDPOINT} \
|
||||
-X 'POST' \
|
||||
-H 'accept: application/json' \
|
||||
-H 'Content-Type: application/json' \
|
||||
@@ -282,7 +308,7 @@ docker compose up -d
|
||||
|
||||
```bash
|
||||
curl -X POST \
|
||||
"http://${host_ip}:9009/generate?video_url=silence_girl.mp4&start=0.0&duration=9&prompt=What%20is%20the%20person%20doing%3F&max_new_tokens=150" \
|
||||
"${LVM_VIDEO_ENDPOINT}?video_url=silence_girl.mp4&start=0.0&duration=9&prompt=What%20is%20the%20person%20doing%3F&max_new_tokens=150" \
|
||||
-H "accept: */*" \
|
||||
-d ''
|
||||
```
|
||||
@@ -294,9 +320,9 @@ docker compose up -d
|
||||
This service depends on the LLM backend service above. On first startup it may take a long time to become ready; wait for it before proceeding.
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:9000/v1/lvm\
|
||||
curl http://${host_ip}:${LVM_PORT}/v1/lvm \
|
||||
-X POST \
|
||||
-d '{"video_url":"https://github.com/DAMO-NLP-SG/Video-LLaMA/raw/main/examples/silence_girl.mp4","chunk_start": 0,"chunk_duration": 7,"prompt":"What is the person doing?","max_new_tokens": 50}' \
|
||||
-d '{"video_url":"https://github.com/DAMO-NLP-SG/Video-LLaMA/raw/main/examples/silence_girl.mp4","chunk_start": 0,"chunk_duration": 7,"prompt":"What is the man doing?","max_new_tokens": 50}' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
@@ -305,7 +331,7 @@ docker compose up -d
|
||||
7. MegaService
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:8888/v1/videoqna -H "Content-Type: application/json" -d '{
|
||||
curl ${BACKEND_SERVICE_ENDPOINT} -H "Content-Type: application/json" -d '{
|
||||
"messages": "What is the man doing?",
|
||||
"stream": "True"
|
||||
}'
|
||||
@@ -343,4 +369,5 @@ To clean the volume:
|
||||
|
||||
```bash
|
||||
docker volume rm video-llama-model
|
||||
docker volume rm videoqna-cache
|
||||
```
|
||||
|
||||
@@ -5,69 +5,74 @@
|
||||
|
||||
services:
|
||||
vdms-vector-db:
|
||||
image: intellabs/vdms:v2.8.0
|
||||
image: intellabs/vdms:latest
|
||||
container_name: vdms-vector-db
|
||||
ports:
|
||||
- "8001:55555"
|
||||
- "${VDMS_PORT}:55555"
|
||||
dataprep:
|
||||
image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
|
||||
container_name: dataprep-vdms-server
|
||||
depends_on:
|
||||
- vdms-vector-db
|
||||
ports:
|
||||
- "6007:5000"
|
||||
- "${DATAPREP_PORT}:5000"
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
MULTIMODAL_DATAPREP: true
|
||||
DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_MULTIMODALVDMS"
|
||||
VDMS_HOST: ${VDMS_HOST}
|
||||
VDMS_PORT: ${VDMS_PORT}
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
MULTIMODAL_DATAPREP: true
|
||||
entrypoint: sh -c 'sleep 15 && python ingest_videos.py'
|
||||
COLLECTION_NAME: ${INDEX_NAME}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
volumes:
|
||||
- /home/$USER/.cache/clip:/home/user/.cache/clip
|
||||
- /home/$USER/.cache/huggingface/hub:/home/user/.cache/huggingface/hub
|
||||
- videoqna-cache:/home/user/.cache
|
||||
embedding:
|
||||
image: ${REGISTRY:-opea}/embedding-multimodal-clip:${TAG:-latest}
|
||||
container_name: embedding-multimodal-server
|
||||
container_name: clip-embedding-server
|
||||
ports:
|
||||
- "6000:6000"
|
||||
- "${EMBEDDER_PORT:-6990}:6990"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
volumes:
|
||||
- /home/$USER/.cache/huggingface/hub:/home/user/.cache/huggingface/hub
|
||||
CLIP_EMBEDDING_ENDPOINT: ${CLIP_EMBEDDING_ENDPOINT}
|
||||
EMBEDDING_COMPONENT_NAME: "OPEA_CLIP_EMBEDDING"
|
||||
LOGFLAG: ${LOGFLAG:-False}
|
||||
restart: unless-stopped
|
||||
volumes:
|
||||
- videoqna-cache:/home/user/.cache
|
||||
retriever:
|
||||
image: ${REGISTRY:-opea}/retriever:${TAG:-latest}
|
||||
container_name: retriever-vdms-server
|
||||
depends_on:
|
||||
- vdms-vector-db
|
||||
ports:
|
||||
- "7000:7000"
|
||||
- "${RETRIEVER_PORT}:7000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_VDMS"
|
||||
VDMS_INDEX_NAME: ${INDEX_NAME}
|
||||
VDMS_HOST: ${VDMS_HOST}
|
||||
VDMS_PORT: ${VDMS_PORT}
|
||||
VDMS_USE_CLIP: ${USECLIP}
|
||||
LOGFLAG: ${LOGFLAG}
|
||||
RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_VDMS"
|
||||
entrypoint: sh -c 'sleep 30 && python retriever_vdms.py'
|
||||
NUMBA_CACHE_DIR: "/tmp/numba_cache"
|
||||
restart: unless-stopped
|
||||
volumes:
|
||||
- /home/$USER/.cache/huggingface/hub:/home/user/.cache/huggingface/hub
|
||||
- videoqna-cache:/home/user/.cache
|
||||
reranking:
|
||||
image: ${REGISTRY:-opea}/reranking:${TAG:-latest}
|
||||
container_name: reranking-tei-server
|
||||
ports:
|
||||
- "8000:8000"
|
||||
- "${RERANKING_PORT}:8000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
@@ -76,12 +81,13 @@ services:
|
||||
CHUNK_DURATION: ${CHUNK_DURATION}
|
||||
FILE_SERVER_ENDPOINT: ${DATAPREP_GET_FILE_ENDPOINT}
|
||||
DATAPREP_GET_VIDEO_LIST_ENDPOINT: ${DATAPREP_GET_VIDEO_LIST_ENDPOINT}
|
||||
RERANK_COMPONENT_NAME: ${RERANK_COMPONENT_NAME:-OPEA_VIDEO_RERANKING}
|
||||
restart: unless-stopped
|
||||
lvm-video-llama:
|
||||
image: ${REGISTRY:-opea}/lvm-video-llama:${TAG:-latest}
|
||||
container_name: lvm-video-llama
|
||||
ports:
|
||||
- "9009:9009"
|
||||
- ${VIDEO_LLAMA_PORT:-9009}:9009
|
||||
ipc: host
|
||||
environment:
|
||||
http_proxy: ${http_proxy}
|
||||
@@ -89,20 +95,20 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
llm_download: ${LLM_DOWNLOAD}
|
||||
volumes:
|
||||
- "/home/$USER/.cache:/home/user/.cache"
|
||||
- videoqna-cache:/home/user/.cache
|
||||
- video-llama-model:/home/user/model
|
||||
restart: unless-stopped
|
||||
lvm:
|
||||
image: ${REGISTRY:-opea}/lvm:${TAG:-latest}
|
||||
container_name: lvm
|
||||
ports:
|
||||
- "9000:9000"
|
||||
- "${LVM_PORT}:9399"
|
||||
ipc: host
|
||||
environment:
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
no_proxy: ${no_proxy}
|
||||
LVM_COMPONENT_NAME: "OPEA_VIDEO_LLAMA_LVM"
|
||||
LVM_COMPONENT_NAME: ${LVM_COMPONENT_NAME:-OPEA_VIDEO_LLAMA_LVM}
|
||||
LVM_ENDPOINT: ${LVM_ENDPOINT}
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
@@ -119,13 +125,14 @@ services:
|
||||
- lvm-video-llama
|
||||
- lvm
|
||||
ports:
|
||||
- "8888:8888"
|
||||
entrypoint: sh -c 'sleep 45 && python videoqna.py'
|
||||
- "${BACKEND_PORT}:8888"
|
||||
environment:
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
no_proxy: ${no_proxy}
|
||||
LOGFLAG: ${LOGFLAG:-False}
|
||||
MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP}
|
||||
BACKEND_PORT: ${BACKEND_PORT}
|
||||
EMBEDDING_SERVICE_HOST_IP: ${EMBEDDING_SERVICE_HOST_IP}
|
||||
RETRIEVER_SERVICE_HOST_IP: ${RETRIEVER_SERVICE_HOST_IP}
|
||||
RERANK_SERVICE_HOST_IP: ${RERANK_SERVICE_HOST_IP}
|
||||
@@ -138,18 +145,23 @@ services:
|
||||
depends_on:
|
||||
- videoqna-xeon-backend-server
|
||||
ports:
|
||||
- "5173:5173"
|
||||
- "${UI_PORT}:5173"
|
||||
environment:
|
||||
https_proxy: ${https_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
no_proxy: ${no_proxy}
|
||||
BACKEND_SERVICE_ENDPOINT: ${BACKEND_SERVICE_ENDPOINT}
|
||||
BACKEND_HEALTH_CHECK_ENDPOINT: ${BACKEND_HEALTH_CHECK_ENDPOINT}
|
||||
DATAPREP_INGEST_SERVICE_ENDPOINT: ${DATAPREP_INGEST_SERVICE_ENDPOINT}
|
||||
DATAPREP_PORT: ${DATAPREP_PORT}
|
||||
BACKEND_PORT: ${BACKEND_PORT}
|
||||
UI_PORT: ${UI_PORT}
|
||||
ipc: host
|
||||
restart: always
|
||||
volumes:
|
||||
video-llama-model:
|
||||
external: true
|
||||
videoqna-cache:
|
||||
networks:
|
||||
default:
|
||||
driver: bridge
|
||||
|
||||
@@ -8,21 +8,48 @@ popd > /dev/null
|
||||
|
||||
host_ip=$(hostname -I | awk '{print $1}')
|
||||
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
|
||||
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
|
||||
export RERANK_SERVICE_HOST_IP=${host_ip}
|
||||
export LVM_SERVICE_HOST_IP=${host_ip}
|
||||
export HF_TOKEN=${HF_TOKEN}
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
|
||||
|
||||
export LVM_ENDPOINT="http://${host_ip}:9009"
|
||||
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/videoqna"
|
||||
export BACKEND_HEALTH_CHECK_ENDPOINT="http://${host_ip}:8888/v1/health_check"
|
||||
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest"
|
||||
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get"
|
||||
export DATAPREP_GET_VIDEO_LIST_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_videos"
|
||||
|
||||
export VDMS_HOST=${host_ip}
|
||||
export VDMS_PORT=8001
|
||||
export INDEX_NAME="mega-videoqna"
|
||||
export USECLIP=1
|
||||
export LLM_DOWNLOAD="True" # Set to "False" before redeploy LVM server to avoid model download
|
||||
export RERANK_COMPONENT_NAME="OPEA_VIDEO_RERANKING"
|
||||
export LVM_COMPONENT_NAME="OPEA_VIDEO_LLAMA_LVM"
|
||||
export EMBEDDING_COMPONENT_NAME="OPEA_CLIP_EMBEDDING"
|
||||
export USECLIP=1
|
||||
export LOGFLAG=True
|
||||
|
||||
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
|
||||
export LVM_SERVICE_HOST_IP=${host_ip}
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export RERANK_SERVICE_HOST_IP=${host_ip}
|
||||
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
|
||||
export VDMS_HOST=${host_ip}
|
||||
|
||||
export BACKEND_PORT=8888
|
||||
export DATAPREP_PORT=6007
|
||||
export EMBEDDER_PORT=6990
|
||||
export MULTIMODAL_CLIP_EMBEDDER_PORT=6991
|
||||
export LVM_PORT=9399
|
||||
export RERANKING_PORT=8000
|
||||
export RETRIEVER_PORT=7000
|
||||
export UI_PORT=5173
|
||||
export VDMS_PORT=8001
|
||||
export VIDEO_LLAMA_PORT=9009
|
||||
|
||||
export BACKEND_HEALTH_CHECK_ENDPOINT="http://${host_ip}:${BACKEND_PORT}/v1/health_check"
|
||||
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:${BACKEND_PORT}/v1/videoqna"
|
||||
export CLIP_EMBEDDING_ENDPOINT="http://${host_ip}:${MULTIMODAL_CLIP_EMBEDDER_PORT}"
|
||||
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:${DATAPREP_PORT}/v1/dataprep/get"
|
||||
export DATAPREP_GET_VIDEO_LIST_ENDPOINT="http://${host_ip}:${DATAPREP_PORT}/v1/dataprep/get_videos"
|
||||
export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${host_ip}:${DATAPREP_PORT}/v1/dataprep/ingest_videos"
|
||||
export EMBEDDING_ENDPOINT="http://${host_ip}:${EMBEDDER_PORT}/v1/embeddings"
|
||||
export FRONTEND_ENDPOINT="http://${host_ip}:${UI_PORT}/_stcore/health"
|
||||
export LVM_ENDPOINT="http://${host_ip}:${VIDEO_LLAMA_PORT}"
|
||||
export LVM_VIDEO_ENDPOINT="http://${host_ip}:${VIDEO_LLAMA_PORT}/generate"
|
||||
export RERANKING_ENDPOINT="http://${host_ip}:${RERANKING_PORT}/v1/reranking"
|
||||
export RETRIEVER_ENDPOINT="http://${host_ip}:${RETRIEVER_PORT}/v1/retrieval"
|
||||
export TEI_RERANKING_ENDPOINT="http://${host_ip}:${TEI_RERANKING_PORT}"
|
||||
export UI_ENDPOINT="http://${host_ip}:${UI_PORT}/_stcore/health"
|
||||
|
||||
export no_proxy="${NO_PROXY},${host_ip},vdms-vector-db,dataprep-vdms-server,clip-embedding-server,reranking-tei-server,retriever-vdms-server,lvm-video-llama,lvm,videoqna-xeon-backend-server,videoqna-xeon-ui-server"
|
||||
|
||||
@@ -23,7 +23,7 @@ services:
|
||||
dockerfile: comps/dataprep/src/Dockerfile
|
||||
extends: videoqna
|
||||
image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
|
||||
embedding-multimodal-clip:
|
||||
embedding:
|
||||
build:
|
||||
context: GenAIComps
|
||||
dockerfile: comps/third_parties/clip/src/Dockerfile
|
||||
|
||||
@@ -13,6 +13,55 @@ export TAG=${IMAGE_TAG}
|
||||
WORKPATH=$(dirname "$PWD")
|
||||
LOG_PATH="$WORKPATH/tests"
|
||||
ip_address=$(hostname -I | awk '{print $1}')
|
||||
export host_ip=${ip_address}
|
||||
|
||||
function setup_env() {
|
||||
export HF_TOKEN=${HF_TOKEN}
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
|
||||
|
||||
export INDEX_NAME="mega-videoqna"
|
||||
export LLM_DOWNLOAD="True" # Set to "False" before redeploy LVM server to avoid model download
|
||||
export RERANK_COMPONENT_NAME="OPEA_VIDEO_RERANKING"
|
||||
export LVM_COMPONENT_NAME="OPEA_VIDEO_LLAMA_LVM"
|
||||
export EMBEDDING_COMPONENT_NAME="OPEA_CLIP_EMBEDDING"
|
||||
export USECLIP=1
|
||||
export LOGFLAG=True
|
||||
|
||||
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
|
||||
export LVM_SERVICE_HOST_IP=${host_ip}
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export RERANK_SERVICE_HOST_IP=${host_ip}
|
||||
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
|
||||
export VDMS_HOST=${host_ip}
|
||||
|
||||
export BACKEND_PORT=8888
|
||||
export DATAPREP_PORT=6007
|
||||
export EMBEDDER_PORT=6990
|
||||
export MULTIMODAL_CLIP_EMBEDDER_PORT=6991
|
||||
export LVM_PORT=9399
|
||||
export RERANKING_PORT=8000
|
||||
export RETRIEVER_PORT=7000
|
||||
export UI_PORT=5173
|
||||
export VDMS_PORT=8001
|
||||
export VIDEO_LLAMA_PORT=9009
|
||||
|
||||
export BACKEND_HEALTH_CHECK_ENDPOINT="http://${host_ip}:${BACKEND_PORT}/v1/health_check"
|
||||
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:${BACKEND_PORT}/v1/videoqna"
|
||||
export CLIP_EMBEDDING_ENDPOINT="http://${host_ip}:${MULTIMODAL_CLIP_EMBEDDER_PORT}"
|
||||
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:${DATAPREP_PORT}/v1/dataprep/get"
|
||||
export DATAPREP_GET_VIDEO_LIST_ENDPOINT="http://${host_ip}:${DATAPREP_PORT}/v1/dataprep/get_videos"
|
||||
export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${host_ip}:${DATAPREP_PORT}/v1/dataprep/ingest_videos"
|
||||
export EMBEDDING_ENDPOINT="http://${host_ip}:${EMBEDDER_PORT}/v1/embeddings"
|
||||
export FRONTEND_ENDPOINT="http://${host_ip}:${UI_PORT}/_stcore/health"
|
||||
export LVM_ENDPOINT="http://${host_ip}:${VIDEO_LLAMA_PORT}"
|
||||
export LVM_VIDEO_ENDPOINT="http://${host_ip}:${VIDEO_LLAMA_PORT}/generate"
|
||||
export RERANKING_ENDPOINT="http://${host_ip}:${RERANKING_PORT}/v1/reranking"
|
||||
export RETRIEVER_ENDPOINT="http://${host_ip}:${RETRIEVER_PORT}/v1/retrieval"
|
||||
export TEI_RERANKING_ENDPOINT="http://${host_ip}:${TEI_RERANKING_PORT}"
|
||||
export UI_ENDPOINT="http://${host_ip}:${UI_PORT}/_stcore/health"
|
||||
|
||||
export no_proxy="${NO_PROXY},${host_ip},vdms-vector-db,dataprep-vdms-server,clip-embedding-server,reranking-tei-server,retriever-vdms-server,lvm-video-llama,lvm,videoqna-xeon-backend-server,videoqna-xeon-ui-server"
|
||||
}
|
||||
|
||||
function build_docker_images() {
|
||||
opea_branch=${opea_branch:-"main"}
|
||||
@@ -28,26 +77,33 @@ function build_docker_images() {
|
||||
fi
|
||||
|
||||
cd $WORKPATH/docker_image_build
|
||||
git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
|
||||
git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git GenAIComps
|
||||
|
||||
# Create .cache directory for cache volume to connect (avoids permission denied error)
|
||||
OLD_STRING="mkdir -p /home/user "
|
||||
NEW_STRING="mkdir -p /home/user/.cache "
|
||||
sed -i "s|$OLD_STRING|$NEW_STRING|g" "GenAIComps/comps/dataprep/src/Dockerfile"
|
||||
sed -i "s|$OLD_STRING|$NEW_STRING|g" "GenAIComps/comps/retrievers/src/Dockerfile"
|
||||
sed -i "s|$OLD_STRING|$NEW_STRING|g" "GenAIComps/comps/third_parties/clip/src/Dockerfile"
|
||||
|
||||
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
||||
docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log
|
||||
docker compose -f build.yaml build --no-cache 2>&1 > ${LOG_PATH}/docker_image_build.log
|
||||
|
||||
docker pull intellabs/vdms:v2.8.0
|
||||
docker pull intellabs/vdms:latest
|
||||
docker images && sleep 1s
|
||||
}
|
||||
|
||||
|
||||
function start_services() {
|
||||
echo "Starting services..."
|
||||
cd $WORKPATH/docker_compose/intel/cpu/xeon/
|
||||
|
||||
source set_env.sh
|
||||
docker volume create video-llama-model
|
||||
docker volume create videoqna-cache
|
||||
docker compose up vdms-vector-db dataprep -d
|
||||
sleep 30s
|
||||
|
||||
# Insert some sample data to the DB
|
||||
HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST http://${ip_address}:6007/v1/dataprep/ingest \
|
||||
HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST ${DATAPREP_INGEST_SERVICE_ENDPOINT} \
|
||||
-H "Content-Type: multipart/form-data" \
|
||||
-F "files=@./data/op_1_0320241830.mp4")
|
||||
|
||||
@@ -58,12 +114,13 @@ function start_services() {
|
||||
docker logs dataprep-vdms-server >> ${LOG_PATH}/dataprep.log
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Bring all the others
|
||||
docker compose up -d > ${LOG_PATH}/start_services_with_compose.log
|
||||
sleep 1m
|
||||
|
||||
# List of containers running uvicorn
|
||||
list=("dataprep-vdms-server" "embedding-multimodal-server" "retriever-vdms-server" "reranking-tei-server" "lvm-video-llama" "lvm-video-llama" "videoqna-xeon-backend-server")
|
||||
list=("dataprep-vdms-server" "clip-embedding-server" "retriever-vdms-server" "reranking-tei-server" "lvm-video-llama" "videoqna-xeon-backend-server")
|
||||
|
||||
# Define the maximum time limit in seconds
|
||||
TIME_LIMIT=5400
|
||||
@@ -95,10 +152,10 @@ function start_services() {
|
||||
for i in "${!list[@]}"; do
|
||||
item=${list[i]}
|
||||
if check_condition "$item"; then
|
||||
echo "Condition met for $item, removing from list."
|
||||
echo "Condition met for $item, removing from list." >> ${LOG_PATH}/list_check.log
|
||||
unset list[i]
|
||||
else
|
||||
echo "Condition not met for $item, keeping in list."
|
||||
echo "Condition not met for $item, keeping in list." >> ${LOG_PATH}/list_check.log
|
||||
fi
|
||||
done
|
||||
|
||||
@@ -110,7 +167,7 @@ function start_services() {
|
||||
echo "List is empty. Exiting."
|
||||
break
|
||||
fi
|
||||
sleep 5m
|
||||
sleep 2m
|
||||
done
|
||||
|
||||
if docker logs videoqna-xeon-ui-server 2>&1 | grep -q "Streamlit app"; then
|
||||
@@ -128,33 +185,37 @@ function validate_services() {
|
||||
local DOCKER_NAME="$4"
|
||||
local INPUT_DATA="$5"
|
||||
|
||||
local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
|
||||
if [ "$HTTP_STATUS" -eq 200 ]; then
|
||||
echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
|
||||
HTTP_RESPONSE=$(curl -s -w "HTTPSTATUS:%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
|
||||
HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://')
|
||||
RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g')
|
||||
|
||||
local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log)
|
||||
docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
|
||||
|
||||
if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then
|
||||
echo "[ $SERVICE_NAME ] Content is as expected."
|
||||
else
|
||||
echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT"
|
||||
docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
# check response status
|
||||
if [ "$HTTP_STATUS" -ne "200" ]; then
|
||||
echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
|
||||
docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
|
||||
exit 1
|
||||
else
|
||||
echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
|
||||
fi
|
||||
|
||||
# check response body
|
||||
if [[ "${RESPONSE_BODY}" != *"${EXPECTED_RESULT}"* ]]; then
|
||||
echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY"
|
||||
exit 1
|
||||
else
|
||||
echo "[ $SERVICE_NAME ] Content is as expected."
|
||||
fi
|
||||
sleep 1s
|
||||
}
|
||||
|
||||
function validate_microservices() {
|
||||
# Check if the microservices are running correctly.
|
||||
cd $WORKPATH/docker_compose/intel/cpu/xeon//data
|
||||
cd $WORKPATH/docker_compose/intel/cpu/xeon/data
|
||||
|
||||
# dataprep microservice
|
||||
HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST http://${ip_address}:6007/v1/dataprep/ingest \
|
||||
echo "Validating Dataprep microservice ..."
|
||||
HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST "${DATAPREP_INGEST_SERVICE_ENDPOINT}" \
|
||||
-H "Content-Type: multipart/form-data" \
|
||||
-F "files=@./op_1_0320241830.mp4")
|
||||
|
||||
@@ -168,24 +229,24 @@ function validate_microservices() {
|
||||
|
||||
# Embedding Microservice
|
||||
validate_services \
|
||||
"${ip_address}:6000/v1/embeddings" \
|
||||
"Sample text" \
|
||||
${EMBEDDING_ENDPOINT} \
|
||||
'"embedding":[' \
|
||||
"embedding" \
|
||||
"embedding-multimodal-server" \
|
||||
'{"text":"Sample text"}'
|
||||
"clip-embedding-server" \
|
||||
'{"input":"What is the man doing?"}'
|
||||
|
||||
# Retriever Microservice
|
||||
export your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(512)]; print(embedding)")
|
||||
export your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(512)]; print(embedding)")
|
||||
validate_services \
|
||||
"${ip_address}:7000/v1/retrieval" \
|
||||
${RETRIEVER_ENDPOINT} \
|
||||
"retrieved_docs" \
|
||||
"retriever" \
|
||||
"retriever-vdms-server" \
|
||||
"{\"text\":\"test\",\"embedding\":${your_embedding}}"
|
||||
"{\"text\":\"What is the man doing?\",\"embedding\":${your_embedding},\"search_type\":\"mmr\", \"k\":4}"
|
||||
|
||||
# Reranking Microservice
|
||||
validate_services \
|
||||
"${ip_address}:8000/v1/reranking" \
|
||||
${RERANKING_ENDPOINT} \
|
||||
"video_url" \
|
||||
"reranking" \
|
||||
"reranking-tei-server" \
|
||||
@@ -198,32 +259,52 @@ function validate_microservices() {
|
||||
]
|
||||
}'
|
||||
|
||||
# Video Llama LVM Backend Service
|
||||
result=$(http_proxy="" curl -X POST \
|
||||
"${LVM_VIDEO_ENDPOINT}?video_url=https%3A%2F%2Fgithub.com%2FDAMO-NLP-SG%2FVideo-LLaMA%2Fraw%2Fmain%2Fexamples%2Fsilence_girl.mp4&start=0.0&duration=9&prompt=What%20is%20the%20person%20doing%3F&max_new_tokens=150" \
|
||||
-H "accept: */*" -d '')
|
||||
|
||||
if [[ $result == *"silence"* ]]; then
|
||||
echo "LVM microservice is running correctly."
|
||||
else
|
||||
echo "LVM microservice is not running correctly. Received status was $HTTP_STATUS"
|
||||
docker logs lvm-video-llama >> ${LOG_PATH}/lvm-video-llama.log
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# LVM Microservice
|
||||
validate_services \
|
||||
"${ip_address}:9000/v1/lvm" \
|
||||
"http://${host_ip}:${LVM_PORT}/v1/lvm" \
|
||||
"silence" \
|
||||
"lvm" \
|
||||
"lvm-video-llama" \
|
||||
'{"video_url":"https://github.com/DAMO-NLP-SG/Video-LLaMA/raw/main/examples/silence_girl.mp4","chunk_start": 0,"chunk_duration": 7,"prompt":"What is the person doing?","max_new_tokens": 50}'
|
||||
"lvm" \
|
||||
'{"video_url":"https://github.com/DAMO-NLP-SG/Video-LLaMA/raw/main/examples/silence_girl.mp4","chunk_start": 0,"chunk_duration": 7,"prompt":"What is the man doing?","max_new_tokens": 50}'
|
||||
|
||||
echo "==== microservices validated ===="
|
||||
sleep 1s
|
||||
}
|
||||
|
||||
function validate_megaservice() {
|
||||
echo "Validating videoqna-xeon-backend-server ..."
|
||||
|
||||
validate_services \
|
||||
"${ip_address}:8888/v1/videoqna" \
|
||||
${BACKEND_SERVICE_ENDPOINT} \
|
||||
"man" \
|
||||
"videoqna-xeon-backend-server" \
|
||||
"videoqna-xeon-backend-server" \
|
||||
'{"messages":"What is the man doing?","stream":"True"}'
|
||||
|
||||
echo "==== megaservice validated ===="
|
||||
}
|
||||
|
||||
function validate_frontend() {
|
||||
HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X GET http://${ip_address}:5173/_stcore/health)
|
||||
echo "Validating frontend ..."
|
||||
|
||||
HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X GET ${FRONTEND_ENDPOINT})
|
||||
|
||||
if [ "$HTTP_STATUS" -eq 200 ]; then
|
||||
echo "Frontend is running correctly."
|
||||
local CONTENT=$(curl -s -X GET http://${ip_address}:5173/_stcore/health)
|
||||
local CONTENT=$(curl -s -X GET ${FRONTEND_ENDPOINT})
|
||||
if echo "$CONTENT" | grep -q "ok"; then
|
||||
echo "Frontend Content is as expected."
|
||||
else
|
||||
@@ -236,20 +317,31 @@ function validate_frontend() {
|
||||
docker logs videoqna-xeon-ui-server >> ${LOG_PATH}/ui.log
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "==== frontend validated ===="
|
||||
}
|
||||
|
||||
function stop_docker() {
|
||||
echo "Stopping docker..."
|
||||
cd $WORKPATH/docker_compose/intel/cpu/xeon/
|
||||
docker compose stop && docker compose rm -f
|
||||
docker volume rm video-llama-model
|
||||
docker volume rm videoqna-cache
|
||||
echo "Docker stopped."
|
||||
}
|
||||
|
||||
function main() {
|
||||
|
||||
setup_env
|
||||
stop_docker
|
||||
|
||||
if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
|
||||
|
||||
start_time=$(date +%s)
|
||||
start_services
|
||||
end_time=$(date +%s)
|
||||
duration=$((end_time-start_time))
|
||||
echo "Mega service start duration is $duration s" && sleep 1s
|
||||
|
||||
validate_microservices
|
||||
validate_megaservice
|
||||
@@ -260,4 +352,4 @@ function main() {
|
||||
|
||||
}
|
||||
|
||||
# main
|
||||
main
|
||||
|
||||
@@ -12,25 +12,43 @@ from comps.cores.proto.api_protocol import (
|
||||
ChatMessage,
|
||||
UsageInfo,
|
||||
)
|
||||
from comps.cores.proto.docarray import LLMParams
|
||||
from comps.cores.proto.docarray import LLMParams, TextDoc
|
||||
from fastapi import Request
|
||||
from fastapi.responses import StreamingResponse
|
||||
|
||||
MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))
|
||||
MEGA_SERVICE_PORT = int(os.getenv("BACKEND_PORT", 8888))
|
||||
EMBEDDING_SERVICE_HOST_IP = os.getenv("EMBEDDING_SERVICE_HOST_IP", "0.0.0.0")
|
||||
EMBEDDING_SERVICE_PORT = int(os.getenv("EMBEDDING_SERVICE_PORT", 6000))
|
||||
EMBEDDING_SERVICE_PORT = int(os.getenv("EMBEDDER_PORT", 6990))
|
||||
RETRIEVER_SERVICE_HOST_IP = os.getenv("RETRIEVER_SERVICE_HOST_IP", "0.0.0.0")
|
||||
RETRIEVER_SERVICE_PORT = int(os.getenv("RETRIEVER_SERVICE_PORT", 7000))
|
||||
RETRIEVER_SERVICE_PORT = int(os.getenv("RETRIEVER_PORT", 7000))
|
||||
RERANK_SERVICE_HOST_IP = os.getenv("RERANK_SERVICE_HOST_IP", "0.0.0.0")
|
||||
RERANK_SERVICE_PORT = int(os.getenv("RERANK_SERVICE_PORT", 8000))
|
||||
LVM_SERVICE_HOST_IP = os.getenv("LVM_SERVICE_HOST_IP", "0.0.0.0")
|
||||
LVM_SERVICE_PORT = int(os.getenv("LVM_SERVICE_PORT", 9000))
|
||||
LVM_SERVICE_PORT = int(os.getenv("LVM_PORT", 9399))
|
||||
|
||||
|
||||
def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
|
||||
service_type = self.services[cur_node].service_type
|
||||
if service_type == ServiceType.EMBEDDING:
|
||||
if "input" in inputs:
|
||||
input_text = inputs["input"]["text"] if isinstance(inputs["input"], dict) else inputs["input"]
|
||||
inputs = TextDoc(text=input_text).model_dump()
|
||||
return inputs
|
||||
|
||||
|
||||
def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_dict, **kwargs):
|
||||
if self.services[cur_node].service_type == ServiceType.EMBEDDING:
|
||||
return {"text": inputs["text"], "embedding": data["embedding"]}
|
||||
else:
|
||||
return data
|
||||
|
||||
|
||||
class VideoQnAService:
|
||||
def __init__(self, host="0.0.0.0", port=8888):
|
||||
self.host = host
|
||||
self.port = port
|
||||
ServiceOrchestrator.align_inputs = align_inputs
|
||||
ServiceOrchestrator.align_outputs = align_outputs
|
||||
self.megaservice = ServiceOrchestrator()
|
||||
self.endpoint = str(MegaServiceEndpoint.VIDEO_RAG_QNA)
|
||||
|
||||
@@ -74,8 +92,8 @@ class VideoQnAService:
|
||||
|
||||
async def handle_request(self, request: Request):
|
||||
data = await request.json()
|
||||
stream_opt = data.get("stream", False)
|
||||
chat_request = ChatCompletionRequest.parse_obj(data)
|
||||
stream_opt = bool(data.get("stream", False))
|
||||
chat_request = ChatCompletionRequest.model_validate(data)
|
||||
prompt = handle_message(chat_request.messages)
|
||||
parameters = LLMParams(
|
||||
max_new_tokens=chat_request.max_tokens if chat_request.max_tokens else 1024,
|
||||
@@ -86,9 +104,10 @@ class VideoQnAService:
|
||||
presence_penalty=chat_request.presence_penalty if chat_request.presence_penalty else 0.0,
|
||||
repetition_penalty=chat_request.repetition_penalty if chat_request.repetition_penalty else 1.03,
|
||||
stream=stream_opt,
|
||||
chat_template=chat_request.chat_template if chat_request.chat_template else None,
|
||||
)
|
||||
result_dict, runtime_graph = await self.megaservice.schedule(
|
||||
initial_inputs={"text": prompt}, llm_parameters=parameters
|
||||
initial_inputs={"input": prompt}, llm_parameters=parameters
|
||||
)
|
||||
for node, response in result_dict.items():
|
||||
# Here it suppose the last microservice in the megaservice is LVM.
|
||||
|
||||
Reference in New Issue
Block a user