Set vLLM as default model for VisualQnA (#1644)

This commit is contained in:
Spycsh
2025-03-18 15:29:49 +08:00
committed by GitHub
parent 1b6342aa5b
commit bf8d03425c
12 changed files with 762 additions and 181 deletions

View File

@@ -10,28 +10,6 @@ For detailed information about these instance types, you can refer to this [link
After launching your instance, you can connect to it using SSH (for Linux instances) or Remote Desktop Protocol (RDP) (for Windows instances). From there, you'll have full access to your Xeon server, allowing you to install, configure, and manage your applications as needed.
**Certain ports in the EC2 instance need to be opened up in the security group for the microservices to work with the curl commands.**
> See one example below. Please open up these ports in the EC2 instance based on the IP addresses you want to allow.
```
llava-tgi-service
===========
Port 8399 - Open to 0.0.0.0/0
llm
===
Port 9399 - Open to 0.0.0.0/0
visualqna-xeon-backend-server
==========================
Port 8888 - Open to 0.0.0.0/0
visualqna-xeon-ui-server
=====================
Port 5173 - Open to 0.0.0.0/0
```
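If you manage the security group from the CLI rather than the AWS console, each rule can be added with `aws ec2 authorize-security-group-ingress`. The sketch below is illustrative only: the security group ID and CIDR are placeholders you must replace with your own values.

```bash
# Open the VisualQnA backend port (8888) to a chosen CIDR; repeat for 8399, 9399 and 5173.
# sg-0123456789abcdef0 and 203.0.113.0/24 are placeholders, not values from this repo.
aws ec2 authorize-security-group-ingress \
  --group-id sg-0123456789abcdef0 \
  --protocol tcp \
  --port 8888 \
  --cidr 203.0.113.0/24
```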
## 🚀 Build Docker Images
First of all, you need to build the Docker images locally and install the required Python package.
@@ -64,19 +42,23 @@ cd GenAIExamples/VisualQnA/ui
docker build --no-cache -t opea/visualqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f docker/Dockerfile .
```
### 4. Pull TGI Xeon Image
### 4. Pull vLLM/TGI Xeon Image
```bash
# vLLM
docker pull opea/vllm:latest
# TGI (Optional)
docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
```
Then run the command `docker images`, you will have the following 5 Docker Images:
Then run the command `docker images`; you should see the following Docker images:
1. `ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu`
2. `opea/lvm:latest`
3. `opea/visualqna:latest`
4. `opea/visualqna-ui:latest`
5. `opea/nginx`
1. `opea/vllm:latest`
2. `ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu` (Optional)
3. `opea/lvm:latest`
4. `opea/visualqna:latest`
5. `opea/visualqna-ui:latest`
6. `opea/nginx`
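A quick way to confirm the images are present after pulling and building (a simple filter over `docker images`, not part of the repository docs):

```bash
# List only the images used by this example
docker images | grep -E 'opea/vllm|text-generation-inference|opea/lvm|opea/visualqna|opea/nginx'
```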
## 🚀 Start Microservices
@@ -84,30 +66,8 @@ Then run the command `docker images`, you will have the following 5 Docker Image
Since the `compose.yaml` will consume some environment variables, you need to set them up in advance as shown below.
**Export the value of the public IP address of your Xeon server to the `host_ip` environment variable**
> Replace `External_Public_IP` below with the actual IPv4 value
```
export host_ip="External_Public_IP"
```
**Append the value of the public IP address to the no_proxy list**
```
export your_no_proxy="${your_no_proxy},${host_ip}"
```
```bash
export no_proxy=${your_no_proxy}
export http_proxy=${your_http_proxy}
export https_proxy=${your_http_proxy}
export LVM_MODEL_ID="llava-hf/llava-v1.6-mistral-7b-hf"
export LVM_ENDPOINT="http://${host_ip}:8399"
export LVM_SERVICE_PORT=9399
export MEGA_SERVICE_HOST_IP=${host_ip}
export LVM_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/visualqna"
source set_env.sh
```
Note: Please replace `host_ip` with your external IP address; do not use localhost.
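The repository's `set_env.sh` derives `host_ip` from the first address reported by `hostname -I`, which on EC2 is the private address. If you need the instance's public IPv4 instead, one option, assuming outbound internet access, is to query an external echo service and append the result to `no_proxy`:

```bash
# Fetch the public IPv4 (assumption: ifconfig.me is reachable from the instance)
export host_ip=$(curl -s ifconfig.me)
export no_proxy="${no_proxy},${host_ip}"
```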
@@ -122,6 +82,8 @@ cd GenAIExamples/VisualQnA/docker_compose/intel/cpu/xeon
```bash
docker compose -f compose.yaml up -d
# if using TGI as the LLM serving backend
docker compose -f compose_tgi.yaml up -d
```
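It can take a few minutes for vLLM to download the model and become ready. A minimal readiness check, polling the same `/health` route the compose healthcheck uses (8399 is the default `VLLM_PORT` host port from the compose file):

```bash
# Wait until the vLLM endpoint reports healthy, then show container status
until curl -sf "http://${host_ip}:8399/health" > /dev/null; do
  echo "waiting for vllm-service..."
  sleep 5
done
docker compose ps
```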
### Validate Microservices
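For example, the LVM microservice can be checked directly with the same tiny base64-encoded test image used by the CI scripts in this commit (a sketch; adjust `host_ip` as needed):

```bash
curl -s "http://${host_ip}:9399/v1/lvm" \
  -X POST \
  -H 'Content-Type: application/json' \
  -d '{"image": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "prompt": "What is this?"}'
```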

View File

@@ -2,32 +2,31 @@
# SPDX-License-Identifier: Apache-2.0
services:
llava-tgi-service:
image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
container_name: tgi-llava-xeon-server
vllm-service:
image: ${REGISTRY:-opea}/vllm:${TAG:-latest}
container_name: vllm-service
ports:
- "8399:80"
- ${VLLM_PORT:-8399}:80
volumes:
- "${MODEL_CACHE:-./data}:/data"
shm_size: 1g
- "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
host_ip: ${host_ip}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
VLLM_TORCH_PROFILER_DIR: "/mnt"
healthcheck:
test: ["CMD-SHELL", "curl -f http://$host_ip:8399/health || exit 1"]
test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"]
interval: 10s
timeout: 10s
retries: 60
command: --model-id ${LVM_MODEL_ID} --max-input-length 4096 --max-total-tokens 8192 --cuda-graphs 0
retries: 100
command: --model $LVM_MODEL_ID --host 0.0.0.0 --port 80 --chat-template examples/template_llava.jinja # https://docs.vllm.ai/en/v0.5.0/models/vlm.html
lvm:
image: ${REGISTRY:-opea}/lvm:${TAG:-latest}
container_name: lvm-xeon-server
depends_on:
llava-tgi-service:
vllm-service:
condition: service_healthy
ports:
- "9399:9399"
@@ -37,7 +36,8 @@ services:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
LVM_ENDPOINT: ${LVM_ENDPOINT}
LVM_COMPONENT_NAME: "OPEA_TGI_LLAVA_LVM"
LVM_COMPONENT_NAME: "OPEA_VLLM_LVM"
LLM_MODEL_ID: ${LVM_MODEL_ID}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
restart: unless-stopped
@@ -45,7 +45,7 @@ services:
image: ${REGISTRY:-opea}/visualqna:${TAG:-latest}
container_name: visualqna-xeon-backend-server
depends_on:
- llava-tgi-service
- vllm-service
- lvm
ports:
- "8888:8888"

View File

@@ -0,0 +1,96 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
services:
llava-tgi-service:
image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
container_name: tgi-llava-xeon-server
ports:
- "8399:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
host_ip: ${host_ip}
healthcheck:
test: ["CMD-SHELL", "curl -f http://$host_ip:8399/health || exit 1"]
interval: 10s
timeout: 10s
retries: 60
command: --model-id ${LVM_MODEL_ID} --max-input-length 4096 --max-total-tokens 8192 --cuda-graphs 0
lvm:
image: ${REGISTRY:-opea}/lvm:${TAG:-latest}
container_name: lvm-xeon-server
depends_on:
llava-tgi-service:
condition: service_healthy
ports:
- "9399:9399"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
LVM_ENDPOINT: ${LVM_ENDPOINT}
LVM_COMPONENT_NAME: "OPEA_TGI_LLAVA_LVM"
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
restart: unless-stopped
visualqna-xeon-backend-server:
image: ${REGISTRY:-opea}/visualqna:${TAG:-latest}
container_name: visualqna-xeon-backend-server
depends_on:
- llava-tgi-service
- lvm
ports:
- "8888:8888"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
- LVM_SERVICE_HOST_IP=${LVM_SERVICE_HOST_IP}
ipc: host
restart: always
visualqna-xeon-ui-server:
image: ${REGISTRY:-opea}/visualqna-ui:${TAG:-latest}
container_name: visualqna-xeon-ui-server
depends_on:
- visualqna-xeon-backend-server
ports:
- "5173:5173"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- BACKEND_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
ipc: host
restart: always
visualqna-xeon-nginx-server:
image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
container_name: visualqna-xeon-nginx-server
depends_on:
- visualqna-xeon-backend-server
- visualqna-xeon-ui-server
ports:
- "${NGINX_PORT:-80}:80"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- FRONTEND_SERVICE_IP=${FRONTEND_SERVICE_IP}
- FRONTEND_SERVICE_PORT=${FRONTEND_SERVICE_PORT}
- BACKEND_SERVICE_NAME=${BACKEND_SERVICE_NAME}
- BACKEND_SERVICE_IP=${BACKEND_SERVICE_IP}
- BACKEND_SERVICE_PORT=${BACKEND_SERVICE_PORT}
ipc: host
restart: always
networks:
default:
driver: bridge

View File

@@ -6,7 +6,8 @@ pushd "../../../../../" > /dev/null
source .set_env.sh
popd > /dev/null
export host_ip=$(hostname -I | awk '{print $1}')
export no_proxy=$host_ip,$no_proxy
export LVM_MODEL_ID="llava-hf/llava-v1.6-mistral-7b-hf"
export LVM_ENDPOINT="http://${host_ip}:8399"
export LVM_SERVICE_PORT=9399

View File

@@ -15,15 +15,29 @@ docker build --no-cache -t opea/lvm:latest --build-arg https_proxy=$https_proxy
docker build --no-cache -t opea/nginx:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/third_parties/nginx/src/Dockerfile .
```
### 2. Pull TGI Gaudi Image
### 2. Build vLLM/Pull TGI Gaudi Image
```bash
# vLLM
# currently you have to build opea/vllm-gaudi locally from the habana_main branch and a specific commit
# we will update it to a stable release tag in the future
git clone https://github.com/HabanaAI/vllm-fork.git
cd ./vllm-fork/
docker build -f Dockerfile.hpu -t opea/vllm-gaudi:latest --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy
cd ..
rm -rf vllm-fork
```
```bash
# TGI (Optional)
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
```
### 3. Build MegaService Docker Image
To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `visuralqna.py` Python script. Build the MegaService Docker image using the command below:
To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `visualqna.py` Python script. Build the MegaService Docker image using the command below:
```bash
git clone https://github.com/opea-project/GenAIExamples.git
@@ -43,11 +57,12 @@ docker build --no-cache -t opea/visualqna-ui:latest --build-arg https_proxy=$htt
Then run the command `docker images`; you should see the following Docker images:
1. `ghcr.io/huggingface/tgi-gaudi:2.0.6`
2. `opea/lvm:latest`
3. `opea/visualqna:latest`
4. `opea/visualqna-ui:latest`
5. `opea/nginx`
1. `opea/vllm-gaudi:latest`
2. `ghcr.io/huggingface/tgi-gaudi:2.0.6` (Optional)
3. `opea/lvm:latest`
4. `opea/visualqna:latest`
5. `opea/visualqna-ui:latest`
6. `opea/nginx`
## 🚀 Start MicroServices and MegaService
@@ -56,18 +71,10 @@ Then run the command `docker images`, you will have the following 5 Docker Image
Since the `compose.yaml` will consume some environment variables, you need to set them up in advance as shown below.
```bash
export no_proxy=${your_no_proxy}
export http_proxy=${your_http_proxy}
export https_proxy=${your_http_proxy}
export LVM_MODEL_ID="llava-hf/llava-v1.6-mistral-7b-hf"
export LVM_ENDPOINT="http://${host_ip}:8399"
export LVM_SERVICE_PORT=9399
export MEGA_SERVICE_HOST_IP=${host_ip}
export LVM_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/visualqna"
source set_env.sh
```
Note: Please replace with `host_ip` with you external IP address, do **NOT** use localhost.
Note: Please replace `host_ip` with your external IP address; do not use localhost.
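If you want to confirm what `set_env.sh` exported, or switch back to the llava-v1.6 model that is left commented out in the Gaudi `set_env.sh`, a small sketch:

```bash
source set_env.sh
# Inspect the key values before starting the stack
echo "host_ip=${host_ip} LVM_MODEL_ID=${LVM_MODEL_ID} LVM_ENDPOINT=${LVM_ENDPOINT}"
# Optional: override the default model after sourcing (set_env.sh would otherwise reset it)
export LVM_MODEL_ID="llava-hf/llava-v1.6-mistral-7b-hf"
```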
### Start all the services Docker Containers
@@ -77,6 +84,8 @@ cd GenAIExamples/VisualQnA/docker_compose/intel/hpu/gaudi/
```bash
docker compose -f compose.yaml up -d
# if using TGI as the LLM serving backend
docker compose -f compose_tgi.yaml up -d
```
> **_NOTE:_** Users need at least one Gaudi card to run VisualQnA successfully.
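Before starting the stack, you can confirm that at least one Gaudi device is visible on the host (assuming the Habana driver stack with its `hl-smi` tool is installed):

```bash
# List the available Gaudi accelerators; at least one device should appear
hl-smi
```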

View File

@@ -2,41 +2,42 @@
# SPDX-License-Identifier: Apache-2.0
services:
llava-tgi-service:
image: ghcr.io/huggingface/tgi-gaudi:2.3.1
container_name: tgi-llava-gaudi-server
vllm-gaudi-service:
image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest}
container_name: vllm-gaudi-service
ports:
- "8399:80"
- ${VLLM_PORT:-8399}:80
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "./data:/root/.cache/huggingface/hub"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
ENABLE_HPU_GRAPH: true
LIMIT_HPU_GRAPH: true
USE_FLASH_ATTENTION: true
FLASH_ATTENTION_RECOMPUTE: true
healthcheck:
test: ["CMD-SHELL", "curl -f http://$host_ip:8399/health || exit 1"]
interval: 10s
timeout: 10s
retries: 60
LLM_MODEL_ID: ${LVM_MODEL_ID}
VLLM_TORCH_PROFILER_DIR: "/mnt"
VLLM_SKIP_WARMUP: ${VLLM_SKIP_WARMUP:-false}
MAX_MODEL_LEN: ${MAX_TOTAL_TOKENS:-4096}
MAX_SEQ_LEN_TO_CAPTURE: ${MAX_TOTAL_TOKENS:-4096}
PT_HPUGRAPH_DISABLE_TENSOR_CACHE: false # https://github.com/HabanaAI/vllm-fork/issues/841#issuecomment-2700421704
runtime: habana
cap_add:
- SYS_NICE
ipc: host
command: --model-id ${LVM_MODEL_ID} --max-input-length 4096 --max-total-tokens 8192
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"]
interval: 10s
timeout: 10s
retries: 150
command: --model $LVM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --chat-template examples/template_llava.jinja # https://docs.vllm.ai/en/v0.5.0/models/vlm.html
lvm:
image: ${REGISTRY:-opea}/lvm:${TAG:-latest}
container_name: lvm-gaudi-server
container_name: lvm-vllm-gaudi-service
depends_on:
- llava-tgi-service
vllm-gaudi-service:
condition: service_healthy
ports:
- "9399:9399"
ipc: host
@@ -45,7 +46,8 @@ services:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
LVM_ENDPOINT: ${LVM_ENDPOINT}
LVM_COMPONENT_NAME: "OPEA_TGI_LLAVA_LVM"
LVM_COMPONENT_NAME: "OPEA_VLLM_LVM"
LLM_MODEL_ID: ${LVM_MODEL_ID}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
restart: unless-stopped
@@ -53,7 +55,7 @@ services:
image: ${REGISTRY:-opea}/visualqna:${TAG:-latest}
container_name: visualqna-gaudi-backend-server
depends_on:
- llava-tgi-service
- vllm-gaudi-service
- lvm
ports:
- "8888:8888"

View File

@@ -0,0 +1,105 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
services:
llava-tgi-service:
image: ghcr.io/huggingface/tgi-gaudi:2.3.1
container_name: tgi-llava-gaudi-server
ports:
- "8399:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
ENABLE_HPU_GRAPH: true
LIMIT_HPU_GRAPH: true
USE_FLASH_ATTENTION: true
FLASH_ATTENTION_RECOMPUTE: true
healthcheck:
test: ["CMD-SHELL", "curl -f http://$host_ip:8399/health || exit 1"]
interval: 10s
timeout: 10s
retries: 60
runtime: habana
cap_add:
- SYS_NICE
ipc: host
command: --model-id ${LVM_MODEL_ID} --max-input-length 4096 --max-total-tokens 8192
lvm:
image: ${REGISTRY:-opea}/lvm:${TAG:-latest}
container_name: lvm-gaudi-server
depends_on:
llava-tgi-service:
condition: service_healthy
ports:
- "9399:9399"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
LVM_ENDPOINT: ${LVM_ENDPOINT}
LVM_COMPONENT_NAME: "OPEA_TGI_LLAVA_LVM"
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
restart: unless-stopped
visualqna-gaudi-backend-server:
image: ${REGISTRY:-opea}/visualqna:${TAG:-latest}
container_name: visualqna-gaudi-backend-server
depends_on:
- llava-tgi-service
- lvm
ports:
- "8888:8888"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
- LVM_SERVICE_HOST_IP=${LVM_SERVICE_HOST_IP}
ipc: host
restart: always
visualqna-gaudi-ui-server:
image: ${REGISTRY:-opea}/visualqna-ui:${TAG:-latest}
container_name: visualqna-gaudi-ui-server
depends_on:
- visualqna-gaudi-backend-server
ports:
- "5173:5173"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- BACKEND_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
ipc: host
restart: always
visualqna-gaudi-nginx-server:
image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
container_name: visualqna-gaudi-nginx-server
depends_on:
- visualqna-gaudi-backend-server
- visualqna-gaudi-ui-server
ports:
- "${NGINX_PORT:-80}:80"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- FRONTEND_SERVICE_IP=${FRONTEND_SERVICE_IP}
- FRONTEND_SERVICE_PORT=${FRONTEND_SERVICE_PORT}
- BACKEND_SERVICE_NAME=${BACKEND_SERVICE_NAME}
- BACKEND_SERVICE_IP=${BACKEND_SERVICE_IP}
- BACKEND_SERVICE_PORT=${BACKEND_SERVICE_PORT}
ipc: host
restart: always
networks:
default:
driver: bridge

View File

@@ -6,7 +6,10 @@ pushd "../../../../../" > /dev/null
source .set_env.sh
popd > /dev/null
export LVM_MODEL_ID="llava-hf/llava-v1.6-mistral-7b-hf"
export host_ip=$(hostname -I | awk '{print $1}')
export no_proxy=$host_ip,$no_proxy
# export LVM_MODEL_ID="llava-hf/llava-v1.6-mistral-7b-hf"
export LVM_MODEL_ID="llava-hf/llava-1.5-7b-hf"
export LVM_ENDPOINT="http://${host_ip}:8399"
export LVM_SERVICE_PORT=9399
export MEGA_SERVICE_HOST_IP=${host_ip}

View File

@@ -10,51 +10,32 @@ echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
export MODEL_CACHE=${model_cache:-"./data"}
export NGINX_PORT=81
export VLLM_SKIP_WARMUP=true
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')
function build_docker_images() {
opea_branch=${opea_branch:-"main"}
# If the opea_branch isn't main, replace the git clone branch in Dockerfile.
if [[ "${opea_branch}" != "main" ]]; then
cd $WORKPATH
OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git"
NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git"
find . -type f -name "Dockerfile*" | while read -r file; do
echo "Processing file: $file"
sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file"
done
fi
cd $WORKPATH/docker_image_build
git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
git clone --depth 1 --branch main https://github.com/opea-project/GenAIComps.git
docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
git clone https://github.com/HabanaAI/vllm-fork.git
cd ./vllm-fork/
docker build -f Dockerfile.hpu -t opea/vllm-gaudi:${TAG} --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy
cd ..
rm -rf vllm-fork
docker images && sleep 1s
}
function start_services() {
cd $WORKPATH/docker_compose/intel/hpu/gaudi
export LVM_MODEL_ID="llava-hf/llava-v1.6-mistral-7b-hf"
export LVM_ENDPOINT="http://${ip_address}:8399"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export LVM_SERVICE_PORT=9399
export MEGA_SERVICE_HOST_IP=${ip_address}
export LVM_SERVICE_HOST_IP=${ip_address}
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/visualqna"
export FRONTEND_SERVICE_IP=${ip_address}
export FRONTEND_SERVICE_PORT=5173
export BACKEND_SERVICE_NAME=visualqna
export BACKEND_SERVICE_IP=${ip_address}
export BACKEND_SERVICE_PORT=8888
export NGINX_PORT=80
export host_ip=${ip_address}
source ./set_env.sh
sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
@@ -63,8 +44,8 @@ function start_services() {
n=0
until [[ "$n" -ge 100 ]]; do
docker logs lvm-gaudi-server > ${LOG_PATH}/lvm_tgi_service_start.log
if grep -q Connected ${LOG_PATH}/lvm_tgi_service_start.log; then
docker logs vllm-gaudi-service > ${LOG_PATH}/lvm_vllm_service_start.log
if grep -q Starting ${LOG_PATH}/lvm_vllm_service_start.log; then
break
fi
sleep 5s
@@ -101,22 +82,24 @@ function validate_services() {
}
function validate_microservices() {
sleep 15s
# Check if the microservices are running correctly.
# lvm microservice
validate_services \
"${ip_address}:9399/v1/lvm" \
"The image" \
"yellow" \
"lvm" \
"lvm-gaudi-server" \
"lvm-vllm-gaudi-service" \
'{"image": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "prompt":"What is this?"}'
}
function validate_megaservice() {
sleep 15s
# Curl the Mega Service
validate_services \
"${ip_address}:8888/v1/visualqna" \
"The image" \
"sign" \
"visualqna-gaudi-backend-server" \
"visualqna-gaudi-backend-server" \
'{
@@ -142,8 +125,8 @@ function validate_megaservice() {
# test the megaservice via nginx
validate_services \
"${ip_address}:80/v1/visualqna" \
"The image" \
"${ip_address}:${NGINX_PORT}/v1/visualqna" \
"sign" \
"visualqna-gaudi-nginx-server" \
"visualqna-gaudi-nginx-server" \
'{

View File

@@ -10,51 +10,26 @@ echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
export MODEL_CACHE=${model_cache:-"./data"}
export NGINX_PORT=81
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')
function build_docker_images() {
opea_branch=${opea_branch:-"main"}
# If the opea_branch isn't main, replace the git clone branch in Dockerfile.
if [[ "${opea_branch}" != "main" ]]; then
cd $WORKPATH
OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git"
NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git"
find . -type f -name "Dockerfile*" | while read -r file; do
echo "Processing file: $file"
sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file"
done
fi
cd $WORKPATH/docker_image_build
git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
git clone --depth 1 --branch main https://github.com/opea-project/GenAIComps.git
docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
docker pull opea/vllm:latest
docker tag opea/vllm:latest opea/vllm:${TAG}
docker images && sleep 1s
}
function start_services() {
cd $WORKPATH/docker_compose/intel/cpu/xeon/
export LVM_MODEL_ID="llava-hf/llava-v1.6-mistral-7b-hf"
export LVM_ENDPOINT="http://${ip_address}:8399"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export LVM_SERVICE_PORT=9399
export MEGA_SERVICE_HOST_IP=${ip_address}
export LVM_SERVICE_HOST_IP=${ip_address}
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/visualqna"
export FRONTEND_SERVICE_IP=${ip_address}
export FRONTEND_SERVICE_PORT=5173
export BACKEND_SERVICE_NAME=visualqna
export BACKEND_SERVICE_IP=${ip_address}
export BACKEND_SERVICE_PORT=8888
export NGINX_PORT=80
export host_ip=${ip_address}
source ./set_env.sh
sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
@@ -63,8 +38,8 @@ function start_services() {
n=0
until [[ "$n" -ge 200 ]]; do
docker logs lvm-xeon-server > ${LOG_PATH}/lvm_tgi_service_start.log
if grep -q Connected ${LOG_PATH}/lvm_tgi_service_start.log; then
docker logs vllm-service > ${LOG_PATH}/lvm_vllm_service_start.log
if grep -q Starting ${LOG_PATH}/lvm_vllm_service_start.log; then
break
fi
sleep 5s
@@ -101,12 +76,13 @@ function validate_services() {
}
function validate_microservices() {
sleep 15s
# Check if the microservices are running correctly.
# lvm microservice
validate_services \
"${ip_address}:9399/v1/lvm" \
"The image" \
"yellow" \
"lvm" \
"lvm-xeon-server" \
'{"image": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "prompt":"What is this?"}'
@@ -116,7 +92,7 @@ function validate_megaservice() {
# Curl the Mega Service
validate_services \
"${ip_address}:8888/v1/visualqna" \
"The image" \
"sign" \
"visualqna-xeon-backend-server" \
"visualqna-xeon-backend-server" \
'{
@@ -142,8 +118,8 @@ function validate_megaservice() {
# test the megaservice via nginx
validate_services \
"${ip_address}:80/v1/visualqna" \
"The image" \
"${ip_address}:${NGINX_PORT}/v1/visualqna" \
"sign" \
"visualqna-xeon-nginx-server" \
"visualqna-xeon-nginx-server" \
'{

View File

@@ -0,0 +1,222 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -x
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
export MODEL_CACHE=${model_cache:-"./data"}
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')
function build_docker_images() {
opea_branch=${opea_branch:-"main"}
# If the opea_branch isn't main, replace the git clone branch in Dockerfile.
if [[ "${opea_branch}" != "main" ]]; then
cd $WORKPATH
OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git"
NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git"
find . -type f -name "Dockerfile*" | while read -r file; do
echo "Processing file: $file"
sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file"
done
fi
cd $WORKPATH/docker_image_build
git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
docker images && sleep 1s
}
function start_services() {
cd $WORKPATH/docker_compose/intel/hpu/gaudi
export LVM_MODEL_ID="llava-hf/llava-v1.6-mistral-7b-hf"
export LVM_ENDPOINT="http://${ip_address}:8399"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export LVM_SERVICE_PORT=9399
export MEGA_SERVICE_HOST_IP=${ip_address}
export LVM_SERVICE_HOST_IP=${ip_address}
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/visualqna"
export FRONTEND_SERVICE_IP=${ip_address}
export FRONTEND_SERVICE_PORT=5173
export BACKEND_SERVICE_NAME=visualqna
export BACKEND_SERVICE_IP=${ip_address}
export BACKEND_SERVICE_PORT=8888
export NGINX_PORT=80
export host_ip=${ip_address}
sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
# Start Docker Containers
docker compose -f compose_tgi.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
n=0
until [[ "$n" -ge 100 ]]; do
docker logs tgi-llava-gaudi-server > ${LOG_PATH}/lvm_tgi_service_start.log
if grep -q Connected ${LOG_PATH}/lvm_tgi_service_start.log; then
break
fi
sleep 5s
n=$((n+1))
done
}
function validate_services() {
local URL="$1"
local EXPECTED_RESULT="$2"
local SERVICE_NAME="$3"
local DOCKER_NAME="$4"
local INPUT_DATA="$5"
local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
if [ "$HTTP_STATUS" -eq 200 ]; then
echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log)
if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then
echo "[ $SERVICE_NAME ] Content is as expected."
else
echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT"
docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
exit 1
fi
else
echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
exit 1
fi
sleep 1s
}
function validate_microservices() {
sleep 15s
# Check if the microservices are running correctly.
# lvm microservice
validate_services \
"${ip_address}:9399/v1/lvm" \
"The image" \
"lvm" \
"lvm-gaudi-server" \
'{"image": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "prompt":"What is this?"}'
}
function validate_megaservice() {
sleep 15s
# Curl the Mega Service
validate_services \
"${ip_address}:8888/v1/visualqna" \
"The image" \
"visualqna-gaudi-backend-server" \
"visualqna-gaudi-backend-server" \
'{
"messages": [
{
"role": "user",
"content": [
{
"type": "text",
"text": "What'\''s in this image?"
},
{
"type": "image_url",
"image_url": {
"url": "https://www.ilankelman.org/stopsigns/australia.jpg"
}
}
]
}
],
"max_tokens": 300
}'
# test the megaservice via nginx
validate_services \
"${ip_address}:80/v1/visualqna" \
"The image" \
"visualqna-gaudi-nginx-server" \
"visualqna-gaudi-nginx-server" \
'{
"messages": [
{
"role": "user",
"content": [
{
"type": "text",
"text": "What'\''s in this image?"
},
{
"type": "image_url",
"image_url": {
"url": "https://www.ilankelman.org/stopsigns/australia.jpg"
}
}
]
}
],
"max_tokens": 300
}'
}
function validate_frontend() {
cd $WORKPATH/ui/svelte
local conda_env_name="OPEA_e2e"
export PATH=${HOME}/miniforge3/bin/:$PATH
if conda info --envs | grep -q "$conda_env_name"; then
echo "$conda_env_name exist!"
else
conda create -n ${conda_env_name} python=3.12 -y
fi
source activate ${conda_env_name}
sed -i "s/localhost/$ip_address/g" playwright.config.ts
conda install -c conda-forge nodejs=22.6.0 -y
npm install && npm ci && npx playwright install --with-deps
node -v && npm -v && pip list
exit_status=0
npx playwright test || exit_status=$?
if [ $exit_status -ne 0 ]; then
echo "[TEST INFO]: ---------frontend test failed---------"
exit $exit_status
else
echo "[TEST INFO]: ---------frontend test passed---------"
fi
}
function stop_docker() {
cd $WORKPATH/docker_compose/intel/hpu/gaudi
docker compose stop && docker compose rm -f
}
function main() {
stop_docker
if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
start_services
validate_microservices
validate_megaservice
# validate_frontend
stop_docker
echo y | docker system prune
}
main

View File

@@ -0,0 +1,222 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -x
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
export MODEL_CACHE=${model_cache:-"./data"}
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')
function build_docker_images() {
opea_branch=${opea_branch:-"main"}
# If the opea_branch isn't main, replace the git clone branch in Dockerfile.
if [[ "${opea_branch}" != "main" ]]; then
cd $WORKPATH
OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git"
NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git"
find . -type f -name "Dockerfile*" | while read -r file; do
echo "Processing file: $file"
sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file"
done
fi
cd $WORKPATH/docker_image_build
git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
docker images && sleep 1s
}
function start_services() {
cd $WORKPATH/docker_compose/intel/cpu/xeon/
export LVM_MODEL_ID="llava-hf/llava-v1.6-mistral-7b-hf"
export LVM_ENDPOINT="http://${ip_address}:8399"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export LVM_SERVICE_PORT=9399
export MEGA_SERVICE_HOST_IP=${ip_address}
export LVM_SERVICE_HOST_IP=${ip_address}
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/visualqna"
export FRONTEND_SERVICE_IP=${ip_address}
export FRONTEND_SERVICE_PORT=5173
export BACKEND_SERVICE_NAME=visualqna
export BACKEND_SERVICE_IP=${ip_address}
export BACKEND_SERVICE_PORT=8888
export NGINX_PORT=80
export host_ip=${ip_address}
sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
# Start Docker Containers
docker compose -f compose_tgi.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
n=0
until [[ "$n" -ge 200 ]]; do
docker logs tgi-llava-xeon-server > ${LOG_PATH}/lvm_tgi_service_start.log
if grep -q Connected ${LOG_PATH}/lvm_tgi_service_start.log; then
break
fi
sleep 5s
n=$((n+1))
done
}
function validate_services() {
local URL="$1"
local EXPECTED_RESULT="$2"
local SERVICE_NAME="$3"
local DOCKER_NAME="$4"
local INPUT_DATA="$5"
local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
if [ "$HTTP_STATUS" -eq 200 ]; then
echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log)
if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then
echo "[ $SERVICE_NAME ] Content is as expected."
else
echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT"
docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
exit 1
fi
else
echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
exit 1
fi
sleep 1s
}
function validate_microservices() {
sleep 15s
# Check if the microservices are running correctly.
# lvm microservice
validate_services \
"${ip_address}:9399/v1/lvm" \
"The image" \
"lvm" \
"lvm-xeon-server" \
'{"image": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "prompt":"What is this?"}'
}
function validate_megaservice() {
sleep 15s
# Curl the Mega Service
validate_services \
"${ip_address}:8888/v1/visualqna" \
"The image" \
"visualqna-xeon-backend-server" \
"visualqna-xeon-backend-server" \
'{
"messages": [
{
"role": "user",
"content": [
{
"type": "text",
"text": "What'\''s in this image?"
},
{
"type": "image_url",
"image_url": {
"url": "https://www.ilankelman.org/stopsigns/australia.jpg"
}
}
]
}
],
"max_tokens": 300
}'
# test the megaservice via nginx
validate_services \
"${ip_address}:80/v1/visualqna" \
"The image" \
"visualqna-xeon-nginx-server" \
"visualqna-xeon-nginx-server" \
'{
"messages": [
{
"role": "user",
"content": [
{
"type": "text",
"text": "What'\''s in this image?"
},
{
"type": "image_url",
"image_url": {
"url": "https://www.ilankelman.org/stopsigns/australia.jpg"
}
}
]
}
],
"max_tokens": 300
}'
}
function validate_frontend() {
cd $WORKPATH/ui/svelte
local conda_env_name="OPEA_e2e"
export PATH=${HOME}/miniforge3/bin/:$PATH
if conda info --envs | grep -q "$conda_env_name"; then
echo "$conda_env_name exist!"
else
conda create -n ${conda_env_name} python=3.12 -y
fi
source activate ${conda_env_name}
sed -i "s/localhost/$ip_address/g" playwright.config.ts
conda install -c conda-forge nodejs=22.6.0 -y
npm install && npm ci && npx playwright install --with-deps
node -v && npm -v && pip list
exit_status=0
npx playwright test || exit_status=$?
if [ $exit_status -ne 0 ]; then
echo "[TEST INFO]: ---------frontend test failed---------"
exit $exit_status
else
echo "[TEST INFO]: ---------frontend test passed---------"
fi
}
function stop_docker() {
cd $WORKPATH/docker_compose/intel/cpu/xeon/
docker compose stop && docker compose rm -f
}
function main() {
stop_docker
if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
start_services
validate_microservices
validate_megaservice
# validate_frontend
stop_docker
echo y | docker system prune
}
main