Compare commits

...

6 Commits

| Author | SHA1 | Message | Date |
| --- | --- | --- | --- |
| Ying Hu | b10456e42a | Update test_compose_vllm_on_xeon.sh (move the vllm-service) | 2025-03-04 19:11:12 +08:00 |
| pre-commit-ci[bot] | 3fb6cb590c | [pre-commit.ci] auto fixes from pre-commit.com hooks (for more information, see https://pre-commit.ci) | 2025-03-04 09:06:37 +00:00 |
| Ying Hu | 75ee579021 | Create test_compose_vllm_on_xeon.sh for vLLM | 2025-03-04 17:05:30 +08:00 |
| Ying Hu | 768f1a45e2 | Create compose_vllm.yaml for vLLM | 2025-03-04 17:01:41 +08:00 |
| pre-commit-ci[bot] | c4dffdad80 | [pre-commit.ci] auto fixes from pre-commit.com hooks (for more information, see https://pre-commit.ci) | 2025-03-04 08:59:57 +00:00 |
| Ying Hu | 2a0c4ccb81 | Support vLLM for DBQnA (1. update Readme) | 2025-03-04 16:57:49 +08:00 |
3 changed files with 229 additions and 5 deletions

View File: README.md

@@ -51,16 +51,20 @@ Since the `compose.yaml` will consume some environment variables, you need to se
export host_ip=$(hostname -I | awk '{print $1}')
# Example: no_proxy="localhost,127.0.0.1,192.168.1.1"
export no_proxy=${no_proxy},${host_ip}
export no_proxy=${no_proxy},${host_ip},dbqna-xeon-react-ui-server,text2sql-service,vllm-service,tgi-service
# If you are in a proxy environment, also set the proxy-related environment variables:
export http_proxy=${http_proxy}
export https_proxy=${https_proxy}
# Set other required variables
#TGI Service
export TGI_PORT=8008
export TGI_LLM_ENDPOINT=http://${host_ip}:${TGI_PORT}
#vLLM Service
export LLM_ENDPOINT_PORT=8008
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
export HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"
export POSTGRES_USER=postgres
@@ -89,6 +93,13 @@ cd GenAIExamples/DBQnA/docker_compose/intel/cpu/xeon
docker compose up -d
```
Or use the vLLM service:
```bash
cd GenAIExamples/DBQnA/docker_compose/intel/cpu/xeon
docker compose -f compose_vllm.yaml up -d
```
#### 2.2.2 Alternatively, start the microservices by running individual Docker services
**NOTE:** Make sure all the individual docker services are down before starting them.
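If containers from a previous run are still up, stop and remove them first. A minimal sketch, using the container names that appear in the examples in this guide:

```bash
# List any leftover containers from a previous run.
docker ps --format '{{.Names}}' | grep -E 'test-text2sql|test-dbqna' || true
# Stop and remove them if present (ignore errors for containers that do not exist).
docker stop test-text2sql-postgres test-text2sql-tgi-endpoint test-dbqna-react-ui-server 2>/dev/null || true
docker rm test-text2sql-postgres test-text2sql-tgi-endpoint test-dbqna-react-ui-server 2>/dev/null || true
```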
@@ -108,7 +119,7 @@ docker run --name test-text2sql-postgres --ipc=host -e POSTGRES_USER=${POSTGRES_
```bash
docker run -d --name="test-text2sql-tgi-endpoint" --ipc=host -p $TGI_PORT:80 -v ./data:/data --shm-size 1g -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e HF_TOKEN=${HF_TOKEN} -e model=${model} ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id $model
docker run -d --name="test-text2sql-tgi-endpoint" --ipc=host -p $TGI_PORT:80 -v ./data:/data --shm-size 1g -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e HF_TOKEN=${HF_TOKEN} -e model=${model} ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu --model-id $model
```
- Start Text-to-SQL Service
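The start command for this service is unchanged and therefore not shown in the diff; a sketch consistent with the `text2sql-service` definition in `compose_vllm.yaml` below (container name is hypothetical; image, port mapping, and endpoint variable are taken from that file) would be:

```bash
# Hypothetical single-container start for the Text-to-SQL service, mirroring compose_vllm.yaml.
docker run -d --name="test-text2sql-service" --ipc=host -p 9090:8080 \
  -e TGI_LLM_ENDPOINT=${LLM_ENDPOINT} \
  ${REGISTRY:-opea}/text2sql:${TAG:-latest}
```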
@@ -127,7 +138,9 @@ docker run -d --name="test-dbqna-react-ui-server" --ipc=host -p 5174:80 -e no_pr
## 🚀 Validate Microservices
### 3.1 TGI Service
### 3.1 TGI Service or vLLM Service
TGI Service
```bash
@@ -137,6 +150,13 @@ curl http://${host_ip}:$TGI_PORT/generate \
-H 'Content-Type: application/json'
```
vLLM Service
```bash
curl http://${host_ip}:8008/v1/chat/completions \
  -X POST \
  -d '{"model":"mistralai/Mistral-7B-Instruct-v0.3", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens":17}' \
  -H 'Content-Type: application/json'
```
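A successful call returns an OpenAI-style `chat.completion` JSON object. To print only the generated text, the response can be piped through `jq` (assuming `jq` is installed; field names follow the OpenAI chat-completions schema that vLLM implements):

```bash
# Extract just the assistant's reply from the vLLM response (requires jq).
curl -s http://${host_ip}:8008/v1/chat/completions \
  -X POST \
  -d '{"model":"mistralai/Mistral-7B-Instruct-v0.3", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens":17}' \
  -H 'Content-Type: application/json' | jq -r '.choices[0].message.content'
```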
### 3.2 Postgres Microservice
Once the Text-to-SQL microservice is started, use the command below to check the Postgres connection.
@@ -147,7 +167,7 @@ Once Text-to-SQL microservice is started, user can use below command
curl --location http://${host_ip}:9090/v1/postgres/health \
--header 'Content-Type: application/json' \
--data '{"user": "'${POSTGRES_USER}'","password": "'${POSTGRES_PASSWORD}'","host": "'${host_ip}'", "port": "5442", "database": "'${POSTGRES_DB}'"}'
```
#### 3.2.2 Invoke the microservice.
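The body of this subsection is unchanged and not shown in the diff; for reference, the end-to-end test script below exercises the endpoint like this (a sketch mirroring `validate_microservice`):

```bash
# Ask the Text-to-SQL service to translate a question into SQL against the chinook database.
curl http://${host_ip}:9090/v1/text2sql \
  -X POST \
  -d '{"input_text": "Find the total number of Albums.","conn_str": {"user": "'${POSTGRES_USER}'","password": "'${POSTGRES_PASSWORD}'","host": "'${host_ip}'", "port": "5442", "database": "'${POSTGRES_DB}'" }}' \
  -H 'Content-Type: application/json'
```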

View File: compose_vllm.yaml

@@ -0,0 +1,67 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
services:
vllm-service:
image: ${REGISTRY:-opea}/vllm:${TAG:-latest}
container_name: vllm-service
ports:
- ${LLM_ENDPOINT_PORT:-8008}:80
volumes:
- "${DATA_PATH:-./data}:/data"
shm_size: 128g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
LLM_MODEL_ID: ${LLM_MODEL_ID}
VLLM_TORCH_PROFILER_DIR: "${VLLM_TORCH_PROFILER_DIR:-/mnt}"
host_ip: ${host_ip}
LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT}
VLLM_SKIP_WARMUP: ${VLLM_SKIP_WARMUP:-false}
healthcheck:
test: ["CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1"]
interval: 10s
timeout: 10s
retries: 100
command: --model $LLM_MODEL_ID --host 0.0.0.0 --port 80
postgres:
image: postgres:latest
container_name: postgres-container
restart: always
environment:
- POSTGRES_USER=${POSTGRES_USER}
- POSTGRES_PASSWORD=${POSTGRES_PASSWORD}
- POSTGRES_DB=${POSTGRES_DB}
ports:
- '5442:5432'
volumes:
- ./chinook.sql:/docker-entrypoint-initdb.d/chinook.sql
text2sql-service:
image: ${REGISTRY:-opea}/text2sql:${TAG:-latest}
container_name: text2sql-service
ports:
- "9090:8080"
environment:
- TGI_LLM_ENDPOINT=${LLM_ENDPOINT}
dbqna-xeon-react-ui-server:
image: ${REGISTRY:-opea}/text2sql-react-ui:${TAG:-latest}
container_name: dbqna-xeon-react-ui-server
depends_on:
- text2sql-service
ports:
- "5174:80"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
ipc: host
restart: always
networks:
default:
driver: bridge
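The `healthcheck` above polls vLLM's `/health` endpoint. The same check can be run manually from the host once the stack is up; a minimal sketch, assuming the environment variables set earlier:

```bash
# Probe the same endpoint the compose healthcheck uses, then ask Docker for the reported health state.
curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health && echo "vLLM is ready"
docker inspect --format '{{.State.Health.Status}}' vllm-service
```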

View File: test_compose_vllm_on_xeon.sh

@@ -0,0 +1,137 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -xe
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
export MODEL_CACHE=${model_cache:-"./data"}
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')
function build_docker_images() {
cd $WORKPATH
git clone https://github.com/vllm-project/vllm.git
cd ./vllm/
VLLM_VER="$(git describe --tags "$(git rev-list --tags --max-count=1)" )"
echo "Check out vLLM tag ${VLLM_VER}"
git checkout ${VLLM_VER} &> /dev/null
docker build --no-cache -f Dockerfile.cpu -t ${REGISTRY:-opea}/vllm:${TAG:-latest} --shm-size=128g .
if [ $? -ne 0 ]; then
echo "opea/vllm built fail"
exit 1
else
echo "opea/vllm built successful"
fi
cd $WORKPATH/docker_image_build
git clone --single-branch --branch "${opea_branch:-"main"}" https://github.com/opea-project/GenAIComps.git
echo "Build all the images with --no-cache, check docker_image_build.log for details... #vllm-service"
service_list="text2sql text2sql-react-ui"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
}
function start_service() {
cd $WORKPATH/docker_compose/intel/cpu/xeon
export model="mistralai/Mistral-7B-Instruct-v0.3"
export LLM_MODEL_ID=${model}
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export POSTGRES_USER=postgres
export POSTGRES_PASSWORD=testpwd
export POSTGRES_DB=chinook
export TEXT2SQL_PORT=9090
export LLM_ENDPOINT_PORT=8008
export LLM_ENDPOINT="http://${ip_address}:${LLM_ENDPOINT_PORT}"
# Start Docker Containers
docker compose -f compose_vllm.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
# check whether vLLM is fully ready.
n=0
until [[ "$n" -ge 100 ]]; do
docker logs vllm-service > ${LOG_PATH}/vllm_service_start.log 2>&1
if grep -q complete ${LOG_PATH}/vllm_service_start.log; then
break
fi
sleep 5s
n=$((n+1))
done
}
function validate_microservice() {
result=$(http_proxy="" curl --connect-timeout 5 --max-time 120000 http://${ip_address}:$TEXT2SQL_PORT/v1/text2sql\
-X POST \
-d '{"input_text": "Find the total number of Albums.","conn_str": {"user": "'${POSTGRES_USER}'","password": "'${POSTGRES_PASSWORD}'","host": "'${ip_address}'", "port": "5442", "database": "'${POSTGRES_DB}'" }}' \
-H 'Content-Type: application/json')
if [[ $result == *"output"* ]]; then
echo $result
echo "Result correct."
else
echo "Result wrong. Received was $result"
docker logs text2sql-service > ${LOG_PATH}/text2sql.log
docker logs vllm-service > ${LOG_PATH}/vllm_service.log
exit 1
fi
}
function validate_frontend() {
echo "[ TEST INFO ]: --------- frontend test started ---------"
cd $WORKPATH/ui/react
local conda_env_name="OPEA_e2e"
export PATH=${HOME}/miniforge3/bin/:$PATH
if conda info --envs | grep -q "$conda_env_name"; then
echo "$conda_env_name exist!"
else
conda create -n ${conda_env_name} python=3.12 -y
fi
source activate ${conda_env_name}
echo "[ TEST INFO ]: --------- conda env activated ---------"
conda install -c conda-forge nodejs=22.6.0 -y
npm install && npm ci
node -v && npm -v && pip list
exit_status=0
npm run test || exit_status=$?
if [ $exit_status -ne 0 ]; then
echo "[TEST INFO]: ---------frontend test failed---------"
exit $exit_status
else
echo "[TEST INFO]: ---------frontend test passed---------"
fi
}
function stop_docker() {
cd $WORKPATH/docker_compose/intel/cpu/xeon
docker compose stop && docker compose rm -f
}
function main() {
stop_docker
build_docker_images
start_service
validate_microservice
validate_frontend
stop_docker
echo y | docker system prune
}
main
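A typical invocation of this script, assuming it lives under the example's `tests/` directory (consistent with the `WORKPATH`/`LOG_PATH` derivation above) and that a Hugging Face token is available:

```bash
# Run the vLLM compose test end to end; the token is required to pull the gated Mistral model.
export HUGGINGFACEHUB_API_TOKEN=<your_hf_token>
cd GenAIExamples/DBQnA/tests
bash test_compose_vllm_on_xeon.sh
```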