Compare commits
6 commits: main ... yinghu5-pa

Commits:
- b10456e42a
- 3fb6cb590c
- 75ee579021
- 768f1a45e2
- c4dffdad80
- 2a0c4ccb81
````diff
@@ -51,16 +51,20 @@ Since the `compose.yaml` will consume some environment variables, you need to set
 export host_ip=$(hostname -I | awk '{print $1}')
 
 # Example: no_proxy="localhost,127.0.0.1,192.168.1.1"
-export no_proxy=${no_proxy},${host_ip}
+export no_proxy=${no_proxy},${host_ip},dbqna-xeon-react-ui-server,text2sql-service,vllm-service,tgi-service
 
 # If you are in a proxy environment, also set the proxy-related environment variables:
 export http_proxy=${http_proxy}
 export https_proxy=${https_proxy}
 
 # Set other required variables
+# TGI Service
 export TGI_PORT=8008
 export TGI_LLM_ENDPOINT=http://${host_ip}:${TGI_PORT}
+# vLLM Service
+export LLM_ENDPOINT_PORT=8008
+export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
 
 export HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
 export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"
 export POSTGRES_USER=postgres
````
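Before bringing the stack up, it is worth confirming that the derived endpoints resolved as expected. A minimal sanity check, assuming the variables were exported exactly as in the hunk above:

```bash
# Print the endpoints derived from host_ip; each should contain a routable IP.
echo "host_ip          = ${host_ip}"
echo "TGI_LLM_ENDPOINT = ${TGI_LLM_ENDPOINT}"
echo "LLM_ENDPOINT     = ${LLM_ENDPOINT}"
echo "no_proxy         = ${no_proxy}"
```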
````diff
@@ -89,6 +93,13 @@ cd GenAIExamples/DBQnA/docker_compose/intel/cpu/xeon
 docker compose up -d
 ```
 
+Or use the vLLM service:
+
+```bash
+cd GenAIExamples/DBQnA/docker_compose/intel/cpu/xeon
+docker compose -f compose_vllm.yaml up -d
+```
+
 #### 2.2.2 Alternatively we can start the microservices by running individual docker services
 
 **NOTE:** Make sure all the individual docker services are down before starting them.
````
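After `docker compose -f compose_vllm.yaml up -d`, a quick way to verify that every container started is sketched below (the service names come from the new compose_vllm.yaml later in this compare):

```bash
# List the containers from the vLLM compose file and their health status.
docker compose -f compose_vllm.yaml ps
# Follow vLLM startup; the server logs a completion message once it is serving.
docker logs -f vllm-service
```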
````diff
@@ -108,7 +119,7 @@ docker run --name test-text2sql-postgres --ipc=host -e POSTGRES_USER=${POSTGRES_
 
 ```bash
 
-docker run -d --name="test-text2sql-tgi-endpoint" --ipc=host -p $TGI_PORT:80 -v ./data:/data --shm-size 1g -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e HF_TOKEN=${HF_TOKEN} -e model=${model} ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id $model
+docker run -d --name="test-text2sql-tgi-endpoint" --ipc=host -p $TGI_PORT:80 -v ./data:/data --shm-size 1g -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e HF_TOKEN=${HF_TOKEN} -e model=${model} ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu --model-id $model
 ```
 
 - Start Text-to-SQL Service
````
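The model download can take a while on first start. A small poll loop, assuming the container name above and TGI's usual readiness log line (the exact message can vary across TGI releases):

```bash
# Wait until the TGI container reports the model server is connected.
until docker logs test-text2sql-tgi-endpoint 2>&1 | grep -q "Connected"; do
  echo "waiting for TGI to finish loading ${model}..."
  sleep 10
done
```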
````diff
@@ -127,7 +138,9 @@ docker run -d --name="test-dbqna-react-ui-server" --ipc=host -p 5174:80 -e no_pr
 
 ## 🚀 Validate Microservices
 
-### 3.1 TGI Service
+### 3.1 TGI Service or vLLM Service
 
+TGI Service
+
 ```bash
 
````
````diff
@@ -137,6 +150,15 @@ curl http://${host_ip}:$TGI_PORT/generate \
 -H 'Content-Type: application/json'
 ```
 
+vLLM Service
+
+```bash
+curl http://${host_ip}:8008/v1/chat/completions \
+-X POST \
+-d '{"model":"mistralai/Mistral-7B-Instruct-v0.3", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens":17}' \
+-H 'Content-Type: application/json'
+```
+
 ### 3.2 Postgres Microservice
 
 Once Text-to-SQL microservice is started, user can use below command
````
````diff
@@ -147,7 +169,7 @@ Once Text-to-SQL microservice is started, user can use below command
 curl --location http://${host_ip}:9090/v1/postgres/health \
 --header 'Content-Type: application/json' \
 --data '{"user": "'${POSTGRES_USER}'","password": "'${POSTGRES_PASSWORD}'","host": "'${host_ip}'", "port": "5442", "database": "'${POSTGRES_DB}'"}'
 ```
 
 #### 3.2.2 Invoke the microservice.
 
````
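The 3.2.2 invocation mirrors the call exercised by the new test script at the end of this compare; for reference, a sketch against the compose stack above (port 9090 and the /v1/text2sql route come from the test script and compose file in this diff):

```bash
# Ask the Text-to-SQL service to translate a question into SQL and run it.
curl http://${host_ip}:9090/v1/text2sql \
  -X POST \
  -d '{"input_text": "Find the total number of Albums.","conn_str": {"user": "'${POSTGRES_USER}'","password": "'${POSTGRES_PASSWORD}'","host": "'${host_ip}'", "port": "5442", "database": "'${POSTGRES_DB}'"}}' \
  -H 'Content-Type: application/json'
```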
DBQnA/docker_compose/intel/cpu/xeon/compose_vllm.yaml (new file, 67 lines)
```yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

services:
  vllm-service:
    image: ${REGISTRY:-opea}/vllm:${TAG:-latest}
    container_name: vllm-service
    ports:
      - ${LLM_ENDPOINT_PORT:-8008}:80
    volumes:
      - "${DATA_PATH:-./data}:/data"
    shm_size: 128g
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      LLM_MODEL_ID: ${LLM_MODEL_ID}
      VLLM_TORCH_PROFILER_DIR: "${VLLM_TORCH_PROFILER_DIR:-/mnt}"
      host_ip: ${host_ip}
      LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT}
      VLLM_SKIP_WARMUP: ${VLLM_SKIP_WARMUP:-false}
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1"]
      interval: 10s
      timeout: 10s
      retries: 100
    command: --model $LLM_MODEL_ID --host 0.0.0.0 --port 80

  postgres:
    image: postgres:latest
    container_name: postgres-container
    restart: always
    environment:
      - POSTGRES_USER=${POSTGRES_USER}
      - POSTGRES_PASSWORD=${POSTGRES_PASSWORD}
      - POSTGRES_DB=${POSTGRES_DB}
    ports:
      - '5442:5432'
    volumes:
      - ./chinook.sql:/docker-entrypoint-initdb.d/chinook.sql

  text2sql-service:
    image: ${REGISTRY:-opea}/text2sql:${TAG:-latest}
    container_name: text2sql-service
    ports:
      - "9090:8080"
    environment:
      - TGI_LLM_ENDPOINT=${LLM_ENDPOINT}

  dbqna-xeon-react-ui-server:
    image: ${REGISTRY:-opea}/text2sql-react-ui:${TAG:-latest}
    container_name: dbqna-xeon-react-ui-server
    depends_on:
      - text2sql-service
    ports:
      - "5174:80"
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
    ipc: host
    restart: always

networks:
  default:
    driver: bridge
```
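The compose healthcheck polls vLLM's /health endpoint from inside the container; the same check can be run from the host before validating the pipeline (a minimal sketch using the variables exported earlier):

```bash
# Block until vllm-service answers its health endpoint, mirroring the compose healthcheck.
until curl -sf http://${host_ip}:${LLM_ENDPOINT_PORT:-8008}/health; do
  echo "waiting for vllm-service..."
  sleep 10
done
echo "vllm-service is healthy"
```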
DBQnA/tests/test_compose_vllm_on_xeon.sh (new file, 137 lines)
```bash
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

set -xe

IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
export MODEL_CACHE=${model_cache:-"./data"}

WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')

function build_docker_images() {
    cd $WORKPATH
    git clone https://github.com/vllm-project/vllm.git
    cd ./vllm/
    VLLM_VER="$(git describe --tags "$(git rev-list --tags --max-count=1)")"
    echo "Check out vLLM tag ${VLLM_VER}"
    git checkout ${VLLM_VER} &> /dev/null
    docker build --no-cache -f Dockerfile.cpu -t ${REGISTRY:-opea}/vllm:${TAG:-latest} --shm-size=128g .
    if [ $? -ne 0 ]; then
        echo "opea/vllm build failed"
        exit 1
    else
        echo "opea/vllm build succeeded"
    fi

    cd $WORKPATH/docker_image_build
    git clone --single-branch --branch "${opea_branch:-"main"}" https://github.com/opea-project/GenAIComps.git

    echo "Build all the images with --no-cache, check docker_image_build.log for details..."
    service_list="text2sql text2sql-react-ui"
    docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
}

function start_service() {
    cd $WORKPATH/docker_compose/intel/cpu/xeon
    export model="mistralai/Mistral-7B-Instruct-v0.3"
    export LLM_MODEL_ID=${model}
    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
    export POSTGRES_USER=postgres
    export POSTGRES_PASSWORD=testpwd
    export POSTGRES_DB=chinook
    export TEXT2SQL_PORT=9090
    export LLM_ENDPOINT_PORT=8008
    export LLM_ENDPOINT="http://${ip_address}:${LLM_ENDPOINT_PORT}"

    # Start Docker containers
    docker compose -f compose_vllm.yaml up -d > ${LOG_PATH}/start_services_with_compose.log

    # Check whether vLLM is fully ready.
    n=0
    until [[ "$n" -ge 100 ]]; do
        docker logs vllm-service > ${LOG_PATH}/vllm_service_start.log 2>&1
        if grep -q complete ${LOG_PATH}/vllm_service_start.log; then
            break
        fi
        sleep 5s
        n=$((n+1))
    done
}

function validate_microservice() {
    result=$(http_proxy="" curl --connect-timeout 5 --max-time 120000 http://${ip_address}:$TEXT2SQL_PORT/v1/text2sql \
        -X POST \
        -d '{"input_text": "Find the total number of Albums.","conn_str": {"user": "'${POSTGRES_USER}'","password": "'${POSTGRES_PASSWORD}'","host": "'${ip_address}'", "port": "5442", "database": "'${POSTGRES_DB}'" }}' \
        -H 'Content-Type: application/json')

    if [[ $result == *"output"* ]]; then
        echo $result
        echo "Result correct."
    else
        echo "Result wrong. Received was $result"
        docker logs text2sql-service > ${LOG_PATH}/text2sql.log
        docker logs vllm-service > ${LOG_PATH}/vllm.log
        exit 1
    fi
}

function validate_frontend() {
    echo "[ TEST INFO ]: --------- frontend test started ---------"
    cd $WORKPATH/ui/react
    local conda_env_name="OPEA_e2e"
    export PATH=${HOME}/miniforge3/bin/:$PATH
    if conda info --envs | grep -q "$conda_env_name"; then
        echo "$conda_env_name exists!"
    else
        conda create -n ${conda_env_name} python=3.12 -y
    fi

    source activate ${conda_env_name}
    echo "[ TEST INFO ]: --------- conda env activated ---------"

    conda install -c conda-forge nodejs=22.6.0 -y
    npm install && npm ci
    node -v && npm -v && pip list

    exit_status=0
    npm run test || exit_status=$?

    if [ $exit_status -ne 0 ]; then
        echo "[TEST INFO]: ---------frontend test failed---------"
        exit $exit_status
    else
        echo "[TEST INFO]: ---------frontend test passed---------"
    fi
}

function stop_docker() {
    cd $WORKPATH/docker_compose/intel/cpu/xeon
    docker compose -f compose_vllm.yaml stop && docker compose -f compose_vllm.yaml rm -f
}

function main() {
    stop_docker

    build_docker_images
    start_service

    validate_microservice
    validate_frontend

    stop_docker
    echo y | docker system prune
}

main
```
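To run the end-to-end test locally, only the Hugging Face token needs to be supplied; the registry and tag default to opea/latest (the placeholder token below is an assumption you must replace):

```bash
# Run the vLLM-backed DBQnA end-to-end test on Xeon.
export HUGGINGFACEHUB_API_TOKEN=<your-hf-token>   # placeholder: token with access to the model
cd GenAIExamples/DBQnA/tests
bash test_compose_vllm_on_xeon.sh
```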