From 2a0c4ccb81c64c0db65eda872f4c1144f80d444f Mon Sep 17 00:00:00 2001 From: Ying Hu Date: Tue, 4 Mar 2025 16:57:49 +0800 Subject: [PATCH] Support vLLM for DBQnA Support vLLM for DBQnA 1. update Readme --- DBQnA/docker_compose/intel/cpu/xeon/README.md | 27 ++++++++++++++----- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/DBQnA/docker_compose/intel/cpu/xeon/README.md b/DBQnA/docker_compose/intel/cpu/xeon/README.md index 26b46ec4b..8cb0ba607 100644 --- a/DBQnA/docker_compose/intel/cpu/xeon/README.md +++ b/DBQnA/docker_compose/intel/cpu/xeon/README.md @@ -51,16 +51,20 @@ Since the `compose.yaml` will consume some environment variables, you need to se export host_ip=$(hostname -I | awk '{print $1}') # Example: no_proxy="localhost,127.0.0.1,192.168.1.1" -export no_proxy=${no_proxy},${host_ip} +export no_proxy=${no_proxy},${host_ip},dbqna-xeon-react-ui-server,text2sql-service,vllm-service,tgi-service # If you are in a proxy environment, also set the proxy-related environment variables: export http_proxy=${http_proxy} export https_proxy=${https_proxy} # Set other required variables - +#TGI Service export TGI_PORT=8008 export TGI_LLM_ENDPOINT=http://${host_ip}:${TGI_PORT} +#vLLM Service +export LLM_ENDPOINT_PORT=8008 +export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}" + export HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3" export POSTGRES_USER=postgres @@ -75,7 +79,6 @@ edit the file set_env.sh to set those environment variables, ```bash source set_env.sh ``` - Note: Please replace with `host_ip` with your external IP address, do not use localhost. 
### 2.2 Start Microservice Docker Containers @@ -88,7 +91,11 @@ There are 2 options to start the microservice cd GenAIExamples/DBQnA/docker_compose/intel/cpu/xeon docker compose up -d ``` - +or use vLLM service +```bash +cd GenAIExamples/DBQnA/docker_compose/intel/cpu/xeon +docker compose -f compose_vllm.yaml up -d +``` #### 2.2.2 Alternatively we can start the microservices by running individual docker services **NOTE:** Make sure all the individual docker services are down before starting them. @@ -108,7 +115,7 @@ docker run --name test-text2sql-postgres --ipc=host -e POSTGRES_USER=${POSTGRES_ ```bash -docker run -d --name="test-text2sql-tgi-endpoint" --ipc=host -p $TGI_PORT:80 -v ./data:/data --shm-size 1g -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e HF_TOKEN=${HF_TOKEN} -e model=${model} ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id $model +docker run -d --name="test-text2sql-tgi-endpoint" --ipc=host -p $TGI_PORT:80 -v ./data:/data --shm-size 1g -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e HF_TOKEN=${HF_TOKEN} -e model=${model} ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu --model-id $model ``` - Start Text-to-SQL Service @@ -127,8 +134,8 @@ docker run -d --name="test-dbqna-react-ui-server" --ipc=host -p 5174:80 -e no_pr ## 🚀 Validate Microservices -### 3.1 TGI Service - +### 3.1 TGI Service or vLLM Service +TGI Service ```bash curl http://${host_ip}:$TGI_PORT/generate \ @@ -136,7 +143,13 @@ curl http://${host_ip}:$TGI_PORT/generate \ -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \ -H 'Content-Type: application/json' ``` +vLLM Service +```bash +curl http://${host_ip}:8008/v1/chat/completions \ + -X POST \ + -d '{"model":"mistralai/Mistral-7B-Instruct-v0.3", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens":17}' \ + -H 'Content-Type: application/json' ### 3.2 Postgres Microservice Once Text-to-SQL microservice is started, 
user can use below command