EdgeCraftRAG: Add E2E test cases for EdgeCraftRAG - local LLM and vllm (#1137)

Signed-off-by: Zhang, Rui <rui2.zhang@intel.com>
Signed-off-by: Mingyuan Qi <mingyuan.qi@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Mingyuan Qi <mingyuan.qi@intel.com>
Author: rui2zhang
Date: 2024-11-17 18:22:32 +08:00
Committed by: GitHub
Parent: cbe952ec5e
Commit: 7949045176

8 changed files with 524 additions and 3 deletions


@@ -0,0 +1,53 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# POST INPUT_DATA to URL, require HTTP 200, and check that the response body
# contains EXPECTED_RESULT; on failure, append the container logs and exit.
function validate_services() {
    local URL="$1"
    local EXPECTED_RESULT="$2"
    local SERVICE_NAME="$3"
    local DOCKER_NAME="$4"
    local INPUT_DATA="$5"
    echo "[ $SERVICE_NAME ] Validating $SERVICE_NAME service..."
    local RESPONSE=$(curl -s -w "%{http_code}" -o ${LOG_PATH}/${SERVICE_NAME}.log -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
    while [ ! -f ${LOG_PATH}/${SERVICE_NAME}.log ]; do
        sleep 1
    done
    local HTTP_STATUS="${RESPONSE: -3}"
    local CONTENT=$(cat ${LOG_PATH}/${SERVICE_NAME}.log)
    if [ "$HTTP_STATUS" -eq 200 ]; then
        echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
        if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then
            echo "[ $SERVICE_NAME ] Content is as expected."
        else
            echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT"
            docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
            exit 1
        fi
    else
        echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
        docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
        exit 1
    fi
    sleep 1s
}
function check_gpu_usage() {
    echo "$(date)" > ${LOG_PATH}/gpu.log
    # '56a0' is the PCI device ID of the Intel Arc A770 discrete GPU
    pci_address=$(lspci | grep -i '56a0' | awk '{print $1}')
    gpu_stats=$(sudo xpu-smi stats -d 0000:"$pci_address") #TODO need sudo
    gpu_utilization=$(echo "$gpu_stats" | grep -i "GPU Utilization" | awk -F'|' '{print $3}' | awk '{print $1}')
    memory_used=$(echo "$gpu_stats" | grep -i "GPU Memory Used" | awk -F'|' '{print $3}' | awk '{print $1}')
    memory_util=$(echo "$gpu_stats" | grep -i "GPU Memory Util" | awk -F'|' '{print $3}' | awk '{print $1}')
    echo "GPU Utilization (%): $gpu_utilization" >> ${LOG_PATH}/gpu.log
    echo "GPU Memory Used (MiB): $memory_used" >> ${LOG_PATH}/gpu.log
    echo "GPU Memory Util (%): $memory_util" >> ${LOG_PATH}/gpu.log
    if [ "$memory_used" -lt 1024 ]; then
        echo "GPU Memory Used is less than 1G. Please check."
        exit 1
    fi
}
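The E2E scripts below source this file and drive their checks through validate_services; a minimal calling sketch (assuming LOG_PATH and HOST_IP are set by the caller, as the test scripts do):

source ./common.sh
export LOG_PATH="$(pwd)"                         # the helpers write their logs under LOG_PATH
export HOST_IP=$(hostname -I | awk '{print $1}')

# positional arguments: URL, expected substring, service name, container name, request body
validate_services \
    "${HOST_IP}:16010/v1/data" \
    "Done" \
    "data" \
    "edgecraftrag-server" \
    '@configs/test_data.json'
check_gpu_usage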


@@ -0,0 +1,3 @@
{
    "text": "A test case for the rag pipeline. The test id is 1234567890. There are several tests in this test case. The first test is for node parser. There are 3 types of node parsers. Their names are Aa, Bb and Cc. The second test is for indexer. The indexer will do the indexing for the given nodes. The last test is for retriever. Retrieving text is based on similarity search."
}


@@ -0,0 +1,44 @@
{
    "name": "rag_test_local_llm",
    "node_parser": {
        "chunk_size": 400,
        "chunk_overlap": 48,
        "parser_type": "simple"
    },
    "indexer": {
        "indexer_type": "faiss_vector",
        "embedding_model": {
            "model_id": "BAAI/bge-small-en-v1.5",
            "model_path": "./models/BAAI/bge-small-en-v1.5",
            "device": "auto",
            "weight": "INT4"
        }
    },
    "retriever": {
        "retriever_type": "vectorsimilarity",
        "retrieve_topk": 30
    },
    "postprocessor": [
        {
            "processor_type": "reranker",
            "top_n": 2,
            "reranker_model": {
                "model_id": "BAAI/bge-reranker-large",
                "model_path": "./models/BAAI/bge-reranker-large",
                "device": "auto",
                "weight": "INT4"
            }
        }
    ],
    "generator": {
        "model": {
            "model_id": "Qwen/Qwen2-7B-Instruct",
            "model_path": "./models/Qwen/Qwen2-7B-Instruct/INT4_compressed_weights",
            "device": "auto",
            "weight": "INT4"
        },
        "prompt_path": "./edgecraftrag/prompt_template/default_prompt.txt",
        "inference_type": "local"
    },
    "active": "True"
}
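For reference, the local-LLM test activates this pipeline by POSTing the file to the EC-RAG pipeline-settings endpoint (see validate_rag below); roughly, assuming the server listens on port 16010:

curl -s -X POST \
    -H 'Content-Type: application/json' \
    -d @configs/test_pipeline_local_llm.json \
    "${HOST_IP}:16010/v1/settings/pipelines"
# the test then greps the response for the substring "active"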


@@ -0,0 +1,44 @@
{
    "name": "rag_test_local_llm",
    "node_parser": {
        "chunk_size": 400,
        "chunk_overlap": 48,
        "parser_type": "simple"
    },
    "indexer": {
        "indexer_type": "faiss_vector",
        "embedding_model": {
            "model_id": "BAAI/bge-small-en-v1.5",
            "model_path": "./models/BAAI/bge-small-en-v1.5",
            "device": "auto",
            "weight": "INT4"
        }
    },
    "retriever": {
        "retriever_type": "vectorsimilarity",
        "retrieve_topk": 30
    },
    "postprocessor": [
        {
            "processor_type": "reranker",
            "top_n": 2,
            "reranker_model": {
                "model_id": "BAAI/bge-reranker-large",
                "model_path": "./models/BAAI/bge-reranker-large",
                "device": "auto",
                "weight": "INT4"
            }
        }
    ],
    "generator": {
        "model": {
            "model_id": "Qwen/Qwen2-7B-Instruct",
            "model_path": "./models/Qwen/Qwen2-7B-Instruct/INT4_compressed_weights",
            "device": "auto",
            "weight": "INT4"
        },
        "prompt_path": "./edgecraftrag/prompt_template/default_prompt.txt",
        "inference_type": "vllm"
    },
    "active": "True"
}
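This config differs from the local-LLM pipeline only in the generator's "inference_type"; the vLLM test script points the generator at an external vLLM OpenVINO server through environment variables before composing the stack, roughly:

export LLM_MODEL="Qwen/Qwen2-7B-Instruct"
export VLLM_SERVICE_PORT=8008
export vLLM_ENDPOINT="http://${HOST_IP}:${VLLM_SERVICE_PORT}"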


@@ -0,0 +1,113 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -e
source ./common.sh
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')
HOST_IP=$ip_address
COMPOSE_FILE="compose.yaml"
EC_RAG_SERVICE_PORT=16010
#MODEL_PATH="$WORKPATH/models"
MODEL_PATH="/home/media/models"
HF_ENDPOINT=https://hf-mirror.com
function build_docker_images() {
    cd $WORKPATH/docker_image_build
    echo "Build all the images with --no-cache, check docker_image_build.log for details..."
    service_list="server ui ecrag"
    docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
    docker images && sleep 1s
}

function start_services() {
    export MODEL_PATH=${MODEL_PATH}
    export HOST_IP=${HOST_IP}
    export LLM_MODEL=${LLM_MODEL}
    export HF_ENDPOINT=${HF_ENDPOINT}
    export vLLM_ENDPOINT=${vLLM_ENDPOINT}
    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
    export no_proxy="localhost, 127.0.0.1, 192.168.1.1"
    cd $WORKPATH/docker_compose/intel/gpu/arc
    # Start Docker Containers
    docker compose -f $COMPOSE_FILE up -d > ${LOG_PATH}/start_services_with_compose.log
    sleep 20
}

function validate_rag() {
    cd $WORKPATH/tests
    # setup pipeline
    validate_services \
        "${HOST_IP}:${EC_RAG_SERVICE_PORT}/v1/settings/pipelines" \
        "active" \
        "pipeline" \
        "edgecraftrag-server" \
        '@configs/test_pipeline_local_llm.json'
    # add data
    validate_services \
        "${HOST_IP}:${EC_RAG_SERVICE_PORT}/v1/data" \
        "Done" \
        "data" \
        "edgecraftrag-server" \
        '@configs/test_data.json'
    # query
    validate_services \
        "${HOST_IP}:${EC_RAG_SERVICE_PORT}/v1/chatqna" \
        "1234567890" \
        "query" \
        "vllm-openvino-server" \
        '{"messages":"What is the test id?"}'
}

function validate_megaservice() {
    # Curl the Mega Service
    validate_services \
        "${HOST_IP}:16011/v1/chatqna" \
        "1234567890" \
        "query" \
        "vllm-openvino-server" \
        '{"messages":"What is the test id?"}'
}

function stop_docker() {
    cd $WORKPATH/docker_compose/intel/gpu/arc
    docker compose -f $COMPOSE_FILE down
}

function main() {
    mkdir -p $LOG_PATH
    stop_docker
    if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
    start_services
    echo "EC_RAG service started" && sleep 1s
    validate_rag
    validate_megaservice
    stop_docker
    echo y | docker system prune
}
main
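A hypothetical invocation of this script (its file name is not shown in this diff, so the name below is only a placeholder); the inputs it expects from the environment are the image registry/tag and a Hugging Face token:

export HUGGINGFACEHUB_API_TOKEN=<your-hf-token>
IMAGE_REPO=opea IMAGE_TAG=latest bash test_local_llm_on_arc.sh   # placeholder file name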


@@ -0,0 +1,166 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -e
source ./common.sh
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')
HOST_IP=$ip_address
COMPOSE_FILE="compose_vllm.yaml"
EC_RAG_SERVICE_PORT=16010
#MODEL_PATH="$WORKPATH/models"
MODEL_PATH="/home/media/models"
#HF_ENDPOINT=https://hf-mirror.com
LLM_MODEL="Qwen/Qwen2-7B-Instruct"
VLLM_SERVICE_PORT=8008
vLLM_ENDPOINT="http://${HOST_IP}:${VLLM_SERVICE_PORT}"
function build_docker_images() {
    cd $WORKPATH/docker_image_build
    echo "Build all the images with --no-cache, check docker_image_build.log for details..."
    service_list="server ui ecrag"
    docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
    echo "Build vllm_openvino image from GenAIComps..."
    cd $WORKPATH && git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}"
    cd comps/llms/text-generation/vllm/langchain/dependency
    bash ./build_docker_vllm_openvino.sh gpu
    docker images && sleep 1s
}
function start_services() {
    export MODEL_PATH=${MODEL_PATH}
    export HOST_IP=${HOST_IP}
    export LLM_MODEL=${LLM_MODEL}
    export HF_ENDPOINT=${HF_ENDPOINT}
    export vLLM_ENDPOINT=${vLLM_ENDPOINT}
    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
    export no_proxy="localhost, 127.0.0.1, 192.168.1.1"
    cd $WORKPATH/docker_compose/intel/gpu/arc
    # Start Docker Containers
    docker compose -f $COMPOSE_FILE up -d > ${LOG_PATH}/start_services_with_compose.log
    # Poll the vLLM container logs (up to 100 x 5s); a "metrics.py" log line is
    # treated as the signal that the server has finished loading and is serving.
    n=0
    until [[ "$n" -ge 100 ]]; do
        docker logs vllm-openvino-server > ${LOG_PATH}/vllm_service_start.log
        if grep -q "metrics.py" ${LOG_PATH}/vllm_service_start.log; then
            break
        fi
        sleep 5s
        n=$((n+1))
    done
}
function validate_services() {
    local URL="$1"
    local EXPECTED_RESULT="$2"
    local SERVICE_NAME="$3"
    local DOCKER_NAME="$4"
    local INPUT_DATA="$5"
    echo "[ $SERVICE_NAME ] Validating $SERVICE_NAME service..."
    local RESPONSE=$(curl -s -w "%{http_code}" -o ${LOG_PATH}/${SERVICE_NAME}.log -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
    while [ ! -f ${LOG_PATH}/${SERVICE_NAME}.log ]; do
        sleep 1
    done
    local HTTP_STATUS="${RESPONSE: -3}"
    local CONTENT=$(cat ${LOG_PATH}/${SERVICE_NAME}.log)
    if [ "$HTTP_STATUS" -eq 200 ]; then
        echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
        if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then
            echo "[ $SERVICE_NAME ] Content is as expected."
        else
            echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT"
            docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
            exit 1
        fi
    else
        echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
        docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
        exit 1
    fi
    sleep 1s
}
function validate_rag() {
    cd $WORKPATH/tests
    # setup pipeline
    validate_services \
        "${HOST_IP}:${EC_RAG_SERVICE_PORT}/v1/settings/pipelines" \
        "active" \
        "pipeline" \
        "edgecraftrag-server" \
        '@configs/test_pipeline_vllm.json'
    # add data
    validate_services \
        "${HOST_IP}:${EC_RAG_SERVICE_PORT}/v1/data" \
        "Done" \
        "data" \
        "edgecraftrag-server" \
        '@configs/test_data.json'
    # query
    validate_services \
        "${HOST_IP}:${EC_RAG_SERVICE_PORT}/v1/chatqna" \
        "1234567890" \
        "query" \
        "vllm-openvino-server" \
        '{"messages":"What is the test id?"}'
}

function validate_megaservice() {
    # Curl the Mega Service
    validate_services \
        "${HOST_IP}:16011/v1/chatqna" \
        "1234567890" \
        "query" \
        "vllm-openvino-server" \
        '{"messages":"What is the test id?"}'
}

function stop_docker() {
    cd $WORKPATH/docker_compose/intel/gpu/arc
    docker compose -f $COMPOSE_FILE down
}

function main() {
    mkdir -p "$LOG_PATH"
    stop_docker
    if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
    start_time=$(date +%s)
    start_services
    end_time=$(date +%s)
    duration=$((end_time-start_time))
    echo "EC_RAG service start duration is $duration s" && sleep 1s
    validate_rag
    validate_megaservice
    stop_docker
    echo y | docker system prune
}
main
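If the chatqna query fails even though start_services reported the container ready, a quick manual sanity check of the vLLM backend can help; a sketch assuming the container exposes vLLM's standard OpenAI-compatible API on VLLM_SERVICE_PORT (8008):

curl -s "http://${HOST_IP}:8008/v1/models"       # should list Qwen/Qwen2-7B-Instruct
docker logs vllm-openvino-server | tail -n 20    # same log the readiness loop greps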


@@ -9,7 +9,9 @@
         "indexer_type": "faiss_vector",
         "embedding_model": {
             "model_id": "BAAI/bge-small-en-v1.5",
-            "device": "auto"
+            "model_path": "./models/BAAI/bge-small-en-v1.5",
+            "device": "auto",
+            "weight": "INT4"
         }
     },
     "retriever": {
@@ -22,14 +24,18 @@
             "top_n": 2,
             "reranker_model": {
                 "model_id": "BAAI/bge-reranker-large",
-                "device": "auto"
+                "model_path": "./models/BAAI/bge-reranker-large",
+                "device": "auto",
+                "weight": "INT4"
             }
         }
     ],
     "generator": {
         "model": {
             "model_id": "Qwen/Qwen2-7B-Instruct",
-            "device": "cpu"
+            "model_path": "./models/Qwen/Qwen2-7B-Instruct/INT4_compressed_weights",
+            "device": "auto",
+            "weight": "INT4"
         },
         "prompt_path": "./edgecraftrag/prompt_template/default_prompt.txt",
         "inference_type": "local"