EdgeCraftRAG: Add E2E test cases for EdgeCraftRAG - local LLM and vllm (#1137)
Signed-off-by: Zhang, Rui <rui2.zhang@intel.com>
Signed-off-by: Mingyuan Qi <mingyuan.qi@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Mingyuan Qi <mingyuan.qi@intel.com>
EdgeCraftRAG/tests/common.sh (Normal file, 53 lines)
@@ -0,0 +1,53 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

function validate_services() {
    local URL="$1"
    local EXPECTED_RESULT="$2"
    local SERVICE_NAME="$3"
    local DOCKER_NAME="$4"
    local INPUT_DATA="$5"

    echo "[ $SERVICE_NAME ] Validating $SERVICE_NAME service..."
    local RESPONSE=$(curl -s -w "%{http_code}" -o ${LOG_PATH}/${SERVICE_NAME}.log -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
    while [ ! -f ${LOG_PATH}/${SERVICE_NAME}.log ]; do
        sleep 1
    done
    local HTTP_STATUS="${RESPONSE: -3}"
    local CONTENT=$(cat ${LOG_PATH}/${SERVICE_NAME}.log)

    if [ "$HTTP_STATUS" -eq 200 ]; then
        echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
        if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then
            echo "[ $SERVICE_NAME ] Content is as expected."
        else
            echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT"
            docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
            exit 1
        fi
    else
        echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
        docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
        exit 1
    fi
    sleep 1s
}

function check_gpu_usage() {
    echo "$(date)" > ${LOG_PATH}/gpu.log
    # Locate the Arc GPU (PCI device ID 56a0) and query its stats via xpu-smi
    pci_address=$(lspci | grep -i '56a0' | awk '{print $1}')
    gpu_stats=$(sudo xpu-smi stats -d 0000:"$pci_address") # TODO: needs sudo
    gpu_utilization=$(echo "$gpu_stats" | grep -i "GPU Utilization" | awk -F'|' '{print $3}' | awk '{print $1}')
    memory_used=$(echo "$gpu_stats" | grep -i "GPU Memory Used" | awk -F'|' '{print $3}' | awk '{print $1}')
    memory_util=$(echo "$gpu_stats" | grep -i "GPU Memory Util" | awk -F'|' '{print $3}' | awk '{print $1}')

    echo "GPU Utilization (%): $gpu_utilization" >> ${LOG_PATH}/gpu.log
    echo "GPU Memory Used (MiB): $memory_used" >> ${LOG_PATH}/gpu.log
    echo "GPU Memory Util (%): $memory_util" >> ${LOG_PATH}/gpu.log

    if [ "$memory_used" -lt 1024 ]; then
        echo "GPU Memory Used is less than 1G. Please check."
        exit 1
    fi
}
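For reference, the five positional arguments of validate_services map to: request URL, expected substring in the response, a short service label used for the log file name, the container whose logs are dumped on failure, and the POST body (an '@file' value makes curl read the body from that file). A minimal invocation sketch, with values mirroring the test scripts below:

    validate_services \
        "${HOST_IP}:16010/v1/data" \
        "Done" \
        "data" \
        "edgecraftrag-server" \
        '@configs/test_data.json'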
EdgeCraftRAG/tests/configs/test_data.json (Normal file, 3 lines)
@@ -0,0 +1,3 @@
{
    "text": "A test case for the rag pipeline. The test id is 1234567890. There are several tests in this test case. The first test is for node parser. There are 3 types of node parsers. Their names are Aa, Bb and Cc. The second test is for indexer. The indexer will do the indexing for the given nodes. The last test is for retriever. Retrieving text is based on similarity search."
}
EdgeCraftRAG/tests/configs/test_pipeline_local_llm.json (Normal file, 44 lines)
@@ -0,0 +1,44 @@
{
    "name": "rag_test_local_llm",
    "node_parser": {
        "chunk_size": 400,
        "chunk_overlap": 48,
        "parser_type": "simple"
    },
    "indexer": {
        "indexer_type": "faiss_vector",
        "embedding_model": {
            "model_id": "BAAI/bge-small-en-v1.5",
            "model_path": "./models/BAAI/bge-small-en-v1.5",
            "device": "auto",
            "weight": "INT4"
        }
    },
    "retriever": {
        "retriever_type": "vectorsimilarity",
        "retrieve_topk": 30
    },
    "postprocessor": [
        {
            "processor_type": "reranker",
            "top_n": 2,
            "reranker_model": {
                "model_id": "BAAI/bge-reranker-large",
                "model_path": "./models/BAAI/bge-reranker-large",
                "device": "auto",
                "weight": "INT4"
            }
        }
    ],
    "generator": {
        "model": {
            "model_id": "Qwen/Qwen2-7B-Instruct",
            "model_path": "./models/Qwen/Qwen2-7B-Instruct/INT4_compressed_weights",
            "device": "auto",
            "weight": "INT4"
        },
        "prompt_path": "./edgecraftrag/prompt_template/default_prompt.txt",
        "inference_type": "local"
    },
    "active": "True"
}
EdgeCraftRAG/tests/configs/test_pipeline_vllm.json (Normal file, 44 lines)
@@ -0,0 +1,44 @@
{
    "name": "rag_test_local_llm",
    "node_parser": {
        "chunk_size": 400,
        "chunk_overlap": 48,
        "parser_type": "simple"
    },
    "indexer": {
        "indexer_type": "faiss_vector",
        "embedding_model": {
            "model_id": "BAAI/bge-small-en-v1.5",
            "model_path": "./models/BAAI/bge-small-en-v1.5",
            "device": "auto",
            "weight": "INT4"
        }
    },
    "retriever": {
        "retriever_type": "vectorsimilarity",
        "retrieve_topk": 30
    },
    "postprocessor": [
        {
            "processor_type": "reranker",
            "top_n": 2,
            "reranker_model": {
                "model_id": "BAAI/bge-reranker-large",
                "model_path": "./models/BAAI/bge-reranker-large",
                "device": "auto",
                "weight": "INT4"
            }
        }
    ],
    "generator": {
        "model": {
            "model_id": "Qwen/Qwen2-7B-Instruct",
            "model_path": "./models/Qwen/Qwen2-7B-Instruct/INT4_compressed_weights",
            "device": "auto",
            "weight": "INT4"
        },
        "prompt_path": "./edgecraftrag/prompt_template/default_prompt.txt",
        "inference_type": "vllm"
    },
    "active": "True"
}
EdgeCraftRAG/tests/test_compose_on_arc.sh (Executable file, 113 lines)
@@ -0,0 +1,113 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

set -e
source ./common.sh

IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}

WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"

ip_address=$(hostname -I | awk '{print $1}')
HOST_IP=$ip_address

COMPOSE_FILE="compose.yaml"
EC_RAG_SERVICE_PORT=16010
#MODEL_PATH="$WORKPATH/models"
MODEL_PATH="/home/media/models"
HF_ENDPOINT=https://hf-mirror.com

function build_docker_images() {
    cd $WORKPATH/docker_image_build
    echo "Build all the images with --no-cache, check docker_image_build.log for details..."
    service_list="server ui ecrag"
    docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

    docker images && sleep 1s
}

function start_services() {
    export MODEL_PATH=${MODEL_PATH}
    export HOST_IP=${HOST_IP}
    export LLM_MODEL=${LLM_MODEL}
    export HF_ENDPOINT=${HF_ENDPOINT}
    export vLLM_ENDPOINT=${vLLM_ENDPOINT}
    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
    export no_proxy="localhost, 127.0.0.1, 192.168.1.1"

    cd $WORKPATH/docker_compose/intel/gpu/arc

    # Start Docker Containers
    docker compose -f $COMPOSE_FILE up -d > ${LOG_PATH}/start_services_with_compose.log
    sleep 20
}

function validate_rag() {
    cd $WORKPATH/tests

    # Set up the pipeline
    validate_services \
        "${HOST_IP}:${EC_RAG_SERVICE_PORT}/v1/settings/pipelines" \
        "active" \
        "pipeline" \
        "edgecraftrag-server" \
        '@configs/test_pipeline_local_llm.json'

    # Add data
    validate_services \
        "${HOST_IP}:${EC_RAG_SERVICE_PORT}/v1/data" \
        "Done" \
        "data" \
        "edgecraftrag-server" \
        '@configs/test_data.json'

    # Query
    validate_services \
        "${HOST_IP}:${EC_RAG_SERVICE_PORT}/v1/chatqna" \
        "1234567890" \
        "query" \
        "vllm-openvino-server" \
        '{"messages":"What is the test id?"}'
}
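For manual debugging outside the harness, the final query step can be reproduced with a one-off curl (a sketch assuming the host IP and the default port 16010 exported by this script):

    curl -s -X POST "${HOST_IP}:16010/v1/chatqna" \
        -H 'Content-Type: application/json' \
        -d '{"messages":"What is the test id?"}'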
function validate_megaservice() {
    # Curl the Mega Service
    validate_services \
        "${HOST_IP}:16011/v1/chatqna" \
        "1234567890" \
        "query" \
        "vllm-openvino-server" \
        '{"messages":"What is the test id?"}'
}

function stop_docker() {
    cd $WORKPATH/docker_compose/intel/gpu/arc
    docker compose -f $COMPOSE_FILE down
}

function main() {
    mkdir -p $LOG_PATH

    stop_docker
    if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
    start_services
    echo "EC_RAG service started" && sleep 1s

    validate_rag
    validate_megaservice

    stop_docker
    echo y | docker system prune
}

main
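A sketch of how this test is expected to be launched locally (IMAGE_REPO, IMAGE_TAG, and HUGGINGFACEHUB_API_TOKEN are the variables the script reads; the token value here is a placeholder):

    cd EdgeCraftRAG/tests
    IMAGE_REPO=opea IMAGE_TAG=latest \
    HUGGINGFACEHUB_API_TOKEN=<your_hf_token> \
    bash test_compose_on_arc.sh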
EdgeCraftRAG/tests/test_compose_vllm_on_arc.sh (Executable file, 166 lines)
@@ -0,0 +1,166 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

set -e
source ./common.sh

IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}

WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"

ip_address=$(hostname -I | awk '{print $1}')
HOST_IP=$ip_address

COMPOSE_FILE="compose_vllm.yaml"
EC_RAG_SERVICE_PORT=16010
#MODEL_PATH="$WORKPATH/models"
MODEL_PATH="/home/media/models"
#HF_ENDPOINT=https://hf-mirror.com
LLM_MODEL="Qwen/Qwen2-7B-Instruct"
VLLM_SERVICE_PORT=8008
vLLM_ENDPOINT="http://${HOST_IP}:${VLLM_SERVICE_PORT}"

function build_docker_images() {
    cd $WORKPATH/docker_image_build
    echo "Build all the images with --no-cache, check docker_image_build.log for details..."
    service_list="server ui ecrag"
    docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

    echo "Build vllm_openvino image from GenAIComps..."
    cd $WORKPATH && git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}"
    cd comps/llms/text-generation/vllm/langchain/dependency
    bash ./build_docker_vllm_openvino.sh gpu

    docker images && sleep 1s
}

function start_services() {
    export MODEL_PATH=${MODEL_PATH}
    export HOST_IP=${HOST_IP}
    export LLM_MODEL=${LLM_MODEL}
    export HF_ENDPOINT=${HF_ENDPOINT}
    export vLLM_ENDPOINT=${vLLM_ENDPOINT}
    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
    export no_proxy="localhost, 127.0.0.1, 192.168.1.1"

    cd $WORKPATH/docker_compose/intel/gpu/arc

    # Start Docker Containers
    docker compose -f $COMPOSE_FILE up -d > ${LOG_PATH}/start_services_with_compose.log
    # Poll the vLLM container log (up to 100 times, 5s apart) until "metrics.py"
    # appears, which this test treats as the server's readiness signal
    n=0
    until [[ "$n" -ge 100 ]]; do
        docker logs vllm-openvino-server > ${LOG_PATH}/vllm_service_start.log
        if grep -q "metrics.py" ${LOG_PATH}/vllm_service_start.log; then
            break
        fi
        sleep 5s
        n=$((n+1))
    done
}
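While the service warms up, the same readiness probe can be run by hand (a sketch; "metrics.py" is simply the marker string this script waits for in the vLLM log):

    docker logs vllm-openvino-server 2>&1 | grep metrics.py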
# Overrides validate_services from common.sh with the same logic
function validate_services() {
    local URL="$1"
    local EXPECTED_RESULT="$2"
    local SERVICE_NAME="$3"
    local DOCKER_NAME="$4"
    local INPUT_DATA="$5"

    echo "[ $SERVICE_NAME ] Validating $SERVICE_NAME service..."
    local RESPONSE=$(curl -s -w "%{http_code}" -o ${LOG_PATH}/${SERVICE_NAME}.log -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
    while [ ! -f ${LOG_PATH}/${SERVICE_NAME}.log ]; do
        sleep 1
    done
    local HTTP_STATUS="${RESPONSE: -3}"
    local CONTENT=$(cat ${LOG_PATH}/${SERVICE_NAME}.log)

    if [ "$HTTP_STATUS" -eq 200 ]; then
        echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."

        if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then
            echo "[ $SERVICE_NAME ] Content is as expected."
        else
            echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT"
            docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
            exit 1
        fi
    else
        echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
        docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
        exit 1
    fi
    sleep 1s
}

function validate_rag() {
    cd $WORKPATH/tests

    # Set up the pipeline
    validate_services \
        "${HOST_IP}:${EC_RAG_SERVICE_PORT}/v1/settings/pipelines" \
        "active" \
        "pipeline" \
        "edgecraftrag-server" \
        '@configs/test_pipeline_vllm.json'

    # Add data
    validate_services \
        "${HOST_IP}:${EC_RAG_SERVICE_PORT}/v1/data" \
        "Done" \
        "data" \
        "edgecraftrag-server" \
        '@configs/test_data.json'

    # Query
    validate_services \
        "${HOST_IP}:${EC_RAG_SERVICE_PORT}/v1/chatqna" \
        "1234567890" \
        "query" \
        "vllm-openvino-server" \
        '{"messages":"What is the test id?"}'
}

function validate_megaservice() {
    # Curl the Mega Service
    validate_services \
        "${HOST_IP}:16011/v1/chatqna" \
        "1234567890" \
        "query" \
        "vllm-openvino-server" \
        '{"messages":"What is the test id?"}'
}

function stop_docker() {
    cd $WORKPATH/docker_compose/intel/gpu/arc
    docker compose -f $COMPOSE_FILE down
}

function main() {
    mkdir -p "$LOG_PATH"

    stop_docker
    if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
    start_time=$(date +%s)
    start_services
    end_time=$(date +%s)
    duration=$((end_time-start_time))
    echo "EC_RAG service start duration is $duration s" && sleep 1s

    validate_rag
    validate_megaservice

    stop_docker
    echo y | docker system prune
}

main
@@ -9,7 +9,9 @@
         "indexer_type": "faiss_vector",
         "embedding_model": {
             "model_id": "BAAI/bge-small-en-v1.5",
-            "device": "auto"
+            "model_path": "./models/BAAI/bge-small-en-v1.5",
+            "device": "auto",
+            "weight": "INT4"
         }
     },
     "retriever": {
@@ -22,14 +24,18 @@
             "top_n": 2,
             "reranker_model": {
                 "model_id": "BAAI/bge-reranker-large",
-                "device": "auto"
+                "model_path": "./models/BAAI/bge-reranker-large",
+                "device": "auto",
+                "weight": "INT4"
             }
         }
     ],
     "generator": {
         "model": {
             "model_id": "Qwen/Qwen2-7B-Instruct",
-            "device": "cpu"
+            "model_path": "./models/Qwen/Qwen2-7B-Instruct/INT4_compressed_weights",
+            "device": "auto",
+            "weight": "INT4"
         },
         "prompt_path": "./edgecraftrag/prompt_template/default_prompt.txt",
         "inference_type": "local"