[DocIndexRetriever] enable the without-rerank flavor (#1223)
Signed-off-by: Li Gang <gang.g.li@intel.com> Co-authored-by: ligang <ligang@ligang-nuc9v.bj.intel.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
This commit is contained in:
@@ -62,6 +62,17 @@ cd GenAIExamples/DocIndexRetriever/intel/cpu/xeon/
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
Two types of DocRetriever pipeline are supported now: `DocRetriever with Rerank` and `DocRetriever without Rerank`. The `DocRetriever without Rerank` pipeline (Embedding and Retrieval only) is intended for users who want the LLM to process all retrieved documents itself and who need the highest DocRetriever performance.
|
||||
In that case, start the Docker containers with `compose_without_rerank.yaml`:
|
||||
|
||||
```bash
|
||||
export host_ip="YOUR IP ADDR"
|
||||
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
|
||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||
cd GenAIExamples/DocIndexRetriever/intel/cpu/xeon/
|
||||
docker compose -f compose_without_rerank.yaml up -d
|
||||
```
|
||||
|
||||
## 4. Validation
|
||||
|
||||
Add Knowledge Base via HTTP Links:
|
||||
|
||||
@@ -0,0 +1,102 @@
|
||||
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Rerank-free DocIndexRetriever stack: Redis vector DB, dataprep, TEI
# embedding backend, embedding wrapper, retriever, and the megaservice
# gateway started with --without-rerank.
version: "3.8"

services:
  redis-vector-db:
    image: redis/redis-stack:7.2.0-v9
    container_name: redis-vector-db
    ports:
      - "6379:6379"
      - "8001:8001"
  dataprep-redis-service:
    image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest}
    container_name: dataprep-redis-server
    depends_on:
      - redis-vector-db
    ports:
      - "6007:6007"
      - "6008:6008"
      - "6009:6009"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      REDIS_URL: redis://redis-vector-db:6379
      REDIS_HOST: redis-vector-db
      INDEX_NAME: ${INDEX_NAME:-rag-redis}
      TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
  tei-embedding-service:
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
    container_name: tei-embedding-server
    ports:
      - "6006:80"
    volumes:
      # Model cache on the host. Was hard-coded to a developer's home
      # directory (/home/ligang/models); override MODEL_CACHE as needed.
      - "${MODEL_CACHE:-./data}:/data"
    shm_size: 1g
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
    command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
  embedding:
    image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
    container_name: embedding-tei-server
    ports:
      - "6000:6000"
    ipc: host
    depends_on:
      - tei-embedding-service
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
    restart: unless-stopped
  retriever:
    image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
    container_name: retriever-redis-server
    depends_on:
      - redis-vector-db
    ports:
      - "7000:7000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      REDIS_URL: redis://redis-vector-db:6379
      INDEX_NAME: ${INDEX_NAME:-rag-redis}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
    restart: unless-stopped
  doc-index-retriever-server:
    image: ${REGISTRY:-opea}/doc-index-retriever:${TAG:-latest}
    container_name: doc-index-retriever-server
    depends_on:
      - redis-vector-db
      - tei-embedding-service
      - embedding
      - retriever
    ports:
      - "8889:8889"
    environment:
      no_proxy: ${no_proxy}
      https_proxy: ${https_proxy}
      http_proxy: ${http_proxy}
      MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP:-0.0.0.0}
      EMBEDDING_SERVICE_HOST_IP: embedding
      # NOTE(review): key is ..._SERVICE_PORT but the env var consulted is
      # EMBEDDING_SERVER_PORT — confirm the SERVER/SERVICE mismatch is intended.
      EMBEDDING_SERVICE_PORT: ${EMBEDDING_SERVER_PORT:-6000}
      RETRIEVER_SERVICE_HOST_IP: retriever
      LOGFLAG: ${LOGFLAG}
    ipc: host
    restart: always
    # Selects the embedding -> retriever pipeline (no rerank stage).
    command: --without-rerank

networks:
  default:
    driver: bridge
|
||||
@@ -1,6 +1,7 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import os
|
||||
from typing import Union
|
||||
@@ -124,8 +125,37 @@ class RetrievalToolService(Gateway):
|
||||
output_datatype=Union[RerankedDoc, LLMParamsDoc],
|
||||
)
|
||||
|
||||
def add_remote_service_without_rerank(self):
    """Assemble the rerank-free retrieval pipeline: embedding -> retriever.

    Registers the two remote microservices on the megaservice graph and
    wires the embedding output directly into the retriever, skipping the
    rerank stage entirely.
    """
    embedding_svc = MicroService(
        name="embedding",
        host=EMBEDDING_SERVICE_HOST_IP,
        port=EMBEDDING_SERVICE_PORT,
        endpoint="/v1/embeddings",
        use_remote_service=True,
        service_type=ServiceType.EMBEDDING,
    )
    retriever_svc = MicroService(
        name="retriever",
        host=RETRIEVER_SERVICE_HOST_IP,
        port=RETRIEVER_SERVICE_PORT,
        endpoint="/v1/retrieval",
        use_remote_service=True,
        service_type=ServiceType.RETRIEVER,
    )
    # Register both nodes, then connect them as a two-stage flow.
    self.megaservice.add(embedding_svc).add(retriever_svc)
    self.megaservice.flow_to(embedding_svc, retriever_svc)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--without-rerank", action="store_true")

    args = parser.parse_args()

    chatqna = RetrievalToolService(port=MEGA_SERVICE_PORT)
    # Build exactly ONE pipeline. The previous code called
    # chatqna.add_remote_service() unconditionally before the branch, so the
    # rerank pipeline was constructed even with --without-rerank (and twice
    # without it).
    if args.without_rerank:
        chatqna.add_remote_service_without_rerank()
    else:
        chatqna.add_remote_service()
    chatqna.start()
|
||||
|
||||
147
DocIndexRetriever/tests/test_compose_without_rerank_on_xeon.sh
Normal file
147
DocIndexRetriever/tests/test_compose_without_rerank_on_xeon.sh
Normal file
@@ -0,0 +1,147 @@
|
||||
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# E2E test for the DocIndexRetriever without-rerank flavor on Xeon.
set -e

# Image coordinates: default to the public "opea" registry and "latest" tag,
# exported as REGISTRY/TAG for the compose files.
IMAGE_REPO=${IMAGE_REPO:-opea}
IMAGE_TAG=${IMAGE_TAG:-latest}
printf 'REGISTRY=IMAGE_REPO=%s\n' "${IMAGE_REPO}"
printf 'TAG=IMAGE_TAG=%s\n' "${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}

# Repo root (parent of tests/), log directory, and the host's primary IP.
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')
|
||||
|
||||
#######################################
# Build all service images and pull the prebuilt third-party images.
# Globals:   WORKPATH (read), LOG_PATH (read), opea_branch (read, optional)
# Outputs:   build log in ${LOG_PATH}/docker_image_build.log
#######################################
function build_docker_images() {
    echo "Building Docker Images...."
    # Quote paths so the script survives a WORKPATH containing spaces.
    cd "$WORKPATH/docker_image_build"
    if [ ! -d "GenAIComps" ] ; then
        echo "Cloning GenAIComps repository"
        git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
    fi
    # Deliberately unquoted: word-splitting expands each service name.
    service_list="dataprep-redis embedding-tei retriever-redis doc-index-retriever"
    docker compose -f build.yaml build ${service_list} --no-cache > "${LOG_PATH}/docker_image_build.log"

    docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
    docker pull redis/redis-stack:7.2.0-v9
    docker images && sleep 1s

    echo "Docker images built!"
}
|
||||
|
||||
#######################################
# Export the environment the compose file interpolates, then start the
# without-rerank stack and wait for it to come up.
# Globals:   WORKPATH, ip_address, HUGGINGFACEHUB_API_TOKEN (read)
#######################################
function start_services() {
    echo "Starting Docker Services...."
    # Quote the path so a WORKPATH with spaces cannot word-split the cd.
    cd "$WORKPATH/docker_compose/intel/cpu/xeon"
    export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
    export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:6006"
    export REDIS_URL="redis://${ip_address}:6379"
    export INDEX_NAME="rag-redis"
    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
    export MEGA_SERVICE_HOST_IP=${ip_address}
    export EMBEDDING_SERVICE_HOST_IP=${ip_address}
    export RETRIEVER_SERVICE_HOST_IP=${ip_address}

    # Start Docker Containers (fixed wait for model download + warm-up).
    docker compose -f compose_without_rerank.yaml up -d
    sleep 5m
    echo "Docker services started!"
}
|
||||
|
||||
#######################################
# Check whether a response contains the expected text.
# Arguments: $1 - response body; $2 - expected substring/pattern; $3 - label
# Outputs:   a human-readable verdict line, then "0" (match) or "1" (no match)
#######################################
function validate() {
    local content="$1"
    local expected="$2"
    local service="$3"

    if grep -q "$expected" <<<"$content"; then
        echo "[ $service ] Content is as expected: $content."
        echo 0
    else
        echo "[ $service ] Content does not match the expected result: $content"
        echo 1
    fi
}
|
||||
|
||||
#######################################
# End-to-end validation: ingest a document, then query the megaservice with
# a plain-text request and a ChatCompletion-style request. Dumps container
# logs and aborts on the first failing stage.
# Globals:   ip_address, WORKPATH, LOG_PATH (read)
#######################################
function validate_megaservice() {
    local reply verdict

    # --- Stage 1: ingest data through the dataprep service -------------------
    echo "===========Ingest data=================="
    reply=$(http_proxy="" curl -X POST "http://${ip_address}:6007/v1/dataprep" \
        -H "Content-Type: multipart/form-data" \
        -F 'link_list=["https://opea.dev/"]')
    verdict=$(validate "$reply" "Data preparation succeeded" "dataprep-redis-service-xeon")
    echo "$verdict"
    # validate() prints its message plus a final 0/1 — keep the last digit.
    verdict="${verdict:0-1}"
    echo "return value is $verdict"
    if [ "$verdict" == "1" ]; then
        docker logs dataprep-redis-server | tee -a "${LOG_PATH}/dataprep-redis-service-xeon.log"
        return 1
    fi

    # --- Stage 2: plain-text request against the megaservice -----------------
    echo "================Testing retriever service: Text Request ================"
    cd "$WORKPATH/tests"
    reply=$(http_proxy="" curl http://${ip_address}:8889/v1/retrievaltool -X POST -H "Content-Type: application/json" -d '{
        "text": "Explain the OPEA project?"
    }')
    # Alternative driver: python test.py --host_ip ${ip_address} --request_type text
    verdict=$(validate "$reply" "OPEA" "doc-index-retriever-service-xeon")
    echo "$verdict"
    verdict="${verdict:0-1}"
    echo "return value is $verdict"
    if [ "$verdict" == "1" ]; then
        echo "=============Embedding container log=================="
        docker logs embedding-tei-server | tee -a "${LOG_PATH}/doc-index-retriever-service-xeon.log"
        echo "=============Retriever container log=================="
        docker logs retriever-redis-server | tee -a "${LOG_PATH}/doc-index-retriever-service-xeon.log"
        echo "=============Doc-index-retriever container log=================="
        docker logs doc-index-retriever-server | tee -a "${LOG_PATH}/doc-index-retriever-service-xeon.log"
        exit 1
    fi

    # --- Stage 3: ChatCompletion-style request via the helper script ---------
    echo "================Testing retriever service: ChatCompletion Request================"
    cd "$WORKPATH/tests"
    reply=$(python test.py --host_ip ${ip_address} --request_type chat_completion)
    verdict=$(validate "$reply" "OPEA" "doc-index-retriever-service-xeon")
    echo "$verdict"
    verdict="${verdict:0-1}"
    echo "return value is $verdict"
    if [ "$verdict" == "1" ]; then
        echo "=============Embedding container log=================="
        docker logs embedding-tei-server | tee -a "${LOG_PATH}/doc-index-retriever-service-xeon.log"
        echo "=============Retriever container log=================="
        docker logs retriever-redis-server | tee -a "${LOG_PATH}/doc-index-retriever-service-xeon.log"
        echo "=============Doc-index-retriever container log=================="
        docker logs doc-index-retriever-server | tee -a "${LOG_PATH}/doc-index-retriever-service-xeon.log"
        exit 1
    fi
}
|
||||
|
||||
#######################################
# Stop and remove every container declared in the compose file.
# Fix: parse container names from compose_without_rerank.yaml — the file
# this test actually starts — instead of compose.yaml.
# Globals:   WORKPATH (read)
#######################################
function stop_docker() {
    cd "$WORKPATH/docker_compose/intel/cpu/xeon"
    container_list=$(grep container_name compose_without_rerank.yaml | cut -d':' -f2)
    for container_name in $container_list; do
        cid=$(docker ps -aq --filter "name=$container_name")
        echo "Stopping container $container_name"
        if [[ ! -z "$cid" ]]; then docker rm $cid -f && sleep 1s; fi
    done
}
|
||||
|
||||
#######################################
# Orchestrate the test run: clean slate, build, start, validate, clean up.
#######################################
function main() {

    stop_docker
    build_docker_images
    echo "Dump current docker ps"
    docker ps
    # Time how long the stack takes to come up.
    t_start=$(date +%s)
    start_services
    t_end=$(date +%s)
    elapsed=$((t_end - t_start))
    echo "Mega service start duration is $elapsed s"
    validate_megaservice

    stop_docker
    echo y | docker system prune

}

main
|
||||
Reference in New Issue
Block a user