From a50e4e6f9f2570ef941faaf8cf3a75149db2f4dd Mon Sep 17 00:00:00 2001 From: Li Gang Date: Thu, 12 Dec 2024 09:34:21 +0800 Subject: [PATCH] [DocIndexRetriever] enable the without-rerank flavor (#1223) Signed-off-by: Li Gang Co-authored-by: ligang Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../docker_compose/intel/cpu/xeon/README.md | 11 ++ .../cpu/xeon/compose_without_rerank.yaml | 102 ++++++++++++ DocIndexRetriever/retrieval_tool.py | 32 +++- .../test_compose_without_rerank_on_xeon.sh | 147 ++++++++++++++++++ 4 files changed, 291 insertions(+), 1 deletion(-) create mode 100644 DocIndexRetriever/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml create mode 100644 DocIndexRetriever/tests/test_compose_without_rerank_on_xeon.sh diff --git a/DocIndexRetriever/docker_compose/intel/cpu/xeon/README.md b/DocIndexRetriever/docker_compose/intel/cpu/xeon/README.md index 58354babf..3ad27345c 100644 --- a/DocIndexRetriever/docker_compose/intel/cpu/xeon/README.md +++ b/DocIndexRetriever/docker_compose/intel/cpu/xeon/README.md @@ -62,6 +62,17 @@ cd GenAIExamples/DocIndexRetriever/intel/cpu/xoen/ docker compose up -d ``` +Two types of DocRetriever pipeline are supported now: `DocRetriever with/without Rerank`. And the `DocRetriever without Rerank` pipeline (including Embedding and Retrieval) is offered for customers who expect to handle all retrieved documents by LLM, and require high performance of DocRetriever. +In that case, start Docker Containers with compose_without_rerank.yaml + +```bash +export host_ip="YOUR IP ADDR" +export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" +cd GenAIExamples/DocIndexRetriever/docker_compose/intel/cpu/xeon/ +docker compose -f compose_without_rerank.yaml up -d +``` + ## 4. 
Validation Add Knowledge Base via HTTP Links: diff --git a/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml b/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml new file mode 100644 index 000000000..986fcb41a --- /dev/null +++ b/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml @@ -0,0 +1,102 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +version: "3.8" + +services: + redis-vector-db: + image: redis/redis-stack:7.2.0-v9 + container_name: redis-vector-db + ports: + - "6379:6379" + - "8001:8001" + dataprep-redis-service: + image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest} + container_name: dataprep-redis-server + depends_on: + - redis-vector-db + ports: + - "6007:6007" + - "6008:6008" + - "6009:6009" + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + REDIS_URL: redis://redis-vector-db:6379 + REDIS_HOST: redis-vector-db + INDEX_NAME: ${INDEX_NAME:-rag-redis} + TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80 + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + tei-embedding-service: + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + container_name: tei-embedding-server + ports: + - "6006:80" + volumes: + - "./data:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate + embedding: + image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest} + container_name: embedding-tei-server + ports: + - "6000:6000" + ipc: host + depends_on: + - tei-embedding-service + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80 + restart: unless-stopped + 
retriever: + image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest} + container_name: retriever-redis-server + depends_on: + - redis-vector-db + ports: + - "7000:7000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + REDIS_URL: redis://redis-vector-db:6379 + INDEX_NAME: ${INDEX_NAME:-rag-redis} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80 + restart: unless-stopped + doc-index-retriever-server: + image: ${REGISTRY:-opea}/doc-index-retriever:${TAG:-latest} + container_name: doc-index-retriever-server + depends_on: + - redis-vector-db + - tei-embedding-service + - embedding + - retriever + ports: + - "8889:8889" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP:-0.0.0.0} + EMBEDDING_SERVICE_HOST_IP: embedding + EMBEDDING_SERVICE_PORT: ${EMBEDDING_SERVER_PORT:-6000} + RETRIEVER_SERVICE_HOST_IP: retriever + LOGFLAG: ${LOGFLAG} + ipc: host + restart: always + command: --without-rerank + +networks: + default: + driver: bridge diff --git a/DocIndexRetriever/retrieval_tool.py b/DocIndexRetriever/retrieval_tool.py index b902b7a20..9581612a5 100644 --- a/DocIndexRetriever/retrieval_tool.py +++ b/DocIndexRetriever/retrieval_tool.py @@ -1,6 +1,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import argparse import asyncio import os from typing import Union @@ -124,8 +125,37 @@ class RetrievalToolService(Gateway): output_datatype=Union[RerankedDoc, LLMParamsDoc], ) + def add_remote_service_without_rerank(self): + embedding = MicroService( + name="embedding", + host=EMBEDDING_SERVICE_HOST_IP, + port=EMBEDDING_SERVICE_PORT, + endpoint="/v1/embeddings", + use_remote_service=True, + service_type=ServiceType.EMBEDDING, + ) + retriever = MicroService( + name="retriever", + host=RETRIEVER_SERVICE_HOST_IP, + 
port=RETRIEVER_SERVICE_PORT, + endpoint="/v1/retrieval", + use_remote_service=True, + service_type=ServiceType.RETRIEVER, + ) + + self.megaservice.add(embedding).add(retriever) + self.megaservice.flow_to(embedding, retriever) + if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--without-rerank", action="store_true") + + args = parser.parse_args() + chatqna = RetrievalToolService(port=MEGA_SERVICE_PORT) - chatqna.add_remote_service() + if args.without_rerank: + chatqna.add_remote_service_without_rerank() + else: + chatqna.add_remote_service() chatqna.start() diff --git a/DocIndexRetriever/tests/test_compose_without_rerank_on_xeon.sh b/DocIndexRetriever/tests/test_compose_without_rerank_on_xeon.sh new file mode 100644 index 000000000..0298a8a55 --- /dev/null +++ b/DocIndexRetriever/tests/test_compose_without_rerank_on_xeon.sh @@ -0,0 +1,147 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -e +IMAGE_REPO=${IMAGE_REPO:-"opea"} +IMAGE_TAG=${IMAGE_TAG:-"latest"} +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=IMAGE_TAG=${IMAGE_TAG}" +export REGISTRY=${IMAGE_REPO} +export TAG=${IMAGE_TAG} + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') + +function build_docker_images() { + echo "Building Docker Images...." + cd $WORKPATH/docker_image_build + if [ ! -d "GenAIComps" ] ; then + echo "Cloning GenAIComps repository" + git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../ + fi + service_list="dataprep-redis embedding-tei retriever-redis doc-index-retriever" + docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log + + docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + docker pull redis/redis-stack:7.2.0-v9 + docker images && sleep 1s + + echo "Docker images built!" 
+} + +function start_services() { + echo "Starting Docker Services...." + cd $WORKPATH/docker_compose/intel/cpu/xeon + export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" + export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:6006" + export REDIS_URL="redis://${ip_address}:6379" + export INDEX_NAME="rag-redis" + export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + export MEGA_SERVICE_HOST_IP=${ip_address} + export EMBEDDING_SERVICE_HOST_IP=${ip_address} + export RETRIEVER_SERVICE_HOST_IP=${ip_address} + + # Start Docker Containers + docker compose -f compose_without_rerank.yaml up -d + sleep 5m + echo "Docker services started!" +} + +function validate() { + local CONTENT="$1" + local EXPECTED_RESULT="$2" + local SERVICE_NAME="$3" + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected: $CONTENT." + echo 0 + else + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + echo 1 + fi +} + +function validate_megaservice() { + echo "===========Ingest data==================" + local CONTENT=$(http_proxy="" curl -X POST "http://${ip_address}:6007/v1/dataprep" \ + -H "Content-Type: multipart/form-data" \ + -F 'link_list=["https://opea.dev/"]') + local EXIT_CODE=$(validate "$CONTENT" "Data preparation succeeded" "dataprep-redis-service-xeon") + echo "$EXIT_CODE" + local EXIT_CODE="${EXIT_CODE:0-1}" + echo "return value is $EXIT_CODE" + if [ "$EXIT_CODE" == "1" ]; then + docker logs dataprep-redis-server | tee -a ${LOG_PATH}/dataprep-redis-service-xeon.log + return 1 + fi + + # Curl the Mega Service + echo "================Testing retriever service: Text Request ================" + cd $WORKPATH/tests + local CONTENT=$(http_proxy="" curl http://${ip_address}:8889/v1/retrievaltool -X POST -H "Content-Type: application/json" -d '{ + "text": "Explain the OPEA project?" 
+ }') + # local CONTENT=$(python test.py --host_ip ${ip_address} --request_type text) + local EXIT_CODE=$(validate "$CONTENT" "OPEA" "doc-index-retriever-service-xeon") + echo "$EXIT_CODE" + local EXIT_CODE="${EXIT_CODE:0-1}" + echo "return value is $EXIT_CODE" + if [ "$EXIT_CODE" == "1" ]; then + echo "=============Embedding container log==================" + docker logs embedding-tei-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log + echo "=============Retriever container log==================" + docker logs retriever-redis-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log + echo "=============Doc-index-retriever container log==================" + docker logs doc-index-retriever-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log + exit 1 + fi + + echo "================Testing retriever service: ChatCompletion Request================" + cd $WORKPATH/tests + local CONTENT=$(python test.py --host_ip ${ip_address} --request_type chat_completion) + local EXIT_CODE=$(validate "$CONTENT" "OPEA" "doc-index-retriever-service-xeon") + echo "$EXIT_CODE" + local EXIT_CODE="${EXIT_CODE:0-1}" + echo "return value is $EXIT_CODE" + if [ "$EXIT_CODE" == "1" ]; then + echo "=============Embedding container log==================" + docker logs embedding-tei-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log + echo "=============Retriever container log==================" + docker logs retriever-redis-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log + echo "=============Doc-index-retriever container log==================" + docker logs doc-index-retriever-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log + exit 1 + fi +} + +function stop_docker() { + cd $WORKPATH/docker_compose/intel/cpu/xeon + container_list=$(cat compose.yaml | grep container_name | cut -d':' -f2) + for container_name in $container_list; do + cid=$(docker ps -aq --filter "name=$container_name") + echo "Stopping container 
$container_name" + if [[ ! -z "$cid" ]]; then docker rm $cid -f && sleep 1s; fi + done +} + +function main() { + + stop_docker + build_docker_images + echo "Dump current docker ps" + docker ps + start_time=$(date +%s) + start_services + end_time=$(date +%s) + duration=$((end_time-start_time)) + echo "Mega service start duration is $duration s" + validate_megaservice + + stop_docker + echo y | docker system prune + +} + +main