Update DocIndexRetriever Example to allow user passing in retriever/reranker params (#880)

Signed-off-by: minmin-intel <minmin.hou@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
minmin-intel
2024-09-26 19:21:54 -07:00
committed by GitHub
parent bd32b03e3c
commit 62e06a0aff
8 changed files with 188 additions and 12 deletions

View File

@@ -1,8 +1,22 @@
# DocRetriever Application
DocRetriever is the most widely adopted use case for leveraging different methodologies to match a user query against a set of free-text records. DocRetriever is essential to a RAG system, which bridges the knowledge gap by dynamically fetching relevant information from external sources, ensuring that generated responses remain factual and current. At the core of this architecture are vector databases, which are instrumental in enabling efficient, semantic retrieval of information. These databases store data as vectors, allowing RAG to swiftly access the most pertinent documents or data points based on semantic similarity.
## We provide DocRetriever with different deployment infra
- [docker xeon version](docker_compose/intel/cpu/xeon/README.md) => minimum endpoints, easy to set up
- [docker gaudi version](docker_compose/intel/hpu/gaudi/README.md) => with extra tei_gaudi endpoint, faster
## We allow users to set retriever/reranker hyperparams via requests
Example usage:
```python
import requests

host_ip = "localhost"  # replace with the MegaService host IP
port = 8889  # default DocIndexRetriever MegaService port
query = "Explain the OPEA project?"

url = "http://{host_ip}:{port}/v1/retrievaltool".format(host_ip=host_ip, port=port)
payload = {
    "messages": query,
    "k": 5,  # retriever top k
    "top_n": 2,  # reranker top n
}
response = requests.post(url, json=payload)
print(response.json())
```
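Any field omitted from the payload falls back to its service-side default; the full list of tunable retriever and reranker parameters, with their defaults, is given in the deployment READMEs linked above.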

View File

@@ -79,13 +79,26 @@ Retrieval from KnowledgeBase
```bash
curl http://${host_ip}:8889/v1/retrievaltool -X POST -H "Content-Type: application/json" -d '{
"text": "Explain the OPEA project?"
"messages": "Explain the OPEA project?"
}'
# expected output
{"id":"354e62c703caac8c547b3061433ec5e8","reranked_docs":[{"id":"06d5a5cefc06cf9a9e0b5fa74a9f233c","text":"Close SearchsearchMenu WikiNewsCommunity Daysx-twitter linkedin github searchStreamlining implementation of enterprise-grade Generative AIEfficiently integrate secure, performant, and cost-effective Generative AI workflows into business value.TODAYOPEA..."}],"initial_query":"Explain the OPEA project?"}
```
**Note**: `messages` is the required field. You can also pass parameters for the retriever and reranker in the request. The parameters that can be changed are listed below, followed by an example request.
1. retriever
   * search_type: str = "similarity"
   * k: int = 4
   * distance_threshold: Optional[float] = None
   * fetch_k: int = 20
   * lambda_mult: float = 0.5
   * score_threshold: float = 0.2
2. reranker
   * top_n: int = 1
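For example, a minimal sketch of a request that switches the retriever to MMR search and keeps two reranked documents. The parameter names come from the list above; the `"mmr"` value follows common LangChain retriever conventions and, like the host address, is an assumption to adapt to your deployment:
```python
import requests

host_ip = "localhost"  # assumption: replace with your deployment's host IP
url = f"http://{host_ip}:8889/v1/retrievaltool"
payload = {
    "messages": "Explain the OPEA project?",
    "search_type": "mmr",  # assumed LangChain-style value; default is "similarity"
    "k": 4,  # documents returned by the retriever
    "fetch_k": 20,  # candidate pool that MMR selects from
    "lambda_mult": 0.5,  # MMR relevance/diversity trade-off
    "top_n": 2,  # documents kept by the reranker
}
print(requests.post(url, json=payload).json())
```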
## 5. Troubleshooting
1. Check that all containers are alive

View File

@@ -74,13 +74,30 @@ services:
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
    restart: unless-stopped
  tei-reranking-service:
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
    container_name: tei-reranking-server
    ports:
      - "8808:80"
    volumes:
      - "./data:/data"
    shm_size: 1g
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
    command: --model-id ${RERANK_MODEL_ID} --auto-truncate
  reranking:
    image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
    container_name: reranking-tei-xeon-server
    depends_on:
      - tei-reranking-service
    ports:
      - "8000:8000"
    ipc: host
    entrypoint: python local_reranking.py
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
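Once the stack is up, a quick smoke test can confirm the new reranker endpoint is serving. This is a minimal sketch assuming the host port mapping above and the standard TEI `/rerank` route:

```python
import requests

# Query the TEI reranker published on host port 8808 in the compose file above.
# "localhost" is an assumption; use the compose host's IP if testing remotely.
resp = requests.post(
    "http://localhost:8808/rerank",
    json={"query": "What is OPEA?", "texts": ["OPEA is an open platform for enterprise AI.", "Unrelated text."]},
)
resp.raise_for_status()
print(resp.json())  # a list of {"index": ..., "score": ...} entries, ranked by score
```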

View File

@@ -80,13 +80,26 @@ Retrieval from KnowledgeBase
```bash
curl http://${host_ip}:8889/v1/retrievaltool -X POST -H "Content-Type: application/json" -d '{
"text": "Explain the OPEA project?"
"messages": "Explain the OPEA project?"
}'
# expected output
{"id":"354e62c703caac8c547b3061433ec5e8","reranked_docs":[{"id":"06d5a5cefc06cf9a9e0b5fa74a9f233c","text":"Close SearchsearchMenu WikiNewsCommunity Daysx-twitter linkedin github searchStreamlining implementation of enterprise-grade Generative AIEfficiently integrate secure, performant, and cost-effective Generative AI workflows into business value.TODAYOPEA..."}],"initial_query":"Explain the OPEA project?"}
```
**Note**: `messages` is the required field. You can also pass parameters for the retriever and reranker in the request. The parameters that can be changed are listed below, followed by an example request.
1. retriever
   * search_type: str = "similarity"
   * k: int = 4
   * distance_threshold: Optional[float] = None
   * fetch_k: int = 20
   * lambda_mult: float = 0.5
   * score_threshold: float = 0.2
2. reranker
   * top_n: int = 1
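For example, a minimal sketch of a request that filters retrieved documents by similarity score and keeps a single reranked document. The parameter names come from the list above; the `"similarity_score_threshold"` value is an assumption based on LangChain retriever conventions, as is the host address:
```python
import requests

host_ip = "localhost"  # assumption: replace with your deployment's host IP
url = f"http://{host_ip}:8889/v1/retrievaltool"
payload = {
    "messages": "Explain the OPEA project?",
    "search_type": "similarity_score_threshold",  # assumed LangChain-style value
    "score_threshold": 0.5,  # drop hits scoring below this threshold
    "top_n": 1,  # reranker keeps only the best document
}
print(requests.post(url, json=payload).json())
```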
## 5. Troubleshooting
1. Check that all containers are alive

View File

@@ -77,13 +77,30 @@ services:
      REDIS_URL: ${REDIS_URL}
      INDEX_NAME: ${INDEX_NAME}
    restart: unless-stopped
  tei-reranking-service:
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
    container_name: tei-reranking-gaudi-server
    ports:
      - "8808:80"
    volumes:
      - "./data:/data"
    shm_size: 1g
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
    command: --model-id ${RERANK_MODEL_ID} --auto-truncate
  reranking:
    image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
    container_name: reranking-tei-gaudi-server
    depends_on:
      - tei-reranking-service
    ports:
      - "8000:8000"
    ipc: host
    entrypoint: python local_reranking.py
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
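To sanity-check the `reranking` wrapper itself (host port 8000 above), a sketch follows; the `/v1/reranking` route and the `initial_query`/`retrieved_docs` payload shape are assumptions based on the GenAIComps reranking microservice and may differ across versions:

```python
import requests

# Assumed GenAIComps-style reranking request; verify the route and schema
# against your deployed comps version before relying on this.
resp = requests.post(
    "http://localhost:8000/v1/reranking",  # "localhost" is an assumption
    json={
        "initial_query": "What is OPEA?",
        "retrieved_docs": [
            {"text": "OPEA is an open platform for enterprise AI."},
            {"text": "Unrelated text."},
        ],
    },
)
print(resp.json())
```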

View File

@@ -0,0 +1,71 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import argparse

import requests


def search_knowledge_base(query: str, url: str, request_type="chat_completion") -> str:
    """Search the knowledge base for a specific query."""
    print(url)
    proxies = {"http": ""}
    if request_type == "chat_completion":
        print("Sending chat completion request")
        payload = {
            "messages": query,
            "k": 5,
            "top_n": 2,
        }
    else:
        print("Sending text request")
        payload = {
            "text": query,
        }
    response = requests.post(url, json=payload, proxies=proxies)
    print(response)
    if "documents" in response.json():
        docs = response.json()["documents"]
        context = ""
        for i, doc in enumerate(docs):
            if i == 0:
                context = str(i) + ": " + doc
            else:
                context += "\n" + str(i) + ": " + doc
        # print(context)
        return context
    elif "text" in response.json():
        return response.json()["text"]
    elif "reranked_docs" in response.json():
        docs = response.json()["reranked_docs"]
        context = ""
        for i, doc in enumerate(docs):
            if i == 0:
                context = doc["text"]
            else:
                context += "\n" + doc["text"]
        # print(context)
        return context
    else:
        return "Error parsing response from the knowledge base."


def main():
    parser = argparse.ArgumentParser(description="Index data")
    parser.add_argument("--host_ip", type=str, default="localhost", help="Host IP")
    parser.add_argument("--port", type=int, default=8889, help="Port")
    parser.add_argument("--request_type", type=str, default="chat_completion", help="Test type")
    args = parser.parse_args()
    print(args)

    host_ip = args.host_ip
    port = args.port
    url = "http://{host_ip}:{port}/v1/retrievaltool".format(host_ip=host_ip, port=port)
    response = search_knowledge_base("OPEA", url, request_type=args.request_type)
    print(response)


if __name__ == "__main__":
    main()

View File

@@ -64,7 +64,7 @@ function validate() {
}
function validate_megaservice() {
    echo "=========Ingest data=================="
    local CONTENT=$(curl -X POST "http://${ip_address}:6007/v1/dataprep" \
        -H "Content-Type: multipart/form-data" \
        -F 'link_list=["https://opea.dev"]')
@@ -78,7 +78,7 @@ function validate_megaservice() {
    fi

    # Curl the Mega Service
    echo "==============Testing retriever service: Text Request================="
    local CONTENT=$(curl http://${ip_address}:8889/v1/retrievaltool -X POST -H "Content-Type: application/json" -d '{
        "text": "Explain the OPEA project?"
    }')
@@ -93,6 +93,21 @@ function validate_megaservice() {
        docker logs doc-index-retriever-server | tee -a ${LOG_PATH}/doc-index-retriever-service-gaudi.log
        exit 1
    fi

    echo "==============Testing retriever service: ChatCompletion Request================"
    cd $WORKPATH/tests
    local CONTENT=$(python test.py --host_ip ${ip_address} --request_type chat_completion)
    local EXIT_CODE=$(validate "$CONTENT" "OPEA" "doc-index-retriever-service-gaudi")
    echo "$EXIT_CODE"
    local EXIT_CODE="${EXIT_CODE:0-1}"
    echo "return value is $EXIT_CODE"
    if [ "$EXIT_CODE" == "1" ]; then
        docker logs tei-embedding-gaudi-server | tee -a ${LOG_PATH}/doc-index-retriever-service-gaudi.log
        docker logs retriever-redis-server | tee -a ${LOG_PATH}/doc-index-retriever-service-gaudi.log
        docker logs reranking-tei-server | tee -a ${LOG_PATH}/doc-index-retriever-service-gaudi.log
        docker logs doc-index-retriever-server | tee -a ${LOG_PATH}/doc-index-retriever-service-gaudi.log
        exit 1
    fi
}
function stop_docker() {

View File

@@ -63,8 +63,8 @@ function validate() {
}
function validate_megaservice() {
    echo "===========Ingest data=================="
    local CONTENT=$(http_proxy="" curl -X POST "http://${ip_address}:6007/v1/dataprep" \
        -H "Content-Type: multipart/form-data" \
        -F 'link_list=["https://opea.dev"]')
    local EXIT_CODE=$(validate "$CONTENT" "Data preparation succeeded" "dataprep-redis-service-xeon")
@@ -77,16 +77,32 @@ function validate_megaservice() {
    fi

    # Curl the Mega Service
    echo "================Testing retriever service: Default params================"
    local CONTENT=$(curl http://${ip_address}:8889/v1/retrievaltool -X POST -H "Content-Type: application/json" -d '{
        "messages": "Explain the OPEA project?"
    }')
    local EXIT_CODE=$(validate "$CONTENT" "OPEA" "doc-index-retriever-service-xeon")
    echo "$EXIT_CODE"
    local EXIT_CODE="${EXIT_CODE:0-1}"
    echo "return value is $EXIT_CODE"
    if [ "$EXIT_CODE" == "1" ]; then
        docker logs tei-embedding-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log
        docker logs retriever-redis-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log
        docker logs reranking-tei-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log
        docker logs doc-index-retriever-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log
        exit 1
    fi

    echo "================Testing retriever service: ChatCompletion Request================"
    cd $WORKPATH/tests
    local CONTENT=$(python test.py --host_ip ${ip_address} --request_type chat_completion)
    local EXIT_CODE=$(validate "$CONTENT" "OPEA" "doc-index-retriever-service-xeon")
    echo "$EXIT_CODE"
    local EXIT_CODE="${EXIT_CODE:0-1}"
    echo "return value is $EXIT_CODE"
    if [ "$EXIT_CODE" == "1" ]; then
        docker logs tei-embedding-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log
        docker logs retriever-redis-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log
        docker logs reranking-tei-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log
        docker logs doc-index-retriever-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log