Add ChatQnA Xeon workflow (#127)
Signed-off-by: chensuyue <suyue.chen@intel.com>
.github/workflows/ChatQnA_gaudi.yml (vendored, 3 changes)
@@ -4,12 +4,13 @@
name: ChatQnA E2E test on Gaudi

on:
-  pull_request_target:
+  pull_request:
+    branches: [main]
    types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
    paths:
      - ChatQnA/microservice/gaudi/**
      - "!**.md"
      - "!**/ui/**"
      - .github/workflows/ChatQnA_gaudi.yml
  workflow_dispatch:
.github/workflows/ChatQnA_xeon.yml (vendored, new file, 46 lines)
@@ -0,0 +1,46 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

name: ChatQnA E2E test on Xeon

on:
  pull_request:
    branches: [main]
    types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
    paths:
      - ChatQnA/microservice/xeon/**
      - "!**.md"
      - "!**/ui/**"
      - .github/workflows/ChatQnA_xeon.yml
  workflow_dispatch:

# If there is a new commit, the previous jobs will be canceled
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  ChatQnA:
    runs-on: aise-cluster
    steps:
      - name: Clean Up Working Directory
        run: sudo rm -rf ${{github.workspace}}/*

      - name: Checkout Repo
        uses: actions/checkout@v4
        with:
          ref: "refs/pull/${{ github.event.number }}/merge"

      - name: Run Test
        env:
          HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
          AISE_CLUSTER_01_2_IP: ${{ secrets.AISE_CLUSTER_01_2_IP }}
        run: |
          cd ${{ github.workspace }}/ChatQnA/tests
          bash test_chatqna_on_xeon.sh

      - name: Publish pipeline artifact
        if: ${{ !cancelled() }}
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}/ChatQnA/tests/*.log
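With the `workflow_dispatch` trigger above, the test can also be started by hand. A minimal sketch using the GitHub CLI, assuming `gh` is installed and authenticated for this repository; the branch name is only an example:

```bash
# Dispatch the Xeon E2E workflow on main
gh workflow run ChatQnA_xeon.yml --ref main

# Follow the run that was just started
gh run watch $(gh run list --workflow=ChatQnA_xeon.yml --limit 1 --json databaseId --jq '.[0].databaseId')
```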
@@ -64,11 +64,11 @@ export https_proxy=${your_http_proxy}
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-large"
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
-export TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6006"
-export TEI_RERANKING_ENDPOINT="http://${your_ip}:8808"
-export TGI_LLM_ENDPOINT="http://${your_ip}:9009"
-export REDIS_URL="redis://${your_ip}:6379"
-export INDEX_NAME=${your_index_name}
+export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006"
+export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
+export TGI_LLM_ENDPOINT="http://${host_ip}:9009"
+export REDIS_URL="redis://${host_ip}:6379"
+export INDEX_NAME="rag-redis"
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
```
@@ -83,7 +83,7 @@ docker compose -f docker_compose.yaml up -d
1. TEI Embedding Service

```bash
-curl ${your_ip}:6006/embed \
+curl ${host_ip}:6006/embed \
    -X POST \
    -d '{"inputs":"What is Deep Learning?"}' \
    -H 'Content-Type: application/json'
@@ -92,13 +92,13 @@ curl ${your_ip}:6006/embed \
2. Embedding Microservice

```bash
-curl http://${your_ip}:6000/v1/embeddings\
+curl http://${host_ip}:6000/v1/embeddings\
    -X POST \
    -d '{"text":"hello"}' \
    -H 'Content-Type: application/json'
```

3. Retriever Microservice

To validate the retriever microservice, you need to generate a mock embedding vector of length 768 with a Python script:

```Python
@@ -110,16 +110,16 @@ print(embedding)
Then substitute your mock embedding vector for the `${your_embedding}` in the following cURL command:

```bash
-curl http://${your_ip}:7000/v1/retrieval\
+curl http://${host_ip}:7000/v1/retrieval\
    -X POST \
-    -d '{"text":"What is the revenue of Nike in 2023?","embedding":${your_embedding}' \
+    -d '{"text":"What is the revenue of Nike in 2023?","embedding":${your_embedding}}' \
    -H 'Content-Type: application/json'
```
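For a quick end-to-end check, the mock vector can also be generated and substituted in a single shell session, much like the CI script `test_chatqna_on_xeon.sh` added by this commit does. A minimal sketch, assuming `host_ip` is already exported; note the payload must be built with double quotes so the shell expands the embedding variable:

```bash
# Generate a mock 768-dimensional embedding and capture it in a shell variable
your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")

# Build the JSON with double quotes (escaped inner quotes) so ${your_embedding} is expanded
curl http://${host_ip}:7000/v1/retrieval \
    -X POST \
    -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding}}" \
    -H 'Content-Type: application/json'
```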

4. TEI Reranking Service

```bash
-curl http://${your_ip}:8808/rerank \
+curl http://${host_ip}:8808/rerank \
    -X POST \
    -d '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' \
    -H 'Content-Type: application/json'
@@ -128,7 +128,7 @@ curl http://${your_ip}:8808/rerank \
5. Reranking Microservice

```bash
-curl http://${your_ip}:8000/v1/reranking\
+curl http://${host_ip}:8000/v1/reranking\
    -X POST \
    -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
    -H 'Content-Type: application/json'
@@ -137,7 +137,7 @@ curl http://${your_ip}:8000/v1/reranking\
6. TGI Service

```bash
-curl http://${your_ip}:9009/generate \
+curl http://${host_ip}:9009/generate \
    -X POST \
    -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
    -H 'Content-Type: application/json'
@@ -146,7 +146,7 @@ curl http://${your_ip}:9009/generate \
7. LLM Microservice

```bash
-curl http://${your_ip}:9000/v1/chat/completions\
+curl http://${host_ip}:9000/v1/chat/completions\
    -X POST \
    -d '{"text":"What is Deep Learning?"}' \
    -H 'Content-Type: application/json'
@@ -104,7 +104,7 @@ function check_microservices() {
curl http://${ip_address}:8008/generate \
    -X POST \
    -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":64, "do_sample": true}}' \
-    -H 'Content-Type: application/json' || docker logs tgi-gaudi-server > ${LOG_PATH}/generate.log
+    -H 'Content-Type: application/json' > ${LOG_PATH}/generate.log
sleep 5s

curl http://${ip_address}:9000/v1/chat/completions \
ChatQnA/tests/test_chatqna_on_xeon.sh (new file, 159 lines)
@@ -0,0 +1,159 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

set -xe

WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
cd $WORKPATH

function setup_test_env() {
    cd $WORKPATH
    # build conda env
    conda_env_name="test_GenAIExample"
    export PATH="${HOME}/miniconda3/bin:$PATH"
    conda remove --all -y -n ${conda_env_name}
    conda create python=3.10 -y -n ${conda_env_name}
    source activate ${conda_env_name}

    # install comps
    git clone https://github.com/opea-project/GenAIComps.git
    cd GenAIComps
    pip install -r requirements.txt
    pip install .
    pip list
}

function build_docker_image() {
    cd $WORKPATH/GenAIComps

    docker build -t opea/gen-ai-comps:embedding-tei-server -f comps/embeddings/langchain/docker/Dockerfile .
    docker build -t opea/gen-ai-comps:retriever-redis-server -f comps/retrievers/langchain/docker/Dockerfile .
    docker build -t opea/gen-ai-comps:reranking-tei-xeon-server -f comps/reranks/docker/Dockerfile .
    docker build -t opea/gen-ai-comps:llm-tgi-server -f comps/llms/langchain/docker/Dockerfile .

    docker images
}

function start_microservices() {
    cd $WORKPATH

    # Resolve the host IP from a variable named after the uppercased hostname
    # (e.g. AISE_CLUSTER_01_2_IP, which the workflow passes in as a secret)
    ip_name=$(echo $(hostname) | tr '[a-z]-' '[A-Z]_')_$(echo 'IP')
    ip_address=$(eval echo '$'$ip_name)

    export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
    export RERANK_MODEL_ID="BAAI/bge-reranker-large"
    export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
    export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:6006"
    export TEI_RERANKING_ENDPOINT="http://${ip_address}:8808"
    export TGI_LLM_ENDPOINT="http://${ip_address}:9009"
    export REDIS_URL="redis://${ip_address}:6379"
    export INDEX_NAME="rag-redis"
    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}

    # Start Microservice Docker Containers
    # TODO: Replace the container name with a test-specific name
    cd microservice/xeon
    docker compose -f docker_compose.yaml up -d

    sleep 1m # Wait 1 minute for the containers to come up
}

function check_microservices() {
    # Check if the microservices are running correctly.
    # TODO: Any results check required?
    curl ${ip_address}:6006/embed \
        -X POST \
        -d '{"inputs":"What is Deep Learning?"}' \
        -H 'Content-Type: application/json' > ${LOG_PATH}/embed.log
    sleep 5s

    curl http://${ip_address}:6000/v1/embeddings \
        -X POST \
        -d '{"text":"hello"}' \
        -H 'Content-Type: application/json' > ${LOG_PATH}/embeddings.log
    sleep 5s

    test_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
    # Build the payload with double quotes so ${test_embedding} is expanded by the shell
    curl http://${ip_address}:7000/v1/retrieval \
        -X POST \
        -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${test_embedding}}" \
        -H 'Content-Type: application/json' > ${LOG_PATH}/retrieval.log
    sleep 5s

    curl http://${ip_address}:8808/rerank \
        -X POST \
        -d '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' \
        -H 'Content-Type: application/json' > ${LOG_PATH}/rerank.log
    sleep 5s

    curl http://${ip_address}:8000/v1/reranking \
        -X POST \
        -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
        -H 'Content-Type: application/json' > ${LOG_PATH}/reranking.log
    sleep 1m

    curl http://${ip_address}:9009/generate \
        -X POST \
        -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
        -H 'Content-Type: application/json' > ${LOG_PATH}/generate.log
    sleep 5s

    curl http://${ip_address}:9000/v1/chat/completions \
        -X POST \
        -d '{"text":"What is Deep Learning?"}' \
        -H 'Content-Type: application/json' > ${LOG_PATH}/completions.log
    sleep 5s
}

function run_megaservice() {
    python chatqna.py > ${LOG_PATH}/run_megaservice.log
}

function check_results() {
    echo "Checking response results, make sure the output is reasonable."
    local status=false
    if [[ -f $LOG_PATH/run_megaservice.log ]] && [[ $(grep -c "\$51.2 billion" $LOG_PATH/run_megaservice.log) != 0 ]]; then
        status=true
    fi

    if [ $status == false ]; then
        echo "Response check failed, please check the logs in artifacts!"
        exit 1
    else
        echo "Response check succeeded!"
    fi

    echo "Checking response format, make sure the output format is acceptable for UI."
    # TODO
}

function stop_docker() {
    cd $WORKPATH/microservice/xeon
    container_list=$(cat docker_compose.yaml | grep container_name | cut -d':' -f2)
    for container_name in $container_list; do
        cid=$(docker ps -aq --filter "name=$container_name")
        if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
    done
}

function main() {

    stop_docker

    setup_test_env
    build_docker_image

    start_microservices
    check_microservices

    run_megaservice
    check_results

    stop_docker
    echo y | docker system prune

}

main
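For local debugging outside CI, the same script can be run by hand on a Xeon host. A minimal sketch, assuming Docker, conda, and a valid Hugging Face token are available; `${your_hf_api_token}` and `${host_ip}` are placeholders, and the `<HOSTNAME>_IP` variable mirrors the secret the workflow injects (e.g. `AISE_CLUSTER_01_2_IP`):

```bash
# Provide the values the workflow would normally pass in as secrets
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
# The script looks up the host address in a variable named after the uppercased hostname, e.g. MY_HOST_IP
export $(hostname | tr '[a-z]-' '[A-Z]_')_IP=${host_ip}

# Run the end-to-end test from the tests directory, as the workflow's "Run Test" step does
cd ChatQnA/tests
bash test_chatqna_on_xeon.sh
```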