Add ChatQnA Xeon workflow (#127)
Signed-off-by: chensuyue <suyue.chen@intel.com>
.github/workflows/ChatQnA_gaudi.yml (vendored, 3 changes)
@@ -4,12 +4,13 @@
name: ChatQnA E2E test on Gaudi

on:
-  pull_request_target:
+  pull_request:
+    branches: [main]
    types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
    paths:
      - ChatQnA/microservice/gaudi/**
      - "!**.md"
      - "!**/ui/**"
      - .github/workflows/ChatQnA_gaudi.yml
  workflow_dispatch:
.github/workflows/ChatQnA_xeon.yml (vendored, new file, 46 lines)
@@ -0,0 +1,46 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

name: ChatQnA E2E test on Xeon

on:
  pull_request:
    branches: [main]
    types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
    paths:
      - ChatQnA/microservice/xeon/**
      - "!**.md"
      - "!**/ui/**"
      - .github/workflows/ChatQnA_xeon.yml
  workflow_dispatch:

# If there is a new commit, the previous jobs will be canceled
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  ChatQnA:
    runs-on: aise-cluster
    steps:
      - name: Clean Up Working Directory
        run: sudo rm -rf ${{github.workspace}}/*

      - name: Checkout Repo
        uses: actions/checkout@v4
        with:
          ref: "refs/pull/${{ github.event.number }}/merge"

      - name: Run Test
        env:
          HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
          AISE_CLUSTER_01_2_IP: ${{ secrets.AISE_CLUSTER_01_2_IP }}
        run: |
          cd ${{ github.workspace }}/ChatQnA/tests
          bash test_chatqna_on_xeon.sh

      - name: Publish pipeline artifact
        if: ${{ !cancelled() }}
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}/ChatQnA/tests/*.log
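With the `workflow_dispatch` trigger above, the test can also be started by hand. A minimal sketch using the GitHub CLI, assuming `gh` is installed and authenticated for this repository; the branch name is only an example:

```bash
# Dispatch the Xeon E2E workflow on main
gh workflow run ChatQnA_xeon.yml --ref main

# Follow the run that was just started
gh run watch $(gh run list --workflow=ChatQnA_xeon.yml --limit 1 --json databaseId --jq '.[0].databaseId')
```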
@@ -64,11 +64,11 @@ export https_proxy=${your_http_proxy}
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-large"
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
-export TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6006"
-export TEI_RERANKING_ENDPOINT="http://${your_ip}:8808"
-export TGI_LLM_ENDPOINT="http://${your_ip}:9009"
-export REDIS_URL="redis://${your_ip}:6379"
-export INDEX_NAME=${your_index_name}
+export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006"
+export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
+export TGI_LLM_ENDPOINT="http://${host_ip}:9009"
+export REDIS_URL="redis://${host_ip}:6379"
+export INDEX_NAME="rag-redis"
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
```
@@ -83,7 +83,7 @@ docker compose -f docker_compose.yaml up -d
1. TEI Embedding Service

```bash
-curl ${your_ip}:6006/embed \
+curl ${host_ip}:6006/embed \
    -X POST \
    -d '{"inputs":"What is Deep Learning?"}' \
    -H 'Content-Type: application/json'
@@ -92,13 +92,13 @@ curl ${your_ip}:6006/embed \
2. Embedding Microservice

```bash
-curl http://${your_ip}:6000/v1/embeddings\
+curl http://${host_ip}:6000/v1/embeddings\
    -X POST \
    -d '{"text":"hello"}' \
    -H 'Content-Type: application/json'
```

3. Retriever Microservice

To validate the retriever microservice, you need to generate a mock embedding vector of length 768 with a Python script:

```Python
@@ -110,16 +110,16 @@ print(embedding)
Then substitute your mock embedding vector for the `${your_embedding}` in the following cURL command:

```bash
-curl http://${your_ip}:7000/v1/retrieval\
+curl http://${host_ip}:7000/v1/retrieval\
    -X POST \
-    -d '{"text":"What is the revenue of Nike in 2023?","embedding":${your_embedding}' \
+    -d '{"text":"What is the revenue of Nike in 2023?","embedding":${your_embedding}}' \
    -H 'Content-Type: application/json'
```
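For a quick end-to-end check, the mock vector can also be generated and substituted in a single shell session, much like the CI script `test_chatqna_on_xeon.sh` added by this commit does. A minimal sketch, assuming `host_ip` is already exported; note the payload must be built with double quotes so the shell expands the embedding variable:

```bash
# Generate a mock 768-dimensional embedding and capture it in a shell variable
your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")

# Build the JSON with double quotes (escaped inner quotes) so ${your_embedding} is expanded
curl http://${host_ip}:7000/v1/retrieval \
    -X POST \
    -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding}}" \
    -H 'Content-Type: application/json'
```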

4. TEI Reranking Service

```bash
-curl http://${your_ip}:8808/rerank \
+curl http://${host_ip}:8808/rerank \
    -X POST \
    -d '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' \
    -H 'Content-Type: application/json'
@@ -128,7 +128,7 @@ curl http://${your_ip}:8808/rerank \
5. Reranking Microservice

```bash
-curl http://${your_ip}:8000/v1/reranking\
+curl http://${host_ip}:8000/v1/reranking\
    -X POST \
    -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
    -H 'Content-Type: application/json'
@@ -137,7 +137,7 @@ curl http://${your_ip}:8000/v1/reranking\
6. TGI Service

```bash
-curl http://${your_ip}:9009/generate \
+curl http://${host_ip}:9009/generate \
    -X POST \
    -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
    -H 'Content-Type: application/json'
@@ -146,7 +146,7 @@ curl http://${your_ip}:9009/generate \
7. LLM Microservice

```bash
-curl http://${your_ip}:9000/v1/chat/completions\
+curl http://${host_ip}:9000/v1/chat/completions\
    -X POST \
    -d '{"text":"What is Deep Learning?"}' \
    -H 'Content-Type: application/json'
@@ -104,7 +104,7 @@ function check_microservices() {
curl http://${ip_address}:8008/generate \
    -X POST \
    -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":64, "do_sample": true}}' \
-    -H 'Content-Type: application/json' || docker logs tgi-gaudi-server > ${LOG_PATH}/generate.log
+    -H 'Content-Type: application/json' > ${LOG_PATH}/generate.log
sleep 5s

curl http://${ip_address}:9000/v1/chat/completions \
ChatQnA/tests/test_chatqna_on_xeon.sh (new file, 159 lines)
@@ -0,0 +1,159 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

set -xe

WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
cd $WORKPATH

function setup_test_env() {
    cd $WORKPATH
    # build conda env
    conda_env_name="test_GenAIExample"
    export PATH="${HOME}/miniconda3/bin:$PATH"
    conda remove --all -y -n ${conda_env_name}
    conda create python=3.10 -y -n ${conda_env_name}
    source activate ${conda_env_name}

    # install comps
    git clone https://github.com/opea-project/GenAIComps.git
    cd GenAIComps
    pip install -r requirements.txt
    pip install .
    pip list
}

function build_docker_image() {
    cd $WORKPATH/GenAIComps

    docker build -t opea/gen-ai-comps:embedding-tei-server -f comps/embeddings/langchain/docker/Dockerfile .
    docker build -t opea/gen-ai-comps:retriever-redis-server -f comps/retrievers/langchain/docker/Dockerfile .
    docker build -t opea/gen-ai-comps:reranking-tei-xeon-server -f comps/reranks/docker/Dockerfile .
    docker build -t opea/gen-ai-comps:llm-tgi-server -f comps/llms/langchain/docker/Dockerfile .

    docker images
}

function start_microservices() {
    cd $WORKPATH

    # Resolve the host IP from a variable named after the uppercased hostname
    # (e.g. AISE_CLUSTER_01_2_IP, which the workflow passes in as a secret)
    ip_name=$(echo $(hostname) | tr '[a-z]-' '[A-Z]_')_$(echo 'IP')
    ip_address=$(eval echo '$'$ip_name)

    export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
    export RERANK_MODEL_ID="BAAI/bge-reranker-large"
    export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
    export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:6006"
    export TEI_RERANKING_ENDPOINT="http://${ip_address}:8808"
    export TGI_LLM_ENDPOINT="http://${ip_address}:9009"
    export REDIS_URL="redis://${ip_address}:6379"
    export INDEX_NAME="rag-redis"
    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}

    # Start Microservice Docker Containers
    # TODO: Replace the container name with a test-specific name
    cd microservice/xeon
    docker compose -f docker_compose.yaml up -d

    sleep 1m # Wait 1 minute for the containers to come up
}

function check_microservices() {
    # Check if the microservices are running correctly.
    # TODO: Any results check required?
    curl ${ip_address}:6006/embed \
        -X POST \
        -d '{"inputs":"What is Deep Learning?"}' \
        -H 'Content-Type: application/json' > ${LOG_PATH}/embed.log
    sleep 5s

    curl http://${ip_address}:6000/v1/embeddings \
        -X POST \
        -d '{"text":"hello"}' \
        -H 'Content-Type: application/json' > ${LOG_PATH}/embeddings.log
    sleep 5s

    test_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
    # Build the payload with double quotes so ${test_embedding} is expanded by the shell
    curl http://${ip_address}:7000/v1/retrieval \
        -X POST \
        -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${test_embedding}}" \
        -H 'Content-Type: application/json' > ${LOG_PATH}/retrieval.log
    sleep 5s

    curl http://${ip_address}:8808/rerank \
        -X POST \
        -d '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' \
        -H 'Content-Type: application/json' > ${LOG_PATH}/rerank.log
    sleep 5s

    curl http://${ip_address}:8000/v1/reranking \
        -X POST \
        -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
        -H 'Content-Type: application/json' > ${LOG_PATH}/reranking.log
    sleep 1m

    curl http://${ip_address}:9009/generate \
        -X POST \
        -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
        -H 'Content-Type: application/json' > ${LOG_PATH}/generate.log
    sleep 5s

    curl http://${ip_address}:9000/v1/chat/completions \
        -X POST \
        -d '{"text":"What is Deep Learning?"}' \
        -H 'Content-Type: application/json' > ${LOG_PATH}/completions.log
    sleep 5s
}

function run_megaservice() {
    python chatqna.py > ${LOG_PATH}/run_megaservice.log
}

function check_results() {
    echo "Checking response results, make sure the output is reasonable."
    local status=false
    if [[ -f $LOG_PATH/run_megaservice.log ]] && [[ $(grep -c "\$51.2 billion" $LOG_PATH/run_megaservice.log) != 0 ]]; then
        status=true
    fi

    if [ $status == false ]; then
        echo "Response check failed, please check the logs in artifacts!"
        exit 1
    else
        echo "Response check succeeded!"
    fi

    echo "Checking response format, make sure the output format is acceptable for UI."
    # TODO
}

function stop_docker() {
    cd $WORKPATH/microservice/xeon
    container_list=$(cat docker_compose.yaml | grep container_name | cut -d':' -f2)
    for container_name in $container_list; do
        cid=$(docker ps -aq --filter "name=$container_name")
        if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
    done
}

function main() {

    stop_docker

    setup_test_env
    build_docker_image

    start_microservices
    check_microservices

    run_megaservice
    check_results

    stop_docker
    echo y | docker system prune

}

main
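For local debugging outside CI, the same script can be run by hand on a Xeon host. A minimal sketch, assuming Docker, conda, and a valid Hugging Face token are available; `${your_hf_api_token}` and `${host_ip}` are placeholders, and the `<HOSTNAME>_IP` variable mirrors the secret the workflow injects (e.g. `AISE_CLUSTER_01_2_IP`):

```bash
# Provide the values the workflow would normally pass in as secrets
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
# The script looks up the host address in a variable named after the uppercased hostname, e.g. MY_HOST_IP
export $(hostname | tr '[a-z]-' '[A-Z]_')_IP=${host_ip}

# Run the end-to-end test from the tests directory, as the workflow's "Run Test" step does
cd ChatQnA/tests
bash test_chatqna_on_xeon.sh
```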