From 08f9d58fd2b06ca6b73e520ebe635dab59304554 Mon Sep 17 00:00:00 2001
From: "chen, suyue"
Date: Tue, 9 Apr 2024 12:46:34 +0800
Subject: [PATCH] Add VisualQnA and SearchQnA CI test (#54)

Co-authored-by: Sihan Chen <39623753+Spycsh@users.noreply.github.com>
---
 .github/workflows/ChatQnA.yml               |   2 +-
 .github/workflows/SearchQnA.yml             |  46 ++++++++
 .github/workflows/VisualQnA.yml             |  43 +++++++
 ChatQnA/langchain/docker/docker-compose.yml |   2 +-
 ChatQnA/serving/tgi_gaudi/build_docker.sh   |   4 +-
 ChatQnA/tests/test_langchain_inference.sh   |   4 +-
 SearchQnA/README.md                         |  20 +++-
 SearchQnA/langchain/docker/Dockerfile       |   5 +-
 .../serving/tgi_gaudi/launch_tgi_service.sh |   8 +-
 SearchQnA/tests/test_langchain_inference.sh | 107 ++++++++++++++++++
 SearchQnA/ui/.env                           |   2 +-
 VisualQnA/README.md                         |   4 +-
 VisualQnA/tests/test_basic_inference.sh     |  77 +++++++++++++
 VisualQnA/ui/requirements.txt               |   1 +
 14 files changed, 305 insertions(+), 20 deletions(-)
 create mode 100644 .github/workflows/SearchQnA.yml
 create mode 100644 .github/workflows/VisualQnA.yml
 create mode 100644 SearchQnA/tests/test_langchain_inference.sh
 create mode 100644 VisualQnA/tests/test_basic_inference.sh

diff --git a/.github/workflows/ChatQnA.yml b/.github/workflows/ChatQnA.yml
index d6eb731ef..ed9ca569b 100644
--- a/.github/workflows/ChatQnA.yml
+++ b/.github/workflows/ChatQnA.yml
@@ -30,7 +30,7 @@ jobs:
       - name: Run Test
         env:
-          HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
+          HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
         run: |
           cd ${{ github.workspace }}/ChatQnA/tests
           bash test_${{ matrix.job_name }}_inference.sh
diff --git a/.github/workflows/SearchQnA.yml b/.github/workflows/SearchQnA.yml
new file mode 100644
index 000000000..6bbce7aa7
--- /dev/null
+++ b/.github/workflows/SearchQnA.yml
@@ -0,0 +1,46 @@
+name: SearchQnA-test
+
+on:
+  pull_request:
+    branches: [main]
+    types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
+    paths:
+      - SearchQnA/**
+      - .github/workflows/SearchQnA.yml
+  workflow_dispatch:
+
+# If there is a new commit, the previous jobs will be canceled
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  SearchQnA:
+    runs-on: gaudi2
+    strategy:
+      matrix:
+        job_name: ["langchain"]
+      fail-fast: false
+    steps:
+      - name: Clean Up Working Directory
+        run: sudo rm -rf ${{github.workspace}}/*
+
+      - name: Checkout Repo
+        uses: actions/checkout@v4
+
+      - name: Run Test
+        env:
+          HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
+          GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
+          GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
+          AISE_GAUDI_00_IP: ${{ secrets.AISE_GAUDI_00_IP }}
+        run: |
+          cd ${{ github.workspace }}/SearchQnA/tests
+          bash test_${{ matrix.job_name }}_inference.sh
+
+      - name: Publish pipeline artifact
+        if: ${{ !cancelled() }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ matrix.job_name }}
+          path: ${{ github.workspace }}/SearchQnA/tests/*.log
diff --git a/.github/workflows/VisualQnA.yml b/.github/workflows/VisualQnA.yml
new file mode 100644
index 000000000..7f2c37792
--- /dev/null
+++ b/.github/workflows/VisualQnA.yml
@@ -0,0 +1,43 @@
+name: VisualQnA-test
+
+on:
+  pull_request:
+    branches: [main]
+    types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
+    paths:
+      - VisualQnA/**
+      - .github/workflows/VisualQnA.yml
+  workflow_dispatch:
+
+# If there is a new commit, the previous jobs will be canceled
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  VisualQnA:
+    runs-on: gaudi2
+    strategy:
+      matrix:
+        job_name: ["basic"]
+      fail-fast: false
+    steps:
+      - name: Clean Up Working Directory
+        run: sudo rm -rf ${{github.workspace}}/*
+
+      - name: Checkout Repo
+        uses: actions/checkout@v4
+
+      - name: Run Test
+        env:
+          HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
+        run: |
+          cd ${{ github.workspace }}/VisualQnA/tests
+          bash test_${{ matrix.job_name }}_inference.sh
+
+      - name: Publish pipeline artifact
+        if: ${{ !cancelled() }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ matrix.job_name }}
+          path: ${{ github.workspace }}/VisualQnA/tests/*.log
diff --git a/ChatQnA/langchain/docker/docker-compose.yml b/ChatQnA/langchain/docker/docker-compose.yml
index 47ce30654..ac8e34742 100644
--- a/ChatQnA/langchain/docker/docker-compose.yml
+++ b/ChatQnA/langchain/docker/docker-compose.yml
@@ -14,7 +14,7 @@ services:
   redis-vector-db:
-    image: redis/redis-stack:latest
+    image: redis/redis-stack:7.2.0-v9
     container_name: redis-vector-db
     ports:
      - "6379:6379"
diff --git a/ChatQnA/serving/tgi_gaudi/build_docker.sh b/ChatQnA/serving/tgi_gaudi/build_docker.sh
index 6e38c86d7..0dd7283d4 100644
--- a/ChatQnA/serving/tgi_gaudi/build_docker.sh
+++ b/ChatQnA/serving/tgi_gaudi/build_docker.sh
@@ -1,3 +1,5 @@
+#!/bin/bash
+
 # Copyright (c) 2024 Intel Corporation
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 # http://www.apache.org/licenses/LICENSE-2.0
@@ -12,8 +14,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-#!/bin/bash
-
 git clone https://github.com/huggingface/tgi-gaudi.git
 cd ./tgi-gaudi/
 docker build -t ghcr.io/huggingface/tgi-gaudi:1.2.1 . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy
diff --git a/ChatQnA/tests/test_langchain_inference.sh b/ChatQnA/tests/test_langchain_inference.sh
index 515a680d0..a4af03f9a 100644
--- a/ChatQnA/tests/test_langchain_inference.sh
+++ b/ChatQnA/tests/test_langchain_inference.sh
@@ -52,7 +52,7 @@ function launch_tgi_gaudi_service() {
 
 function launch_redis_and_langchain_service() {
     cd $WORKPATH
-    export HUGGINGFACEHUB_API_TOKEN=${HUGGING_FACE_TOKEN}
+    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
     local port=8890
     sed -i "s/port=8000/port=$port/g" langchain/docker/qna-app/app/server.py
     docker compose -f langchain/docker/docker-compose.yml up -d --build
@@ -82,7 +82,7 @@ function check_response() {
     cd $WORKPATH
     echo "Checking response"
     local status=false
-    if [[ $(grep -c "\$51.2 billion" $LOG_PATH) != 0 ]]; then
+    if [[ -f $LOG_PATH ]] && [[ $(grep -c "\$51.2 billion" $LOG_PATH) != 0 ]]; then
        status=true
     fi
diff --git a/SearchQnA/README.md b/SearchQnA/README.md
index 4a8cfb3bb..3b2b8f25f 100644
--- a/SearchQnA/README.md
+++ b/SearchQnA/README.md
@@ -22,7 +22,7 @@ The workflow falls into the following architecture:
 
 # Start Backend Service
 
-- Start the TGI service to deploy your LLM
+1. Start the TGI service to deploy your LLM
 
 ```sh
 cd serving/tgi_gaudi
 bash build_docker.sh
 bash launch_tgi_service.sh
 ```
 
-- Start the SearchQnA application using Google Search
+`launch_tgi_service.sh` by default uses `8080` as the TGI service's port. Please replace it if there are any port conflicts.
+
+2. Start the SearchQnA application using Google Search
 
 ```sh
-cd /home/sdp/sihanche/GenAIExamples/SearchQnA/langchain/docker
+cd langchain/docker
 docker build . --build-arg http_proxy=${http_proxy} --build-arg https_proxy=${http_proxy} -t intel/gen-ai-examples:searchqna-gaudi --no-cache
-docker run -e TGI_ENDPOINT= -e GOOGLE_CSE_ID= -e GOOGLE_API_KEY= -e HUGGINGFACEHUB_API_TOKEN= -p 8080:8000 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -v $PWD/qna-app:/qna-app --runtime=habana -e HABANA_VISIBE_DEVILCES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host intel/gen-ai-examples:searchqna-gaudi
+docker run -e TGI_ENDPOINT= -e GOOGLE_CSE_ID= -e GOOGLE_API_KEY= -e HUGGINGFACEHUB_API_TOKEN= -p 8085:8000 -e http_proxy=$http_proxy -e https_proxy=$https_proxy --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host intel/gen-ai-examples:searchqna-gaudi
 ```
 
-- Test
+Here is the explanation of some of the above parameters:
+
+- `TGI_ENDPOINT`: the endpoint of your TGI service, usually equal to `<ip>:<port>`
+- `GOOGLE_CSE_ID`: your CSE ID for Google Search Engine, usually generated [here](https://programmablesearchengine.google.com/controlpanel/all)
+- `GOOGLE_API_KEY`: your API key for Google Search Engine, usually generated [here](https://console.cloud.google.com/apis/credentials)
+- `HUGGINGFACEHUB_API_TOKEN`: your HuggingFace hub API token, usually generated [here](https://huggingface.co/settings/tokens)
+- `-p 8085:8000`: This maps port 8000 of the SearchQnA service inside the container to port 8085 on the host
+
+3. Quick test
 
 ```sh
 curl http://localhost:8085/v1/rag/web_search_chat_stream -X POST -d '{"query":"Give me some latest news?"}' -H 'Content-Type: application/json'
diff --git a/SearchQnA/langchain/docker/Dockerfile b/SearchQnA/langchain/docker/Dockerfile
index e3696ebab..51f30d538 100644
--- a/SearchQnA/langchain/docker/Dockerfile
+++ b/SearchQnA/langchain/docker/Dockerfile
@@ -18,11 +18,12 @@ RUN rm -rf /etc/ssh/ssh_host*
 
 # Set environment variables
 ENV LANG=en_US.UTF-8
-ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/langchain/libs/community:/langchain/libs/langchain
+ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/
 
 # Install required branch
 RUN git clone https://github.com/Spycsh/langchain.git /langchain -b master && \
-    pip install --no-cache-dir -e /langchain/libs/langchain && \
+    pip install --no-cache-dir /langchain/libs/langchain && \
+    pip install --no-cache-dir /langchain/libs/community && \
     rm -rf /langchain
 
 RUN useradd -m -s /bin/bash user && \
diff --git a/SearchQnA/serving/tgi_gaudi/launch_tgi_service.sh b/SearchQnA/serving/tgi_gaudi/launch_tgi_service.sh
index 7a183b759..ef7468b54 100644
--- a/SearchQnA/serving/tgi_gaudi/launch_tgi_service.sh
+++ b/SearchQnA/serving/tgi_gaudi/launch_tgi_service.sh
@@ -1,3 +1,5 @@
+#!/bin/bash
+
 # Copyright (c) 2024 Intel Corporation
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 # http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#!/bin/bash
-
 # Set default values
 default_port=8080
 default_model="Intel/neural-chat-7b-v3-3"
 default_num_cards=1
@@ -41,9 +41,9 @@ volume=$PWD/data
 
 # Build the Docker run command based on the number of cards
 if [ "$num_cards" -eq 1 ]; then
-    docker_cmd="docker run -p $port_number:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy ghcr.io/huggingface/tgi-gaudi:1.2.1 --model-id $model_name"
+    docker_cmd="docker run -d --name tgi-gaudi-server -p $port_number:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy ghcr.io/huggingface/tgi-gaudi:1.2.1 --model-id $model_name"
 else
-    docker_cmd="docker run -p $port_number:80 -v $volume:/data --runtime=habana -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy ghcr.io/huggingface/tgi-gaudi:1.2.1 --model-id $model_name --sharded true --num-shard $num_cards"
+    docker_cmd="docker run -d --name tgi-gaudi-server -p $port_number:80 -v $volume:/data --runtime=habana -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy ghcr.io/huggingface/tgi-gaudi:1.2.1 --model-id $model_name --sharded true --num-shard $num_cards"
 fi
 
 # Execute the Docker run command
diff --git a/SearchQnA/tests/test_langchain_inference.sh b/SearchQnA/tests/test_langchain_inference.sh
new file mode 100644
index 000000000..f014b432a
--- /dev/null
+++ b/SearchQnA/tests/test_langchain_inference.sh
@@ -0,0 +1,107 @@
+#!/bin/bash
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+set -xe
+
+function test_env_setup() {
+    WORKPATH=$(dirname "$PWD")
+    LOG_PATH="$WORKPATH/tests/langchain.log"
+
+    TGI_CONTAINER_NAME="test-tgi-gaudi-server"
+    LANGCHAIN_CONTAINER_NAME="test-searchqna-gaudi"
+}
+
+function rename() {
+    # Rename the docker container/image names to avoid conflict with local test
+    cd ${WORKPATH}
+    sed -i "s/tgi-gaudi-server/${TGI_CONTAINER_NAME}/g" serving/tgi_gaudi/launch_tgi_service.sh
+}
+
+function launch_tgi_gaudi_service() {
+    local card_num=1
+    local port=8870
+    local model_name="Intel/neural-chat-7b-v3-3"
+
+    cd ${WORKPATH}
+
+    docker pull ghcr.io/huggingface/tgi-gaudi:1.2.1
+    bash serving/tgi_gaudi/launch_tgi_service.sh $card_num $port $model_name
+    sleep 2m
+}
+
+function launch_langchain_service() {
+    cd $WORKPATH
+    local port=8875
+    cd langchain/docker
+    docker build . --build-arg http_proxy=${http_proxy} --build-arg https_proxy=${https_proxy} -t intel/gen-ai-examples:${LANGCHAIN_CONTAINER_NAME}
+
+    tgi_ip_name=$(echo $(hostname) | tr '[a-z]-' '[A-Z]_')_$(echo 'IP')
+    tgi_ip=$(eval echo '$'$tgi_ip_name)
+    docker run -d --name=${LANGCHAIN_CONTAINER_NAME} -e TGI_ENDPOINT=http://${tgi_ip}:8870 -e GOOGLE_CSE_ID=${GOOGLE_CSE_ID} -e GOOGLE_API_KEY=${GOOGLE_API_KEY} -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} \
+        -p ${port}:8000 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host intel/gen-ai-examples:${LANGCHAIN_CONTAINER_NAME}
+
+    sleep 2m
+}
+
+
+function run_tests() {
+    cd $WORKPATH
+    local port=8875
+
+    curl http://localhost:${port}/v1/rag/web_search_chat \
+        -X POST \
+        -d '{"query":"What is the GitHub Repo link of Intel Neural Compressor?"}' \
+        -H 'Content-Type: application/json' > $LOG_PATH
+
+}
+
+function check_response() {
+    cd $WORKPATH
+    echo "Checking response"
+    local status=false
+    if [[ -f $LOG_PATH ]] && [[ $(grep -c "github.com/intel/neural-compressor" $LOG_PATH) != 0 ]]; then
+        status=true
+    fi
+
+    if [ $status == false ]; then
+        echo "Response check failed"
+        exit 1
+    else
+        echo "Response check succeeded"
+    fi
+}
+
+function docker_stop() {
+    local container_name=$1
+    cid=$(docker ps -aq --filter "name=$container_name")
+    if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid; fi
+}
+
+function main() {
+    test_env_setup
+    rename
+    docker_stop $TGI_CONTAINER_NAME && docker_stop $LANGCHAIN_CONTAINER_NAME && sleep 5s
+
+    launch_tgi_gaudi_service
+    launch_langchain_service
+
+    run_tests
+
+    docker_stop $TGI_CONTAINER_NAME && docker_stop $LANGCHAIN_CONTAINER_NAME && sleep 5s
+    echo y | docker system prune
+
+    check_response
+}
+
+main
diff --git a/SearchQnA/ui/.env b/SearchQnA/ui/.env
index fed581354..4f519d803 100644
--- a/SearchQnA/ui/.env
+++ b/SearchQnA/ui/.env
@@ -1 +1 @@
-BACKEND_BASE_URL = 'http://xxxxx:8003/v1/rag'
\ No newline at end of file
+BACKEND_BASE_URL = 'http://xxxxx:8085/v1/rag'
\ No newline at end of file
diff --git a/VisualQnA/README.md b/VisualQnA/README.md
index 270911004..40713c7c8 100644
--- a/VisualQnA/README.md
+++ b/VisualQnA/README.md
@@ -30,7 +30,7 @@ docker build . --build-arg http_proxy=${http_proxy} --build-arg https_proxy=${ht
 2. Start the LLaVA service on Intel Gaudi2
 
 ```
-docker run -d -p 8084:80 -p 8085:8000 -v ./data:/root/.cache/huggingface/hub/ -e http_proxy=$http_proxy -e https_proxy=$http_proxy -v $PWD/llava_server:/llava_server --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host intel/gen-ai-examples:llava-gaudi
+docker run -d -p 8085:8000 -v ./data:/root/.cache/huggingface/hub/ -e http_proxy=$http_proxy -e https_proxy=$https_proxy --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host intel/gen-ai-examples:llava-gaudi
 ```
 
 Here are some explanations of the above parameters:
@@ -55,7 +55,7 @@ Now you can start the frontend UI by following commands:
 ```
 cd ui/
 pip install -r requirements.txt
-python app.py --host 0.0.0.0 --port 7860 --worker-addr http://localhost:8085 --share
+http_proxy= python app.py --host 0.0.0.0 --port 7860 --worker-addr http://localhost:8085 --share
 ```
 
 Here are some explanations of the above parameters:
diff --git a/VisualQnA/tests/test_basic_inference.sh b/VisualQnA/tests/test_basic_inference.sh
new file mode 100644
index 000000000..b350deb58
--- /dev/null
+++ b/VisualQnA/tests/test_basic_inference.sh
@@ -0,0 +1,77 @@
+#!/bin/bash
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+set -xe
+
+function test_env_setup() {
+    WORKPATH=$(dirname "$PWD")
+    LOG_PATH="$WORKPATH/tests/inference.log"
+    CONTAINER_NAME="test-llava-gaudi-service"
+    cd $WORKPATH
+}
+
+function launch_llava_service() {
+    cd ${WORKPATH}
+    cd serving/
+    local port=8855
+    docker build . --build-arg http_proxy=${http_proxy} --build-arg https_proxy=${https_proxy} -t intel/gen-ai-examples:${CONTAINER_NAME}
+    docker run -d --name=${CONTAINER_NAME} -p ${port}:8000 -v ~/.cache/huggingface/hub/:/root/.cache/huggingface/hub/ -e http_proxy=$http_proxy -e https_proxy=$https_proxy \
+        --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host intel/gen-ai-examples:${CONTAINER_NAME}
+
+    sleep 3m
+}
+
+function run_tests() {
+    cd $WORKPATH
+    local port=8855
+    curl localhost:${port}/health -v 2>&1 | tee $LOG_PATH
+}
+
+function check_response() {
+    cd $WORKPATH
+    echo "Checking response"
+    local status=false
+    if [[ -f $LOG_PATH ]] && [[ $(grep -c "200 OK" $LOG_PATH) != 0 ]]; then
+        status=true
+    fi
+
+    if [ $status == false ]; then
+        echo "Response check failed"
+        exit 1
+    else
+        echo "Response check succeeded"
+    fi
+}
+
+function docker_stop() {
+    local container_name=$1
+    cid=$(docker ps -aq --filter "name=$container_name")
-z "$cid" ]]; then docker stop $cid && docker rm $cid; fi +} + +function main() { + test_env_setup + docker_stop $CONTAINER_NAME && sleep 5s + + launch_llava_service + + run_tests + + docker_stop $CONTAINER_NAME && sleep 5s + echo y | docker system prune + + check_response +} + +main diff --git a/VisualQnA/ui/requirements.txt b/VisualQnA/ui/requirements.txt index 6d23f9b6e..39d06f8dc 100644 --- a/VisualQnA/ui/requirements.txt +++ b/VisualQnA/ui/requirements.txt @@ -1,2 +1,3 @@ gradio +gradio_client requests