Enable vllm for DocSum (#1716)

Set vllm as default llm serving, and add related docker compose files, readmes, and test scripts.

Fix issue #1436

Signed-off-by: letonghan <letong.han@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Letong Han
2025-03-28 17:15:01 +08:00
committed by GitHub
parent 87baeb833d
commit d4dcbd18ef
12 changed files with 1403 additions and 317 deletions

View File

@@ -2,6 +2,8 @@
This document outlines the deployment process for a Document Summarization application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on an Intel Xeon server. The steps include Docker image creation, container deployment via Docker Compose, and service execution to integrate microservices such as `llm`. We will publish the Docker images to Docker Hub soon, which will simplify the deployment process for this service.
The default pipeline deploys with vLLM as the LLM serving component. It also provides the option of using a TGI backend for the LLM microservice; please refer to the [start-microservice-docker-containers](#start-microservice-docker-containers) section on this page.
## 🚀 Apply Intel Xeon Server on AWS
To apply for an Intel Xeon server on AWS, start by creating an AWS account if you don't already have one. Then, head to the [EC2 Console](https://console.aws.amazon.com/ec2/v2/home) to begin the process. Within the EC2 service, select the Amazon EC2 M7i or M7i-flex instance type to leverage 4th Generation Intel Xeon Scalable processors. These instances are optimized for high-performance computing and demanding workloads.
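If you prefer the AWS CLI over the console, a minimal sketch of launching an M7i instance looks like the example below; the AMI ID, key pair, and security group are placeholders you must replace with your own values.
```bash
# Sketch only: ami-xxxxxxxxxxxxxxxxx, my-key-pair and sg-xxxxxxxx are placeholders.
aws ec2 run-instances \
  --instance-type m7i.4xlarge \
  --image-id ami-xxxxxxxxxxxxxxxxx \
  --key-name my-key-pair \
  --security-group-ids sg-xxxxxxxx \
  --count 1
```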
@@ -116,9 +118,20 @@ To set up environment variables for deploying Document Summarization services, f
```bash
cd GenAIExamples/DocSum/docker_compose/intel/cpu/xeon
```
If using vLLM as the LLM serving backend:
```bash
docker compose -f compose.yaml up -d
```
If using TGI as the LLM serving backend:
```bash
docker compose -f compose_tgi.yaml up -d
```
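Either way, you can optionally confirm that all containers came up before moving on (a quick sanity check, not part of the original steps; pass the same `-f` flag you used above, e.g. `-f compose_tgi.yaml` for the TGI backend):
```bash
docker compose -f compose.yaml ps
```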
You will have the following Docker Images:
1. `opea/docsum-ui:latest`
@@ -128,10 +141,30 @@ You will have the following Docker Images:
### Validate Microservices
1. TGI Service
1. LLM backend Service
During the first startup, this service will take extra time to download, load, and warm up the model. Once it finishes, the service will be ready.
Run the command below to check whether the LLM serving backend is ready.
```bash
curl http://${host_ip}:8008/generate \
# vLLM service
docker logs docsum-xeon-vllm-service 2>&1 | grep complete
# If the service is ready, you will get the response like below.
INFO: Application startup complete.
```
```bash
# TGI service
docker logs docsum-xeon-tgi-server | grep Connected
# If the service is ready, you will get the response like below.
2024-09-03T02:47:53.402023Z INFO text_generation_router::server: router/src/server.rs:2311: Connected
```
Then try the `cURL` command below to validate services.
```bash
# either vLLM or TGI service
curl http://${host_ip}:8008/v1/chat/completions \
-X POST \
-d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
-H 'Content-Type: application/json'
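# Optional extra check (not part of the original steps): once the LLM backend answers,
# the DocSum megaservice itself can be validated the same way, assuming the default
# backend port 8888 used elsewhere in this guide.
curl http://${host_ip}:8888/v1/docsum \
  -X POST \
  -d '{"type": "text", "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models."}' \
  -H 'Content-Type: application/json'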

View File

@@ -2,54 +2,53 @@
# SPDX-License-Identifier: Apache-2.0
services:
tgi-server:
image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
container_name: tgi-server
vllm-service:
image: ${REGISTRY:-opea}/vllm:${TAG:-latest}
container_name: docsum-xeon-vllm-service
ports:
- ${LLM_ENDPOINT_PORT:-8008}:80
- "8008:80"
volumes:
- "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
host_ip: ${host_ip}
LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
LLM_MODEL_ID: ${LLM_MODEL_ID}
VLLM_TORCH_PROFILER_DIR: "/mnt"
healthcheck:
test: ["CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1"]
test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"]
interval: 10s
timeout: 10s
retries: 100
volumes:
- "${MODEL_CACHE:-./data}:/data"
shm_size: 1g
command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0 --max-input-length ${MAX_INPUT_TOKENS} --max-total-tokens ${MAX_TOTAL_TOKENS}
command: --model $LLM_MODEL_ID --host 0.0.0.0 --port 80
llm-docsum-tgi:
llm-docsum-vllm:
image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest}
container_name: llm-docsum-server
container_name: docsum-xeon-llm-server
depends_on:
tgi-server:
vllm-service:
condition: service_healthy
ports:
- ${DOCSUM_PORT:-9000}:9000
- ${LLM_PORT:-9000}:9000
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
LLM_ENDPOINT: ${LLM_ENDPOINT}
LLM_MODEL_ID: ${LLM_MODEL_ID}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS}
MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS}
LLM_MODEL_ID: ${LLM_MODEL_ID}
DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME}
LOGFLAG: ${LOGFLAG:-False}
restart: unless-stopped
whisper:
image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
container_name: whisper-server
container_name: docsum-xeon-whisper-server
ports:
- "7066:7066"
ipc: host
@@ -63,10 +62,10 @@ services:
image: ${REGISTRY:-opea}/docsum:${TAG:-latest}
container_name: docsum-xeon-backend-server
depends_on:
- tgi-server
- llm-docsum-tgi
- vllm-service
- llm-docsum-vllm
ports:
- "8888:8888"
- "${BACKEND_SERVICE_PORT:-8888}:8888"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
@@ -83,7 +82,7 @@ services:
depends_on:
- docsum-xeon-backend-server
ports:
- "5173:5173"
- "${FRONTEND_SERVICE_PORT:-5173}:5173"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}

View File

@@ -0,0 +1,97 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
services:
tgi-server:
image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
container_name: docsum-xeon-tgi-server
ports:
- ${LLM_ENDPOINT_PORT:-8008}:80
volumes:
- "${MODEL_CACHE:-./data}:/data"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
host_ip: ${host_ip}
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"]
interval: 10s
timeout: 10s
retries: 100
shm_size: 1g
command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0 --max-input-length ${MAX_INPUT_TOKENS} --max-total-tokens ${MAX_TOTAL_TOKENS}
llm-docsum-tgi:
image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest}
container_name: docsum-xeon-llm-server
depends_on:
tgi-server:
condition: service_healthy
ports:
- ${LLM_PORT:-9000}:9000
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
LLM_ENDPOINT: ${LLM_ENDPOINT}
LLM_MODEL_ID: ${LLM_MODEL_ID}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS}
MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS}
DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME}
LOGFLAG: ${LOGFLAG:-False}
restart: unless-stopped
whisper:
image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
container_name: docsum-xeon-whisper-server
ports:
- "7066:7066"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
restart: unless-stopped
docsum-xeon-backend-server:
image: ${REGISTRY:-opea}/docsum:${TAG:-latest}
container_name: docsum-xeon-backend-server
depends_on:
- tgi-server
- llm-docsum-tgi
ports:
- "${BACKEND_SERVICE_PORT:-8888}:8888"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
- ASR_SERVICE_HOST_IP=${ASR_SERVICE_HOST_IP}
ipc: host
restart: always
docsum-gradio-ui:
image: ${REGISTRY:-opea}/docsum-gradio-ui:${TAG:-latest}
container_name: docsum-xeon-ui-server
depends_on:
- docsum-xeon-backend-server
ports:
- "${FRONTEND_SERVICE_PORT:-5173}:5173"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- BACKEND_SERVICE_ENDPOINT=${BACKEND_SERVICE_ENDPOINT}
- DOC_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
ipc: host
restart: always
networks:
default:
driver: bridge

View File

@@ -2,6 +2,8 @@
This document outlines the deployment process for a Document Summarization application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on an Intel Gaudi server. The steps include Docker image creation, container deployment via Docker Compose, and service execution to integrate microservices such as `llm`. We will publish the Docker images to Docker Hub soon, which will simplify the deployment process for this service.
The default pipeline deploys with vLLM as the LLM serving component. It also provides the option of using a TGI backend for the LLM microservice; please refer to the [start-microservice-docker-containers](#start-microservice-docker-containers) section on this page.
## 🚀 Build Docker Images
### 1. Build MicroService Docker Image
@@ -108,9 +110,20 @@ To set up environment variables for deploying Document Summarization services, f
```bash
cd GenAIExamples/DocSum/docker_compose/intel/hpu/gaudi
```
If using vLLM as the LLM serving backend:
```bash
docker compose -f compose.yaml up -d
```
If using TGI as the LLM serving backend:
```bash
docker compose -f compose_tgi.yaml up -d
```
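Before starting the services you can optionally verify that the Gaudi cards are visible on the host (this assumes the Habana driver stack and the `hl-smi` utility are already installed):
```bash
hl-smi
```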
You will have the following Docker Images:
1. `opea/docsum-ui:latest`
@@ -120,10 +133,30 @@ You will have the following Docker Images:
### Validate Microservices
1. TGI Service
1. LLM backend Service
During the first startup, this service will take extra time to download, load, and warm up the model. Once it finishes, the service will be ready.
Run the command below to check whether the LLM serving backend is ready.
```bash
curl http://${host_ip}:8008/generate \
# vLLM service
docker logs docsum-gaudi-vllm-service 2>&1 | grep complete
# If the service is ready, you will get the response like below.
INFO: Application startup complete.
```
```bash
# TGI service
docker logs docsum-gaudi-tgi-server | grep Connected
# If the service is ready, you will get the response like below.
2024-09-03T02:47:53.402023Z INFO text_generation_router::server: router/src/server.rs:2311: Connected
```
Then try the `cURL` command below to validate services.
```bash
# either vLLM or TGI service
curl http://${host_ip}:8008/v1/chat/completions \
-X POST \
-d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
-H 'Content-Type: application/json'

View File

@@ -2,47 +2,42 @@
# SPDX-License-Identifier: Apache-2.0
services:
tgi-gaudi-server:
image: ghcr.io/huggingface/tgi-gaudi:2.3.1
container_name: tgi-gaudi-server
vllm-service:
image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest}
container_name: docsum-gaudi-vllm-service
ports:
- ${LLM_ENDPOINT_PORT:-8008}:80
- "8008:80"
volumes:
- "${DATA_PATH:-./data}:/data"
- "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
ENABLE_HPU_GRAPH: true
LIMIT_HPU_GRAPH: true
USE_FLASH_ATTENTION: true
FLASH_ATTENTION_RECOMPUTE: true
host_ip: ${host_ip}
LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT}
LLM_MODEL_ID: ${LLM_MODEL_ID}
NUM_CARDS: ${NUM_CARDS}
VLLM_TORCH_PROFILER_DIR: "/mnt"
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"]
interval: 10s
timeout: 10s
retries: 100
runtime: habana
cap_add:
- SYS_NICE
ipc: host
healthcheck:
test: ["CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1"]
interval: 10s
timeout: 10s
retries: 100
command: --model-id ${LLM_MODEL_ID} --max-input-length ${MAX_INPUT_TOKENS} --max-total-tokens ${MAX_TOTAL_TOKENS}
command: --model $LLM_MODEL_ID --tensor-parallel-size ${NUM_CARDS} --host 0.0.0.0 --port 80 --block-size ${BLOCK_SIZE} --max-num-seqs ${MAX_NUM_SEQS} --max-seq_len-to-capture ${MAX_SEQ_LEN_TO_CAPTURE}
llm-docsum-tgi:
llm-docsum-vllm:
image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest}
container_name: llm-docsum-gaudi-server
container_name: docsum-gaudi-llm-server
depends_on:
tgi-gaudi-server:
vllm-service:
condition: service_healthy
ports:
- ${DOCSUM_PORT:-9000}:9000
- ${LLM_PORT:-9000}:9000
ipc: host
environment:
no_proxy: ${no_proxy}
@@ -59,7 +54,7 @@ services:
whisper:
image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
container_name: whisper-server
container_name: docsum-gaudi-whisper-server
ports:
- "7066:7066"
ipc: host
@@ -78,10 +73,10 @@ services:
image: ${REGISTRY:-opea}/docsum:${TAG:-latest}
container_name: docsum-gaudi-backend-server
depends_on:
- tgi-gaudi-server
- llm-docsum-tgi
- vllm-service
- llm-docsum-vllm
ports:
- "8888:8888"
- "${BACKEND_SERVICE_PORT:-8888}:8888"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
@@ -99,7 +94,7 @@ services:
depends_on:
- docsum-gaudi-backend-server
ports:
- "5173:5173"
- "${FRONTEND_SERVICE_PORT:-5173}:5173"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}

View File

@@ -0,0 +1,114 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
services:
tgi-gaudi-server:
image: ghcr.io/huggingface/tgi-gaudi:2.3.1
container_name: docsum-gaudi-tgi-server
ports:
- ${LLM_ENDPOINT_PORT:-8008}:80
volumes:
- "${MODEL_CACHE}:/data"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
ENABLE_HPU_GRAPH: true
LIMIT_HPU_GRAPH: true
USE_FLASH_ATTENTION: true
FLASH_ATTENTION_RECOMPUTE: true
host_ip: ${host_ip}
LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT}
runtime: habana
cap_add:
- SYS_NICE
ipc: host
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"]
interval: 10s
timeout: 10s
retries: 100
command: --model-id ${LLM_MODEL_ID} --max-input-length ${MAX_INPUT_TOKENS} --max-total-tokens ${MAX_TOTAL_TOKENS}
llm-docsum-tgi:
image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest}
container_name: docsum-gaudi-llm-server
depends_on:
tgi-gaudi-server:
condition: service_healthy
ports:
- ${LLM_PORT:-9000}:9000
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS}
MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS}
LLM_ENDPOINT: ${LLM_ENDPOINT}
LLM_MODEL_ID: ${LLM_MODEL_ID}
DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME}
LOGFLAG: ${LOGFLAG:-False}
restart: unless-stopped
whisper:
image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
container_name: docsum-gaudi-whisper-server
ports:
- "7066:7066"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
runtime: habana
cap_add:
- SYS_NICE
restart: unless-stopped
docsum-gaudi-backend-server:
image: ${REGISTRY:-opea}/docsum:${TAG:-latest}
container_name: docsum-gaudi-backend-server
depends_on:
- tgi-gaudi-server
- llm-docsum-tgi
ports:
- "${BACKEND_SERVICE_PORT:-8888}:8888"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
- ASR_SERVICE_HOST_IP=${ASR_SERVICE_HOST_IP}
ipc: host
restart: always
docsum-gradio-ui:
image: ${REGISTRY:-opea}/docsum-gradio-ui:${TAG:-latest}
container_name: docsum-gaudi-ui-server
depends_on:
- docsum-gaudi-backend-server
ports:
- "${FRONTEND_SERVICE_PORT:-5173}:5173"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- BACKEND_SERVICE_ENDPOINT=${BACKEND_SERVICE_ENDPOINT}
- DOC_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
ipc: host
restart: always
networks:
default:
driver: bridge

View File

@@ -6,18 +6,21 @@ pushd "../../" > /dev/null
source .set_env.sh
popd > /dev/null
export no_proxy="${no_proxy},${host_ip}"
export LLM_ENDPOINT_PORT=8008
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export MAX_INPUT_TOKENS=1024
export MAX_TOTAL_TOKENS=2048
export no_proxy="${no_proxy},${host_ip}"
export LLM_PORT=9000
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
export DocSum_COMPONENT_NAME="OpeaDocSumTgi"
export MEGA_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export ASR_SERVICE_HOST_IP=${host_ip}
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum"
export LLM_ENDPOINT_PORT=8008
export DOCSUM_PORT=9000
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
export DocSum_COMPONENT_NAME="OpeaDocSumTgi"
export BACKEND_SERVICE_PORT=8888
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum"
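A typical way to apply these variables is to source the script from the corresponding compose directory before bringing the stack up (a usage sketch; the script name `set_env.sh` is assumed):
```bash
source set_env.sh
docker compose -f compose.yaml up -d
```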

View File

@@ -49,3 +49,15 @@ services:
dockerfile: comps/llms/src/doc-summarization/Dockerfile
extends: docsum
image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest}
vllm:
build:
context: vllm
dockerfile: Dockerfile.cpu
extends: docsum
image: ${REGISTRY:-opea}/vllm:${TAG:-latest}
vllm-gaudi:
build:
context: vllm-fork
dockerfile: Dockerfile.hpu
extends: docsum
image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest}
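With these entries in place, the vLLM serving images can be built alongside the other DocSum images. A sketch of the build step, mirroring what the test scripts below do (it assumes the vLLM sources have already been cloned into the `vllm` / `vllm-fork` build contexts):
```bash
cd GenAIExamples/DocSum/docker_image_build
# CPU (Xeon) serving image
docker compose -f build.yaml build vllm --no-cache
# Gaudi serving image
docker compose -f build.yaml build vllm-gaudi --no-cache
```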

View File

@@ -12,23 +12,29 @@ export host_ip=$(hostname -I | awk '{print $1}')
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export no_proxy="${no_proxy},${host_ip}"
export MODEL_CACHE=${model_cache:-"./data"}
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export LLM_ENDPOINT_PORT=8008
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export NUM_CARDS=1
export BLOCK_SIZE=128
export MAX_NUM_SEQS=256
export MAX_SEQ_LEN_TO_CAPTURE=2048
export MAX_INPUT_TOKENS=2048
export MAX_TOTAL_TOKENS=4096
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export LLM_PORT=9000
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
export DocSum_COMPONENT_NAME="OpeaDocSumvLLM"
export MEGA_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export ASR_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum"
export no_proxy="${no_proxy},${host_ip}"
export LLM_ENDPOINT_PORT=8008
export DOCSUM_PORT=9000
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
export DocSum_COMPONENT_NAME="OpeaDocSumTgi"
export FRONTEND_SERVICE_PORT=5173
export BACKEND_SERVICE_PORT=8888
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum"
export LOGFLAG=True
export DATA_PATH=${model_cache:-"/data/cache"}
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
@@ -39,17 +45,31 @@ ROOT_FOLDER=$(dirname "$(readlink -f "$0")")
function build_docker_images() {
opea_branch=${opea_branch:-"main"}
# If the opea_branch isn't main, replace the git clone branch in Dockerfile.
if [[ "${opea_branch}" != "main" ]]; then
cd $WORKPATH
OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git"
NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git"
find . -type f -name "Dockerfile*" | while read -r file; do
echo "Processing file: $file"
sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file"
done
fi
cd $WORKPATH/docker_image_build
git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
pushd GenAIComps
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
popd && sleep 1s
git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork
VLLM_VER=$(git describe --tags "$(git rev-list --tags --max-count=1)")
git checkout ${VLLM_VER} &> /dev/null && cd ../
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="docsum docsum-gradio-ui whisper llm-docsum"
service_list="docsum docsum-gradio-ui whisper llm-docsum vllm-gaudi"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/tgi-gaudi:2.3.1
docker images && sleep 1s
}
@@ -84,115 +104,95 @@ input_data_for_test() {
esac
}
function validate_services_json() {
function validate_service() {
local URL="$1"
local EXPECTED_RESULT="$2"
local SERVICE_NAME="$3"
local DOCKER_NAME="$4"
local INPUT_DATA="$5"
local VALIDATE_TYPE="$5"
local INPUT_DATA="$6"
local FORM_DATA1="$7"
local FORM_DATA2="$8"
local FORM_DATA3="$9"
local FORM_DATA4="${10}"
local FORM_DATA5="${11}"
local FORM_DATA6="${12}"
local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
echo "==========================================="
if [ "$HTTP_STATUS" -eq 200 ]; then
echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log)
if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then
echo "[ $SERVICE_NAME ] Content is as expected."
if [[ $VALIDATE_TYPE == *"json"* ]]; then
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
else
echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT"
docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
exit 1
CURL_CMD=(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F "$FORM_DATA1" -F "$FORM_DATA2" -F "$FORM_DATA3" -F "$FORM_DATA4" -F "$FORM_DATA5" -H 'Content-Type: multipart/form-data' "$URL")
if [[ -n "$FORM_DATA6" ]]; then
CURL_CMD+=(-F "$FORM_DATA6")
fi
else
HTTP_RESPONSE=$("${CURL_CMD[@]}")
fi
HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://')
RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g')
docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
# check response status
if [ "$HTTP_STATUS" -ne "200" ]; then
echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
exit 1
fi
sleep 1s
}
function validate_services_form() {
local URL="$1"
local EXPECTED_RESULT="$2"
local SERVICE_NAME="$3"
local DOCKER_NAME="$4"
local FORM_DATA1="$5"
local FORM_DATA2="$6"
local FORM_DATA3="$7"
local FORM_DATA4="$8"
local FORM_DATA5="$9"
local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F "$FORM_DATA1" -F "$FORM_DATA2" -F "$FORM_DATA3" -F "$FORM_DATA4" -F "$FORM_DATA5" -H 'Content-Type: multipart/form-data' "$URL")
if [ "$HTTP_STATUS" -eq 200 ]; then
else
echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
local CONTENT=$(curl -s -X POST -F "$FORM_DATA1" -F "$FORM_DATA2" -F "$FORM_DATA3" -F "$FORM_DATA4" -F "$FORM_DATA5" -H 'Content-Type: multipart/form-data' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log)
if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then
fi
# check response body
if [[ "$RESPONSE_BODY" != *"$EXPECTED_RESULT"* ]]; then
echo "EXPECTED_RESULT==> $EXPECTED_RESULT"
echo "RESPONSE_BODY==> $RESPONSE_BODY"
echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY"
exit 1
else
echo "[ $SERVICE_NAME ] Content is as expected."
else
echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT"
docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
exit 1
fi
else
echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
exit 1
fi
sleep 1s
}
function validate_microservices() {
# Check if the microservices are running correctly.
# tgi for llm service
validate_services_json \
"${host_ip}:8008/generate" \
"generated_text" \
"tgi-gaudi-server" \
"tgi-gaudi-server" \
'{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}'
# llm microservice
validate_services_json \
"${host_ip}:9000/v1/docsum" \
validate_service \
"${host_ip}:${LLM_PORT}/v1/docsum" \
"text" \
"llm-docsum-tgi" \
"llm-docsum-gaudi-server" \
"llm-docsum-vllm" \
"docsum-gaudi-llm-server" \
"json" \
'{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
# whisper microservice
ulimit -s 65536
validate_services_json \
validate_service \
"${host_ip}:7066/v1/asr" \
'{"asr_result":"well"}' \
"whisper" \
"whisper-server" \
"docsum-gaudi-whisper-server" \
"json" \
"{\"audio\": \"$(input_data_for_test "audio")\"}"
}
function validate_megaservice_text() {
echo ">>> Checking text data in json format"
validate_services_json \
"${host_ip}:8888/v1/docsum" \
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"[DONE]" \
"docsum-gaudi-backend-server" \
"docsum-gaudi-backend-server" \
"json" \
'{"type": "text", "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
echo ">>> Checking text data in form format, set language=en"
validate_services_form \
"${host_ip}:8888/v1/docsum" \
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"[DONE]" \
"docsum-gaudi-backend-server" \
"docsum-gaudi-backend-server" \
"media" "" \
"type=text" \
"messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." \
"max_tokens=32" \
@@ -200,11 +200,12 @@ function validate_megaservice_text() {
"stream=True"
echo ">>> Checking text data in form format, set language=zh"
validate_services_form \
"${host_ip}:8888/v1/docsum" \
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"[DONE]" \
"docsum-gaudi-backend-server" \
"docsum-gaudi-backend-server" \
"media" "" \
"type=text" \
"messages=2024年9月26日北京——今日英特尔正式发布英特尔® 至强® 6性能核处理器代号Granite Rapids为AI、数据分析、科学计算等计算密集型业务提供卓越性能。" \
"max_tokens=32" \
@@ -212,125 +213,141 @@ function validate_megaservice_text() {
"stream=True"
echo ">>> Checking text data in form format, upload file"
validate_services_form \
"${host_ip}:8888/v1/docsum" \
"[DONE]" \
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"TEI" \
"docsum-gaudi-backend-server" \
"docsum-gaudi-backend-server" \
"media" "" \
"type=text" \
"messages=" \
"files=@$ROOT_FOLDER/data/short.txt" \
"max_tokens=32" \
"language=en"
"language=en" \
"stream=False"
}
function validate_megaservice_multimedia() {
echo ">>> Checking audio data in json format"
validate_services_json \
"${host_ip}:8888/v1/docsum" \
"[DONE]" \
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"well" \
"docsum-gaudi-backend-server" \
"docsum-gaudi-backend-server" \
"{\"type\": \"audio\", \"messages\": \"$(input_data_for_test "audio")\"}"
"json" \
"{\"type\": \"audio\", \"messages\": \"$(input_data_for_test "audio")\", \"stream\": \"False\"}"
echo ">>> Checking audio data in form format"
validate_services_form \
"${host_ip}:8888/v1/docsum" \
"[DONE]" \
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"you" \
"docsum-gaudi-backend-server" \
"docsum-gaudi-backend-server" \
"media" "" \
"type=audio" \
"messages=UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA" \
"max_tokens=32" \
"language=en" \
"stream=True"
"stream=False"
echo ">>> Checking video data in json format"
validate_services_json \
"${host_ip}:8888/v1/docsum" \
"[DONE]" \
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"bye" \
"docsum-gaudi-backend-server" \
"docsum-gaudi-backend-server" \
"{\"type\": \"video\", \"messages\": \"$(input_data_for_test "video")\"}"
"json" \
"{\"type\": \"video\", \"messages\": \"$(input_data_for_test "video")\", \"stream\": \"False\"}"
echo ">>> Checking video data in form format"
validate_services_form \
"${host_ip}:8888/v1/docsum" \
"[DONE]" \
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"bye" \
"docsum-gaudi-backend-server" \
"docsum-gaudi-backend-server" \
"media" "" \
"type=video" \
"messages=\"$(input_data_for_test "video")\"" \
"max_tokens=32" \
"language=en" \
"stream=True"
"stream=False"
}
function validate_megaservice_long_text() {
echo ">>> Checking long text data in form format, set summary_type=auto"
validate_services_form \
"${host_ip}:8888/v1/docsum" \
"[DONE]" \
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"Intel" \
"docsum-gaudi-backend-server" \
"docsum-gaudi-backend-server" \
"media" "" \
"type=text" \
"messages=" \
"files=@$ROOT_FOLDER/data/long.txt" \
"max_tokens=128" \
"summary_type=auto"
"summary_type=auto" \
"stream=False"
echo ">>> Checking long text data in form format, set summary_type=stuff"
validate_services_form \
"${host_ip}:8888/v1/docsum" \
"[DONE]" \
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"TEI" \
"docsum-gaudi-backend-server" \
"docsum-gaudi-backend-server" \
"media" "" \
"type=text" \
"messages=" \
"files=@$ROOT_FOLDER/data/long.txt" \
"files=@$ROOT_FOLDER/data/short.txt" \
"max_tokens=128" \
"summary_type=stuff"
"summary_type=stuff" \
"stream=False"
echo ">>> Checking long text data in form format, set summary_type=truncate"
validate_services_form \
"${host_ip}:8888/v1/docsum" \
"[DONE]" \
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"Intel" \
"docsum-gaudi-backend-server" \
"docsum-gaudi-backend-server" \
"media" "" \
"type=text" \
"messages=" \
"files=@$ROOT_FOLDER/data/long.txt" \
"max_tokens=128" \
"summary_type=truncate"
"summary_type=truncate" \
"stream=False"
echo ">>> Checking long text data in form format, set summary_type=map_reduce"
validate_services_form \
"${host_ip}:8888/v1/docsum" \
"[DONE]" \
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"Intel" \
"docsum-gaudi-backend-server" \
"docsum-gaudi-backend-server" \
"media" "" \
"type=text" \
"messages=" \
"files=@$ROOT_FOLDER/data/long.txt" \
"max_tokens=128" \
"summary_type=map_reduce"
"summary_type=map_reduce" \
"stream=False"
echo ">>> Checking long text data in form format, set summary_type=refine"
validate_services_form \
"${host_ip}:8888/v1/docsum" \
"[DONE]" \
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"Intel" \
"docsum-gaudi-backend-server" \
"docsum-gaudi-backend-server" \
"media" "" \
"type=text" \
"messages=" \
"files=@$ROOT_FOLDER/data/long.txt" \
"max_tokens=128" \
"summary_type=refine"
"summary_type=refine" \
"stream=False"
}
function stop_docker() {
cd $WORKPATH/docker_compose/intel/hpu/gaudi
docker compose stop && docker compose rm -f
docker compose -f compose.yaml stop && docker compose rm -f
}
function main() {

View File

@@ -12,22 +12,24 @@ export host_ip=$(hostname -I | awk '{print $1}')
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export no_proxy="${no_proxy},${host_ip}"
export MODEL_CACHE=${model_cache:-"./data"}
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export LLM_ENDPOINT_PORT=8008
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export MAX_INPUT_TOKENS=2048
export MAX_TOTAL_TOKENS=4096
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export LLM_PORT=9000
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
export DocSum_COMPONENT_NAME="OpeaDocSumvLLM"
export MEGA_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export ASR_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum"
export no_proxy="${no_proxy},${host_ip}"
export LLM_ENDPOINT_PORT=8008
export DOCSUM_PORT=9000
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
export DocSum_COMPONENT_NAME="OpeaDocSumTgi"
export FRONTEND_SERVICE_PORT=5173
export BACKEND_SERVICE_PORT=8888
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum"
export LOGFLAG=True
WORKPATH=$(dirname "$PWD")
@@ -38,17 +40,33 @@ ROOT_FOLDER=$(dirname "$(readlink -f "$0")")
function build_docker_images() {
opea_branch=${opea_branch:-"main"}
# If the opea_branch isn't main, replace the git clone branch in Dockerfile.
if [[ "${opea_branch}" != "main" ]]; then
cd $WORKPATH
OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git"
NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git"
find . -type f -name "Dockerfile*" | while read -r file; do
echo "Processing file: $file"
sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file"
done
fi
cd $WORKPATH/docker_image_build
git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
pushd GenAIComps
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
popd && sleep 1s
git clone https://github.com/vllm-project/vllm.git && cd vllm
VLLM_VER="$(git describe --tags "$(git rev-list --tags --max-count=1)" )"
echo "Check out vLLM tag ${VLLM_VER}"
git checkout ${VLLM_VER} &> /dev/null
cd ../
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="docsum docsum-gradio-ui whisper llm-docsum"
service_list="docsum docsum-gradio-ui whisper llm-docsum vllm"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/text-generation-inference:1.4
docker images && sleep 1s
}
@@ -83,118 +101,95 @@ input_data_for_test() {
esac
}
function validate_services_json() {
function validate_service() {
local URL="$1"
local EXPECTED_RESULT="$2"
local SERVICE_NAME="$3"
local DOCKER_NAME="$4"
local INPUT_DATA="$5"
local VALIDATE_TYPE="$5"
local INPUT_DATA="$6"
local FORM_DATA1="$7"
local FORM_DATA2="$8"
local FORM_DATA3="$9"
local FORM_DATA4="${10}"
local FORM_DATA5="${11}"
local FORM_DATA6="${12}"
local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
echo "==========================================="
if [ "$HTTP_STATUS" -eq 200 ]; then
echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log)
if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then
echo "[ $SERVICE_NAME ] Content is as expected."
if [[ $VALIDATE_TYPE == *"json"* ]]; then
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
else
CURL_CMD=(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F "$FORM_DATA1" -F "$FORM_DATA2" -F "$FORM_DATA3" -F "$FORM_DATA4" -F "$FORM_DATA5" -H 'Content-Type: multipart/form-data' "$URL")
if [[ -n "$FORM_DATA6" ]]; then
CURL_CMD+=(-F "$FORM_DATA6")
fi
HTTP_RESPONSE=$("${CURL_CMD[@]}")
fi
HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://')
RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g')
docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
# check response status
if [ "$HTTP_STATUS" -ne "200" ]; then
echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
exit 1
else
echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
fi
# check response body
if [[ "$RESPONSE_BODY" != *"$EXPECTED_RESULT"* ]]; then
echo "EXPECTED_RESULT==> $EXPECTED_RESULT"
echo "CONTENT==> $CONTENT"
echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT"
docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
echo "RESPONSE_BODY==> $RESPONSE_BODY"
echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY"
exit 1
fi
else
echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
exit 1
fi
sleep 1s
}
function validate_services_form() {
local URL="$1"
local EXPECTED_RESULT="$2"
local SERVICE_NAME="$3"
local DOCKER_NAME="$4"
local FORM_DATA1="$5"
local FORM_DATA2="$6"
local FORM_DATA3="$7"
local FORM_DATA4="$8"
local FORM_DATA5="$9"
local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F "$FORM_DATA1" -F "$FORM_DATA2" -F "$FORM_DATA3" -F "$FORM_DATA4" -F "$FORM_DATA5" -H 'Content-Type: multipart/form-data' "$URL")
if [ "$HTTP_STATUS" -eq 200 ]; then
echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
local CONTENT=$(curl -s -X POST -F "$FORM_DATA1" -F "$FORM_DATA2" -F "$FORM_DATA3" -F "$FORM_DATA4" -F "$FORM_DATA5" -H 'Content-Type: multipart/form-data' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log)
if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then
echo "[ $SERVICE_NAME ] Content is as expected."
else
echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT"
docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
exit 1
fi
else
echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
exit 1
fi
sleep 1s
}
function validate_microservices() {
# Check if the microservices are running correctly.
# tgi for llm service
validate_services_json \
"${host_ip}:8008/generate" \
"generated_text" \
"tgi-server" \
"tgi-server" \
'{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}'
# llm microservice
validate_services_json \
"${host_ip}:9000/v1/docsum" \
validate_service \
"${host_ip}:${LLM_PORT}/v1/docsum" \
"text" \
"llm-docsum-tgi" \
"llm-docsum-server" \
"llm-docsum-vllm" \
"docsum-xeon-llm-server" \
"json" \
'{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
# whisper microservice
ulimit -s 65536
validate_services_json \
validate_service \
"${host_ip}:7066/v1/asr" \
'{"asr_result":"well"}' \
"whisper" \
"whisper-server" \
"docsum-xeon-whisper-server" \
"json" \
"{\"audio\": \"$(input_data_for_test "audio")\"}"
}
function validate_megaservice_text() {
echo ">>> Checking text data in json format"
validate_services_json \
"${host_ip}:8888/v1/docsum" \
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"[DONE]" \
"docsum-xeon-backend-server" \
"docsum-xeon-backend-server" \
"json" \
'{"type": "text", "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
echo ">>> Checking text data in form format, set language=en"
validate_services_form \
"${host_ip}:8888/v1/docsum" \
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"[DONE]" \
"docsum-xeon-backend-server" \
"docsum-xeon-backend-server" \
"media" "" \
"type=text" \
"messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." \
"max_tokens=32" \
@@ -202,11 +197,12 @@ function validate_megaservice_text() {
"stream=True"
echo ">>> Checking text data in form format, set language=zh"
validate_services_form \
"${host_ip}:8888/v1/docsum" \
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"[DONE]" \
"docsum-xeon-backend-server" \
"docsum-xeon-backend-server" \
"media" "" \
"type=text" \
"messages=2024年9月26日北京——今日英特尔正式发布英特尔® 至强® 6性能核处理器代号Granite Rapids为AI、数据分析、科学计算等计算密集型业务提供卓越性能。" \
"max_tokens=32" \
@@ -214,120 +210,136 @@ function validate_megaservice_text() {
"stream=True"
echo ">>> Checking text data in form format, upload file"
validate_services_form \
"${host_ip}:8888/v1/docsum" \
"[DONE]" \
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"TEI" \
"docsum-xeon-backend-server" \
"docsum-xeon-backend-server" \
"media" "" \
"type=text" \
"messages=" \
"files=@$ROOT_FOLDER/data/short.txt" \
"max_tokens=32" \
"language=en"
"language=en" \
"stream=False"
}
function validate_megaservice_multimedia() {
echo ">>> Checking audio data in json format"
validate_services_json \
"${host_ip}:8888/v1/docsum" \
"[DONE]" \
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"well" \
"docsum-xeon-backend-server" \
"docsum-xeon-backend-server" \
"{\"type\": \"audio\", \"messages\": \"$(input_data_for_test "audio")\"}"
"json" \
"{\"type\": \"audio\", \"messages\": \"$(input_data_for_test "audio")\", \"stream\": \"False\"}"
echo ">>> Checking audio data in form format"
validate_services_form \
"${host_ip}:8888/v1/docsum" \
"[DONE]" \
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"you" \
"docsum-xeon-backend-server" \
"docsum-xeon-backend-server" \
"media" "" \
"type=audio" \
"messages=UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA" \
"max_tokens=32" \
"language=en" \
"stream=True"
"stream=False"
echo ">>> Checking video data in json format"
validate_services_json \
"${host_ip}:8888/v1/docsum" \
"[DONE]" \
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"bye" \
"docsum-xeon-backend-server" \
"docsum-xeon-backend-server" \
"{\"type\": \"video\", \"messages\": \"$(input_data_for_test "video")\"}"
"json" \
"{\"type\": \"video\", \"messages\": \"$(input_data_for_test "video")\", \"stream\": \"False\"}"
echo ">>> Checking video data in form format"
validate_services_form \
"${host_ip}:8888/v1/docsum" \
"[DONE]" \
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"bye" \
"docsum-xeon-backend-server" \
"docsum-xeon-backend-server" \
"media" "" \
"type=video" \
"messages=\"$(input_data_for_test "video")\"" \
"max_tokens=32" \
"language=en" \
"stream=True"
"stream=False"
}
function validate_megaservice_long_text() {
echo ">>> Checking long text data in form format, set summary_type=auto"
validate_services_form \
"${host_ip}:8888/v1/docsum" \
"[DONE]" \
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"Intel" \
"docsum-xeon-backend-server" \
"docsum-xeon-backend-server" \
"media" "" \
"type=text" \
"messages=" \
"files=@$ROOT_FOLDER/data/long.txt" \
"max_tokens=128" \
"summary_type=auto"
"summary_type=auto" \
"stream=False"
echo ">>> Checking long text data in form format, set summary_type=stuff"
validate_services_form \
"${host_ip}:8888/v1/docsum" \
"[DONE]" \
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"TEI" \
"docsum-xeon-backend-server" \
"docsum-xeon-backend-server" \
"media" "" \
"type=text" \
"messages=" \
"files=@$ROOT_FOLDER/data/long.txt" \
"files=@$ROOT_FOLDER/data/short.txt" \
"max_tokens=128" \
"summary_type=stuff"
"summary_type=stuff" \
"stream=False"
echo ">>> Checking long text data in form format, set summary_type=truncate"
validate_services_form \
"${host_ip}:8888/v1/docsum" \
"[DONE]" \
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"Intel" \
"docsum-xeon-backend-server" \
"docsum-xeon-backend-server" \
"media" "" \
"type=text" \
"messages=" \
"files=@$ROOT_FOLDER/data/long.txt" \
"max_tokens=128" \
"summary_type=truncate"
"summary_type=truncate" \
"stream=False"
echo ">>> Checking long text data in form format, set summary_type=map_reduce"
validate_services_form \
"${host_ip}:8888/v1/docsum" \
"[DONE]" \
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"Intel" \
"docsum-xeon-backend-server" \
"docsum-xeon-backend-server" \
"media" "" \
"type=text" \
"messages=" \
"files=@$ROOT_FOLDER/data/long.txt" \
"max_tokens=128" \
"summary_type=map_reduce"
"summary_type=map_reduce" \
"stream=False"
echo ">>> Checking long text data in form format, set summary_type=refine"
validate_services_form \
"${host_ip}:8888/v1/docsum" \
"[DONE]" \
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"Intel" \
"docsum-xeon-backend-server" \
"docsum-xeon-backend-server" \
"media" "" \
"type=text" \
"messages=" \
"files=@$ROOT_FOLDER/data/long.txt" \
"max_tokens=128" \
"summary_type=refine"
"summary_type=refine" \
"stream=False"
}
function stop_docker() {

View File

@@ -0,0 +1,386 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -xe
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
export http_proxy=$http_proxy
export https_proxy=$https_proxy
export host_ip=$(hostname -I | awk '{print $1}')
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export no_proxy="${no_proxy},${host_ip}"
export MODEL_CACHE=${model_cache:-"./data"}
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export LLM_ENDPOINT_PORT=8008
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export MAX_INPUT_TOKENS=2048
export MAX_TOTAL_TOKENS=4096
export LLM_PORT=9000
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
export DocSum_COMPONENT_NAME="OpeaDocSumTgi"
export MEGA_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export ASR_SERVICE_HOST_IP=${host_ip}
export FRONTEND_SERVICE_PORT=5173
export BACKEND_SERVICE_PORT=8888
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum"
export LOGFLAG=True
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
# Get the root folder of the current script
ROOT_FOLDER=$(dirname "$(readlink -f "$0")")
function build_docker_images() {
opea_branch=${opea_branch:-"main"}
cd $WORKPATH/docker_image_build
git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
pushd GenAIComps
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
popd && sleep 1s
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="docsum docsum-gradio-ui whisper llm-docsum"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/tgi-gaudi:2.3.1
docker images && sleep 1s
}
function start_services() {
cd $WORKPATH/docker_compose/intel/hpu/gaudi
docker compose -f compose_tgi.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
sleep 1m
}
get_base64_str() {
local file_name=$1
base64 -w 0 "$file_name"
}
# Function to generate input data for testing based on the document type
input_data_for_test() {
local document_type=$1
case $document_type in
("text")
echo "THIS IS A TEST >>>> and a number of states are starting to adopt them voluntarily special correspondent john delenco of education week reports it takes just 10 minutes to cross through gillette wyoming this small city sits in the northeast corner of the state surrounded by 100s of miles of prairie but schools here in campbell county are on the edge of something big the next generation science standards you are going to build a strand of dna and you are going to decode it and figure out what that dna actually says for christy mathis at sage valley junior high school the new standards are about learning to think like a scientist there is a lot of really good stuff in them every standard is a performance task it is not you know the child needs to memorize these things it is the student needs to be able to do some pretty intense stuff we are analyzing we are critiquing we are."
;;
("audio")
get_base64_str "$ROOT_FOLDER/data/test.wav"
;;
("video")
get_base64_str "$ROOT_FOLDER/data/test.mp4"
;;
(*)
echo "Invalid document type" >&2
exit 1
;;
esac
}
function validate_service() {
local URL="$1"
local EXPECTED_RESULT="$2"
local SERVICE_NAME="$3"
local DOCKER_NAME="$4"
local VALIDATE_TYPE="$5"
local INPUT_DATA="$6"
local FORM_DATA1="$7"
local FORM_DATA2="$8"
local FORM_DATA3="$9"
local FORM_DATA4="${10}"
local FORM_DATA5="${11}"
local FORM_DATA6="${12}"
if [[ $VALIDATE_TYPE == *"json"* ]]; then
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
else
CURL_CMD=(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F "$FORM_DATA1" -F "$FORM_DATA2" -F "$FORM_DATA3" -F "$FORM_DATA4" -F "$FORM_DATA5" -H 'Content-Type: multipart/form-data' "$URL")
if [[ -n "$FORM_DATA6" ]]; then
CURL_CMD+=(-F "$FORM_DATA6")
fi
HTTP_RESPONSE=$("${CURL_CMD[@]}")
fi
HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://')
RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g')
docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
# check response status
if [ "$HTTP_STATUS" -ne "200" ]; then
echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
exit 1
else
echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
fi
# check response body
if [[ "$RESPONSE_BODY" != *"$EXPECTED_RESULT"* ]]; then
echo "EXPECTED_RESULT==> $EXPECTED_RESULT"
echo "RESPONSE_BODY==> $RESPONSE_BODY"
echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY"
exit 1
else
echo "[ $SERVICE_NAME ] Content is as expected."
fi
sleep 1s
}
function validate_microservices() {
# Check if the microservices are running correctly.
# tgi for llm service
validate_service \
"${host_ip}:${LLM_ENDPOINT_PORT}/generate" \
"generated_text" \
"tgi-server" \
"docsum-gaudi-tgi-server" \
"json" \
'{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}'
# llm microservice
validate_service \
"${host_ip}:${LLM_PORT}/v1/docsum" \
"text" \
"llm-docsum-tgi" \
"docsum-gaudi-llm-server" \
"json" \
'{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
# whisper microservice
ulimit -s 65536
validate_service \
"${host_ip}:7066/v1/asr" \
'{"asr_result":"well"}' \
"whisper" \
"docsum-gaudi-whisper-server" \
"json" \
"{\"audio\": \"$(input_data_for_test "audio")\"}"
}
function validate_megaservice_text() {
echo ">>> Checking text data in json format"
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"[DONE]" \
"docsum-gaudi-backend-server" \
"docsum-gaudi-backend-server" \
"json" \
'{"type": "text", "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
echo ">>> Checking text data in form format, set language=en"
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"[DONE]" \
"docsum-gaudi-backend-server" \
"docsum-gaudi-backend-server" \
"media" "" \
"type=text" \
"messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." \
"max_tokens=32" \
"language=en" \
"stream=True"
echo ">>> Checking text data in form format, set language=zh"
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"[DONE]" \
"docsum-gaudi-backend-server" \
"docsum-gaudi-backend-server" \
"media" "" \
"type=text" \
"messages=2024年9月26日北京——今日英特尔正式发布英特尔® 至强® 6性能核处理器代号Granite Rapids为AI、数据分析、科学计算等计算密集型业务提供卓越性能。" \
"max_tokens=32" \
"language=zh" \
"stream=True"
echo ">>> Checking text data in form format, upload file"
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"TEI" \
"docsum-gaudi-backend-server" \
"docsum-gaudi-backend-server" \
"media" "" \
"type=text" \
"messages=" \
"files=@$ROOT_FOLDER/data/short.txt" \
"max_tokens=32" \
"language=en" \
"stream=False"
}
function validate_megaservice_multimedia() {
echo ">>> Checking audio data in json format"
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"well" \
"docsum-gaudi-backend-server" \
"docsum-gaudi-backend-server" \
"json" \
"{\"type\": \"audio\", \"messages\": \"$(input_data_for_test "audio")\", \"stream\": \"False\"}"
echo ">>> Checking audio data in form format"
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"you" \
"docsum-gaudi-backend-server" \
"docsum-gaudi-backend-server" \
"media" "" \
"type=audio" \
"messages=UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA" \
"max_tokens=32" \
"language=en" \
"stream=False"
echo ">>> Checking video data in json format"
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"bye" \
"docsum-gaudi-backend-server" \
"docsum-gaudi-backend-server" \
"json" \
"{\"type\": \"video\", \"messages\": \"$(input_data_for_test "video")\", \"stream\": \"False\"}"
echo ">>> Checking video data in form format"
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"bye" \
"docsum-gaudi-backend-server" \
"docsum-gaudi-backend-server" \
"media" "" \
"type=video" \
"messages=\"$(input_data_for_test "video")\"" \
"max_tokens=32" \
"language=en" \
"stream=False"
}
function validate_megaservice_long_text() {
echo ">>> Checking long text data in form format, set summary_type=auto"
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"Intel" \
"docsum-gaudi-backend-server" \
"docsum-gaudi-backend-server" \
"media" "" \
"type=text" \
"messages=" \
"files=@$ROOT_FOLDER/data/long.txt" \
"max_tokens=128" \
"summary_type=auto" \
"stream=False"
echo ">>> Checking long text data in form format, set summary_type=stuff"
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"TEI" \
"docsum-gaudi-backend-server" \
"docsum-gaudi-backend-server" \
"media" "" \
"type=text" \
"messages=" \
"files=@$ROOT_FOLDER/data/short.txt" \
"max_tokens=128" \
"summary_type=stuff" \
"stream=False"
echo ">>> Checking long text data in form format, set summary_type=truncate"
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"Intel" \
"docsum-gaudi-backend-server" \
"docsum-gaudi-backend-server" \
"media" "" \
"type=text" \
"messages=" \
"files=@$ROOT_FOLDER/data/long.txt" \
"max_tokens=128" \
"summary_type=truncate" \
"stream=False"
echo ">>> Checking long text data in form format, set summary_type=map_reduce"
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"Intel" \
"docsum-gaudi-backend-server" \
"docsum-gaudi-backend-server" \
"media" "" \
"type=text" \
"messages=" \
"files=@$ROOT_FOLDER/data/long.txt" \
"max_tokens=128" \
"summary_type=map_reduce" \
"stream=False"
echo ">>> Checking long text data in form format, set summary_type=refine"
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"Intel" \
"docsum-gaudi-backend-server" \
"docsum-gaudi-backend-server" \
"media" "" \
"type=text" \
"messages=" \
"files=@$ROOT_FOLDER/data/long.txt" \
"max_tokens=128" \
"summary_type=refine" \
"stream=False"
}
function stop_docker() {
cd $WORKPATH/docker_compose/intel/hpu/gaudi
docker compose -f compose_tgi.yaml stop && docker compose rm -f
}
function main() {
echo "==========================================="
echo ">>>> Stopping any running Docker containers..."
stop_docker
echo "==========================================="
if [[ "$IMAGE_REPO" == "opea" ]]; then
echo ">>>> Building Docker images..."
build_docker_images
fi
echo "==========================================="
echo ">>>> Starting Docker services..."
start_services
echo "==========================================="
echo ">>>> Validating microservices..."
validate_microservices
echo "==========================================="
echo ">>>> Validating megaservice for text..."
validate_megaservice_text
echo "==========================================="
echo ">>>> Validating megaservice for multimedia..."
validate_megaservice_multimedia
echo "==========================================="
echo ">>>> Validating megaservice for long text..."
validate_megaservice_long_text
echo "==========================================="
echo ">>>> Stopping Docker containers..."
stop_docker
echo "==========================================="
echo ">>>> Pruning Docker system..."
echo y | docker system prune
echo ">>>> Docker system pruned successfully."
echo "==========================================="
}
main

View File

@@ -0,0 +1,385 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -xe
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
export http_proxy=$http_proxy
export https_proxy=$https_proxy
export host_ip=$(hostname -I | awk '{print $1}')
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export no_proxy="${no_proxy},${host_ip}"
export MODEL_CACHE=${model_cache:-"./data"}
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
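# LLM serving backend (TGI) and DocSum microservice configuration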
export LLM_ENDPOINT_PORT=8008
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export MAX_INPUT_TOKENS=2048
export MAX_TOTAL_TOKENS=4096
export LLM_PORT=9000
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
export DocSum_COMPONENT_NAME="OpeaDocSumTgi"
export MEGA_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export ASR_SERVICE_HOST_IP=${host_ip}
export FRONTEND_SERVICE_PORT=5173
export BACKEND_SERVICE_PORT=8888
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum"
export LOGFLAG=True
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
# Get the root folder of the current script
ROOT_FOLDER=$(dirname "$(readlink -f "$0")")
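# Clone GenAIComps at ${opea_branch}, build the comps-base image, then build the DocSum service images in service_list.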
function build_docker_images() {
opea_branch=${opea_branch:-"main"}
cd $WORKPATH/docker_image_build
git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
pushd GenAIComps
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
popd && sleep 1s
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="docsum docsum-gradio-ui whisper llm-docsum"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
# Pre-pull the TGI serving image used by compose_tgi.yaml
docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
docker images && sleep 1s
}
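# Start the TGI-based DocSum stack on Xeon with compose_tgi.yaml and wait for the services to initialize.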
function start_services() {
cd $WORKPATH/docker_compose/intel/cpu/xeon/
docker compose -f compose_tgi.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
sleep 1m
}
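# Encode a file as a single-line base64 string for embedding in request payloads.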
get_base64_str() {
local file_name=$1
base64 -w 0 "$file_name"
}
# Function to generate input data for testing based on the document type
input_data_for_test() {
local document_type=$1
case $document_type in
("text")
echo "THIS IS A TEST >>>> and a number of states are starting to adopt them voluntarily special correspondent john delenco of education week reports it takes just 10 minutes to cross through gillette wyoming this small city sits in the northeast corner of the state surrounded by 100s of miles of prairie but schools here in campbell county are on the edge of something big the next generation science standards you are going to build a strand of dna and you are going to decode it and figure out what that dna actually says for christy mathis at sage valley junior high school the new standards are about learning to think like a scientist there is a lot of really good stuff in them every standard is a performance task it is not you know the child needs to memorize these things it is the student needs to be able to do some pretty intense stuff we are analyzing we are critiquing we are."
;;
("audio")
get_base64_str "$ROOT_FOLDER/data/test.wav"
;;
("video")
get_base64_str "$ROOT_FOLDER/data/test.mp4"
;;
(*)
echo "Invalid document type" >&2
exit 1
;;
esac
}
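# validate_service <URL> <EXPECTED_RESULT> <SERVICE_NAME> <DOCKER_NAME> <VALIDATE_TYPE> <INPUT_DATA> [FORM_DATA1..FORM_DATA6]
# Posts INPUT_DATA as JSON when VALIDATE_TYPE contains "json"; otherwise posts the FORM_DATA fields as multipart form data.
# Captures the container logs, then aborts the test run if the HTTP status is not 200 or the response body does not contain EXPECTED_RESULT.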
function validate_service() {
local URL="$1"
local EXPECTED_RESULT="$2"
local SERVICE_NAME="$3"
local DOCKER_NAME="$4"
local VALIDATE_TYPE="$5"
local INPUT_DATA="$6"
local FORM_DATA1="$7"
local FORM_DATA2="$8"
local FORM_DATA3="$9"
local FORM_DATA4="${10}"
local FORM_DATA5="${11}"
local FORM_DATA6="${12}"
if [[ $VALIDATE_TYPE == *"json"* ]]; then
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
else
CURL_CMD=(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F "$FORM_DATA1" -F "$FORM_DATA2" -F "$FORM_DATA3" -F "$FORM_DATA4" -F "$FORM_DATA5" -H 'Content-Type: multipart/form-data' "$URL")
if [[ -n "$FORM_DATA6" ]]; then
CURL_CMD+=(-F "$FORM_DATA6")
fi
HTTP_RESPONSE=$("${CURL_CMD[@]}")
fi
HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://')
RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g')
docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
# check response status
if [ "$HTTP_STATUS" -ne "200" ]; then
echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
exit 1
else
echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
fi
# check response body
if [[ "$RESPONSE_BODY" != *"$EXPECTED_RESULT"* ]]; then
echo "EXPECTED_RESULT==> $EXPECTED_RESULT"
echo "RESPONSE_BODY==> $RESPONSE_BODY"
echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY"
exit 1
else
echo "[ $SERVICE_NAME ] Content is as expected."
fi
sleep 1s
}
function validate_microservices() {
# Check if the microservices are running correctly.
# tgi for llm service
validate_service \
"${host_ip}:${LLM_ENDPOINT_PORT}/generate" \
"generated_text" \
"tgi-server" \
"docsum-xeon-tgi-server" \
"json" \
'{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}'
# llm microservice
validate_service \
"${host_ip}:${LLM_PORT}/v1/docsum" \
"text" \
"llm-docsum-tgi" \
"docsum-xeon-llm-server" \
"json" \
'{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
# whisper microservice
ulimit -s 65536
validate_service \
"${host_ip}:7066/v1/asr" \
'{"asr_result":"well"}' \
"whisper" \
"docsum-xeon-whisper-server" \
"json" \
"{\"audio\": \"$(input_data_for_test "audio")\"}"
}
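# Validate the DocSum megaservice with plain-text input via JSON, form data (language=en and language=zh), and file upload.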
function validate_megaservice_text() {
echo ">>> Checking text data in json format"
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"[DONE]" \
"docsum-xeon-backend-server" \
"docsum-xeon-backend-server" \
"json" \
'{"type": "text", "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
echo ">>> Checking text data in form format, set language=en"
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"[DONE]" \
"docsum-xeon-backend-server" \
"docsum-xeon-backend-server" \
"media" "" \
"type=text" \
"messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." \
"max_tokens=32" \
"language=en" \
"stream=True"
echo ">>> Checking text data in form format, set language=zh"
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"[DONE]" \
"docsum-xeon-backend-server" \
"docsum-xeon-backend-server" \
"media" "" \
"type=text" \
"messages=2024年9月26日北京——今日英特尔正式发布英特尔® 至强® 6性能核处理器代号Granite Rapids为AI、数据分析、科学计算等计算密集型业务提供卓越性能。" \
"max_tokens=32" \
"language=zh" \
"stream=True"
echo ">>> Checking text data in form format, upload file"
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"TEI" \
"docsum-xeon-backend-server" \
"docsum-xeon-backend-server" \
"media" "" \
"type=text" \
"messages=" \
"files=@$ROOT_FOLDER/data/short.txt" \
"max_tokens=32" \
"language=en" \
"stream=False"
}
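# Validate the DocSum megaservice with audio and video inputs in both JSON and form formats.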
function validate_megaservice_multimedia() {
echo ">>> Checking audio data in json format"
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"well" \
"docsum-xeon-backend-server" \
"docsum-xeon-backend-server" \
"json" \
"{\"type\": \"audio\", \"messages\": \"$(input_data_for_test "audio")\", \"stream\": \"False\"}"
echo ">>> Checking audio data in form format"
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"you" \
"docsum-xeon-backend-server" \
"docsum-xeon-backend-server" \
"media" "" \
"type=audio" \
"messages=UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA" \
"max_tokens=32" \
"language=en" \
"stream=False"
echo ">>> Checking video data in json format"
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"bye" \
"docsum-xeon-backend-server" \
"docsum-xeon-backend-server" \
"json" \
"{\"type\": \"video\", \"messages\": \"$(input_data_for_test "video")\", \"stream\": \"False\"}"
echo ">>> Checking video data in form format"
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"bye" \
"docsum-xeon-backend-server" \
"docsum-xeon-backend-server" \
"media" "" \
"type=video" \
"messages=\"$(input_data_for_test "video")\"" \
"max_tokens=32" \
"language=en" \
"stream=False"
}
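# Validate the DocSum megaservice with document uploads, exercising each summary_type: auto, stuff, truncate, map_reduce and refine.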
function validate_megaservice_long_text() {
echo ">>> Checking long text data in form format, set summary_type=auto"
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"Intel" \
"docsum-xeon-backend-server" \
"docsum-xeon-backend-server" \
"media" "" \
"type=text" \
"messages=" \
"files=@$ROOT_FOLDER/data/long.txt" \
"max_tokens=128" \
"summary_type=auto" \
"stream=False"
echo ">>> Checking long text data in form format, set summary_type=stuff"
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"TEI" \
"docsum-xeon-backend-server" \
"docsum-xeon-backend-server" \
"media" "" \
"type=text" \
"messages=" \
"files=@$ROOT_FOLDER/data/short.txt" \
"max_tokens=128" \
"summary_type=stuff" \
"stream=False"
echo ">>> Checking long text data in form format, set summary_type=truncate"
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"Intel" \
"docsum-xeon-backend-server" \
"docsum-xeon-backend-server" \
"media" "" \
"type=text" \
"messages=" \
"files=@$ROOT_FOLDER/data/long.txt" \
"max_tokens=128" \
"summary_type=truncate" \
"stream=False"
echo ">>> Checking long text data in form format, set summary_type=map_reduce"
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"Intel" \
"docsum-xeon-backend-server" \
"docsum-xeon-backend-server" \
"media" "" \
"type=text" \
"messages=" \
"files=@$ROOT_FOLDER/data/long.txt" \
"max_tokens=128" \
"summary_type=map_reduce" \
"stream=False"
echo ">>> Checking long text data in form format, set summary_type=refine"
validate_service \
"${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \
"Intel" \
"docsum-xeon-backend-server" \
"docsum-xeon-backend-server" \
"media" "" \
"type=text" \
"messages=" \
"files=@$ROOT_FOLDER/data/long.txt" \
"max_tokens=128" \
"summary_type=refine" \
"stream=False"
}
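# Stop and remove the containers started from compose_tgi.yaml on Xeon.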
function stop_docker() {
cd $WORKPATH/docker_compose/intel/cpu/xeon/
docker compose -f compose_tgi.yaml stop && docker compose -f compose_tgi.yaml rm -f
}
function main() {
echo "==========================================="
echo ">>>> Stopping any running Docker containers..."
stop_docker
echo "==========================================="
if [[ "$IMAGE_REPO" == "opea" ]]; then
echo ">>>> Building Docker images..."
build_docker_images
fi
echo "==========================================="
echo ">>>> Starting Docker services..."
start_services
echo "==========================================="
echo ">>>> Validating microservices..."
validate_microservices
echo "==========================================="
echo ">>>> Validating megaservice for text..."
validate_megaservice_text
echo "==========================================="
echo ">>>> Validating megaservice for multimedia..."
validate_megaservice_multimedia
echo "==========================================="
echo ">>>> Validating megaservice for long text..."
validate_megaservice_long_text
echo "==========================================="
echo ">>>> Stopping Docker containers..."
stop_docker
echo "==========================================="
echo ">>>> Pruning Docker system..."
echo y | docker system prune
echo ">>>> Docker system pruned successfully."
echo "==========================================="
}
main