Add guardrails in the ChatQnA pipeline (#407)

Signed-off-by: lvliang-intel <liang1.lv@intel.com>
Author: lvliang-intel
Date: 2024-07-17 21:09:54 +08:00
Committed by: GitHub
parent 97da49f61e
commit 9551594164
6 changed files with 672 additions and 2 deletions


@@ -0,0 +1,32 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
FROM python:3.11-slim
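# Install system libraries and tooling needed by the service and its dependencies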
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
libgl1-mesa-glx \
libjemalloc-dev \
vim \
git
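# Create a non-root user to run the service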
RUN useradd -m -s /bin/bash user && \
mkdir -p /home/user && \
chown -R user /home/user/
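# Fetch OPEA GenAIComps and install its Python requirements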
RUN cd /home/user/ && \
git clone https://github.com/opea-project/GenAIComps.git
RUN cd /home/user/GenAIComps && pip install --no-cache-dir --upgrade pip && \
pip install -r /home/user/GenAIComps/requirements.txt
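# Add the guardrails-enabled ChatQnA megaservice entrypoint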
COPY ./chatqna_guardrails.py /home/user/chatqna_guardrails.py
ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps
USER user
WORKDIR /home/user
ENTRYPOINT ["python", "chatqna_guardrails.py"]


@@ -1,7 +1,6 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import asyncio
import os
from comps import ChatQnAGateway, MicroService, ServiceOrchestrator, ServiceType


@@ -0,0 +1,79 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import os
from comps import ChatQnAGateway, MicroService, ServiceOrchestrator, ServiceType
MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "0.0.0.0")
MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))
GUARDRAIL_SERVICE_HOST_IP = os.getenv("GUARDRAIL_SERVICE_HOST_IP", "0.0.0.0")
GUARDRAIL_SERVICE_PORT = int(os.getenv("GUARDRAIL_SERVICE_PORT", 9090))
EMBEDDING_SERVICE_HOST_IP = os.getenv("EMBEDDING_SERVICE_HOST_IP", "0.0.0.0")
EMBEDDING_SERVICE_PORT = int(os.getenv("EMBEDDING_SERVICE_PORT", 6000))
RETRIEVER_SERVICE_HOST_IP = os.getenv("RETRIEVER_SERVICE_HOST_IP", "0.0.0.0")
RETRIEVER_SERVICE_PORT = int(os.getenv("RETRIEVER_SERVICE_PORT", 7000))
RERANK_SERVICE_HOST_IP = os.getenv("RERANK_SERVICE_HOST_IP", "0.0.0.0")
RERANK_SERVICE_PORT = int(os.getenv("RERANK_SERVICE_PORT", 8000))
LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0")
LLM_SERVICE_PORT = int(os.getenv("LLM_SERVICE_PORT", 9000))
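# Pipeline wiring (see add_remote_service): guardrail -> embedding -> retriever -> rerank -> llm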
class ChatQnAService:
    def __init__(self, host="0.0.0.0", port=8000):
        self.host = host
        self.port = port
        self.megaservice = ServiceOrchestrator()

    def add_remote_service(self):
        guardrail = MicroService(
            name="guardrail",
            host=GUARDRAIL_SERVICE_HOST_IP,
            port=GUARDRAIL_SERVICE_PORT,
            endpoint="/v1/guardrails",
            use_remote_service=True,
            service_type=ServiceType.GUARDRAIL,
        )
        embedding = MicroService(
            name="embedding",
            host=EMBEDDING_SERVICE_HOST_IP,
            port=EMBEDDING_SERVICE_PORT,
            endpoint="/v1/embeddings",
            use_remote_service=True,
            service_type=ServiceType.EMBEDDING,
        )
        retriever = MicroService(
            name="retriever",
            host=RETRIEVER_SERVICE_HOST_IP,
            port=RETRIEVER_SERVICE_PORT,
            endpoint="/v1/retrieval",
            use_remote_service=True,
            service_type=ServiceType.RETRIEVER,
        )
        rerank = MicroService(
            name="rerank",
            host=RERANK_SERVICE_HOST_IP,
            port=RERANK_SERVICE_PORT,
            endpoint="/v1/reranking",
            use_remote_service=True,
            service_type=ServiceType.RERANK,
        )
        llm = MicroService(
            name="llm",
            host=LLM_SERVICE_HOST_IP,
            port=LLM_SERVICE_PORT,
            endpoint="/v1/chat/completions",
            use_remote_service=True,
            service_type=ServiceType.LLM,
        )
        self.megaservice.add(guardrail).add(embedding).add(retriever).add(rerank).add(llm)
        self.megaservice.flow_to(guardrail, embedding)
        self.megaservice.flow_to(embedding, retriever)
        self.megaservice.flow_to(retriever, rerank)
        self.megaservice.flow_to(rerank, llm)
        self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)


if __name__ == "__main__":
    chatqna = ChatQnAService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT)
    chatqna.add_remote_service()


@@ -65,6 +65,15 @@ docker build --no-cache -t opea/chatqna:latest --build-arg https_proxy=$https_pr
cd ../../..
```
If you want to enable the guardrails microservice in the pipeline, use the command below instead:
```bash
git clone https://github.com/opea-project/GenAIExamples.git
cd GenAIExamples/ChatQnA/docker
docker build --no-cache -t opea/chatqna-guardrails:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile_guardrails .
cd ../../..
```
### 9. Build UI Docker Image
Construct the frontend Docker image using the command below:
@@ -89,6 +98,16 @@ docker build --no-cache -t opea/chatqna-conversation-ui:latest --build-arg https
cd ../../../..
```
### 11. Build Guardrails Docker Image (Optional)
To fortify AI initiatives in production, the Guardrails microservice screens model inputs and outputs, helping to build trustworthy, safe, and secure LLM-based applications.
```bash
cd GenAIComps
docker build -t opea/guardrails-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/guardrails/langchain/docker/Dockerfile .
cd ..
```
Then run the command `docker images`; you will see the following 8 Docker images:
1. `opea/embedding-tei:latest`
@@ -97,13 +116,17 @@ Then run the command `docker images`, you will have the following 8 Docker Image
4. `opea/llm-tgi:latest`
5. `opea/tei-gaudi:latest`
6. `opea/dataprep-redis:latest`
7. `opea/chatqna:latest`
7. `opea/chatqna:latest` or `opea/chatqna-guardrails:latest`
8. `opea/chatqna-ui:latest`
If the Conversation React UI is built, you will find one more image:
9. `opea/chatqna-conversation-ui:latest`
If the Guardrails Docker image is built, you will find one more image:
10. `opea/guardrails-tgi:latest`
## 🚀 Start MicroServices and MegaService
### Setup Environment Variables
@@ -134,6 +157,14 @@ export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6008/v1/dataprep/get_file"
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6009/v1/dataprep/delete_file"
```
If the guardrails microservice is enabled in the pipeline, the following environment variables also need to be set.
```bash
export GURADRAILS_MODEL_ID="meta-llama/Meta-Llama-Guard-2-8B"
export SAFETY_GUARD_MODEL_ID="meta-llama/Meta-Llama-Guard-2-8B"
export SAFETY_GUARD_ENDPOINT="http://${host_ip}:8088"
export GUARDRAIL_SERVICE_HOST_IP=${host_ip}
```
Note: Please replace `host_ip` with your external IP address; do **NOT** use localhost.
### Start all the services Docker Containers
@@ -143,6 +174,13 @@ cd GenAIExamples/ChatQnA/docker/gaudi/
docker compose -f docker_compose.yaml up -d
```
If you want to enable the guardrails microservice in the pipeline, use the command below instead:
```bash
cd GenAIExamples/ChatQnA/docker/gaudi/
docker compose -f docker_compose_guardrails.yaml up -d
```
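Once these containers are up, a quick end-to-end smoke test can be sent to the guardrails-enabled megaservice gateway (a sketch; `host_ip` and port 8888 assume the defaults used above, and the detailed validation steps follow below):
```bash
curl http://${host_ip}:8888/v1/chatqna \
    -H "Content-Type: application/json" \
    -d '{"messages": "What is the revenue of Nike in 2023?"}'
```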
### Validate MicroServices and MegaService
Follow the instructions to validate MicroServices.
@@ -276,6 +314,15 @@ curl -X POST "http://${host_ip}:6009/v1/dataprep/delete_file" \
-H "Content-Type: application/json"
```
10. Guardrails (Optional)
```bash
curl http://${host_ip}:9090/v1/guardrails \
-X POST \
-d '{"text":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}' \
-H 'Content-Type: application/json'
```
## Enable LangSmith for Monitoring Application (Optional)
LangSmith offers tools to debug, evaluate, and monitor language models and intelligent agents. It can be used to assess benchmark data for each microservice. Before launching your services with `docker compose -f docker_compose.yaml up -d`, you need to enable LangSmith tracing by setting the `LANGCHAIN_TRACING_V2` environment variable to true and configuring your LangChain API key.
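For example (a sketch; the key shown is a placeholder, substitute your own LangChain API key):
```bash
export LANGCHAIN_TRACING_V2=true
export LANGCHAIN_API_KEY="your-langchain-api-key"  # placeholder
```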


@@ -0,0 +1,244 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
version: "3.8"
services:
redis-vector-db:
image: redis/redis-stack:7.2.0-v9
container_name: redis-vector-db
ports:
- "6379:6379"
- "8001:8001"
dataprep-redis-service:
image: opea/dataprep-redis:latest
container_name: dataprep-redis-server
depends_on:
- redis-vector-db
ports:
- "6007:6007"
- "6008:6008"
- "6009:6009"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
REDIS_URL: ${REDIS_URL}
INDEX_NAME: ${INDEX_NAME}
tgi-guardrails-service:
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
container_name: tgi-guardrails-server
ports:
- "8088:80"
volumes:
- "./data:/data"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
runtime: habana
cap_add:
- SYS_NICE
ipc: host
command: --model-id ${GURADRAILS_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048
guardrails:
image: opea/guardrails-tgi:latest
container_name: guardrails-tgi-gaudi-server
ports:
- "9090:9090"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
SAFETY_GUARD_MODEL_ID: ${SAFETY_GUARD_MODEL_ID}
SAFETY_GUARD_ENDPOINT: ${SAFETY_GUARD_ENDPOINT}
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
tei-embedding-service:
image: opea/tei-gaudi:latest
container_name: tei-embedding-gaudi-server
ports:
- "8090:80"
volumes:
- "./data:/data"
runtime: habana
cap_add:
- SYS_NICE
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
MAX_WARMUP_SEQUENCE_LENGTH: 512
command: --model-id ${EMBEDDING_MODEL_ID}
embedding:
image: opea/embedding-tei:latest
container_name: embedding-tei-server
depends_on:
- tei-embedding-service
ports:
- "6000:6000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-embedding-service"
restart: unless-stopped
retriever:
image: opea/retriever-redis:latest
container_name: retriever-redis-server
depends_on:
- redis-vector-db
ports:
- "7000:7000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
REDIS_URL: ${REDIS_URL}
INDEX_NAME: ${INDEX_NAME}
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-retriever-service"
restart: unless-stopped
tei-reranking-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
container_name: tei-reranking-gaudi-server
ports:
- "8808:80"
volumes:
- "./data:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
reranking:
image: opea/reranking-tei:latest
container_name: reranking-tei-gaudi-server
depends_on:
- tei-reranking-service
ports:
- "8000:8000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-reranking-service"
restart: unless-stopped
tgi-service:
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
container_name: tgi-gaudi-server
ports:
- "8008:80"
volumes:
- "./data:/data"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
runtime: habana
cap_add:
- SYS_NICE
ipc: host
command: --model-id ${LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048
llm:
image: opea/llm-tgi:latest
container_name: llm-tgi-gaudi-server
depends_on:
- tgi-service
ports:
- "9000:9000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-llm-service"
restart: unless-stopped
chaqna-gaudi-backend-server:
image: opea/chatqna-guardrails:latest
container_name: chatqna-gaudi-guardrails-server
depends_on:
- redis-vector-db
- tgi-guardrails-service
- guardrails
- tei-embedding-service
- embedding
- retriever
- tei-reranking-service
- reranking
- tgi-service
- llm
ports:
- "8888:8888"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
- GUARDRAIL_SERVICE_HOST_IP=${GUARDRAIL_SERVICE_HOST_IP}
- EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
- RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
- RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP}
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
ipc: host
restart: always
chaqna-gaudi-ui-server:
image: opea/chatqna-ui:latest
container_name: chatqna-gaudi-ui-server
depends_on:
- chaqna-gaudi-backend-server
ports:
- "5173:5173"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
- UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT}
- GET_FILE=${DATAPREP_GET_FILE_ENDPOINT}
- DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT}
ipc: host
restart: always
networks:
default:
driver: bridge


@@ -0,0 +1,269 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -e
echo "IMAGE_REPO=${IMAGE_REPO}"
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')
function build_docker_images() {
cd $WORKPATH
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
docker build -t opea/embedding-tei:latest -f comps/embeddings/langchain/docker/Dockerfile .
docker build -t opea/retriever-redis:latest -f comps/retrievers/langchain/redis/docker/Dockerfile .
docker build -t opea/reranking-tei:latest -f comps/reranks/tei/docker/Dockerfile .
docker build -t opea/llm-tgi:latest -f comps/llms/text-generation/tgi/Dockerfile .
docker build -t opea/dataprep-redis:latest -f comps/dataprep/redis/langchain/docker/Dockerfile .
docker build -t opea/guardrails-tgi:latest -f comps/guardrails/langchain/docker/Dockerfile .
# cd ..
# git clone https://github.com/huggingface/tei-gaudi
# cd tei-gaudi/
# docker build --no-cache -f Dockerfile-hpu -t opea/tei-gaudi:latest .
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
cd $WORKPATH/docker
docker build --no-cache -t opea/chatqna-guardrails:latest -f Dockerfile_guardrails .
cd $WORKPATH/docker/ui
docker build --no-cache -t opea/chatqna-ui:latest -f docker/Dockerfile .
docker images
}
function start_services() {
# build tei-gaudi for each test instead of pulling it from a local registry
cd $WORKPATH
git clone https://github.com/huggingface/tei-gaudi
cd tei-gaudi/
docker build --no-cache -f Dockerfile-hpu -t opea/tei-gaudi:latest .
cd $WORKPATH/docker/gaudi
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:8090"
export TEI_RERANKING_ENDPOINT="http://${ip_address}:8808"
export TGI_LLM_ENDPOINT="http://${ip_address}:8008"
export REDIS_URL="redis://${ip_address}:6379"
export INDEX_NAME="rag-redis"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export MEGA_SERVICE_HOST_IP=${ip_address}
export EMBEDDING_SERVICE_HOST_IP=${ip_address}
export RETRIEVER_SERVICE_HOST_IP=${ip_address}
export RERANK_SERVICE_HOST_IP=${ip_address}
export LLM_SERVICE_HOST_IP=${ip_address}
export GUARDRAIL_SERVICE_HOST_IP=${ip_address}
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/chatqna"
export DATAPREP_SERVICE_ENDPOINT="http://${ip_address}:6007/v1/dataprep"
export GURADRAILS_MODEL_ID="meta-llama/Meta-Llama-Guard-2-8B"
export SAFETY_GUARD_MODEL_ID="meta-llama/Meta-Llama-Guard-2-8B"
export SAFETY_GUARD_ENDPOINT="http://${ip_address}:8088"
sed -i "s/backend_address/$ip_address/g" $WORKPATH/docker/ui/svelte/.env
if [[ "$IMAGE_REPO" != "" ]]; then
# Point the image references at the test image repository and tag
echo "using image repository $IMAGE_REPO and image tag $IMAGE_TAG"
sed -i "s#image: opea/chatqna-guardrails:latest#image: opea/chatqna:${IMAGE_TAG}#g" docker_compose_guardrails.yaml
sed -i "s#image: opea/chatqna-ui:latest#image: opea/chatqna-ui:${IMAGE_TAG}#g" docker_compose_guardrails.yaml
sed -i "s#image: opea/chatqna-conversation-ui:latest#image: opea/chatqna-conversation-ui:${IMAGE_TAG}#g" docker_compose_guardrails.yaml
sed -i "s#image: opea/*#image: ${IMAGE_REPO}opea/#g" docker_compose_guardrails.yaml
sed -i "s#image: ${IMAGE_REPO}opea/tei-gaudi:latest#image: opea/tei-gaudi:latest#g" docker_compose_guardrails.yaml
echo "cat docker_compose_guardrails.yaml"
cat docker_compose_guardrails.yaml
fi
# Start Docker Containers
docker compose -f docker_compose_guardrails.yaml up -d
n=0
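# Wait (up to ~400s) until the TGI LLM server logs show it is connected before validating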
until [[ "$n" -ge 400 ]]; do
docker logs tgi-gaudi-server > tgi_service_start.log
if grep -q Connected tgi_service_start.log; then
break
fi
sleep 1s
n=$((n+1))
done
}
function validate_services() {
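# validate_services URL EXPECTED_RESULT SERVICE_NAME DOCKER_NAME INPUT_DATA:
# POST INPUT_DATA to URL, check the response for EXPECTED_RESULT, and dump container logs on failure.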
local URL="$1"
local EXPECTED_RESULT="$2"
local SERVICE_NAME="$3"
local DOCKER_NAME="$4"
local INPUT_DATA="$5"
local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
if [ "$HTTP_STATUS" -eq 200 ]; then
echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log)
if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then
echo "[ $SERVICE_NAME ] Content is as expected."
else
echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT"
docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
exit 1
fi
else
echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
exit 1
fi
sleep 1s
}
function validate_microservices() {
# Check if the microservices are running correctly.
# tei for embedding service
validate_services \
"${ip_address}:8090/embed" \
"\[\[" \
"tei-embedding" \
"tei-embedding-gaudi-server" \
'{"inputs":"What is Deep Learning?"}'
# embedding microservice
validate_services \
"${ip_address}:6000/v1/embeddings" \
'"text":"What is Deep Learning?","embedding":\[' \
"embedding" \
"embedding-tei-server" \
'{"text":"What is Deep Learning?"}'
sleep 1m # the retriever may not respond immediately; give it extra time to become ready
# retrieval microservice
test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
validate_services \
"${ip_address}:7000/v1/retrieval" \
" " \
"retrieval" \
"retriever-redis-server" \
"{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${test_embedding}}"
# tei for rerank microservice
validate_services \
"${ip_address}:8808/rerank" \
'{"index":1,"score":' \
"tei-rerank" \
"tei-reranking-gaudi-server" \
'{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}'
# rerank microservice
validate_services \
"${ip_address}:8000/v1/reranking" \
"Deep learning is..." \
"rerank" \
"reranking-tei-gaudi-server" \
'{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}'
# tgi for llm service
validate_services \
"${ip_address}:8008/generate" \
"generated_text" \
"tgi-llm" \
"tgi-gaudi-server" \
'{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}'
# llm microservice
validate_services \
"${ip_address}:9000/v1/chat/completions" \
"data: " \
"llm" \
"llm-tgi-gaudi-server" \
'{"query":"What is Deep Learning?"}'
# tgi for guardrails service
validate_services \
"${ip_address}:8008/generate" \
"generated_text" \
"tgi-guardrails" \
"tgi-guardrails-server" \
'{"inputs":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}'
# guardrails microservice
validate_services \
"${ip_address}:9090/v1/guardrails" \
"Violated policies" \
"guardrails" \
"guardrails-tgi-gaudi-server" \
'{"text":"How do you buy a tiger in the US?"}'
}
function validate_megaservice() {
# Curl the Mega Service
validate_services \
"${ip_address}:8888/v1/chatqna" \
"billion" \
"mega-chatqna" \
"chatqna-gaudi-guardrails-server" \
'{"messages": "What is the revenue of Nike in 2023?"}'
}
function validate_frontend() {
cd $WORKPATH/docker/ui/svelte
local conda_env_name="OPEA_e2e"
export PATH=${HOME}/miniforge3/bin/:$PATH
# conda remove -n ${conda_env_name} --all -y
# conda create -n ${conda_env_name} python=3.12 -y
source activate ${conda_env_name}
sed -i "s/localhost/$ip_address/g" playwright.config.ts
# conda install -c conda-forge nodejs -y
npm install && npm ci && npx playwright install --with-deps
node -v && npm -v && pip list
exit_status=0
npx playwright test || exit_status=$?
if [ $exit_status -ne 0 ]; then
echo "[TEST INFO]: ---------frontend test failed---------"
exit $exit_status
else
echo "[TEST INFO]: ---------frontend test passed---------"
fi
}
function stop_docker() {
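# Stop and remove every container named in docker_compose_guardrails.yaml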
cd $WORKPATH/docker/gaudi
container_list=$(cat docker_compose_guardrails.yaml | grep container_name | cut -d':' -f2)
for container_name in $container_list; do
cid=$(docker ps -aq --filter "name=$container_name")
if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
done
}
function main() {
stop_docker
if [[ "$IMAGE_REPO" == "" ]]; then build_docker_images; fi
start_time=$(date +%s)
start_services
end_time=$(date +%s)
duration=$((end_time-start_time))
echo "Mega service start duration is $duration s"
validate_microservices
validate_megaservice
# validate_frontend
stop_docker
echo y | docker system prune
}
main