Add guardrails in the ChatQnA pipeline (#407)
Signed-off-by: lvliang-intel <liang1.lv@intel.com>

ChatQnA/docker/Dockerfile_guardrails (Normal file, 32 lines)
@@ -0,0 +1,32 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

FROM python:3.11-slim

RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
    libgl1-mesa-glx \
    libjemalloc-dev \
    vim \
    git

RUN useradd -m -s /bin/bash user && \
    mkdir -p /home/user && \
    chown -R user /home/user/

RUN cd /home/user/ && \
    git clone https://github.com/opea-project/GenAIComps.git
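
# Install the dependencies of the "comps" package from the cloned GenAIComps repo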
RUN cd /home/user/GenAIComps && pip install --no-cache-dir --upgrade pip && \
    pip install -r /home/user/GenAIComps/requirements.txt

COPY ./chatqna_guardrails.py /home/user/chatqna_guardrails.py

ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps

USER user

WORKDIR /home/user

ENTRYPOINT ["python", "chatqna_guardrails.py"]
@@ -1,7 +1,6 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

-import asyncio
import os

from comps import ChatQnAGateway, MicroService, ServiceOrchestrator, ServiceType

ChatQnA/docker/chatqna_guardrails.py (Normal file, 79 lines)
@@ -0,0 +1,79 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import os

from comps import ChatQnAGateway, MicroService, ServiceOrchestrator, ServiceType

MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "0.0.0.0")
MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))
GUARDRAIL_SERVICE_HOST_IP = os.getenv("GUARDRAIL_SERVICE_HOST_IP", "0.0.0.0")
GUARDRAIL_SERVICE_PORT = int(os.getenv("GUARDRAIL_SERVICE_PORT", 9090))
EMBEDDING_SERVICE_HOST_IP = os.getenv("EMBEDDING_SERVICE_HOST_IP", "0.0.0.0")
EMBEDDING_SERVICE_PORT = int(os.getenv("EMBEDDING_SERVICE_PORT", 6000))
RETRIEVER_SERVICE_HOST_IP = os.getenv("RETRIEVER_SERVICE_HOST_IP", "0.0.0.0")
RETRIEVER_SERVICE_PORT = int(os.getenv("RETRIEVER_SERVICE_PORT", 7000))
RERANK_SERVICE_HOST_IP = os.getenv("RERANK_SERVICE_HOST_IP", "0.0.0.0")
RERANK_SERVICE_PORT = int(os.getenv("RERANK_SERVICE_PORT", 8000))
LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0")
LLM_SERVICE_PORT = int(os.getenv("LLM_SERVICE_PORT", 9000))


class ChatQnAService:
    def __init__(self, host="0.0.0.0", port=8000):
        self.host = host
        self.port = port
        self.megaservice = ServiceOrchestrator()

    def add_remote_service(self):
        guardrail = MicroService(
            name="guardrail",
            host=GUARDRAIL_SERVICE_HOST_IP,
            port=GUARDRAIL_SERVICE_PORT,
            endpoint="/v1/guardrails",
            use_remote_service=True,
            service_type=ServiceType.GUARDRAIL,
        )
        embedding = MicroService(
            name="embedding",
            host=EMBEDDING_SERVICE_HOST_IP,
            port=EMBEDDING_SERVICE_PORT,
            endpoint="/v1/embeddings",
            use_remote_service=True,
            service_type=ServiceType.EMBEDDING,
        )
        retriever = MicroService(
            name="retriever",
            host=RETRIEVER_SERVICE_HOST_IP,
            port=RETRIEVER_SERVICE_PORT,
            endpoint="/v1/retrieval",
            use_remote_service=True,
            service_type=ServiceType.RETRIEVER,
        )
        rerank = MicroService(
            name="rerank",
            host=RERANK_SERVICE_HOST_IP,
            port=RERANK_SERVICE_PORT,
            endpoint="/v1/reranking",
            use_remote_service=True,
            service_type=ServiceType.RERANK,
        )
        llm = MicroService(
            name="llm",
            host=LLM_SERVICE_HOST_IP,
            port=LLM_SERVICE_PORT,
            endpoint="/v1/chat/completions",
            use_remote_service=True,
            service_type=ServiceType.LLM,
        )
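        # Chain the remote services into a pipeline: guardrail -> embedding -> retriever -> rerank -> llm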
        self.megaservice.add(guardrail).add(embedding).add(retriever).add(rerank).add(llm)
        self.megaservice.flow_to(guardrail, embedding)
        self.megaservice.flow_to(embedding, retriever)
        self.megaservice.flow_to(retriever, rerank)
        self.megaservice.flow_to(rerank, llm)
        self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)


if __name__ == "__main__":
    chatqna = ChatQnAService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT)
    chatqna.add_remote_service()
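
Once its dependent microservices are up, the gateway this script starts can be smoke-tested with a single request; this mirrors the megaservice check in the CI test below (the gateway serves `/v1/chatqna` on port 8888):

```bash
curl http://${host_ip}:8888/v1/chatqna \
    -X POST \
    -H 'Content-Type: application/json' \
    -d '{"messages": "What is the revenue of Nike in 2023?"}'
```
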
@@ -65,6 +65,15 @@ docker build --no-cache -t opea/chatqna:latest --build-arg https_proxy=$https_pr
cd ../../..
```

If you want to enable the Guardrails microservice in the pipeline, use the command below instead:

```bash
git clone https://github.com/opea-project/GenAIExamples.git
cd GenAIExamples/ChatQnA/docker
docker build --no-cache -t opea/chatqna-guardrails:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile_guardrails .
cd ../../..
```
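
This image runs `chatqna_guardrails.py`, which places the guardrails microservice ahead of embedding in the megaservice flow (guardrail, then embedding, retriever, rerank, llm).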

### 9. Build UI Docker Image

Construct the frontend Docker image using the command below:

@@ -89,6 +98,16 @@ docker build --no-cache -t opea/chatqna-conversation-ui:latest --build-arg https
cd ../../../..
```

### 11. Build Guardrails Docker Image (Optional)

To fortify AI initiatives in production, the Guardrails microservice screens model inputs and outputs, helping you build trustworthy, safe, and secure LLM-based applications.

```bash
cd GenAIComps
docker build -t opea/guardrails-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/guardrails/langchain/docker/Dockerfile .
cd ..
```

Then run the command `docker images`; you will see the following 8 Docker images:

1. `opea/embedding-tei:latest`
@@ -97,13 +116,17 @@ Then run the command `docker images`, you will have the following 8 Docker Image
4. `opea/llm-tgi:latest`
5. `opea/tei-gaudi:latest`
6. `opea/dataprep-redis:latest`
7. `opea/chatqna:latest` or `opea/chatqna-guardrails:latest`
8. `opea/chatqna-ui:latest`

If the Conversation React UI is built, you will find one more image:

9. `opea/chatqna-conversation-ui:latest`

If the Guardrails Docker image is built, you will find one more image:

10. `opea/guardrails-tgi:latest`

## 🚀 Start MicroServices and MegaService

### Setup Environment Variables
@@ -134,6 +157,14 @@ export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6008/v1/dataprep/get_file"
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6009/v1/dataprep/delete_file"
```

If the Guardrails microservice is enabled in the pipeline, the following environment variables must also be set.

```bash
export GUARDRAILS_MODEL_ID="meta-llama/Meta-Llama-Guard-2-8B"
export SAFETY_GUARD_ENDPOINT="http://${host_ip}:8088"
export GUARDRAIL_SERVICE_HOST_IP=${host_ip}
```
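
Once the containers are up, you can sanity-check the safety-guard endpoint directly; this is a minimal smoke test, assuming TGI's `/generate` route and the 8088 host port mapping from the compose file below:

```bash
curl ${SAFETY_GUARD_ENDPOINT}/generate \
    -X POST \
    -H 'Content-Type: application/json' \
    -d '{"inputs":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}'
```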

Note: Please replace `host_ip` with your external IP address; do **NOT** use localhost.

### Start all the services Docker Containers

@@ -143,6 +174,13 @@ cd GenAIExamples/ChatQnA/docker/gaudi/
docker compose -f docker_compose.yaml up -d
```

If you want to enable the Guardrails microservice in the pipeline, use the commands below instead:

```bash
cd GenAIExamples/ChatQnA/docker/gaudi/
docker compose -f docker_compose_guardrails.yaml up -d
```
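
The TGI services can take a while to warm up. A quick way to check readiness, mirroring what the CI test below does, is to watch for the "Connected" line in the TGI logs:

```bash
docker logs tgi-gaudi-server 2>&1 | grep Connected
docker logs tgi-guardrails-server 2>&1 | grep Connected
```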

### Validate MicroServices and MegaService

Follow the instructions to validate MicroServices.

@@ -276,6 +314,15 @@ curl -X POST "http://${host_ip}:6009/v1/dataprep/delete_file" \
    -H "Content-Type: application/json"
```

10. Guardrails (Optional)

```bash
curl http://${host_ip}:9090/v1/guardrails \
    -X POST \
    -d '{"text":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}' \
    -H 'Content-Type: application/json'
```
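
For an unsafe prompt like this one, the response should report the violated safety policies; the CI test below checks for the string `Violated policies` in the output.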

## Enable LangSmith for Monitoring Application (Optional)

LangSmith offers tools to debug, evaluate, and monitor language models and intelligent agents. It can be used to assess benchmark data for each microservice. Before launching your services with `docker compose -f docker_compose.yaml up -d`, you need to enable LangSmith tracing by setting the `LANGCHAIN_TRACING_V2` environment variable to true and configuring your LangChain API key.
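
For example (substitute your own key; `<your-langchain-api-key>` is a placeholder):

```bash
export LANGCHAIN_TRACING_V2=true
export LANGCHAIN_API_KEY=<your-langchain-api-key>
```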

ChatQnA/docker/gaudi/docker_compose_guardrails.yaml (Normal file, 244 lines)
@@ -0,0 +1,244 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

version: "3.8"

services:
  redis-vector-db:
    image: redis/redis-stack:7.2.0-v9
    container_name: redis-vector-db
    ports:
      - "6379:6379"
      - "8001:8001"
  dataprep-redis-service:
    image: opea/dataprep-redis:latest
    container_name: dataprep-redis-server
    depends_on:
      - redis-vector-db
    ports:
      - "6007:6007"
      - "6008:6008"
      - "6009:6009"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      REDIS_URL: ${REDIS_URL}
      INDEX_NAME: ${INDEX_NAME}
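  # TGI instance serving the Llama Guard safety model that backs the guardrails microservice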
  tgi-guardrails-service:
    image: ghcr.io/huggingface/tgi-gaudi:2.0.1
    container_name: tgi-guardrails-server
    ports:
      - "8088:80"
    volumes:
      - "./data:/data"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
    runtime: habana
    cap_add:
      - SYS_NICE
    ipc: host
    command: --model-id ${GUARDRAILS_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048
  guardrails:
    image: opea/guardrails-tgi:latest
    container_name: guardrails-tgi-gaudi-server
    ports:
      - "9090:9090"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      SAFETY_GUARD_MODEL_ID: ${SAFETY_GUARD_MODEL_ID}
      SAFETY_GUARD_ENDPOINT: ${SAFETY_GUARD_ENDPOINT}
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
    restart: unless-stopped
  tei-embedding-service:
    image: opea/tei-gaudi:latest
    container_name: tei-embedding-gaudi-server
    ports:
      - "8090:80"
    volumes:
      - "./data:/data"
    runtime: habana
    cap_add:
      - SYS_NICE
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
      MAX_WARMUP_SEQUENCE_LENGTH: 512
    command: --model-id ${EMBEDDING_MODEL_ID}
  embedding:
    image: opea/embedding-tei:latest
    container_name: embedding-tei-server
    depends_on:
      - tei-embedding-service
    ports:
      - "6000:6000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-embedding-service"
    restart: unless-stopped
  retriever:
    image: opea/retriever-redis:latest
    container_name: retriever-redis-server
    depends_on:
      - redis-vector-db
    ports:
      - "7000:7000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      REDIS_URL: ${REDIS_URL}
      INDEX_NAME: ${INDEX_NAME}
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-retriever-service"
    restart: unless-stopped
  tei-reranking-service:
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
    container_name: tei-reranking-gaudi-server
    ports:
      - "8808:80"
    volumes:
      - "./data:/data"
    shm_size: 1g
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
    command: --model-id ${RERANK_MODEL_ID} --auto-truncate
  reranking:
    image: opea/reranking-tei:latest
    container_name: reranking-tei-gaudi-server
    depends_on:
      - tei-reranking-service
    ports:
      - "8000:8000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-reranking-service"
    restart: unless-stopped
  tgi-service:
    image: ghcr.io/huggingface/tgi-gaudi:2.0.1
    container_name: tgi-gaudi-server
    ports:
      - "8008:80"
    volumes:
      - "./data:/data"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
    runtime: habana
    cap_add:
      - SYS_NICE
    ipc: host
    command: --model-id ${LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048
  llm:
    image: opea/llm-tgi:latest
    container_name: llm-tgi-gaudi-server
    depends_on:
      - tgi-service
    ports:
      - "9000:9000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-llm-service"
    restart: unless-stopped
  chatqna-gaudi-backend-server:
    image: opea/chatqna-guardrails:latest
    container_name: chatqna-gaudi-guardrails-server
    depends_on:
      - redis-vector-db
      - tgi-guardrails-service
      - guardrails
      - tei-embedding-service
      - embedding
      - retriever
      - tei-reranking-service
      - reranking
      - tgi-service
      - llm
    ports:
      - "8888:8888"
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
      - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
      - GUARDRAIL_SERVICE_HOST_IP=${GUARDRAIL_SERVICE_HOST_IP}
      - EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
      - RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
      - RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP}
      - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
    ipc: host
    restart: always
  chatqna-gaudi-ui-server:
    image: opea/chatqna-ui:latest
    container_name: chatqna-gaudi-ui-server
    depends_on:
      - chatqna-gaudi-backend-server
    ports:
      - "5173:5173"
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
      - CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
      - UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT}
      - GET_FILE=${DATAPREP_GET_FILE_ENDPOINT}
      - DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT}
    ipc: host
    restart: always

networks:
  default:
    driver: bridge

ChatQnA/tests/test_chatqna_guardrails_on_gaudi.sh (Normal file, 269 lines)
@@ -0,0 +1,269 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

set -e
echo "IMAGE_REPO=${IMAGE_REPO}"

WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')

function build_docker_images() {
    cd $WORKPATH
    git clone https://github.com/opea-project/GenAIComps.git
    cd GenAIComps

    docker build -t opea/embedding-tei:latest -f comps/embeddings/langchain/docker/Dockerfile .
    docker build -t opea/retriever-redis:latest -f comps/retrievers/langchain/redis/docker/Dockerfile .
    docker build -t opea/reranking-tei:latest -f comps/reranks/tei/docker/Dockerfile .
    docker build -t opea/llm-tgi:latest -f comps/llms/text-generation/tgi/Dockerfile .
    docker build -t opea/dataprep-redis:latest -f comps/dataprep/redis/langchain/docker/Dockerfile .
    docker build -t opea/guardrails-tgi:latest -f comps/guardrails/langchain/docker/Dockerfile .

    # cd ..
    # git clone https://github.com/huggingface/tei-gaudi
    # cd tei-gaudi/
    # docker build --no-cache -f Dockerfile-hpu -t opea/tei-gaudi:latest .

    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
    docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.2

    cd $WORKPATH/docker
    docker build --no-cache -t opea/chatqna-guardrails:latest -f Dockerfile_guardrails .

    cd $WORKPATH/docker/ui
    docker build --no-cache -t opea/chatqna-ui:latest -f docker/Dockerfile .

    docker images
}

function start_services() {
    # build tei-gaudi for each test instead of pulling it from a local registry
    cd $WORKPATH
    git clone https://github.com/huggingface/tei-gaudi
    cd tei-gaudi/
    docker build --no-cache -f Dockerfile-hpu -t opea/tei-gaudi:latest .

    cd $WORKPATH/docker/gaudi
    export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
    export RERANK_MODEL_ID="BAAI/bge-reranker-base"
    export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
    export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:8090"
    export TEI_RERANKING_ENDPOINT="http://${ip_address}:8808"
    export TGI_LLM_ENDPOINT="http://${ip_address}:8008"
    export REDIS_URL="redis://${ip_address}:6379"
    export INDEX_NAME="rag-redis"
    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
    export MEGA_SERVICE_HOST_IP=${ip_address}
    export EMBEDDING_SERVICE_HOST_IP=${ip_address}
    export RETRIEVER_SERVICE_HOST_IP=${ip_address}
    export RERANK_SERVICE_HOST_IP=${ip_address}
    export LLM_SERVICE_HOST_IP=${ip_address}
    export GUARDRAIL_SERVICE_HOST_IP=${ip_address}
    export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/chatqna"
    export DATAPREP_SERVICE_ENDPOINT="http://${ip_address}:6007/v1/dataprep"
    export GUARDRAILS_MODEL_ID="meta-llama/Meta-Llama-Guard-2-8B"
    export SAFETY_GUARD_MODEL_ID="meta-llama/Meta-Llama-Guard-2-8B"
    export SAFETY_GUARD_ENDPOINT="http://${ip_address}:8088"

    sed -i "s/backend_address/$ip_address/g" $WORKPATH/docker/ui/svelte/.env

    if [[ "$IMAGE_REPO" != "" ]]; then
        # Rewrite the image references to use the test registry and tag
        echo "using image repository $IMAGE_REPO and image tag $IMAGE_TAG"
        sed -i "s#image: opea/chatqna-guardrails:latest#image: opea/chatqna:${IMAGE_TAG}#g" docker_compose_guardrails.yaml
        sed -i "s#image: opea/chatqna-ui:latest#image: opea/chatqna-ui:${IMAGE_TAG}#g" docker_compose_guardrails.yaml
        sed -i "s#image: opea/chatqna-conversation-ui:latest#image: opea/chatqna-conversation-ui:${IMAGE_TAG}#g" docker_compose_guardrails.yaml
        sed -i "s#image: opea/*#image: ${IMAGE_REPO}opea/#g" docker_compose_guardrails.yaml
        sed -i "s#image: ${IMAGE_REPO}opea/tei-gaudi:latest#image: opea/tei-gaudi:latest#g" docker_compose_guardrails.yaml
        echo "cat docker_compose_guardrails.yaml"
        cat docker_compose_guardrails.yaml
    fi

    # Start Docker Containers
    docker compose -f docker_compose_guardrails.yaml up -d
    n=0
    until [[ "$n" -ge 400 ]]; do
        docker logs tgi-gaudi-server > tgi_service_start.log
        if grep -q Connected tgi_service_start.log; then
            break
        fi
        sleep 1s
        n=$((n+1))
    done
}
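
# validate_services <url> <expected_result> <service_name> <docker_name> <input_data>
# POSTs the JSON payload, checks the HTTP status, then greps the response body for
# the expected substring; on failure it dumps the container logs and exits non-zero.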
function validate_services() {
    local URL="$1"
    local EXPECTED_RESULT="$2"
    local SERVICE_NAME="$3"
    local DOCKER_NAME="$4"
    local INPUT_DATA="$5"

    local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
    if [ "$HTTP_STATUS" -eq 200 ]; then
        echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."

        local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log)

        if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then
            echo "[ $SERVICE_NAME ] Content is as expected."
        else
            echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT"
            docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
            exit 1
        fi
    else
        echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
        docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
        exit 1
    fi
    sleep 1s
}

function validate_microservices() {
    # Check if the microservices are running correctly.

    # tei for embedding service
    validate_services \
        "${ip_address}:8090/embed" \
        "\[\[" \
        "tei-embedding" \
        "tei-embedding-gaudi-server" \
        '{"inputs":"What is Deep Learning?"}'

    # embedding microservice
    validate_services \
        "${ip_address}:6000/v1/embeddings" \
        '"text":"What is Deep Learning?","embedding":\[' \
        "embedding" \
        "embedding-tei-server" \
        '{"text":"What is Deep Learning?"}'

    sleep 1m # the retrieval service may need extra time before it can serve requests

    # retrieval microservice
    test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
    validate_services \
        "${ip_address}:7000/v1/retrieval" \
        " " \
        "retrieval" \
        "retriever-redis-server" \
        "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${test_embedding}}"

    # tei for rerank microservice
    validate_services \
        "${ip_address}:8808/rerank" \
        '{"index":1,"score":' \
        "tei-rerank" \
        "tei-reranking-gaudi-server" \
        '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}'

    # rerank microservice
    validate_services \
        "${ip_address}:8000/v1/reranking" \
        "Deep learning is..." \
        "rerank" \
        "reranking-tei-gaudi-server" \
        '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}'

    # tgi for llm service
    validate_services \
        "${ip_address}:8008/generate" \
        "generated_text" \
        "tgi-llm" \
        "tgi-gaudi-server" \
        '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}'

    # llm microservice
    validate_services \
        "${ip_address}:9000/v1/chat/completions" \
        "data: " \
        "llm" \
        "llm-tgi-gaudi-server" \
        '{"query":"What is Deep Learning?"}'

    # tgi for guardrails service (serves on host port 8088, per the compose file)
    validate_services \
        "${ip_address}:8088/generate" \
        "generated_text" \
        "tgi-guardrails" \
        "tgi-guardrails-server" \
        '{"inputs":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}'

    # guardrails microservice
    validate_services \
        "${ip_address}:9090/v1/guardrails" \
        "Violated policies" \
        "guardrails" \
        "guardrails-tgi-gaudi-server" \
        '{"text":"How do you buy a tiger in the US?"}'
}

function validate_megaservice() {
    # Curl the Mega Service
    validate_services \
        "${ip_address}:8888/v1/chatqna" \
        "billion" \
        "mega-chatqna" \
        "chatqna-gaudi-guardrails-server" \
        '{"messages": "What is the revenue of Nike in 2023?"}'
}

function validate_frontend() {
    cd $WORKPATH/docker/ui/svelte
    local conda_env_name="OPEA_e2e"
    export PATH=${HOME}/miniforge3/bin/:$PATH
    # conda remove -n ${conda_env_name} --all -y
    # conda create -n ${conda_env_name} python=3.12 -y
    source activate ${conda_env_name}

    sed -i "s/localhost/$ip_address/g" playwright.config.ts

    # conda install -c conda-forge nodejs -y
    npm install && npm ci && npx playwright install --with-deps
    node -v && npm -v && pip list

    exit_status=0
    npx playwright test || exit_status=$?

    if [ $exit_status -ne 0 ]; then
        echo "[TEST INFO]: ---------frontend test failed---------"
        exit $exit_status
    else
        echo "[TEST INFO]: ---------frontend test passed---------"
    fi
}

function stop_docker() {
    cd $WORKPATH/docker/gaudi
    container_list=$(grep container_name docker_compose_guardrails.yaml | cut -d':' -f2)
    for container_name in $container_list; do
        cid=$(docker ps -aq --filter "name=$container_name")
        if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
    done
}

function main() {
    stop_docker
    if [[ "$IMAGE_REPO" == "" ]]; then build_docker_images; fi
    start_time=$(date +%s)
    start_services
    end_time=$(date +%s)
    duration=$((end_time-start_time))
    echo "Mega service start duration is $duration s"

    validate_microservices
    validate_megaservice
    # validate_frontend

    stop_docker
    echo y | docker system prune
}

main