diff --git a/.github/workflows/docker/compose/embeddings-compose-cd.yaml b/.github/workflows/docker/compose/embeddings-compose-cd.yaml
index 2d897459b..d9d0403dd 100644
--- a/.github/workflows/docker/compose/embeddings-compose-cd.yaml
+++ b/.github/workflows/docker/compose/embeddings-compose-cd.yaml
@@ -26,3 +26,7 @@ services:
     build:
       dockerfile: comps/embeddings/multimodal/multimodal_langchain/Dockerfile
     image: ${REGISTRY:-opea}/embedding-multimodal:${TAG:-latest}
+  embedding-predictionguard:
+    build:
+      dockerfile: comps/embeddings/predictionguard/Dockerfile
+    image: ${REGISTRY:-opea}/embedding-predictionguard:${TAG:-latest}
diff --git a/comps/embeddings/README.md b/comps/embeddings/README.md
index 6d48484f1..74117982f 100644
--- a/comps/embeddings/README.md
+++ b/comps/embeddings/README.md
@@ -31,3 +31,7 @@ For details, please refer to this [readme](multimodal/README.md).
 ## Embeddings Microservice with Multimodal Clip
 
 For details, please refer to this [readme](multimodal_clip/README.md).
+
+## Embeddings Microservice with Prediction Guard
+
+For details, please refer to this [readme](predictionguard/README.md).
diff --git a/comps/embeddings/predictionguard/Dockerfile b/comps/embeddings/predictionguard/Dockerfile
new file mode 100644
index 000000000..e2c10ffa6
--- /dev/null
+++ b/comps/embeddings/predictionguard/Dockerfile
@@ -0,0 +1,16 @@
+# Copyright (C) 2024 Prediction Guard, Inc
+# SPDX-License-Identifier: Apache-2.0
+
+FROM python:3.11-slim
+
+COPY comps /home/comps
+
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r /home/comps/embeddings/predictionguard/requirements.txt
+
+ENV PYTHONPATH=$PYTHONPATH:/home
+
+WORKDIR /home/comps/embeddings/predictionguard
+
+ENTRYPOINT ["python", "embedding_predictionguard.py"]
+
diff --git a/comps/embeddings/predictionguard/README.md b/comps/embeddings/predictionguard/README.md
new file mode 100644
index 000000000..0b64d2b1a
--- /dev/null
+++ b/comps/embeddings/predictionguard/README.md
@@ -0,0 +1,59 @@
+# Embedding Generation Prediction Guard Microservice
+
+[Prediction Guard](https://docs.predictionguard.com) allows you to use hosted open access LLMs, LVMs, and embedding functionality with seamlessly integrated safeguards. In addition to providing scalable access to open models, Prediction Guard allows you to configure factual consistency checks, toxicity filters, PII filters, and prompt injection blocking. Join the [Prediction Guard Discord channel](https://discord.gg/TFHgnhAFKd) and request an API key to get started.
+
+This embedding microservice efficiently converts text into vectorized embeddings using the [BridgeTower model](https://huggingface.co/BridgeTower/bridgetower-large-itm-mlm-itc), making it well suited to both RAG and semantic search applications.
+
+**Note** - The BridgeTower model hosted by Prediction Guard can embed text, images, or text + images (jointly). For now, this service only embeds text; a follow-on contribution will enable the multimodal functionality.
+
+# 🚀 Start Microservice with Docker
+
+## Setup Environment Variables
+
+Set up the following environment variable first:
+
+```bash
+export PREDICTIONGUARD_API_KEY=${your_predictionguard_api_key}
+```
+
+## Build Docker Image
+
+```bash
+cd ../../..
+docker build -t opea/embedding-predictionguard:latest -f comps/embeddings/predictionguard/Dockerfile .
+```
+
+## Start Service
+
+```bash
+docker run -d --name="embedding-predictionguard" -p 6000:6000 -e PREDICTIONGUARD_API_KEY=$PREDICTIONGUARD_API_KEY opea/embedding-predictionguard:latest
+```
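+
+Alternatively, you can start the service with the Docker Compose file provided in this directory. A minimal sketch, assuming `PREDICTIONGUARD_API_KEY` is already exported and the Docker Compose plugin is available:
+
+```bash
+# Run from the repository root; PG_EMBEDDING_MODEL_NAME is optional
+docker compose -f comps/embeddings/predictionguard/docker_compose_embedding.yaml up -d
+```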
+
+# 🚀 Consume Embeddings Service
+
+```bash
+curl localhost:6000/v1/embeddings \
+    -X POST \
+    -d '{"text":"Hello, world!"}' \
+    -H 'Content-Type: application/json'
+```
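+
+The same request can be issued from Python. Below is a minimal sketch using `requests`; the exact response fields follow the `EmbedDoc` schema from `comps`, but the payload includes the original text and the embedding vector, which this service truncates to 512 dimensions:
+
+```python
+import requests
+
+resp = requests.post(
+    "http://localhost:6000/v1/embeddings",
+    json={"text": "Hello, world!"},
+)
+doc = resp.json()  # JSON-serialized EmbedDoc (field names per the comps schema)
+print(len(doc["embedding"]))  # expected: 512
+```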
diff --git a/comps/embeddings/predictionguard/docker_compose_embedding.yaml b/comps/embeddings/predictionguard/docker_compose_embedding.yaml
new file mode 100644
index 000000000..24a3aba5e
--- /dev/null
+++ b/comps/embeddings/predictionguard/docker_compose_embedding.yaml
@@ -0,0 +1,21 @@
+# Copyright (C) 2024 Prediction Guard, Inc
+# SPDX-License-Identifier: Apache-2.0
+
+services:
+  embedding:
+    image: opea/embedding-predictionguard:latest
+    container_name: embedding-predictionguard
+    ports:
+      - "6000:6000"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      PG_EMBEDDING_MODEL_NAME: ${PG_EMBEDDING_MODEL_NAME}
+      PREDICTIONGUARD_API_KEY: ${PREDICTIONGUARD_API_KEY}
+    restart: unless-stopped
+
+networks:
+  default:
+    driver: bridge
diff --git a/comps/embeddings/predictionguard/embedding_predictionguard.py b/comps/embeddings/predictionguard/embedding_predictionguard.py
new file mode 100644
index 000000000..4ea557f70
--- /dev/null
+++ b/comps/embeddings/predictionguard/embedding_predictionguard.py
@@ -0,0 +1,49 @@
+# Copyright (C) 2024 Prediction Guard, Inc.
+# SPDX-License-Identifier: Apache-2.0
+
+
+import os
+import time
+
+from predictionguard import PredictionGuard
+
+from comps import (
+    EmbedDoc,
+    ServiceType,
+    TextDoc,
+    opea_microservices,
+    register_microservice,
+    register_statistics,
+    statistics_dict,
+)
+
+# Initialize the Prediction Guard client (reads PREDICTIONGUARD_API_KEY from the environment)
+client = PredictionGuard()
+
+# Embedding model to request from Prediction Guard; BridgeTower is the default
+pg_embedding_model_name = os.getenv("PG_EMBEDDING_MODEL_NAME", "bridgetower-large-itm-mlm-itc")
+
+
+@register_microservice(
+    name="opea_service@embedding_predictionguard",
+    service_type=ServiceType.EMBEDDING,
+    endpoint="/v1/embeddings",
+    host="0.0.0.0",
+    port=6000,
+    input_datatype=TextDoc,
+    output_datatype=EmbedDoc,
+)
+@register_statistics(names=["opea_service@embedding_predictionguard"])
+def embedding(input: TextDoc) -> EmbedDoc:
+    start = time.time()
+    response = client.embeddings.create(model=pg_embedding_model_name, input=[{"text": input.text}])
+    embed_vector = response["data"][0]["embedding"]
+    embed_vector = embed_vector[:512]  # Truncate to the first 512 dimensions
+    res = EmbedDoc(text=input.text, embedding=embed_vector)
+    statistics_dict["opea_service@embedding_predictionguard"].append_latency(time.time() - start, None)
+    return res
+
+
+if __name__ == "__main__":
+    print("Prediction Guard Embedding initialized.")
+    opea_microservices["opea_service@embedding_predictionguard"].start()
diff --git a/comps/embeddings/predictionguard/requirements.txt b/comps/embeddings/predictionguard/requirements.txt
new file mode 100644
index 000000000..0c1b8527f
--- /dev/null
+++ b/comps/embeddings/predictionguard/requirements.txt
@@ -0,0 +1,12 @@
+aiohttp
+docarray
+fastapi
+opentelemetry-api
+opentelemetry-exporter-otlp
+opentelemetry-sdk
+Pillow
+predictionguard==2.2.1
+prometheus-fastapi-instrumentator
+PyYAML
+shortuuid
+uvicorn
diff --git a/tests/embeddings/test_embeddings_predictionguard.sh b/tests/embeddings/test_embeddings_predictionguard.sh
new file mode 100644
index 000000000..567b4fc0b
--- /dev/null
+++ b/tests/embeddings/test_embeddings_predictionguard.sh
@@ -0,0 +1,75 @@
+#!/bin/bash
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+set -x
+
+WORKPATH=$(dirname "$PWD")
+ip_address=$(hostname -I | awk '{print $1}')  # First address reported by hostname -I
+if [ -z "$ip_address" ]; then
+    ip_address="localhost"  # Fall back to localhost if no IP address was found
+fi
+
+function build_docker_images() {
+    cd "$WORKPATH"
+    echo $(pwd)
+    docker build --no-cache -t opea/embedding-pg:comps -f comps/embeddings/predictionguard/Dockerfile .
+    if [ $? -ne 0 ]; then
+        echo "opea/embedding-pg build failed"
+        exit 1
+    else
+        echo "opea/embedding-pg built successfully"
+    fi
+}
+
+function start_service() {
+    service_port=6000
+    unset http_proxy
+    docker run -d --name=test-comps-embedding-pg-server \
+        -e http_proxy= -e https_proxy= \
+        -e PREDICTIONGUARD_API_KEY=${PREDICTIONGUARD_API_KEY} \
+        -p ${service_port}:6000 --ipc=host opea/embedding-pg:comps
+    sleep 60  # Give the service a minute to start
+}
+
+function validate_microservice() {
+    service_port=6000
+    result=$(http_proxy="" curl http://${ip_address}:${service_port}/v1/embeddings \
+        -X POST \
+        -d '{"text":"What is Deep Learning?"}' \
+        -H 'Content-Type: application/json')
+
+    # Check for a proper response format
+    if [[ $result == *"embedding"* ]]; then
+        echo "Result correct."
+    elif [[ $result == *"error"* || $result == *"detail"* ]]; then
+        echo "Result wrong. Error received was: $result"
+        docker logs test-comps-embedding-pg-server
+        exit 1
+    else
+        echo "Unexpected result format received was: $result"
+        docker logs test-comps-embedding-pg-server
+        exit 1
+    fi
+}
+
+function stop_docker() {
+    cid=$(docker ps -aq --filter "name=test-comps-embedding-pg-*")
+    if [[ -n "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
+}
+
+function main() {
+
+    stop_docker
+
+    build_docker_images
+    start_service
+
+    validate_microservice
+
+    stop_docker
+    echo y | docker system prune
+
+}
+
+main