Prediction Guard embeddings component (#675)

* added files for PG embeddings component

Signed-off-by: sharanshirodkar7 <ssharanshirodkar7@gmail.com>

* added package

Signed-off-by: sharanshirodkar7 <ssharanshirodkar7@gmail.com>

* fixed dockerfile link

Signed-off-by: sharanshirodkar7 <ssharanshirodkar7@gmail.com>

* Fix pre-commit issues: end-of-file, requirements.txt, trailing whitespace, imports, and formatting

Signed-off-by: sharanshirodkar7 <ssharanshirodkar7@gmail.com>

* added package

Signed-off-by: sharanshirodkar7 <ssharanshirodkar7@gmail.com>

* added package

Signed-off-by: sharanshirodkar7 <ssharanshirodkar7@gmail.com>

* fixed EmbedDoc call

Signed-off-by: sharanshirodkar7 <ssharanshirodkar7@gmail.com>

* file structure updated to latest

Signed-off-by: sharanshirodkar7 <ssharanshirodkar7@gmail.com>

* Fix pre-commit issues: end-of-file, requirements.txt, trailing whitespace, imports, and formatting

Signed-off-by: sharanshirodkar7 <ssharanshirodkar7@gmail.com>

* added package

Signed-off-by: sharanshirodkar7 <ssharanshirodkar7@gmail.com>

---------

Signed-off-by: sharanshirodkar7 <ssharanshirodkar7@gmail.com>
Author: Sharan Shirodkar
Date: 2024-09-17 09:33:19 -04:00
Committed by: GitHub
Parent: b4a7f261fe
Commit: 191061b642
8 changed files with 218 additions and 0 deletions


@@ -26,3 +26,7 @@ services:
     build:
       dockerfile: comps/embeddings/multimodal/multimodal_langchain/Dockerfile
     image: ${REGISTRY:-opea}/embedding-multimodal:${TAG:-latest}
+  embedding-predictionguard:
+    build:
+      dockerfile: comps/embeddings/predictionguard/Dockerfile
+    image: ${REGISTRY:-opea}/embedding-predictionguard:${TAG:-latest}


@@ -31,3 +31,7 @@ For details, please refer to this [readme](multimodal/README.md).
 ## Embeddings Microservice with Multimodal Clip
 
 For details, please refer to this [readme](multimodal_clip/README.md).
+
+## Embeddings Microservice with Prediction Guard
+
+For details, please refer to this [readme](predictionguard/README.md).


@@ -0,0 +1,16 @@
# Copyright (C) 2024 Prediction Guard, Inc
# SPDX-License-Identifier: Apache-2.0

FROM python:3.11-slim

COPY comps /home/comps

RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r /home/comps/embeddings/predictionguard/requirements.txt

ENV PYTHONPATH=$PYTHONPATH:/home

WORKDIR /home/comps/embeddings/predictionguard

ENTRYPOINT ["python", "embedding_predictionguard.py"]


@@ -0,0 +1,39 @@
# Embedding Generation Prediction Guard Microservice

[Prediction Guard](https://docs.predictionguard.com) allows you to utilize hosted open access LLMs, LVMs, and embedding functionality with seamlessly integrated safeguards. In addition to providing scalable access to open models, Prediction Guard allows you to configure factual consistency checks, toxicity filters, PII filters, and prompt injection blocking. Join the [Prediction Guard Discord channel](https://discord.gg/TFHgnhAFKd) and request an API key to get started.

This embedding microservice efficiently converts text into vectorized embeddings using the [BridgeTower model](https://huggingface.co/BridgeTower/bridgetower-large-itm-mlm-itc), making it well suited to both RAG and semantic search applications.

**Note** - The BridgeTower model hosted by Prediction Guard can embed text, images, or text + images (jointly). For now, this service embeds only text; a follow-on contribution will enable the multimodal functionality.

# 🚀 Start Microservice with Docker

## Setup Environment Variables

Set the following environment variable first:

```bash
export PREDICTIONGUARD_API_KEY=${your_predictionguard_api_key}
```

## Build Docker Images

```bash
cd ../../..
docker build -t opea/embedding-predictionguard:latest -f comps/embeddings/predictionguard/Dockerfile .
```

## Start Service

```bash
docker run -d --name="embedding-predictionguard" -p 6000:6000 -e PREDICTIONGUARD_API_KEY=$PREDICTIONGUARD_API_KEY opea/embedding-predictionguard:latest
```

# 🚀 Consume Embeddings Service

```bash
curl localhost:6000/v1/embeddings \
  -X POST \
  -d '{"text":"Hello, world!"}' \
  -H 'Content-Type: application/json'
```
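
For programmatic clients, here is a minimal Python sketch of the same request, assuming the service is running locally as above and the `requests` package is installed; the `text` and `embedding` fields match the `EmbedDoc` the service returns:

```python
# Minimal sketch of the curl call above, assuming the service listens on
# localhost:6000 and `requests` is installed.
import requests

response = requests.post(
    "http://localhost:6000/v1/embeddings",
    json={"text": "Hello, world!"},
    timeout=30,
)
response.raise_for_status()

doc = response.json()  # serialized EmbedDoc with "text" and "embedding" fields
print(doc["text"], len(doc["embedding"]))  # the service truncates vectors to 512 elements
```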


@@ -0,0 +1,21 @@
# Copyright (C) 2024 Prediction Guard, Inc
# SPDX-License-Identifier: Apache-2.0

services:
  embedding:
    image: opea/embedding-predictionguard:latest
    container_name: embedding-predictionguard
    ports:
      - "6000:6000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      PG_EMBEDDING_MODEL_NAME: ${PG_EMBEDDING_MODEL_NAME}
      PREDICTIONGUARD_API_KEY: ${PREDICTIONGUARD_API_KEY}
    restart: unless-stopped

networks:
  default:
    driver: bridge


@@ -0,0 +1,47 @@
# Copyright (C) 2024 Prediction Guard, Inc.
# SPDX-License-Identifier: Apache-2.0

import os
import time

from predictionguard import PredictionGuard

from comps import (
    EmbedDoc,
    ServiceType,
    TextDoc,
    opea_microservices,
    register_microservice,
    register_statistics,
    statistics_dict,
)

# Initialize Prediction Guard client (reads PREDICTIONGUARD_API_KEY from the environment)
client = PredictionGuard()

# Resolve the model name at import time so the handler never references an undefined name
pg_embedding_model_name = os.getenv("PG_EMBEDDING_MODEL_NAME", "bridgetower-large-itm-mlm-itc")


@register_microservice(
    name="opea_service@embedding_predictionguard",
    service_type=ServiceType.EMBEDDING,
    endpoint="/v1/embeddings",
    host="0.0.0.0",
    port=6000,
    input_datatype=TextDoc,
    output_datatype=EmbedDoc,
)
@register_statistics(names=["opea_service@embedding_predictionguard"])
def embedding(input: TextDoc) -> EmbedDoc:
    start = time.time()
    # Request a text embedding from the hosted BridgeTower model
    response = client.embeddings.create(model=pg_embedding_model_name, input=[{"text": input.text}])
    embed_vector = response["data"][0]["embedding"]
    embed_vector = embed_vector[:512]  # Keep only the first 512 elements
    res = EmbedDoc(text=input.text, embedding=embed_vector)
    statistics_dict["opea_service@embedding_predictionguard"].append_latency(time.time() - start, None)
    return res


if __name__ == "__main__":
    print("Prediction Guard Embedding initialized.")
    opea_microservices["opea_service@embedding_predictionguard"].start()
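
For debugging outside the microservice, a minimal standalone sketch of the same SDK call the handler makes, assuming `PREDICTIONGUARD_API_KEY` is exported and the `predictionguard` package is installed (not part of the service itself):

```python
# Standalone sketch mirroring the handler's SDK call and response handling.
from predictionguard import PredictionGuard

client = PredictionGuard()  # reads PREDICTIONGUARD_API_KEY from the environment
response = client.embeddings.create(
    model="bridgetower-large-itm-mlm-itc",
    input=[{"text": "What is Deep Learning?"}],
)

# The vector lives at data[0]["embedding"]; the service keeps its first 512 elements.
vector = response["data"][0]["embedding"][:512]
print(len(vector))
```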


@@ -0,0 +1,12 @@
aiohttp
docarray
fastapi
opentelemetry-api
opentelemetry-exporter-otlp
opentelemetry-sdk
Pillow
predictionguard==2.2.1
prometheus-fastapi-instrumentator
PyYAML
shortuuid
uvicorn


@@ -0,0 +1,75 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

set -x

WORKPATH=$(dirname "$PWD")
ip_address=$(hostname -I | awk '{print $1}')
if [ -z "$ip_address" ]; then
    ip_address="localhost" # Default to localhost if no IP address is found
fi

function build_docker_images() {
    cd $WORKPATH
    echo $(pwd)
    docker build --no-cache -t opea/embedding-pg:comps -f comps/embeddings/predictionguard/Dockerfile .
    if [ $? -ne 0 ]; then
        echo "opea/embedding-pg build failed"
        exit 1
    else
        echo "opea/embedding-pg built successfully"
    fi
}

function start_service() {
    pg_service_port=6000
    unset http_proxy
    docker run -d --name=test-comps-embedding-pg-server \
        -e http_proxy= -e https_proxy= \
        -e PREDICTIONGUARD_API_KEY=${PREDICTIONGUARD_API_KEY} \
        -p ${pg_service_port}:6000 --ipc=host opea/embedding-pg:comps
    sleep 60 # Allow one minute for the service to start
}

function validate_microservice() {
    pg_service_port=6000
    result=$(http_proxy="" curl http://${ip_address}:${pg_service_port}/v1/embeddings \
        -X POST \
        -d '{"text":"What is Deep Learning?"}' \
        -H 'Content-Type: application/json')
    # Check for a proper response format
    if [[ $result == *"embedding"* ]]; then
        echo "Result correct."
    elif [[ $result == *"error"* || $result == *"detail"* ]]; then
        echo "Result wrong. Error received was: $result"
        docker logs test-comps-embedding-pg-server
        exit 1
    else
        echo "Unexpected result format received: $result"
        docker logs test-comps-embedding-pg-server
        exit 1
    fi
}

function stop_docker() {
    cid=$(docker ps -aq --filter "name=test-comps-embedding-pg-*")
    if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
}

function main() {
    stop_docker
    build_docker_images
    start_service
    validate_microservice
    stop_docker
    echo y | docker system prune
}

main