Prediction Guard embeddings component (#675)
* added files for PG embeddings component
* added package
* fixed dockerfile link
* Fix pre-commit issues: end-of-file, requirements.txt, trailing whitespace, imports, and formatting
* added package
* added package
* fixed embedoc call
* file structure updated to latest
* Fix pre-commit issues: end-of-file, requirements.txt, trailing whitespace, imports, and formatting
* added package

Signed-off-by: sharanshirodkar7 <ssharanshirodkar7@gmail.com>
@@ -26,3 +26,7 @@ services:
     build:
       dockerfile: comps/embeddings/multimodal/multimodal_langchain/Dockerfile
     image: ${REGISTRY:-opea}/embedding-multimodal:${TAG:-latest}
+  embedding-predictionguard:
+    build:
+      dockerfile: comps/embeddings/predictionguard/Dockerfile
+    image: ${REGISTRY:-opea}/embedding-predictionguard:${TAG:-latest}
@@ -31,3 +31,7 @@ For details, please refer to this [readme](multimodal/README.md).

 ## Embeddings Microservice with Multimodal Clip

 For details, please refer to this [readme](multimodal_clip/README.md).
+
+## Embeddings Microservice with Prediction Guard
+
+For details, please refer to this [readme](predictionguard/README.md).
comps/embeddings/predictionguard/Dockerfile (new file, 16 lines)
@@ -0,0 +1,16 @@
# Copyright (C) 2024 Prediction Guard, Inc
# SPDX-License-Identifier: Apache-2.0

FROM python:3.11-slim

COPY comps /home/comps

RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r /home/comps/embeddings/predictionguard/requirements.txt

ENV PYTHONPATH=$PYTHONPATH:/home

WORKDIR /home/comps/embeddings/predictionguard

ENTRYPOINT ["python", "embedding_predictionguard.py"]
comps/embeddings/predictionguard/README.md (new file, 39 lines)
@@ -0,0 +1,39 @@
# Embedding Generation Prediction Guard Microservice

[Prediction Guard](https://docs.predictionguard.com) allows you to utilize hosted open access LLMs, LVMs, and embedding functionality with seamlessly integrated safeguards. In addition to providing scalable access to open models, Prediction Guard allows you to configure factual consistency checks, toxicity filters, PII filters, and prompt injection blocking. Join the [Prediction Guard Discord channel](https://discord.gg/TFHgnhAFKd) and request an API key to get started.

This embedding microservice efficiently converts text into vectorized embeddings using the [BridgeTower model](https://huggingface.co/BridgeTower/bridgetower-large-itm-mlm-itc), making it well suited for both RAG and semantic search applications.

**Note** - The BridgeTower model implemented in Prediction Guard can embed text, images, or text + images (jointly). For now this service only embeds text, but a follow-on contribution will enable the multimodal functionality.
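For reference, the microservice is a thin wrapper around the Prediction Guard Python client's embeddings call. A minimal sketch of that underlying call, mirroring the usage in `embedding_predictionguard.py` below (assumes `predictionguard==2.2.1` is installed, as pinned in requirements.txt, and `PREDICTIONGUARD_API_KEY` is exported):

```python
# Minimal sketch of the call this service wraps; mirrors embedding_predictionguard.py.
# Assumes predictionguard==2.2.1 and PREDICTIONGUARD_API_KEY in the environment.
from predictionguard import PredictionGuard

client = PredictionGuard()  # picks up PREDICTIONGUARD_API_KEY automatically

response = client.embeddings.create(
    model="bridgetower-large-itm-mlm-itc",
    input=[{"text": "Hello, world!"}],
)
# The raw embedding vector for the first (and only) input
print(len(response["data"][0]["embedding"]))
```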
# 🚀 Start Microservice with Docker

## Setup Environment Variables

Set up the following environment variable first:

```bash
export PREDICTIONGUARD_API_KEY=${your_predictionguard_api_key}
```

## Build Docker Images

```bash
cd ../../..
docker build -t opea/embedding-predictionguard:latest -f comps/embeddings/predictionguard/Dockerfile .
```

## Start Service

```bash
docker run -d --name="embedding-predictionguard" -p 6000:6000 -e PREDICTIONGUARD_API_KEY=$PREDICTIONGUARD_API_KEY opea/embedding-predictionguard:latest
```
# 🚀 Consume Embeddings Service

```bash
curl localhost:6000/v1/embeddings \
  -X POST \
  -d '{"text":"Hello, world!"}' \
  -H 'Content-Type: application/json'
```
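The same request from Python, as a sketch (assuming the `requests` package is installed and the service is running on `localhost:6000` as above; the response body is the JSON-serialized `EmbedDoc`, which carries the input text plus an `embedding` list):

```python
# Sketch of consuming the service from Python; assumes `requests` is installed
# and the container from the previous step is listening on localhost:6000.
import requests

resp = requests.post(
    "http://localhost:6000/v1/embeddings",
    json={"text": "Hello, world!"},
    timeout=30,
)
resp.raise_for_status()
doc = resp.json()  # JSON-serialized EmbedDoc with "text" and "embedding" fields
print(len(doc["embedding"]))  # the service truncates to at most 512 elements
```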
@@ -0,0 +1,21 @@
# Copyright (C) 2024 Prediction Guard, Inc
# SPDX-License-Identifier: Apache-2.0

services:
  embedding:
    image: opea/embedding-predictionguard:latest
    container_name: embedding-predictionguard
    ports:
      - "6000:6000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      PG_EMBEDDING_MODEL_NAME: ${PG_EMBEDDING_MODEL_NAME}
      PREDICTIONGUARD_API_KEY: ${PREDICTIONGUARD_API_KEY}
    restart: unless-stopped

networks:
  default:
    driver: bridge
comps/embeddings/predictionguard/embedding_predictionguard.py (new file, 47 lines)

@@ -0,0 +1,47 @@
# Copyright (C) 2024 Prediction Guard, Inc.
# SPDX-License-Identifier: Apache-2.0


import os
import time

from predictionguard import PredictionGuard

from comps import (
    EmbedDoc,
    ServiceType,
    TextDoc,
    opea_microservices,
    register_microservice,
    register_statistics,
    statistics_dict,
)

# Initialize Prediction Guard client (reads PREDICTIONGUARD_API_KEY from the environment)
client = PredictionGuard()

# Resolve the model name at import time so the handler can reference it safely
pg_embedding_model_name = os.getenv("PG_EMBEDDING_MODEL_NAME", "bridgetower-large-itm-mlm-itc")


@register_microservice(
    name="opea_service@embedding_predictionguard",
    service_type=ServiceType.EMBEDDING,
    endpoint="/v1/embeddings",
    host="0.0.0.0",
    port=6000,
    input_datatype=TextDoc,
    output_datatype=EmbedDoc,
)
@register_statistics(names=["opea_service@embedding_predictionguard"])
def embedding(input: TextDoc) -> EmbedDoc:
    start = time.time()
    response = client.embeddings.create(model=pg_embedding_model_name, input=[{"text": input.text}])
    embed_vector = response["data"][0]["embedding"]
    embed_vector = embed_vector[:512]  # Keep only the first 512 elements
    res = EmbedDoc(text=input.text, embedding=embed_vector)
    statistics_dict["opea_service@embedding_predictionguard"].append_latency(time.time() - start, None)
    return res


if __name__ == "__main__":
    print("Prediction Guard Embedding initialized.")
    opea_microservices["opea_service@embedding_predictionguard"].start()
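For a quick local check without the HTTP layer, the handler can be exercised directly; a hedged sketch, assuming the `comps` package is on `PYTHONPATH`, the script's directory is the working directory, and `PREDICTIONGUARD_API_KEY` is set:

```python
# Hedged sketch: call the handler directly, bypassing the HTTP server.
# Assumes comps is importable and PREDICTIONGUARD_API_KEY is exported;
# @register_statistics registers the stats entry when the module is imported.
from comps import TextDoc
from embedding_predictionguard import embedding

doc = embedding(TextDoc(text="What is Deep Learning?"))
print(len(doc.embedding))  # at most 512 after truncation
```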
comps/embeddings/predictionguard/requirements.txt (new file, 12 lines)
@@ -0,0 +1,12 @@
aiohttp
docarray
fastapi
opentelemetry-api
opentelemetry-exporter-otlp
opentelemetry-sdk
Pillow
predictionguard==2.2.1
prometheus-fastapi-instrumentator
PyYAML
shortuuid
uvicorn
tests/embeddings/test_embeddings_predictionguard.sh (new file, 75 lines)
@@ -0,0 +1,75 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

set -x

WORKPATH=$(dirname "$PWD")
ip_address=$(hostname -I | awk '{print $1}')
if [ -z "$ip_address" ]; then
    ip_address="localhost"  # Default to localhost if the IP address is empty
fi

function build_docker_images() {
    cd $WORKPATH
    echo $(pwd)
    docker build --no-cache -t opea/embedding-pg:comps -f comps/embeddings/predictionguard/Dockerfile .
    if [ $? -ne 0 ]; then
        echo "opea/embedding-pg build failed"
        exit 1
    else
        echo "opea/embedding-pg built successfully"
    fi
}

function start_service() {
    tei_service_port=6000
    unset http_proxy
    docker run -d --name=test-comps-embedding-pg-server \
        -e http_proxy= -e https_proxy= \
        -e PREDICTIONGUARD_API_KEY=${PREDICTIONGUARD_API_KEY} \
        -p ${tei_service_port}:6000 --ipc=host opea/embedding-pg:comps
    sleep 60  # Allow the service one minute to start
}

function validate_microservice() {
    tei_service_port=6000
    result=$(http_proxy="" curl http://${ip_address}:${tei_service_port}/v1/embeddings \
        -X POST \
        -d '{"text":"What is Deep Learning?"}' \
        -H 'Content-Type: application/json')

    # Check for a proper response format
    if [[ $result == *"embedding"* ]]; then
        echo "Result correct."
    elif [[ $result == *"error"* || $result == *"detail"* ]]; then
        echo "Result wrong. Error received was: $result"
        docker logs test-comps-embedding-pg-server
        exit 1
    else
        echo "Unexpected result format received: $result"
        docker logs test-comps-embedding-pg-server
        exit 1
    fi
}

function stop_docker() {
    cid=$(docker ps -aq --filter "name=test-comps-embedding-pg-*")
    if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
}

function main() {
    stop_docker

    build_docker_images
    start_service

    validate_microservice

    stop_docker
    echo y | docker system prune
}

main