diff --git a/.github/workflows/docker/compose/nginx-compose.yaml b/.github/workflows/docker/compose/3rd_parties-compose.yaml similarity index 63% rename from .github/workflows/docker/compose/nginx-compose.yaml rename to .github/workflows/docker/compose/3rd_parties-compose.yaml index b298f6f5a..180256e18 100644 --- a/.github/workflows/docker/compose/nginx-compose.yaml +++ b/.github/workflows/docker/compose/3rd_parties-compose.yaml @@ -1,9 +1,8 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -# this file should be run in the root of the repo services: nginx: build: - dockerfile: comps/nginx/Dockerfile + dockerfile: comps/3rd_parties/nginx/src/Dockerfile image: ${REGISTRY:-opea}/nginx:${TAG:-latest} diff --git a/.github/workflows/docker/compose/dataprep-compose.yaml b/.github/workflows/docker/compose/dataprep-compose.yaml index d18c141c8..69cbadd53 100644 --- a/.github/workflows/docker/compose/dataprep-compose.yaml +++ b/.github/workflows/docker/compose/dataprep-compose.yaml @@ -3,6 +3,10 @@ # this file should be run in the root of the repo services: + dataprep: + build: + dockerfile: comps/dataprep/src/Dockerfile + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} dataprep-redis: build: dockerfile: comps/dataprep/redis/langchain/Dockerfile diff --git a/.github/workflows/docker/compose/embeddings-compose.yaml b/.github/workflows/docker/compose/embeddings-compose.yaml index 417a005c6..5fc274c24 100644 --- a/.github/workflows/docker/compose/embeddings-compose.yaml +++ b/.github/workflows/docker/compose/embeddings-compose.yaml @@ -5,41 +5,25 @@ services: embedding-tei: build: - dockerfile: comps/embeddings/tei/langchain/Dockerfile + dockerfile: comps/embeddings/src/Dockerfile image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest} embedding-multimodal-clip: build: - dockerfile: comps/embeddings/multimodal_clip/Dockerfile + dockerfile: comps/embeddings/src/integrations/dependency/clip/Dockerfile image: ${REGISTRY:-opea}/embedding-multimodal-clip:${TAG:-latest} embedding-multimodal-bridgetower: build: - dockerfile: comps/embeddings/multimodal/bridgetower/Dockerfile + dockerfile: comps/embeddings/src/integrations/dependency/bridgetower/Dockerfile image: ${REGISTRY:-opea}/embedding-multimodal-bridgetower:${TAG:-latest} embedding-multimodal: build: - dockerfile: comps/embeddings/multimodal/multimodal_langchain/Dockerfile + dockerfile: comps/embeddings/src/Dockerfile image: ${REGISTRY:-opea}/embedding-multimodal:${TAG:-latest} - embedding-langchain-mosec-endpoint: - build: - dockerfile: comps/embeddings/mosec/langchain/dependency/Dockerfile - image: ${REGISTRY:-opea}/embedding-langchain-mosec-endpoint:${TAG:-latest} - embedding-langchain-mosec: - build: - dockerfile: comps/embeddings/mosec/langchain/Dockerfile - image: ${REGISTRY:-opea}/embedding-langchain-mosec:${TAG:-latest} - embedding-tei-llama-index: - build: - dockerfile: comps/embeddings/tei/llama_index/Dockerfile - image: ${REGISTRY:-opea}/embedding-tei-llama-index:${TAG:-latest} embedding-multimodal-bridgetower-gaudi: build: - dockerfile: comps/embeddings/multimodal/bridgetower/Dockerfile.intel_hpu + dockerfile: comps/embeddings/src/integrations/dependency/bridgetower/Dockerfile.intel_hpu image: ${REGISTRY:-opea}/embedding-multimodal-bridgetower-gaudi:${TAG:-latest} embedding-predictionguard: build: - dockerfile: comps/embeddings/predictionguard/Dockerfile + dockerfile: comps/embeddings/src/Dockerfile image: ${REGISTRY:-opea}/embedding-predictionguard:${TAG:-latest} - embedding-reranking-local: - build: - 
dockerfile: comps/embeddings/tei/langchain/Dockerfile.dynamic_batching - image: ${REGISTRY:-opea}/embedding-reranking-local:${TAG:-latest} diff --git a/.github/workflows/docker/compose/llms-compose.yaml b/.github/workflows/docker/compose/llms-compose.yaml index 73d4ad1f1..2d42e6f46 100644 --- a/.github/workflows/docker/compose/llms-compose.yaml +++ b/.github/workflows/docker/compose/llms-compose.yaml @@ -5,7 +5,7 @@ services: llm-tgi: build: - dockerfile: comps/llms/text-generation/tgi/Dockerfile + dockerfile: comps/llms/src/text-generation/Dockerfile image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest} llm-ollama: build: @@ -21,7 +21,7 @@ services: image: ${REGISTRY:-opea}/llm-faqgen-tgi:${TAG:-latest} llm-vllm: build: - dockerfile: comps/llms/text-generation/vllm/langchain/Dockerfile + dockerfile: comps/llms/src/text-generation/Dockerfile image: ${REGISTRY:-opea}/llm-vllm:${TAG:-latest} llm-native: build: @@ -50,10 +50,6 @@ services: build: dockerfile: comps/llms/utils/lm-eval/Dockerfile image: ${REGISTRY:-opea}/llm-eval:${TAG:-latest} - llm-vllm-llamaindex: - build: - dockerfile: comps/llms/text-generation/vllm/llama_index/Dockerfile - image: ${REGISTRY:-opea}/llm-vllm-llamaindex:${TAG:-latest} llm-textgen-predictionguard: build: dockerfile: comps/llms/text-generation/predictionguard/Dockerfile diff --git a/.github/workflows/docker/compose/ragas-compose.yaml b/.github/workflows/docker/compose/ragas-compose.yaml index aedad1b89..9e4d55798 100644 --- a/.github/workflows/docker/compose/ragas-compose.yaml +++ b/.github/workflows/docker/compose/ragas-compose.yaml @@ -3,5 +3,5 @@ services: ragas: build: - dockerfile: comps/ragas/tgi/langchain/Dockerfile + dockerfile: comps/ragas/src/tgi/langchain/Dockerfile image: ${REGISTRY:-opea}/ragas:${TAG:-latest} diff --git a/.github/workflows/docker/compose/reranks-compose.yaml b/.github/workflows/docker/compose/reranks-compose.yaml index 2d3526c1c..ff71162bf 100644 --- a/.github/workflows/docker/compose/reranks-compose.yaml +++ b/.github/workflows/docker/compose/reranks-compose.yaml @@ -5,21 +5,13 @@ services: reranking-tei: build: - dockerfile: comps/reranks/tei/Dockerfile + dockerfile: comps/reranks/src/Dockerfile image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest} reranking-videoqna: build: - dockerfile: comps/reranks/videoqna/Dockerfile + dockerfile: comps/reranks/src/Dockerfile # TODO. need to update image: ${REGISTRY:-opea}/reranking-videoqna:${TAG:-latest} reranking-fastrag: build: - dockerfile: comps/reranks/fastrag/Dockerfile + dockerfile: comps/reranks/src/Dockerfile # TODO. 
need to update image: ${REGISTRY:-opea}/reranking-fastrag:${TAG:-latest} - reranking-langchain-mosec-endpoint: - build: - dockerfile: comps/reranks/mosec/langchain/dependency/Dockerfile - image: ${REGISTRY:-opea}/reranking-langchain-mosec-endpoint:${TAG:-latest} - reranking-langchain-mosec: - build: - dockerfile: comps/reranks/mosec/langchain/Dockerfile - image: ${REGISTRY:-opea}/reranking-langchain-mosec:${TAG:-latest} diff --git a/.github/workflows/docker/compose/retrievers-compose.yaml b/.github/workflows/docker/compose/retrievers-compose.yaml index 00d95fe6b..a81b6a995 100644 --- a/.github/workflows/docker/compose/retrievers-compose.yaml +++ b/.github/workflows/docker/compose/retrievers-compose.yaml @@ -3,6 +3,10 @@ # this file should be run in the root of the repo services: + retriever: + build: + dockerfile: comps/retrievers/src/Dockerfile + image: ${REGISTRY:-opea}/retriever:${TAG:-latest} retriever-redis: build: dockerfile: comps/retrievers/redis/langchain/Dockerfile diff --git a/.github/workflows/pr-examples-test.yml b/.github/workflows/pr-examples-test.yml index 825be1ab0..5554dc868 100644 --- a/.github/workflows/pr-examples-test.yml +++ b/.github/workflows/pr-examples-test.yml @@ -10,11 +10,8 @@ on: paths: - .github/workflows/pr-examples-test.yml - comps/cores/** - - comps/embeddings/langchain/** - - comps/retrievers/langchain/redis/** - - comps/reranks/tei/** - - comps/llms/text-generation/tgi/** - - comps/dataprep/redis/langchain/** + - comps/dataprep/src/redis/langchain/** + - comps/retrievers/src/redis/langchain/** - requirements.txt - "!**.md" diff --git a/.github/workflows/pr-microservice-test.yml b/.github/workflows/pr-microservice-test.yml index bded5ddc1..d0a56cdac 100644 --- a/.github/workflows/pr-microservice-test.yml +++ b/.github/workflows/pr-microservice-test.yml @@ -52,9 +52,8 @@ jobs: hardware: ${{ matrix.hardware }} run: | cd tests - echo "log_name=${service}_${hardware}" >> $GITHUB_ENV - if [ ${hardware} = "intel_cpu" ]; then on_hw=""; else on_hw="_on_${hardware}"; fi - timeout 60m bash $(find . -type f -name test_${service}${on_hw}.sh) + echo "log_name=${service}" >> $GITHUB_ENV + timeout 60m bash $(find . -type f -name test_${service}.sh) - name: Clean up container if: cancelled() || failure() diff --git a/.github/workflows/scripts/get_test_matrix.sh b/.github/workflows/scripts/get_test_matrix.sh index 114e5ad3b..d58b4fae4 100644 --- a/.github/workflows/scripts/get_test_matrix.sh +++ b/.github/workflows/scripts/get_test_matrix.sh @@ -5,79 +5,128 @@ # service: service path name, like 'agent_langchain', 'asr_whisper' # hardware: 'intel_cpu', 'intel_hpu', ... 
-set -xe +set -e cd $WORKSPACE changed_files_full=$changed_files_full run_matrix="{\"include\":[" # add test services when comps code change function find_test_1() { - local pre_service=$1 + local pre_service_path=$1 local n=$2 local all_service=$3 - common_file_change=$(printf '%s\n' "${changed_files[@]}"| grep ${pre_service} | cut -d'/' -f$n | grep -E '*.py' | grep -vE '__init__.py|version.py' | sort -u) || true + common_file_change=$(printf '%s\n' "${changed_files[@]}"| grep ${pre_service_path} | cut -d'/' -f$n | grep -E '*.py' | grep -vE '__init__.py|version.py' | sort -u) || true if [ "$common_file_change" ] || [ "$all_service" = "true" ]; then # if common files changed, run all services - services=$(ls ${pre_service} | cut -d'/' -f$n | grep -vE '*.md|*.py|*.sh|*.yaml|*.yml|*.pdf' | sort -u) || true + services=$(ls ${pre_service_path} | cut -d'/' -f$n | grep -vE '*.md|*.py|*.sh|*.yaml|*.yml|*.pdf' | sort -u) || true all_service="true" else # if specific service files changed, only run the specific service - services=$(printf '%s\n' "${changed_files[@]}"| grep ${pre_service} | cut -d'/' -f$n | grep -vE '*.py|*.sh|*.yaml|*.yml|*.pdf' | sort -u) || true + services=$(printf '%s\n' "${changed_files[@]}"| grep ${pre_service_path} | cut -d'/' -f$n | grep -vE '*.py|*.sh|*.yaml|*.yml|*.pdf' | sort -u) || true fi for service in ${services}; do - service=$pre_service/$service - if [[ $(ls ${service} | grep -E "Dockerfile*") ]]; then - service_name=$(echo $service | tr '/' '_' | cut -c7-) # comps/dataprep/redis/langchain -> dataprep_redis_langchain - default_service_script_path=$(find ./tests -type f -name test_${service_name}.sh) || true - if [ "$default_service_script_path" ]; then - run_matrix="${run_matrix}{\"service\":\"${service_name}\",\"hardware\":\"intel_cpu\"}," + service_path=$pre_service_path/$service + if [[ $(ls ${service_path} | grep -E "Dockerfile*") ]]; then + if [[ $(ls ${service_path} | grep "integrations") ]]; then + # new org with `src` and `integrations` folder + run_all_interation="false" + service_name=$(echo $service_path | sed 's:/src::' | tr '/' '_' | cut -c7-) # comps/retrievers/src/redis/langchain -> retrievers_redis_langchain + common_file_change_insight=$(printf '%s\n' "${changed_files[@]}"| grep ${service_path} | grep -vE 'integrations' | sort -u) || true + if [ "$common_file_change_insight" ]; then + # if common file changed, run all integrations + run_all_interation="true" + fi + if [ "$run_all_interation" = "false" ]; then + changed_integrations=$(printf '%s\n' "${changed_files[@]}"| grep ${service_path} | grep -E 'integrations' | cut -d'/' -f$((n+2)) | cut -d'.' -f1 | sort -u) || true + for integration in ${changed_integrations}; do + # Accurate matching test scripts + # find_test=$(find ./tests -type f \( -name test_${service_name}_${integration}.sh -o -name test_${service_name}_${integration}_on_*.sh \)) || true + # Fuzzy matching test scripts, for example, llms/src/text-generation/integrations/opea.py match several tests. 
+ find_test=$(find ./tests -type f -name test_${service_name}_${integration}*.sh) || true + if [ "$find_test" ]; then + fill_in_matrix "$find_test" + else + run_all_interation="true" + break + fi + done + fi + if [ "$run_all_interation" = "true" ]; then + find_test=$(find ./tests -type f -name test_${service_name}*.sh) || true + if [ "$find_test" ]; then + fill_in_matrix "$find_test" + fi + fi + else + # old org without 'src' folder + service_name=$(echo $service_path | tr '/' '_' | cut -c7-) # comps/retrievers/redis/langchain -> retrievers_redis_langchain + find_test=$(find ./tests -type f -name test_${service_name}*.sh) || true + if [ "$find_test" ]; then + fill_in_matrix "$find_test" + fi fi - other_service_script_path=$(find ./tests -type f -name test_${service_name}_on_*.sh) || true - for script in ${other_service_script_path}; do - _service=$(echo $script | cut -d'/' -f4 | cut -d'.' -f1 | cut -c6-) - hardware=${_service#*_on_} - run_matrix="${run_matrix}{\"service\":\"${service_name}\",\"hardware\":\"${hardware}\"}," - done else - find_test_1 $service $((n+1)) $all_service + find_test_1 $service_path $((n+1)) $all_service fi done } +function fill_in_matrix() { + find_test=$1 + for test in ${find_test}; do + _service=$(echo $test | cut -d'/' -f4 | cut -d'.' -f1 | cut -c6-) + _fill_in_matrix $_service + done +} + +function _fill_in_matrix() { + _service=$1 + if [ $(echo ${_service} | grep -c "_on_") == 0 ]; then + service=${_service} + hardware="intel_cpu" + else + hardware=${_service#*_on_} + fi + echo "service=${_service}, hardware=${hardware}" + if [[ $(echo ${run_matrix} | grep -c "{\"service\":\"${_service}\",\"hardware\":\"${hardware}\"},") == 0 ]]; then + run_matrix="${run_matrix}{\"service\":\"${_service}\",\"hardware\":\"${hardware}\"}," + echo "------------------ add one service ------------------" + fi + sleep 1s +} + # add test case when test scripts code change function find_test_2() { test_files=$(printf '%s\n' "${changed_files[@]}" | grep -E "*.sh") || true for test_file in ${test_files}; do - _service=$(echo $test_file | cut -d'/' -f3 | cut -d'.' -f1 | cut -c6-) - if [ $(echo ${_service} | grep -c "_on_") == 0 ]; then - service=${_service} - hardware="intel_cpu" - else - service=${_service%_on_*} - hardware=${_service#*_on_} - fi - if [[ $(echo ${run_matrix} | grep -c "{\"service\":\"${service}\",\"hardware\":\"${hardware}\"},") == 0 ]]; then - run_matrix="${run_matrix}{\"service\":\"${service}\",\"hardware\":\"${hardware}\"}," + if [ -f $test_file ]; then + _service=$(echo $test_file | cut -d'/' -f3 | cut -d'.' 
-f1 | cut -c6-) + _fill_in_matrix $_service fi done } function main() { - changed_files=$(printf '%s\n' "${changed_files_full[@]}" | grep 'comps/' | grep -vE '*.md|comps/cores') || true + changed_files=$(printf '%s\n' "${changed_files_full[@]}" | grep 'comps/' | grep -vE '*.md|comps/cores|comps/3rd_parties|deployment|*.yaml') || true + echo "===========start find_test_1============" + echo "changed_files=${changed_files}" find_test_1 "comps" 2 false sleep 1s + echo "run_matrix=${run_matrix}" echo "===========finish find_test_1============" changed_files=$(printf '%s\n' "${changed_files_full[@]}" | grep 'tests/' | grep -vE '*.md|*.txt|tests/cores') || true + echo "===========start find_test_2============" + echo "changed_files=${changed_files}" find_test_2 sleep 1s + echo "run_matrix=${run_matrix}" echo "===========finish find_test_2============" run_matrix=$run_matrix"]}" - echo "run_matrix=${run_matrix}" echo "run_matrix=${run_matrix}" >> $GITHUB_OUTPUT } diff --git a/README.md b/README.md index b9cbf453b..c6e706433 100644 --- a/README.md +++ b/README.md @@ -36,27 +36,27 @@ This modular approach allows developers to independently develop, deploy, and sc The initially supported `Microservices` are described in the below table. More `Microservices` are on the way. -| MicroService | Framework | Model | Serving | HW | Description | -| --------------------------------------------- | ------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------- | ------ | ------------------------------------- | -| [Embedding](./comps/embeddings/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) | [TEI-Gaudi](https://github.com/huggingface/tei-gaudi) | Gaudi2 | Embedding on Gaudi2 | -| [Embedding](./comps/embeddings/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) | [TEI](https://github.com/huggingface/text-embeddings-inference) | Xeon | Embedding on Xeon CPU | -| [Retriever](./comps/retrievers/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) | [TEI](https://github.com/huggingface/text-embeddings-inference) | Xeon | Retriever on Xeon CPU | -| [Reranking](./comps/reranks/tei/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [BAAI/bge-reranker-base](https://huggingface.co/BAAI/bge-reranker-base) | [TEI-Gaudi](https://github.com/huggingface/tei-gaudi) | Gaudi2 | Reranking on Gaudi2 | -| [Reranking](./comps/reranks/tei/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [BBAAI/bge-reranker-base](https://huggingface.co/BAAI/bge-reranker-base) | [TEI](https://github.com/huggingface/text-embeddings-inference) | Xeon | Reranking on Xeon CPU | -| [ASR](./comps/asr/src/README.md) | NA | [openai/whisper-small](https://huggingface.co/openai/whisper-small) | NA | Gaudi2 | Audio-Speech-Recognition on Gaudi2 | -| [ASR](./comps/asr/src/README.md) | NA | [openai/whisper-small](https://huggingface.co/openai/whisper-small) | NA | Xeon | Audio-Speech-RecognitionS on Xeon CPU | -| [TTS](./comps/tts/src/README.md) | NA | 
[microsoft/speecht5_tts](https://huggingface.co/microsoft/speecht5_tts) | NA | Gaudi2 | Text-To-Speech on Gaudi2 | -| [TTS](./comps/tts/src/README.md) | NA | [microsoft/speecht5_tts](https://huggingface.co/microsoft/speecht5_tts) | NA | Xeon | Text-To-Speech on Xeon CPU | -| [Dataprep](./comps/dataprep/README.md) | [Qdrant](https://qdrant.tech/) | [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | NA | Gaudi2 | Dataprep on Gaudi2 | -| [Dataprep](./comps/dataprep/README.md) | [Qdrant](https://qdrant.tech/) | [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | NA | Xeon | Dataprep on Xeon CPU | -| [Dataprep](./comps/dataprep/README.md) | [Redis](https://redis.io/) | [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) | NA | Gaudi2 | Dataprep on Gaudi2 | -| [Dataprep](./comps/dataprep/README.md) | [Redis](https://redis.io/) | [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) | NA | Xeon | Dataprep on Xeon CPU | -| [LLM](./comps/llms/text-generation/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [Intel/neural-chat-7b-v3-3](https://huggingface.co/Intel/neural-chat-7b-v3-3) | [TGI Gaudi](https://github.com/huggingface/tgi-gaudi) | Gaudi2 | LLM on Gaudi2 | -| [LLM](./comps/llms/text-generation/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [Intel/neural-chat-7b-v3-3](https://huggingface.co/Intel/neural-chat-7b-v3-3) | [TGI](https://github.com/huggingface/text-generation-inference) | Xeon | LLM on Xeon CPU | -| [LLM](./comps/llms/text-generation/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [Intel/neural-chat-7b-v3-3](https://huggingface.co/Intel/neural-chat-7b-v3-3) | [Ray Serve](https://github.com/ray-project/ray) | Gaudi2 | LLM on Gaudi2 | -| [LLM](./comps/llms/text-generation/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [Intel/neural-chat-7b-v3-3](https://huggingface.co/Intel/neural-chat-7b-v3-3) | [Ray Serve](https://github.com/ray-project/ray) | Xeon | LLM on Xeon CPU | -| [LLM](./comps/llms/text-generation/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [Intel/neural-chat-7b-v3-3](https://huggingface.co/Intel/neural-chat-7b-v3-3) | [vLLM](https://github.com/vllm-project/vllm/) | Gaudi2 | LLM on Gaudi2 | -| [LLM](./comps/llms/text-generation/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [Intel/neural-chat-7b-v3-3](https://huggingface.co/Intel/neural-chat-7b-v3-3) | [vLLM](https://github.com/vllm-project/vllm/) | Xeon | LLM on Xeon CPU | +| MicroService | Framework | Model | Serving | HW | Description | +| ------------------------------------------------- | ------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------- | ------ | ------------------------------------- | +| [Embedding](./comps/embeddings/src/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) | [TEI-Gaudi](https://github.com/huggingface/tei-gaudi) | Gaudi2 | Embedding on Gaudi2 | +| 
[Embedding](./comps/embeddings/src/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) | [TEI](https://github.com/huggingface/text-embeddings-inference) | Xeon | Embedding on Xeon CPU | +| [Retriever](./comps/retrievers/src/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) | [TEI](https://github.com/huggingface/text-embeddings-inference) | Xeon | Retriever on Xeon CPU | +| [Reranking](./comps/reranks/src/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [BAAI/bge-reranker-base](https://huggingface.co/BAAI/bge-reranker-base) | [TEI-Gaudi](https://github.com/huggingface/tei-gaudi) | Gaudi2 | Reranking on Gaudi2 | +| [Reranking](./comps/reranks/src/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [BAAI/bge-reranker-base](https://huggingface.co/BAAI/bge-reranker-base) | [TEI](https://github.com/huggingface/text-embeddings-inference) | Xeon | Reranking on Xeon CPU | +| [ASR](./comps/asr/src/README.md) | NA | [openai/whisper-small](https://huggingface.co/openai/whisper-small) | NA | Gaudi2 | Audio-Speech-Recognition on Gaudi2 | +| [ASR](./comps/asr/src/README.md) | NA | [openai/whisper-small](https://huggingface.co/openai/whisper-small) | NA | Xeon | Audio-Speech-Recognition on Xeon CPU | +| [TTS](./comps/tts/src/README.md) | NA | [microsoft/speecht5_tts](https://huggingface.co/microsoft/speecht5_tts) | NA | Gaudi2 | Text-To-Speech on Gaudi2 | +| [TTS](./comps/tts/src/README.md) | NA | [microsoft/speecht5_tts](https://huggingface.co/microsoft/speecht5_tts) | NA | Xeon | Text-To-Speech on Xeon CPU | +| [Dataprep](./comps/dataprep/src/README.md) | [Qdrant](https://qdrant.tech/) | [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | NA | Gaudi2 | Dataprep on Gaudi2 | +| [Dataprep](./comps/dataprep/src/README.md) | [Qdrant](https://qdrant.tech/) | [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | NA | Xeon | Dataprep on Xeon CPU | +| [Dataprep](./comps/dataprep/src/README.md) | [Redis](https://redis.io/) | [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) | NA | Gaudi2 | Dataprep on Gaudi2 | +| [Dataprep](./comps/dataprep/src/README.md) | [Redis](https://redis.io/) | [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) | NA | Xeon | Dataprep on Xeon CPU | +| [LLM](./comps/llms/src/text-generation/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [Intel/neural-chat-7b-v3-3](https://huggingface.co/Intel/neural-chat-7b-v3-3) | [TGI Gaudi](https://github.com/huggingface/tgi-gaudi) | Gaudi2 | LLM on Gaudi2 | +| [LLM](./comps/llms/src/text-generation/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [Intel/neural-chat-7b-v3-3](https://huggingface.co/Intel/neural-chat-7b-v3-3) | [TGI](https://github.com/huggingface/text-generation-inference) | Xeon | LLM on Xeon CPU | +| [LLM](./comps/llms/src/text-generation/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [Intel/neural-chat-7b-v3-3](https://huggingface.co/Intel/neural-chat-7b-v3-3) | [Ray Serve](https://github.com/ray-project/ray) | Gaudi2 | LLM on Gaudi2 | +| 
[LLM](./comps/llms/src/text-generation/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [Intel/neural-chat-7b-v3-3](https://huggingface.co/Intel/neural-chat-7b-v3-3) | [Ray Serve](https://github.com/ray-project/ray) | Xeon | LLM on Xeon CPU | +| [LLM](./comps/llms/src/text-generation/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [Intel/neural-chat-7b-v3-3](https://huggingface.co/Intel/neural-chat-7b-v3-3) | [vLLM](https://github.com/vllm-project/vllm/) | Gaudi2 | LLM on Gaudi2 | +| [LLM](./comps/llms/src/text-generation/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [Intel/neural-chat-7b-v3-3](https://huggingface.co/Intel/neural-chat-7b-v3-3) | [vLLM](https://github.com/vllm-project/vllm/) | Xeon | LLM on Xeon CPU | A `Microservices` can be created by using the decorator `register_microservice`. Taking the `embedding microservice` as an example: diff --git a/comps/nginx/docker-compose.yaml b/comps/3rd_parties/nginx/deployment/docker_compose/docker-compose.yaml similarity index 100% rename from comps/nginx/docker-compose.yaml rename to comps/3rd_parties/nginx/deployment/docker_compose/docker-compose.yaml diff --git a/comps/nginx/start-nginx.sh b/comps/3rd_parties/nginx/deployment/docker_compose/start-nginx.sh similarity index 100% rename from comps/nginx/start-nginx.sh rename to comps/3rd_parties/nginx/deployment/docker_compose/start-nginx.sh diff --git a/comps/3rd_parties/nginx/deployment/kubernetes/README.md b/comps/3rd_parties/nginx/deployment/kubernetes/README.md new file mode 100644 index 000000000..e69de29bb diff --git a/comps/nginx/Dockerfile b/comps/3rd_parties/nginx/src/Dockerfile similarity index 70% rename from comps/nginx/Dockerfile rename to comps/3rd_parties/nginx/src/Dockerfile index d4517ced0..8d7ba2e6a 100644 --- a/comps/nginx/Dockerfile +++ b/comps/3rd_parties/nginx/src/Dockerfile @@ -6,7 +6,7 @@ FROM nginx:alpine RUN apk add --no-cache gettext -COPY comps/nginx/nginx.conf.template /etc/nginx/nginx.conf.template +COPY comps/3rd_parties/nginx/src/nginx.conf.template /etc/nginx/nginx.conf.template ENV FRONTEND_SERVICE_IP=localhost ENV FRONTEND_SERVICE_PORT=5173 @@ -16,7 +16,7 @@ ENV BACKEND_SERVICE_PORT=8888 ENV DATAPREP_SERVICE_IP=localhost ENV DATAPREP_SERVICE_PORT=6007 -COPY comps/nginx/start-nginx.sh /usr/local/bin/start-nginx.sh +COPY comps/3rd_parties/nginx/deployment/docker_compose/start-nginx.sh /usr/local/bin/start-nginx.sh RUN chmod +x /usr/local/bin/start-nginx.sh CMD ["/usr/local/bin/start-nginx.sh"] diff --git a/comps/nginx/nginx.conf.template b/comps/3rd_parties/nginx/src/nginx.conf.template similarity index 100% rename from comps/nginx/nginx.conf.template rename to comps/3rd_parties/nginx/src/nginx.conf.template diff --git a/comps/llms/text-generation/tgi/launch_tgi_service.sh b/comps/3rd_parties/tgi/deployment/docker_compose/launch_tgi_service.sh similarity index 100% rename from comps/llms/text-generation/tgi/launch_tgi_service.sh rename to comps/3rd_parties/tgi/deployment/docker_compose/launch_tgi_service.sh diff --git a/comps/3rd_parties/tgi/deployment/kubernetes/README.md b/comps/3rd_parties/tgi/deployment/kubernetes/README.md new file mode 100644 index 000000000..e69de29bb diff --git a/comps/llms/text-generation/vllm/llama_index/dependency/build_docker_vllm.sh b/comps/3rd_parties/vllm/deployment/docker_compose/build_docker_vllm.sh similarity index 90% rename from 
comps/llms/text-generation/vllm/llama_index/dependency/build_docker_vllm.sh rename to comps/3rd_parties/vllm/deployment/docker_compose/build_docker_vllm.sh index c94dd7237..bcbf20c4a 100644 --- a/comps/llms/text-generation/vllm/llama_index/dependency/build_docker_vllm.sh +++ b/comps/3rd_parties/vllm/deployment/docker_compose/build_docker_vllm.sh @@ -14,6 +14,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +# Get script directory +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" +echo "Script directory: $SCRIPT_DIR" +cd $SCRIPT_DIR + # Set default values default_hw_mode="cpu" @@ -40,4 +45,6 @@ else git clone https://github.com/vllm-project/vllm.git cd ./vllm/ docker build -f Dockerfile.cpu -t opea/vllm-cpu:latest --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy + cd .. + rm -rf vllm fi diff --git a/comps/3rd_parties/vllm/deployment/docker_compose/build_docker_vllm_openvino.sh b/comps/3rd_parties/vllm/deployment/docker_compose/build_docker_vllm_openvino.sh new file mode 100644 index 000000000..c7ca87cac --- /dev/null +++ b/comps/3rd_parties/vllm/deployment/docker_compose/build_docker_vllm_openvino.sh @@ -0,0 +1,29 @@ +#!/bin/bash + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Set default values +default_hw_mode="cpu" + +# Assign arguments to variable +hw_mode=${1:-$default_hw_mode} + +# Check if all required arguments are provided +if [ "$#" -lt 0 ] || [ "$#" -gt 1 ]; then + echo "Usage: $0 [hw_mode]" + echo "Please customize the arguments you want to use. + - hw_mode: The hardware mode for the vLLM endpoint, with the default being 'cpu', and the optional selection can be 'cpu' and 'gpu'." + exit 1 +fi + +# Build the docker image for vLLM based on the hardware mode +if [ "$hw_mode" = "gpu" ]; then + docker build -f Dockerfile.intel_gpu -t opea/vllm-arc:latest . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy +else + BASEDIR="$( cd "$( dirname "$0" )" && pwd )" + git clone https://github.com/vllm-project/vllm.git vllm + cd ./vllm/ && git checkout v0.6.1 + docker build -t vllm-openvino:latest -f Dockerfile.openvino . 
--build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy + cd $BASEDIR && rm -rf vllm +fi diff --git a/comps/llms/text-generation/vllm/llama_index/dependency/launch_vllm_service.sh b/comps/3rd_parties/vllm/deployment/docker_compose/launch_vllm_service.sh similarity index 82% rename from comps/llms/text-generation/vllm/llama_index/dependency/launch_vllm_service.sh rename to comps/3rd_parties/vllm/deployment/docker_compose/launch_vllm_service.sh index c8b2790b4..83ecd6753 100644 --- a/comps/llms/text-generation/vllm/llama_index/dependency/launch_vllm_service.sh +++ b/comps/3rd_parties/vllm/deployment/docker_compose/launch_vllm_service.sh @@ -38,7 +38,7 @@ volume=$PWD/data # Build the Docker run command based on hardware mode if [ "$hw_mode" = "hpu" ]; then - docker run -d --rm --runtime=habana --name="vllm-service" -p $port_number:80 -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} opea/vllm-gaudi:latest --model $model_name --tensor-parallel-size $parallel_number --host 0.0.0.0 --port 80 --block-size $block_size --max-num-seqs $max_num_seqs --max-seq_len-to-capture $max_seq_len_to_capture + docker run -d --rm --runtime=habana --name="vllm-service" -p $port_number:80 -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy -e HF_TOKEN=${HF_TOKEN} opea/vllm-gaudi:latest --model $model_name --tensor-parallel-size $parallel_number --host 0.0.0.0 --port 80 --block-size $block_size --max-num-seqs $max_num_seqs --max-seq_len-to-capture $max_seq_len_to_capture else - docker run -d --rm --name="vllm-service" -p $port_number:80 --network=host -v $volume:/data -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e VLLM_CPU_KVCACHE_SPACE=40 opea/vllm-cpu:latest --model $model_name --host 0.0.0.0 --port 80 + docker run -d --rm --name="vllm-service" -p $port_number:80 --network=host -v $volume:/data -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy -e HF_TOKEN=${HF_TOKEN} -e VLLM_CPU_KVCACHE_SPACE=40 opea/vllm-cpu:latest --model $model_name --host 0.0.0.0 --port 80 fi diff --git a/comps/llms/text-generation/vllm/llama_index/dependency/launch_vllm_service_openvino.sh b/comps/3rd_parties/vllm/deployment/docker_compose/launch_vllm_service_openvino.sh similarity index 63% rename from comps/llms/text-generation/vllm/llama_index/dependency/launch_vllm_service_openvino.sh rename to comps/3rd_parties/vllm/deployment/docker_compose/launch_vllm_service_openvino.sh index 18ce714da..140df6a0f 100644 --- a/comps/llms/text-generation/vllm/llama_index/dependency/launch_vllm_service_openvino.sh +++ b/comps/3rd_parties/vllm/deployment/docker_compose/launch_vllm_service_openvino.sh @@ -9,16 +9,20 @@ default_port=8008 default_model="meta-llama/Llama-2-7b-hf" +default_device="cpu" swap_space=50 +image="vllm:openvino" -while getopts ":hm:p:" opt; do +while getopts ":hm:p:d:" opt; do case $opt in h) - echo "Usage: $0 [-h] [-m model] [-p port]" + echo "Usage: $0 [-h] [-m model] [-p port] [-d device]" echo "Options:" echo " -h Display this help message" - echo " -m model Model (default: meta-llama/Llama-2-7b-hf)" + echo " -m model Model (default: meta-llama/Llama-2-7b-hf for cpu" + echo " meta-llama/Llama-3.2-3B-Instruct for gpu)" echo " -p port Port (default: 8000)" + echo " -d device Target Device (Default: 
cpu, optional selection can be 'cpu' and 'gpu')" exit 0 ;; m) @@ -27,6 +31,9 @@ while getopts ":hm:p:" opt; do p) port=$OPTARG ;; + d) + device=$OPTARG + ;; \?) echo "Invalid option: -$OPTARG" >&2 exit 1 @@ -37,25 +44,33 @@ done # Assign arguments to variables model_name=${model:-$default_model} port_number=${port:-$default_port} +device=${device:-$default_device} # Set the Huggingface cache directory variable HF_CACHE_DIR=$HOME/.cache/huggingface - +if [ "$device" = "gpu" ]; then + docker_args="-e VLLM_OPENVINO_DEVICE=GPU --device /dev/dri -v /dev/dri/by-path:/dev/dri/by-path" + vllm_args="--max_model_len=1024" + model_name="meta-llama/Llama-3.2-3B-Instruct" + image="opea/vllm-arc:latest" +fi # Start the model server using Openvino as the backend inference engine. # Provide the container name that is unique and meaningful, typically one that includes the model name. docker run -d --rm --name="vllm-openvino-server" \ -p $port_number:80 \ --ipc=host \ + $docker_args \ -e HTTPS_PROXY=$https_proxy \ -e HTTP_PROXY=$https_proxy \ -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} \ - -v $HOME/.cache/huggingface:/home/user/.cache/huggingface \ - vllm-openvino:latest /bin/bash -c "\ + -v $HOME/.cache/huggingface:/root/.cache/huggingface \ + $image /bin/bash -c "\ cd / && \ export VLLM_CPU_KVCACHE_SPACE=50 && \ python3 -m vllm.entrypoints.openai.api_server \ --model \"$model_name\" \ + $vllm_args \ --host 0.0.0.0 \ --port 80" diff --git a/comps/3rd_parties/vllm/deployment/kubernetes/README.md b/comps/3rd_parties/vllm/deployment/kubernetes/README.md new file mode 100644 index 000000000..e69de29bb diff --git a/comps/agent/deployment/docker_compose/README.md b/comps/agent/deployment/docker_compose/README.md new file mode 100644 index 000000000..e69de29bb diff --git a/comps/agent/deployment/kubernetes/README.md b/comps/agent/deployment/kubernetes/README.md new file mode 100644 index 000000000..e69de29bb diff --git a/comps/agent/langchain/README.md b/comps/agent/langchain/README.md index 67bce6a63..e6ad4e6f8 100644 --- a/comps/agent/langchain/README.md +++ b/comps/agent/langchain/README.md @@ -206,4 +206,4 @@ data: [DONE] ## 5. Customize agent strategy For advanced developers who want to implement their own agent strategies, you can add a separate folder in `src\strategy`, implement your agent by inherit the `BaseAgent` class, and add your strategy into the `src\agent.py`. The architecture of this agent microservice is shown in the diagram below as a reference. 
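Since the paragraph above only describes the extension point in words, here is a hypothetical sketch of such a custom strategy, placed before the architecture diagram for reference. The import path, the constructor behavior, and the overridden method name (`execute`) are assumptions for illustration, not the actual `BaseAgent` API; check `src/strategy/` and `src/agent.py` for the real signatures before copying it.

```python
# Hypothetical sketch only: the import path, base-class signature, and the
# overridden method name are assumptions, not the actual BaseAgent API.
from comps.agent.langchain.src.strategy.base_agent import BaseAgent  # path assumed


class MyCustomAgent(BaseAgent):
    """Toy strategy that answers directly, without planning or tool calls."""

    async def execute(self, query: str, config: dict | None = None) -> dict:
        # A real strategy would build its LangGraph/LangChain graph here and
        # stream intermediate steps back to the caller.
        return {"output": f"[my_custom_strategy] {query}"}


# The new strategy still has to be wired into src/agent.py so it can be selected
# by name (e.g. strategy="my_custom"), as described in the paragraph above.
```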
-![Architecture Overview](assets/agent_arch.jpg) +![Architecture Overview](agent_arch.jpg) diff --git a/comps/agent/langchain/assets/agent_arch.jpg b/comps/agent/langchain/agent_arch.jpg similarity index 100% rename from comps/agent/langchain/assets/agent_arch.jpg rename to comps/agent/langchain/agent_arch.jpg diff --git a/comps/agent/langchain/assets/sql_agent.png b/comps/agent/langchain/sql_agent.png similarity index 100% rename from comps/agent/langchain/assets/sql_agent.png rename to comps/agent/langchain/sql_agent.png diff --git a/comps/agent/langchain/assets/sql_agent_llama.png b/comps/agent/langchain/sql_agent_llama.png similarity index 100% rename from comps/agent/langchain/assets/sql_agent_llama.png rename to comps/agent/langchain/sql_agent_llama.png diff --git a/comps/agent/langchain/src/strategy/sqlagent/README.md b/comps/agent/langchain/src/strategy/sqlagent/README.md index d3e7d7e14..3704da56e 100644 --- a/comps/agent/langchain/src/strategy/sqlagent/README.md +++ b/comps/agent/langchain/src/strategy/sqlagent/README.md @@ -10,7 +10,7 @@ We currently have two types of SQL agents: The architecture of `sql_agent_llama` is shown in the figure below. The agent node takes user question, hints (optional) and history (when available), and thinks step by step to solve the problem. -![SQL Agent Llama Architecture](../../../assets/sql_agent_llama.png) +![SQL Agent Llama Architecture](../../../sql_agent_llama.png) ### Database schema: @@ -33,7 +33,7 @@ Due to the current limitations of open source LLMs and serving frameworks (tgi a The architecture of `sql_agent` is shown in the figure below. The agent node takes user question, hints (optional) and history (when available), and thinks step by step to solve the problem. The basic idea is the same as `sql_agent_llama`. However, since OpenAI APIs produce well-structured tool call objects, we don't need a special output parser. Instead, we only keep the query fixer. 
-![SQL Agent Architecture](../../../assets/sql_agent.png) +![SQL Agent Architecture](../../../sql_agent.png) ## Limitations diff --git a/comps/animation/deployment/docker_compose/README.md b/comps/animation/deployment/docker_compose/README.md new file mode 100644 index 000000000..e69de29bb diff --git a/comps/animation/deployment/kubernetes/README.md b/comps/animation/deployment/kubernetes/README.md new file mode 100644 index 000000000..e69de29bb diff --git a/comps/asr/deployment/docker_compose/README.md b/comps/asr/deployment/docker_compose/README.md new file mode 100644 index 000000000..e69de29bb diff --git a/comps/asr/deployment/kubernetes/README.md b/comps/asr/deployment/kubernetes/README.md new file mode 100644 index 000000000..e69de29bb diff --git a/comps/chathistory/mongo/docker-compose-chathistory-mongo.yaml b/comps/chathistory/deployment/docker_compose/monga.yaml similarity index 100% rename from comps/chathistory/mongo/docker-compose-chathistory-mongo.yaml rename to comps/chathistory/deployment/docker_compose/monga.yaml diff --git a/comps/chathistory/deployment/kubernetes/README.md b/comps/chathistory/deployment/kubernetes/README.md new file mode 100644 index 000000000..e69de29bb diff --git a/comps/cores/common/component.py b/comps/cores/common/component.py index 8b59d9511..2bbd436f2 100644 --- a/comps/cores/common/component.py +++ b/comps/cores/common/component.py @@ -3,6 +3,10 @@ from abc import ABC, abstractmethod +from ..mega.logger import CustomLogger + +logger = CustomLogger("OpeaComponent") + class OpeaComponent(ABC): """The OpeaComponent class serves as the base class for all components in the GenAIComps. @@ -58,10 +62,10 @@ class OpeaComponent(ABC): Returns: bool: True if the component is healthy, False otherwise. """ - pass + raise NotImplementedError("The 'check_health' method must be implemented by subclasses.") @abstractmethod - def invoke(self, *args, **kwargs): + async def invoke(self, *args, **kwargs): """Invoke service accessing using the component. Args: @@ -71,7 +75,7 @@ class OpeaComponent(ABC): Returns: Any: The result of the service accessing. """ - pass + raise NotImplementedError("The 'invoke' method must be implemented by subclasses.") def __repr__(self): """Provides a string representation of the component for debugging and logging purposes. @@ -107,6 +111,7 @@ class OpeaComponentController(ABC): """ if component.name in self.components: raise ValueError(f"Component '{component.name}' is already registered.") + logger.info(f"Registered component: {component.name}") self.components[component.name] = component def discover_and_activate(self): @@ -117,11 +122,11 @@ class OpeaComponentController(ABC): for component in self.components.values(): if component.check_health(): self.active_component = component - print(f"Activated component: {component.name}") + logger.info(f"Activated component: {component.name}") return raise RuntimeError("No healthy components available.") - def invoke(self, *args, **kwargs): + async def invoke(self, *args, **kwargs): """Invokes service accessing using the active component. Args: @@ -136,7 +141,7 @@ class OpeaComponentController(ABC): """ if not self.active_component: raise RuntimeError("No active component. Call 'discover_and_activate' first.") - return self.active_component.invoke(*args, **kwargs) + return await self.active_component.invoke(*args, **kwargs) def list_components(self): """Lists all registered components. 
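Because `invoke` is now declared `async` on both `OpeaComponent` and `OpeaComponentController`, concrete components must provide coroutine implementations and callers must await the controller. Below is a minimal sketch of that contract under stated assumptions: the component subclass, its constructor handling, and the `register`/`discover_and_activate` call sequence are illustrative, not code from this PR, and should be adapted to the actual base-class signatures.

```python
# Minimal sketch (not part of this PR) of the async contract introduced above.
import asyncio

from comps.cores.common.component import OpeaComponent, OpeaComponentController


class EchoComponent(OpeaComponent):
    """Hypothetical component used only to show the new async `invoke`."""

    def __init__(self, name: str):
        # A real component should call OpeaComponent.__init__; its full signature
        # is not visible in this diff, so we only set what the controller reads.
        self.name = name

    def check_health(self) -> bool:
        # A real component would probe its backing service here.
        return True

    async def invoke(self, text: str) -> str:
        # A real component would await an HTTP/gRPC client call here.
        await asyncio.sleep(0)
        return f"echo: {text}"


class EchoController(OpeaComponentController):
    """Trivial controller subclass; all behavior comes from the base class."""


async def main() -> None:
    controller = EchoController()
    controller.register(EchoComponent("echo"))
    controller.discover_and_activate()
    # `invoke` must now be awaited, mirroring the
    # `await self.active_component.invoke(...)` change in the controller above.
    print(await controller.invoke("hello"))


if __name__ == "__main__":
    asyncio.run(main())
```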
diff --git a/comps/dataprep/elasticsearch/langchain/prepare_doc_elasticsearch.py b/comps/dataprep/elasticsearch/langchain/prepare_doc_elasticsearch.py index 115207de5..0d5c67824 100644 --- a/comps/dataprep/elasticsearch/langchain/prepare_doc_elasticsearch.py +++ b/comps/dataprep/elasticsearch/langchain/prepare_doc_elasticsearch.py @@ -25,7 +25,7 @@ from langchain_elasticsearch import ElasticsearchStore from langchain_huggingface.embeddings import HuggingFaceEndpointEmbeddings from comps import CustomLogger, DocPath, opea_microservices, register_microservice -from comps.dataprep.utils import ( +from comps.dataprep.src.utils import ( create_upload_folder, document_loader, encode_filename, diff --git a/comps/dataprep/milvus/langchain/README.md b/comps/dataprep/milvus/langchain/README.md index f349df54c..7fa5fe2b3 100644 --- a/comps/dataprep/milvus/langchain/README.md +++ b/comps/dataprep/milvus/langchain/README.md @@ -24,29 +24,23 @@ export https_proxy=${your_http_proxy} export MILVUS_HOST=${your_milvus_host_ip} export MILVUS_PORT=19530 export COLLECTION_NAME=${your_collection_name} -export MOSEC_EMBEDDING_ENDPOINT=${your_embedding_endpoint} +export TEI_EMBEDDING_ENDPOINT=${your_embedding_endpoint} ``` -### 1.4 Start Mosec Embedding Service +### 1.4 Start TEI Embedding Service -First, you need to build a mosec embedding serving docker image. - -```bash -cd ../../.. -docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy -t opea/embedding-mosec-endpoint:latest -f comps/embeddings/mosec/langchain/dependency/Dockerfile . -``` - -Then start the mosec embedding server. +First, start the TEI embedding server. ```bash your_port=6010 -docker run -d --name="embedding-mosec-endpoint" -p $your_port:8000 opea/embedding-mosec-endpoint:latest +model="BAAI/bge-base-en-v1.5" +docker run -p $your_port:80 -v ./data:/data --name tei_server -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $model ``` Setup environment variables: ```bash -export MOSEC_EMBEDDING_ENDPOINT="http://localhost:$your_port" +export TEI_EMBEDDING_ENDPOINT="http://localhost:$your_port" export MILVUS_HOST=${your_host_ip} ``` @@ -68,8 +62,6 @@ Please refer to this [readme](../../../vectorstores/milvus/README.md). ```bash cd ../../.. -# build mosec embedding docker image -docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy -t opea/embedding-langchain-mosec-endpoint:latest -f comps/embeddings/mosec/langchain/dependency/Dockerfile . # build dataprep milvus docker image docker build -t opea/dataprep-milvus:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy --build-arg no_proxy=$no_proxy -f comps/dataprep/milvus/langchain/Dockerfile . 
``` @@ -77,14 +69,14 @@ docker build -t opea/dataprep-milvus:latest --build-arg https_proxy=$https_proxy ### 2.3 Setup Environment Variables ```bash -export MOSEC_EMBEDDING_ENDPOINT="http://localhost:$your_port" +export TEI_EMBEDDING_ENDPOINT="http://localhost:$your_port" export MILVUS_HOST=${your_host_ip} ``` ### 2.3 Run Docker with CLI (Option A) ```bash -docker run -d --name="dataprep-milvus-server" -p 6010:6010 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e MOSEC_EMBEDDING_ENDPOINT=${MOSEC_EMBEDDING_ENDPOINT} -e MILVUS_HOST=${MILVUS_HOST} opea/dataprep-milvus:latest +docker run -d --name="dataprep-milvus-server" -p 6010:6010 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT} -e MILVUS_HOST=${MILVUS_HOST} opea/dataprep-milvus:latest ``` ### 2.4 Run with Docker Compose (Option B) @@ -241,7 +233,7 @@ curl -X POST \ ## 🚀4. Troubleshooting -1. If you get errors from Mosec Embedding Endpoint like `cannot find this task, maybe it has expired` while uploading files, try to reduce the `chunk_size` in the curl command like below (the default chunk_size=1500). +1. If you get errors from TEI Embedding Endpoint like `cannot find this task, maybe it has expired` while uploading files, try to reduce the `chunk_size` in the curl command like below (the default chunk_size=1500). ```bash curl -X POST \ diff --git a/comps/dataprep/milvus/langchain/config.py b/comps/dataprep/milvus/langchain/config.py index 914abcd14..da037a0d9 100644 --- a/comps/dataprep/milvus/langchain/config.py +++ b/comps/dataprep/milvus/langchain/config.py @@ -5,14 +5,13 @@ import os # Local Embedding model LOCAL_EMBEDDING_MODEL = os.getenv("LOCAL_EMBEDDING_MODEL", "maidalun1020/bce-embedding-base_v1") -# TEI Embedding endpoints -TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", "") + # MILVUS configuration MILVUS_HOST = os.getenv("MILVUS_HOST", "localhost") MILVUS_PORT = int(os.getenv("MILVUS_PORT", 19530)) COLLECTION_NAME = os.getenv("COLLECTION_NAME", "rag_milvus") -# MOSEC configuration -MOSEC_EMBEDDING_MODEL = os.environ.get("MOSEC_EMBEDDING_MODEL", "/home/user/bge-large-zh-v1.5") -MOSEC_EMBEDDING_ENDPOINT = os.environ.get("MOSEC_EMBEDDING_ENDPOINT", "") -os.environ["OPENAI_API_BASE"] = MOSEC_EMBEDDING_ENDPOINT +# TEI configuration +TEI_EMBEDDING_MODEL = os.environ.get("TEI_EMBEDDING_MODEL", "/home/user/bge-large-zh-v1.5") +TEI_EMBEDDING_ENDPOINT = os.environ.get("TEI_EMBEDDING_ENDPOINT", "") +os.environ["OPENAI_API_BASE"] = TEI_EMBEDDING_ENDPOINT os.environ["OPENAI_API_KEY"] = "Dummy key" diff --git a/comps/dataprep/milvus/langchain/docker-compose.yml b/comps/dataprep/milvus/langchain/docker-compose.yaml similarity index 100% rename from comps/dataprep/milvus/langchain/docker-compose.yml rename to comps/dataprep/milvus/langchain/docker-compose.yaml diff --git a/comps/dataprep/milvus/langchain/prepare_doc_milvus.py b/comps/dataprep/milvus/langchain/prepare_doc_milvus.py index a6014b621..a741fc634 100644 --- a/comps/dataprep/milvus/langchain/prepare_doc_milvus.py +++ b/comps/dataprep/milvus/langchain/prepare_doc_milvus.py @@ -11,9 +11,8 @@ from config import ( LOCAL_EMBEDDING_MODEL, MILVUS_HOST, MILVUS_PORT, - MOSEC_EMBEDDING_ENDPOINT, - MOSEC_EMBEDDING_MODEL, TEI_EMBEDDING_ENDPOINT, + TEI_EMBEDDING_MODEL, ) from fastapi import Body, File, Form, HTTPException, UploadFile from langchain.text_splitter import RecursiveCharacterTextSplitter @@ -23,7 +22,7 @@ from langchain_milvus.vectorstores 
import Milvus from langchain_text_splitters import HTMLHeaderTextSplitter from comps import CustomLogger, DocPath, opea_microservices, register_microservice -from comps.dataprep.utils import ( +from comps.dataprep.src.utils import ( create_upload_folder, decode_filename, document_loader, @@ -45,30 +44,6 @@ upload_folder = "./uploaded_files/" milvus_uri = f"http://{MILVUS_HOST}:{MILVUS_PORT}" -class MosecEmbeddings(OpenAIEmbeddings): - def _get_len_safe_embeddings( - self, texts: List[str], *, engine: str, chunk_size: Optional[int] = None - ) -> List[List[float]]: - batched_embeddings: List[List[float]] = [] - response = self.client.create(input=texts, **self._invocation_params) - if not isinstance(response, dict): - response = response.model_dump() - batched_embeddings.extend(r["embedding"] for r in response["data"]) - - _cached_empty_embedding: Optional[List[float]] = None - - def empty_embedding() -> List[float]: - nonlocal _cached_empty_embedding - if _cached_empty_embedding is None: - average_embedded = self.client.create(input="", **self._invocation_params) - if not isinstance(average_embedded, dict): - average_embedded = average_embedded.model_dump() - _cached_empty_embedding = average_embedded["data"][0]["embedding"] - return _cached_empty_embedding - - return [e if e is not None else empty_embedding() for e in batched_embeddings] - - def ingest_chunks_to_milvus(file_name: str, chunks: List): if logflag: logger.info(f"[ ingest chunks ] file name: {file_name}") @@ -470,14 +445,7 @@ if __name__ == "__main__": create_upload_folder(upload_folder) # Create vectorstore - if MOSEC_EMBEDDING_ENDPOINT: - # create embeddings using MOSEC endpoint service - if logflag: - logger.info( - f"[ prepare_doc_milvus ] MOSEC_EMBEDDING_ENDPOINT:{MOSEC_EMBEDDING_ENDPOINT}, MOSEC_EMBEDDING_MODEL:{MOSEC_EMBEDDING_MODEL}" - ) - embeddings = MosecEmbeddings(model=MOSEC_EMBEDDING_MODEL) - elif TEI_EMBEDDING_ENDPOINT: + if TEI_EMBEDDING_ENDPOINT: # create embeddings using TEI endpoint service if logflag: logger.info(f"[ prepare_doc_milvus ] TEI_EMBEDDING_ENDPOINT:{TEI_EMBEDDING_ENDPOINT}") diff --git a/comps/dataprep/multimodal/redis/langchain/prepare_videodoc_redis.py b/comps/dataprep/multimodal/redis/langchain/prepare_videodoc_redis.py index fa8ed4896..023bebaa2 100644 --- a/comps/dataprep/multimodal/redis/langchain/prepare_videodoc_redis.py +++ b/comps/dataprep/multimodal/redis/langchain/prepare_videodoc_redis.py @@ -32,7 +32,7 @@ from multimodal_utils import ( from PIL import Image from comps import opea_microservices, register_microservice -from comps.embeddings.multimodal.bridgetower.bridgetower_embedding import BridgeTowerEmbedding +from comps.embeddings.src.integrations.dependency.bridgetower.bridgetower_embedding import BridgeTowerEmbedding device = "cpu" upload_folder = "./uploaded_files/" diff --git a/comps/dataprep/neo4j/langchain/prepare_doc_neo4j.py b/comps/dataprep/neo4j/langchain/prepare_doc_neo4j.py index 50257fbeb..efe1fa4aa 100644 --- a/comps/dataprep/neo4j/langchain/prepare_doc_neo4j.py +++ b/comps/dataprep/neo4j/langchain/prepare_doc_neo4j.py @@ -17,7 +17,7 @@ from langchain_openai import ChatOpenAI from langchain_text_splitters import HTMLHeaderTextSplitter from comps import CustomLogger, DocPath, opea_microservices, register_microservice -from comps.dataprep.utils import ( +from comps.dataprep.src.utils import ( document_loader, encode_filename, get_separators, diff --git a/comps/dataprep/neo4j/llama_index/extract_graph_neo4j.py b/comps/dataprep/neo4j/llama_index/extract_graph_neo4j.py 
index a7ece023f..7785778a4 100644 --- a/comps/dataprep/neo4j/llama_index/extract_graph_neo4j.py +++ b/comps/dataprep/neo4j/llama_index/extract_graph_neo4j.py @@ -43,7 +43,7 @@ from openai import Client from transformers import AutoTokenizer from comps import CustomLogger, DocPath, opea_microservices, register_microservice -from comps.dataprep.utils import ( +from comps.dataprep.src.utils import ( document_loader, encode_filename, get_separators, diff --git a/comps/dataprep/neo4j/llama_index/compose.yaml b/comps/dataprep/neo4j/llama_index/neo4j_llama_index.yaml similarity index 100% rename from comps/dataprep/neo4j/llama_index/compose.yaml rename to comps/dataprep/neo4j/llama_index/neo4j_llama_index.yaml diff --git a/comps/dataprep/opensearch/langchain/prepare_doc_opensearch.py b/comps/dataprep/opensearch/langchain/prepare_doc_opensearch.py index 10c9f8353..499b4ba63 100644 --- a/comps/dataprep/opensearch/langchain/prepare_doc_opensearch.py +++ b/comps/dataprep/opensearch/langchain/prepare_doc_opensearch.py @@ -25,7 +25,7 @@ from langchain_text_splitters import HTMLHeaderTextSplitter from opensearchpy import OpenSearch, helpers from comps import CustomLogger, DocPath, opea_microservices, register_microservice -from comps.dataprep.utils import ( +from comps.dataprep.src.utils import ( create_upload_folder, document_loader, encode_filename, diff --git a/comps/dataprep/pgvector/langchain/docker-compose-dataprep-pgvector.yaml b/comps/dataprep/pgvector/langchain/pgvector_langchain.yaml similarity index 100% rename from comps/dataprep/pgvector/langchain/docker-compose-dataprep-pgvector.yaml rename to comps/dataprep/pgvector/langchain/pgvector_langchain.yaml diff --git a/comps/dataprep/pgvector/langchain/prepare_doc_pgvector.py b/comps/dataprep/pgvector/langchain/prepare_doc_pgvector.py index 78f9e3eea..9893b9628 100644 --- a/comps/dataprep/pgvector/langchain/prepare_doc_pgvector.py +++ b/comps/dataprep/pgvector/langchain/prepare_doc_pgvector.py @@ -15,7 +15,7 @@ from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFace from langchain_community.vectorstores import PGVector from comps import CustomLogger, DocPath, opea_microservices, register_microservice -from comps.dataprep.utils import ( +from comps.dataprep.src.utils import ( create_upload_folder, document_loader, encode_filename, diff --git a/comps/dataprep/pinecone/langchain/README.md b/comps/dataprep/pinecone/langchain/README.md index 9087f13e4..980772c4e 100644 --- a/comps/dataprep/pinecone/langchain/README.md +++ b/comps/dataprep/pinecone/langchain/README.md @@ -41,7 +41,7 @@ docker build -t opea/dataprep-pinecone:latest --build-arg https_proxy=$https_pro ### Run Docker with CLI ```bash -docker run -d --name="dataprep-pinecone-server" -p 6000:6000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/dataprep-pinecone:latest +docker run -d --name="dataprep-pinecone-server" -p 6007:6007 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/dataprep-pinecone:latest ``` ### Setup Environment Variables @@ -65,5 +65,5 @@ docker compose -f docker-compose-dataprep-pinecone.yaml up -d Once document preparation microservice for Pinecone is started, user can use below command to invoke the microservice to convert the document to embedding and save to the database. 
```bash -curl -X POST -H "Content-Type: application/json" -d '{"path":"/path/to/document"}' http://localhost:6000/v1/dataprep +curl -X POST -H "Content-Type: application/json" -d '{"path":"/path/to/document"}' http://localhost:6007/v1/dataprep ``` diff --git a/comps/dataprep/pinecone/langchain/docker-compose-dataprep-pinecone.yaml b/comps/dataprep/pinecone/langchain/pinecone_langchain.yaml similarity index 96% rename from comps/dataprep/pinecone/langchain/docker-compose-dataprep-pinecone.yaml rename to comps/dataprep/pinecone/langchain/pinecone_langchain.yaml index 0ee20389d..851bad586 100644 --- a/comps/dataprep/pinecone/langchain/docker-compose-dataprep-pinecone.yaml +++ b/comps/dataprep/pinecone/langchain/pinecone_langchain.yaml @@ -23,8 +23,6 @@ services: container_name: dataprep-pinecone-server ports: - "6007:6007" - - "6008:6008" - - "6009:6009" ipc: host environment: no_proxy: ${no_proxy} diff --git a/comps/dataprep/pinecone/langchain/prepare_doc_pinecone.py b/comps/dataprep/pinecone/langchain/prepare_doc_pinecone.py index aa24e44b1..877b7f89e 100644 --- a/comps/dataprep/pinecone/langchain/prepare_doc_pinecone.py +++ b/comps/dataprep/pinecone/langchain/prepare_doc_pinecone.py @@ -17,7 +17,7 @@ from langchain_text_splitters import HTMLHeaderTextSplitter from pinecone import Pinecone, ServerlessSpec from comps import CustomLogger, DocPath, opea_microservices, opea_telemetry, register_microservice -from comps.dataprep.utils import ( +from comps.dataprep.src.utils import ( create_upload_folder, document_loader, encode_filename, @@ -118,7 +118,7 @@ def ingest_data_to_pinecone(doc_path: DocPath): table_chunks = get_tables_result(path, doc_path.table_strategy) chunks = chunks + table_chunks if logflag: - logger.info("Done preprocessing. Created ", len(chunks), " chunks of the original file.") + logger.info(f"Done preprocessing. Created {len(chunks)} chunks of the original file.") # Create vectorstore if tei_embedding_endpoint: @@ -135,7 +135,7 @@ def ingest_data_to_pinecone(doc_path: DocPath): # Creating the index create_index(pc) if logflag: - logger.info("Successfully created the index", PINECONE_INDEX_NAME) + logger.info(f"Successfully created the index {PINECONE_INDEX_NAME}") # Batch size batch_size = 32 @@ -174,7 +174,7 @@ async def ingest_link_to_pinecone(link_list: List[str], chunk_size, chunk_overla # Creating the index create_index(pc) if logflag: - logger.info("Successfully created the index", PINECONE_INDEX_NAME) + logger.info(f"Successfully created the index {PINECONE_INDEX_NAME}") # save link contents and doc_ids one by one for link in link_list: @@ -252,7 +252,7 @@ async def ingest_documents( @register_microservice( - name="opea_service@prepare_doc_pinecone_file", endpoint="/v1/dataprep/get_file", host="0.0.0.0", port=6008 + name="opea_service@prepare_doc_pinecone", endpoint="/v1/dataprep/get_file", host="0.0.0.0", port=6007 ) async def rag_get_file_structure(): if logflag: @@ -270,7 +270,7 @@ async def rag_get_file_structure(): @register_microservice( - name="opea_service@prepare_doc_pinecone_del", endpoint="/v1/dataprep/delete_file", host="0.0.0.0", port=6009 + name="opea_service@prepare_doc_pinecone", endpoint="/v1/dataprep/delete_file", host="0.0.0.0", port=6007 ) async def delete_all(file_path: str = Body(..., embed=True)): """Delete file according to `file_path`. 
@@ -288,7 +288,7 @@ async def delete_all(file_path: str = Body(..., embed=True)): logger.info("[dataprep - del] successfully delete all files.") create_upload_folder(upload_folder) if logflag: - logger.info({"status": True}) + logger.info('{"status": True}') return {"status": True} else: raise HTTPException(status_code=404, detail="Single file deletion is not implemented yet") @@ -297,5 +297,3 @@ async def delete_all(file_path: str = Body(..., embed=True)): if __name__ == "__main__": create_upload_folder(upload_folder) opea_microservices["opea_service@prepare_doc_pinecone"].start() - opea_microservices["opea_service@prepare_doc_pinecone_file"].start() - opea_microservices["opea_service@prepare_doc_pinecone_del"].start() diff --git a/comps/dataprep/qdrant/langchain/prepare_doc_qdrant.py b/comps/dataprep/qdrant/langchain/prepare_doc_qdrant.py index 80678e98e..6c74f5cbb 100644 --- a/comps/dataprep/qdrant/langchain/prepare_doc_qdrant.py +++ b/comps/dataprep/qdrant/langchain/prepare_doc_qdrant.py @@ -14,7 +14,7 @@ from langchain_huggingface import HuggingFaceEndpointEmbeddings from langchain_text_splitters import HTMLHeaderTextSplitter from comps import CustomLogger, DocPath, opea_microservices, register_microservice -from comps.dataprep.utils import ( +from comps.dataprep.src.utils import ( document_loader, encode_filename, get_separators, diff --git a/comps/dataprep/qdrant/langchain/docker-compose-dataprep-qdrant.yaml b/comps/dataprep/qdrant/langchain/qdrant_langchain.yaml similarity index 100% rename from comps/dataprep/qdrant/langchain/docker-compose-dataprep-qdrant.yaml rename to comps/dataprep/qdrant/langchain/qdrant_langchain.yaml diff --git a/comps/dataprep/redis/langchain/prepare_doc_redis.py b/comps/dataprep/redis/langchain/prepare_doc_redis.py index ae69a28fc..ed73d5675 100644 --- a/comps/dataprep/redis/langchain/prepare_doc_redis.py +++ b/comps/dataprep/redis/langchain/prepare_doc_redis.py @@ -19,7 +19,7 @@ from redis.commands.search.field import TextField from redis.commands.search.indexDefinition import IndexDefinition, IndexType from comps import CustomLogger, DocPath, opea_microservices, register_microservice -from comps.dataprep.utils import ( +from comps.dataprep.src.utils import ( create_upload_folder, document_loader, encode_filename, diff --git a/comps/dataprep/redis/langchain/docker-compose-dataprep-redis.yaml b/comps/dataprep/redis/langchain/redis_langchain.yaml similarity index 91% rename from comps/dataprep/redis/langchain/docker-compose-dataprep-redis.yaml rename to comps/dataprep/redis/langchain/redis_langchain.yaml index 4ac5c871c..ea716cc2a 100644 --- a/comps/dataprep/redis/langchain/docker-compose-dataprep-redis.yaml +++ b/comps/dataprep/redis/langchain/redis_langchain.yaml @@ -26,7 +26,7 @@ services: image: opea/dataprep-redis:latest container_name: dataprep-redis-server ports: - - "6007:6007" + - "5000:5000" ipc: host environment: no_proxy: ${no_proxy} @@ -34,7 +34,8 @@ services: https_proxy: ${https_proxy} REDIS_URL: ${REDIS_URL} INDEX_NAME: ${INDEX_NAME} - TEI_ENDPOINT: ${TEI_ENDPOINT} + TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + LOGFLAG: ${LOGFLAG} HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} restart: unless-stopped diff --git a/comps/dataprep/redis/langchain_ray/prepare_doc_redis_on_ray.py b/comps/dataprep/redis/langchain_ray/prepare_doc_redis_on_ray.py index 2af834cac..83bd3e585 100644 --- a/comps/dataprep/redis/langchain_ray/prepare_doc_redis_on_ray.py +++ b/comps/dataprep/redis/langchain_ray/prepare_doc_redis_on_ray.py @@ -41,7 +41,7 @@ from 
ray.data.datasource import FileBasedDatasource from tqdm import tqdm from comps import CustomLogger, DocPath, opea_microservices, register_microservice -from comps.dataprep.utils import ( +from comps.dataprep.src.utils import ( Timer, create_upload_folder, document_loader, @@ -340,7 +340,7 @@ async def ingest_documents(files: List[UploadFile] = File(None), link_list: str @register_microservice( - name="opea_service@prepare_doc_redis_file", endpoint="/v1/dataprep/get_file", host="0.0.0.0", port=6008 + name="opea_service@prepare_doc_redis", endpoint="/v1/dataprep/get_file", host="0.0.0.0", port=6007 ) async def rag_get_file_structure(): if logflag: @@ -358,7 +358,7 @@ async def rag_get_file_structure(): @register_microservice( - name="opea_service@prepare_doc_redis_del", endpoint="/v1/dataprep/delete_file", host="0.0.0.0", port=6009 + name="opea_service@prepare_doc_redis", endpoint="/v1/dataprep/delete_file", host="0.0.0.0", port=6007 ) async def delete_single_file(file_path: str = Body(..., embed=True)): """Delete file according to `file_path`. @@ -416,4 +416,3 @@ async def delete_single_file(file_path: str = Body(..., embed=True)): if __name__ == "__main__": opea_microservices["opea_service@prepare_doc_redis"].start() - opea_microservices["opea_service@prepare_doc_redis_file"].start() diff --git a/comps/dataprep/redis/langchain_ray/docker-compose-dataprep-redis.yaml b/comps/dataprep/redis/langchain_ray/redis_langchain_ray.yaml similarity index 100% rename from comps/dataprep/redis/langchain_ray/docker-compose-dataprep-redis.yaml rename to comps/dataprep/redis/langchain_ray/redis_langchain_ray.yaml diff --git a/comps/dataprep/redis/llama_index/prepare_doc_redis.py b/comps/dataprep/redis/llama_index/prepare_doc_redis.py index fc93ebaad..546f951d9 100644 --- a/comps/dataprep/redis/llama_index/prepare_doc_redis.py +++ b/comps/dataprep/redis/llama_index/prepare_doc_redis.py @@ -85,7 +85,7 @@ async def ingest_documents(files: Optional[Union[UploadFile, List[UploadFile]]] @register_microservice( - name="opea_service@prepare_doc_redis_file", endpoint="/v1/dataprep/get_file", host="0.0.0.0", port=6008 + name="opea_service@prepare_doc_redis", endpoint="/v1/dataprep/get_file", host="0.0.0.0", port=6007 ) async def rag_get_file_structure(): if logflag: @@ -103,7 +103,7 @@ async def rag_get_file_structure(): @register_microservice( - name="opea_service@prepare_doc_redis_del", endpoint="/v1/dataprep/delete_file", host="0.0.0.0", port=6009 + name="opea_service@prepare_doc_redis", endpoint="/v1/dataprep/delete_file", host="0.0.0.0", port=6007 ) async def delete_single_file(file_path: str = Body(..., embed=True)): """Delete file according to `file_path`. 
@@ -160,5 +160,3 @@ async def delete_single_file(file_path: str = Body(..., embed=True)): if __name__ == "__main__": opea_microservices["opea_service@prepare_doc_redis"].start() - opea_microservices["opea_service@prepare_doc_redis_file"].start() - opea_microservices["opea_service@prepare_doc_redis_del"].start() diff --git a/comps/dataprep/redis/llama_index/docker-compose-dataprep-redis.yaml b/comps/dataprep/redis/llama_index/redis_llama_index.yaml similarity index 94% rename from comps/dataprep/redis/llama_index/docker-compose-dataprep-redis.yaml rename to comps/dataprep/redis/llama_index/redis_llama_index.yaml index 7a52c1cef..ecb1bf4bd 100644 --- a/comps/dataprep/redis/llama_index/docker-compose-dataprep-redis.yaml +++ b/comps/dataprep/redis/llama_index/redis_llama_index.yaml @@ -14,8 +14,6 @@ services: container_name: dataprep-redis-server ports: - "6007:6007" - - "6008:6008" - - "6009:6009" ipc: host environment: no_proxy: ${no_proxy} diff --git a/comps/dataprep/src/Dockerfile b/comps/dataprep/src/Dockerfile new file mode 100644 index 000000000..547ad3027 --- /dev/null +++ b/comps/dataprep/src/Dockerfile @@ -0,0 +1,41 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +FROM python:3.11-slim + +ENV LANG=C.UTF-8 + +ARG ARCH="cpu" + +RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ + build-essential \ + default-jre \ + libgl1-mesa-glx \ + libjemalloc-dev \ + libreoffice \ + poppler-utils \ + tesseract-ocr + +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + +USER user + +COPY comps /home/user/comps + +RUN pip install --no-cache-dir --upgrade pip setuptools && \ + if [ ${ARCH} = "cpu" ]; then pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \ + pip install --no-cache-dir -r /home/user/comps/dataprep/src/requirements.txt + +ENV PYTHONPATH=$PYTHONPATH:/home/user + +USER root + +RUN mkdir -p /home/user/comps/dataprep/src/uploaded_files && chown -R user /home/user/comps/dataprep/src/uploaded_files + +USER user + +WORKDIR /home/user/comps/dataprep/src + +ENTRYPOINT ["python", "opea_dataprep_microservice.py"] diff --git a/comps/dataprep/src/README.md b/comps/dataprep/src/README.md new file mode 100644 index 000000000..2550ec248 --- /dev/null +++ b/comps/dataprep/src/README.md @@ -0,0 +1,18 @@ +# Dataprep Microservice + +The Dataprep Microservice preprocesses data from various sources (structured or unstructured) into text, converts the text into embedding vectors, and stores the vectors in the database. + +## Install Requirements + +```bash +apt-get update +apt-get install libreoffice +``` + +## Use LVM (Large Vision Model) for Summarizing Image Data + +Unstructured data occasionally contains images. To convert such image data to text, an LVM can be used to summarize each image. To leverage an LVM, please refer to this [readme](../../lvms/llava/README.md) to start the LVM microservice first, then set the environment variable below before starting any dataprep microservice.
+ +```bash +export SUMMARIZE_IMAGE_VIA_LVM=1 +``` diff --git a/comps/dataprep/__init__.py b/comps/dataprep/src/__init__.py similarity index 100% rename from comps/dataprep/__init__.py rename to comps/dataprep/src/__init__.py diff --git a/comps/embeddings/__init__.py b/comps/dataprep/src/integrations/__init__.py similarity index 100% rename from comps/embeddings/__init__.py rename to comps/dataprep/src/integrations/__init__.py diff --git a/comps/dataprep/src/integrations/config.py b/comps/dataprep/src/integrations/config.py new file mode 100644 index 000000000..43a43471f --- /dev/null +++ b/comps/dataprep/src/integrations/config.py @@ -0,0 +1,91 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os + + +####################################################### +# Common Functions # +####################################################### +def get_boolean_env_var(var_name, default_value=False): + """Retrieve the boolean value of an environment variable. + + Args: + var_name (str): The name of the environment variable to retrieve. + default_value (bool): The default value to return if the variable + is not found. + + Returns: + bool: The value of the environment variable, interpreted as a boolean. + """ + true_values = {"true", "1", "t", "y", "yes"} + false_values = {"false", "0", "f", "n", "no"} + + # Retrieve the environment variable's value + value = os.getenv(var_name, "").lower() + + # Decide the boolean value based on the content of the string + if value in true_values: + return True + elif value in false_values: + return False + else: + return default_value + + +# Embedding model +EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") +# TEI Embedding endpoints +TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", "") + +# Vector Index Configuration +INDEX_NAME = os.getenv("INDEX_NAME", "rag_redis") +KEY_INDEX_NAME = os.getenv("KEY_INDEX_NAME", "file-keys") +TIMEOUT_SECONDS = int(os.getenv("TIMEOUT_SECONDS", 600)) +SEARCH_BATCH_SIZE = int(os.getenv("SEARCH_BATCH_SIZE", 10)) + + +####################################################### +# Redis # +####################################################### +# Redis Connection Information +REDIS_HOST = os.getenv("REDIS_HOST", "localhost") +REDIS_PORT = int(os.getenv("REDIS_PORT", 6379)) + + +def format_redis_conn_from_env(): + redis_url = os.getenv("REDIS_URL", None) + if redis_url: + return redis_url + else: + using_ssl = get_boolean_env_var("REDIS_SSL", False) + start = "rediss://" if using_ssl else "redis://" + + # if using RBAC + password = os.getenv("REDIS_PASSWORD", None) + username = os.getenv("REDIS_USERNAME", "default") + if password is not None: + start += f"{username}:{password}@" + + return start + f"{REDIS_HOST}:{REDIS_PORT}" + + +REDIS_URL = format_redis_conn_from_env() + + +####################################################### +# Milvus # +####################################################### +# Local Embedding model +LOCAL_EMBEDDING_MODEL = os.getenv("LOCAL_EMBEDDING_MODEL", "maidalun1020/bce-embedding-base_v1") +# TEI configuration +TEI_EMBEDDING_MODEL = os.environ.get("TEI_EMBEDDING_MODEL", "/home/user/bge-large-zh-v1.5") +TEI_EMBEDDING_ENDPOINT = os.environ.get("TEI_EMBEDDING_ENDPOINT", "") +os.environ["OPENAI_API_BASE"] = TEI_EMBEDDING_ENDPOINT +os.environ["OPENAI_API_KEY"] = "Dummy key" +# MILVUS configuration +MILVUS_HOST = os.getenv("MILVUS_HOST", "localhost") +MILVUS_PORT = int(os.getenv("MILVUS_PORT", 19530)) +MILVUS_URI = 
f"http://{MILVUS_HOST}:{MILVUS_PORT}" +INDEX_PARAMS = {"index_type": "FLAT", "metric_type": "IP", "params": {}} +COLLECTION_NAME = os.getenv("COLLECTION_NAME", "rag_milvus") diff --git a/comps/dataprep/src/integrations/milvus.py b/comps/dataprep/src/integrations/milvus.py new file mode 100644 index 000000000..c0f4b959e --- /dev/null +++ b/comps/dataprep/src/integrations/milvus.py @@ -0,0 +1,452 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# for test + + +import json +import os +from pathlib import Path +from typing import List, Optional, Union + +from fastapi import Body, File, Form, HTTPException, UploadFile +from langchain.text_splitter import RecursiveCharacterTextSplitter +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings, OpenAIEmbeddings +from langchain_core.documents import Document +from langchain_milvus.vectorstores import Milvus +from langchain_text_splitters import HTMLHeaderTextSplitter + +from comps import CustomLogger, DocPath, OpeaComponent, ServiceType +from comps.dataprep.src.utils import ( + create_upload_folder, + document_loader, + encode_filename, + format_file_list, + get_separators, + get_tables_result, + parse_html_new, + remove_folder_with_ignore, + save_content_to_local_disk, +) + +from .config import COLLECTION_NAME, INDEX_PARAMS, LOCAL_EMBEDDING_MODEL, MILVUS_URI, TEI_EMBEDDING_ENDPOINT + +logger = CustomLogger("milvus_dataprep") +logflag = os.getenv("LOGFLAG", False) +partition_field_name = "filename" +upload_folder = "./uploaded_files/" + + +def ingest_chunks_to_milvus(embeddings, file_name: str, chunks: List): + if logflag: + logger.info(f"[ ingest chunks ] file name: {file_name}") + + # insert documents to Milvus + insert_docs = [] + for chunk in chunks: + insert_docs.append(Document(page_content=chunk, metadata={partition_field_name: file_name})) + + # Batch size + batch_size = 32 + num_chunks = len(chunks) + + for i in range(0, num_chunks, batch_size): + if logflag: + logger.info(f"[ ingest chunks ] Current batch: {i}") + batch_docs = insert_docs[i : i + batch_size] + + try: + _ = Milvus.from_documents( + batch_docs, + embeddings, + collection_name=COLLECTION_NAME, + connection_args={"uri": MILVUS_URI}, + partition_key_field=partition_field_name, + ) + except Exception as e: + if logflag: + logger.info(f"[ ingest chunks ] fail to ingest chunks into Milvus. 
error: {e}") + raise HTTPException(status_code=500, detail=f"Fail to store chunks of file {file_name}.") + + if logflag: + logger.info(f"[ ingest chunks ] Docs ingested file {file_name} to Milvus collection {COLLECTION_NAME}.") + + return True + + +def ingest_data_to_milvus(doc_path: DocPath, embeddings): + """Ingest document to Milvus.""" + path = doc_path.path + file_name = path.split("/")[-1] + if logflag: + logger.info(f"[ ingest data ] Parsing document {path}, file name: {file_name}.") + + if path.endswith(".html"): + headers_to_split_on = [ + ("h1", "Header 1"), + ("h2", "Header 2"), + ("h3", "Header 3"), + ] + text_splitter = HTMLHeaderTextSplitter(headers_to_split_on=headers_to_split_on) + else: + text_splitter = RecursiveCharacterTextSplitter( + chunk_size=doc_path.chunk_size, + chunk_overlap=doc_path.chunk_overlap, + add_start_index=True, + separators=get_separators(), + ) + + content = document_loader(path) + + if logflag: + logger.info("[ ingest data ] file content loaded") + + structured_types = [".xlsx", ".csv", ".json", "jsonl"] + _, ext = os.path.splitext(path) + + if ext in structured_types: + chunks = content + else: + chunks = text_splitter.split_text(content) + + if doc_path.process_table and path.endswith(".pdf"): + table_chunks = get_tables_result(path, doc_path.table_strategy) + chunks = chunks + table_chunks + if logflag: + logger.info(f"[ ingest data ] Done preprocessing. Created {len(chunks)} chunks of the original file.") + + return ingest_chunks_to_milvus(embeddings, file_name, chunks) + + +def search_by_file(collection, file_name): + query = f"{partition_field_name} == '{file_name}'" + results = collection.query( + expr=query, + output_fields=[partition_field_name, "pk"], + ) + if logflag: + logger.info(f"[ search by file ] searched by {file_name}") + logger.info(f"[ search by file ] {len(results)} results: {results}") + return results + + +def search_all(collection): + results = collection.query(expr="pk >= 0", output_fields=[partition_field_name, "pk"]) + if logflag: + logger.info(f"[ search all ] {len(results)} results: {results}") + return results + + +def delete_all_data(my_milvus): + if logflag: + logger.info("[ delete all ] deleting all data in milvus") + if my_milvus.col: + my_milvus.col.drop() + if logflag: + logger.info("[ delete all ] delete success: all data") + + +def delete_by_partition_field(my_milvus, partition_field): + if logflag: + logger.info(f"[ delete partition ] deleting {partition_field_name} {partition_field}") + pks = my_milvus.get_pks(f'{partition_field_name} == "{partition_field}"') + if logflag: + logger.info(f"[ delete partition ] target pks: {pks}") + res = my_milvus.delete(pks) + my_milvus.col.flush() + if logflag: + logger.info(f"[ delete partition ] delete success: {res}") + + +class OpeaMilvusDataprep(OpeaComponent): + """A specialized dataprep component derived from OpeaComponent for milvus dataprep services. + + Attributes: + client (Milvus): An instance of the milvus client for vector database operations. 
+ """ + + def __init__(self, name: str, description: str, config: dict = None): + super().__init__(name, ServiceType.DATAPREP.name.lower(), description, config) + self.embedder = self._initialize_embedder() + + def _initialize_embedder(self): + if logflag: + logger.info("[ initialize embedder ] initializing milvus embedder...") + if TEI_EMBEDDING_ENDPOINT: + # create embeddings using TEI endpoint service + if logflag: + logger.info(f"[ milvus embedding ] TEI_EMBEDDING_ENDPOINT:{TEI_EMBEDDING_ENDPOINT}") + embeddings = HuggingFaceHubEmbeddings(model=TEI_EMBEDDING_ENDPOINT) + else: + # create embeddings using local embedding model + if logflag: + logger.info(f"[ milvus embedding ] LOCAL_EMBEDDING_MODEL:{LOCAL_EMBEDDING_MODEL}") + embeddings = HuggingFaceBgeEmbeddings(model_name=LOCAL_EMBEDDING_MODEL) + return embeddings + + def check_health(self) -> bool: + """Checks the health of the dataprep service. + + Returns: + bool: True if the service is reachable and healthy, False otherwise. + """ + if logflag: + logger.info("[ health check ] start to check health of milvus") + try: + client = Milvus( + embedding_function=self.embedder, + collection_name=COLLECTION_NAME, + connection_args={"uri": MILVUS_URI}, + index_params=INDEX_PARAMS, + auto_id=True, + ) + _ = client.client.list_collections() + if logflag: + logger.info("[ health check ] Successfully connected to Milvus!") + return True + except Exception as e: + logger.info(f"[ health check ] Failed to connect to Milvus: {e}") + return False + + def invoke(self, *args, **kwargs): + pass + + async def ingest_files( + self, + files: Optional[Union[UploadFile, List[UploadFile]]] = File(None), + link_list: Optional[str] = Form(None), + chunk_size: int = Form(1500), + chunk_overlap: int = Form(100), + process_table: bool = Form(False), + table_strategy: str = Form("fast"), + ): + """Ingest files/links content into milvus database. + + Save in the format of vector[], the vector length depends on the emedding model type. + Returns '{"status": 200, "message": "Data preparation succeeded"}' if successful. + Args: + files (Union[UploadFile, List[UploadFile]], optional): A file or a list of files to be ingested. Defaults to File(None). + link_list (str, optional): A list of links to be ingested. Defaults to Form(None). + chunk_size (int, optional): The size of the chunks to be split. Defaults to Form(1500). + chunk_overlap (int, optional): The overlap between chunks. Defaults to Form(100). + process_table (bool, optional): Whether to process tables in PDFs. Defaults to Form(False). + table_strategy (str, optional): The strategy to process tables in PDFs. Defaults to Form("fast"). + """ + if logflag: + logger.info(f"[ milvus ingest ] files:{files}") + logger.info(f"[ milvus ingest ] link_list:{link_list}") + + my_milvus = Milvus( + embedding_function=self.embedder, + collection_name=COLLECTION_NAME, + connection_args={"uri": MILVUS_URI}, + index_params=INDEX_PARAMS, + auto_id=True, + ) + + if files: + if not isinstance(files, list): + files = [files] + uploaded_files = [] + + for file in files: + encode_file = encode_filename(file.filename) + save_path = upload_folder + encode_file + if logflag: + logger.info(f"[ upload ] processing file {save_path}") + + if my_milvus.col: + # check whether the file is already uploaded + try: + search_res = search_by_file(my_milvus.col, encode_file) + except Exception as e: + raise HTTPException( + status_code=500, detail=f"Failed when searching in Milvus db for file {file.filename}." 
+ ) + if len(search_res) > 0: + if logflag: + logger.info(f"[ upload ] File {file.filename} already exists.") + raise HTTPException( + status_code=400, + detail=f"Uploaded file {file.filename} already exists. Please change file name.", + ) + + await save_content_to_local_disk(save_path, file) + ingest_data_to_milvus( + DocPath( + path=save_path, + chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + process_table=process_table, + table_strategy=table_strategy, + ), + self.embedder, + ) + uploaded_files.append(save_path) + if logflag: + logger.info(f"[ milvus ingest] Successfully saved file {save_path}") + + result = {"status": 200, "message": "Data preparation succeeded"} + if logflag: + logger.info(result) + return result + + if link_list: + link_list = json.loads(link_list) # Parse JSON string to list + if not isinstance(link_list, list): + raise HTTPException(status_code=400, detail=f"Link_list {link_list} should be a list.") + for link in link_list: + encoded_link = encode_filename(link) + if logflag: + logger.info(f"[ milvus ingest] processing link {encoded_link}") + + # check whether the link file already exists + if my_milvus.col: + try: + search_res = search_by_file(my_milvus.col, encoded_link + ".txt") + except Exception as e: + raise HTTPException( + status_code=500, detail=f"Failed when searching in Milvus db for link {link}." + ) + if len(search_res) > 0: + if logflag: + logger.info(f"[ milvus ingest ] Link {link} already exists.") + raise HTTPException( + status_code=400, detail=f"Uploaded link {link} already exists. Please change link." + ) + + save_path = upload_folder + encoded_link + ".txt" + content = parse_html_new([link], chunk_size=chunk_size, chunk_overlap=chunk_overlap) + await save_content_to_local_disk(save_path, content) + ingest_data_to_milvus( + DocPath( + path=save_path, + chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + process_table=process_table, + table_strategy=table_strategy, + ), + self.embedder, + ) + if logflag: + logger.info(f"[ milvus ingest] Successfully saved link list {link_list}") + return {"status": 200, "message": "Data preparation succeeded"} + + raise HTTPException(status_code=400, detail="Must provide either a file or a string list.") + + async def get_files(self): + """Get file structure from milvus database in the format of + { + "name": "File Name", + "id": "File Name", + "type": "File", + "parent": "", + }""" + + if logflag: + logger.info("[ milvus get ] start to get file structure") + + my_milvus = Milvus( + embedding_function=self.embedder, + collection_name=COLLECTION_NAME, + connection_args={"uri": MILVUS_URI}, + index_params=INDEX_PARAMS, + auto_id=True, + ) + + if not my_milvus.col: + logger.info(f"[ milvus get ] collection {COLLECTION_NAME} does not exist.") + return [] + + # get all files from db + try: + all_data = search_all(my_milvus.col) + except Exception as e: + raise HTTPException(status_code=500, detail="Failed when searching in Milvus db for all files.") + + # return [] if no data in db + if len(all_data) == 0: + return [] + + res_file = [res["filename"] for res in all_data] + unique_list = list(set(res_file)) + if logflag: + logger.info(f"[ milvus get ] unique list from db: {unique_list}") + + # construct result file list in format + file_list = format_file_list(unique_list) + + if logflag: + logger.info(f"[ milvus get ] final file list: {file_list}") + return file_list + + async def delete_files(self, file_path: str = Body(..., embed=True)): + """Delete file according to `file_path`. 
+ + `file_path`: + - specific file path (e.g. /path/to/file.txt) + - "all": delete all files uploaded + """ + if logflag: + logger.info(f"[ milvus delete ] delete files: {file_path}") + + my_milvus = Milvus( + embedding_function=self.embedder, + collection_name=COLLECTION_NAME, + connection_args={"uri": MILVUS_URI}, + index_params=INDEX_PARAMS, + auto_id=True, + ) + + # delete all uploaded files + if file_path == "all": + + delete_all_data(my_milvus) + + # delete files on local disk + try: + remove_folder_with_ignore(upload_folder) + except Exception as e: + if logflag: + logger.info(f"[ milvus delete ] {e}. Fail to delete {upload_folder}.") + raise HTTPException(status_code=500, detail=f"Fail to delete {upload_folder}.") + + if logflag: + logger.info("[ milvus delete ] successfully delete all files.") + + create_upload_folder(upload_folder) + if logflag: + logger.info("[ milvus delete ] new upload folder created.") + return {"status": True} + + encode_file_name = encode_filename(file_path) + delete_path = Path(upload_folder + "/" + encode_file_name) + if logflag: + logger.info(f"[ milvus delete ] delete_path: {delete_path}") + + # partially delete files + if delete_path.exists(): + + # TODO: check existence before delete + + # delete file + if delete_path.is_file(): + if logflag: + logger.info(f"[ milvus delete ] deleting file {encode_file_name}") + try: + delete_by_partition_field(my_milvus, encode_file_name) + except Exception as e: + if logflag: + logger.info(f"[ milvus delete ] fail to delete file {delete_path}: {e}") + return {"status": False} + delete_path.unlink() + if logflag: + logger.info(f"[ milvus delete ] file {file_path} deleted") + return {"status": True} + + # delete folder + else: + if logflag: + logger.info(f"[ milvus delete ] delete folder {file_path} is not supported for now.") + raise HTTPException(status_code=404, detail=f"Delete folder {file_path} is not supported for now.") + else: + raise HTTPException(status_code=404, detail="File/folder not found. 
Please check del_path.") diff --git a/comps/dataprep/src/integrations/redis.py b/comps/dataprep/src/integrations/redis.py new file mode 100644 index 000000000..64375473d --- /dev/null +++ b/comps/dataprep/src/integrations/redis.py @@ -0,0 +1,530 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# for test + + +import json +import os +from pathlib import Path +from typing import List, Optional, Union + +import redis +from fastapi import Body, File, Form, HTTPException, UploadFile +from langchain.text_splitter import RecursiveCharacterTextSplitter +from langchain_community.embeddings import HuggingFaceBgeEmbeddings +from langchain_community.vectorstores import Redis +from langchain_huggingface import HuggingFaceEndpointEmbeddings +from langchain_text_splitters import HTMLHeaderTextSplitter +from redis.commands.search.field import TextField +from redis.commands.search.indexDefinition import IndexDefinition, IndexType + +from comps import CustomLogger, DocPath, OpeaComponent, ServiceType +from comps.dataprep.src.utils import ( + create_upload_folder, + document_loader, + encode_filename, + format_search_results, + get_separators, + get_tables_result, + parse_html_new, + remove_folder_with_ignore, + save_content_to_local_disk, +) + +from .config import EMBED_MODEL, INDEX_NAME, KEY_INDEX_NAME, REDIS_URL, SEARCH_BATCH_SIZE, TEI_EMBEDDING_ENDPOINT + +logger = CustomLogger("redis_dataprep") +logflag = os.getenv("LOGFLAG", False) +upload_folder = "./uploaded_files/" +redis_pool = redis.ConnectionPool.from_url(REDIS_URL) + + +def check_index_existance(client): + if logflag: + logger.info(f"[ check index existence ] checking {client}") + try: + results = client.search("*") + if logflag: + logger.info(f"[ check index existence ] index of client exists: {client}") + return results + except Exception as e: + if logflag: + logger.info(f"[ check index existence ] index does not exist: {e}") + return None + + +def create_index(client, index_name: str = KEY_INDEX_NAME): + if logflag: + logger.info(f"[ create index ] creating index {index_name}") + try: + definition = IndexDefinition(index_type=IndexType.HASH, prefix=["file:"]) + client.create_index((TextField("file_name"), TextField("key_ids")), definition=definition) + if logflag: + logger.info(f"[ create index ] index {index_name} successfully created") + except Exception as e: + if logflag: + logger.info(f"[ create index ] fail to create index {index_name}: {e}") + return False + return True + + +def store_by_id(client, key, value): + if logflag: + logger.info(f"[ store by id ] storing ids of {key}") + try: + client.add_document(doc_id="file:" + key, file_name=key, key_ids=value) + if logflag: + logger.info(f"[ store by id ] store document success. 
id: file:{key}") + except Exception as e: + if logflag: + logger.info(f"[ store by id ] fail to store document file:{key}: {e}") + return False + return True + + +def search_by_id(client, doc_id): + if logflag: + logger.info(f"[ search by id ] searching docs of {doc_id}") + try: + results = client.load_document(doc_id) + if logflag: + logger.info(f"[ search by id ] search success of {doc_id}: {results}") + return results + except Exception as e: + if logflag: + logger.info(f"[ search by id ] fail to search docs of {doc_id}: {e}") + return None + + +def drop_index(index_name, redis_url=REDIS_URL): + if logflag: + logger.info(f"[ drop index ] dropping index {index_name}") + try: + assert Redis.drop_index(index_name=index_name, delete_documents=True, redis_url=redis_url) + if logflag: + logger.info(f"[ drop index ] index {index_name} deleted") + except Exception as e: + if logflag: + logger.info(f"[ drop index ] index {index_name} delete failed: {e}") + return False + return True + + +def delete_by_id(client, id): + try: + assert client.delete_document(id) + if logflag: + logger.info(f"[ delete by id ] delete id success: {id}") + except Exception as e: + if logflag: + logger.info(f"[ delete by id ] fail to delete ids {id}: {e}") + return False + return True + + +def ingest_chunks_to_redis(file_name: str, chunks: List): + if logflag: + logger.info(f"[ redis ingest chunks ] file name: {file_name}") + # Create vectorstore + if TEI_EMBEDDING_ENDPOINT: + # create embeddings using TEI endpoint service + embedder = HuggingFaceEndpointEmbeddings(model=TEI_EMBEDDING_ENDPOINT) + else: + # create embeddings using local embedding model + embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) + + # Batch size + batch_size = 32 + num_chunks = len(chunks) + + file_ids = [] + for i in range(0, num_chunks, batch_size): + if logflag: + logger.info(f"[ redis ingest chunks ] Current batch: {i}") + batch_chunks = chunks[i : i + batch_size] + batch_texts = batch_chunks + + _, keys = Redis.from_texts_return_keys( + texts=batch_texts, + embedding=embedder, + index_name=INDEX_NAME, + redis_url=REDIS_URL, + ) + if logflag: + logger.info(f"[ redis ingest chunks ] keys: {keys}") + file_ids.extend(keys) + if logflag: + logger.info(f"[ redis ingest chunks ] Processed batch {i//batch_size + 1}/{(num_chunks-1)//batch_size + 1}") + + # store file_ids into index file-keys + r = redis.Redis(connection_pool=redis_pool) + client = r.ft(KEY_INDEX_NAME) + if not check_index_existance(client): + assert create_index(client) + + try: + assert store_by_id(client, key=file_name, value="#".join(file_ids)) + except Exception as e: + if logflag: + logger.info(f"[ redis ingest chunks ] {e}. 
Fail to store chunks of file {file_name}.") + raise HTTPException(status_code=500, detail=f"Fail to store chunks of file {file_name}.") + return True + + +def ingest_data_to_redis(doc_path: DocPath): + """Ingest document to Redis.""" + path = doc_path.path + if logflag: + logger.info(f"[ redis ingest data ] Parsing document {path}.") + + if path.endswith(".html"): + headers_to_split_on = [ + ("h1", "Header 1"), + ("h2", "Header 2"), + ("h3", "Header 3"), + ] + text_splitter = HTMLHeaderTextSplitter(headers_to_split_on=headers_to_split_on) + else: + text_splitter = RecursiveCharacterTextSplitter( + chunk_size=doc_path.chunk_size, + chunk_overlap=doc_path.chunk_overlap, + add_start_index=True, + separators=get_separators(), + ) + + content = document_loader(path) + if logflag: + logger.info("[ redis ingest data ] file content loaded") + + structured_types = [".xlsx", ".csv", ".json", "jsonl"] + _, ext = os.path.splitext(path) + + if ext in structured_types: + chunks = content + else: + chunks = text_splitter.split_text(content) + + ### Specially processing for the table content in PDFs + if doc_path.process_table and path.endswith(".pdf"): + table_chunks = get_tables_result(path, doc_path.table_strategy) + chunks = chunks + table_chunks + if logflag: + logger.info(f"[ redis ingest data ] Done preprocessing. Created {len(chunks)} chunks of the given file.") + + file_name = doc_path.path.split("/")[-1] + return ingest_chunks_to_redis(file_name, chunks) + + +class OpeaRedisDataprep(OpeaComponent): + """A specialized dataprep component derived from OpeaComponent for redis dataprep services. + + Attributes: + client (redis.Redis): An instance of the redis client for vector database operations. + """ + + def __init__(self, name: str, description: str, config: dict = None): + super().__init__(name, ServiceType.DATAPREP.name.lower(), description, config) + self.client = self._initialize_client() + self.data_index_client = self.client.ft(INDEX_NAME) + self.key_index_client = self.client.ft(KEY_INDEX_NAME) + + def _initialize_client(self) -> redis.Redis: + if logflag: + logger.info("[ initialize client ] initializing redis client...") + + """Initializes the redis client.""" + try: + client = redis.Redis(connection_pool=redis_pool) + return client + except Exception as e: + logger.error(f"fail to initialize redis client: {e}") + return None + + def check_health(self) -> bool: + """Checks the health of the dataprep service. + + Returns: + bool: True if the service is reachable and healthy, False otherwise. + """ + if logflag: + logger.info("[ health check ] start to check health of redis") + try: + if self.client.ping(): + if logflag: + logger.info("[ health check ] Successfully connected to Redis!") + return True + except redis.ConnectionError as e: + logger.info(f"[ health check ] Failed to connect to Redis: {e}") + return False + + def invoke(self, *args, **kwargs): + pass + + async def ingest_files( + self, + files: Optional[Union[UploadFile, List[UploadFile]]] = File(None), + link_list: Optional[str] = Form(None), + chunk_size: int = Form(1500), + chunk_overlap: int = Form(100), + process_table: bool = Form(False), + table_strategy: str = Form("fast"), + ): + """Ingest files/links content into redis database. + + Save in the format of vector[768]. + Returns '{"status": 200, "message": "Data preparation succeeded"}' if successful. + Args: + files (Union[UploadFile, List[UploadFile]], optional): A file or a list of files to be ingested. Defaults to File(None). 
+ link_list (str, optional): A list of links to be ingested. Defaults to Form(None). + chunk_size (int, optional): The size of the chunks to be split. Defaults to Form(1500). + chunk_overlap (int, optional): The overlap between chunks. Defaults to Form(100). + process_table (bool, optional): Whether to process tables in PDFs. Defaults to Form(False). + table_strategy (str, optional): The strategy to process tables in PDFs. Defaults to Form("fast"). + """ + if logflag: + logger.info(f"[ redis ingest ] files:{files}") + logger.info(f"[ redis ingest ] link_list:{link_list}") + + if files: + if not isinstance(files, list): + files = [files] + uploaded_files = [] + + for file in files: + encode_file = encode_filename(file.filename) + doc_id = "file:" + encode_file + if logflag: + logger.info(f"[ redis ingest ] processing file {doc_id}") + + # check whether the file already exists + key_ids = None + try: + key_ids = search_by_id(self.key_index_client, doc_id).key_ids + if logflag: + logger.info(f"[ redis ingest] File {file.filename} already exists.") + except Exception as e: + logger.info(f"[ redis ingest] File {file.filename} does not exist.") + if key_ids: + raise HTTPException( + status_code=400, + detail=f"Uploaded file {file.filename} already exists. Please change file name.", + ) + + save_path = upload_folder + encode_file + await save_content_to_local_disk(save_path, file) + ingest_data_to_redis( + DocPath( + path=save_path, + chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + process_table=process_table, + table_strategy=table_strategy, + ) + ) + uploaded_files.append(save_path) + if logflag: + logger.info(f"[ redis ingest] Successfully saved file {save_path}") + + result = {"status": 200, "message": "Data preparation succeeded"} + if logflag: + logger.info(result) + return result + + if link_list: + link_list = json.loads(link_list) # Parse JSON string to list + if not isinstance(link_list, list): + raise HTTPException(status_code=400, detail=f"Link_list {link_list} should be a list.") + for link in link_list: + encoded_link = encode_filename(link) + doc_id = "file:" + encoded_link + ".txt" + if logflag: + logger.info(f"[ redis ingest] processing link {doc_id}") + + # check whether the link file already exists + key_ids = None + try: + key_ids = search_by_id(self.key_index_client, doc_id).key_ids + if logflag: + logger.info(f"[ redis ingest] Link {link} already exists.") + except Exception as e: + logger.info(f"[ redis ingest] Link {link} does not exist. Keep storing.") + if key_ids: + raise HTTPException( + status_code=400, detail=f"Uploaded link {link} already exists. Please change another link." 
+ ) + + save_path = upload_folder + encoded_link + ".txt" + content = parse_html_new([link], chunk_size=chunk_size, chunk_overlap=chunk_overlap) + await save_content_to_local_disk(save_path, content) + ingest_data_to_redis( + DocPath( + path=save_path, + chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + process_table=process_table, + table_strategy=table_strategy, + ) + ) + if logflag: + logger.info(f"[ redis ingest] Successfully saved link list {link_list}") + return {"status": 200, "message": "Data preparation succeeded"} + + raise HTTPException(status_code=400, detail="Must provide either a file or a string list.") + + async def get_files(self): + """Get file structure from redis database in the format of + { + "name": "File Name", + "id": "File Name", + "type": "File", + "parent": "", + }""" + + if logflag: + logger.info("[ redis get ] start to get file structure") + + offset = 0 + file_list = [] + + # check index existence + res = check_index_existance(self.key_index_client) + if not res: + if logflag: + logger.info(f"[ redis get ] index {KEY_INDEX_NAME} does not exist") + return file_list + + while True: + response = self.client.execute_command( + "FT.SEARCH", KEY_INDEX_NAME, "*", "LIMIT", offset, offset + SEARCH_BATCH_SIZE + ) + # no doc retrieved + if len(response) < 2: + break + file_list = format_search_results(response, file_list) + offset += SEARCH_BATCH_SIZE + # last batch + if (len(response) - 1) // 2 < SEARCH_BATCH_SIZE: + break + if logflag: + logger.info(f"[get] final file_list: {file_list}") + return file_list + + async def delete_files(self, file_path: str = Body(..., embed=True)): + """Delete file according to `file_path`. + + `file_path`: + - specific file path (e.g. /path/to/file.txt) + - "all": delete all files uploaded + """ + if logflag: + logger.info(f"[ redis delete ] delete files: {file_path}") + + # delete all uploaded files + if file_path == "all": + if logflag: + logger.info("[ redis delete ] delete all files") + + # drop index KEY_INDEX_NAME + if check_index_existance(self.key_index_client): + try: + assert drop_index(index_name=KEY_INDEX_NAME) + except Exception as e: + if logflag: + logger.info(f"[ redis delete ] {e}. Fail to drop index {KEY_INDEX_NAME}.") + raise HTTPException(status_code=500, detail=f"Fail to drop index {KEY_INDEX_NAME}.") + else: + logger.info(f"[ redis delete ] Index {KEY_INDEX_NAME} does not exist.") + + # drop index INDEX_NAME + if check_index_existance(self.data_index_client): + try: + assert drop_index(index_name=INDEX_NAME) + except Exception as e: + if logflag: + logger.info(f"[ redis delete ] {e}. Fail to drop index {INDEX_NAME}.") + raise HTTPException(status_code=500, detail=f"Fail to drop index {INDEX_NAME}.") + else: + if logflag: + logger.info(f"[ redis delete ] Index {INDEX_NAME} does not exist.") + + # delete files on local disk + try: + remove_folder_with_ignore(upload_folder) + except Exception as e: + if logflag: + logger.info(f"[ redis delete ] {e}.
Fail to delete {upload_folder}.") + raise HTTPException(status_code=500, detail=f"Fail to delete {upload_folder}.") + + if logflag: + logger.info("[ redis delete ] successfully delete all files.") + create_upload_folder(upload_folder) + if logflag: + logger.info({"status": True}) + return {"status": True} + + delete_path = Path(upload_folder + "/" + encode_filename(file_path)) + if logflag: + logger.info(f"[ redis delete ] delete_path: {delete_path}") + + # partially delete files + doc_id = "file:" + encode_filename(file_path) + logger.info(f"[ redis delete ] doc id: {doc_id}") + + # determine whether this file exists in db KEY_INDEX_NAME + try: + key_ids = search_by_id(self.key_index_client, doc_id).key_ids + except Exception as e: + if logflag: + logger.info(f"[ redis delete ] {e}, File {file_path} does not exists.") + raise HTTPException( + status_code=404, detail=f"File not found in db {KEY_INDEX_NAME}. Please check file_path." + ) + file_ids = key_ids.split("#") + + # delete file keys id in db KEY_INDEX_NAME + try: + assert delete_by_id(self.key_index_client, doc_id) + except Exception as e: + if logflag: + logger.info(f"[ redis delete ] {e}. File {file_path} delete failed for db {KEY_INDEX_NAME}.") + raise HTTPException(status_code=500, detail=f"File {file_path} delete failed for key index.") + + # delete file content in db INDEX_NAME + for file_id in file_ids: + # determine whether this file exists in db INDEX_NAME + try: + search_by_id(self.data_index_client, file_id) + except Exception as e: + if logflag: + logger.info(f"[ redis delete ] {e}. File {file_path} does not exists.") + raise HTTPException( + status_code=404, detail=f"File not found in db {INDEX_NAME}. Please check file_path." + ) + + # delete file content + try: + assert delete_by_id(self.data_index_client, file_id) + except Exception as e: + if logflag: + logger.info(f"[ redis delete ] {e}. 
File {file_path} delete failed for db {INDEX_NAME}") + raise HTTPException(status_code=500, detail=f"File {file_path} delete failed for index.") + + # local file does not exist (restarted docker container) + if not delete_path.exists(): + if logflag: + logger.info(f"[ redis delete ] File {file_path} not saved locally.") + return {"status": True} + + # delete local file + if delete_path.is_file(): + # delete file on local disk + delete_path.unlink() + if logflag: + logger.info(f"[ redis delete ] File {file_path} deleted successfully.") + return {"status": True} + + # delete folder + else: + if logflag: + logger.info(f"[ redis delete ] Delete folder {file_path} is not supported for now.") + raise HTTPException(status_code=404, detail=f"Delete folder {file_path} is not supported for now.") diff --git a/comps/dataprep/src/opea_dataprep_controller.py b/comps/dataprep/src/opea_dataprep_controller.py new file mode 100644 index 000000000..f879776b5 --- /dev/null +++ b/comps/dataprep/src/opea_dataprep_controller.py @@ -0,0 +1,33 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + + +import os + +from comps import CustomLogger, OpeaComponentController + +logger = CustomLogger("opea_dataprep_controller") +logflag = os.getenv("LOGFLAG", False) + + +class OpeaDataprepController(OpeaComponentController): + def __init__(self): + super().__init__() + + def invoke(self, *args, **kwargs): + pass + + async def ingest_files(self, *args, **kwargs): + if logflag: + logger.info("[ dataprep controller ] ingest files") + return await self.active_component.ingest_files(*args, **kwargs) + + async def get_files(self, *args, **kwargs): + if logflag: + logger.info("[ dataprep controller ] get files") + return await self.active_component.get_files(*args, **kwargs) + + async def delete_files(self, *args, **kwargs): + if logflag: + logger.info("[ dataprep controller ] delete files") + return await self.active_component.delete_files(*args, **kwargs) diff --git a/comps/dataprep/src/opea_dataprep_microservice.py b/comps/dataprep/src/opea_dataprep_microservice.py new file mode 100644 index 000000000..4ead0588b --- /dev/null +++ b/comps/dataprep/src/opea_dataprep_microservice.py @@ -0,0 +1,151 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + + +import os +import time +from typing import List, Optional, Union + +from fastapi import Body, File, Form, UploadFile +from integrations.milvus import OpeaMilvusDataprep +from integrations.redis import OpeaRedisDataprep +from opea_dataprep_controller import OpeaDataprepController + +from comps import ( + CustomLogger, + ServiceType, + opea_microservices, + register_microservice, + register_statistics, + statistics_dict, +) +from comps.dataprep.src.utils import create_upload_folder + +logger = CustomLogger("opea_dataprep_microservice") +logflag = os.getenv("LOGFLAG", False) +dataprep_type = os.getenv("DATAPREP_TYPE", False) +upload_folder = "./uploaded_files/" +# Initialize Controller +controller = OpeaDataprepController() + + +# Register components +try: + # Instantiate Dataprep components and register it to controller + if dataprep_type == "redis": + redis_dataprep = OpeaRedisDataprep( + name="OpeaRedisDataprep", + description="OPEA Redis Dataprep Service", + ) + controller.register(redis_dataprep) + elif dataprep_type == "milvus": + milvus_dataprep = OpeaMilvusDataprep( + name="OpeaMilvusDataprep", + description="OPEA Milvus Dataprep Service", + ) + controller.register(milvus_dataprep) + + # Discover and activate a 
healthy component + controller.discover_and_activate() +except Exception as e: + logger.error(f"Failed to initialize components: {e}") + + +@register_microservice( + name="opea_service@dataprep", + service_type=ServiceType.DATAPREP, + endpoint="/v1/dataprep/ingest", + host="0.0.0.0", + port=5000, +) +@register_statistics(names=["opea_service@dataprep"]) +async def ingest_files( + files: Optional[Union[UploadFile, List[UploadFile]]] = File(None), + link_list: Optional[str] = Form(None), + chunk_size: int = Form(1500), + chunk_overlap: int = Form(100), + process_table: bool = Form(False), + table_strategy: str = Form("fast"), +): + start = time.time() + + if logflag: + logger.info(f"[ ingest ] files:{files}") + logger.info(f"[ ingest ] link_list:{link_list}") + + try: + # Use the controller to invoke the active component + response = await controller.ingest_files( + files, link_list, chunk_size, chunk_overlap, process_table, table_strategy + ) + # Log the result if logging is enabled + if logflag: + logger.info(f"[ ingest ] Output generated: {response}") + # Record statistics + statistics_dict["opea_service@dataprep"].append_latency(time.time() - start, None) + return response + except Exception as e: + logger.error(f"Error during dataprep ingest invocation: {e}") + raise + + +@register_microservice( + name="opea_service@dataprep", + service_type=ServiceType.DATAPREP, + endpoint="/v1/dataprep/get", + host="0.0.0.0", + port=5000, +) +@register_statistics(names=["opea_service@dataprep"]) +async def get_files(): + start = time.time() + + if logflag: + logger.info("[ get ] start to get ingested files") + + try: + # Use the controller to invoke the active component + response = await controller.get_files() + # Log the result if logging is enabled + if logflag: + logger.info(f"[ get ] ingested files: {response}") + # Record statistics + statistics_dict["opea_service@dataprep"].append_latency(time.time() - start, None) + return response + except Exception as e: + logger.error(f"Error during dataprep get invocation: {e}") + raise + + +@register_microservice( + name="opea_service@dataprep", + service_type=ServiceType.DATAPREP, + endpoint="/v1/dataprep/delete", + host="0.0.0.0", + port=5000, +) +@register_statistics(names=["opea_service@dataprep"]) +async def delete_files(file_path: str = Body(..., embed=True)): + start = time.time() + + if logflag: + logger.info("[ delete ] start to delete ingested files") + + try: + # Use the controller to invoke the active component + response = await controller.delete_files(file_path) + # Log the result if logging is enabled + if logflag: + logger.info(f"[ delete ] deleted result: {response}") + # Record statistics + statistics_dict["opea_service@dataprep"].append_latency(time.time() - start, None) + return response + except Exception as e: + logger.error(f"Error during dataprep delete invocation: {e}") + raise + + +if __name__ == "__main__": + logger.info("OPEA Dataprep Microservice is starting...") + create_upload_folder(upload_folder) + opea_microservices["opea_service@dataprep"].start() diff --git a/comps/dataprep/src/requirements.txt b/comps/dataprep/src/requirements.txt new file mode 100644 index 000000000..fed324296 --- /dev/null +++ b/comps/dataprep/src/requirements.txt @@ -0,0 +1,33 @@ +beautifulsoup4 +cairosvg +docarray[full] +docx2txt +easyocr +fastapi +html2text +huggingface_hub +langchain --extra-index-url https://download.pytorch.org/whl/cpu +langchain-community --extra-index-url https://download.pytorch.org/whl/cpu +langchain-text-splitters 
--extra-index-url https://download.pytorch.org/whl/cpu +langchain_huggingface --extra-index-url https://download.pytorch.org/whl/cpu +langchain_milvus --extra-index-url https://download.pytorch.org/whl/cpu +markdown +numpy +openai +opentelemetry-api +opentelemetry-exporter-otlp +opentelemetry-sdk +pandas +Pillow +prometheus-fastapi-instrumentator +pymupdf +pyspark +pytesseract +python-bidi +python-docx +python-pptx +redis +sentence_transformers +shortuuid +unstructured[all-docs] +uvicorn diff --git a/comps/dataprep/utils.py b/comps/dataprep/src/utils.py similarity index 98% rename from comps/dataprep/utils.py rename to comps/dataprep/src/utils.py index cf104017f..f657fd2ce 100644 --- a/comps/dataprep/utils.py +++ b/comps/dataprep/src/utils.py @@ -813,6 +813,19 @@ def format_search_results(response, file_list: list): return file_list +def format_file_list(file_list: list): + res_list = [] + for file_name in file_list: + file_dict = { + "name": decode_filename(file_name), + "id": decode_filename(file_name), + "type": "File", + "parent": "", + } + res_list.append(file_dict) + return res_list + + def remove_folder_with_ignore(folder_path: str, except_patterns: List = []): """Remove the specific folder, and ignore some files/folders. diff --git a/comps/dataprep/vdms/langchain/prepare_doc_vdms.py b/comps/dataprep/vdms/langchain/prepare_doc_vdms.py index a50a95853..a6d1958c1 100644 --- a/comps/dataprep/vdms/langchain/prepare_doc_vdms.py +++ b/comps/dataprep/vdms/langchain/prepare_doc_vdms.py @@ -13,7 +13,7 @@ from langchain_community.vectorstores.vdms import VDMS, VDMS_Client from langchain_text_splitters import HTMLHeaderTextSplitter from comps import CustomLogger, DocPath, opea_microservices, register_microservice -from comps.dataprep.utils import ( +from comps.dataprep.src.utils import ( create_upload_folder, document_loader, encode_filename, diff --git a/comps/dataprep/vdms/langchain/docker-compose-dataprep-vdms.yaml b/comps/dataprep/vdms/langchain/vdms_langchain.yaml similarity index 100% rename from comps/dataprep/vdms/langchain/docker-compose-dataprep-vdms.yaml rename to comps/dataprep/vdms/langchain/vdms_langchain.yaml diff --git a/comps/dataprep/vdms/multimodal_langchain/docker-compose-dataprep-vdms.yaml b/comps/dataprep/vdms/multimodal_langchain/vdms_multimodal_langchain.yaml similarity index 100% rename from comps/dataprep/vdms/multimodal_langchain/docker-compose-dataprep-vdms.yaml rename to comps/dataprep/vdms/multimodal_langchain/vdms_multimodal_langchain.yaml diff --git a/comps/embeddings/deployment/docker_compose/compose_multimodal_bridgetower.yaml b/comps/embeddings/deployment/docker_compose/compose_multimodal_bridgetower.yaml new file mode 100644 index 000000000..ef7e136ae --- /dev/null +++ b/comps/embeddings/deployment/docker_compose/compose_multimodal_bridgetower.yaml @@ -0,0 +1,43 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + embedding-multimodal-bridgetower: + image: opea/embedding-multimodal-bridgetower:latest + container_name: embedding-multimodal-bridgetower + ports: + - ${EMBEDDER_PORT}:${EMBEDDER_PORT} + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + PORT: ${EMBEDDER_PORT} + restart: unless-stopped + healthcheck: + test: ["CMD-SHELL", "http_proxy='' curl -f http://localhost:${EMBEDDER_PORT}/v1/health_check"] + interval: 10s + timeout: 6s + retries: 18 + start_period: 30s + embedding-multimodal: + image: opea/embedding:latest + container_name: 
embedding-multimodal-bridgetower-server + ports: + - ${MM_EMBEDDING_PORT_MICROSERVICE}:${MM_EMBEDDING_PORT_MICROSERVICE} + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + MULTIMODAL_EMBEDDING: true + MMEI_EMBEDDING_ENDPOINT: ${MMEI_EMBEDDING_ENDPOINT} + MM_EMBEDDING_PORT_MICROSERVICE: ${MM_EMBEDDING_PORT_MICROSERVICE} + restart: unless-stopped + depends_on: + embedding-multimodal-bridgetower: + condition: service_healthy + +networks: + default: + driver: bridge diff --git a/comps/embeddings/deployment/docker_compose/compose_multimodal_bridgetower_intel_hpu.yaml b/comps/embeddings/deployment/docker_compose/compose_multimodal_bridgetower_intel_hpu.yaml new file mode 100644 index 000000000..347150f45 --- /dev/null +++ b/comps/embeddings/deployment/docker_compose/compose_multimodal_bridgetower_intel_hpu.yaml @@ -0,0 +1,47 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + embedding-multimodal-bridgetower: + image: opea/embedding-multimodal-bridgetower-hpu:latest + container_name: embedding-multimodal-bridgetower + ports: + - ${EMBEDDER_PORT}:${EMBEDDER_PORT} + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + PORT: ${EMBEDDER_PORT} + HABANA_VISIBLE_DEVICES: all + runtime: habana + cap_add: + - SYS_NICE + restart: unless-stopped + healthcheck: + test: ["CMD-SHELL", "http_proxy='' curl -f http://localhost:${EMBEDDER_PORT}/v1/health_check"] + interval: 10s + timeout: 6s + retries: 18 + start_period: 30s + embedding-multimodal: + image: opea/embedding:latest + container_name: embedding-multimodal-bridgetower-server + ports: + - ${MM_EMBEDDING_PORT_MICROSERVICE}:${MM_EMBEDDING_PORT_MICROSERVICE} + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + MULTIMODAL_EMBEDDING: true + MMEI_EMBEDDING_ENDPOINT: ${MMEI_EMBEDDING_ENDPOINT} + MM_EMBEDDING_PORT_MICROSERVICE: ${MM_EMBEDDING_PORT_MICROSERVICE} + restart: unless-stopped + depends_on: + embedding-multimodal-bridgetower: + condition: service_healthy + +networks: + default: + driver: bridge diff --git a/comps/embeddings/predictionguard/docker_compose_embedding.yaml b/comps/embeddings/deployment/docker_compose/compose_predictionguard.yaml similarity index 90% rename from comps/embeddings/predictionguard/docker_compose_embedding.yaml rename to comps/embeddings/deployment/docker_compose/compose_predictionguard.yaml index 24a3aba5e..192f27815 100644 --- a/comps/embeddings/predictionguard/docker_compose_embedding.yaml +++ b/comps/embeddings/deployment/docker_compose/compose_predictionguard.yaml @@ -3,7 +3,7 @@ services: embedding: - image: opea/embedding-predictionguard:latest + image: opea/embedding:latest container_name: embedding-predictionguard ports: - "6000:6000" diff --git a/comps/embeddings/deployment/docker_compose/compose_tei.yaml b/comps/embeddings/deployment/docker_compose/compose_tei.yaml new file mode 100644 index 000000000..0ab83969e --- /dev/null +++ b/comps/embeddings/deployment/docker_compose/compose_tei.yaml @@ -0,0 +1,43 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +version: "3.8" + +services: + tei-embedding-service: + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + container_name: tei-embedding-server + ports: + - "6006:80" + volumes: + - "./data:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} 
+ command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:6006/health"] + interval: 10s + timeout: 6s + retries: 18 + embedding: + image: opea/embedding:latest + container_name: embedding-tei-server + ports: + - "6000:6000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + depends_on: + tei-embedding-service: + condition: service_healthy + restart: unless-stopped + +networks: + default: + driver: bridge diff --git a/comps/embeddings/deployment/kubernetes/README.md b/comps/embeddings/deployment/kubernetes/README.md new file mode 100644 index 000000000..e69de29bb diff --git a/comps/embeddings/mosec/langchain/Dockerfile b/comps/embeddings/mosec/langchain/Dockerfile deleted file mode 100644 index dafccecb0..000000000 --- a/comps/embeddings/mosec/langchain/Dockerfile +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -FROM python:3.11-slim - -RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ - libgl1-mesa-glx \ - libjemalloc-dev - -RUN useradd -m -s /bin/bash user && \ - mkdir -p /home/user && \ - chown -R user /home/user/ - -USER user - -COPY comps /home/user/comps - -RUN pip install --no-cache-dir --upgrade pip setuptools && \ - pip install --no-cache-dir -r /home/user/comps/embeddings/mosec/langchain/requirements.txt - -ENV PYTHONPATH=$PYTHONPATH:/home/user - -WORKDIR /home/user/comps/embeddings/mosec/langchain - -ENTRYPOINT ["python", "embedding_mosec.py"] diff --git a/comps/embeddings/mosec/langchain/README.md b/comps/embeddings/mosec/langchain/README.md deleted file mode 100644 index 2ea3f32bc..000000000 --- a/comps/embeddings/mosec/langchain/README.md +++ /dev/null @@ -1,58 +0,0 @@ -# build Mosec endpoint docker image - -``` -docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy -t opea/embedding-langchain-mosec-endpoint:latest -f comps/embeddings/mosec/langchain/dependency/Dockerfile . -``` - -## build embedding microservice docker image - -``` -docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy -t opea/embedding-langchain-mosec:latest -f comps/embeddings/mosec/langchain/Dockerfile . -``` - -## launch Mosec endpoint docker container - -``` -docker run -d --name="embedding-langchain-mosec-endpoint" -p 6001:8000 opea/embedding-langchain-mosec-endpoint:latest -``` - -## launch embedding microservice docker container - -``` -export MOSEC_EMBEDDING_ENDPOINT=http://{mosec_embedding_host_ip}:6001 -docker run -d --name="embedding-langchain-mosec-server" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 6000:6000 --ipc=host -e MOSEC_EMBEDDING_ENDPOINT=$MOSEC_EMBEDDING_ENDPOINT opea/embedding-langchain-mosec:latest -``` - -## run client test - -Use our basic API. - -```bash -## query with single text -curl http://localhost:6000/v1/embeddings\ - -X POST \ - -d '{"text":"Hello, world!"}' \ - -H 'Content-Type: application/json' - -## query with multiple texts -curl http://localhost:6000/v1/embeddings\ - -X POST \ - -d '{"text":["Hello, world!","How are you?"]}' \ - -H 'Content-Type: application/json' -``` - -We are also compatible with [OpenAI API](https://platform.openai.com/docs/api-reference/embeddings). 
- -```bash -## Input single text -curl http://localhost:6000/v1/embeddings\ - -X POST \ - -d '{"input":"Hello, world!"}' \ - -H 'Content-Type: application/json' - -## Input multiple texts with parameters -curl http://localhost:6000/v1/embeddings\ - -X POST \ - -d '{"input":["Hello, world!","How are you?"], "dimensions":100}' \ - -H 'Content-Type: application/json' -``` diff --git a/comps/embeddings/mosec/langchain/dependency/Dockerfile b/comps/embeddings/mosec/langchain/dependency/Dockerfile deleted file mode 100644 index 0fa6bb26e..000000000 --- a/comps/embeddings/mosec/langchain/dependency/Dockerfile +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -FROM python:3.11-slim - -RUN useradd -m -s /bin/bash user && \ - mkdir -p /home/user && \ - chown -R user /home/user/ - -ARG DEBIAN_FRONTEND=noninteractive - -ENV GLIBC_TUNABLES glibc.cpu.x86_shstk=permissive - -COPY comps /home/user/comps - -RUN pip install --no-cache-dir --upgrade pip setuptools -RUN pip install --no-cache-dir torch==2.2.2 torchvision --index-url https://download.pytorch.org/whl/cpu -RUN pip install --no-cache-dir intel-extension-for-pytorch==2.2.0 -RUN pip install --no-cache-dir transformers llmspec mosec - -RUN cd /home/user/ && export HF_ENDPOINT=https://hf-mirror.com && huggingface-cli download --resume-download BAAI/bge-large-zh-v1.5 --local-dir /home/user/bge-large-zh-v1.5 -USER user -ENV MOSEC_EMBEDDING_MODEL="/home/user/bge-large-zh-v1.5/" - -WORKDIR /home/user/comps/embeddings/mosec/langchain/dependency - -CMD ["python3", "server-ipex.py"] diff --git a/comps/embeddings/mosec/langchain/dependency/README.md b/comps/embeddings/mosec/langchain/dependency/README.md deleted file mode 100644 index baf7afca6..000000000 --- a/comps/embeddings/mosec/langchain/dependency/README.md +++ /dev/null @@ -1,43 +0,0 @@ -# Embedding Server - -## 1. Introduction - -This service has an OpenAI compatible restful API to extract text features. -It is dedicated to be used on Xeon to accelerate embedding model serving. -Currently the local model is BGE-large-zh-v1.5. - -## 2. Quick Start - -### 2.1 Build Docker image - -```shell -docker build -t embedding:latest -f ./docker/Dockerfile . -``` - -### 2.2 Launch server - -```shell -docker run -itd -p 8000:8000 embedding:latest -``` - -### 2.3 Client test - -- Restful API by curl - -```shell -curl -X POST http://127.0.0.1:8000/v1/embeddings -H "Content-Type: application/json" -d '{ "model": "/home/user/bge-large-zh-v1.5/", "input": "hello world"}' -``` - -- generate embedding from python - -```python -DEFAULT_MODEL = "/home/user/bge-large-zh-v1.5/" -SERVICE_URL = "http://127.0.0.1:8000" -INPUT_STR = "Hello world!" 
- -client = Client(api_key="fake", base_url=SERVICE_URL) -emb = client.embeddings.create( - model=DEFAULT_MODEL, - input=INPUT_STR, -) -``` diff --git a/comps/embeddings/mosec/langchain/dependency/server-ipex.py b/comps/embeddings/mosec/langchain/dependency/server-ipex.py deleted file mode 100644 index 246c17e40..000000000 --- a/comps/embeddings/mosec/langchain/dependency/server-ipex.py +++ /dev/null @@ -1,123 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os -from typing import List, Union - -import intel_extension_for_pytorch as ipex -import torch # type: ignore -import torch.nn.functional as F # type: ignore -import transformers # type: ignore -from llmspec import EmbeddingData, EmbeddingRequest, EmbeddingResponse, TokenUsage -from mosec import Runtime, Server, Worker - -DEFAULT_MODEL = "/home/user/bge-large-zh-v1.5/" - - -class Embedding(Worker): - def __init__(self): - self.model_name = os.environ.get("MOSEC_EMBEDDING_MODEL", DEFAULT_MODEL) - self.tokenizer = transformers.AutoTokenizer.from_pretrained(self.model_name) - self.model = transformers.AutoModel.from_pretrained(self.model_name) - self.device = torch.cuda.current_device() if torch.cuda.is_available() else "cpu" - - self.model = self.model.to(self.device) - self.model.eval() - - # jit trace model - self.model = ipex.optimize(self.model, dtype=torch.bfloat16) - vocab_size = self.model.config.vocab_size - batch_size = 16 - seq_length = 512 - d = torch.randint(vocab_size, size=[batch_size, seq_length]) - t = torch.randint(0, 1, size=[batch_size, seq_length]) - m = torch.randint(1, 2, size=[batch_size, seq_length]) - model_inputs = [d] - if "token_type_ids" in self.tokenizer.model_input_names: - model_inputs.append(t) - if "attention_mask" in self.tokenizer.model_input_names: - model_inputs.append(m) - self.model = torch.jit.trace(self.model, model_inputs, check_trace=False, strict=False) - self.model = torch.jit.freeze(self.model) - self.model(*model_inputs) - - def get_embedding_with_token_count(self, sentences: Union[str, List[Union[str, List[int]]]]): - # Mean Pooling - Take attention mask into account for correct averaging - def mean_pooling(model_output, attention_mask): - # First element of model_output contains all token embeddings - token_embeddings = model_output["last_hidden_state"] - input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float() - return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp( - input_mask_expanded.sum(1), min=1e-9 - ) - - # Tokenize sentences - # TODO: support `List[List[int]]` input - encoded_input = self.tokenizer(sentences, padding=True, truncation=True, return_tensors="pt") - inputs = encoded_input.to(self.device) - token_count = inputs["attention_mask"].sum(dim=1).tolist() - # Compute token embeddings - model_output = self.model(**inputs) - # Perform pooling - sentence_embeddings = mean_pooling(model_output, inputs["attention_mask"]) - # Normalize embeddings - sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1) - - return token_count, sentence_embeddings - - def deserialize(self, data: bytes) -> EmbeddingRequest: - return EmbeddingRequest.from_bytes(data) - - def serialize(self, data: EmbeddingResponse) -> bytes: - return data.to_json() - - def forward(self, data: List[EmbeddingRequest]) -> List[EmbeddingResponse]: - inputs = [] - inputs_lens = [] - for d in data: - inputs.extend(d.input if isinstance(d.input, list) else [d.input]) - inputs_lens.append(len(d.input) if 
isinstance(d.input, list) else 1) - token_cnt, embeddings = self.get_embedding_with_token_count(inputs) - - embeddings = embeddings.detach() - if self.device != "cpu": - embeddings = embeddings.cpu() - embeddings = embeddings.numpy() - embeddings = [emb.tolist() for emb in embeddings] - - resp = [] - emb_idx = 0 - for lens in inputs_lens: - token_count = sum(token_cnt[emb_idx : emb_idx + lens]) - resp.append( - EmbeddingResponse( - data=[ - EmbeddingData(embedding=emb, index=i) - for i, emb in enumerate(embeddings[emb_idx : emb_idx + lens]) - ], - model=self.model_name, - usage=TokenUsage( - prompt_tokens=token_count, - # No completions performed, only embeddings generated. - completion_tokens=0, - total_tokens=token_count, - ), - ) - ) - emb_idx += lens - return resp - - -if __name__ == "__main__": - MAX_BATCH_SIZE = int(os.environ.get("MAX_BATCH_SIZE", 128)) - MAX_WAIT_TIME = int(os.environ.get("MAX_WAIT_TIME", 10)) - MAX_FORWARD_TIMEOUT = int(os.environ.get("FORWARD_TIMEOUT", 60)) - server = Server() - emb = Runtime(Embedding, max_batch_size=MAX_BATCH_SIZE, max_wait_time=MAX_WAIT_TIME, timeout=MAX_FORWARD_TIMEOUT) - server.register_runtime( - { - "/v1/embeddings": [emb], - "/embeddings": [emb], - } - ) - server.run() diff --git a/comps/embeddings/mosec/langchain/dependency/test-embedding.py b/comps/embeddings/mosec/langchain/dependency/test-embedding.py deleted file mode 100644 index 67a3939e1..000000000 --- a/comps/embeddings/mosec/langchain/dependency/test-embedding.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -"""OpenAI embedding client example.""" - -from openai import Client - -DEFAULT_MODEL = "/home/user/bge-large-zh-v1.5/" -SERVICE_URL = "http://127.0.0.1:8000" -INPUT_STR = "Hello world!" 
- -client = Client(api_key="fake", base_url=SERVICE_URL) -emb = client.embeddings.create( - model=DEFAULT_MODEL, - input=INPUT_STR, -) - -print(len(emb.data)) # type: ignore -print(emb.data[0].embedding) # type: ignore diff --git a/comps/embeddings/mosec/langchain/docker_compose_embedding.yaml b/comps/embeddings/mosec/langchain/docker_compose_embedding.yaml deleted file mode 100644 index a0cc787d0..000000000 --- a/comps/embeddings/mosec/langchain/docker_compose_embedding.yaml +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3.8" - -services: - embedding: - image: opea/embedding-langchain-mosec:latest - container_name: embedding-langchain-mosec-server - ports: - - "6000:6000" - ipc: host - environment: - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - MOSEC_EMBEDDING_ENDPOINT: ${MOSEC_EMBEDDING_ENDPOINT} - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/embeddings/mosec/langchain/embedding_mosec.py b/comps/embeddings/mosec/langchain/embedding_mosec.py deleted file mode 100644 index e422d92b6..000000000 --- a/comps/embeddings/mosec/langchain/embedding_mosec.py +++ /dev/null @@ -1,139 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import asyncio -import os -import time -from typing import List, Optional, Union - -from langchain_community.embeddings import OpenAIEmbeddings -from langchain_community.embeddings.openai import async_embed_with_retry - -from comps import ( - CustomLogger, - EmbedDoc, - ServiceType, - TextDoc, - opea_microservices, - register_microservice, - register_statistics, - statistics_dict, -) -from comps.cores.proto.api_protocol import ( - ChatCompletionRequest, - EmbeddingRequest, - EmbeddingResponse, - EmbeddingResponseData, -) - -logger = CustomLogger("embedding_mosec") -logflag = os.getenv("LOGFLAG", False) - - -class MosecEmbeddings(OpenAIEmbeddings): - async def _aget_len_safe_embeddings( - self, texts: List[str], *, engine: str, chunk_size: Optional[int] = None - ) -> List[List[float]]: - _chunk_size = chunk_size or self.chunk_size - batched_embeddings: List[List[float]] = [] - response = await async_embed_with_retry(self, input=texts, **self._invocation_params) - if not isinstance(response, dict): - response = response.model_dump() - batched_embeddings.extend(r["embedding"] for r in response["data"]) - - _cached_empty_embedding: Optional[List[float]] = None - - async def empty_embedding() -> List[float]: - nonlocal _cached_empty_embedding - if _cached_empty_embedding is None: - average_embedded = await async_embed_with_retry(self, input="", **self._invocation_params) - if not isinstance(average_embedded, dict): - average_embedded = average_embedded.model_dump() - _cached_empty_embedding = average_embedded["data"][0]["embedding"] - return _cached_empty_embedding - - async def get_embedding(e: Optional[List[float]]) -> List[float]: - return e if e is not None else await empty_embedding() - - embeddings = await asyncio.gather(*[get_embedding(e) for e in batched_embeddings]) - return embeddings - - def _get_len_safe_embeddings( - self, texts: List[str], *, engine: str, chunk_size: Optional[int] = None - ) -> List[List[float]]: - _chunk_size = chunk_size or self.chunk_size - batched_embeddings: List[List[float]] = [] - response = self.client.create(input=texts, **self._invocation_params) - if not isinstance(response, dict): - response = response.model_dump() - batched_embeddings.extend(r["embedding"] for r in 
response["data"]) - - _cached_empty_embedding: Optional[List[float]] = None - - def empty_embedding() -> List[float]: - nonlocal _cached_empty_embedding - if _cached_empty_embedding is None: - average_embedded = self.client.create(input="", **self._invocation_params) - if not isinstance(average_embedded, dict): - average_embedded = average_embedded.model_dump() - _cached_empty_embedding = average_embedded["data"][0]["embedding"] - return _cached_empty_embedding - - return [e if e is not None else empty_embedding() for e in batched_embeddings] - - -@register_microservice( - name="opea_service@embedding_mosec", - service_type=ServiceType.EMBEDDING, - endpoint="/v1/embeddings", - host="0.0.0.0", - port=6000, - input_datatype=TextDoc, - output_datatype=EmbedDoc, -) -@register_statistics(names=["opea_service@embedding_mosec"]) -def embedding( - input: Union[TextDoc, EmbeddingRequest, ChatCompletionRequest] -) -> Union[EmbedDoc, EmbeddingResponse, ChatCompletionRequest]: - if logflag: - logger.info(input) - start = time.time() - if isinstance(input, TextDoc): - embed_vector = get_embeddings(input.text) - embedding_res = embed_vector[0] if isinstance(input.text, str) else embed_vector - res = EmbedDoc(text=input.text, embedding=embedding_res) - else: - embed_vector = get_embeddings(input.input) - if input.dimensions is not None: - embed_vector = [embed_vector[i][: input.dimensions] for i in range(len(embed_vector))] - - # for standard openai embedding format - res = EmbeddingResponse( - data=[EmbeddingResponseData(index=i, embedding=embed_vector[i]) for i in range(len(embed_vector))] - ) - - if isinstance(input, ChatCompletionRequest): - input.embedding = res - # keep - res = input - - statistics_dict["opea_service@embedding_mosec"].append_latency(time.time() - start, None) - if logflag: - logger.info(res) - return res - - -def get_embeddings(text: Union[str, List[str]]) -> List[List[float]]: - texts = [text] if isinstance(text, str) else text - embed_vector = embeddings.embed_documents(texts) - return embed_vector - - -if __name__ == "__main__": - MOSEC_EMBEDDING_ENDPOINT = os.environ.get("MOSEC_EMBEDDING_ENDPOINT", "http://127.0.0.1:8080") - os.environ["OPENAI_API_BASE"] = MOSEC_EMBEDDING_ENDPOINT - os.environ["OPENAI_API_KEY"] = "Dummy key" - MODEL_ID = "/home/user/bge-large-zh-v1.5" - embeddings = MosecEmbeddings(model=MODEL_ID) - logger.info("Mosec Embedding initialized.") - opea_microservices["opea_service@embedding_mosec"].start() diff --git a/comps/embeddings/mosec/langchain/requirements.txt b/comps/embeddings/mosec/langchain/requirements.txt deleted file mode 100644 index 9fa1a059c..000000000 --- a/comps/embeddings/mosec/langchain/requirements.txt +++ /dev/null @@ -1,11 +0,0 @@ -docarray[full] -fastapi -langchain -langchain_community -openai -opentelemetry-api -opentelemetry-exporter-otlp -opentelemetry-sdk -prometheus-fastapi-instrumentator -shortuuid -uvicorn diff --git a/comps/embeddings/multimodal/README.md b/comps/embeddings/multimodal/README.md deleted file mode 100644 index c75a60f12..000000000 --- a/comps/embeddings/multimodal/README.md +++ /dev/null @@ -1,185 +0,0 @@ -# Multimodal Embeddings Microservice - -The Multimodal Embedding Microservice is designed to efficiently convert pairs of textual string and image into vectorized embeddings, facilitating seamless integration into various machine learning and data processing workflows. 
This service utilizes advanced algorithms to generate high-quality embeddings that capture the joint semantic essence of the input text-and-image pairs, making it ideal for applications in multi-modal data processing, information retrieval, and similar fields. - -Key Features: - -**High Performance**: Optimized for quick and reliable conversion of textual data and image inputs into vector embeddings. - -**Scalability**: Built to handle high volumes of requests simultaneously, ensuring robust performance even under heavy loads. - -**Ease of Integration**: Provides a simple and intuitive API, allowing for straightforward integration into existing systems and workflows. - -**Customizable**: Supports configuration and customization to meet specific use case requirements, including different embedding models and preprocessing techniques. - -Users are albe to configure and build embedding-related services according to their actual needs. - -## 🚀1. Start Microservice with Python (Option 1) - -Currently, we provide two ways to implement the multimodal embedding service: - -1. Build the multimodal embedding model **locally** from the server, which is faster, but takes up memory on the local server. -2. Build it based on the multimodal embedding inference endpoint (**MMEI endpoint**), which provides more flexibility, but may bring some network latency. - -For both of the implementations, you need to install requirements first. - -### 1.1 Install Requirements - -```bash -# run with langchain -pip install -r multimodal_langchain/requirements.txt -``` - -### 1.2 Start Embedding Service - -You can select one of the following to start the multimodal embedding service: - -**Start Multimodal Embedding Service with MMEI** - -First, you need to start a MMEI service. - -```bash -export your_mmei_port=8080 -export EMBEDDER_PORT=$your_mmei_port -``` - -Currently, we employ [**BridgeTower**](https://huggingface.co/BridgeTower/bridgetower-large-itm-mlm-gaudi) model for MMEI and provide two ways to start MMEI: - -1. Start MMEI on Gaudi2 HPU -2. Start MMEI on Xeon CPU (if Gaudi2 HPU is not available) - -- Gaudi2 HPU - -```bash -cd ../../.. -docker build -t opea/embedding-multimodal-bridgetower:latest --build-arg EMBEDDER_PORT=$EMBEDDER_PORT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/multimodal/bridgetower/Dockerfile.intel_hpu . -cd comps/embeddings/multimodal/bridgetower/ -docker compose -f docker_compose_bridgetower_embedding_endpoint.yaml up -d -``` - -- Xeon CPU - -```bash -cd ../../.. -docker build -t opea/embedding-multimodal-bridgetower:latest --build-arg EMBEDDER_PORT=$EMBEDDER_PORT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/multimodal/bridgetower/Dockerfile . -cd comps/embeddings/multimodal/bridgetower/ -docker compose -f docker_compose_bridgetower_embedding_endpoint.yaml up -d -``` - -Then you need to test your MMEI service using the following commands: - -```bash -curl http://localhost:$your_mmei_port/v1/encode \ - -X POST \ - -H "Content-Type:application/json" \ - -d '{"text":"This is example"}' -``` - -Start the embedding service with MMEI_EMBEDDING_ENDPOINT. 
- -```bash -# run with langchain -cd multimodal_langchain -export MMEI_EMBEDDING_ENDPOINT="http://localhost:$your_mmei_port/v1/encode" -export your_embedding_port_microservice=6600 -export MM_EMBEDDING_PORT_MICROSERVICE=$your_embedding_port_microservice -python mm_embedding_mmei.py -``` - -**Start Embedding Service with Local Model** - -```bash -# run with langchain -cd multimodal_langchain -export your_embedding_port_microservice=6600 -export MM_EMBEDDING_PORT_MICROSERVICE=$your_embedding_port_microservice -python local_mm_embedding.py -``` - -## 🚀2. Start Microservice with Docker (Option 2) - -### 2.1 Start Multimodal Embedding Inference (MMEI) Service - -First, you need to start a MMEI service. - -```bash -export your_mmei_port=8080 -export EMBEDDER_PORT=$your_mmei_port -``` - -Currently, we employ [**BridgeTower**](https://huggingface.co/BridgeTower/bridgetower-large-itm-mlm-gaudi) model for MMEI and provide two ways to start MMEI: - -1. Start MMEI on Gaudi2 HPU -2. Start MMEI on Xeon CPU (if Gaudi2 HPU is not available) - -- Gaudi2 HPU - -```bash -cd ../../.. -docker build -t opea/embedding-multimodal-bridgetower:latest --build-arg EMBEDDER_PORT=$EMBEDDER_PORT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/multimodal/bridgetower/Dockerfile.intel_hpu . -cd comps/embeddings/multimodal/bridgetower/ -docker compose -f docker_compose_bridgetower_embedding_endpoint.yaml up -d -``` - -- Xeon CPU - -```bash -cd ../../.. -docker build -t opea/embedding-multimodal-bridgetower:latest --build-arg EMBEDDER_PORT=$EMBEDDER_PORT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/multimodal/bridgetower/Dockerfile . -cd comps/embeddings/multimodal/bridgetower/ -docker compose -f docker_compose_bridgetower_embedding_endpoint.yaml up -d -``` - -Then you need to test your MMEI service using the following commands: - -```bash -curl http://localhost:$your_mmei_port/v1/encode \ - -X POST \ - -H "Content-Type:application/json" \ - -d '{"text":"This is example"}' -``` - -Export the `MMEI_EMBEDDING_ENDPOINT` for later usage: - -```bash -export ip_address=$(hostname -I | awk '{print $1}') -export MMEI_EMBEDDING_ENDPOINT="http://$ip_address:$your_mmei_port/v1/encode" -``` - -### 2.2 Build Docker Image - -#### Build Langchain Docker - -```bash -cd ../../.. -docker build -t opea/embedding-multimodal:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/multimodal/multimodal_langchain/Dockerfile . -``` - -### 2.3 Run Docker with Docker Compose - -```bash -cd multimodal_langchain -export your_embedding_port_microservice=6600 -export MM_EMBEDDING_PORT_MICROSERVICE=$your_embedding_port_microservice -docker compose -f docker_compose_multimodal_embedding.yaml up -d -``` - -## 🚀3. 
Consume Embedding Service - -### 2.2 Consume Embedding Service - -**Compute a joint embedding of an image-text pair** - -```bash -curl -X POST http://0.0.0.0:6600/v1/embeddings \ - -H "Content-Type: application/json" \ - -d '{"text": {"text" : "This is some sample text."}, "image" : {"url": "https://github.com/docarray/docarray/blob/main/tests/toydata/image-data/apple.png?raw=true"}}' -``` - -**Compute an embedding of a text** - -```bash -curl -X POST http://0.0.0.0:6600/v1/embeddings \ - -H "Content-Type: application/json" \ - -d '{"text" : "This is some sample text."}' -``` diff --git a/comps/embeddings/multimodal/multimodal_langchain/Dockerfile b/comps/embeddings/multimodal/multimodal_langchain/Dockerfile deleted file mode 100644 index 282581f1f..000000000 --- a/comps/embeddings/multimodal/multimodal_langchain/Dockerfile +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -FROM python:3.11-slim - -RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ - libgl1-mesa-glx \ - libjemalloc-dev - -RUN useradd -m -s /bin/bash user && \ - mkdir -p /home/user && \ - chown -R user /home/user/ - -USER user - -COPY comps /home/user/comps - -RUN pip install --no-cache-dir --upgrade pip setuptools && \ - pip install --no-cache-dir -r /home/user/comps/embeddings/multimodal/multimodal_langchain/requirements.txt - -ENV PYTHONPATH=$PYTHONPATH:/home/user - -WORKDIR /home/user/comps/embeddings/multimodal/multimodal_langchain - -ENTRYPOINT ["python", "mm_embedding_mmei.py"] diff --git a/comps/embeddings/multimodal/multimodal_langchain/docker_compose_multimodal_embedding.yaml b/comps/embeddings/multimodal/multimodal_langchain/docker_compose_multimodal_embedding.yaml deleted file mode 100644 index 74927b25e..000000000 --- a/comps/embeddings/multimodal/multimodal_langchain/docker_compose_multimodal_embedding.yaml +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -services: - embedding-multimodal: - image: opea/embedding-multimodal:latest - container_name: embedding-multimodal - ports: - - ${MM_EMBEDDING_PORT_MICROSERVICE}:${MM_EMBEDDING_PORT_MICROSERVICE} - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - MMEI_EMBEDDING_ENDPOINT: ${MMEI_EMBEDDING_ENDPOINT} - MM_EMBEDDING_PORT_MICROSERVICE: ${MM_EMBEDDING_PORT_MICROSERVICE} - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/embeddings/multimodal/multimodal_langchain/local_mm_embedding.py b/comps/embeddings/multimodal/multimodal_langchain/local_mm_embedding.py deleted file mode 100644 index 7728f5eb8..000000000 --- a/comps/embeddings/multimodal/multimodal_langchain/local_mm_embedding.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os - -from comps import ( - CustomLogger, - EmbedDoc, - EmbedMultimodalDoc, - MultimodalDoc, - ServiceType, - TextDoc, - TextImageDoc, - opea_microservices, - register_microservice, -) -from comps.embeddings.multimodal.bridgetower import BridgeTowerEmbedding - -logger = CustomLogger("local_multimodal_embedding") -logflag = os.getenv("LOGFLAG", False) - -port = int(os.getenv("MM_EMBEDDING_PORT_MICROSERVICE", 6600)) - - -@register_microservice( - name="opea_service@local_multimodal_embedding", - service_type=ServiceType.EMBEDDING, - endpoint="/v1/embeddings", - host="0.0.0.0", - port=port, - input_datatype=MultimodalDoc, - 
output_datatype=EmbedMultimodalDoc, -) -def embedding(input: MultimodalDoc) -> EmbedDoc: - if logflag: - logger.info(input) - - if isinstance(input, TextDoc): - # Handle text input - embed_vector = embeddings.embed_query(input.text) - res = EmbedDoc(text=input.text, embedding=embed_vector) - - elif isinstance(input, TextImageDoc): - # Handle text + image input - pil_image = input.image.url.load_pil() - embed_vector = embeddings.embed_image_text_pairs([input.text.text], [pil_image], batch_size=1)[0] - res = EmbedMultimodalDoc(text=input.text.text, url=input.image.url, embedding=embed_vector) - else: - raise ValueError("Invalid input type") - - if logflag: - logger.info(res) - return res - - -if __name__ == "__main__": - embeddings = BridgeTowerEmbedding() - opea_microservices["opea_service@local_multimodal_embedding"].start() diff --git a/comps/embeddings/multimodal/multimodal_langchain/mm_embedding_mmei.py b/comps/embeddings/multimodal/multimodal_langchain/mm_embedding_mmei.py deleted file mode 100644 index fbd972a20..000000000 --- a/comps/embeddings/multimodal/multimodal_langchain/mm_embedding_mmei.py +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import base64 -import os -import time - -import requests -from fastapi.responses import JSONResponse - -from comps import ( - CustomLogger, - EmbedDoc, - EmbedMultimodalDoc, - MultimodalDoc, - ServiceType, - TextDoc, - TextImageDoc, - opea_microservices, - register_microservice, - register_statistics, - statistics_dict, -) - -logger = CustomLogger("multimodal_embedding_mmei_langchain") -logflag = os.getenv("LOGFLAG", False) -port = int(os.getenv("MM_EMBEDDING_PORT_MICROSERVICE", 6600)) -headers = {"Content-Type": "application/json"} - - -@register_microservice( - name="opea_service@multimodal_embedding_mmei_langchain", - service_type=ServiceType.EMBEDDING, - endpoint="/v1/embeddings", - host="0.0.0.0", - port=port, - input_datatype=MultimodalDoc, - output_datatype=EmbedMultimodalDoc, -) -@register_statistics(names=["opea_service@multimodal_embedding_mmei_langchain"]) -def embedding(input: MultimodalDoc) -> EmbedDoc: - start = time.time() - if logflag: - logger.info(input) - - json = {} - if isinstance(input, TextDoc): - json["text"] = input.text - elif isinstance(input, TextImageDoc): - json["text"] = input.text.text - img_bytes = input.image.url.load_bytes() - base64_img = base64.b64encode(img_bytes).decode("utf-8") - json["img_b64_str"] = base64_img - else: - return JSONResponse(status_code=400, content={"message": "Bad request!"}) - - # call multimodal embedding endpoint - try: - response = requests.post(mmei_embedding_endpoint, headers=headers, json=json) - if response.status_code != 200: - return JSONResponse(status_code=503, content={"message": "Multimodal embedding endpoint failed!"}) - - response_json = response.json() - embed_vector = response_json["embedding"] - if isinstance(input, TextDoc): - res = EmbedDoc(text=input.text, embedding=embed_vector) - elif isinstance(input, TextImageDoc): - res = EmbedMultimodalDoc(text=input.text.text, url=input.image.url, embedding=embed_vector) - except requests.exceptions.ConnectionError: - res = JSONResponse(status_code=503, content={"message": "Multimodal embedding endpoint not started!"}) - statistics_dict["opea_service@multimodal_embedding_mmei_langchain"].append_latency(time.time() - start, None) - if logflag: - logger.info(res) - return res - - -if __name__ == "__main__": - url_endpoint = 
os.getenv("MMEI_EMBEDDING_HOST_ENDPOINT", "http://0.0.0.0") - port_endpoint = os.getenv("MMEI_EMBEDDING_PORT_ENDPOINT", "8080") - path_endpoint = os.getenv("MMEI_EMBEDDING_PATH_ENDPOINT", "/v1/encode") - - mmei_embedding_endpoint = os.getenv("MMEI_EMBEDDING_ENDPOINT", f"{url_endpoint}:{port_endpoint}{path_endpoint}") - logger.info(f"MMEI Gaudi Embedding initialized at {mmei_embedding_endpoint}") - opea_microservices["opea_service@multimodal_embedding_mmei_langchain"].start() diff --git a/comps/embeddings/predictionguard/Dockerfile b/comps/embeddings/predictionguard/Dockerfile deleted file mode 100644 index 4176e1f71..000000000 --- a/comps/embeddings/predictionguard/Dockerfile +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright (C) 2024 Prediction Guard, Inc -# SPDX-License-Identifier: Apache-2.0 - -FROM python:3.11-slim - -COPY comps /home/comps - -RUN pip install --no-cache-dir --upgrade pip setuptools && \ - pip install --no-cache-dir -r /home/comps/embeddings/predictionguard/requirements.txt - -ENV PYTHONPATH=$PYTHONPATH:/home - -WORKDIR /home/comps/embeddings/predictionguard - -ENTRYPOINT ["python", "embedding_predictionguard.py"] - diff --git a/comps/embeddings/predictionguard/README.md b/comps/embeddings/predictionguard/README.md deleted file mode 100644 index bec54350c..000000000 --- a/comps/embeddings/predictionguard/README.md +++ /dev/null @@ -1,64 +0,0 @@ -# Embedding Generation Prediction Guard Microservice - -[Prediction Guard](https://docs.predictionguard.com) allows you to utilize hosted open access LLMs, LVMs, and embedding functionality with seamlessly integrated safeguards. In addition to providing a scalable access to open models, Prediction Guard allows you to configure factual consistency checks, toxicity filters, PII filters, and prompt injection blocking. Join the [Prediction Guard Discord channel](https://discord.gg/TFHgnhAFKd) and request an API key to get started. - -This embedding microservice is designed to efficiently convert text into vectorized embeddings using the [BridgeTower model](https://huggingface.co/BridgeTower/bridgetower-large-itm-mlm-itc). Thus, it is ideal for both RAG or semantic search applications. - -**Note** - The BridgeTower model implemented in Prediction Guard can actually embed text, images, or text + images (jointly). For now this service only embeds text, but a follow on contribution will enable the multimodal functionality. - -## 🚀 Start Microservice with Docker - -### Setup Environment Variables - -Setup the following environment variables first - -```bash -export PREDICTIONGUARD_API_KEY=${your_predictionguard_api_key} -``` - -### Build Docker Images - -```bash -cd ../../.. -docker build -t opea/embedding-predictionguard:latest -f comps/embeddings/predictionguard/Dockerfile . -``` - -### Start Service - -```bash -docker run -d --name="embedding-predictionguard" -p 6000:6000 -e PREDICTIONGUARD_API_KEY=$PREDICTIONGUARD_API_KEY opea/embedding-predictionguard:latest -``` - -## 🚀 Consume Embeddings Service - -Use our basic API. - -```bash -## query with single text -curl http://localhost:6000/v1/embeddings\ - -X POST \ - -d '{"text":"Hello, world!"}' \ - -H 'Content-Type: application/json' - -## query with multiple texts -curl http://localhost:6000/v1/embeddings\ - -X POST \ - -d '{"text":["Hello, world!","How are you?"]}' \ - -H 'Content-Type: application/json' -``` - -We are also compatible with [OpenAI API](https://platform.openai.com/docs/api-reference/embeddings). 
- -```bash -## Input single text -curl http://localhost:6000/v1/embeddings\ - -X POST \ - -d '{"input":"Hello, world!"}' \ - -H 'Content-Type: application/json' - -## Input multiple texts with parameters -curl http://localhost:6000/v1/embeddings\ - -X POST \ - -d '{"input":["Hello, world!","How are you?"], "dimensions":100}' \ - -H 'Content-Type: application/json' -``` diff --git a/comps/embeddings/predictionguard/embedding_predictionguard.py b/comps/embeddings/predictionguard/embedding_predictionguard.py deleted file mode 100644 index f5274e3b5..000000000 --- a/comps/embeddings/predictionguard/embedding_predictionguard.py +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright (C) 2024 Prediction Guard, Inc. -# SPDX-License-Identified: Apache-2.0 - - -import os -import time -from typing import List, Optional, Union - -from predictionguard import PredictionGuard - -from comps import ( - CustomLogger, - EmbedDoc, - ServiceType, - TextDoc, - opea_microservices, - register_microservice, - register_statistics, - statistics_dict, -) -from comps.cores.proto.api_protocol import ( - ChatCompletionRequest, - EmbeddingRequest, - EmbeddingResponse, - EmbeddingResponseData, -) - -logger = CustomLogger("embedding_predictionguard") -logflag = os.getenv("LOGFLAG", False) - -# Initialize Prediction Guard client -client = PredictionGuard() - - -@register_microservice( - name="opea_service@embedding_predictionguard", - service_type=ServiceType.EMBEDDING, - endpoint="/v1/embeddings", - host="0.0.0.0", - port=6000, - input_datatype=TextDoc, - output_datatype=EmbedDoc, -) -@register_statistics(names=["opea_service@embedding_predictionguard"]) -async def embedding( - input: Union[TextDoc, EmbeddingRequest, ChatCompletionRequest] -) -> Union[EmbedDoc, EmbeddingResponse, ChatCompletionRequest]: - if logflag: - logger.info(input) - start = time.time() - - if isinstance(input, TextDoc): - embed_vector = await get_embeddings(input.text) - embedding_res = embed_vector[0] if isinstance(input.text, str) else embed_vector - res = EmbedDoc(text=input.text, embedding=embedding_res) - else: - embed_vector = await get_embeddings(input.input) - input.dimensions = input.dimensions if input.dimensions is not None else 512 - embed_vector = [embed_vector[i][: input.dimensions] for i in range(len(embed_vector))] - - # for standard openai embedding format - res = EmbeddingResponse( - data=[EmbeddingResponseData(index=i, embedding=embed_vector[i]) for i in range(len(embed_vector))] - ) - - if isinstance(input, ChatCompletionRequest): - input.embedding = res - # keep - res = input - - statistics_dict["opea_service@embedding_predictionguard"].append_latency(time.time() - start, None) - if logflag: - logger.info(res) - return res - - -async def get_embeddings(text: Union[str, List[str]]) -> List[List[float]]: - texts = [text] if isinstance(text, str) else text - texts = [{"text": texts[i]} for i in range(len(texts))] - response = client.embeddings.create(model=pg_embedding_model_name, input=texts)["data"] - embed_vector = [response[i]["embedding"] for i in range(len(response))] - return embed_vector - - -if __name__ == "__main__": - pg_embedding_model_name = os.getenv("PG_EMBEDDING_MODEL_NAME", "bridgetower-large-itm-mlm-itc") - print("Prediction Guard Embedding initialized.") - opea_microservices["opea_service@embedding_predictionguard"].start() diff --git a/comps/embeddings/src/Dockerfile b/comps/embeddings/src/Dockerfile new file mode 100644 index 000000000..55990a583 --- /dev/null +++ b/comps/embeddings/src/Dockerfile @@ -0,0 +1,18 @@ +# 
Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +FROM python:3.11-slim + +COPY comps /home/comps + +RUN pip install --no-cache-dir --upgrade pip setuptools && \ + pip install --no-cache-dir -r /home/comps/embeddings/src/requirements.txt + +ENV PYTHONPATH=$PYTHONPATH:/home + +WORKDIR /home/comps/embeddings/src/ + +ENV MULTIMODAL_EMBEDDING="false" + +ENTRYPOINT ["sh", "-c", "python $( [ \"$MULTIMODAL_EMBEDDING\" = \"true\" ] && echo 'opea_multimodal_embedding_microservice.py' || echo 'opea_embedding_microservice.py')"] + diff --git a/comps/embeddings/README.md b/comps/embeddings/src/README.md similarity index 61% rename from comps/embeddings/README.md rename to comps/embeddings/src/README.md index 74117982f..a22f81e49 100644 --- a/comps/embeddings/README.md +++ b/comps/embeddings/src/README.md @@ -13,25 +13,3 @@ Key Features: **Customizable**: Supports configuration and customization to meet specific use case requirements, including different embedding models and preprocessing techniques. Users are albe to configure and build embedding-related services according to their actual needs. - -## Embeddings Microservice with TEI - -We support both `langchain` and `llama_index` for TEI serving. - -For details, please refer to [langchain readme](tei/langchain/README.md) or [llama index readme](tei/llama_index/README.md). - -## Embeddings Microservice with Mosec - -For details, please refer to this [readme](mosec/langchain/README.md). - -## Embeddings Microservice with Multimodal - -For details, please refer to this [readme](multimodal/README.md). - -## Embeddings Microservice with Multimodal Clip - -For details, please refer to this [readme](multimodal_clip/README.md). - -## Embeddings Microservice with Prediction Guard - -For details, please refer to this [readme](predictionguard/README.md). 
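With this restructuring, the embedding microservice is built from a single `comps/embeddings/src/Dockerfile` and referenced as `opea/embedding:latest` in the compose files above, with the entrypoint selected by the `MULTIMODAL_EMBEDDING` environment variable. Below is a minimal usage sketch (illustrative, not taken from the patch) for the text-only, TEI-backed mode; it reuses the image name, port, and variable names from the compose files above, while `${host_ip}` and the endpoint port 6006 are placeholders to adapt to your TEI deployment.

```bash
# Build the consolidated embedding microservice image from the repository root.
docker build -t opea/embedding:latest -f comps/embeddings/src/Dockerfile .

# Point the microservice at an already-running TEI endpoint.
# ${host_ip} and port 6006 are placeholders; match them to your TEI deployment.
export TEI_EMBEDDING_ENDPOINT=http://${host_ip}:6006

# Run in text-only mode. Setting MULTIMODAL_EMBEDDING=true instead starts the
# multimodal entrypoint, as the BridgeTower compose files in this patch do.
docker run -d --name embedding-tei-server -p 6000:6000 --ipc=host \
  -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT \
  opea/embedding:latest
```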
diff --git a/comps/embeddings/mosec/langchain/__init__.py b/comps/embeddings/src/__init__.py similarity index 100% rename from comps/embeddings/mosec/langchain/__init__.py rename to comps/embeddings/src/__init__.py diff --git a/comps/embeddings/multimodal/__init__.py b/comps/embeddings/src/integrations/__init__.py similarity index 100% rename from comps/embeddings/multimodal/__init__.py rename to comps/embeddings/src/integrations/__init__.py diff --git a/comps/embeddings/multimodal/bridgetower/Dockerfile b/comps/embeddings/src/integrations/dependency/bridgetower/Dockerfile similarity index 69% rename from comps/embeddings/multimodal/bridgetower/Dockerfile rename to comps/embeddings/src/integrations/dependency/bridgetower/Dockerfile index 6045d6ed1..c2815dfe0 100644 --- a/comps/embeddings/multimodal/bridgetower/Dockerfile +++ b/comps/embeddings/src/integrations/dependency/bridgetower/Dockerfile @@ -5,6 +5,7 @@ FROM python:3.10-slim RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ chown -R user /home/user/ +RUN apt-get update && apt-get install -y curl USER user # Set environment variables ENV LANG=en_US.UTF-8 @@ -13,13 +14,14 @@ ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/optimum-habana COPY --chown=user comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir -r /home/user/comps/embeddings/multimodal/multimodal_langchain/requirements.txt + pip install --no-cache-dir -r /home/user/comps/embeddings/src/integrations/dependency/bridgetower/requirements.txt + ENV PYTHONPATH=$PYTHONPATH:/home/user ARG EMBEDDER_PORT=8080 ENV PORT=$EMBEDDER_PORT -WORKDIR /home/user/comps/embeddings/multimodal/bridgetower +WORKDIR /home/user/comps/embeddings/src/integrations/dependency/bridgetower/ ENTRYPOINT ["python", "bridgetower_server.py", "--device", "cpu"] diff --git a/comps/embeddings/multimodal/bridgetower/Dockerfile.intel_hpu b/comps/embeddings/src/integrations/dependency/bridgetower/Dockerfile.intel_hpu similarity index 75% rename from comps/embeddings/multimodal/bridgetower/Dockerfile.intel_hpu rename to comps/embeddings/src/integrations/dependency/bridgetower/Dockerfile.intel_hpu index 86c3ca7ad..1c2427743 100644 --- a/comps/embeddings/multimodal/bridgetower/Dockerfile.intel_hpu +++ b/comps/embeddings/src/integrations/dependency/bridgetower/Dockerfile.intel_hpu @@ -8,6 +8,7 @@ RUN useradd -m -s /bin/bash user && \ chown -R user /home/user/ RUN rm -rf /etc/ssh/ssh_host* +RUN apt-get update && apt-get install -y curl USER user # Set environment variables ENV LANG=en_US.UTF-8 @@ -17,7 +18,7 @@ COPY --chown=user comps /home/user/comps # Install requirements and optimum habana RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir -r /home/user/comps/embeddings/multimodal/multimodal_langchain/requirements.txt && \ + pip install --no-cache-dir -r /home/user/comps/embeddings/src/integrations/dependency/bridgetower/requirements.txt && \ pip install --no-cache-dir optimum[habana] ENV PYTHONPATH=$PYTHONPATH:/home/user @@ -25,5 +26,6 @@ ENV PYTHONPATH=$PYTHONPATH:/home/user ARG EMBEDDER_PORT=8080 ENV PORT=$EMBEDDER_PORT -WORKDIR /home/user/comps/embeddings/multimodal/bridgetower +WORKDIR /home/user/comps/embeddings/src/integrations/dependency/bridgetower/ ENTRYPOINT ["python", "bridgetower_server.py", "--device", "hpu"] + diff --git a/comps/embeddings/src/integrations/dependency/bridgetower/README.md b/comps/embeddings/src/integrations/dependency/bridgetower/README.md new file mode 100644 index 000000000..f7cdeb578 --- 
/dev/null +++ b/comps/embeddings/src/integrations/dependency/bridgetower/README.md @@ -0,0 +1,50 @@ +# Multimodal Embeddings Microservice with BridgeTower + +The Multimodal Embedding Microservice is designed to efficiently convert text and image pairs into vectorized embeddings, facilitating seamless integration into various machine learning and data processing workflows. This service utilizes advanced algorithms to generate high-quality embeddings that capture the joint semantic essence of the input text-and-image pairs, making it ideal for applications in multi-modal data processing, information retrieval, and similar fields. + +Key Features: + +**High Performance**: Optimized for quick and reliable conversion of textual data and image inputs into vector embeddings. + +**Scalability**: Built to handle high volumes of requests simultaneously, ensuring robust performance even under heavy loads. + +**Ease of Integration**: Provides a simple and intuitive API, allowing for straightforward integration into existing systems and workflows. + +**Customizable**: Supports configuration and customization to meet specific use case requirements, including different embedding models and preprocessing techniques. + +Users are able to configure and build embedding-related services according to their actual needs. + +Currently, we employ the [**BridgeTower**](https://huggingface.co/BridgeTower/bridgetower-large-itm-mlm-gaudi) model for MMEI and provide two ways to start it: + +## 🚀1. Start MMEI on Gaudi2 HPU + +- Gaudi2 HPU + +```bash +cd ../../../../../../ +docker build -t opea/embedding-multimodal-bridgetower-hpu:latest --build-arg EMBEDDER_PORT=$EMBEDDER_PORT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/src/integrations/dependency/bridgetower/Dockerfile.intel_hpu . +cd comps/embeddings/src/integrations/dependency/bridgetower/ +docker compose -f compose_intel_hpu.yaml up -d +``` + +## 🚀2. Start MMEI on Xeon CPU + +- Xeon CPU + +```bash +cd ../../../../../../ +docker build -t opea/embedding-multimodal-bridgetower:latest --build-arg EMBEDDER_PORT=$EMBEDDER_PORT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/src/integrations/dependency/bridgetower/Dockerfile . +cd comps/embeddings/src/integrations/dependency/bridgetower/ +docker compose -f compose_intel_cpu.yaml up -d +``` + +## 🚀3.
Access the service + +Then you need to test your MMEI service using the following commands: + +```bash +curl http://localhost:$your_mmei_port/v1/encode \ + -X POST \ + -H "Content-Type:application/json" \ + -d '{"text":"This is example"}' +``` diff --git a/comps/embeddings/multimodal/bridgetower/__init__.py b/comps/embeddings/src/integrations/dependency/bridgetower/__init__.py similarity index 100% rename from comps/embeddings/multimodal/bridgetower/__init__.py rename to comps/embeddings/src/integrations/dependency/bridgetower/__init__.py diff --git a/comps/embeddings/multimodal/bridgetower/bridgetower_custom.py b/comps/embeddings/src/integrations/dependency/bridgetower/bridgetower_custom.py similarity index 100% rename from comps/embeddings/multimodal/bridgetower/bridgetower_custom.py rename to comps/embeddings/src/integrations/dependency/bridgetower/bridgetower_custom.py diff --git a/comps/embeddings/multimodal/bridgetower/bridgetower_embedding.py b/comps/embeddings/src/integrations/dependency/bridgetower/bridgetower_embedding.py similarity index 100% rename from comps/embeddings/multimodal/bridgetower/bridgetower_embedding.py rename to comps/embeddings/src/integrations/dependency/bridgetower/bridgetower_embedding.py diff --git a/comps/embeddings/multimodal/bridgetower/bridgetower_server.py b/comps/embeddings/src/integrations/dependency/bridgetower/bridgetower_server.py similarity index 97% rename from comps/embeddings/multimodal/bridgetower/bridgetower_server.py rename to comps/embeddings/src/integrations/dependency/bridgetower/bridgetower_server.py index 0802993ea..66607ae56 100644 --- a/comps/embeddings/multimodal/bridgetower/bridgetower_server.py +++ b/comps/embeddings/src/integrations/dependency/bridgetower/bridgetower_server.py @@ -17,7 +17,7 @@ from fastapi import BackgroundTasks, FastAPI, Request from fastapi.responses import JSONResponse, Response from utils import build_logger -from comps.embeddings.multimodal.bridgetower import BridgeTowerEmbedding +from comps.embeddings.src.integrations.dependency.bridgetower import BridgeTowerEmbedding worker_id = str(uuid.uuid4())[:6] print(f"worker_id: {worker_id}") diff --git a/comps/embeddings/multimodal/bridgetower/docker_compose_bridgetower_embedding_endpoint.yaml b/comps/embeddings/src/integrations/dependency/bridgetower/compose_intel_cpu.yaml similarity index 94% rename from comps/embeddings/multimodal/bridgetower/docker_compose_bridgetower_embedding_endpoint.yaml rename to comps/embeddings/src/integrations/dependency/bridgetower/compose_intel_cpu.yaml index 1e42e6bff..24fab26ae 100644 --- a/comps/embeddings/multimodal/bridgetower/docker_compose_bridgetower_embedding_endpoint.yaml +++ b/comps/embeddings/src/integrations/dependency/bridgetower/compose_intel_cpu.yaml @@ -12,6 +12,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} + PORT: ${EMBEDDER_PORT} restart: unless-stopped networks: diff --git a/comps/reranks/videoqna/docker_compose_reranking.yaml b/comps/embeddings/src/integrations/dependency/bridgetower/compose_intel_hpu.yaml similarity index 57% rename from comps/reranks/videoqna/docker_compose_reranking.yaml rename to comps/embeddings/src/integrations/dependency/bridgetower/compose_intel_hpu.yaml index 4b39fd115..3e05f2515 100644 --- a/comps/reranks/videoqna/docker_compose_reranking.yaml +++ b/comps/embeddings/src/integrations/dependency/bridgetower/compose_intel_hpu.yaml @@ -2,18 +2,17 @@ # SPDX-License-Identifier: Apache-2.0 services: - reranking: - image: 
opea/reranking-videoqna:latest - container_name: reranking-videoqna-server + embedding-multimodal-bridgetower: + image: opea/embedding-multimodal-bridgetower-hpu:latest + container_name: embedding-multimodal-bridgetower ports: - - "8000:8000" + - ${EMBEDDER_PORT}:${EMBEDDER_PORT} ipc: host environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - CHUNK_DURATION: ${CHUNK_DURATION} - FILE_SERVER_ENDPOINT: ${FILE_SERVER_ENDPOINT} + PORT: ${EMBEDDER_PORT} restart: unless-stopped networks: diff --git a/comps/embeddings/multimodal/multimodal_langchain/requirements.txt b/comps/embeddings/src/integrations/dependency/bridgetower/requirements.txt similarity index 100% rename from comps/embeddings/multimodal/multimodal_langchain/requirements.txt rename to comps/embeddings/src/integrations/dependency/bridgetower/requirements.txt diff --git a/comps/embeddings/multimodal/bridgetower/utils.py b/comps/embeddings/src/integrations/dependency/bridgetower/utils.py similarity index 100% rename from comps/embeddings/multimodal/bridgetower/utils.py rename to comps/embeddings/src/integrations/dependency/bridgetower/utils.py diff --git a/comps/embeddings/tei/langchain/Dockerfile b/comps/embeddings/src/integrations/dependency/clip/Dockerfile similarity index 65% rename from comps/embeddings/tei/langchain/Dockerfile rename to comps/embeddings/src/integrations/dependency/clip/Dockerfile index 1cce58717..6dbcc241d 100644 --- a/comps/embeddings/tei/langchain/Dockerfile +++ b/comps/embeddings/src/integrations/dependency/clip/Dockerfile @@ -18,11 +18,12 @@ USER user COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip setuptools && \ - if [ ${ARCH} = "cpu" ]; then pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu; fi && \ - pip install --no-cache-dir -r /home/user/comps/embeddings/tei/langchain/requirements.txt + if [ ${ARCH} = "cpu" ]; then pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \ + pip install --no-cache-dir -r /home/user/comps/embeddings/src/integrations/dependency/clip/requirements.txt ENV PYTHONPATH=$PYTHONPATH:/home/user -WORKDIR /home/user/comps/embeddings/tei/langchain +WORKDIR /home/user/comps/embeddings/src/integrations/dependency/clip/ + +ENTRYPOINT ["python", "clip_server.py"] -ENTRYPOINT ["python", "embedding_tei.py"] diff --git a/comps/embeddings/multimodal_clip/README.md b/comps/embeddings/src/integrations/dependency/clip/README.md similarity index 79% rename from comps/embeddings/multimodal_clip/README.md rename to comps/embeddings/src/integrations/dependency/clip/README.md index eb3651495..af880af3a 100644 --- a/comps/embeddings/multimodal_clip/README.md +++ b/comps/embeddings/src/integrations/dependency/clip/README.md @@ -1,4 +1,4 @@ -# Multimodal CLIP Embeddings Microservice +# Multimodal Embeddings Microservice with CLIP The Multimodal CLIP Embedding Microservice is designed to efficiently convert textual strings and images into vectorized embeddings, facilitating seamless integration into various machine learning and data processing workflows. This service utilizes advanced algorithms to generate high-quality embeddings that capture the semantic essence of the input text and images, making it ideal for applications in multi-modal data processing, information retrieval, and similar fields. @@ -21,15 +21,15 @@ Users are albe to configure and build embedding-related services according to th #### Build Langchain Docker ```bash -cd ../../.. 
-docker build -t opea/embedding-multimodal:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/multimodal_clip/Dockerfile . +cd GenAIComps/ +docker build -t opea/embedding-multimodal-clip:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/src/integrations/dependency/clip/Dockerfile . ``` ### 1.2 Run Docker with Docker Compose ```bash -cd comps/embeddings/multimodal_clip -docker compose -f docker_compose_embedding.yaml up -d +cd comps/embeddings/src/integrations/dependency/clip/ +docker compose -f compose.yaml up -d ``` ## 🚀2. Consume Embedding Service @@ -37,7 +37,7 @@ docker compose -f docker_compose_embedding.yaml up -d ### 2.1 Check Service Status ```bash -curl http://localhost:6000/v1/health_check\ +curl http://localhost:6990/v1/health_check\ -X GET \ -H 'Content-Type: application/json' ``` @@ -48,13 +48,13 @@ Use our basic API. ```bash ## query with single text -curl http://localhost:6000/v1/embeddings\ +curl http://localhost:6990/v1/embeddings\ -X POST \ -d '{"text":"Hello, world!"}' \ -H 'Content-Type: application/json' ## query with multiple texts -curl http://localhost:6000/v1/embeddings\ +curl http://localhost:6990/v1/embeddings\ -X POST \ -d '{"text":["Hello, world!","How are you?"]}' \ -H 'Content-Type: application/json' @@ -64,13 +64,13 @@ We are also compatible with [OpenAI API](https://platform.openai.com/docs/api-re ```bash ## Input single text -curl http://localhost:6000/v1/embeddings\ +curl http://localhost:6990/v1/embeddings\ -X POST \ -d '{"input":"Hello, world!"}' \ -H 'Content-Type: application/json' ## Input multiple texts with parameters -curl http://localhost:6000/v1/embeddings\ +curl http://localhost:6990/v1/embeddings\ -X POST \ -d '{"input":["Hello, world!","How are you?"], "dimensions":100}' \ -H 'Content-Type: application/json' diff --git a/comps/embeddings/multimodal/multimodal_langchain/__init__.py b/comps/embeddings/src/integrations/dependency/clip/__init__.py similarity index 100% rename from comps/embeddings/multimodal/multimodal_langchain/__init__.py rename to comps/embeddings/src/integrations/dependency/clip/__init__.py diff --git a/comps/embeddings/multimodal_clip/embeddings_clip.py b/comps/embeddings/src/integrations/dependency/clip/clip_embedding.py similarity index 100% rename from comps/embeddings/multimodal_clip/embeddings_clip.py rename to comps/embeddings/src/integrations/dependency/clip/clip_embedding.py diff --git a/comps/embeddings/multimodal_clip/embedding_multimodal.py b/comps/embeddings/src/integrations/dependency/clip/clip_server.py similarity index 89% rename from comps/embeddings/multimodal_clip/embedding_multimodal.py rename to comps/embeddings/src/integrations/dependency/clip/clip_server.py index 888278e7a..d74605cdc 100644 --- a/comps/embeddings/multimodal_clip/embedding_multimodal.py +++ b/comps/embeddings/src/integrations/dependency/clip/clip_server.py @@ -4,10 +4,10 @@ import datetime import os import time -from typing import List, Optional, Union +from typing import List, Union +from clip_embedding import vCLIP from dateparser.search import search_dates -from embeddings_clip import vCLIP from comps import ( CustomLogger, @@ -26,7 +26,7 @@ from comps.cores.proto.api_protocol import ( EmbeddingResponseData, ) -logger = CustomLogger("embedding_multimodal") +logger = CustomLogger("embedding_multimodal_clip") logflag = os.getenv("LOGFLAG", False) @@ -67,15 +67,15 @@ def filtler_dates(prompt): @register_microservice( - 
name="opea_service@embedding_multimodal", + name="opea_service@embedding_multimodal_clip", service_type=ServiceType.EMBEDDING, endpoint="/v1/embeddings", host="0.0.0.0", - port=6000, + port=6990, input_datatype=TextDoc, output_datatype=EmbedDoc, ) -@register_statistics(names=["opea_service@embedding_multimodal"]) +@register_statistics(names=["opea_service@embedding_multimodal_clip"]) async def embedding( input: Union[TextDoc, EmbeddingRequest, ChatCompletionRequest] ) -> Union[EmbedDoc, EmbeddingResponse, ChatCompletionRequest]: @@ -107,7 +107,7 @@ async def embedding( # keep res = input - statistics_dict["opea_service@embedding_multimodal"].append_latency(time.time() - start, None) + statistics_dict["opea_service@embedding_multimodal_clip"].append_latency(time.time() - start, None) if logflag: logger.info(res) return res @@ -121,4 +121,4 @@ async def get_embeddings(text: Union[str, List[str]]) -> List[List[float]]: if __name__ == "__main__": embeddings = vCLIP({"model_name": "openai/clip-vit-base-patch32", "num_frm": 4}) - opea_microservices["opea_service@embedding_multimodal"].start() + opea_microservices["opea_service@embedding_multimodal_clip"].start() diff --git a/comps/embeddings/multimodal_clip/docker_compose_embedding.yaml b/comps/embeddings/src/integrations/dependency/clip/compose_intel_cpu.yaml similarity index 81% rename from comps/embeddings/multimodal_clip/docker_compose_embedding.yaml rename to comps/embeddings/src/integrations/dependency/clip/compose_intel_cpu.yaml index 7d314b93a..112804aa4 100644 --- a/comps/embeddings/multimodal_clip/docker_compose_embedding.yaml +++ b/comps/embeddings/src/integrations/dependency/clip/compose_intel_cpu.yaml @@ -5,7 +5,7 @@ version: "3.8" services: embedding: - image: opea/embedding-multimodal:latest + image: opea/embedding-multimodal-clip:latest container_name: embedding-multimodal-server ports: - "6000:6000" @@ -14,7 +14,6 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} restart: unless-stopped networks: diff --git a/comps/embeddings/multimodal_clip/requirements.txt b/comps/embeddings/src/integrations/dependency/clip/requirements.txt similarity index 99% rename from comps/embeddings/multimodal_clip/requirements.txt rename to comps/embeddings/src/integrations/dependency/clip/requirements.txt index c914a0d52..30d4841d2 100644 --- a/comps/embeddings/multimodal_clip/requirements.txt +++ b/comps/embeddings/src/integrations/dependency/clip/requirements.txt @@ -12,3 +12,4 @@ prometheus-fastapi-instrumentator sentence_transformers shortuuid uvicorn + diff --git a/comps/embeddings/src/integrations/opea_multimodal_embedding_bridgetower.py b/comps/embeddings/src/integrations/opea_multimodal_embedding_bridgetower.py new file mode 100644 index 000000000..9d3e46864 --- /dev/null +++ b/comps/embeddings/src/integrations/opea_multimodal_embedding_bridgetower.py @@ -0,0 +1,72 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import asyncio +import base64 +import os + +import requests + +from comps import CustomLogger, EmbedMultimodalDoc, MultimodalDoc, OpeaComponent, ServiceType, TextDoc, TextImageDoc + +logger = CustomLogger("opea_multimodal_embedding_bridgetower") +logflag = os.getenv("LOGFLAG", False) + + +class OpeaMultimodalEmbeddingBrigeTower(OpeaComponent): + """A specialized embedding component derived from OpeaComponent for local deployed BrigeTower multimodal embedding services. 
+
+    Attributes:
+        base_url (str): The endpoint of the locally deployed BridgeTower embedding service (MMEI_EMBEDDING_ENDPOINT).
+    """
+
+    def __init__(self, name: str, description: str, config: dict = None):
+        super().__init__(name, ServiceType.EMBEDDING.name.lower(), description, config)
+        self.base_url = os.getenv("MMEI_EMBEDDING_ENDPOINT", "http://localhost:8080")
+
+    async def invoke(self, input: MultimodalDoc) -> EmbedMultimodalDoc:
+        """Invokes the embedding service to generate embeddings for the provided input.
+
+        Args:
+            input (MultimodalDoc): A TextDoc or TextImageDoc holding the text (and optional image) to embed.
+
+        Returns:
+            EmbedMultimodalDoc: The input text (and image URL, if any) together with its embedding vector.
+        """
+        json = {}
+        if isinstance(input, TextDoc):
+            json["text"] = input.text
+        elif isinstance(input, TextImageDoc):
+            json["text"] = input.text.text
+            img_bytes = input.image.url.load_bytes()
+            base64_img = base64.b64encode(img_bytes).decode("utf-8")
+            json["img_b64_str"] = base64_img
+        else:
+            raise TypeError(
+                f"Unsupported input type: {type(input)}. "
+                "Input must be an instance of 'TextDoc' or 'TextImageDoc'. "
+                "Please verify the input type and try again."
+            )
+
+        response = await asyncio.to_thread(
+            requests.post, f"{self.base_url}/v1/encode", headers={"Content-Type": "application/json"}, json=json
+        )
+        response_json = response.json()
+        embed_vector = response_json["embedding"]
+        if isinstance(input, TextDoc):
+            res = EmbedMultimodalDoc(text=input.text, embedding=embed_vector)
+        elif isinstance(input, TextImageDoc):
+            res = EmbedMultimodalDoc(text=input.text.text, url=input.image.url, embedding=embed_vector)
+
+        return res
+
+    def check_health(self) -> bool:
+        """Check the health of the microservice by making a GET request to /v1/health_check."""
+        try:
+            response = requests.get(f"{self.base_url}/v1/health_check")
+            if response.status_code == 200:
+                return True
+            return False
+        except requests.exceptions.RequestException as e:
+            logger.info(f"Health check exception: {e}")
+            return False
diff --git a/comps/embeddings/src/integrations/opea_tei_embedding.py b/comps/embeddings/src/integrations/opea_tei_embedding.py
new file mode 100644
index 000000000..1bfc708fe
--- /dev/null
+++ b/comps/embeddings/src/integrations/opea_tei_embedding.py
@@ -0,0 +1,88 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import json
+import os
+from typing import List, Union
+
+import requests
+from huggingface_hub import AsyncInferenceClient
+
+from comps import CustomLogger, OpeaComponent, ServiceType
+from comps.cores.mega.utils import get_access_token
+from comps.cores.proto.api_protocol import EmbeddingRequest, EmbeddingResponse
+
+logger = CustomLogger("opea_tei_embedding")
+logflag = os.getenv("LOGFLAG", False)
+TOKEN_URL = os.getenv("TOKEN_URL")
+CLIENTID = os.getenv("CLIENTID")
+CLIENT_SECRET = os.getenv("CLIENT_SECRET")
+
+
+class OpeaTEIEmbedding(OpeaComponent):
+    """A specialized embedding component derived from OpeaComponent for TEI embedding services.
+
+    Attributes:
+        client (AsyncInferenceClient): An instance of the async client for embedding generation.
+        base_url (str): The TEI endpoint used for embedding requests (TEI_EMBEDDING_ENDPOINT).
+ """ + + def __init__(self, name: str, description: str, config: dict = None): + super().__init__(name, ServiceType.EMBEDDING.name.lower(), description, config) + self.base_url = os.getenv("TEI_EMBEDDING_ENDPOINT", "http://localhost:8080") + self.client = self._initialize_client() + + def _initialize_client(self) -> AsyncInferenceClient: + """Initializes the AsyncInferenceClient.""" + access_token = ( + get_access_token(TOKEN_URL, CLIENTID, CLIENT_SECRET) if TOKEN_URL and CLIENTID and CLIENT_SECRET else None + ) + headers = {"Authorization": f"Bearer {access_token}"} if access_token else {} + return AsyncInferenceClient( + model=f"{self.base_url}/v1/embeddings", + token=os.getenv("HUGGINGFACEHUB_API_TOKEN"), + headers=headers, + ) + + async def invoke(self, input: EmbeddingRequest) -> EmbeddingResponse: + """Invokes the embedding service to generate embeddings for the provided input. + + Args: + input (EmbeddingRequest): The input in OpenAI embedding format, including text(s) and optional parameters like model. + + Returns: + EmbeddingResponse: The response in OpenAI embedding format, including embeddings, model, and usage information. + """ + # Parse input according to the EmbeddingRequest format + if isinstance(input.input, str): + texts = [input.input.replace("\n", " ")] + elif isinstance(input.input, list): + if all(isinstance(item, str) for item in input.input): + texts = [text.replace("\n", " ") for text in input.input] + else: + raise ValueError("Invalid input format: Only string or list of strings are supported.") + else: + raise TypeError("Unsupported input type: input must be a string or list of strings.") + response = await self.client.post( + json={"input": texts, "encoding_format": input.encoding_format, "model": input.model, "user": input.user}, + task="text-embedding", + ) + embeddings = json.loads(response.decode()) + return EmbeddingResponse(**embeddings) + + def check_health(self) -> bool: + """Checks the health of the embedding service. + + Returns: + bool: True if the service is reachable and healthy, False otherwise. + """ + try: + response = requests.get(f"{self.base_url}/health") + if response.status_code == 200: + return True + else: + return False + except Exception as e: + # Handle connection errors, timeouts, etc. + logger.error(f"Health check failed: {e}") + return False diff --git a/comps/embeddings/src/integrations/predictionguard_embedding.py b/comps/embeddings/src/integrations/predictionguard_embedding.py new file mode 100644 index 000000000..a1fa0991a --- /dev/null +++ b/comps/embeddings/src/integrations/predictionguard_embedding.py @@ -0,0 +1,93 @@ +# Copyright (C) 2024 Prediction Guard, Inc. +# SPDX-License-Identified: Apache-2.0 + +import asyncio +import os + +from predictionguard import PredictionGuard + +from comps import CustomLogger, OpeaComponent, ServiceType +from comps.cores.proto.api_protocol import EmbeddingRequest, EmbeddingResponse, EmbeddingResponseData + +logger = CustomLogger("predictionguard_embedding") +logflag = os.getenv("LOGFLAG", False) + + +class PredictionguardEmbedding(OpeaComponent): + """A specialized embedding component derived from OpeaComponent for interacting with Prediction Guard services. + + Attributes: + client (PredictionGuard): An instance of the PredictionGuard client for embedding generation. + model_name (str): The name of the embedding model used by the Prediction Guard service. 
+ """ + + def __init__(self, name: str, description: str, config: dict = None): + super().__init__(name, ServiceType.EMBEDDING.name.lower(), description, config) + api_key = os.getenv("PREDICTIONGUARD_API_KEY") + self.client = None + if api_key: + self.client = PredictionGuard(api_key=api_key) + else: + logger.info("No PredictionGuard API KEY provided, client not instantiated") + self.model_name = os.getenv("PG_EMBEDDING_MODEL_NAME", "bridgetower-large-itm-mlm-itc") + + def check_health(self) -> bool: + """Checks the health of the Prediction Guard embedding service. + + This function sends a request to fetch the list of embedding models + to determine if the service is reachable and operational. + + Returns: + bool: True if the service returns a valid model list, False otherwise. + """ + try: + if not self.client: + return False + # Send a request to retrieve the list of models + response = self.client.embeddings.create(model="bridgetower-large-itm-mlm-itc", input=[{"text": "hello"}]) + + # Check if the response is a valid dictionary and contains the expected 'model' key + if isinstance(response, dict) and "model" in response: + # Check if the model matches the expected model name + if response["model"] == self.model_name: + return True + else: + return False + else: + # Handle the case where the response does not have the expected structure + return False + + except Exception as e: + # Handle exceptions such as network errors or unexpected failures + logger.error(f"Health check failed due to an exception: {e}") + return False + + async def invoke(self, input: EmbeddingRequest) -> EmbeddingResponse: + """Invokes the embedding service to generate embeddings for the provided input. + + Args: + input (EmbeddingRequest): The input in OpenAI embedding format, including text(s) and optional parameters like model. + + Returns: + EmbeddingResponse: The response in OpenAI embedding format, including embeddings, model, and usage information. 
+ """ + # Parse input according to the EmbeddingRequest format + if isinstance(input.input, str): + texts = [input.input.replace("\n", " ")] + elif isinstance(input.input, list): + if all(isinstance(item, str) for item in input.input): + texts = [text.replace("\n", " ") for text in input.input] + else: + raise ValueError("Invalid input format: Only string or list of strings are supported.") + else: + raise TypeError("Unsupported input type: input must be a string or list of strings.") + texts = [{"text": texts[i]} for i in range(len(texts))] + # Run the synchronous `create` method in a separate thread + response = await asyncio.to_thread(self.client.embeddings.create, model=self.model_name, input=texts) + response_data = response["data"] + embed_vector = [response_data[i]["embedding"] for i in range(len(response_data))] + # for standard openai embedding format + res = EmbeddingResponse( + data=[EmbeddingResponseData(index=i, embedding=embed_vector[i]) for i in range(len(embed_vector))] + ) + return res diff --git a/comps/embeddings/src/opea_embedding_microservice.py b/comps/embeddings/src/opea_embedding_microservice.py new file mode 100644 index 000000000..ade1b6b48 --- /dev/null +++ b/comps/embeddings/src/opea_embedding_microservice.py @@ -0,0 +1,83 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os +import time + +from integrations.opea_tei_embedding import OpeaTEIEmbedding +from integrations.predictionguard_embedding import PredictionguardEmbedding + +from comps import ( + CustomLogger, + OpeaComponentController, + ServiceType, + opea_microservices, + register_microservice, + register_statistics, + statistics_dict, +) +from comps.cores.proto.api_protocol import EmbeddingRequest, EmbeddingResponse + +logger = CustomLogger("opea_embedding_microservice") +logflag = os.getenv("LOGFLAG", False) + +# Initialize OpeaComponentController +controller = OpeaComponentController() + +# Register components +try: + # Instantiate Embedding components and register it to controller + if os.getenv("TEI_EMBEDDING_ENDPOINT"): + opea_tei_embedding = OpeaTEIEmbedding( + name="OpeaTEIEmbedding", + description="OPEA TEI Embedding Service", + ) + controller.register(opea_tei_embedding) + if os.getenv("PREDICTIONGUARD_API_KEY"): + predictionguard_embedding = PredictionguardEmbedding( + name="PredictionGuardEmbedding", + description="Prediction Guard Embedding Service", + ) + controller.register(predictionguard_embedding) + + # Discover and activate a healthy component + controller.discover_and_activate() +except Exception as e: + logger.error(f"Failed to initialize components: {e}") + + +@register_microservice( + name="opea_service@embedding", + service_type=ServiceType.EMBEDDING, + endpoint="/v1/embeddings", + host="0.0.0.0", + port=6000, +) +@register_statistics(names=["opea_service@embedding"]) +async def embedding(input: EmbeddingRequest) -> EmbeddingResponse: + start = time.time() + + # Log the input if logging is enabled + if logflag: + logger.info(f"Input received: {input}") + + try: + # Use the controller to invoke the active component + embedding_response = await controller.invoke(input) + + # Log the result if logging is enabled + if logflag: + logger.info(f"Output received: {embedding_response}") + + # Record statistics + statistics_dict["opea_service@embedding"].append_latency(time.time() - start, None) + return embedding_response + + except Exception as e: + logger.error(f"Error during embedding invocation: {e}") + raise + + +if __name__ == "__main__": + 
opea_microservices["opea_service@embedding"].start() + logger.info("OPEA Embedding Microservice is up and running successfully...") diff --git a/comps/embeddings/src/opea_multimodal_embedding_microservice.py b/comps/embeddings/src/opea_multimodal_embedding_microservice.py new file mode 100644 index 000000000..6f0ba8a78 --- /dev/null +++ b/comps/embeddings/src/opea_multimodal_embedding_microservice.py @@ -0,0 +1,81 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os +import time + +from integrations.opea_multimodal_embedding_bridgetower import OpeaMultimodalEmbeddingBrigeTower + +from comps import ( + CustomLogger, + EmbedMultimodalDoc, + MultimodalDoc, + OpeaComponentController, + ServiceType, + opea_microservices, + register_microservice, + register_statistics, + statistics_dict, +) + +logger = CustomLogger("opea_multimodal_embedding_microservice") +logflag = os.getenv("LOGFLAG", False) + +# Initialize OpeaComponentController +controller = OpeaComponentController() + +# Register components +try: + # Instantiate Embedding components and register it to controller + if os.getenv("MMEI_EMBEDDING_ENDPOINT"): + opea_mm_embedding_bt = OpeaMultimodalEmbeddingBrigeTower( + name="OpeaMultimodalEmbeddingBrigeTower", + description="OPEA Multimodal Embedding Service using BridgeTower", + ) + controller.register(opea_mm_embedding_bt) + + # Discover and activate a healthy component + controller.discover_and_activate() +except Exception as e: + logger.error(f"Failed to initialize components: {e}") + +port = int(os.getenv("MM_EMBEDDING_PORT_MICROSERVICE", 6000)) + + +@register_microservice( + name="opea_service@multimodal_embedding", + service_type=ServiceType.EMBEDDING, + endpoint="/v1/embeddings", + host="0.0.0.0", + port=port, + input_datatype=MultimodalDoc, + output_datatype=EmbedMultimodalDoc, +) +@register_statistics(names=["opea_service@multimodal_embedding"]) +async def embedding(input: MultimodalDoc) -> EmbedMultimodalDoc: + start = time.time() + + # Log the input if logging is enabled + if logflag: + logger.info(f"Input received: {input}") + + try: + # Use the controller to invoke the active component + embedding_response = await controller.invoke(input) + + # Log the result if logging is enabled + if logflag: + logger.info(f"Output received: {embedding_response}") + + # Record statistics + statistics_dict["opea_service@multimodal_embedding"].append_latency(time.time() - start, None) + return embedding_response + + except Exception as e: + logger.error(f"Error during embedding invocation: {e}") + raise + + +if __name__ == "__main__": + opea_microservices["opea_service@multimodal_embedding"].start() + logger.info("OPEA Multimodal Embedding Microservice is up and running successfully...") diff --git a/comps/embeddings/predictionguard/requirements.txt b/comps/embeddings/src/requirements.txt similarity index 88% rename from comps/embeddings/predictionguard/requirements.txt rename to comps/embeddings/src/requirements.txt index 0c1b8527f..80cb07828 100644 --- a/comps/embeddings/predictionguard/requirements.txt +++ b/comps/embeddings/src/requirements.txt @@ -1,6 +1,8 @@ aiohttp docarray fastapi +huggingface_hub +openai opentelemetry-api opentelemetry-exporter-otlp opentelemetry-sdk diff --git a/comps/embeddings/tei/langchain/Dockerfile.dynamic_batching b/comps/embeddings/tei/langchain/Dockerfile.dynamic_batching deleted file mode 100644 index 56148f320..000000000 --- a/comps/embeddings/tei/langchain/Dockerfile.dynamic_batching +++ /dev/null @@ -1,28 +0,0 @@ -# 
Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -# FROM opea/habanalabs:1.16.1-pytorch-installer-2.2.2 as hpu -FROM vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest as hpu - -RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ - libgl1-mesa-glx \ - libjemalloc-dev - -RUN useradd -m -s /bin/bash user && \ - mkdir -p /home/user && \ - chown -R user /home/user/ - -# Disable user for now -# USER user - -COPY comps /home/user/comps - -RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir -r /home/user/comps/embeddings/tei/langchain/requirements.txt && \ - pip install git+https://github.com/huggingface/optimum-habana.git - -ENV PYTHONPATH=$PYTHONPATH:/home/user - -WORKDIR /home/user/comps/embeddings/tei/langchain - -ENTRYPOINT ["python", "local_embedding_reranking.py"] diff --git a/comps/embeddings/tei/langchain/README.md b/comps/embeddings/tei/langchain/README.md deleted file mode 100644 index e3cdf98d4..000000000 --- a/comps/embeddings/tei/langchain/README.md +++ /dev/null @@ -1,124 +0,0 @@ -# Embeddings Microservice with Langchain TEI - -## 🚀1. Start Microservice with Python (Option 1) - -Currently, we provide two ways to implement the embedding service: - -1. Build the embedding model **_locally_** from the server, which is faster, but takes up memory on the local server. - -2. Build it based on the **_TEI endpoint_**, which provides more flexibility, but may bring some network latency. - -For both of the implementations, you need to install requirements first. - -### 1.1 Install Requirements - -```bash -pip install -r requirements.txt -``` - -### 1.2 Start Embedding Service - -You can select one of following ways to start the embedding service: - -#### Start Embedding Service with TEI - -First, you need to start a TEI service. - -```bash -your_port=8090 -model="BAAI/bge-large-en-v1.5" -docker run -p $your_port:80 -v ./data:/data --name tei_server -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $model -``` - -Then you need to test your TEI service using the following commands: - -```bash -curl localhost:$your_port/v1/embeddings \ - -X POST \ - -d '{"input":"What is Deep Learning?"}' \ - -H 'Content-Type: application/json' -``` - -Start the embedding service with the TEI_EMBEDDING_ENDPOINT. - -```bash -export TEI_EMBEDDING_ENDPOINT="http://localhost:$yourport" -export TEI_EMBEDDING_MODEL_NAME="BAAI/bge-large-en-v1.5" -python embedding_tei.py -``` - -## 🚀2. Start Microservice with Docker (Optional 2) - -### 2.1 Start Embedding Service with TEI - -First, you need to start a TEI service. 
- -```bash -your_port=8090 -model="BAAI/bge-large-en-v1.5" -docker run -p $your_port:80 -v ./data:/data --name tei_server -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $model -``` - -Then you need to test your TEI service using the following commands: - -```bash -curl localhost:$your_port/embed/v1/embeddings \ - -X POST \ - -d '{"input":"What is Deep Learning?"}' \ - -H 'Content-Type: application/json' -``` - -Export the `TEI_EMBEDDING_ENDPOINT` for later usage: - -```bash -export TEI_EMBEDDING_ENDPOINT="http://localhost:$yourport" -export TEI_EMBEDDING_MODEL_NAME="BAAI/bge-large-en-v1.5" -``` - -### 2.2 Build Docker Image - -```bash -cd ../../../../ -docker build -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/tei/langchain/Dockerfile . -``` - -### 2.3 Run Docker with CLI - -```bash -docker run -d --name="embedding-tei-server" -p 6000:6000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e TEI_EMBEDDING_MODEL_NAME=$TEI_EMBEDDING_MODEL_NAME opea/embedding-tei:latest -``` - -### 2.4 Run Docker with Docker Compose - -```bash -cd docker -docker compose -f docker_compose_embedding.yaml up -d -``` - -## 🚀3. Consume Embedding Service - -### 3.1 Check Service Status - -```bash -curl http://localhost:6000/v1/health_check\ - -X GET \ - -H 'Content-Type: application/json' -``` - -### 3.2 Consume Embedding Service - -The input/output follows [OpenAI API Embeddings](https://platform.openai.com/docs/api-reference/embeddings) format. - -```bash -## Input single text -curl http://localhost:6000/v1/embeddings\ - -X POST \ - -d '{"input":"Hello, world!"}' \ - -H 'Content-Type: application/json' - -## Input multiple texts with parameters -curl http://localhost:6000/v1/embeddings\ - -X POST \ - -d '{"input":["Hello, world!","How are you?"], "encoding_format":"base64"}' \ - -H 'Content-Type: application/json' -``` diff --git a/comps/embeddings/tei/langchain/embedding_tei.py b/comps/embeddings/tei/langchain/embedding_tei.py deleted file mode 100644 index 5bd6bfab7..000000000 --- a/comps/embeddings/tei/langchain/embedding_tei.py +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import json -import os -import time -from typing import Dict, List, Union - -from huggingface_hub import AsyncInferenceClient - -from comps import ( - CustomLogger, - EmbedDoc, - ServiceType, - TextDoc, - opea_microservices, - register_microservice, - register_statistics, - statistics_dict, -) -from comps.cores.mega.utils import get_access_token -from comps.cores.proto.api_protocol import EmbeddingRequest, EmbeddingResponse, EmbeddingResponseData - -logger = CustomLogger("embedding_tei_langchain") -logflag = os.getenv("LOGFLAG", False) - -# Environment variables -HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN") -TOKEN_URL = os.getenv("TOKEN_URL") -CLIENTID = os.getenv("CLIENTID") -CLIENT_SECRET = os.getenv("CLIENT_SECRET") -TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", "http://localhost:8080") - - -@register_microservice( - name="opea_service@embedding_tei_langchain", - service_type=ServiceType.EMBEDDING, - endpoint="/v1/embeddings", - host="0.0.0.0", - port=6000, -) -@register_statistics(names=["opea_service@embedding_tei_langchain"]) -async def embedding(input: Union[TextDoc, EmbeddingRequest]) -> Union[EmbedDoc, 
EmbeddingResponse]: - start = time.time() - access_token = ( - get_access_token(TOKEN_URL, CLIENTID, CLIENT_SECRET) if TOKEN_URL and CLIENTID and CLIENT_SECRET else None - ) - async_client = get_async_inference_client(access_token) - if logflag: - logger.info(input) - - if isinstance(input, TextDoc): - embedding_res = await aembed_query({"input": input.text}, async_client) - embedding_vec = [data["embedding"] for data in embedding_res["data"]] - embedding_vec = embedding_vec[0] if isinstance(input.text, str) else embedding_vec - res = EmbedDoc(text=input.text, embedding=embedding_vec) - else: - embedding_res = await aembed_query( - {"input": input.input, "encoding_format": input.encoding_format, "model": input.model, "user": input.user}, - async_client, - ) - res = EmbeddingResponse(**embedding_res) - - statistics_dict["opea_service@embedding_tei_langchain"].append_latency(time.time() - start, None) - if logflag: - logger.info(res) - return res - - -async def aembed_query(request: Dict, async_client: AsyncInferenceClient) -> Union[Dict, List[List[float]]]: - response = await async_client.post(json=request) - return json.loads(response.decode()) - - -def get_async_inference_client(access_token: str) -> AsyncInferenceClient: - headers = {"Authorization": f"Bearer {access_token}"} if access_token else {} - return AsyncInferenceClient( - model=f"{TEI_EMBEDDING_ENDPOINT}/v1/embeddings", token=HUGGINGFACEHUB_API_TOKEN, headers=headers - ) - - -if __name__ == "__main__": - logger.info("TEI Gaudi Embedding initialized.") - opea_microservices["opea_service@embedding_tei_langchain"].start() diff --git a/comps/embeddings/tei/langchain/local_embedding.py b/comps/embeddings/tei/langchain/local_embedding.py deleted file mode 100644 index 3f3fd5fc4..000000000 --- a/comps/embeddings/tei/langchain/local_embedding.py +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os - -from langchain_huggingface import HuggingFaceEmbeddings - -from comps import ( - CustomLogger, - EmbedDoc, - ServiceType, - TextDoc, - opea_microservices, - opea_telemetry, - register_microservice, -) - -logger = CustomLogger("local_embedding") -logflag = os.getenv("LOGFLAG", False) - - -@register_microservice( - name="opea_service@local_embedding", - service_type=ServiceType.EMBEDDING, - endpoint="/v1/embeddings", - host="0.0.0.0", - port=6000, - input_datatype=TextDoc, - output_datatype=EmbedDoc, -) -@opea_telemetry -async def embedding(input: TextDoc) -> EmbedDoc: - if logflag: - logger.info(input) - embed_vector = await embeddings.aembed_query(input.text) - res = EmbedDoc(text=input.text, embedding=embed_vector) - if logflag: - logger.info(res) - return res - - -if __name__ == "__main__": - embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5") - opea_microservices["opea_service@local_embedding"].start() diff --git a/comps/embeddings/tei/langchain/local_embedding_reranking.py b/comps/embeddings/tei/langchain/local_embedding_reranking.py deleted file mode 100644 index a29677744..000000000 --- a/comps/embeddings/tei/langchain/local_embedding_reranking.py +++ /dev/null @@ -1,250 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import asyncio -import math -import os -from enum import Enum -from pathlib import Path -from typing import Union - -import torch -from habana_frameworks.torch.hpu import wrap_in_hpu_graph -from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi -from 
sentence_transformers.models import Pooling -from transformers import AutoModel, AutoModelForSequenceClassification, AutoTokenizer - -from comps import ( - CustomLogger, - EmbedDoc, - LLMParamsDoc, - SearchedDoc, - ServiceType, - TextDoc, - opea_microservices, - register_microservice, -) -from comps.cores.proto.api_protocol import ChatCompletionRequest, EmbeddingRequest, EmbeddingResponse - -logger = CustomLogger("local_embedding_reranking") -logflag = os.getenv("LOGFLAG", False) - -# keep it consistent for different routers for now -DYNAMIC_BATCHING_TIMEOUT = float(os.getenv("DYNAMIC_BATCHING_TIMEOUT", 0.01)) -DYNAMIC_BATCHING_MAX_BATCH_SIZE = int(os.getenv("DYNAMIC_BATCHING_MAX_BATCH_SIZE", 32)) -PAD_SEQUENCE_TO_MULTIPLE_OF = int(os.environ.get("PAD_SEQUENCE_TO_MULTIPLE_OF", 128)) -EMBEDDING_MODEL_ID = os.environ.get("EMBEDDING_MODEL_ID", "BAAI/bge-base-en-v1.5") -RERANK_MODEL_ID = os.environ.get("RERANK_MODEL_ID", "BAAI/bge-reranker-base") - - -def round_up(number, k): - return (number + k - 1) // k * k - - -class EmbeddingModel: - def __init__( - self, - model_path: Path, - device: torch.device, - dtype: torch.dtype, - trust_remote: bool = False, - ): - if device == torch.device("hpu"): - adapt_transformers_to_gaudi() - model = AutoModel.from_pretrained(model_path, trust_remote_code=trust_remote).to(dtype).to(device) - if device == torch.device("hpu"): - logger.info("Use graph mode for HPU") - model = wrap_in_hpu_graph(model, disable_tensor_cache=True) - self.hidden_size = model.config.hidden_size - self.pooling = Pooling(self.hidden_size, pooling_mode="cls") - self.model = model - - def embed(self, batch): - output = self.model(**batch) - # sentence_embeddings = output[0][:, 0] - # sentence_embeddings = torch.nn.functional.normalize(sentence_embeddings, p=2, dim=1) - pooling_features = { - "token_embeddings": output[0], - "attention_mask": batch.attention_mask, - } - embedding = self.pooling.forward(pooling_features)["sentence_embedding"] - ## normalize - embedding = torch.nn.functional.normalize(embedding, p=2, dim=1) - cpu_results = embedding.reshape(-1).tolist() - return [cpu_results[i * self.hidden_size : (i + 1) * self.hidden_size] for i in range(len(batch.input_ids))] - - -class RerankingModel: - def __init__(self, model_path: Path, device: torch.device, dtype: torch.dtype): - if device == torch.device("hpu"): - adapt_transformers_to_gaudi() - - model = AutoModelForSequenceClassification.from_pretrained(model_path) - model = model.to(dtype).to(device) - - if device == torch.device("hpu"): - logger.info("Use graph mode for HPU") - model = wrap_in_hpu_graph(model, disable_tensor_cache=True) - self.model = model - - def predict(self, batch): - scores = ( - self.model(**batch, return_dict=True) - .logits.view( - -1, - ) - .float() - ) - scores = torch.sigmoid(scores) - return scores - - -def pad_batch(inputs: dict, max_input_len: int): - # pad seq_len to MULTIPLE OF, pad bs - batch_size, concrete_length = inputs["input_ids"].size()[0], inputs["input_ids"].size()[1] - max_length = round_up(concrete_length, PAD_SEQUENCE_TO_MULTIPLE_OF) - max_length = min(max_length, max_input_len) # should not exceed max input len - new_bs = 2 ** math.ceil(math.log2(batch_size)) - for x in inputs: - inputs[x] = torch.nn.functional.pad( - inputs[x], (0, max_length - concrete_length, 0, new_bs - batch_size), value=0 - ) - return inputs - - -async def dynamic_batching_infer(service_type: Enum, batch: list[dict]): - if logflag: - logger.info(f"{service_type} {len(batch)} request inference begin >>>") - - 
if service_type == ServiceType.EMBEDDING: - sentences = [req["request"].text for req in batch] - - with torch.no_grad(): - encoded_input = embedding_tokenizer( - sentences, - padding=True, - truncation=True, - return_tensors="pt", - ).to(device="hpu") - encoded_input = pad_batch(encoded_input, embedding_tokenizer.model_max_length) - # with torch.autocast("hpu", dtype=torch.bfloat16): - results = embedding_model.embed(encoded_input) - - return [EmbedDoc(text=txt, embedding=embed_vector) for txt, embed_vector in zip(sentences, results)] - elif service_type == ServiceType.RERANK: - pairs = [] - doc_lengths = [] - for req in batch: - doc_len = len(req["request"].retrieved_docs) - doc_lengths.append(doc_len) - for idx in range(doc_len): - pairs.append([req["request"].initial_query, req["request"].retrieved_docs[idx].text]) - - with torch.no_grad(): - inputs = reranking_tokenizer( - pairs, - padding=True, - truncation=True, - return_tensors="pt", - ).to("hpu") - inputs = pad_batch(inputs, reranking_tokenizer.model_max_length) - scores = reranking_model.predict(inputs) - - # reduce each query's best related doc - final_results = [] - start = 0 - for idx, doc_len in enumerate(doc_lengths): - req_scores = scores[start : start + doc_len] - cur_req = batch[idx]["request"] - docs: list[TextDoc] = cur_req.retrieved_docs[0:doc_len] - docs = [doc.text for doc in docs] - # sort and select top n docs - top_n_docs = sorted(list(zip(docs, req_scores)), key=lambda x: x[1], reverse=True)[: cur_req.top_n] - top_n_docs: list[str] = [tupl[0] for tupl in top_n_docs] - final_results.append(LLMParamsDoc(query=cur_req.initial_query, documents=top_n_docs)) - - start += doc_len - - return final_results - - -@register_microservice( - name="opea_service@local_embedding_reranking", - service_type=ServiceType.EMBEDDING, - endpoint="/v1/embeddings", - host="0.0.0.0", - port=6001, - dynamic_batching=True, - dynamic_batching_timeout=DYNAMIC_BATCHING_TIMEOUT, - dynamic_batching_max_batch_size=DYNAMIC_BATCHING_MAX_BATCH_SIZE, -) -async def embedding( - input: Union[TextDoc, EmbeddingRequest, ChatCompletionRequest] -) -> Union[EmbedDoc, EmbeddingResponse, ChatCompletionRequest]: - - # if logflag: - # logger.info(input) - # Create a future for this specific request - response_future = asyncio.get_event_loop().create_future() - - cur_microservice = opea_microservices["opea_service@local_embedding_reranking"] - cur_microservice.dynamic_batching_infer = dynamic_batching_infer - async with cur_microservice.buffer_lock: - cur_microservice.request_buffer[ServiceType.EMBEDDING].append({"request": input, "response": response_future}) - - # Wait for batch inference to complete and return results - result = await response_future - - return result - - -@register_microservice( - name="opea_service@local_embedding_reranking", - service_type=ServiceType.RERANK, - endpoint="/v1/reranking", - host="0.0.0.0", - port=6001, - input_datatype=SearchedDoc, - output_datatype=LLMParamsDoc, - dynamic_batching=True, - dynamic_batching_timeout=DYNAMIC_BATCHING_TIMEOUT, - dynamic_batching_max_batch_size=DYNAMIC_BATCHING_MAX_BATCH_SIZE, -) -async def reranking(input: SearchedDoc) -> LLMParamsDoc: - - # if logflag: - # logger.info(input) - - if len(input.retrieved_docs) == 0: - return LLMParamsDoc(query=input.initial_query) - - # Create a future for this specific request - response_future = asyncio.get_event_loop().create_future() - - cur_microservice = opea_microservices["opea_service@local_embedding_reranking"] - cur_microservice.dynamic_batching_infer = 
dynamic_batching_infer - async with cur_microservice.buffer_lock: - cur_microservice.request_buffer[ServiceType.RERANK].append({"request": input, "response": response_future}) - - # Wait for batch inference to complete and return results - result = await response_future - - return result - - -if __name__ == "__main__": - embedding_model = EmbeddingModel(model_path=EMBEDDING_MODEL_ID, device=torch.device("hpu"), dtype=torch.bfloat16) - embedding_tokenizer = AutoTokenizer.from_pretrained(EMBEDDING_MODEL_ID) - # sentences = ["sample-1", "sample-2"] - # encoded_input = embedding_tokenizer(sentences, padding=True, truncation=True, return_tensors='pt').to(device="hpu") - # results = embedding_model.embed(encoded_input) - # print(results) - reranking_model = RerankingModel(model_path=RERANK_MODEL_ID, device=torch.device("hpu"), dtype=torch.bfloat16) - reranking_tokenizer = AutoTokenizer.from_pretrained(RERANK_MODEL_ID) - - # pairs = [['what is panda?', 'hi'], ['what is panda?', 'The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China.']] - # with torch.no_grad(): - # inputs = reranking_tokenizer(pairs, padding=True, truncation=True, return_tensors='pt', max_length=512).to("hpu") - # scores = reranking_model.predict(inputs) - # print(scores) - opea_microservices["opea_service@local_embedding_reranking"].start(in_single_process=True) diff --git a/comps/embeddings/tei/langchain/requirements.txt b/comps/embeddings/tei/langchain/requirements.txt deleted file mode 100644 index 1bfe6f44c..000000000 --- a/comps/embeddings/tei/langchain/requirements.txt +++ /dev/null @@ -1,12 +0,0 @@ -docarray[full] -fastapi -huggingface_hub -langchain -langchain_huggingface -opentelemetry-api -opentelemetry-exporter-otlp -opentelemetry-sdk -prometheus-fastapi-instrumentator -sentence_transformers -shortuuid -uvicorn diff --git a/comps/embeddings/tei/llama_index/Dockerfile b/comps/embeddings/tei/llama_index/Dockerfile deleted file mode 100644 index 0f411360c..000000000 --- a/comps/embeddings/tei/llama_index/Dockerfile +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -FROM ubuntu:22.04 - -RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ - libgl1-mesa-glx \ - libjemalloc-dev \ - python3 \ - python3-pip - -RUN useradd -m -s /bin/bash user && \ - mkdir -p /home/user && \ - chown -R user /home/user/ - -USER user - -COPY comps /home/user/comps - -RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir -r /home/user/comps/embeddings/tei/llama_index/requirements.txt - -ENV PYTHONPATH=$PYTHONPATH:/home/user - -WORKDIR /home/user/comps/embeddings/tei/llama_index - -ENTRYPOINT ["python3", "embedding_tei.py"] diff --git a/comps/embeddings/tei/llama_index/README.md b/comps/embeddings/tei/llama_index/README.md deleted file mode 100644 index dd1f5006c..000000000 --- a/comps/embeddings/tei/llama_index/README.md +++ /dev/null @@ -1,146 +0,0 @@ -# Embeddings Microservice with Llama Index TEI - -## 🚀1. Start Microservice with Python (Option 1) - -Currently, we provide two ways to implement the embedding service: - -1. Build the embedding model **_locally_** from the server, which is faster, but takes up memory on the local server. - -2. Build it based on the **_TEI endpoint_**, which provides more flexibility, but may bring some network latency. - -For both of the implementations, you need to install requirements first. 
- -### 1.1 Install Requirements - -```bash -pip install -r requirements.txt -``` - -### 1.2 Start Embedding Service - -You can select one of following ways to start the embedding service: - -#### Start Embedding Service with TEI - -First, you need to start a TEI service. - -```bash -your_port=8090 -model="BAAI/bge-large-en-v1.5" -docker run -p $your_port:80 -v ./data:/data --name tei_server -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $model -``` - -Then you need to test your TEI service using the following commands: - -```bash -curl localhost:$your_port/embed \ - -X POST \ - -d '{"inputs":"What is Deep Learning?"}' \ - -H 'Content-Type: application/json' -``` - -Start the embedding service with the TEI_EMBEDDING_ENDPOINT. - -```bash -export TEI_EMBEDDING_ENDPOINT="http://localhost:$yourport" -export TEI_EMBEDDING_MODEL_NAME="BAAI/bge-large-en-v1.5" -python embedding_tei.py -``` - -#### Start Embedding Service with Local Model - -```bash -python local_embedding.py -``` - -## 🚀2. Start Microservice with Docker (Optional 2) - -### 2.1 Start Embedding Service with TEI - -First, you need to start a TEI service. - -```bash -your_port=8090 -model="BAAI/bge-large-en-v1.5" -docker run -p $your_port:80 -v ./data:/data --name tei_server -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $model -``` - -Then you need to test your TEI service using the following commands: - -```bash -curl localhost:$your_port/embed \ - -X POST \ - -d '{"inputs":"What is Deep Learning?"}' \ - -H 'Content-Type: application/json' -``` - -Export the `TEI_EMBEDDING_ENDPOINT` for later usage: - -```bash -export TEI_EMBEDDING_ENDPOINT="http://localhost:$yourport" -export TEI_EMBEDDING_MODEL_NAME="BAAI/bge-large-en-v1.5" -``` - -### 2.2 Build Docker Image - -```bash -cd ../../../../ -docker build -t opea/embedding-tei-llama-index:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/tei/llama_index/Dockerfile . -``` - -### 2.3 Run Docker with CLI - -```bash -docker run -d --name="embedding-tei-llama-index-server" -p 6000:6000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e TEI_EMBEDDING_MODEL_NAME=$TEI_EMBEDDING_MODEL_NAME opea/embedding-tei-llama-index:latest -``` - -### 2.4 Run Docker with Docker Compose - -```bash -cd docker -docker compose -f docker_compose_embedding.yaml up -d -``` - -## 🚀3. Consume Embedding Service - -### 3.1 Check Service Status - -```bash -curl http://localhost:6000/v1/health_check\ - -X GET \ - -H 'Content-Type: application/json' -``` - -### 3.2 Consume Embedding Service - -Use our basic API. - -```bash -## query with single text -curl http://localhost:6000/v1/embeddings\ - -X POST \ - -d '{"text":"Hello, world!"}' \ - -H 'Content-Type: application/json' - -## query with multiple texts -curl http://localhost:6000/v1/embeddings\ - -X POST \ - -d '{"text":["Hello, world!","How are you?"]}' \ - -H 'Content-Type: application/json' -``` - -We are also compatible with [OpenAI API](https://platform.openai.com/docs/api-reference/embeddings). 
- -```bash -## Input single text -curl http://localhost:6000/v1/embeddings\ - -X POST \ - -d '{"input":"Hello, world!"}' \ - -H 'Content-Type: application/json' - -## Input multiple texts with parameters -curl http://localhost:6000/v1/embeddings\ - -X POST \ - -d '{"input":["Hello, world!","How are you?"], "dimensions":100}' \ - -H 'Content-Type: application/json' -``` diff --git a/comps/embeddings/tei/llama_index/docker_compose_embedding.yaml b/comps/embeddings/tei/llama_index/docker_compose_embedding.yaml deleted file mode 100644 index 152f5030b..000000000 --- a/comps/embeddings/tei/llama_index/docker_compose_embedding.yaml +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3.8" - -services: - embedding: - image: opea/embedding-tei-llama-index:latest - container_name: embedding-tei-server - ports: - - "6000:6000" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} - TEI_EMBEDDING_MODEL_NAME: ${TEI_EMBEDDING_MODEL_NAME} - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/embeddings/tei/llama_index/embedding_tei.py b/comps/embeddings/tei/llama_index/embedding_tei.py deleted file mode 100644 index a3ff25a70..000000000 --- a/comps/embeddings/tei/llama_index/embedding_tei.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os -from typing import List, Union - -from llama_index.embeddings.text_embeddings_inference import TextEmbeddingsInference - -from comps import CustomLogger, EmbedDoc, ServiceType, TextDoc, opea_microservices, register_microservice -from comps.cores.proto.api_protocol import ( - ChatCompletionRequest, - EmbeddingRequest, - EmbeddingResponse, - EmbeddingResponseData, -) - -logger = CustomLogger("embedding_tei_llamaindex") -logflag = os.getenv("LOGFLAG", False) - - -@register_microservice( - name="opea_service@embedding_tei_llamaindex", - service_type=ServiceType.EMBEDDING, - endpoint="/v1/embeddings", - host="0.0.0.0", - port=6000, - input_datatype=TextDoc, - output_datatype=EmbedDoc, -) -async def embedding( - input: Union[TextDoc, EmbeddingRequest, ChatCompletionRequest] -) -> Union[EmbedDoc, EmbeddingResponse, ChatCompletionRequest]: - if logflag: - logger.info(input) - if isinstance(input, TextDoc): - embed_vector = await get_embeddings(input.text) - embedding_res = embed_vector[0] if isinstance(input.text, str) else embed_vector - res = EmbedDoc(text=input.text, embedding=embedding_res) - else: - embed_vector = await get_embeddings(input.input) - if input.dimensions is not None: - embed_vector = [embed_vector[i][: input.dimensions] for i in range(len(embed_vector))] - - # for standard openai embedding format - res = EmbeddingResponse( - data=[EmbeddingResponseData(index=i, embedding=embed_vector[i]) for i in range(len(embed_vector))] - ) - - if isinstance(input, ChatCompletionRequest): - input.embedding = res - # keep - res = input - - if logflag: - logger.info(res) - return res - - -async def get_embeddings(text: Union[str, List[str]]) -> List[List[float]]: - texts = [text] if isinstance(text, str) else text - embed_vector = await embeddings._aget_text_embeddings(texts) - return embed_vector - - -if __name__ == "__main__": - tei_embedding_model_name = os.getenv("TEI_EMBEDDING_MODEL_NAME", "BAAI/bge-base-en-v1.5") - tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT", 
"http://localhost:8090") - embeddings = TextEmbeddingsInference(model_name=tei_embedding_model_name, base_url=tei_embedding_endpoint) - logger.info("TEI Gaudi Embedding initialized.") - opea_microservices["opea_service@embedding_tei_llamaindex"].start() diff --git a/comps/embeddings/tei/llama_index/local_embedding.py b/comps/embeddings/tei/llama_index/local_embedding.py deleted file mode 100644 index ba9d3dd5a..000000000 --- a/comps/embeddings/tei/llama_index/local_embedding.py +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os - -from llama_index.embeddings.huggingface_api import HuggingFaceInferenceAPIEmbedding - -from comps import CustomLogger, EmbedDoc, ServiceType, TextDoc, opea_microservices, register_microservice - -logger = CustomLogger("local_embedding") -logflag = os.getenv("LOGFLAG", False) - - -@register_microservice( - name="opea_service@local_embedding", - service_type=ServiceType.EMBEDDING, - endpoint="/v1/embeddings", - host="0.0.0.0", - port=6000, - input_datatype=TextDoc, - output_datatype=EmbedDoc, -) -async def embedding(input: TextDoc) -> EmbedDoc: - if logflag: - logger.info(input) - embed_vector = await embeddings.aget_query_embedding(input.text) - res = EmbedDoc(text=input.text, embedding=embed_vector) - if logflag: - logger.info(res) - return res - - -if __name__ == "__main__": - embeddings = HuggingFaceInferenceAPIEmbedding(model_name="BAAI/bge-base-en-v1.5") - opea_microservices["opea_service@local_embedding"].start() diff --git a/comps/embeddings/tei/llama_index/requirements.txt b/comps/embeddings/tei/llama_index/requirements.txt deleted file mode 100644 index 4f1457e4a..000000000 --- a/comps/embeddings/tei/llama_index/requirements.txt +++ /dev/null @@ -1,11 +0,0 @@ -docarray[full] -fastapi -huggingface_hub -llama-index-embeddings-huggingface-api -llama-index-embeddings-text-embeddings-inference -opentelemetry-api -opentelemetry-exporter-otlp -opentelemetry-sdk -prometheus-fastapi-instrumentator -shortuuid -uvicorn diff --git a/comps/finetuning/Dockerfile b/comps/finetuning/Dockerfile index eca81f9cd..c1c0b0bf6 100644 --- a/comps/finetuning/Dockerfile +++ b/comps/finetuning/Dockerfile @@ -28,7 +28,7 @@ RUN python -m pip install --no-cache-dir --upgrade pip && \ ENV PYTHONPATH=$PYTHONPATH:/home/user -WORKDIR /home/user/comps/finetuning +WORKDIR /home/user/comps/finetuning/ RUN echo PKGPATH=$(python3 -c "import pkg_resources; print(pkg_resources.get_distribution('oneccl-bind-pt').location)") >> run.sh && \ echo 'export LD_LIBRARY_PATH=$PKGPATH/oneccl_bindings_for_pytorch/opt/mpi/lib/:$LD_LIBRARY_PATH' >> run.sh && \ diff --git a/comps/finetuning/Dockerfile.intel_hpu b/comps/finetuning/Dockerfile.intel_hpu index ee4b6608b..6acd54a82 100644 --- a/comps/finetuning/Dockerfile.intel_hpu +++ b/comps/finetuning/Dockerfile.intel_hpu @@ -24,6 +24,6 @@ RUN python -m pip install --no-cache-dir --upgrade pip && \ ENV PYTHONPATH=$PYTHONPATH:/home/user -WORKDIR /home/user/comps/finetuning +WORKDIR /home/user/comps/finetuning/ ENTRYPOINT ["/bin/bash", "launch.sh"] diff --git a/comps/finetuning/README.md b/comps/finetuning/README.md index 6f554ca22..14aa12df0 100644 --- a/comps/finetuning/README.md +++ b/comps/finetuning/README.md @@ -244,7 +244,7 @@ curl http://${your_ip}:8015/v1/finetune/list_checkpoints -X POST -H "Content-Typ ### 3.4 Leverage fine-tuned model -After fine-tuning job is done, fine-tuned model can be chosen from listed checkpoints, then the fine-tuned model can be used in other 
microservices. For example, fine-tuned reranking model can be used in [reranks](../reranks/fastrag/README.md) microservice by assign its path to the environment variable `RERANK_MODEL_ID`, fine-tuned embedding model can be used in [embeddings](../embeddings/README.md) microservice by assign its path to the environment variable `model`, LLMs after instruction tuning can be used in [llms](../llms/text-generation/README.md) microservice by assign its path to the environment variable `your_hf_llm_model`. +After fine-tuning job is done, fine-tuned model can be chosen from listed checkpoints, then the fine-tuned model can be used in other microservices. For example, fine-tuned reranking model can be used in [reranks](../reranks/src/README.md) microservice by assign its path to the environment variable `RERANK_MODEL_ID`, fine-tuned embedding model can be used in [embeddings](../embeddings/src/README.md) microservice by assign its path to the environment variable `model`, LLMs after instruction tuning can be used in [llms](../llms/src/text-generation/README.md) microservice by assign its path to the environment variable `your_hf_llm_model`. ## 🚀4. Descriptions for Finetuning parameters diff --git a/comps/finetuning/deployment/docker_compose/README.md b/comps/finetuning/deployment/docker_compose/README.md new file mode 100644 index 000000000..e69de29bb diff --git a/comps/finetuning/deployment/kubernetes/README.md b/comps/finetuning/deployment/kubernetes/README.md new file mode 100644 index 000000000..e69de29bb diff --git a/comps/guardrails/pii_detection/pii/detect/keys_detection.py b/comps/guardrails/pii_detection/pii/detect/keys_detection.py old mode 100755 new mode 100644 diff --git a/comps/image2image/deployment/docker_compose/README.md b/comps/image2image/deployment/docker_compose/README.md new file mode 100644 index 000000000..e69de29bb diff --git a/comps/image2image/deployment/kubernetes/README.md b/comps/image2image/deployment/kubernetes/README.md new file mode 100644 index 000000000..e69de29bb diff --git a/comps/image2image/src/image2image.py b/comps/image2image/src/image2image.py new file mode 100644 index 000000000..36e0cbd4f --- /dev/null +++ b/comps/image2image/src/image2image.py @@ -0,0 +1,117 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import argparse +import base64 +import os +import threading +import time + +import torch +from diffusers import AutoPipelineForImage2Image +from diffusers.utils import load_image + +from comps import ( + CustomLogger, + SDImg2ImgInputs, + SDOutputs, + ServiceType, + opea_microservices, + register_microservice, + register_statistics, + statistics_dict, +) + +logger = CustomLogger("image2image") +pipe = None +args = None +initialization_lock = threading.Lock() +initialized = False + + +def initialize(): + global pipe, args, initialized + with initialization_lock: + if not initialized: + # initialize model and tokenizer + if os.getenv("MODEL", None): + args.model_name_or_path = os.getenv("MODEL") + kwargs = {} + if args.bf16: + kwargs["torch_dtype"] = torch.bfloat16 + if not args.token: + args.token = os.getenv("HF_TOKEN") + if args.device == "hpu": + kwargs.update( + { + "use_habana": True, + "use_hpu_graphs": args.use_hpu_graphs, + "gaudi_config": "Habana/stable-diffusion", + "token": args.token, + } + ) + if "stable-diffusion-xl" in args.model_name_or_path: + from optimum.habana.diffusers import GaudiStableDiffusionXLImg2ImgPipeline + + pipe = GaudiStableDiffusionXLImg2ImgPipeline.from_pretrained( + 
args.model_name_or_path, + **kwargs, + ) + else: + raise NotImplementedError( + "Only support stable-diffusion-xl now, " + f"model {args.model_name_or_path} not supported." + ) + elif args.device == "cpu": + pipe = AutoPipelineForImage2Image.from_pretrained(args.model_name_or_path, token=args.token, **kwargs) + else: + raise NotImplementedError(f"Only support cpu and hpu device now, device {args.device} not supported.") + logger.info("Stable Diffusion model initialized.") + initialized = True + + +@register_microservice( + name="opea_service@image2image", + service_type=ServiceType.IMAGE2IMAGE, + endpoint="/v1/image2image", + host="0.0.0.0", + port=9389, + input_datatype=SDImg2ImgInputs, + output_datatype=SDOutputs, +) +@register_statistics(names=["opea_service@image2image"]) +def image2image(input: SDImg2ImgInputs): + initialize() + start = time.time() + image = load_image(input.image).convert("RGB") + prompt = input.prompt + num_images_per_prompt = input.num_images_per_prompt + + generator = torch.manual_seed(args.seed) + images = pipe(image=image, prompt=prompt, generator=generator, num_images_per_prompt=num_images_per_prompt).images + image_path = os.path.join(os.getcwd(), prompt.strip().replace(" ", "_").replace("/", "")) + os.makedirs(image_path, exist_ok=True) + results = [] + for i, image in enumerate(images): + save_path = os.path.join(image_path, f"image_{i+1}.png") + image.save(save_path) + with open(save_path, "rb") as f: + bytes = f.read() + b64_str = base64.b64encode(bytes).decode() + results.append(b64_str) + statistics_dict["opea_service@image2image"].append_latency(time.time() - start, None) + return SDOutputs(images=results) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--model_name_or_path", type=str, default="stabilityai/stable-diffusion-xl-refiner-1.0") + parser.add_argument("--use_hpu_graphs", default=False, action="store_true") + parser.add_argument("--device", type=str, default="cpu") + parser.add_argument("--token", type=str, default=None) + parser.add_argument("--seed", type=int, default=42) + parser.add_argument("--bf16", action="store_true") + + args = parser.parse_args() + + logger.info("Image2image server started.") + opea_microservices["opea_service@image2image"].start() diff --git a/comps/image2video/src/Dockerfile b/comps/image2video/src/Dockerfile index 8158c7d85..96887dd9f 100644 --- a/comps/image2video/src/Dockerfile +++ b/comps/image2video/src/Dockerfile @@ -19,4 +19,4 @@ ENV PYTHONPATH=$PYTHONPATH:/home WORKDIR /home/comps/image2video/src -ENTRYPOINT ["python", "opea_image2video_microservice.py"] +ENTRYPOINT ["python", "opea_image2video_microservice.py"] \ No newline at end of file diff --git a/comps/image2video/src/Dockerfile.intel_hpu b/comps/image2video/src/Dockerfile.intel_hpu index 987acb82d..67be7913c 100644 --- a/comps/image2video/src/Dockerfile.intel_hpu +++ b/comps/image2video/src/Dockerfile.intel_hpu @@ -11,6 +11,19 @@ COPY comps /home/user/comps RUN chown -R user /home/user/comps/image2video +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# HABANA environment +FROM vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0 AS hpu +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + +COPY comps /home/user/comps + +RUN chown -R user /home/user/comps/image2video + RUN rm -rf /etc/ssh/ssh_host* USER user # Set environment variables diff --git 
a/comps/llms/deployment/docker_compose/text-generation_native_langchain.yaml b/comps/llms/deployment/docker_compose/text-generation_native_langchain.yaml new file mode 100644 index 000000000..241853efc --- /dev/null +++ b/comps/llms/deployment/docker_compose/text-generation_native_langchain.yaml @@ -0,0 +1,29 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +version: "3.8" + +services: + llm: + image: opea/llm-native:latest + container_name: llm-native-server + ports: + - "9000:9000" + runtime: habana + cap_add: + - SYS_NICE + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + LLM_NATIVE_MODEL: ${LLM_NATIVE_MODEL} + HABANA_VISIBLE_DEVICES: all + OMPI_MCA_btl_vader_single_copy_mechanism: none + TOKENIZERS_PARALLELISM: false + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + restart: unless-stopped + +networks: + default: + driver: bridge diff --git a/comps/llms/deployment/docker_compose/text-generation_native_llama_index.yaml b/comps/llms/deployment/docker_compose/text-generation_native_llama_index.yaml new file mode 100644 index 000000000..f3a36e5bb --- /dev/null +++ b/comps/llms/deployment/docker_compose/text-generation_native_llama_index.yaml @@ -0,0 +1,28 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +version: "3.8" + +services: + llm: + image: opea/llm-native:latest + container_name: llm-native-server + ports: + - "9000:9000" + runtime: habana + cap_add: + - SYS_NICE + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + LLM_NATIVE_MODEL: ${LLM_NATIVE_MODEL} + HABANA_VISIBLE_DEVICES: all + OMPI_MCA_btl_vader_single_copy_mechanism: none + TOKENIZERS_PARALLELISM: false + restart: unless-stopped + +networks: + default: + driver: bridge diff --git a/comps/embeddings/tei/langchain/docker_compose_embedding.yaml b/comps/llms/deployment/docker_compose/text-generation_predictionguard.yaml similarity index 52% rename from comps/embeddings/tei/langchain/docker_compose_embedding.yaml rename to comps/llms/deployment/docker_compose/text-generation_predictionguard.yaml index 09493a4a3..bde9fa10a 100644 --- a/comps/embeddings/tei/langchain/docker_compose_embedding.yaml +++ b/comps/llms/deployment/docker_compose/text-generation_predictionguard.yaml @@ -1,20 +1,18 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024 Prediction Guard, Inc # SPDX-License-Identifier: Apache-2.0 -version: "3.8" - services: - embedding: - image: opea/embedding-tei:latest - container_name: embedding-tei-server + llm: + image: opea/llm-textgen-predictionguard:latest + container_name: llm-textgen-predictionguard ports: - - "6000:6000" + - "9000:9000" ipc: host environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + PREDICTIONGUARD_API_KEY: ${PREDICTIONGUARD_API_KEY} restart: unless-stopped networks: diff --git a/comps/llms/text-generation/tgi/docker_compose_llm.yaml b/comps/llms/deployment/docker_compose/text-generation_tgi.yaml similarity index 60% rename from comps/llms/text-generation/tgi/docker_compose_llm.yaml rename to comps/llms/deployment/docker_compose/text-generation_tgi.yaml index 36269aeea..63514f968 100644 --- a/comps/llms/text-generation/tgi/docker_compose_llm.yaml +++ b/comps/llms/deployment/docker_compose/text-generation_tgi.yaml @@ -5,7 +5,7 @@ version: "3.8" services: tgi_service: - image: 
ghcr.io/huggingface/text-generation-inference:2.1.0 + image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu container_name: tgi-service ports: - "8008:80" @@ -19,19 +19,28 @@ services: HF_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 - command: --model-id ${LLM_MODEL_ID} + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8008/health"] + interval: 10s + timeout: 10s + retries: 30 + command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0 llm: - image: opea/llm-tgi:latest - container_name: llm-tgi-server + image: opea/llm:latest + container_name: llm-server ports: - "9000:9000" ipc: host + depends_on: + tgi_service: + condition: service_healthy environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} + LLM_ENDPOINT: ${LLM_ENDPOINT} HF_TOKEN: ${HF_TOKEN} + LLM_MODEL_ID: ${LLM_MODEL_ID} restart: unless-stopped networks: diff --git a/comps/llms/deployment/docker_compose/text-generation_vllm_langchain.yaml b/comps/llms/deployment/docker_compose/text-generation_vllm_langchain.yaml new file mode 100644 index 000000000..077ceee8b --- /dev/null +++ b/comps/llms/deployment/docker_compose/text-generation_vllm_langchain.yaml @@ -0,0 +1,46 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +version: "3.8" + +services: + vllm-service: + image: opea/vllm-gaudi:latest + container_name: vllm-gaudi-server + ports: + - "8008:80" + volumes: + - "./data:/data" + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HF_TOKEN: ${HF_TOKEN} + HABANA_VISIBLE_DEVICES: all + OMPI_MCA_btl_vader_single_copy_mechanism: none + LLM_MODEL: ${LLM_MODEL} + runtime: habana + cap_add: + - SYS_NICE + ipc: host + command: --model $LLM_MODEL --tensor-parallel-size 1 --host 0.0.0.0 --port 80 + llm: + image: opea/llm-vllm:latest + container_name: llm-vllm-gaudi-server + depends_on: + - vllm-service + ports: + - "9000:9000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + vLLM_ENDPOINT: ${vLLM_ENDPOINT} + HF_TOKEN: ${HF_TOKEN} + LLM_MODEL: ${LLM_MODEL} + restart: unless-stopped + +networks: + default: + driver: bridge diff --git a/comps/llms/text-generation/vllm/llama_index/docker_compose_llm.yaml b/comps/llms/deployment/docker_compose/text-generation_vllm_llama_index.yaml similarity index 100% rename from comps/llms/text-generation/vllm/llama_index/docker_compose_llm.yaml rename to comps/llms/deployment/docker_compose/text-generation_vllm_llama_index.yaml diff --git a/comps/llms/deployment/kubernetes/README.md b/comps/llms/deployment/kubernetes/README.md new file mode 100644 index 000000000..e69de29bb diff --git a/comps/llms/text-generation/tgi/Dockerfile b/comps/llms/src/text-generation/Dockerfile similarity index 77% rename from comps/llms/text-generation/tgi/Dockerfile rename to comps/llms/src/text-generation/Dockerfile index bbdd3bf6a..3d080be3b 100644 --- a/comps/llms/text-generation/tgi/Dockerfile +++ b/comps/llms/src/text-generation/Dockerfile @@ -16,10 +16,11 @@ USER user COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip setuptools && \ - pip install --no-cache-dir -r /home/user/comps/llms/text-generation/tgi/requirements.txt + pip install --no-cache-dir -r /home/user/comps/llms/src/text-generation/requirements.txt ENV PYTHONPATH=$PYTHONPATH:/home/user -WORKDIR /home/user/comps/llms/text-generation/tgi +WORKDIR 
/home/user/comps/llms/src/text-generation ENTRYPOINT ["bash", "entrypoint.sh"] + diff --git a/comps/llms/text-generation/tgi/README.md b/comps/llms/src/text-generation/README.md similarity index 81% rename from comps/llms/text-generation/tgi/README.md rename to comps/llms/src/text-generation/README.md index 04216a5ea..8cb316fa7 100644 --- a/comps/llms/text-generation/tgi/README.md +++ b/comps/llms/src/text-generation/README.md @@ -12,24 +12,11 @@ To start the LLM microservice, you need to install python packages first. pip install -r requirements.txt ``` -### 1.2 Start LLM Service +### 1.2 Start 3rd-party TGI Service -```bash -export HF_TOKEN=${your_hf_api_token} -export LLM_MODEL_ID=${your_hf_llm_model} -docker run -p 8008:80 -v ./data:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id $LLM_MODEL_ID -``` +Please refer to [3rd-party TGI](../../../3rd_parties/tgi/deployment/docker_compose/) to start a LLM endpoint and verify. -### 1.3 Verify the TGI Service - -```bash -curl http://${your_ip}:8008/v1/chat/completions \ - -X POST \ - -d '{"model": ${LLM_MODEL_ID}, "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens":17}' \ - -H 'Content-Type: application/json' -``` - -### 1.4 Start LLM Service with Python Script +### 1.3 Start LLM Service with Python Script ```bash export TGI_LLM_ENDPOINT="http://${your_ip}:8008" @@ -54,7 +41,7 @@ export LLM_MODEL_ID=${your_hf_llm_model} ```bash cd ../../../../ -docker build -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile . +docker build -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/text-generation/Dockerfile . ``` To start a docker container, you have two options: @@ -73,8 +60,8 @@ docker run -d --name="llm-tgi-server" -p 9000:9000 --ipc=host -e http_proxy=$htt ### 2.4 Run Docker with Docker Compose (Option B) ```bash -cd text-generation/tgi -docker compose -f docker_compose_llm.yaml up -d +cd comps/llms/deployment/docker_compose/ +docker compose -f text-generation_tgi.yaml up -d ``` ## 🚀3. Consume LLM Service diff --git a/comps/embeddings/multimodal_clip/__init__.py b/comps/llms/src/text-generation/__init__.py similarity index 100% rename from comps/embeddings/multimodal_clip/__init__.py rename to comps/llms/src/text-generation/__init__.py diff --git a/comps/reranks/mosec/langchain/__init__.py b/comps/llms/src/text-generation/entrypoint.sh similarity index 58% rename from comps/reranks/mosec/langchain/__init__.py rename to comps/llms/src/text-generation/entrypoint.sh index 916f3a44b..f00246e61 100644 --- a/comps/reranks/mosec/langchain/__init__.py +++ b/comps/llms/src/text-generation/entrypoint.sh @@ -1,2 +1,6 @@ +#!/usr/bin/env bash + # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 + +python opea_llm_microservice.py diff --git a/comps/embeddings/tei/langchain/__init__.py b/comps/llms/src/text-generation/integrations/__init__.py similarity index 100% rename from comps/embeddings/tei/langchain/__init__.py rename to comps/llms/src/text-generation/integrations/__init__.py diff --git a/comps/llms/src/text-generation/integrations/opea.py b/comps/llms/src/text-generation/integrations/opea.py new file mode 100644 index 000000000..877e8311b --- /dev/null +++ b/comps/llms/src/text-generation/integrations/opea.py @@ -0,0 +1,261 @@ +# Copyright (C) 2024 Prediction Guard, Inc. 
+# SPDX-License-Identifier: Apache-2.0 + +import asyncio +import os +from typing import Union + +from fastapi.responses import StreamingResponse +from langchain_core.prompts import PromptTemplate +from openai import AsyncOpenAI + +from comps import CustomLogger, LLMParamsDoc, OpeaComponent, SearchedDoc, ServiceType +from comps.cores.mega.utils import ConfigError, get_access_token, load_model_configs +from comps.cores.proto.api_protocol import ChatCompletionRequest + +from .template import ChatTemplate + +logger = CustomLogger("opea_llm") +logflag = os.getenv("LOGFLAG", False) + +# Environment variables +MODEL_NAME = os.getenv("LLM_MODEL_ID") +MODEL_CONFIGS = os.getenv("MODEL_CONFIGS") +DEFAULT_ENDPOINT = os.getenv("LLM_ENDPOINT") +TOKEN_URL = os.getenv("TOKEN_URL") +CLIENTID = os.getenv("CLIENTID") +CLIENT_SECRET = os.getenv("CLIENT_SECRET") +OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "EMPTY") + +# Validate and Load the models config if MODEL_CONFIGS is not null +configs_map = {} +if MODEL_CONFIGS: + try: + configs_map = load_model_configs(MODEL_CONFIGS) + except ConfigError as e: + logger.error(f"Failed to load model configurations: {e}") + raise ConfigError(f"Failed to load model configurations: {e}") + + +def get_llm_endpoint(): + if not MODEL_CONFIGS: + return DEFAULT_ENDPOINT + try: + return configs_map.get(MODEL_NAME).get("endpoint") + except ConfigError as e: + logger.error(f"Input model {MODEL_NAME} not present in model_configs. Error {e}") + raise ConfigError(f"Input model {MODEL_NAME} not present in model_configs") + + +class OPEALLM(OpeaComponent): + """A specialized OPEA LLM component derived from OpeaComponent for interacting with TGI/vLLM services based on OpenAI API. + + Attributes: + client (TGI/vLLM): An instance of the TGI/vLLM client for text generation. + """ + + def __init__(self, name: str, description: str, config: dict = None): + super().__init__(name, ServiceType.LLM.name.lower(), description, config) + self.client = self._initialize_client() + + def _initialize_client(self) -> AsyncOpenAI: + """Initializes the AsyncOpenAI.""" + access_token = ( + get_access_token(TOKEN_URL, CLIENTID, CLIENT_SECRET) if TOKEN_URL and CLIENTID and CLIENT_SECRET else None + ) + headers = {} + if access_token: + headers = {"Authorization": f"Bearer {access_token}"} + llm_endpoint = get_llm_endpoint() + return AsyncOpenAI(api_key=OPENAI_API_KEY, base_url=llm_endpoint + "/v1", timeout=600, default_headers=headers) + + def check_health(self) -> bool: + """Checks the health of the TGI/vLLM LLM service. + + Returns: + bool: True if the service is reachable and healthy, False otherwise.
+ """ + + try: + + async def send_simple_request(): + response = await self.client.completions.create(model=MODEL_NAME, prompt="How are you?", max_tokens=4) + return response + + response = asyncio.run(send_simple_request()) + return response is not None + except Exception as e: + logger.error(e) + logger.error("Health check failed") + return False + + def align_input( + self, input: Union[LLMParamsDoc, ChatCompletionRequest, SearchedDoc], prompt_template, input_variables + ): + if isinstance(input, SearchedDoc): + if logflag: + logger.info("[ SearchedDoc ] input from retriever microservice") + prompt = input.initial_query + if input.retrieved_docs: + docs = [doc.text for doc in input.retrieved_docs] + if logflag: + logger.info(f"[ SearchedDoc ] combined retrieved docs: {docs}") + prompt = ChatTemplate.generate_rag_prompt(input.initial_query, docs, MODEL_NAME) + + ## use default ChatCompletionRequest parameters + new_input = ChatCompletionRequest(messages=prompt) + + if logflag: + logger.info(f"[ SearchedDoc ] final input: {new_input}") + + return prompt, new_input + + elif isinstance(input, LLMParamsDoc): + if logflag: + logger.info("[ LLMParamsDoc ] input from rerank microservice") + prompt = input.query + if prompt_template: + if sorted(input_variables) == ["context", "question"]: + prompt = prompt_template.format(question=input.query, context="\n".join(input.documents)) + elif input_variables == ["question"]: + prompt = prompt_template.format(question=input.query) + else: + logger.info( + f"[ LLMParamsDoc ] {prompt_template} not used, we only support 2 input variables ['question', 'context']" + ) + else: + if input.documents: + # use rag default template + prompt = ChatTemplate.generate_rag_prompt(input.query, input.documents, input.model) + + # convert to unified OpenAI /v1/chat/completions format + new_input = ChatCompletionRequest( + messages=prompt, + max_tokens=input.max_tokens, + top_p=input.top_p, + stream=input.streaming, + frequency_penalty=input.frequency_penalty, + temperature=input.temperature, + ) + + return prompt, new_input + + else: + if logflag: + logger.info("[ ChatCompletionRequest ] input in opea format") + + prompt = input.messages + if prompt_template: + if sorted(input_variables) == ["context", "question"]: + prompt = prompt_template.format(question=input.messages, context="\n".join(input.documents)) + elif input_variables == ["question"]: + prompt = prompt_template.format(question=input.messages) + else: + logger.info( + f"[ ChatCompletionRequest ] {prompt_template} not used, we only support 2 input variables ['question', 'context']" + ) + else: + if input.documents: + # use rag default template + prompt = ChatTemplate.generate_rag_prompt(input.messages, input.documents, input.model) + + return prompt, input + + async def invoke(self, input: Union[LLMParamsDoc, ChatCompletionRequest, SearchedDoc]): + """Invokes the TGI/vLLM LLM service to generate output for the provided input. + + Args: + input (Union[LLMParamsDoc, ChatCompletionRequest, SearchedDoc]): The input text(s). 
+ """ + + prompt_template = None + input_variables = None + if not isinstance(input, SearchedDoc) and input.chat_template: + prompt_template = PromptTemplate.from_template(input.chat_template) + input_variables = prompt_template.input_variables + + if isinstance(input, ChatCompletionRequest) and not isinstance(input.messages, str): + if logflag: + logger.info("[ ChatCompletionRequest ] input in opea format") + + if input.messages[0]["role"] == "system": + if "{context}" in input.messages[0]["content"]: + if input.documents is None or input.documents == []: + input.messages[0]["content"].format(context="") + else: + input.messages[0]["content"].format(context="\n".join(input.documents)) + else: + if prompt_template: + system_prompt = prompt_template + if input_variables == ["context"]: + system_prompt = prompt_template.format(context="\n".join(input.documents)) + else: + logger.info( + f"[ ChatCompletionRequest ] {prompt_template} not used, only support 1 input variables ['context']" + ) + + input.messages.insert(0, {"role": "system", "content": system_prompt}) + + chat_completion = await self.client.chat.completions.create( + model=MODEL_NAME, + messages=input.messages, + frequency_penalty=input.frequency_penalty, + max_tokens=input.max_tokens, + n=input.n, + presence_penalty=input.presence_penalty, + response_format=input.response_format, + seed=input.seed, + stop=input.stop, + stream=input.stream, + stream_options=input.stream_options, + temperature=input.temperature, + top_p=input.top_p, + user=input.user, + ) + """TODO need validate following parameters for vllm + logit_bias=input.logit_bias, + logprobs=input.logprobs, + top_logprobs=input.top_logprobs, + service_tier=input.service_tier, + tools=input.tools, + tool_choice=input.tool_choice, + parallel_tool_calls=input.parallel_tool_calls,""" + else: + prompt, input = self.align_input(input, prompt_template, input_variables) + chat_completion = await self.client.completions.create( + model=MODEL_NAME, + prompt=prompt, + echo=input.echo, + frequency_penalty=input.frequency_penalty, + max_tokens=input.max_tokens, + n=input.n, + presence_penalty=input.presence_penalty, + seed=input.seed, + stop=input.stop, + stream=input.stream, + suffix=input.suffix, + temperature=input.temperature, + top_p=input.top_p, + user=input.user, + ) + """TODO need validate following parameters for vllm + best_of=input.best_of, + logit_bias=input.logit_bias, + logprobs=input.logprobs,""" + + if input.stream: + + async def stream_generator(): + async for c in chat_completion: + if logflag: + logger.info(c) + chunk = c.model_dump_json() + if chunk not in ["<|im_end|>", "<|endoftext|>"]: + yield f"data: {chunk}\n\n" + yield "data: [DONE]\n\n" + + return StreamingResponse(stream_generator(), media_type="text/event-stream") + else: + if logflag: + logger.info(chat_completion) + return chat_completion diff --git a/comps/llms/text-generation/tgi/template.py b/comps/llms/src/text-generation/integrations/template.py similarity index 100% rename from comps/llms/text-generation/tgi/template.py rename to comps/llms/src/text-generation/integrations/template.py diff --git a/comps/llms/src/text-generation/opea_llm_microservice.py b/comps/llms/src/text-generation/opea_llm_microservice.py new file mode 100644 index 000000000..262214586 --- /dev/null +++ b/comps/llms/src/text-generation/opea_llm_microservice.py @@ -0,0 +1,74 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os +import time +from typing import Union + +from 
integrations.opea import OPEALLM + +from comps import ( + CustomLogger, + LLMParamsDoc, + OpeaComponentController, + SearchedDoc, + ServiceType, + opea_microservices, + register_microservice, + register_statistics, + statistics_dict, +) +from comps.cores.proto.api_protocol import ChatCompletionRequest + +logger = CustomLogger("llm") +logflag = os.getenv("LOGFLAG", False) + +# Initialize OpeaComponentController +controller = OpeaComponentController() + +# Register components +try: + opea_llm = OPEALLM( + name="OPEALLM", + description="OPEA LLM Service, compatible with OpenAI API", + ) + + # Register components with the controller + controller.register(opea_llm) + + # Discover and activate a healthy component + controller.discover_and_activate() +except Exception as e: + logger.error(f"Failed to initialize components: {e}") + + +@register_microservice( + name="opea_service@llm", + service_type=ServiceType.LLM, + endpoint="/v1/chat/completions", + host="0.0.0.0", + port=9000, +) +@register_statistics(names=["opea_service@llm"]) +async def llm_generate(input: Union[LLMParamsDoc, ChatCompletionRequest, SearchedDoc]): + start = time.time() + + # Log the input if logging is enabled + if logflag: + logger.info(input) + + try: + # Use the controller to invoke the active component + response = await controller.invoke(input) + # Record statistics + statistics_dict["opea_service@llm"].append_latency(time.time() - start, None) + return response + + except Exception as e: + logger.error(f"Error during LLM invocation: {e}") + raise + + +if __name__ == "__main__": + logger.info("OPEA LLM Microservice is starting...") + opea_microservices["opea_service@llm"].start() diff --git a/comps/llms/text-generation/tgi/requirements.txt b/comps/llms/src/text-generation/requirements.txt similarity index 86% rename from comps/llms/text-generation/tgi/requirements.txt rename to comps/llms/src/text-generation/requirements.txt index 4aebb318a..85b06a876 100644 --- a/comps/llms/text-generation/tgi/requirements.txt +++ b/comps/llms/src/text-generation/requirements.txt @@ -1,8 +1,9 @@ aiohttp docarray[full] fastapi -httpx +httpx==0.27.2 huggingface_hub +langchain_core openai==1.57.4 opentelemetry-api opentelemetry-exporter-otlp diff --git a/comps/llms/text-generation/README.md b/comps/llms/text-generation/README.md index 824d8a227..4438d7ad1 100644 --- a/comps/llms/text-generation/README.md +++ b/comps/llms/text-generation/README.md @@ -127,8 +127,6 @@ In order to start the microservices with docker, you need to build the docker im ### 2.1 Build Docker Image -#### 2.1.1 TGI - ```bash # Build the microservice docker cd ${OPEA_GENAICOMPS_ROOT} @@ -136,24 +134,8 @@ cd ${OPEA_GENAICOMPS_ROOT} docker build \ --build-arg https_proxy=$https_proxy \ --build-arg http_proxy=$http_proxy \ - -t opea/llm-tgi:latest \ - -f comps/llms/text-generation/tgi/Dockerfile . -``` - -#### 2.1.2 vLLM - -```bash -# Build vllm docker -bash ${OPEA_GENAICOMPS_ROOT}/comps/llms/text-generation/vllm/langchain/dependency/build_docker_vllm.sh hpu - -# Build the microservice docker -cd ${OPEA_GENAICOMPS_ROOT} - -docker build \ - --build-arg https_proxy=$https_proxy \ - --build-arg http_proxy=$http_proxy \ - -t opea/llm-vllm:latest \ - -f comps/llms/text-generation/vllm/langchain/Dockerfile . + -t opea/llm:latest \ + -f comps/llms/src/text-generation/Dockerfile . 
``` ### 2.2 Start LLM Service with the built image diff --git a/comps/llms/text-generation/tgi/llm.py b/comps/llms/text-generation/tgi/llm.py deleted file mode 100644 index dcd926f61..000000000 --- a/comps/llms/text-generation/tgi/llm.py +++ /dev/null @@ -1,289 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os -import time -from typing import Union - -from fastapi.responses import StreamingResponse -from huggingface_hub import AsyncInferenceClient -from langchain_core.prompts import PromptTemplate -from openai import OpenAI -from template import ChatTemplate - -from comps import ( - CustomLogger, - GeneratedDoc, - LLMParamsDoc, - SearchedDoc, - ServiceType, - opea_microservices, - register_microservice, - register_statistics, - statistics_dict, -) -from comps.cores.mega.utils import ConfigError, get_access_token, load_model_configs -from comps.cores.proto.api_protocol import ChatCompletionRequest - -logger = CustomLogger("llm_tgi") -logflag = os.getenv("LOGFLAG", False) - -# Environment variables -MODEL_CONFIGS = os.getenv("MODEL_CONFIGS") -DEFAULT_ENDPOINT = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080") -TOKEN_URL = os.getenv("TOKEN_URL") -CLIENTID = os.getenv("CLIENTID") -CLIENT_SECRET = os.getenv("CLIENT_SECRET") - -# Validate and Load the models config if MODEL_CONFIGS is not null -configs_map = {} -if MODEL_CONFIGS: - try: - configs_map = load_model_configs(MODEL_CONFIGS) - except ConfigError as e: - logger.error(f"Failed to load model configurations: {e}") - raise ConfigError(f"Failed to load model configurations: {e}") - - -def get_llm_endpoint(model): - if not MODEL_CONFIGS: - return DEFAULT_ENDPOINT - try: - return configs_map.get(model).get("endpoint") - except ConfigError as e: - logger.error(f"Input model {model} not present in model_configs. 
Error {e}") - raise ConfigError(f"Input model {model} not present in model_configs") - - -@register_microservice( - name="opea_service@llm_tgi", - service_type=ServiceType.LLM, - endpoint="/v1/chat/completions", - host="0.0.0.0", - port=9000, -) -@register_statistics(names=["opea_service@llm_tgi"]) -async def llm_generate(input: Union[LLMParamsDoc, ChatCompletionRequest, SearchedDoc]): - if logflag: - logger.info(input) - - access_token = ( - get_access_token(TOKEN_URL, CLIENTID, CLIENT_SECRET) if TOKEN_URL and CLIENTID and CLIENT_SECRET else None - ) - headers = {} - if access_token: - headers = {"Authorization": f"Bearer {access_token}"} - llm_endpoint = get_llm_endpoint(input.model) - llm = AsyncInferenceClient(model=llm_endpoint, timeout=600, headers=headers) - - prompt_template = None - if not isinstance(input, SearchedDoc) and input.chat_template: - prompt_template = PromptTemplate.from_template(input.chat_template) - input_variables = prompt_template.input_variables - - stream_gen_time = [] - start = time.time() - - if isinstance(input, SearchedDoc): - if logflag: - logger.info("[ SearchedDoc ] input from retriever microservice") - prompt = input.initial_query - if input.retrieved_docs: - docs = [doc.text for doc in input.retrieved_docs] - if logflag: - logger.info(f"[ SearchedDoc ] combined retrieved docs: {docs}") - prompt = ChatTemplate.generate_rag_prompt(input.initial_query, docs, input.model) - # use default llm parameters for inferencing - new_input = LLMParamsDoc(query=prompt) - if logflag: - logger.info(f"[ SearchedDoc ] final input: {new_input}") - text_generation = await llm.text_generation( - prompt=prompt, - stream=new_input.streaming, - max_new_tokens=new_input.max_tokens, - repetition_penalty=new_input.repetition_penalty, - temperature=new_input.temperature, - top_k=new_input.top_k, - top_p=new_input.top_p, - ) - if new_input.streaming: - - async def stream_generator(): - chat_response = "" - async for text in text_generation: - stream_gen_time.append(time.time() - start) - if text not in ["<|im_end|>", "<|endoftext|>"]: - chat_response += text - chunk_repr = repr(text.encode("utf-8")) - if logflag: - logger.info(f"[ SearchedDoc ] chunk:{chunk_repr}") - yield f"data: {chunk_repr}\n\n" - if logflag: - logger.info(f"[ SearchedDoc ] stream response: {chat_response}") - statistics_dict["opea_service@llm_tgi"].append_latency(stream_gen_time[-1], stream_gen_time[0]) - yield "data: [DONE]\n\n" - - return StreamingResponse(stream_generator(), media_type="text/event-stream") - else: - statistics_dict["opea_service@llm_tgi"].append_latency(time.time() - start, None) - if logflag: - logger.info(text_generation) - return GeneratedDoc(text=text_generation, prompt=new_input.query) - - elif isinstance(input, LLMParamsDoc): - if logflag: - logger.info("[ LLMParamsDoc ] input from rerank microservice") - prompt = input.query - if prompt_template: - if sorted(input_variables) == ["context", "question"]: - prompt = prompt_template.format(question=input.query, context="\n".join(input.documents)) - elif input_variables == ["question"]: - prompt = prompt_template.format(question=input.query) - else: - logger.info( - f"[ LLMParamsDoc ] {prompt_template} not used, we only support 2 input variables ['question', 'context']" - ) - else: - if input.documents: - # use rag default template - prompt = ChatTemplate.generate_rag_prompt(input.query, input.documents, input.model) - - text_generation = await llm.text_generation( - prompt=prompt, - stream=input.streaming, - 
max_new_tokens=input.max_tokens, - repetition_penalty=input.repetition_penalty, - temperature=input.temperature, - top_k=input.top_k, - top_p=input.top_p, - ) - if input.streaming: - - async def stream_generator(): - chat_response = "" - async for text in text_generation: - stream_gen_time.append(time.time() - start) - if text not in ["<|im_end|>", "<|endoftext|>"]: - chat_response += text - chunk_repr = repr(text.encode("utf-8")) - if logflag: - logger.info(f"[ LLMParamsDoc ] chunk:{chunk_repr}") - yield f"data: {chunk_repr}\n\n" - if logflag: - logger.info(f"[ LLMParamsDoc ] stream response: {chat_response}") - statistics_dict["opea_service@llm_tgi"].append_latency(stream_gen_time[-1], stream_gen_time[0]) - yield "data: [DONE]\n\n" - - return StreamingResponse(stream_generator(), media_type="text/event-stream") - else: - statistics_dict["opea_service@llm_tgi"].append_latency(time.time() - start, None) - if logflag: - logger.info(text_generation) - return GeneratedDoc(text=text_generation, prompt=input.query) - - else: - if logflag: - logger.info("[ ChatCompletionRequest ] input in opea format") - client = OpenAI( - api_key="EMPTY", - base_url=llm_endpoint + "/v1", - ) - - if isinstance(input.messages, str): - prompt = input.messages - if prompt_template: - if sorted(input_variables) == ["context", "question"]: - prompt = prompt_template.format(question=input.messages, context="\n".join(input.documents)) - elif input_variables == ["question"]: - prompt = prompt_template.format(question=input.messages) - else: - logger.info( - f"[ ChatCompletionRequest ] {prompt_template} not used, we only support 2 input variables ['question', 'context']" - ) - else: - if input.documents: - # use rag default template - prompt = ChatTemplate.generate_rag_prompt(input.messages, input.documents, input.model) - - chat_completion = client.completions.create( - model="tgi", - prompt=prompt, - best_of=input.best_of, - echo=input.echo, - frequency_penalty=input.frequency_penalty, - logit_bias=input.logit_bias, - logprobs=input.logprobs, - max_tokens=input.max_tokens, - n=input.n, - presence_penalty=input.presence_penalty, - seed=input.seed, - stop=input.stop, - stream=input.stream, - suffix=input.suffix, - temperature=input.temperature, - top_p=input.top_p, - user=input.user, - ) - else: - if input.messages[0]["role"] == "system": - if "{context}" in input.messages[0]["content"]: - if input.documents is None or input.documents == []: - input.messages[0]["content"].format(context="") - else: - input.messages[0]["content"].format(context="\n".join(input.documents)) - else: - if prompt_template: - system_prompt = prompt_template - if input_variables == ["context"]: - system_prompt = prompt_template.format(context="\n".join(input.documents)) - else: - logger.info( - f"[ ChatCompletionRequest ] {prompt_template} not used, only support 1 input variables ['context']" - ) - - input.messages.insert(0, {"role": "system", "content": system_prompt}) - - chat_completion = client.chat.completions.create( - model="tgi", - messages=input.messages, - frequency_penalty=input.frequency_penalty, - logit_bias=input.logit_bias, - logprobs=input.logprobs, - top_logprobs=input.top_logprobs, - max_tokens=input.max_tokens, - n=input.n, - presence_penalty=input.presence_penalty, - response_format=input.response_format, - seed=input.seed, - service_tier=input.service_tier, - stop=input.stop, - stream=input.stream, - stream_options=input.stream_options, - temperature=input.temperature, - top_p=input.top_p, - tools=input.tools, - 
tool_choice=input.tool_choice, - parallel_tool_calls=input.parallel_tool_calls, - user=input.user, - ) - - if input.stream: - - def stream_generator(): - for c in chat_completion: - if logflag: - logger.info(c) - chunk = c.model_dump_json() - if chunk not in ["<|im_end|>", "<|endoftext|>"]: - yield f"data: {chunk}\n\n" - yield "data: [DONE]\n\n" - - return StreamingResponse(stream_generator(), media_type="text/event-stream") - else: - if logflag: - logger.info(chat_completion) - return chat_completion - - -if __name__ == "__main__": - opea_microservices["opea_service@llm_tgi"].start() diff --git a/comps/llms/text-generation/vllm/langchain/Dockerfile b/comps/llms/text-generation/vllm/langchain/Dockerfile deleted file mode 100644 index 025d6dbd5..000000000 --- a/comps/llms/text-generation/vllm/langchain/Dockerfile +++ /dev/null @@ -1,31 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -FROM python:3.11-slim - -ARG ARCH="cpu" # Set this to "cpu" or "gpu" - -RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ - libgl1-mesa-glx \ - libjemalloc-dev - -RUN useradd -m -s /bin/bash user && \ - mkdir -p /home/user && \ - chown -R user /home/user/ - -USER user - -COPY comps /home/user/comps - -RUN pip install --no-cache-dir --upgrade pip setuptools && \ - if [ ${ARCH} = "cpu" ]; then \ - pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r /home/user/comps/llms/text-generation/vllm/langchain/requirements.txt; \ - else \ - pip install --no-cache-dir -r /home/user/comps/llms/text-generation/vllm/langchain/requirements.txt; \ - fi - -ENV PYTHONPATH=$PYTHONPATH:/home/user - -WORKDIR /home/user/comps/llms/text-generation/vllm/langchain - -ENTRYPOINT ["bash", "entrypoint.sh"] diff --git a/comps/llms/text-generation/vllm/langchain/build_docker_microservice.sh b/comps/llms/text-generation/vllm/langchain/build_docker_microservice.sh deleted file mode 100644 index 1e504d868..000000000 --- a/comps/llms/text-generation/vllm/langchain/build_docker_microservice.sh +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -cd ../../../../../ -docker build \ - -t opea/llm-vllm:latest \ - --build-arg https_proxy=$https_proxy \ - --build-arg http_proxy=$http_proxy \ - -f comps/llms/text-generation/vllm/langchain/Dockerfile . 
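With the per-backend `llm-tgi`/`llm-vllm` wrappers removed above in favor of the single `opea/llm` microservice registered in `opea_llm_microservice.py`, a quick way to sanity-check the consolidated service is to hit its `/v1/chat/completions` endpoint on port 9000. The snippet below is only a sketch: it assumes the `text-generation_tgi.yaml` stack is already up, that `LLM_MODEL_ID` is exported, and that `${host_ip}` resolves to the host running the `llm-server` container.

```bash
# Hedged smoke test for the consolidated opea/llm microservice (OpenAI-style payload).
curl http://${host_ip}:9000/v1/chat/completions \
  -X POST \
  -H 'Content-Type: application/json' \
  -d '{"model": "'"${LLM_MODEL_ID}"'", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 17}'
```

Since `llm_generate` also accepts `LLMParamsDoc` and `SearchedDoc` inputs, the older `{"query": "..."}`-style body used elsewhere in this patch should continue to work against the same endpoint.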
diff --git a/comps/llms/text-generation/vllm/llama_index/Dockerfile b/comps/llms/text-generation/vllm/llama_index/Dockerfile deleted file mode 100644 index 48429eb7e..000000000 --- a/comps/llms/text-generation/vllm/llama_index/Dockerfile +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -FROM ubuntu:22.04 - -RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ - libgl1-mesa-glx \ - libjemalloc-dev \ - python3 \ - python3-pip - -RUN useradd -m -s /bin/bash user && \ - mkdir -p /home/user && \ - chown -R user /home/user/ - -USER user - -COPY comps /home/user/comps - -RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir -r /home/user/comps/llms/text-generation/vllm/llama_index/requirements.txt - - -ENV PYTHONPATH=$PYTHONPATH:/home/user - -WORKDIR /home/user/comps/llms/text-generation/vllm/llama_index - -ENTRYPOINT ["bash", "entrypoint.sh"] diff --git a/comps/llms/text-generation/vllm/llama_index/README.md b/comps/llms/text-generation/vllm/llama_index/README.md deleted file mode 100644 index 1cd254c88..000000000 --- a/comps/llms/text-generation/vllm/llama_index/README.md +++ /dev/null @@ -1,179 +0,0 @@ -# vLLM Endpoint Service - -[vLLM](https://github.com/vllm-project/vllm) is a fast and easy-to-use library for LLM inference and serving, it delivers state-of-the-art serving throughput with a set of advanced features such as PagedAttention, Continuous batching and etc.. Besides GPUs, vLLM already supported [Intel CPUs](https://www.intel.com/content/www/us/en/products/overview.html) and [Gaudi accelerators](https://habana.ai/products). This guide provides an example on how to launch vLLM serving endpoint on CPU and Gaudi accelerators. - -## 🚀1. Set up Environment Variables - -```bash -export HUGGINGFACEHUB_API_TOKEN= -export vLLM_ENDPOINT="http://${your_ip}:8008" -export LLM_MODEL="meta-llama/Meta-Llama-3-8B-Instruct" -``` - -For gated models such as `LLAMA-2`, you will have to pass the environment HUGGINGFACEHUB_API_TOKEN. Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get the access token and export `HUGGINGFACEHUB_API_TOKEN` environment with the token. - -## 🚀2. Set up vLLM Service - -First of all, go to the server folder for vllm. - -```bash -cd dependency -``` - -### 2.1 vLLM on CPU - -First let's enable VLLM on CPU. - -#### Build docker - -```bash -bash ./build_docker_vllm.sh -``` - -The `build_docker_vllm` accepts one parameter `hw_mode` to specify the hardware mode of the service, with the default being `cpu`, and the optional selection can be `hpu`. - -#### Launch vLLM service - -```bash -bash ./launch_vllm_service.sh -``` - -If you want to customize the port or model_name, can run: - -```bash -bash ./launch_vllm_service.sh ${port_number} ${model_name} -``` - -### 2.2 vLLM on Gaudi - -Then we show how to enable VLLM on Gaudi. - -#### Build docker - -```bash -bash ./build_docker_vllm.sh hpu -``` - -Set `hw_mode` to `hpu`. - -#### Launch vLLM service on single node - -For small model, we can just use single node. - -```bash -bash ./launch_vllm_service.sh ${port_number} ${model_name} hpu 1 -``` - -Set `hw_mode` to `hpu` and `parallel_number` to 1. - -The `launch_vllm_service.sh` script accepts 7 parameters: - -- port_number: The port number assigned to the vLLM CPU endpoint, with the default being 8008. -- model_name: The model name utilized for LLM, with the default set to 'meta-llama/Meta-Llama-3-8B-Instruct'. 
-- hw_mode: The hardware mode utilized for LLM, with the default set to "cpu", and the optional selection can be "hpu". -- parallel_number: parallel nodes number for 'hpu' mode -- block_size: default set to 128 for better performance on HPU -- max_num_seqs: default set to 256 for better performance on HPU -- max_seq_len_to_capture: default set to 2048 for better performance on HPU - -If you want to get more performance tuning tips, can refer to [Performance tuning](https://github.com/HabanaAI/vllm-fork/blob/habana_main/README_GAUDI.md#performance-tips). - -#### Launch vLLM service on multiple nodes - -For large model such as `meta-llama/Meta-Llama-3-70b`, we need to launch on multiple nodes. - -```bash -bash ./launch_vllm_service.sh ${port_number} ${model_name} hpu ${parallel_number} -``` - -For example, if we run `meta-llama/Meta-Llama-3-70b` with 8 cards, we can use following command. - -```bash -bash ./launch_vllm_service.sh 8008 meta-llama/Meta-Llama-3-70b hpu 8 -``` - -### 2.3 vLLM with OpenVINO - -vLLM powered by OpenVINO supports all LLM models from [vLLM supported models list](https://github.com/vllm-project/vllm/blob/main/docs/source/models/supported_models.rst) and can perform optimal model serving on all x86-64 CPUs with, at least, AVX2 support. OpenVINO vLLM backend supports the following advanced vLLM features: - -- Prefix caching (`--enable-prefix-caching`) -- Chunked prefill (`--enable-chunked-prefill`) - -#### Build Docker Image - -To build the docker image, run the command - -```bash -bash ./build_docker_vllm_openvino.sh -``` - -Once it successfully builds, you will have the `vllm-openvino` image. It can be used to spawn a serving container with OpenAI API endpoint or you can work with it interactively via bash shell. - -#### Launch vLLM service - -For gated models, such as `LLAMA-2`, you will have to pass -e HUGGING_FACE_HUB_TOKEN=\ to the docker run command above with a valid Hugging Face Hub read token. - -Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get an access token and export `HUGGINGFACEHUB_API_TOKEN` environment with the token. - -```bash -export HUGGINGFACEHUB_API_TOKEN= -``` - -To start the model server: - -```bash -bash launch_vllm_service_openvino.sh -``` - -#### Performance tips - -vLLM OpenVINO backend uses the following environment variables to control behavior: - -- `VLLM_OPENVINO_KVCACHE_SPACE` to specify the KV Cache size (e.g, `VLLM_OPENVINO_KVCACHE_SPACE=40` means 40 GB space for KV cache), larger setting will allow vLLM running more requests in parallel. This parameter should be set based on the hardware configuration and memory management pattern of users. - -- `VLLM_OPENVINO_CPU_KV_CACHE_PRECISION=u8` to control KV cache precision. By default, FP16 / BF16 is used depending on platform. - -- `VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS=ON` to enable U8 weights compression during model loading stage. By default, compression is turned off. - -To enable better TPOT / TTFT latency, you can use vLLM's chunked prefill feature (`--enable-chunked-prefill`). 
Based on the experiments, the recommended batch size is `256` (`--max-num-batched-tokens`) - -OpenVINO best known configuration is: - - $ VLLM_OPENVINO_KVCACHE_SPACE=100 VLLM_OPENVINO_CPU_KV_CACHE_PRECISION=u8 VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS=ON \ - python3 vllm/benchmarks/benchmark_throughput.py --model meta-llama/Llama-2-7b-chat-hf --dataset vllm/benchmarks/ShareGPT_V3_unfiltered_cleaned_split.json --enable-chunked-prefill --max-num-batched-tokens 256 - -### 2.4 Query the service - -And then you can make requests like below to check the service status: - -```bash -curl http://${host_ip}:8008/v1/chat/completions \ - -X POST \ - -H "Content-Type: application/json" \ - -d '{"model": "meta-llama/Meta-Llama-3-8B-Instruct", "messages": [{"role": "user", "content": "What is Deep Learning?"}]}' -``` - -## 🚀3. Set up LLM microservice - -Then we warp the VLLM service into LLM microcervice. - -### Build docker - -```bash -bash build_docker_microservice.sh -``` - -### Launch the microservice - -```bash -bash launch_microservice.sh -``` - -### Query the microservice - -```bash -curl http://${your_ip}:9000/v1/chat/completions \ - -X POST \ - -d '{"query":"What is Deep Learning?","max_tokens":17,"top_p":0.95,"temperature":0.01,"streaming":false}' \ - -H 'Content-Type: application/json' -``` diff --git a/comps/llms/text-generation/vllm/llama_index/build_docker_microservice.sh b/comps/llms/text-generation/vllm/llama_index/build_docker_microservice.sh deleted file mode 100644 index 46e1edba3..000000000 --- a/comps/llms/text-generation/vllm/llama_index/build_docker_microservice.sh +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -cd ../../../../ -docker build \ - -t opea/llm-vllm-llamaindex:latest \ - --build-arg https_proxy=$https_proxy \ - --build-arg http_proxy=$http_proxy \ - -f comps/llms/text-generation/vllm/llama_index/Dockerfile . diff --git a/comps/llms/text-generation/vllm/llama_index/dependency/build_docker_vllm_openvino.sh b/comps/llms/text-generation/vllm/llama_index/dependency/build_docker_vllm_openvino.sh deleted file mode 100644 index d42878eba..000000000 --- a/comps/llms/text-generation/vllm/llama_index/dependency/build_docker_vllm_openvino.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -BASEDIR="$( cd "$( dirname "$0" )" && pwd )" -git clone https://github.com/vllm-project/vllm.git vllm -cd ./vllm/ && git checkout v0.6.1 -docker build -t vllm-openvino:latest -f Dockerfile.openvino . 
--build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -cd $BASEDIR && rm -rf vllm diff --git a/comps/llms/text-generation/vllm/llama_index/entrypoint.sh b/comps/llms/text-generation/vllm/llama_index/entrypoint.sh deleted file mode 100644 index 0e0bc8f04..000000000 --- a/comps/llms/text-generation/vllm/llama_index/entrypoint.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/env bash - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -pip --no-cache-dir install -r requirements-runtime.txt - -python3 llm.py diff --git a/comps/llms/text-generation/vllm/llama_index/launch_microservice.sh b/comps/llms/text-generation/vllm/llama_index/launch_microservice.sh deleted file mode 100644 index ef8084f61..000000000 --- a/comps/llms/text-generation/vllm/llama_index/launch_microservice.sh +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -docker run -d --rm \ - --name="llm-vllm-server" \ - -p 9000:9000 \ - --ipc=host \ - -e http_proxy=$http_proxy \ - -e https_proxy=$https_proxy \ - -e vLLM_ENDPOINT=$vLLM_ENDPOINT \ - -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN \ - -e LLM_MODEL=$LLM_MODEL \ - -e LOGFLAG=$LOGFLAG \ - opea/llm-vllm-llamaindex:latest diff --git a/comps/llms/text-generation/vllm/llama_index/llm.py b/comps/llms/text-generation/vllm/llama_index/llm.py deleted file mode 100644 index 335f40629..000000000 --- a/comps/llms/text-generation/vllm/llama_index/llm.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os - -from fastapi.responses import StreamingResponse -from llama_index.llms.openai_like import OpenAILike - -from comps import ( - CustomLogger, - GeneratedDoc, - LLMParamsDoc, - ServiceType, - opea_microservices, - opea_telemetry, - register_microservice, -) - -logger = CustomLogger("llm_vllm_llamaindex") -logflag = os.getenv("LOGFLAG", False) - - -@opea_telemetry -def post_process_text(text: str): - if text == " ": - return "data: @#$\n\n" - if text == "\n": - return "data:
\n\n" - if text.isspace(): - return None - new_text = text.replace(" ", "@#$") - return f"data: {new_text}\n\n" - - -@register_microservice( - name="opea_service@llm_vllm_llama_index", - service_type=ServiceType.LLM, - endpoint="/v1/chat/completions", - host="0.0.0.0", - port=9000, -) -async def llm_generate(input: LLMParamsDoc): - if logflag: - logger.info(input) - llm_endpoint = os.getenv("vLLM_ENDPOINT", "http://localhost:8008") - model_name = os.getenv("LLM_MODEL", "meta-llama/Meta-Llama-3-8B-Instruct") - llm = OpenAILike( - api_key="fake", - api_base=llm_endpoint + "/v1", - max_tokens=input.max_tokens, - model=model_name, - top_p=input.top_p, - temperature=input.temperature, - streaming=input.streaming, - ) - - if input.streaming: - - async def stream_generator(): - async for text in llm.astream_complete(input.query): - if text.text not in ["<|im_end|>", "<|endoftext|>"]: - output = text.text - yield f"data: {output}\n\n" - if logflag: - logger.info(f"[llm - chat_stream] stream response: {output}") - yield "data: [DONE]\n\n" - - return StreamingResponse(stream_generator(), media_type="text/event-stream") - else: - response = await llm.acomplete(input.query) - response = response.text - if logflag: - logger.info(response) - return GeneratedDoc(text=response, prompt=input.query) - - -if __name__ == "__main__": - opea_microservices["opea_service@llm_vllm_llama_index"].start() diff --git a/comps/llms/text-generation/vllm/llama_index/query.sh b/comps/llms/text-generation/vllm/llama_index/query.sh deleted file mode 100644 index 68beefc4d..000000000 --- a/comps/llms/text-generation/vllm/llama_index/query.sh +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -your_ip="0.0.0.0" - -curl http://${your_ip}:8008/v1/completions \ - -H "Content-Type: application/json" \ - -d '{ - "model": "meta-llama/Meta-Llama-3-8B-Instruct", - "prompt": "What is Deep Learning?", - "max_tokens": 32, - "temperature": 0 - }' - -##query microservice -curl http://${your_ip}:9000/v1/chat/completions \ - -X POST \ - -d '{"query":"What is Deep Learning?","max_tokens":17,"top_p":0.95,"temperature":0.01,"streaming":false}' \ - -H 'Content-Type: application/json' diff --git a/comps/llms/text-generation/vllm/llama_index/requirements-runtime.txt b/comps/llms/text-generation/vllm/llama_index/requirements-runtime.txt deleted file mode 100644 index 225adde27..000000000 --- a/comps/llms/text-generation/vllm/llama_index/requirements-runtime.txt +++ /dev/null @@ -1 +0,0 @@ -langserve diff --git a/comps/llms/text-generation/vllm/llama_index/requirements.txt b/comps/llms/text-generation/vllm/llama_index/requirements.txt deleted file mode 100644 index cddb1c21e..000000000 --- a/comps/llms/text-generation/vllm/llama_index/requirements.txt +++ /dev/null @@ -1,12 +0,0 @@ -docarray[full] -fastapi -huggingface_hub -llama-index-llms-openai-like -opentelemetry-api -opentelemetry-exporter-otlp -opentelemetry-sdk -prometheus-fastapi-instrumentator -shortuuid -transformers -uvicorn -vllm diff --git a/comps/lvms/llama-vision/update b/comps/lvms/llama-vision/update old mode 100755 new mode 100644 diff --git a/comps/nginx/README.md b/comps/nginx/README.md deleted file mode 100644 index 0ccb3e0e0..000000000 --- a/comps/nginx/README.md +++ /dev/null @@ -1,79 +0,0 @@ -# Nginx for Microservice Forwarding - -[Nginx](https://nginx.org/en/) serves as a versatile tool in the realm of web services, functioning as an HTTP and reverse proxy server, and a generic TCP/UDP proxy server. 
- -In GenAIComps, we utilize nginx to streamline our network services. We provide an nginx Docker container, which is essential for deploying [OPEA](https://github.com/opea-project) microservices, mega services, and managing endpoint and port forwarding for frontend services. Our use of Docker to launch nginx ensures a flexible and reliable service deployment, optimizing our infrastructure to meet diverse operational demands. - -## 🚀1. Build Docker Image - -```bash -cd ../.. -docker build -t opea/nginx:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/nginx/Dockerfile . -``` - -## 🚀2. Environment Settings - -To use Nginx for service forwarding, users need to setup environment variables first. The variables set here will be substituted in `nginx.conf.template`. - -For example, if you want to use Nginx to forward the frontend, backend services of a [ChatQnA](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/README.md) example, setup environment variables as below. - -```bash -export FRONTEND_SERVICE_IP=${your_frontend_service_ip} -export FRONTEND_SERVICE_PORT=5173 -export BACKEND_SERVICE_NAME=chatqna -export BACKEND_SERVICE_IP=${your_backend_service_ip} -export BACKEND_SERVICE_PORT=8888 -export DATAPREP_SERVICE_IP=${your_dataprep_service_ip} -export DATAPREP_SERVICE_PORT=6007 -export NGINX_PORT=${your_nginx_port} -``` - -Nginx will expose `80` as the default port. You can choose other available ports as `${your_nginx_port}` for Nginx docker. - -For other examples, change the variable above following the corresponding READMEs. - -If you want to forward other services like `dataprep` using Nginx, add the code below in `nginx.conf.template` and setup the right parameters for it. Notice that the `${dataprep_service_endpoint}` need to be the form of `/v1/xxx/xxx`. - -```bash -location ${dataprep_service_endpoint} { - proxy_pass http://${dataprep_service_ip}:${dataprep_service_port}; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; -} -``` - -## 🚀3. Start Nginx Service - -### 3.1 Start with CLI (Option 1) - -```bash -docker run -d --name opea-nginx -p ${NGINX_PORT}:80 \ - -e FRONTEND_SERVICE_IP=${FRONTEND_SERVICE_IP} \ - -e FRONTEND_SERVICE_PORT=${FRONTEND_SERVICE_PORT} \ - -e BACKEND_SERVICE_NAME=${BACKEND_SERVICE_NAME} \ - -e BACKEND_SERVICE_IP=${BACKEND_SERVICE_IP} \ - -e BACKEND_SERVICE_PORT=${BACKEND_SERVICE_PORT} \ - -e DATAPREP_SERVICE_IP=${DATAPREP_SERVICE_IP} \ - -e DATAPREP_SERVICE_PORT=${DATAPREP_SERVICE_PORT} \ - opea/nginx:latest -``` - -### 3.2 Start with Docker Compose (Option 2) - -```bash -docker compose -f docker_compose.yaml up -d -``` - -## 🚀4. Consume Forwarded Service - -To consume the backend service, use the curl command as below (this is a ChatQnA service example): - -```bash -curl http://${your_nginx_ip}:${your_nginx_port}/v1/chatqna \ - -H "Content-Type: application/json" \ - -d '{"messages": "What is Deep Learning?"}' -``` - -For the frontend service, open the following URL in your browser: `http://${your_nginx_ip}:${your_nginx_port}`. 
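As a companion to the forwarding setup described in the Nginx README above (removed in this patch), the checks below illustrate how a forwarded route is exercised through the proxy once the `opea-nginx` container is running. This is a sketch only: `/v1/dataprep` is a hypothetical stand-in for whatever `${dataprep_service_endpoint}` was added to `nginx.conf.template`, and the expected payload depends entirely on the service behind it.

```bash
# Frontend through Nginx (proxied to ${FRONTEND_SERVICE_IP}:${FRONTEND_SERVICE_PORT}); expect HTTP 200.
curl -s -o /dev/null -w "%{http_code}\n" http://${your_nginx_ip}:${NGINX_PORT}/

# Hypothetical extra route: replace /v1/dataprep with the real ${dataprep_service_endpoint}.
curl -v http://${your_nginx_ip}:${NGINX_PORT}/v1/dataprep
```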
diff --git a/comps/ragas/deployment/docker_compose/README.md b/comps/ragas/deployment/docker_compose/README.md new file mode 100644 index 000000000..e69de29bb diff --git a/comps/ragas/tgi/langchain/docker_compose_llm.yaml b/comps/ragas/deployment/docker_compose/tgi_langchain.yaml similarity index 100% rename from comps/ragas/tgi/langchain/docker_compose_llm.yaml rename to comps/ragas/deployment/docker_compose/tgi_langchain.yaml diff --git a/comps/ragas/deployment/kubernetes/README.md b/comps/ragas/deployment/kubernetes/README.md new file mode 100644 index 000000000..e69de29bb diff --git a/comps/ragas/tgi/langchain/Dockerfile b/comps/ragas/src/tgi/langchain/Dockerfile similarity index 100% rename from comps/ragas/tgi/langchain/Dockerfile rename to comps/ragas/src/tgi/langchain/Dockerfile diff --git a/comps/embeddings/tei/llama_index/__init__.py b/comps/ragas/src/tgi/langchain/__init__.py similarity index 100% rename from comps/embeddings/tei/llama_index/__init__.py rename to comps/ragas/src/tgi/langchain/__init__.py diff --git a/comps/ragas/tgi/langchain/build_docker.sh b/comps/ragas/src/tgi/langchain/build_docker.sh similarity index 100% rename from comps/ragas/tgi/langchain/build_docker.sh rename to comps/ragas/src/tgi/langchain/build_docker.sh diff --git a/comps/llms/text-generation/tgi/entrypoint.sh b/comps/ragas/src/tgi/langchain/entrypoint.sh similarity index 100% rename from comps/llms/text-generation/tgi/entrypoint.sh rename to comps/ragas/src/tgi/langchain/entrypoint.sh diff --git a/comps/ragas/tgi/langchain/llm.py b/comps/ragas/src/tgi/langchain/llm.py similarity index 100% rename from comps/ragas/tgi/langchain/llm.py rename to comps/ragas/src/tgi/langchain/llm.py diff --git a/comps/llms/text-generation/tgi/requirements-runtime.txt b/comps/ragas/src/tgi/langchain/requirements-runtime.txt similarity index 100% rename from comps/llms/text-generation/tgi/requirements-runtime.txt rename to comps/ragas/src/tgi/langchain/requirements-runtime.txt diff --git a/comps/ragas/tgi/langchain/requirements.txt b/comps/ragas/src/tgi/langchain/requirements.txt similarity index 100% rename from comps/ragas/tgi/langchain/requirements.txt rename to comps/ragas/src/tgi/langchain/requirements.txt diff --git a/comps/ragas/tgi/langchain/entrypoint.sh b/comps/ragas/tgi/langchain/entrypoint.sh deleted file mode 100644 index d60eddd36..000000000 --- a/comps/ragas/tgi/langchain/entrypoint.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/env bash - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -pip --no-cache-dir install -r requirements-runtime.txt - -python llm.py diff --git a/comps/ragas/tgi/langchain/requirements-runtime.txt b/comps/ragas/tgi/langchain/requirements-runtime.txt deleted file mode 100644 index 225adde27..000000000 --- a/comps/ragas/tgi/langchain/requirements-runtime.txt +++ /dev/null @@ -1 +0,0 @@ -langserve diff --git a/comps/reranks/deployment/docker_compose/README.md b/comps/reranks/deployment/docker_compose/README.md new file mode 100644 index 000000000..e69de29bb diff --git a/comps/reranks/tei/docker_compose_reranking.yaml b/comps/reranks/deployment/docker_compose/rerank_tei.yaml similarity index 64% rename from comps/reranks/tei/docker_compose_reranking.yaml rename to comps/reranks/deployment/docker_compose/rerank_tei.yaml index 33b2f2b74..14c62ff3f 100644 --- a/comps/reranks/tei/docker_compose_reranking.yaml +++ b/comps/reranks/deployment/docker_compose/rerank_tei.yaml @@ -4,18 +4,23 @@ version: "3.8" services: - tei_xeon_service: + tei_reranking_service: 
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 - container_name: tei-xeon-server + container_name: tei-reranking-server ports: - "8808:80" volumes: - "./data:/data" shm_size: 1g + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8808/health"] + interval: 10s + timeout: 6s + retries: 18 command: --model-id ${RERANK_MODEL_ID} --hf-api-token ${HF_TOKEN} reranking: - image: opea/reranking-tei:latest - container_name: reranking-tei-xeon-server + image: opea/reranking:latest + container_name: reranking-tei-server ports: - "8000:8000" ipc: host @@ -25,6 +30,9 @@ services: https_proxy: ${https_proxy} TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT} HF_TOKEN: ${HF_TOKEN} + depends_on: + tei_reranking_service: + condition: service_healthy restart: unless-stopped networks: diff --git a/comps/reranks/deployment/kubernetes/README.md b/comps/reranks/deployment/kubernetes/README.md new file mode 100644 index 000000000..e69de29bb diff --git a/comps/reranks/fastrag/Dockerfile b/comps/reranks/fastrag/Dockerfile deleted file mode 100644 index 9effd27b8..000000000 --- a/comps/reranks/fastrag/Dockerfile +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -FROM python:3.10-slim - -ENV LANG=C.UTF-8 - -ARG ARCH="cpu" - -RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ - git \ - libgl1-mesa-glx \ - libjemalloc-dev - -RUN useradd -m -s /bin/bash user && \ - mkdir -p /home/user && \ - chown -R user /home/user/ - -USER user - -COPY comps /home/user/comps - -RUN git clone https://github.com/IntelLabs/fastRAG.git /home/user/fastRAG && \ - cd /home/user/fastRAG && \ - pip install --no-cache-dir --upgrade pip && \ - if [ ${ARCH} = "cpu" ]; then \ - pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r /home/user/comps/reranks/fastrag/requirements.txt; \ - pip install --no-cache-dir .; \ - pip install --no-cache-dir .[intel]; \ - else \ - pip install --no-cache-dir -r /home/user/comps/reranks/fastrag/requirements.txt; \ - pip install --no-cache-dir .; \ - pip install --no-cache-dir .[intel]; \ - fi - -ENV PYTHONPATH=$PYTHONPH:/home/user - -WORKDIR /home/user/comps/reranks/fastrag - -ENTRYPOINT ["python", "local_reranking.py"] diff --git a/comps/reranks/fastrag/README.md b/comps/reranks/fastrag/README.md deleted file mode 100644 index 0114de503..000000000 --- a/comps/reranks/fastrag/README.md +++ /dev/null @@ -1,82 +0,0 @@ -# Reranking Microservice with fastRAG - -`fastRAG` is a research framework for efficient and optimized retrieval augmented generative pipelines, incorporating state-of-the-art LLMs and Information Retrieval. - -Please refer to [Official fastRAG repo](https://github.com/IntelLabs/fastRAG/tree/main) -for more information. - -This README provides set-up instructions and comprehensive details regarding the reranking microservice via fastRAG. - ---- - -## 🚀1. Start Microservice with Python (Option 1) - -To start the Reranking microservice, you must first install the required python packages. - -### 1.1 Install Requirements - -```bash -pip install -r requirements.txt -``` - -### 1.2 Install fastRAG - -```bash -git clone https://github.com/IntelLabs/fastRAG.git -cd fastRag -pip install . -pip install .[intel] -``` - -### 1.3 Start Reranking Service with Python Script - -```bash -export EMBED_MODEL="Intel/bge-small-en-v1.5-rag-int8-static" -python local_reranking.py -``` - ---- - -## 🚀2. 
Start Microservice with Docker (Option 2) - -### 2.1 Setup Environment Variables - -```bash -export EMBED_MODEL="Intel/bge-small-en-v1.5-rag-int8-static" -``` - -### 2.2 Build Docker Image - -```bash -cd ../../../ -docker build -t opea/reranking-fastrag:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/fastrag/Dockerfile . -``` - -### 2.3 Run Docker - -```bash -docker run -d --name="reranking-fastrag-server" -p 8000:8000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e EMBED_MODEL=$EMBED_MODEL opea/reranking-fastrag:latest -``` - ---- - -## ✅ 3. Invoke Reranking Microservice - -The Reranking microservice exposes following API endpoints: - -- Check Service Status - - ```bash - curl http://localhost:8000/v1/health_check \ - -X GET \ - -H 'Content-Type: application/json' - ``` - -- Execute reranking process by providing query and documents - - ```bash - curl http://localhost:8000/v1/reranking \ - -X POST \ - -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \ - -H 'Content-Type: application/json' - ``` diff --git a/comps/reranks/fastrag/config.py b/comps/reranks/fastrag/config.py deleted file mode 100644 index 70a6fde8b..000000000 --- a/comps/reranks/fastrag/config.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os - -# Re-ranking model -RANKER_MODEL = os.getenv("EMBED_MODEL", "Intel/bge-small-en-v1.5-rag-int8-static") diff --git a/comps/reranks/fastrag/local_reranking.py b/comps/reranks/fastrag/local_reranking.py deleted file mode 100644 index d6f33193c..000000000 --- a/comps/reranks/fastrag/local_reranking.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os - -from config import RANKER_MODEL -from fastrag.rankers import IPEXBiEncoderSimilarityRanker -from haystack import Document - -from comps import CustomLogger -from comps.cores.mega.micro_service import ServiceType, opea_microservices, register_microservice -from comps.cores.proto.docarray import RerankedDoc, SearchedDoc, TextDoc - -logger = CustomLogger("local_reranking") -logflag = os.getenv("LOGFLAG", False) - - -@register_microservice( - name="opea_service@local_reranking", - service_type=ServiceType.RERANK, - endpoint="/v1/reranking", - host="0.0.0.0", - port=8000, - input_datatype=SearchedDoc, - output_datatype=RerankedDoc, -) -def reranking(input: SearchedDoc) -> RerankedDoc: - if logflag: - logger.info(input) - documents = [] - for i, d in enumerate(input.retrieved_docs): - documents.append(Document(content=d.text, id=(i + 1))) - sorted_documents = reranker_model.run(input.initial_query, documents)["documents"] - ranked_documents = [TextDoc(id=doc.id, text=doc.content) for doc in sorted_documents] - res = RerankedDoc(initial_query=input.initial_query, reranked_docs=ranked_documents) - if logflag: - logger.info(res) - return res - - -if __name__ == "__main__": - # Use an optimized quantized bi-encoder model for re-reranking - reranker_model = IPEXBiEncoderSimilarityRanker(RANKER_MODEL) - reranker_model.warm_up() - - opea_microservices["opea_service@local_reranking"].start() diff --git a/comps/reranks/fastrag/requirements.txt b/comps/reranks/fastrag/requirements.txt deleted file mode 100644 index a832e760f..000000000 --- a/comps/reranks/fastrag/requirements.txt +++ /dev/null @@ -1,13 +0,0 @@ -aiohttp -docarray[full] -fastapi -haystack-ai 
-httpx -langchain -opentelemetry-api -opentelemetry-exporter-otlp -opentelemetry-sdk -prometheus-fastapi-instrumentator -sentence_transformers -shortuuid -uvicorn diff --git a/comps/reranks/mosec/langchain/Dockerfile b/comps/reranks/mosec/langchain/Dockerfile deleted file mode 100644 index 6b536d4be..000000000 --- a/comps/reranks/mosec/langchain/Dockerfile +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -FROM python:3.11-slim - -RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ - libgl1-mesa-glx \ - libjemalloc-dev - -RUN useradd -m -s /bin/bash user && \ - mkdir -p /home/user && \ - chown -R user /home/user/ - -USER user - -COPY comps /home/user/comps - -RUN pip install --no-cache-dir --upgrade pip setuptools && \ - pip install --no-cache-dir -r /home/user/comps/reranks/mosec/langchain/requirements.txt - -ENV PYTHONPATH=$PYTHONPATH:/home/user - -WORKDIR /home/user/comps/reranks/mosec/langchain - -ENTRYPOINT ["python", "reranking_mosec_xeon.py"] diff --git a/comps/reranks/mosec/langchain/README.md b/comps/reranks/mosec/langchain/README.md deleted file mode 100644 index 5251256ec..000000000 --- a/comps/reranks/mosec/langchain/README.md +++ /dev/null @@ -1,65 +0,0 @@ -# Reranking Microservice with Mosec - -`Mosec` is a high-performance and flexible model serving framework for building ML model-enabled backend and microservices. - -Please refer to [Official mosec repo](https://github.com/mosecorg/mosec) -for more information. - -This README provides set-up instructions and comprehensive details regarding the reranking microservice via mosec. - ---- - -## Build Reranking Mosec Image - -- Build reranking mosec endpoint docker image. - - ``` - docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy -t opea/reranking-langchain-mosec-endpoint:latest -f comps/reranks/mosec/langchain/dependency/Dockerfile . - ``` - ---- - -## Build Reranking Microservice Image - -- Build reranking microservice docker image. - - ``` - docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy -t opea/reranking-langchain-mosec:latest -f comps/reranks/mosec/langchain/Dockerfile . - ``` - ---- - -## Launch Mosec Endpoint Image Container - -- Start the mosec endpoint image docker container. - - ``` - docker run -d --name="reranking-langchain-mosec-endpoint" -p 6001:8000 opea/reranking-langchain-mosec-endpoint:latest - ``` - ---- - -## Launch Embedding Microservice Image Container - -- Start the embedding microservice image docker container. 
- - ``` - export MOSEC_RERANKING_ENDPOINT=http://127.0.0.1:6001 - - docker run -d --name="reranking-langchain-mosec-server" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 6000:8000 --ipc=host -e MOSEC_RERANKING_ENDPOINT=$MOSEC_RERANKING_ENDPOINT opea/reranking-langchain-mosec:latest - ``` - ---- - -## ✅ Invoke Reranking Microservice - -The Reranking microservice exposes following API endpoints: - -- Execute reranking process by providing query and documents - - ``` - curl http://localhost:6000/v1/reranking \ - -X POST \ - -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \ - -H 'Content-Type: application/json' - ``` diff --git a/comps/reranks/mosec/langchain/dependency/Dockerfile b/comps/reranks/mosec/langchain/dependency/Dockerfile deleted file mode 100644 index 9e9275c45..000000000 --- a/comps/reranks/mosec/langchain/dependency/Dockerfile +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -From ubuntu:22.04 -RUN useradd -m -s /bin/bash user && \ - mkdir -p /home/user && \ - chown -R user /home/user/ -ARG DEBIAN_FRONTEND=noninteractive - -ENV GLIBC_TUNABLES glibc.cpu.x86_shstk=permissive - -COPY comps /home/user/comps - -RUN apt update && apt install -y python3 python3-pip - -RUN pip3 install torch==2.2.2 torchvision --trusted-host download.pytorch.org --index-url https://download.pytorch.org/whl/cpu -RUN pip3 install intel-extension-for-pytorch==2.2.0 -RUN pip3 install transformers sentence-transformers==3.0.1 -RUN pip3 install llmspec mosec - -RUN cd /home/user/ && export HF_ENDPOINT=https://hf-mirror.com && huggingface-cli download --resume-download BAAI/bge-reranker-base --local-dir /home/user/bge-reranker-large -USER user -ENV MOSEC_EMBEDDING_MODEL="/home/user/bge-reranker-large/" - -WORKDIR /home/user/comps/reranks/mosec/langchain/dependency - -CMD ["python3", "server-ipex.py"] diff --git a/comps/reranks/mosec/langchain/dependency/server-ipex.py b/comps/reranks/mosec/langchain/dependency/server-ipex.py deleted file mode 100644 index 1f813d4e6..000000000 --- a/comps/reranks/mosec/langchain/dependency/server-ipex.py +++ /dev/null @@ -1,171 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import json -import os -from os import environ -from typing import Dict, List, Optional, Union - -import intel_extension_for_pytorch as ipex -import numpy as np -import torch -from mosec import Server, Worker -from msgspec import Struct -from sentence_transformers import CrossEncoder -from torch.utils.data import DataLoader -from tqdm.autonotebook import tqdm - -DEFAULT_MODEL = "/home/user/bge-reranker-large" - - -class MyCrossEncoder(CrossEncoder): - def __init__( - self, - model_name: str, - num_labels: int = None, - max_length: int = None, - device: str = None, - tokenizer_args: Dict = None, - automodel_args: Dict = None, - trust_remote_code: bool = False, - revision: Optional[str] = None, - local_files_only: bool = False, - default_activation_function=None, - classifier_dropout: float = None, - ) -> None: - super().__init__( - model_name, - num_labels, - max_length, - device, - tokenizer_args, - automodel_args, - trust_remote_code, - revision, - local_files_only, - default_activation_function, - classifier_dropout, - ) - # jit trace model - self.model = ipex.optimize(self.model, dtype=torch.float32) - vocab_size = self.model.config.vocab_size - batch_size = 16 - seq_length = 512 - d = 
torch.randint(vocab_size, size=[batch_size, seq_length]) - # t = torch.randint(0, 1, size=[batch_size, seq_length]) - m = torch.randint(1, 2, size=[batch_size, seq_length]) - self.model = torch.jit.trace(self.model, [d, m], check_trace=False, strict=False) - self.model = torch.jit.freeze(self.model) - - def predict( - self, - sentences: List[List[str]], - batch_size: int = 32, - show_progress_bar: bool = None, - num_workers: int = 0, - activation_fct=None, - apply_softmax=False, - convert_to_numpy: bool = True, - convert_to_tensor: bool = False, - ) -> Union[List[float], np.ndarray, torch.Tensor]: - input_was_string = False - if isinstance(sentences[0], str): # Cast an individual sentence to a list with length 1 - sentences = [sentences] - input_was_string = True - - inp_dataloader = DataLoader( - sentences, - batch_size=batch_size, - collate_fn=self.smart_batching_collate_text_only, - num_workers=num_workers, - shuffle=False, - ) - - iterator = inp_dataloader - if show_progress_bar: - iterator = tqdm(inp_dataloader, desc="Batches") - - if activation_fct is None: - activation_fct = self.default_activation_function - - pred_scores = [] - self.model.eval() - self.model.to(self._target_device) - with torch.no_grad(): - for features in iterator: - model_predictions = self.model(**features) - logits = activation_fct(model_predictions["logits"]) - - if apply_softmax and len(logits[0]) > 1: - logits = torch.nn.functional.softmax(logits, dim=1) - pred_scores.extend(logits) - - if self.config.num_labels == 1: - pred_scores = [score[0] for score in pred_scores] - - if convert_to_tensor: - pred_scores = torch.stack(pred_scores) - elif convert_to_numpy: - pred_scores = np.asarray([score.cpu().detach().numpy() for score in pred_scores]) - - if input_was_string: - pred_scores = pred_scores[0] - - return pred_scores - - -class Request(Struct, kw_only=True): - query: str - docs: List[str] - - -class Response(Struct, kw_only=True): - scores: List[float] - - -def float_handler(o): - if isinstance(o, float): - return format(o, ".10f") - raise TypeError("Not serializable") - - -class MosecReranker(Worker): - def __init__(self): - self.model_name = environ.get("MODEL_NAME", DEFAULT_MODEL) - self.model = MyCrossEncoder(self.model_name) - - def serialize(self, data: Response) -> bytes: - sorted_list = sorted(data.scores, reverse=True) - index_sorted = [data.scores.index(i) for i in sorted_list] - res = [] - for i, s in zip(index_sorted, sorted_list): - tmp = {"index": i, "score": "{:.10f}".format(s)} - res.append(tmp) - return json.dumps(res, default=float_handler).encode("utf-8") - - def forward(self, data: List[Request]) -> List[Response]: - sentence_pairs = [] - inputs_lens = [] - for d in data: - inputs_lens.append(len(d["texts"])) - tmp = [[d["query"], doc] for doc in d["texts"]] - sentence_pairs.extend(tmp) - - scores = self.model.predict(sentence_pairs) - scores = scores.tolist() - - resp = [] - cur_idx = 0 - for lens in inputs_lens: - resp.append(Response(scores=scores[cur_idx : cur_idx + lens])) - cur_idx += lens - - return resp - - -if __name__ == "__main__": - MAX_BATCH_SIZE = int(os.environ.get("MAX_BATCH_SIZE", 128)) - MAX_WAIT_TIME = int(os.environ.get("MAX_WAIT_TIME", 10)) - server = Server() - server.append_worker(MosecReranker, max_batch_size=MAX_BATCH_SIZE, max_wait_time=MAX_WAIT_TIME) - server.run() diff --git a/comps/reranks/mosec/langchain/docker_compose_embedding.yaml b/comps/reranks/mosec/langchain/docker_compose_embedding.yaml deleted file mode 100644 index 581946185..000000000 --- 
a/comps/reranks/mosec/langchain/docker_compose_embedding.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -version: "3.8" - -services: - reranking: - image: opea/reranking-langchain-mosec:latest - container_name: reranking-langchain-mosec-server - ports: - - "6000:8000" - ipc: host - environment: - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - MOSEC_RERANKING_ENDPOINT: ${MOSEC_RERANKING_ENDPOINT} - LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/reranks/mosec/langchain/requirements.txt b/comps/reranks/mosec/langchain/requirements.txt deleted file mode 100644 index f9327419d..000000000 --- a/comps/reranks/mosec/langchain/requirements.txt +++ /dev/null @@ -1,11 +0,0 @@ -docarray[full] -fastapi -langchain -langchain_community -openai -opentelemetry-api -opentelemetry-exporter-otlp -opentelemetry-sdk -prometheus_fastapi_instrumentator -shortuuid -uvicorn diff --git a/comps/reranks/mosec/langchain/reranking_mosec_xeon.py b/comps/reranks/mosec/langchain/reranking_mosec_xeon.py deleted file mode 100644 index 1f222beb3..000000000 --- a/comps/reranks/mosec/langchain/reranking_mosec_xeon.py +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -# Copyright 2024 MOSEC Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import json -import os -import re -import time - -import requests -from langchain_core.prompts import ChatPromptTemplate - -from comps import ( - CustomLogger, - LLMParamsDoc, - SearchedDoc, - ServiceType, - opea_microservices, - register_microservice, - register_statistics, - statistics_dict, -) - -logger = CustomLogger("reranking_mosec_xeon") -logflag = os.getenv("LOGFLAG", False) - - -@register_microservice( - name="opea_service@reranking_mosec_xeon", - service_type=ServiceType.RERANK, - endpoint="/v1/reranking", - host="0.0.0.0", - port=8000, - input_datatype=SearchedDoc, - output_datatype=LLMParamsDoc, -) -@register_statistics(names=["opea_service@reranking_mosec_xeon"]) -def reranking(input: SearchedDoc) -> LLMParamsDoc: - if logflag: - logger.info("reranking input: ", input) - start = time.time() - if input.retrieved_docs: - docs = [doc.text for doc in input.retrieved_docs] - url = mosec_reranking_endpoint + "/inference" - data = {"query": input.initial_query, "texts": docs} - headers = {"Content-Type": "application/json"} - response = requests.post(url, data=json.dumps(data), headers=headers) - response_data = response.json() - best_response = max(response_data, key=lambda response: response["score"]) - doc = input.retrieved_docs[best_response["index"]] - if doc.text and len(re.findall("[\u4E00-\u9FFF]", doc.text)) / len(doc.text) >= 0.3: - # chinese context - template = "仅基于以下背景回答问题:\n{context}\n问题: {question}" - else: - template = """Answer the question based only on the following context: - {context} - Question: {question} - """ - prompt = ChatPromptTemplate.from_template(template) - final_prompt = prompt.format(context=doc.text, question=input.initial_query) - statistics_dict["opea_service@reranking_mosec_xeon"].append_latency(time.time() - start, None) - if logflag: - logger.info(final_prompt.strip()) - return LLMParamsDoc(query=final_prompt.strip()) - else: - if logflag: - logger.info(input.initial_query) - return LLMParamsDoc(query=input.initial_query) - - -if __name__ == "__main__": - mosec_reranking_endpoint = os.getenv("MOSEC_RERANKING_ENDPOINT", "http://localhost:8080") - opea_microservices["opea_service@reranking_mosec_xeon"].start() diff --git a/comps/reranks/tei/Dockerfile b/comps/reranks/src/Dockerfile similarity index 80% rename from comps/reranks/tei/Dockerfile rename to comps/reranks/src/Dockerfile index 6c6016bcd..b8153dc7c 100644 --- a/comps/reranks/tei/Dockerfile +++ b/comps/reranks/src/Dockerfile @@ -21,10 +21,10 @@ COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip setuptools && \ if [ ${ARCH} = "cpu" ]; then pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu; fi && \ - pip install --no-cache-dir -r /home/user/comps/reranks/tei/requirements.txt + pip install --no-cache-dir -r /home/user/comps/reranks/src/requirements.txt ENV PYTHONPATH=$PYTHONPATH:/home/user -WORKDIR /home/user/comps/reranks/tei +WORKDIR /home/user/comps/reranks/src -ENTRYPOINT ["python", "reranking_tei.py"] +ENTRYPOINT ["python", "opea_reranking_microservice.py"] diff --git a/comps/reranks/README.md b/comps/reranks/src/README.md similarity index 53% rename from comps/reranks/README.md rename to comps/reranks/src/README.md index f8176720b..28c3324e8 100644 --- a/comps/reranks/README.md +++ b/comps/reranks/src/README.md @@ -7,29 +7,3 @@ either a dense embedding model or a sparse lexical search index is often employe However, a reranking model can further refine this process by rearranging potential candidates into a final, optimized 
order. ![Flow Chart](./assets/img/reranking_flow.png) - ---- - -## 🛠️ Features - -- **rerank on retrieved documents**: Perform reranking on the given documents using reranking models together with query. - ---- - -## ⚙️ Implementation - -### Utilizing Reranking with fastRAG - -For additional information, please refer to this [README](./fastrag/README.md) - -### Utilizing Reranking with Mosec - -For additional information, please refer to this [README](./mosec/langchain/README.md) - -### Utilizing Reranking with TEI - -For additional information, please refer to this [README](./tei/README.md) - -### Utilizing Reranking with VideoQnA - -For additional information, please refer to this [README](./videoqna/README.md) diff --git a/comps/llms/text-generation/tgi/__init__.py b/comps/reranks/src/__init__.py similarity index 100% rename from comps/llms/text-generation/tgi/__init__.py rename to comps/reranks/src/__init__.py diff --git a/comps/reranks/assets/img/reranking_flow.png b/comps/reranks/src/assets/img/reranking_flow.png similarity index 100% rename from comps/reranks/assets/img/reranking_flow.png rename to comps/reranks/src/assets/img/reranking_flow.png diff --git a/comps/ragas/tgi/langchain/__init__.py b/comps/reranks/src/integrations/__init__.py similarity index 100% rename from comps/ragas/tgi/langchain/__init__.py rename to comps/reranks/src/integrations/__init__.py diff --git a/comps/reranks/src/integrations/opea_tei.py b/comps/reranks/src/integrations/opea_tei.py new file mode 100644 index 000000000..09dda5e9d --- /dev/null +++ b/comps/reranks/src/integrations/opea_tei.py @@ -0,0 +1,115 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import json +import os +from typing import Union + +import requests +from huggingface_hub import AsyncInferenceClient + +from comps import CustomLogger, LLMParamsDoc, SearchedDoc, ServiceType +from comps.cores.common.component import OpeaComponent +from comps.cores.mega.utils import get_access_token +from comps.cores.proto.api_protocol import ( + ChatCompletionRequest, + RerankingRequest, + RerankingResponse, + RerankingResponseData, +) + +logger = CustomLogger("reranking_tei") +logflag = os.getenv("LOGFLAG", False) + +# Environment variables +TOKEN_URL = os.getenv("TOKEN_URL") +CLIENTID = os.getenv("CLIENTID") +CLIENT_SECRET = os.getenv("CLIENT_SECRET") + + +class OPEATEIReranking(OpeaComponent): + """A specialized reranking component derived from OpeaComponent for TEI reranking services. + + Attributes: + client (AsyncInferenceClient): An instance of the client for reranking generation. 
+ """ + + def __init__(self, name: str, description: str, config: dict = None): + super().__init__(name, ServiceType.RERANK.name.lower(), description, config) + self.base_url = os.getenv("TEI_RERANKING_ENDPOINT", "http://localhost:8808") + self.client = self._initialize_client() + + def _initialize_client(self) -> AsyncInferenceClient: + """Initializes the AsyncInferenceClient.""" + access_token = ( + get_access_token(TOKEN_URL, CLIENTID, CLIENT_SECRET) if TOKEN_URL and CLIENTID and CLIENT_SECRET else None + ) + headers = {"Authorization": f"Bearer {access_token}"} if access_token else {} + return AsyncInferenceClient( + model=f"{self.base_url}/rerank", + token=os.getenv("HUGGINGFACEHUB_API_TOKEN"), + headers=headers, + ) + + async def invoke( + self, input: Union[SearchedDoc, RerankingRequest, ChatCompletionRequest] + ) -> Union[LLMParamsDoc, RerankingResponse, ChatCompletionRequest]: + """Invokes the reranking service to generate rerankings for the provided input.""" + reranking_results = [] + + if input.retrieved_docs: + docs = [doc.text for doc in input.retrieved_docs] + if isinstance(input, SearchedDoc): + query = input.initial_query + else: + # for RerankingRequest, ChatCompletionRequest + query = input.input + + response = await self.client.post( + json={"query": query, "texts": docs}, + task="text-reranking", + ) + + for best_response in json.loads(response.decode())[: input.top_n]: + reranking_results.append( + {"text": input.retrieved_docs[best_response["index"]].text, "score": best_response["score"]} + ) + + if isinstance(input, SearchedDoc): + result = [doc["text"] for doc in reranking_results] + if logflag: + logger.info(result) + return LLMParamsDoc(query=input.initial_query, documents=result) + else: + reranking_docs = [] + for doc in reranking_results: + reranking_docs.append(RerankingResponseData(text=doc["text"], score=doc["score"])) + if isinstance(input, RerankingRequest): + result = RerankingResponse(reranked_docs=reranking_docs) + if logflag: + logger.info(result) + return result + + if isinstance(input, ChatCompletionRequest): + input.reranked_docs = reranking_docs + input.documents = [doc["text"] for doc in reranking_results] + if logflag: + logger.info(input) + return input + + def check_health(self) -> bool: + """Checks the health of the embedding service. + + Returns: + bool: True if the service is reachable and healthy, False otherwise. + """ + try: + response = requests.get(f"{self.base_url}/health") + if response.status_code == 200: + return True + else: + return False + except Exception as e: + # Handle connection errors, timeouts, etc. 
+ logger.error(f"Health check failed: {e}") + return False diff --git a/comps/reranks/src/opea_reranking_microservice.py b/comps/reranks/src/opea_reranking_microservice.py new file mode 100644 index 000000000..8d429b95d --- /dev/null +++ b/comps/reranks/src/opea_reranking_microservice.py @@ -0,0 +1,80 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os +import time +from typing import Union + +from integrations.opea_tei import OPEATEIReranking + +from comps import ( + CustomLogger, + OpeaComponentController, + ServiceType, + opea_microservices, + register_microservice, + register_statistics, + statistics_dict, +) +from comps.cores.proto.api_protocol import ChatCompletionRequest, RerankingRequest, RerankingResponse +from comps.cores.proto.docarray import LLMParamsDoc, LVMVideoDoc, RerankedDoc, SearchedDoc, SearchedMultimodalDoc + +logger = CustomLogger("opea_reranking_microservice") +logflag = os.getenv("LOGFLAG", False) +rerank_type = os.getenv("RERANK_TYPE", False) +controller = OpeaComponentController() + +# Register components +try: + # Instantiate reranking components + if rerank_type == "tei": + opea_tei_reranking = OPEATEIReranking( + name="OPEATEIReranking", + description="OPEA TEI Reranking Service", + ) + # Register components with the controller + controller.register(opea_tei_reranking) + + # Discover and activate a healthy component + controller.discover_and_activate() +except Exception as e: + logger.error(f"Failed to initialize components: {e}") + + +@register_microservice( + name="opea_service@reranking", + service_type=ServiceType.RERANK, + endpoint="/v1/reranking", + host="0.0.0.0", + port=8000, +) +@register_statistics(names=["opea_service@reranking"]) +async def reranking( + input: Union[SearchedMultimodalDoc, SearchedDoc, RerankingRequest, ChatCompletionRequest] +) -> Union[RerankedDoc, LLMParamsDoc, RerankingResponse, ChatCompletionRequest, LVMVideoDoc]: + start = time.time() + + # Log the input if logging is enabled + if logflag: + logger.info(f"Input received: {input}") + + try: + # Use the controller to invoke the active component + reranking_response = await controller.invoke(input) + + # Log the result if logging is enabled + if logflag: + logger.info(f"Output received: {reranking_response}") + + # Record statistics + statistics_dict["opea_service@reranking"].append_latency(time.time() - start, None) + return reranking_response + + except Exception as e: + logger.error(f"Error during reranking invocation: {e}") + raise + + +if __name__ == "__main__": + opea_microservices["opea_service@reranking"].start() + logger.info("OPEA Reranking Microservice is starting...") diff --git a/comps/reranks/tei/requirements.txt b/comps/reranks/src/requirements.txt similarity index 100% rename from comps/reranks/tei/requirements.txt rename to comps/reranks/src/requirements.txt diff --git a/comps/reranks/tei/README.md b/comps/reranks/tei/README.md deleted file mode 100644 index 2bd1151e0..000000000 --- a/comps/reranks/tei/README.md +++ /dev/null @@ -1,116 +0,0 @@ -# Reranking Microservice via TEI - -`Text Embeddings Inference (TEI)` is a comprehensive toolkit designed for efficient deployment and serving of open source text embeddings models. -It enable us to host our own reranker endpoint seamlessly. - -This README provides set-up instructions and comprehensive details regarding the reranking microservice via TEI. - ---- - -## 🚀1. 
Start Microservice with Python (Option 1) - -To start the Reranking microservice, you must first install the required python packages. - -### 1.1 Install Requirements - -```bash -pip install -r requirements.txt -``` - -### 1.2 Start TEI Service - -```bash -export HF_TOKEN=${your_hf_api_token} -export RERANK_MODEL_ID="BAAI/bge-reranker-base" -export volume=$PWD/data - -docker run -d -p 6060:80 -v $volume:/data -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $RERANK_MODEL_ID --hf-api-token $HF_TOKEN -``` - -### 1.3 Verify the TEI Service - -```bash -curl 127.0.0.1:6060/rerank \ - -X POST \ - -d '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' \ - -H 'Content-Type: application/json' -``` - -### 1.4 Start Reranking Service with Python Script - -```bash -export TEI_RERANKING_ENDPOINT="http://${your_ip}:6060" - -python reranking_tei_xeon.py -``` - ---- - -## 🚀2. Start Microservice with Docker (Option 2) - -If you start an Reranking microservice with docker, the `docker_compose_reranking.yaml` file will automatically start a TEI service with docker. - -### 2.1 Setup Environment Variables - -```bash -export HF_TOKEN=${your_hf_api_token} -export TEI_RERANKING_ENDPOINT="http://${your_ip}:8808" -``` - -### 2.2 Build Docker Image - -```bash -cd ../../../ -docker build -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/tei/Dockerfile . -``` - -To start a docker container, you have two options: - -- A. Run Docker with CLI -- B. Run Docker with Docker Compose - -You can choose one as needed. - -### 2.3 Run Docker with CLI (Option A) - -```bash -docker run -d --name="reranking-tei-server" -p 8000:8000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TEI_RERANKING_ENDPOINT=$TEI_RERANKING_ENDPOINT -e HF_TOKEN=$HF_TOKEN opea/reranking-tei:latest -``` - -### 2.4 Run Docker with Docker Compose (Option B) - -```bash -docker compose -f docker_compose_reranking.yaml up -d -``` - ---- - -## ✅3. Invoke Reranking Microservice - -The Reranking microservice exposes following API endpoints: - -- Check Service Status - - ```bash - curl http://localhost:8000/v1/health_check \ - -X GET \ - -H 'Content-Type: application/json' - ``` - -- Execute reranking process by providing query and documents - - ```bash - curl http://localhost:8000/v1/reranking \ - -X POST \ - -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \ - -H 'Content-Type: application/json' - ``` - - - You can add the parameter `top_n` to specify the return number of the reranker model, default value is 1. 
- - ```bash - curl http://localhost:8000/v1/reranking \ - -X POST \ - -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}], "top_n":2}' \ - -H 'Content-Type: application/json' - ``` diff --git a/comps/reranks/tei/__init__.py b/comps/reranks/tei/__init__.py deleted file mode 100644 index 916f3a44b..000000000 --- a/comps/reranks/tei/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/reranks/tei/local_reranking.py b/comps/reranks/tei/local_reranking.py deleted file mode 100644 index b9d5bf931..000000000 --- a/comps/reranks/tei/local_reranking.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os - -from sentence_transformers import CrossEncoder - -from comps import CustomLogger, RerankedDoc, SearchedDoc, ServiceType, opea_microservices, register_microservice - -logger = CustomLogger("local_reranking") -logflag = os.getenv("LOGFLAG", False) - - -@register_microservice( - name="opea_service@local_reranking", - service_type=ServiceType.RERANK, - endpoint="/v1/reranking", - host="0.0.0.0", - port=8000, - input_datatype=SearchedDoc, - output_datatype=RerankedDoc, -) -def reranking(input: SearchedDoc) -> RerankedDoc: - if logflag: - logger.info(input) - query_and_docs = [(input.initial_query, doc.text) for doc in input.retrieved_docs] - scores = reranker_model.predict(query_and_docs) - first_passage = sorted(list(zip(input.retrieved_docs, scores)), key=lambda x: x[1], reverse=True)[0][0] - res = RerankedDoc(initial_query=input.initial_query, reranked_docs=[first_passage]) - if logflag: - logger.info(res) - return res - - -if __name__ == "__main__": - reranker_model = CrossEncoder(model_name="BAAI/bge-reranker-base", max_length=512) - opea_microservices["opea_service@local_reranking"].start() diff --git a/comps/reranks/tei/reranking_tei.py b/comps/reranks/tei/reranking_tei.py deleted file mode 100644 index 682346f6d..000000000 --- a/comps/reranks/tei/reranking_tei.py +++ /dev/null @@ -1,105 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import json -import os -import time -from typing import Union - -import aiohttp - -from comps import ( - CustomLogger, - LLMParamsDoc, - SearchedDoc, - ServiceType, - opea_microservices, - register_microservice, - register_statistics, - statistics_dict, -) -from comps.cores.mega.utils import get_access_token -from comps.cores.proto.api_protocol import ( - ChatCompletionRequest, - RerankingRequest, - RerankingResponse, - RerankingResponseData, -) - -logger = CustomLogger("reranking_tei") -logflag = os.getenv("LOGFLAG", False) - -# Environment variables -TOKEN_URL = os.getenv("TOKEN_URL") -CLIENTID = os.getenv("CLIENTID") -CLIENT_SECRET = os.getenv("CLIENT_SECRET") - - -@register_microservice( - name="opea_service@reranking_tei", - service_type=ServiceType.RERANK, - endpoint="/v1/reranking", - host="0.0.0.0", - port=8000, - input_datatype=Union[SearchedDoc, RerankingRequest, ChatCompletionRequest], - output_datatype=Union[LLMParamsDoc, RerankingResponse, ChatCompletionRequest], -) -@register_statistics(names=["opea_service@reranking_tei"]) -async def reranking( - input: Union[SearchedDoc, RerankingRequest, ChatCompletionRequest] -) -> Union[LLMParamsDoc, RerankingResponse, ChatCompletionRequest]: - if logflag: - logger.info(input) - start = time.time() - reranking_results = [] - 
access_token = ( - get_access_token(TOKEN_URL, CLIENTID, CLIENT_SECRET) if TOKEN_URL and CLIENTID and CLIENT_SECRET else None - ) - if input.retrieved_docs: - docs = [doc.text for doc in input.retrieved_docs] - url = tei_reranking_endpoint + "/rerank" - if isinstance(input, SearchedDoc): - query = input.initial_query - else: - # for RerankingRequest, ChatCompletionRequest - query = input.input - data = {"query": query, "texts": docs} - headers = {"Content-Type": "application/json"} - if access_token: - headers = {"Content-Type": "application/json", "Authorization": f"Bearer {access_token}"} - async with aiohttp.ClientSession() as session: - async with session.post(url, data=json.dumps(data), headers=headers) as response: - response_data = await response.json() - - for best_response in response_data[: input.top_n]: - reranking_results.append( - {"text": input.retrieved_docs[best_response["index"]].text, "score": best_response["score"]} - ) - - statistics_dict["opea_service@reranking_tei"].append_latency(time.time() - start, None) - if isinstance(input, SearchedDoc): - result = [doc["text"] for doc in reranking_results] - if logflag: - logger.info(result) - return LLMParamsDoc(query=input.initial_query, documents=result) - else: - reranking_docs = [] - for doc in reranking_results: - reranking_docs.append(RerankingResponseData(text=doc["text"], score=doc["score"])) - if isinstance(input, RerankingRequest): - result = RerankingResponse(reranked_docs=reranking_docs) - if logflag: - logger.info(result) - return result - - if isinstance(input, ChatCompletionRequest): - input.reranked_docs = reranking_docs - input.documents = [doc["text"] for doc in reranking_results] - if logflag: - logger.info(input) - return input - - -if __name__ == "__main__": - tei_reranking_endpoint = os.getenv("TEI_RERANKING_ENDPOINT", "http://localhost:8080") - opea_microservices["opea_service@reranking_tei"].start() diff --git a/comps/reranks/videoqna/Dockerfile b/comps/reranks/videoqna/Dockerfile deleted file mode 100644 index dc422a3d4..000000000 --- a/comps/reranks/videoqna/Dockerfile +++ /dev/null @@ -1,24 +0,0 @@ - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -FROM python:3.11-slim - -ENV LANG=C.UTF-8 - -RUN useradd -m -s /bin/bash user && \ - mkdir -p /home/user && \ - chown -R user /home/user/ - -USER user - -COPY comps /home/user/comps - -RUN pip install --no-cache-dir --upgrade pip setuptools && \ - pip install --no-cache-dir -r /home/user/comps/reranks/videoqna/requirements.txt - -ENV PYTHONPATH=$PYTHONPATH:/home/user - -WORKDIR /home/user/comps/reranks/videoqna - -ENTRYPOINT ["python", "local_reranking.py"] diff --git a/comps/reranks/videoqna/README.md b/comps/reranks/videoqna/README.md deleted file mode 100644 index d10528d63..000000000 --- a/comps/reranks/videoqna/README.md +++ /dev/null @@ -1,74 +0,0 @@ -# Rerank Microservice with VideoQnA - -This README provides set-up instructions and comprehensive details regarding the reranking microservice with VideoQnA. -This microservice is designed that do result rerank for VideoQnA use case. Local rerank is used rather than rerank model. - -For the `VideoQnA` usecase, during the data preparation phase, frames are extracted from videos and stored in a vector database. -To identify the most relevant video, we count the occurrences of each video source among the retrieved data with rerank function `get_top_doc`. -This sorts the video as a descending list of names, ranked by their degree of match with the query. 
-Then we could send the `top_n` videos to the downstream LVM. - ---- - -## 🚀1. Start Microservice with Docker - -### 1.1 Build Images - -```bash -cd GenAIComps -docker build --no-cache -t opea/reranking-videoqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/videoqna/Dockerfile . -``` - -### 1.2 Start Rerank Service - -```bash -docker compose -f comps/reranks/videoqna/docker_compose_reranking.yaml up -d -# wait until ready -until docker logs reranking-videoqna-server 2>&1 | grep -q "Uvicorn running on"; do - sleep 2 -done -``` - -### 1.3 Configuration available by setting environment variable - -Configuration that available by setting environment variable: - -- CHUNK_DURATION: target chunk duration, should be aligned with VideoQnA dataprep. Default 10s. - ---- - -## ✅ 2. Invoke Reranking Microservice - -The Reranking microservice exposes following API endpoints: - -```bash -export ip_address=$(hostname -I | awk '{print $1}') - -curl -X 'POST' \ -"http://${ip_address}:8000/v1/reranking" \ --H 'accept: application/json' \ --H 'Content-Type: application/json' \ --d '{ - "retrieved_docs": [{"doc": [{"text": "this is the retrieved text"}]}], - "initial_query": "this is the query", - "top_n": 1, - "metadata": [ - {"other_key": "value", "video":"top_video_name", "timestamp":"20"}, - {"other_key": "value", "video":"second_video_name", "timestamp":"40"}, - {"other_key": "value", "video":"top_video_name", "timestamp":"20"} - ] -}' - -# Expected output result: -# {"id":"random number","video_url":"http://0.0.0.0:6005/top_video_name","chunk_start":20.0,"chunk_duration":10.0,"prompt":"this is the query","max_new_tokens":512} -``` - ---- - -## ♻️ 3. Cleaning the Container - -```bash -# remove the container -cid=$(docker ps -aq --filter "name=reranking-videoqna-server") -if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi -``` diff --git a/comps/reranks/videoqna/local_reranking.py b/comps/reranks/videoqna/local_reranking.py deleted file mode 100644 index ac234499b..000000000 --- a/comps/reranks/videoqna/local_reranking.py +++ /dev/null @@ -1,119 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import logging -import os -import re -import time - -from fastapi import HTTPException - -from comps import ( - LVMVideoDoc, - SearchedMultimodalDoc, - ServiceType, - opea_microservices, - register_microservice, - register_statistics, - statistics_dict, -) - -chunk_duration = os.getenv("CHUNK_DURATION", "10") or "10" -chunk_duration = float(chunk_duration) if chunk_duration.isdigit() else 10.0 - -file_server_endpoint = os.getenv("FILE_SERVER_ENDPOINT") or "http://0.0.0.0:6005" - -logging.basicConfig( - level=logging.INFO, format="%(levelname)s: [%(asctime)s] %(message)s", datefmt="%d/%m/%Y %I:%M:%S" -) - - -def get_top_doc(top_n, videos) -> list: - hit_score = {} - if videos is None: - return None - for video_name in videos: - try: - if video_name not in hit_score.keys(): - hit_score[video_name] = 0 - hit_score[video_name] += 1 - except KeyError as r: - logging.info(f"no video name {r}") - - x = dict(sorted(hit_score.items(), key=lambda item: -item[1])) # sorted dict of video name and score - top_n_names = list(x.keys())[:top_n] - logging.info(f"top docs = {x}") - logging.info(f"top n docs names = {top_n_names}") - - return top_n_names - - -def find_timestamp_from_video(metadata_list, video): - return next( - (metadata["timestamp"] for metadata in metadata_list if metadata["video"] == video), - None, - ) - - -def format_video_name(video_name): - # Check for an existing file extension - match = re.search(r"\.(\w+)$", video_name) - - if match: - extension = match.group(1) - # If the extension is not 'mp4', raise an error - if extension != "mp4": - raise ValueError(f"Invalid file extension: .{extension}. Only '.mp4' is allowed.") - - # Use regex to remove any suffix after the base name (e.g., '_interval_0', etc.) 
- base_name = re.sub(r"(_interval_\d+)?(\.mp4)?$", "", video_name) - - # Add the '.mp4' extension - formatted_name = f"{base_name}.mp4" - - return formatted_name - - -@register_microservice( - name="opea_service@reranking_videoqna", - service_type=ServiceType.RERANK, - endpoint="/v1/reranking", - host="0.0.0.0", - port=8000, - input_datatype=SearchedMultimodalDoc, - output_datatype=LVMVideoDoc, -) -@register_statistics(names=["opea_service@reranking_videoqna"]) -def reranking(input: SearchedMultimodalDoc) -> LVMVideoDoc: - start = time.time() - try: - # get top video name from metadata - video_names = [meta["video"] for meta in input.metadata] - top_video_names = get_top_doc(input.top_n, video_names) - - # only use the first top video - timestamp = find_timestamp_from_video(input.metadata, top_video_names[0]) - formatted_video_name = format_video_name(top_video_names[0]) - video_url = f"{file_server_endpoint.rstrip('/')}/{formatted_video_name}" - - result = LVMVideoDoc( - video_url=video_url, - prompt=input.initial_query, - chunk_start=timestamp, - chunk_duration=float(chunk_duration), - max_new_tokens=512, - ) - except ValueError as e: - raise HTTPException(status_code=400, detail=str(e)) - except Exception as e: - logging.error(f"Unexpected error in reranking: {str(e)}") - # Handle any other exceptions with a generic server error response - raise HTTPException(status_code=500, detail="An unexpected error occurred.") - - statistics_dict["opea_service@reranking_videoqna"].append_latency(time.time() - start, None) - - return result - - -if __name__ == "__main__": - opea_microservices["opea_service@reranking_videoqna"].start() diff --git a/comps/reranks/videoqna/requirements.txt b/comps/reranks/videoqna/requirements.txt deleted file mode 100644 index c7cc250eb..000000000 --- a/comps/reranks/videoqna/requirements.txt +++ /dev/null @@ -1,11 +0,0 @@ -datasets -docarray -fastapi -opentelemetry-api -opentelemetry-exporter-otlp -opentelemetry-sdk -Pillow -prometheus-fastapi-instrumentator -pydub -shortuuid -uvicorn diff --git a/comps/retrievers/elasticsearch/langchain/README.md b/comps/retrievers/elasticsearch/langchain/README.md index 455f8c783..3a799de19 100644 --- a/comps/retrievers/elasticsearch/langchain/README.md +++ b/comps/retrievers/elasticsearch/langchain/README.md @@ -75,7 +75,7 @@ export TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6060" ### 2.2 Build Docker Image ```bash -cd comps/retrievers/elasticsearch/langchain +cd ../../../../../ docker build -t opea/retriever-elasticsearch:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/elasticsearch/langchain/Dockerfile . 
``` diff --git a/comps/retrievers/elasticsearch/langchain/docker_compose_retriever.yaml b/comps/retrievers/elasticsearch/langchain/elasticsearch_langchain.yaml similarity index 100% rename from comps/retrievers/elasticsearch/langchain/docker_compose_retriever.yaml rename to comps/retrievers/elasticsearch/langchain/elasticsearch_langchain.yaml diff --git a/comps/retrievers/milvus/langchain/Dockerfile b/comps/retrievers/milvus/langchain/Dockerfile index 38223f9ec..94a482aaf 100644 --- a/comps/retrievers/milvus/langchain/Dockerfile +++ b/comps/retrievers/milvus/langchain/Dockerfile @@ -28,4 +28,4 @@ ENV PYTHONPATH=$PYTHONPATH:/home/user WORKDIR /home/user/comps/retrievers/milvus/langchain -ENTRYPOINT ["python", "retriever_milvus.py"] +ENTRYPOINT ["python", "retriever_milvus.py"] \ No newline at end of file diff --git a/comps/retrievers/milvus/langchain/README.md b/comps/retrievers/milvus/langchain/README.md index 61978cdf7..1edfa2e34 100644 --- a/comps/retrievers/milvus/langchain/README.md +++ b/comps/retrievers/milvus/langchain/README.md @@ -21,13 +21,13 @@ export https_proxy=${your_http_proxy} export MILVUS_HOST=${your_milvus_host_ip} export MILVUS_PORT=19530 export COLLECTION_NAME=${your_collection_name} -export MOSEC_EMBEDDING_ENDPOINT=${your_emdding_endpoint} +export TEI_EMBEDDING_ENDPOINT=${your_emdding_endpoint} ``` ### Start Retriever Service ```bash -export MOSEC_EMBEDDING_ENDPOINT="http://${your_ip}:6060" +export TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6060" python retriever_redis.py ``` @@ -43,7 +43,7 @@ docker build -t opea/retriever-milvus:latest --build-arg https_proxy=$https_prox ### Run Docker with CLI ```bash -docker run -d --name="retriever-milvus-server" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e MOSEC_EMBEDDING_ENDPOINT=${your_emdding_endpoint} -e MILVUS_HOST=${your_milvus_host_ip} opea/retriever-milvus:latest +docker run -d --name="retriever-milvus-server" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TEI_EMBEDDING_ENDPOINT=${your_emdding_endpoint} -e MILVUS_HOST=${your_milvus_host_ip} opea/retriever-milvus:latest ``` ## 🚀3. 
Consume Retriever Service diff --git a/comps/retrievers/milvus/langchain/config.py b/comps/retrievers/milvus/langchain/config.py index b201b35c9..92533eb0c 100644 --- a/comps/retrievers/milvus/langchain/config.py +++ b/comps/retrievers/milvus/langchain/config.py @@ -11,8 +11,8 @@ TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", "") MILVUS_HOST = os.getenv("MILVUS_HOST", "localhost") MILVUS_PORT = int(os.getenv("MILVUS_PORT", 19530)) COLLECTION_NAME = os.getenv("COLLECTION_NAME", "rag_milvus") -# MOSEC configuration -MOSEC_EMBEDDING_MODEL = os.environ.get("MOSEC_EMBEDDING_MODEL", "/home/user/bce-embedding-base_v1") -MOSEC_EMBEDDING_ENDPOINT = os.environ.get("MOSEC_EMBEDDING_ENDPOINT", "") -os.environ["OPENAI_API_BASE"] = MOSEC_EMBEDDING_ENDPOINT +# TEI configuration +TEI_EMBEDDING_MODEL = os.environ.get("TEI_EMBEDDING_MODEL", "/home/user/bce-embedding-base_v1") +TEI_EMBEDDING_ENDPOINT = os.environ.get("TEI_EMBEDDING_ENDPOINT", "") +os.environ["OPENAI_API_BASE"] = TEI_EMBEDDING_ENDPOINT os.environ["OPENAI_API_KEY"] = "Dummy key" diff --git a/comps/retrievers/milvus/langchain/retriever_milvus.py b/comps/retrievers/milvus/langchain/retriever_milvus.py index a4eb3ce43..b029d819f 100644 --- a/comps/retrievers/milvus/langchain/retriever_milvus.py +++ b/comps/retrievers/milvus/langchain/retriever_milvus.py @@ -10,9 +10,8 @@ from config import ( LOCAL_EMBEDDING_MODEL, MILVUS_HOST, MILVUS_PORT, - MOSEC_EMBEDDING_ENDPOINT, - MOSEC_EMBEDDING_MODEL, TEI_EMBEDDING_ENDPOINT, + TEI_EMBEDDING_MODEL, ) from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings, OpenAIEmbeddings from langchain_milvus.vectorstores import Milvus @@ -33,31 +32,6 @@ logger = CustomLogger("retriever_milvus") logflag = os.getenv("LOGFLAG", False) -class MosecEmbeddings(OpenAIEmbeddings): - def _get_len_safe_embeddings( - self, texts: List[str], *, engine: str, chunk_size: Optional[int] = None - ) -> List[List[float]]: - _chunk_size = chunk_size or self.chunk_size - batched_embeddings: List[List[float]] = [] - response = self.client.create(input=texts, **self._invocation_params) - if not isinstance(response, dict): - response = response.model_dump() - batched_embeddings.extend(r["embedding"] for r in response["data"]) - - _cached_empty_embedding: Optional[List[float]] = None - - def empty_embedding() -> List[float]: - nonlocal _cached_empty_embedding - if _cached_empty_embedding is None: - average_embedded = self.client.create(input="", **self._invocation_params) - if not isinstance(average_embedded, dict): - average_embedded = average_embedded.model_dump() - _cached_empty_embedding = average_embedded["data"][0]["embedding"] - return _cached_empty_embedding - - return [e if e is not None else empty_embedding() for e in batched_embeddings] - - @register_microservice( name="opea_service@retriever_milvus", service_type=ServiceType.RETRIEVER, @@ -104,12 +78,7 @@ async def retrieve(input: EmbedDoc) -> SearchedDoc: if __name__ == "__main__": # Create vectorstore - if MOSEC_EMBEDDING_ENDPOINT: - # create embeddings using Mosec endpoint service - if logflag: - logger.info(f"[ retriever_milvus ] MOSEC_EMBEDDING_ENDPOINT:{MOSEC_EMBEDDING_ENDPOINT}") - embeddings = MosecEmbeddings(model=MOSEC_EMBEDDING_MODEL) - elif TEI_EMBEDDING_ENDPOINT: + if TEI_EMBEDDING_ENDPOINT: # create embeddings using TEI endpoint service if logflag: logger.info(f"[ retriever_milvus ] TEI_EMBEDDING_ENDPOINT:{TEI_EMBEDDING_ENDPOINT}") diff --git a/comps/retrievers/neo4j/llama_index/compose.yaml 
b/comps/retrievers/neo4j/llama_index/neo4j_llama_index.yaml similarity index 100% rename from comps/retrievers/neo4j/llama_index/compose.yaml rename to comps/retrievers/neo4j/llama_index/neo4j_llama_index.yaml diff --git a/comps/retrievers/pathway/langchain/docker_compose_retriever.yaml b/comps/retrievers/pathway/langchain/pathway_langchain.yaml similarity index 100% rename from comps/retrievers/pathway/langchain/docker_compose_retriever.yaml rename to comps/retrievers/pathway/langchain/pathway_langchain.yaml diff --git a/comps/retrievers/pgvector/langchain/README.md b/comps/retrievers/pgvector/langchain/README.md index 2b6cb09cd..ef11cd182 100644 --- a/comps/retrievers/pgvector/langchain/README.md +++ b/comps/retrievers/pgvector/langchain/README.md @@ -71,7 +71,7 @@ export TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6060" ### 2.2 Build Docker Image ```bash -cd comps/retrievers/pgvector/langchain +cd ../../../../ docker build -t opea/retriever-pgvector:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/pgvector/langchain/Dockerfile . ``` diff --git a/comps/retrievers/pgvector/langchain/docker_compose_retriever.yaml b/comps/retrievers/pgvector/langchain/pgvector_langchain.yaml similarity index 100% rename from comps/retrievers/pgvector/langchain/docker_compose_retriever.yaml rename to comps/retrievers/pgvector/langchain/pgvector_langchain.yaml diff --git a/comps/retrievers/pinecone/langchain/docker_compose_retriever.yaml b/comps/retrievers/pinecone/langchain/pinecone_langchain.yaml similarity index 100% rename from comps/retrievers/pinecone/langchain/docker_compose_retriever.yaml rename to comps/retrievers/pinecone/langchain/pinecone_langchain.yaml diff --git a/comps/retrievers/redis/langchain/docker_compose_retriever.yaml b/comps/retrievers/redis/langchain/redis_langchain.yaml similarity index 100% rename from comps/retrievers/redis/langchain/docker_compose_retriever.yaml rename to comps/retrievers/redis/langchain/redis_langchain.yaml diff --git a/comps/retrievers/redis/langchain/retriever_redis.py b/comps/retrievers/redis/langchain/retriever_redis.py index ad5adbae6..5737acb38 100644 --- a/comps/retrievers/redis/langchain/retriever_redis.py +++ b/comps/retrievers/redis/langchain/retriever_redis.py @@ -30,7 +30,7 @@ from comps.cores.proto.api_protocol import ( RetrievalResponse, RetrievalResponseData, ) -from comps.embeddings.multimodal.bridgetower import BridgeTowerEmbedding +from comps.embeddings.src.integrations.dependency.bridgetower import BridgeTowerEmbedding logger = CustomLogger("retriever_redis") logflag = os.getenv("LOGFLAG", False) diff --git a/comps/retrievers/redis/llama_index/docker_compose_retriever.yaml b/comps/retrievers/redis/llama_index/redis_llama_index.yaml similarity index 100% rename from comps/retrievers/redis/llama_index/docker_compose_retriever.yaml rename to comps/retrievers/redis/llama_index/redis_llama_index.yaml diff --git a/comps/embeddings/multimodal_clip/Dockerfile b/comps/retrievers/src/Dockerfile similarity index 75% rename from comps/embeddings/multimodal_clip/Dockerfile rename to comps/retrievers/src/Dockerfile index 03703b500..3c2d12ab2 100644 --- a/comps/embeddings/multimodal_clip/Dockerfile +++ b/comps/retrievers/src/Dockerfile @@ -13,16 +13,16 @@ RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ chown -R user /home/user/ -USER user - COPY comps /home/user/comps +USER user + RUN pip install --no-cache-dir --upgrade pip setuptools && \ if [ ${ARCH} = "cpu" ]; then pip install --no-cache-dir 
 torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \ - pip install --no-cache-dir -r /home/user/comps/embeddings/multimodal_clip/requirements.txt + pip install --no-cache-dir -r /home/user/comps/retrievers/src/requirements.txt ENV PYTHONPATH=$PYTHONPATH:/home/user -WORKDIR /home/user/comps/embeddings/multimodal_clip +WORKDIR /home/user/comps/retrievers/src -ENTRYPOINT ["python", "embedding_multimodal.py"] +ENTRYPOINT ["python", "opea_retrievers_microservice.py"] diff --git a/comps/retrievers/src/README.md b/comps/retrievers/src/README.md new file mode 100644 index 000000000..9d31b1afa --- /dev/null +++ b/comps/retrievers/src/README.md @@ -0,0 +1,7 @@ +# Retriever Microservice + +This retriever microservice is a highly efficient search service designed for handling and retrieving embedding vectors. It operates by receiving an embedding vector as input and conducting a similarity search against vectors stored in a VectorDB database. Users must specify the VectorDB's URL and the index name, and the service searches within that index to find documents with the highest similarity to the input vector. + +The service primarily utilizes similarity measures in vector space to rapidly retrieve contextually similar documents. The vector-based retrieval approach is particularly suited for handling large datasets, offering fast and accurate search results that significantly enhance the efficiency and quality of information retrieval. + +Overall, this microservice provides robust backend support for applications requiring efficient similarity searches, playing a vital role in scenarios such as recommendation systems, information retrieval, or any other context where precise measurement of document similarity is crucial. diff --git a/comps/reranks/fastrag/__init__.py b/comps/retrievers/src/integrations/__init__.py similarity index 100% rename from comps/reranks/fastrag/__init__.py rename to comps/retrievers/src/integrations/__init__.py diff --git a/comps/retrievers/src/integrations/config.py b/comps/retrievers/src/integrations/config.py new file mode 100644 index 000000000..f728b84ef --- /dev/null +++ b/comps/retrievers/src/integrations/config.py @@ -0,0 +1,95 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os + + +####################################################### +# Common Functions # +####################################################### +def get_boolean_env_var(var_name, default_value=False): + """Retrieve the boolean value of an environment variable. + + Args: + var_name (str): The name of the environment variable to retrieve. + default_value (bool): The default value to return if the variable + is not found. + + Returns: + bool: The value of the environment variable, interpreted as a boolean. 
+ """ + true_values = {"true", "1", "t", "y", "yes"} + false_values = {"false", "0", "f", "n", "no"} + + # Retrieve the environment variable's value + value = os.getenv(var_name, "").lower() + + # Decide the boolean value based on the content of the string + if value in true_values: + return True + elif value in false_values: + return False + else: + return default_value + + +# Whether or not to enable langchain debugging +DEBUG = get_boolean_env_var("DEBUG", False) +# Set DEBUG env var to "true" if you wish to enable LC debugging module +if DEBUG: + import langchain + + langchain.debug = True + +# Embedding model +EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") +LOCAL_EMBEDDING_MODEL = os.getenv("LOCAL_EMBEDDING_MODEL", "maidalun1020/bce-embedding-base_v1") +TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT") +BRIDGE_TOWER_EMBEDDING = os.getenv("BRIDGE_TOWER_EMBEDDING", False) + +# Directory pathss +current_file_path = os.path.abspath(__file__) +parent_dir = os.path.dirname(current_file_path) + + +####################################################### +# Redis # +####################################################### +INDEX_NAME = os.getenv("INDEX_NAME", "rag_redis") +REDIS_HOST = os.getenv("REDIS_HOST", "localhost") +REDIS_PORT = int(os.getenv("REDIS_PORT", 6379)) + + +def format_redis_conn_from_env(): + redis_url = os.getenv("REDIS_URL", None) + if redis_url: + return redis_url + else: + using_ssl = get_boolean_env_var("REDIS_SSL", False) + start = "rediss://" if using_ssl else "redis://" + + # if using RBAC + password = os.getenv("REDIS_PASSWORD", None) + username = os.getenv("REDIS_USERNAME", "default") + if password is not None: + start += f"{username}:{password}@" + + return start + f"{REDIS_HOST}:{REDIS_PORT}" + + +REDIS_URL = format_redis_conn_from_env() + + +####################################################### +# Milvus # +####################################################### +MILVUS_HOST = os.getenv("MILVUS_HOST", "localhost") +MILVUS_PORT = int(os.getenv("MILVUS_PORT", 19530)) +MILVUS_URI = f"http://{MILVUS_HOST}:{MILVUS_PORT}" +INDEX_PARAMS = {"index_type": "FLAT", "metric_type": "IP", "params": {}} +COLLECTION_NAME = os.getenv("COLLECTION_NAME", "rag_milvus") +# TEI configuration +TEI_EMBEDDING_MODEL = os.environ.get("TEI_EMBEDDING_MODEL", "/home/user/bce-embedding-base_v1") +TEI_EMBEDDING_ENDPOINT = os.environ.get("TEI_EMBEDDING_ENDPOINT", "") +os.environ["OPENAI_API_BASE"] = TEI_EMBEDDING_ENDPOINT +os.environ["OPENAI_API_KEY"] = "Dummy key" diff --git a/comps/retrievers/src/integrations/milvus.py b/comps/retrievers/src/integrations/milvus.py new file mode 100644 index 000000000..548022c09 --- /dev/null +++ b/comps/retrievers/src/integrations/milvus.py @@ -0,0 +1,109 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + + +import os +from typing import List, Optional + +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings, OpenAIEmbeddings +from langchain_milvus.vectorstores import Milvus + +from comps import CustomLogger, EmbedDoc, OpeaComponent, SearchedDoc, ServiceType, TextDoc + +from .config import COLLECTION_NAME, INDEX_PARAMS, LOCAL_EMBEDDING_MODEL, MILVUS_URI, TEI_EMBEDDING_ENDPOINT + +logger = CustomLogger("milvus_retrievers") +logflag = os.getenv("LOGFLAG", False) + + +class OpeaMilvusRetriever(OpeaComponent): + """A specialized retriever component derived from OpeaComponent for milvus retriever services. 
+ + Attributes: + client (Milvus): An instance of the milvus client for vector database operations. + """ + + def __init__(self, name: str, description: str, config: dict = None): + super().__init__(name, ServiceType.RETRIEVER.name.lower(), description, config) + + self.embedder = self._initialize_embedder() + self.client = self._initialize_client() + + def _initialize_embedder(self): + if TEI_EMBEDDING_ENDPOINT: + # create embeddings using TEI endpoint service + if logflag: + logger.info(f"[ init embedder ] TEI_EMBEDDING_ENDPOINT:{TEI_EMBEDDING_ENDPOINT}") + embeddings = HuggingFaceHubEmbeddings(model=TEI_EMBEDDING_ENDPOINT) + else: + # create embeddings using local embedding model + if logflag: + logger.info(f"[ init embedder ] LOCAL_EMBEDDING_MODEL:{LOCAL_EMBEDDING_MODEL}") + embeddings = HuggingFaceBgeEmbeddings(model_name=LOCAL_EMBEDDING_MODEL) + return embeddings + + def _initialize_client(self) -> Milvus: + """Initializes the Milvus client.""" + try: + client = Milvus( + embedding_function=self.embedder, + collection_name=COLLECTION_NAME, + connection_args={"uri": MILVUS_URI}, + index_params=INDEX_PARAMS, + auto_id=True, + ) + return client + except Exception as e: + logger.error(f"failed to initialize milvus client: {e}") + return None + + def check_health(self) -> bool: + """Checks the health of the retriever service. + + Returns: + bool: True if the service is reachable and healthy, False otherwise. + """ + if logflag: + logger.info("[ check health ] start to check health of milvus") + try: + _ = self.client.client.list_collections() + if logflag: + logger.info("[ check health ] Successfully connected to Milvus!") + return True + except Exception as e: + logger.info(f"[ check health ] Failed to connect to Milvus: {e}") + return False + + async def invoke(self, input: EmbedDoc) -> SearchedDoc: + """Search the Milvus index for the most similar documents to the input query. + + Args: + input (EmbedDoc): The input query to search for. + Output: + Union[SearchedDoc, RetrievalResponse, ChatCompletionRequest]: The retrieved documents.
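As a rough illustration of what callers hand to `invoke`, the sketch below builds request objects for two of the search modes handled by the dispatch that follows. It assumes only the `EmbedDoc` fields the handlers actually read (`text`, `embedding`, `search_type`, `k`, `fetch_k`, `lambda_mult`); field defaults and validation live in the `comps` protocol definitions, not here:

```python
# Hypothetical request objects for the search_type dispatch below; the field names
# mirror the attributes accessed by the handlers, everything else is an assumption.
from comps import EmbedDoc

vector = [0.0] * 768  # placeholder embedding of the query text

# Plain top-k similarity search
similarity_query = EmbedDoc(text="What is deep learning?", embedding=vector, search_type="similarity", k=4)

# Maximal marginal relevance: trades off relevance against diversity of results
mmr_query = EmbedDoc(
    text="What is deep learning?",
    embedding=vector,
    search_type="mmr",
    k=4,
    fetch_k=20,
    lambda_mult=0.5,
)
# Either object can then be awaited through the component, e.g.
#     results = await retriever.invoke(mmr_query)
```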
+ """ + if logflag: + logger.info(input) + + if input.search_type == "similarity": + search_res = await self.client.asimilarity_search_by_vector(embedding=input.embedding, k=input.k) + elif input.search_type == "similarity_distance_threshold": + if input.distance_threshold is None: + raise ValueError("distance_threshold must be provided for " + "similarity_distance_threshold retriever") + search_res = await self.client.asimilarity_search_by_vector( + embedding=input.embedding, k=input.k, distance_threshold=input.distance_threshold + ) + elif input.search_type == "similarity_score_threshold": + docs_and_similarities = await self.client.asimilarity_search_with_relevance_scores( + query=input.text, k=input.k, score_threshold=input.score_threshold + ) + search_res = [doc for doc, _ in docs_and_similarities] + elif input.search_type == "mmr": + search_res = await self.client.amax_marginal_relevance_search( + query=input.text, k=input.k, fetch_k=input.fetch_k, lambda_mult=input.lambda_mult + ) + + if logflag: + logger.info(f"retrieve result: {search_res}") + + return search_res diff --git a/comps/retrievers/src/integrations/redis.py b/comps/retrievers/src/integrations/redis.py new file mode 100644 index 000000000..09d0a022f --- /dev/null +++ b/comps/retrievers/src/integrations/redis.py @@ -0,0 +1,128 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + + +import os +from typing import Union + +from langchain_community.vectorstores import Redis + +from comps import CustomLogger, EmbedDoc, EmbedMultimodalDoc, OpeaComponent, SearchedDoc, ServiceType +from comps.cores.proto.api_protocol import ChatCompletionRequest, EmbeddingResponse, RetrievalRequest, RetrievalResponse + +from .config import BRIDGE_TOWER_EMBEDDING, EMBED_MODEL, INDEX_NAME, REDIS_URL, TEI_EMBEDDING_ENDPOINT + +logger = CustomLogger("redis_retrievers") +logflag = os.getenv("LOGFLAG", False) + + +class OpeaRedisRetriever(OpeaComponent): + """A specialized retriever component derived from OpeaComponent for redis retriever services. + + Attributes: + client (redis.Redis): An instance of the redis client for vector database operations. + """ + + def __init__(self, name: str, description: str, config: dict = None): + super().__init__(name, ServiceType.RETRIEVER.name.lower(), description, config) + + # Create embeddings + if TEI_EMBEDDING_ENDPOINT: + # create embeddings using TEI endpoint service + from langchain_huggingface import HuggingFaceEndpointEmbeddings + + self.embeddings = HuggingFaceEndpointEmbeddings(model=TEI_EMBEDDING_ENDPOINT) + elif BRIDGE_TOWER_EMBEDDING: + logger.info("use bridge tower embedding") + from comps.embeddings.src.integrations.dependency.bridgetower import BridgeTowerEmbedding + + self.embeddings = BridgeTowerEmbedding() + else: + # create embeddings using local embedding model + from langchain_community.embeddings import HuggingFaceBgeEmbeddings + + self.embeddings = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) + self.client = self._initialize_client() + + def _initialize_client(self) -> Redis: + """Initializes the redis client.""" + try: + client = Redis(embedding=self.embeddings, index_name=INDEX_NAME, redis_url=REDIS_URL) + return client + except Exception as e: + logger.error(f"fail to initialize redis client: {e}") + return None + + def check_health(self) -> bool: + """Checks the health of the retriever service. + + Returns: + bool: True if the service is reachable and healthy, False otherwise. 
+ """ + if logflag: + logger.info("[ health check ] start to check health of redis") + try: + if self.client.client.ping(): + if logflag: + logger.info("[ health check ] Successfully connected to Redis!") + return True + except Exception as e: + logger.info(f"[ health check ] Failed to connect to Redis: {e}") + return False + + async def invoke( + self, input: Union[EmbedDoc, EmbedMultimodalDoc, RetrievalRequest, ChatCompletionRequest] + ) -> Union[SearchedDoc, RetrievalResponse, ChatCompletionRequest]: + """Search the Redis index for the most similar documents to the input query. + + Args: + input (Union[EmbedDoc, RetrievalRequest, ChatCompletionRequest]): The input query to search for. + Output: + Union[SearchedDoc, RetrievalResponse, ChatCompletionRequest]: The retrieved documents. + """ + if logflag: + logger.info(input) + + # check if the Redis index has data + if self.client.client.keys() == []: + search_res = [] + else: + if isinstance(input, EmbedDoc) or isinstance(input, EmbedMultimodalDoc): + embedding_data_input = input.embedding + else: + # for RetrievalRequest, ChatCompletionRequest + if isinstance(input.embedding, EmbeddingResponse): + embeddings = input.embedding.data + embedding_data_input = [] + for emb in embeddings: + embedding_data_input.append(emb.embedding) + else: + embedding_data_input = input.embedding + + # if the Redis index has data, perform the search + if input.search_type == "similarity": + search_res = await self.client.asimilarity_search_by_vector(embedding=embedding_data_input, k=input.k) + elif input.search_type == "similarity_distance_threshold": + if input.distance_threshold is None: + raise ValueError( + "distance_threshold must be provided for " + "similarity_distance_threshold retriever" + ) + search_res = await self.client.asimilarity_search_by_vector( + embedding=input.embedding, k=input.k, distance_threshold=input.distance_threshold + ) + elif input.search_type == "similarity_score_threshold": + docs_and_similarities = await self.client.asimilarity_search_with_relevance_scores( + query=input.text, k=input.k, score_threshold=input.score_threshold + ) + search_res = [doc for doc, _ in docs_and_similarities] + elif input.search_type == "mmr": + search_res = await self.client.amax_marginal_relevance_search( + query=input.text, k=input.k, fetch_k=input.fetch_k, lambda_mult=input.lambda_mult + ) + else: + raise ValueError(f"{input.search_type} not valid") + + if logflag: + logger.info(search_res) + + return search_res diff --git a/comps/retrievers/src/opea_retrievers_microservice.py b/comps/retrievers/src/opea_retrievers_microservice.py new file mode 100644 index 000000000..9b0681cc2 --- /dev/null +++ b/comps/retrievers/src/opea_retrievers_microservice.py @@ -0,0 +1,117 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + + +import os +import time +from typing import Union + +from integrations.milvus import OpeaMilvusRetriever +from integrations.redis import OpeaRedisRetriever + +from comps import ( + CustomLogger, + EmbedDoc, + EmbedMultimodalDoc, + OpeaComponentController, + SearchedDoc, + SearchedMultimodalDoc, + ServiceType, + TextDoc, + opea_microservices, + register_microservice, + register_statistics, + statistics_dict, +) +from comps.cores.proto.api_protocol import ( + ChatCompletionRequest, + RetrievalRequest, + RetrievalResponse, + RetrievalResponseData, +) + +logger = CustomLogger("opea_retrievers_microservice") +logflag = os.getenv("LOGFLAG", False) +retriever_type = os.getenv("RETRIEVER_TYPE", False) +# 
Initialize Controller +controller = OpeaComponentController() + + +# Register components +try: + # Instantiate Retrievers components and register it to controller + if retriever_type == "redis": + redis_retriever = OpeaRedisRetriever( + name="OpeaRedisRetriever", + description="OPEA Redis Retriever Service", + ) + controller.register(redis_retriever) + elif retriever_type == "milvus": + milvus_retriever = OpeaMilvusRetriever( + name="OpeaMilvusRetriever", + description="OPEA Milvus Retriever Service", + ) + controller.register(milvus_retriever) + + # Discover and activate a healthy component + controller.discover_and_activate() +except Exception as e: + logger.error(f"Failed to initialize components: {e}") + + +@register_microservice( + name="opea_service@retrievers", + service_type=ServiceType.RETRIEVER, + endpoint="/v1/retrieval", + host="0.0.0.0", + port=7000, +) +@register_statistics(names=["opea_service@retrievers"]) +async def ingest_files( + input: Union[EmbedDoc, EmbedMultimodalDoc, RetrievalRequest, ChatCompletionRequest] +) -> Union[SearchedDoc, SearchedMultimodalDoc, RetrievalResponse, ChatCompletionRequest]: + start = time.time() + + if logflag: + logger.info(f"[ retrieval ] input:{input}") + + try: + # Use the controller to invoke the active component + response = await controller.invoke(input) + + # return different response format + retrieved_docs = [] + if isinstance(input, EmbedDoc) or isinstance(input, EmbedMultimodalDoc): + metadata_list = [] + for r in response: + metadata_list.append(r.metadata) + retrieved_docs.append(TextDoc(text=r.page_content)) + result = SearchedMultimodalDoc( + retrieved_docs=retrieved_docs, initial_query=input.text, metadata=metadata_list + ) + else: + for r in response: + retrieved_docs.append(RetrievalResponseData(text=r.page_content, metadata=r.metadata)) + if isinstance(input, RetrievalRequest): + result = RetrievalResponse(retrieved_docs=retrieved_docs) + elif isinstance(input, ChatCompletionRequest): + input.retrieved_docs = retrieved_docs + input.documents = [doc.text for doc in retrieved_docs] + result = input + + # Record statistics + statistics_dict["opea_service@retrievers"].append_latency(time.time() - start, None) + + if logflag: + logger.info(f"[ retrieval ] Output generated: {response}") + + return result + + except Exception as e: + logger.error(f"[ retrieval ] Error during retrieval invocation: {e}") + raise + + +if __name__ == "__main__": + logger.info("OPEA Retriever Microservice is starting...") + opea_microservices["opea_service@retrievers"].start() diff --git a/comps/retrievers/src/requirements.txt b/comps/retrievers/src/requirements.txt new file mode 100644 index 000000000..c15e7811a --- /dev/null +++ b/comps/retrievers/src/requirements.txt @@ -0,0 +1,15 @@ +docarray[full] +easyocr +fastapi +langchain_community --extra-index-url https://download.pytorch.org/whl/cpu +langchain_huggingface --extra-index-url https://download.pytorch.org/whl/cpu +langchain_milvus --extra-index-url https://download.pytorch.org/whl/cpu +opentelemetry-api +opentelemetry-exporter-otlp +opentelemetry-sdk +prometheus-fastapi-instrumentator +pymupdf +redis +sentence_transformers +shortuuid +uvicorn diff --git a/comps/retrievers/vdms/langchain/docker_compose_retriever.yaml b/comps/retrievers/vdms/langchain/vdms_langchain.yaml similarity index 100% rename from comps/retrievers/vdms/langchain/docker_compose_retriever.yaml rename to comps/retrievers/vdms/langchain/vdms_langchain.yaml diff --git a/comps/vectorstores/elasticsearch/docker-compose.yml 
b/comps/vectorstores/elasticsearch/elasticsearch.yaml similarity index 100% rename from comps/vectorstores/elasticsearch/docker-compose.yml rename to comps/vectorstores/elasticsearch/elasticsearch.yaml diff --git a/comps/vectorstores/milvus/docker-compose.yml b/comps/vectorstores/milvus/docker-compose.yaml similarity index 100% rename from comps/vectorstores/milvus/docker-compose.yml rename to comps/vectorstores/milvus/docker-compose.yaml diff --git a/comps/vectorstores/pathway/README.md b/comps/vectorstores/pathway/README.md index 65ac6aff9..c5c82327b 100644 --- a/comps/vectorstores/pathway/README.md +++ b/comps/vectorstores/pathway/README.md @@ -57,7 +57,7 @@ For more information, see the relevant Pathway docs: Build the Docker and run the Pathway Vector Store: ```bash -docker build --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/vectorstore-pathway:latest -f comps/vectorstores/pathway/Dockerfile . +docker build --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/vectorstore-pathway:latest -f comps/vectorstores/src/pathway/Dockerfile . # with locally loaded model, you may add `EMBED_MODEL` env variable to configure the model. docker run -e PATHWAY_HOST=${PATHWAY_HOST} -e PATHWAY_PORT=${PATHWAY_PORT} -e http_proxy=$http_proxy -e https_proxy=$https_proxy -v ./data:/app/data -p ${PATHWAY_PORT}:${PATHWAY_PORT} opea/vectorstore-pathway:latest diff --git a/comps/vectorstores/pgvector/docker-compose.yml b/comps/vectorstores/pgvector/pgvector.yaml similarity index 100% rename from comps/vectorstores/pgvector/docker-compose.yml rename to comps/vectorstores/pgvector/pgvector.yaml diff --git a/comps/vectorstores/redis/docker-compose-redis.yml b/comps/vectorstores/redis/redis.yaml similarity index 100% rename from comps/vectorstores/redis/docker-compose-redis.yml rename to comps/vectorstores/redis/redis.yaml diff --git a/comps/vectorstores/vdms/docker-compose-vdms.yml b/comps/vectorstores/vdms/vdms.yaml similarity index 100% rename from comps/vectorstores/vdms/docker-compose-vdms.yml rename to comps/vectorstores/vdms/vdms.yaml diff --git a/tests/nginx/test_nginx.sh b/tests/3rd_parties/test_3rd_parties_nginx.sh similarity index 98% rename from tests/nginx/test_nginx.sh rename to tests/3rd_parties/test_3rd_parties_nginx.sh index f2ac45239..71b185daa 100644 --- a/tests/nginx/test_nginx.sh +++ b/tests/3rd_parties/test_3rd_parties_nginx.sh @@ -10,7 +10,7 @@ ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH - docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/nginx:comps -f comps/nginx/Dockerfile . + docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/nginx:comps -f comps/3rd_parties/nginx/src/Dockerfile . if [ $? 
-ne 0 ]; then echo "opea/nginx built fail" exit 1 diff --git a/tests/cores/common/test_component.py b/tests/cores/common/test_component.py index 4af06a0e5..7b1a2cdbf 100644 --- a/tests/cores/common/test_component.py +++ b/tests/cores/common/test_component.py @@ -1,8 +1,9 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import asyncio import unittest -from unittest.mock import MagicMock +from unittest.mock import AsyncMock, MagicMock from comps import OpeaComponent, OpeaComponentController @@ -12,7 +13,7 @@ class TestOpeaComponent(unittest.TestCase): def check_health(self) -> bool: return True - def invoke(self, *args, **kwargs): + async def invoke(self, *args, **kwargs): return "Service accessed" def test_initialization(self): @@ -79,7 +80,7 @@ class TestOpeaComponentController(unittest.TestCase): def test_invoke_no_active_component(self): controller = OpeaComponentController() with self.assertRaises(RuntimeError): - controller.invoke("arg1", key="value") + asyncio.run(controller.invoke("arg1", key="value")) def test_invoke_with_active_component(self): controller = OpeaComponentController() @@ -88,14 +89,14 @@ class TestOpeaComponentController(unittest.TestCase): component = MagicMock() component.name = "TestComponent" component.check_health.return_value = True - component.invoke = MagicMock(return_value="Service accessed") + component.invoke = AsyncMock(return_value="Service accessed") # Register and activate the component controller.register(component) controller.discover_and_activate() # Invoke using the active component - result = controller.invoke("arg1", key="value") + result = asyncio.run(controller.invoke("arg1", key="value")) # Assert the result and method call self.assertEqual(result, "Service accessed") @@ -109,7 +110,7 @@ class TestOpeaComponentController(unittest.TestCase): component1 = MagicMock() component1.name = "Component1" component1.check_health.return_value = True - component1.invoke = MagicMock(return_value="Result from Component1") + component1.invoke = AsyncMock(return_value="Result from Component1") # Register the component controller.register(component1) @@ -121,7 +122,7 @@ class TestOpeaComponentController(unittest.TestCase): self.assertEqual(controller.active_component, component1) # Call invoke separately - result = controller.invoke("test_input") + result = asyncio.run(controller.invoke("test_input")) self.assertEqual(result, "Result from Component1") component1.invoke.assert_called_once_with("test_input") diff --git a/tests/dataprep/test_dataprep_milvus_langchain.sh b/tests/dataprep/test_dataprep_milvus.sh similarity index 80% rename from tests/dataprep/test_dataprep_milvus_langchain.sh rename to tests/dataprep/test_dataprep_milvus.sh index 256b686f0..5732e6a50 100644 --- a/tests/dataprep/test_dataprep_milvus_langchain.sh +++ b/tests/dataprep/test_dataprep_milvus.sh @@ -11,16 +11,8 @@ ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH echo $(pwd) - # langchain mosec embedding image - docker build --no-cache -t opea/langchain-mosec:comps --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy -f comps/embeddings/mosec/langchain/dependency/Dockerfile . - if [ $? 
-ne 0 ]; then - echo "opea/langchain-mosec built fail" - exit 1 - else - echo "opea/langchain-mosec built successful" - fi # dataprep milvus image - docker build --no-cache -t opea/dataprep-milvus:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/milvus/langchain/Dockerfile . + docker build --no-cache -t opea/dataprep-milvus:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . if [ $? -ne 0 ]; then echo "opea/dataprep-milvus built fail" exit 1 @@ -37,17 +29,17 @@ function start_service() { # sed '/- \${DOCKER_VOLUME_DIRECTORY:-\.}\/volumes\/milvus:\/var\/lib\/milvus/a \ \ \ \ \ \ - \${DOCKER_VOLUME_DIRECTORY:-\.}\/milvus.yaml:\/milvus\/configs\/milvus.yaml' -i docker-compose.yml docker compose up -d - # set service ports - mosec_embedding_port=5021 - dataprep_service_port=5022 - - # start mosec embedding service - docker run -d --name="test-comps-dataprep-milvus-mosec-server" -p $mosec_embedding_port:8000 -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/langchain-mosec:comps + # start embedding service + embed_port=5021 + embed_model="BAAI/bge-base-en-v1.5" + docker run -d -p $embed_port:80 -v ./data:/data --name test-comps-dataprep-milvus-tei-server -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $embed_model + export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${embed_port}" # start dataprep service - MOSEC_EMBEDDING_ENDPOINT="http://${ip_address}:${mosec_embedding_port}" MILVUS_HOST=${ip_address} - docker run -d --name="test-comps-dataprep-milvus-server" -p ${dataprep_service_port}:6010 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e MOSEC_EMBEDDING_ENDPOINT=${MOSEC_EMBEDDING_ENDPOINT} -e MILVUS_HOST=${MILVUS_HOST} -e LOGFLAG=true --ipc=host opea/dataprep-milvus:comps + dataprep_service_port=5022 + HF_TOKEN=${HF_TOKEN} + docker run -d --name="test-comps-dataprep-milvus-server" -p ${dataprep_service_port}:5000 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT} -e MILVUS_HOST=${MILVUS_HOST} -e HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} -e LOGFLAG=true -e DATAPREP_TYPE="milvus" --ipc=host opea/dataprep-milvus:comps sleep 1m } @@ -62,7 +54,7 @@ function validate_service() { cd $LOG_PATH HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F 'files=@./dataprep_file.txt' -H 'Content-Type: multipart/form-data' "$URL") elif [[ $SERVICE_NAME == *"dataprep_upload_link"* ]]; then - HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F 'link_list=["https://www.ces.tech/"]' -F "chunk_size=500" "$URL") + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F 'link_list=["https://www.ces.tech/"]' -F 'chunk_size=400' "$URL") elif [[ $SERVICE_NAME == *"dataprep_get"* ]]; then HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -H 'Content-Type: application/json' "$URL") elif [[ $SERVICE_NAME == *"dataprep_del"* ]]; then @@ -78,9 +70,9 @@ function validate_service() { # check response status if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. 
Received status was $HTTP_STATUS" - ##################### + if [[ $SERVICE_NAME == *"dataprep_upload_link"* ]]; then - docker logs test-comps-dataprep-milvus-mosec-server >> ${LOG_PATH}/mosec-embedding.log + docker logs test-comps-dataprep-milvus-tei-server >> ${LOG_PATH}/tei-embedding.log fi exit 1 else @@ -101,9 +93,9 @@ function validate_microservice() { cd $LOG_PATH dataprep_service_port=5022 - # test /v1/dataprep/delete_file + # test /v1/dataprep/delete validate_service \ - "http://${ip_address}:${dataprep_service_port}/v1/dataprep/delete_file" \ + "http://${ip_address}:${dataprep_service_port}/v1/dataprep/delete" \ '{"status":true}' \ "dataprep_del" \ "test-comps-dataprep-milvus-server" @@ -111,21 +103,21 @@ function validate_microservice() { # test /v1/dataprep upload file echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > $LOG_PATH/dataprep_file.txt validate_service \ - "http://${ip_address}:${dataprep_service_port}/v1/dataprep" \ + "http://${ip_address}:${dataprep_service_port}/v1/dataprep/ingest" \ "Data preparation succeeded" \ "dataprep_upload_file" \ "test-comps-dataprep-milvus-server" # test /v1/dataprep upload link validate_service \ - "http://${ip_address}:${dataprep_service_port}/v1/dataprep" \ + "http://${ip_address}:${dataprep_service_port}/v1/dataprep/ingest" \ "Data preparation succeeded" \ "dataprep_upload_link" \ "test-comps-dataprep-milvus-server" # test /v1/dataprep/get_file validate_service \ - "http://${ip_address}:${dataprep_service_port}/v1/dataprep/get_file" \ + "http://${ip_address}:${dataprep_service_port}/v1/dataprep/get" \ '{"name":' \ "dataprep_get" \ "test-comps-dataprep-milvus-server" diff --git a/tests/dataprep/test_dataprep_pinecone_langchain.sh b/tests/dataprep/test_dataprep_pinecone_langchain.sh index 2a4eda8ef..31661de5e 100644 --- a/tests/dataprep/test_dataprep_pinecone_langchain.sh +++ b/tests/dataprep/test_dataprep_pinecone_langchain.sh @@ -24,7 +24,7 @@ function start_service() { export PINECONE_INDEX_NAME="test-index" export HUGGINGFACEHUB_API_TOKEN=$HF_TOKEN - docker run -d --name="test-comps-dataprep-pinecone" -p 5039:6007 -p 5040:6008 -p 5041:6009 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e PINECONE_API_KEY=$PINECONE_API_KEY -e PINECONE_INDEX_NAME=$PINECONE_INDEX_NAME opea/dataprep-pinecone:comps + docker run -d --name="test-comps-dataprep-pinecone" -p 5039:6007 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e PINECONE_API_KEY=$PINECONE_API_KEY -e PINECONE_INDEX_NAME=$PINECONE_INDEX_NAME -e LOGFLAG=true opea/dataprep-pinecone:comps sleep 1m } @@ -41,7 +41,7 @@ function validate_microservice() { docker logs test-comps-dataprep-pinecone exit 1 fi - DELETE_URL="http://$ip_address:5041/v1/dataprep/delete_file" + DELETE_URL="http://$ip_address:5039/v1/dataprep/delete_file" result=$(curl --noproxy $ip_address --location --request POST \ -d '{"file_path": "all"}' -H 'Content-Type: application/json' $DELETE_URL) if [[ $result == *"true"* ]]; then diff --git a/tests/dataprep/test_dataprep_redis_langchain.sh b/tests/dataprep/test_dataprep_redis.sh similarity index 83% rename from tests/dataprep/test_dataprep_redis_langchain.sh rename to tests/dataprep/test_dataprep_redis.sh index 44cf35f39..13afc43c8 100644 --- 
a/tests/dataprep/test_dataprep_redis_langchain.sh +++ b/tests/dataprep/test_dataprep_redis.sh @@ -11,7 +11,7 @@ ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build --no-cache -t opea/dataprep-redis:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain/Dockerfile . + docker build -t opea/dataprep-redis:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . if [ $? -ne 0 ]; then echo "opea/dataprep-redis built fail" exit 1 @@ -23,17 +23,49 @@ function build_docker_images() { function start_service() { REDIS_PORT=6380 docker run -d --name="test-comps-dataprep-redis-langchain" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p $REDIS_PORT:6379 -p 8002:8001 --ipc=host redis/redis-stack:7.2.0-v9 - dataprep_service_port=5013 + + embed_port=5439 + embed_model="BAAI/bge-base-en-v1.5" + docker run -d -p $embed_port:80 -v ./data:/data --name test-comps-dataprep-redis-langchain-tei-server -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $embed_model + export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${embed_port}" + + export dataprep_service_port=5013 REDIS_URL="redis://${ip_address}:${REDIS_PORT}" - docker run -d --name="test-comps-dataprep-redis-langchain-server" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e REDIS_HOST=$ip_address -e REDIS_PORT=$REDIS_PORT -p ${dataprep_service_port}:6007 --ipc=host opea/dataprep-redis:comps + export INDEX_NAME="rag_redis" + export HF_TOKEN=${HF_TOKEN} + docker run -d --name="test-comps-dataprep-redis-langchain-server" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e REDIS_HOST=$ip_address -e REDIS_PORT=$REDIS_PORT -e TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT} -e INDEX_NAME=$INDEX_NAME -e HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} -e LOGFLAG=true -e DATAPREP_TYPE="redis" -p ${dataprep_service_port}:5000 --ipc=host opea/dataprep-redis:comps sleep 1m } function validate_microservice() { cd $LOG_PATH + export dataprep_service_port=5013 - # test /v1/dataprep upload file - URL="http://${ip_address}:$dataprep_service_port/v1/dataprep" + # test /v1/dataprep/delete + URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/delete" + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d '{"file_path": "all"}' -H 'Content-Type: application/json' "$URL") + HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') + RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') + SERVICE_NAME="dataprep - del" + + if [ "$HTTP_STATUS" -ne "200" ]; then + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_del.log + exit 1 + else + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + fi + # check response body + if [[ "$RESPONSE_BODY" != *'{"status":true}'* ]]; then + echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" + docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_del.log + exit 1 + else + echo "[ $SERVICE_NAME ] Content is as expected." 
+ fi + + # test /v1/dataprep/ingest upload file + URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/ingest" echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > $LOG_PATH/dataprep_file.txt HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F 'files=@./dataprep_file.txt' -H 'Content-Type: multipart/form-data' "$URL") HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') @@ -55,8 +87,8 @@ function validate_microservice() { echo "[ $SERVICE_NAME ] Content is as expected." fi - # test /v1/dataprep upload link - URL="http://${ip_address}:$dataprep_service_port/v1/dataprep" + # test /v1/dataprep/ingest upload link + URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/ingest" HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F 'link_list=["https://www.ces.tech/"]' "$URL") HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') @@ -78,8 +110,8 @@ function validate_microservice() { echo "[ $SERVICE_NAME ] Content is as expected." fi - # test /v1/dataprep/get_file - URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/get_file" + # test /v1/dataprep/get + URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/get" HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST "$URL") HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') @@ -100,29 +132,6 @@ function validate_microservice() { echo "[ $SERVICE_NAME ] Content is as expected." fi - # test /v1/dataprep/delete_file - URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/delete_file" - HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d '{"file_path": "dataprep_file.txt"}' -H 'Content-Type: application/json' "$URL") - HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') - RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') - SERVICE_NAME="dataprep - del" - - # check response status - if [ "$HTTP_STATUS" -ne "200" ]; then - echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_del.log - exit 1 - else - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." - fi - # check response body - if [[ "$RESPONSE_BODY" != *'{"status":true}'* ]]; then - echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_del.log - exit 1 - else - echo "[ $SERVICE_NAME ] Content is as expected." 
- fi } function stop_docker() { diff --git a/tests/dataprep/test_dataprep_redis_langchain_ray.sh b/tests/dataprep/test_dataprep_redis_langchain_ray.sh index 4a50fabaa..84851b8d6 100644 --- a/tests/dataprep/test_dataprep_redis_langchain_ray.sh +++ b/tests/dataprep/test_dataprep_redis_langchain_ray.sh @@ -29,7 +29,7 @@ function start_service() { export REDIS_URL="redis://${ip_address}:5038" export INDEX_NAME="rag-redis" echo "Starting dataprep-redis-server" - docker run -d --name="test-comps-dataprep-redis-ray-server" --runtime=runc -p 5037:6007 -p 6010:6008 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e TIMEOUT_SECONDS=600 opea/dataprep-on-ray-redis:comps + docker run -d --name="test-comps-dataprep-redis-ray-server" --runtime=runc -p 5037:6007 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e TIMEOUT_SECONDS=600 -e LOGFLAG=true opea/dataprep-on-ray-redis:comps sleep 10 echo "Service started successfully" @@ -40,6 +40,7 @@ function validate_microservice() { dataprep_service_port=5037 export URL="http://${ip_address}:$dataprep_service_port/v1/dataprep" + export GET_URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/get_file" echo "Starting validating the microservice" export PATH="${HOME}/miniforge3/bin:$PATH" @@ -52,6 +53,7 @@ import json import os proxies = {'http':""} url = os.environ['URL'] +get_url = os.environ['GET_URL'] print("test single file ingestion") file_list = ["dataprep_file.txt"] @@ -70,8 +72,7 @@ resp.raise_for_status() # Raise an exception for unsuccessful HTTP status codes print("Request successful!") print("test get file structure") -url = 'http://localhost:6010/v1/dataprep/get_file' -resp = requests.request('POST', url=url, headers={}, proxies=proxies) +resp = requests.request('POST', url=get_url, headers={}, proxies=proxies) print(resp.text) assert "name" in resp.text, "Response does not meet expectation." 
print("Request successful!") diff --git a/tests/dataprep/test_dataprep_redis_llama_index.sh b/tests/dataprep/test_dataprep_redis_llama_index.sh index d76a99a85..4015887e6 100644 --- a/tests/dataprep/test_dataprep_redis_llama_index.sh +++ b/tests/dataprep/test_dataprep_redis_llama_index.sh @@ -23,9 +23,8 @@ function build_docker_images() { function start_service() { docker run -d --name="test-comps-dataprep-redis-llama-index" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 6381:6379 -p 8003:8001 --ipc=host redis/redis-stack:7.2.0-v9 dataprep_service_port=5012 - dataprep_file_service_port=5017 REDIS_URL="redis://${ip_address}:6381" - docker run -d --name="test-comps-dataprep-redis-llama-index-server" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -p ${dataprep_service_port}:6007 -p ${dataprep_file_service_port}:6008 --ipc=host opea/dataprep-redis-llama-index:comps + docker run -d --name="test-comps-dataprep-redis-llama-index-server" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -p ${dataprep_service_port}:6007 -e LOGFLAG=true --ipc=host opea/dataprep-redis-llama-index:comps sleep 2m } @@ -56,8 +55,7 @@ function validate_microservice() { rm -rf $LOG_PATH/dataprep_file.txt # test /v1/dataprep/get_file - dataprep_file_service_port=5017 - URL="http://${ip_address}:$dataprep_file_service_port/v1/dataprep/get_file" + URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/get_file" HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -H 'Content-Type: application/json' "$URL") if [ "$HTTP_STATUS" -eq 200 ]; then echo "[ dataprep - file ] HTTP status is 200. Checking content..." diff --git a/tests/embeddings/test_embeddings_mosec_langchain.sh b/tests/embeddings/test_embeddings_mosec_langchain.sh deleted file mode 100644 index 0c7b1bc3c..000000000 --- a/tests/embeddings/test_embeddings_mosec_langchain.sh +++ /dev/null @@ -1,102 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -set -x - -WORKPATH=$(dirname "$PWD") -ip_address=$(hostname -I | awk '{print $1}') - -function build_mosec_docker_images() { - cd $WORKPATH - echo $(pwd) - docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy --no-cache -t opea/embedding-langchain-mosec-endpoint:comps -f comps/embeddings/mosec/langchain/dependency/Dockerfile . - if [ $? -ne 0 ]; then - echo "opea/embedding-langchain-mosec-endpoint built fail" - exit 1 - else - echo "opea/embedding-langchain-mosec-endpoint built successful" - fi -} - -function build_docker_images() { - cd $WORKPATH - echo $(pwd) - docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy --no-cache -t opea/embedding-langchain-mosec:comps -f comps/embeddings/mosec/langchain/Dockerfile . - if [ $? 
-ne 0 ]; then - echo "opea/embedding-langchain-mosec built fail" - exit 1 - else - echo "opea/embedding-langchain-mosec built successful" - fi -} - -function start_service() { - mosec_endpoint=5001 - model="BAAI/bge-base-en-v1.5" - unset http_proxy - docker run -d --name="test-comps-embedding-langchain-mosec-endpoint" -p $mosec_endpoint:8000 opea/embedding-langchain-mosec-endpoint:comps - export MOSEC_EMBEDDING_ENDPOINT="http://${ip_address}:${mosec_endpoint}" - mosec_service_port=5002 - docker run -d --name="test-comps-embedding-langchain-mosec-server" -e LOGFLAG=True -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p ${mosec_service_port}:6000 --ipc=host -e MOSEC_EMBEDDING_ENDPOINT=$MOSEC_EMBEDDING_ENDPOINT opea/embedding-langchain-mosec:comps - sleep 3m -} - -function validate_service() { - local INPUT_DATA="$1" - mosec_service_port=5002 - http_proxy="" curl http://${ip_address}:$mosec_service_port/v1/embeddings \ - -X POST \ - -d "$INPUT_DATA" \ - -H 'Content-Type: application/json' - if [ $? -eq 0 ]; then - echo "curl command executed successfully" - else - echo "curl command failed" - docker logs test-comps-embedding-langchain-mosec-endpoint - docker logs test-comps-embedding-langchain-mosec-server - exit 1 - fi -} - -function validate_microservice() { - ## query with single text - validate_service \ - '{"text":"What is Deep Learning?"}' - - ## query with multiple texts - validate_service \ - '{"text":["What is Deep Learning?","How are you?"]}' - - ## Test OpenAI API, input single text - validate_service \ - '{"input":"What is Deep Learning?"}' - - ## Test OpenAI API, input multiple texts with parameters - validate_service \ - '{"input":["What is Deep Learning?","How are you?"], "dimensions":100}' -} - -function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-embedding-langchain-mosec-*") - if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi -} - -function main() { - - stop_docker - - build_mosec_docker_images - - build_docker_images - - start_service - - validate_microservice - - stop_docker - echo y | docker system prune - -} - -main diff --git a/tests/embeddings/test_embeddings_multimodal.sh b/tests/embeddings/test_embeddings_multimodal.sh index bd2ca93b7..34297c467 100644 --- a/tests/embeddings/test_embeddings_multimodal.sh +++ b/tests/embeddings/test_embeddings_multimodal.sh @@ -8,15 +8,27 @@ WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') export your_mmei_port=8089 export EMBEDDER_PORT=$your_mmei_port -export MMEI_EMBEDDING_ENDPOINT="http://$ip_address:$your_mmei_port/v1/encode" +export MMEI_EMBEDDING_ENDPOINT="http://$ip_address:$your_mmei_port" export your_embedding_port_microservice=6609 export MM_EMBEDDING_PORT_MICROSERVICE=$your_embedding_port_microservice unset http_proxy -function build_mmei_docker_images() { +function build_mm_docker_images() { cd $WORKPATH echo $(pwd) - docker build --no-cache -t opea/embedding-multimodal-bridgetower:latest --build-arg EMBEDDER_PORT=$EMBEDDER_PORT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/multimodal/bridgetower/Dockerfile . + docker build --no-cache -t opea/embedding:latest -f comps/embeddings/src/Dockerfile . + if [ $? 
-ne 0 ]; then + echo "opea/embedding built fail" + exit 1 + else + echo "opea/embedding built successfully" + fi +} + +function build_embedding_service_images() { + cd $WORKPATH + echo $(pwd) + docker build --no-cache -t opea/embedding-multimodal-bridgetower:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/src/integrations/dependency/bridgetower/Dockerfile . if [ $? -ne 0 ]; then echo "opea/embedding-multimodal-bridgetower built fail" @@ -26,33 +38,18 @@ function build_mmei_docker_images() { fi } -function build_embedding_service_images() { - cd $WORKPATH - echo $(pwd) - docker build --no-cache -t opea/embedding-multimodal:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/multimodal/multimodal_langchain/Dockerfile . - - if [ $? -ne 0 ]; then - echo "opea/embedding-multimodal built fail" - exit 1 - else - echo "opea/embedding-multimodal built successful" - fi -} - function build_docker_images() { - build_mmei_docker_images + build_mm_docker_images build_embedding_service_images } function start_service() { cd $WORKPATH - cd comps/embeddings/multimodal/bridgetower/ - docker compose -f docker_compose_bridgetower_embedding_endpoint.yaml up -d - cd $WORKPATH - cd comps/embeddings/multimodal/multimodal_langchain/ - docker compose -f docker_compose_multimodal_embedding.yaml up -d - sleep 2m + cd comps/embeddings/deployment/docker_compose/ + docker compose -f compose_multimodal_bridgetower.yaml up -d + sleep 30 } + function validate_microservice_text_embedding() { result=$(http_proxy="" curl http://${ip_address}:$MM_EMBEDDING_PORT_MICROSERVICE/v1/embeddings \ -X POST \ @@ -64,7 +61,7 @@ function validate_microservice_text_embedding() { else echo "Result wrong. Received was $result" docker logs embedding-multimodal-bridgetower - docker logs embedding-multimodal + docker logs embedding-multimodal-bridgetower-server exit 1 fi } @@ -80,7 +77,7 @@ function validate_microservice_image_text_pair_embedding() { else echo "Result wrong. Received was $result" docker logs embedding-multimodal-bridgetower - docker logs embedding-multimodal + docker logs embedding-multimodal-bridgetower-server exit 1 fi } @@ -91,7 +88,7 @@ function validate_microservice() { } function stop_docker() { - cid=$(docker ps -aq --filter "name=embedding-multimodal-bridgetower" --filter "name=embedding-multimodal") + cid=$(docker ps -aq --filter "name=embedding-multimodal-bridgetower") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } diff --git a/tests/embeddings/test_embeddings_multimodal_clip.sh b/tests/embeddings/test_embeddings_multimodal_clip.sh deleted file mode 100644 index 770f2dc3d..000000000 --- a/tests/embeddings/test_embeddings_multimodal_clip.sh +++ /dev/null @@ -1,80 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -set -x - -WORKPATH=$(dirname "$PWD") -ip_address=$(hostname -I | awk '{print $1}') - -function build_docker_images() { - cd $WORKPATH - echo $(pwd) - docker build --no-cache -t opea/embedding-multimodal:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/multimodal_clip/Dockerfile . - if [ $? 
-ne 0 ]; then - echo "opea/embedding-multimodal built fail" - exit 1 - else - echo "opea/embedding-multimodal built successful" - fi -} - -function start_service() { - docker run -d --name="test-embedding-multimodal-server" -e LOGFLAG=True -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 5038:6000 --ipc=host opea/embedding-multimodal:comps - sleep 3m -} - -function validate_service() { - local INPUT_DATA="$1" - service_port=5038 - result=$(http_proxy="" curl http://${ip_address}:$service_port/v1/embeddings \ - -X POST \ - -d "$INPUT_DATA" \ - -H 'Content-Type: application/json') - if [[ $result == *"embedding"* ]]; then - echo "Result correct." - else - echo "Result wrong. Received was $result" - docker logs test-embedding-multimodal-server - exit 1 - fi -} - -function validate_microservice() { - ## query with single text - validate_service \ - '{"text":"What is Deep Learning?"}' - - ## query with multiple texts - validate_service \ - '{"text":["What is Deep Learning?","How are you?"]}' - - ## Test OpenAI API, input single text - validate_service \ - '{"input":"What is Deep Learning?"}' - - ## Test OpenAI API, input multiple texts with parameters - validate_service \ - '{"input":["What is Deep Learning?","How are you?"], "dimensions":100}' -} - -function stop_docker() { - cid=$(docker ps -aq --filter "name=test-embedding-multimodal-server-*") - if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi -} - -function main() { - - stop_docker - - build_docker_images - start_service - - validate_microservice - - stop_docker - echo y | docker system prune - -} - -main diff --git a/tests/embeddings/test_embeddings_multimodal_on_intel_hpu.sh b/tests/embeddings/test_embeddings_multimodal_on_intel_hpu.sh index fe3cb8e2e..2438916f1 100644 --- a/tests/embeddings/test_embeddings_multimodal_on_intel_hpu.sh +++ b/tests/embeddings/test_embeddings_multimodal_on_intel_hpu.sh @@ -8,15 +8,27 @@ WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') export your_mmei_port=8087 export EMBEDDER_PORT=$your_mmei_port -export MMEI_EMBEDDING_ENDPOINT="http://$ip_address:$your_mmei_port/v1/encode" +export MMEI_EMBEDDING_ENDPOINT="http://$ip_address:$your_mmei_port" export your_embedding_port_microservice=6608 export MM_EMBEDDING_PORT_MICROSERVICE=$your_embedding_port_microservice unset http_proxy -function build_mmei_docker_images() { +function build_mm_docker_images() { cd $WORKPATH echo $(pwd) - docker build --no-cache -t opea/embedding-multimodal-bridgetower:latest --build-arg EMBEDDER_PORT=$EMBEDDER_PORT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/multimodal/bridgetower/Dockerfile.intel_hpu . + docker build --no-cache -t opea/embedding:latest -f comps/embeddings/src/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/embedding built fail" + exit 1 + else + echo "opea/embedding built successfully" + fi +} + +function build_embedding_service_images() { + cd $WORKPATH + echo $(pwd) + docker build --no-cache -t opea/embedding-multimodal-bridgetower-hpu:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/src/integrations/dependency/bridgetower/Dockerfile.intel_hpu . if [ $? 
-ne 0 ]; then echo "opea/embedding-multimodal-bridgetower built fail" @@ -26,32 +38,16 @@ function build_mmei_docker_images() { fi } -function build_embedding_service_images() { - cd $WORKPATH - echo $(pwd) - docker build --no-cache -t opea/embedding-multimodal:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/multimodal/multimodal_langchain/Dockerfile . - - if [ $? -ne 0 ]; then - echo "opea/embedding-multimodal built fail" - exit 1 - else - echo "opea/embedding-multimodal built successful" - fi -} - function build_docker_images() { - build_mmei_docker_images + build_mm_docker_images build_embedding_service_images } function start_service() { cd $WORKPATH - cd comps/embeddings/multimodal/bridgetower/ - docker compose -f docker_compose_bridgetower_embedding_endpoint.yaml up -d - cd $WORKPATH - cd comps/embeddings/multimodal/multimodal_langchain/ - docker compose -f docker_compose_multimodal_embedding.yaml up -d - sleep 2m + cd comps/embeddings/deployment/docker_compose/ + docker compose -f compose_multimodal_bridgetower_intel_hpu.yaml up -d + sleep 30 } function validate_microservice_text_embedding() { @@ -65,7 +61,7 @@ function validate_microservice_text_embedding() { else echo "Result wrong. Received was $result" docker logs embedding-multimodal-bridgetower - docker logs embedding-multimodal + docker logs embedding-multimodal-bridgetower-server exit 1 fi } @@ -81,7 +77,7 @@ function validate_microservice_image_text_pair_embedding() { else echo "Result wrong. Received was $result" docker logs embedding-multimodal-bridgetower - docker logs embedding-multimodal + docker logs embedding-multimodal-bridgetower-server exit 1 fi } @@ -92,7 +88,7 @@ function validate_microservice() { } function stop_docker() { - cid=$(docker ps -aq --filter "name=embedding-multimodal-bridgetower" --filter "name=embedding-multimodal") + cid=$(docker ps -aq --filter "name=embedding-multimodal-bridgetower") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } diff --git a/tests/embeddings/test_embeddings_predictionguard.sh b/tests/embeddings/test_embeddings_predictionguard.sh index a6727b6bf..ea834decc 100644 --- a/tests/embeddings/test_embeddings_predictionguard.sh +++ b/tests/embeddings/test_embeddings_predictionguard.sh @@ -13,29 +13,29 @@ fi function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build --no-cache -t opea/embedding-pg:comps -f comps/embeddings/predictionguard/Dockerfile . + docker build --no-cache -t opea/embedding:comps -f comps/embeddings/src/Dockerfile . if [ $? 
-ne 0 ]; then - echo "opea/embedding-pg built fail" + echo "opea/embedding built fail" exit 1 else - echo "opea/embedding-pg built successfully" + echo "opea/embedding built successfully" fi } function start_service() { - tei_service_port=6000 + pg_service_port=5124 unset http_proxy docker run -d --name=test-comps-embedding-pg-server \ - -e LOGFLAG=True -e http_proxy= -e https_proxy= \ + -e LOGFLAG=True -e http_proxy=$http_proxy -e https_proxy=$https_proxy \ -e PREDICTIONGUARD_API_KEY=${PREDICTIONGUARD_API_KEY} \ - -p 6000:6000 --ipc=host opea/embedding-pg:comps - sleep 60 # Sleep for 1 minute to allow the service to start + -p ${pg_service_port}:6000 --ipc=host opea/embedding:comps + sleep 60 } function validate_service() { local INPUT_DATA="$1" - tei_service_port=6000 - result=$(http_proxy="" curl http://${ip_address}:${tei_service_port}/v1/embeddings \ + pg_service_port=5124 + result=$(http_proxy="" curl http://${ip_address}:${pg_service_port}/v1/embeddings \ -X POST \ -d "$INPUT_DATA" \ -H 'Content-Type: application/json') @@ -55,14 +55,6 @@ function validate_service() { } function validate_microservice() { - ## query with single text - validate_service \ - '{"text":"What is Deep Learning?"}' - - ## query with multiple texts - validate_service \ - '{"text":["What is Deep Learning?","How are you?"]}' - ## Test OpenAI API, input single text validate_service \ '{"input":"What is Deep Learning?"}' diff --git a/tests/embeddings/test_embeddings_tei_langchain.sh b/tests/embeddings/test_embeddings_tei.sh similarity index 82% rename from tests/embeddings/test_embeddings_tei_langchain.sh rename to tests/embeddings/test_embeddings_tei.sh index df2642cf1..f419a2a6f 100644 --- a/tests/embeddings/test_embeddings_tei_langchain.sh +++ b/tests/embeddings/test_embeddings_tei.sh @@ -10,12 +10,12 @@ ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build --no-cache -t opea/embedding-tei:comps -f comps/embeddings/tei/langchain/Dockerfile . + docker build --no-cache -t opea/embedding:comps -f comps/embeddings/src/Dockerfile . if [ $? 
-ne 0 ]; then - echo "opea/embedding-tei built fail" + echo "opea/embedding built fail" exit 1 else - echo "opea/embedding-tei built successful" + echo "opea/embedding built successful" fi } @@ -24,10 +24,11 @@ function start_service() { model="BAAI/bge-base-en-v1.5" unset http_proxy docker run -d --name="test-comps-embedding-tei-endpoint" -p $tei_endpoint:80 -v ./data:/data --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $model + sleep 3m export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${tei_endpoint}" tei_service_port=5002 - docker run -d --name="test-comps-embedding-tei-server" -e LOGFLAG=True -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p ${tei_service_port}:6000 --ipc=host -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT opea/embedding-tei:comps - sleep 3m + docker run -d --name="test-comps-embedding-tei-server" -e LOGFLAG=True -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p ${tei_service_port}:6000 --ipc=host -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT opea/embedding:comps + sleep 15 } function validate_service() { @@ -48,14 +49,6 @@ function validate_service() { } function validate_microservice() { - ## query with single text - validate_service \ - '{"text":"What is Deep Learning?"}' - - ## query with multiple texts - validate_service \ - '{"text":["What is Deep Learning?","How are you?"]}' - ## Test OpenAI API, input single text validate_service \ '{"input":"What is Deep Learning?"}' @@ -88,7 +81,6 @@ function main() { start_service validate_microservice - pip install -no-cache-dir openai validate_microservice_with_openai stop_docker diff --git a/tests/embeddings/test_embeddings_tei_llama_index.sh b/tests/embeddings/test_embeddings_tei_llama_index.sh deleted file mode 100644 index e1d04ab5e..000000000 --- a/tests/embeddings/test_embeddings_tei_llama_index.sh +++ /dev/null @@ -1,95 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -set -x - -WORKPATH=$(dirname "$PWD") -LOG_PATH="$WORKPATH/tests" -ip_address=$(hostname -I | awk '{print $1}') - -function build_docker_images() { - cd $WORKPATH - echo $(pwd) - docker build --no-cache -t opea/embedding-tei-llama-index:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/tei/llama_index/Dockerfile . - if [ $? 
-ne 0 ]; then - echo "opea/embedding-tei-llama-index built fail" - exit 1 - else - echo "opea/embedding-tei-llama-index built successful" - fi -} - -function start_service() { - tei_endpoint=5001 - model="BAAI/bge-base-en-v1.5" - docker run -d --name="test-comps-embedding-tei-llama-index-endpoint" -p $tei_endpoint:80 -v ./data:/data -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $model - export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${tei_endpoint}" - tei_service_port=5034 - docker run -d --name="test-comps-embedding-tei-llama-index-server" -e LOGFLAG=True -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p ${tei_service_port}:6000 --ipc=host -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT opea/embedding-tei-llama-index:comps - sleep 3m -} - -function validate_service() { - local INPUT_DATA="$1" - - tei_service_port=5034 - URL="http://${ip_address}:$tei_service_port/v1/embeddings" - docker logs test-comps-embedding-tei-llama-index-server >> ${LOG_PATH}/embedding.log - HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ embedding - llama_index ] HTTP status is 200. Checking content..." - local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/embedding.log) - - if echo '"text":"What is Deep Learning?","embedding":\[' | grep -q "$EXPECTED_RESULT"; then - echo "[ embedding - llama_index ] Content is as expected." - else - echo "[ embedding - llama_index ] Content does not match the expected result: $CONTENT" - docker logs test-comps-embedding-tei-llama-index-server >> ${LOG_PATH}/embedding.log - exit 1 - fi - else - echo "[ embedding - llama_index ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-embedding-tei-llama-index-server >> ${LOG_PATH}/embedding.log - exit 1 - fi -} - -function validate_microservice() { - ## query with single text - validate_service \ - '{"text":"What is Deep Learning?"}' - - ## query with multiple texts - validate_service \ - '{"text":["What is Deep Learning?","How are you?"]}' - - ## Test OpenAI API, input single text - validate_service \ - '{"input":"What is Deep Learning?"}' - - ## Test OpenAI API, input multiple texts with parameters - validate_service \ - '{"input":["What is Deep Learning?","How are you?"], "dimensions":100}' -} - -function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-embedding-*") - if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi -} - -function main() { - - stop_docker - - build_docker_images - start_service - - validate_microservice - - stop_docker - echo y | docker system prune - -} - -main diff --git a/tests/llms/test_llms_text-generation_tgi.sh b/tests/llms/test_llms_text-generation_opea_tgi.sh similarity index 81% rename from tests/llms/test_llms_text-generation_tgi.sh rename to tests/llms/test_llms_text-generation_opea_tgi.sh index ef36456a3..d167f1b5d 100644 --- a/tests/llms/test_llms_text-generation_tgi.sh +++ b/tests/llms/test_llms_text-generation_opea_tgi.sh @@ -10,12 +10,12 @@ ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH - docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/llm-tgi:comps -f comps/llms/text-generation/tgi/Dockerfile . 
+ docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/llm:comps -f comps/llms/src/text-generation/Dockerfile . if [ $? -ne 0 ]; then - echo "opea/llm-tgi built fail" + echo "opea/llm built fail" exit 1 else - echo "opea/llm-tgi built successful" + echo "opea/llm built successful" fi } @@ -25,23 +25,24 @@ function start_service() { # Remember to set HF_TOKEN before invoking this test! export HF_TOKEN=${HF_TOKEN} docker run -d --name="test-comps-llm-tgi-endpoint" -p $tgi_endpoint_port:80 -v ~/.cache/huggingface/hub:/data --shm-size 1g -e HF_TOKEN=${HF_TOKEN} ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id ${hf_llm_model} --max-input-tokens 1024 --max-total-tokens 2048 - export TGI_LLM_ENDPOINT="http://${ip_address}:${tgi_endpoint_port}" - - llm_port=5005 - unset http_proxy - docker run -d --name="test-comps-llm-tgi-server" -p ${llm_port}:9000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TGI_LLM_ENDPOINT=$TGI_LLM_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HF_TOKEN opea/llm-tgi:comps + export LLM_ENDPOINT="http://${ip_address}:${tgi_endpoint_port}" # check whether tgi is fully ready n=0 until [[ "$n" -ge 100 ]] || [[ $ready == true ]]; do - docker logs test-comps-llm-tgi-endpoint >> ${LOG_PATH}/${hf_llm_model}-llm-tgi.log + docker logs test-comps-llm-tgi-endpoint >> ${LOG_PATH}/test-comps-vllm-service.log n=$((n+1)) - if grep -q Connected ${LOG_PATH}/${hf_llm_model}-llm-tgi.log; then + if grep -q Connected ${LOG_PATH}/test-comps-vllm-service.log; then break fi sleep 5s done sleep 5s + + llm_port=5005 + unset http_proxy + docker run -d --name="test-comps-llm-tgi-server" -p ${llm_port}:9000 --ipc=host -e LOGFLAG=True -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e LLM_ENDPOINT=$LLM_ENDPOINT -e LLM_MODEL_ID=$hf_llm_model -e HUGGINGFACEHUB_API_TOKEN=$HF_TOKEN opea/llm:comps + sleep 20s } function validate_microservice() { @@ -56,7 +57,7 @@ function validate_microservice() { else echo "Result wrong. Received was $result" docker logs test-comps-llm-tgi-endpoint >> ${LOG_PATH}/llm-tgi.log - docker logs test-comps-llm-tgi-server >> ${LOG_PATH}/llm-tgi-server.log + docker logs test-comps-llm-tgi-server >> ${LOG_PATH}/llm-server.log exit 1 fi } @@ -66,7 +67,7 @@ function validate_microservice_with_openai() { python3 ${WORKPATH}/tests/utils/validate_svc_with_openai.py "$ip_address" "$llm_service_port" "llm" if [ $? -ne 0 ]; then docker logs test-comps-llm-tgi-endpoint >> ${LOG_PATH}/llm-tgi.log - docker logs test-comps-llm-tgi-server >> ${LOG_PATH}/llm-tgi-server.log + docker logs test-comps-llm-tgi-server >> ${LOG_PATH}/llm-server.log exit 1 fi } diff --git a/tests/llms/test_llms_text-generation_vllm_langchain_on_intel_hpu.sh b/tests/llms/test_llms_text-generation_opea_vllm_on_intel_hpu.sh similarity index 86% rename from tests/llms/test_llms_text-generation_vllm_langchain_on_intel_hpu.sh rename to tests/llms/test_llms_text-generation_opea_vllm_on_intel_hpu.sh index c83799128..eb5911bb6 100644 --- a/tests/llms/test_llms_text-generation_vllm_langchain_on_intel_hpu.sh +++ b/tests/llms/test_llms_text-generation_opea_vllm_on_intel_hpu.sh @@ -24,18 +24,18 @@ function build_docker_images() { ## Build OPEA microservice docker cd $WORKPATH docker build \ - --no-cache -t opea/llm-vllm:comps \ - -f comps/llms/text-generation/vllm/langchain/Dockerfile . + --no-cache -t opea/llm:comps \ + -f comps/llms/src/text-generation/Dockerfile . if [ $? 
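# Sketch of the readiness wait used above for the TGI endpoint (wait_for_log is a
# hypothetical helper, not part of the PR): poll container logs until a marker string
# appears or a retry budget is exhausted, instead of sleeping for a fixed time.
wait_for_log() {
  local container="$1" pattern="$2" retries="${3:-100}"
  local n=0
  until [ "$n" -ge "$retries" ]; do
    if docker logs "$container" 2>&1 | grep -q "$pattern"; then
      return 0
    fi
    n=$((n+1))
    sleep 5s
  done
  return 1
}
# e.g. wait_for_log test-comps-llm-tgi-endpoint Connected 100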
-ne 0 ]; then - echo "opea/llm-vllm built fail" + echo "opea/llm built fail" exit 1 else - echo "opea/llm-vllm built successful" + echo "opea/llm built successful" fi } function start_service() { - export LLM_MODEL="Intel/neural-chat-7b-v3-3" + export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" port_number=5025 docker run -d --rm \ --runtime=habana \ @@ -49,17 +49,9 @@ function start_service() { --ipc=host \ -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} \ opea/vllm-gaudi:comps \ - --model $LLM_MODEL --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048 + --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048 - export vLLM_ENDPOINT="http://${ip_address}:${port_number}" - docker run -d --rm \ - --name="test-comps-vllm-microservice" \ - -p 5030:9000 \ - --ipc=host \ - -e vLLM_ENDPOINT=$vLLM_ENDPOINT \ - -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN \ - -e LLM_MODEL=$LLM_MODEL \ - opea/llm-vllm:comps + export LLM_ENDPOINT="http://${ip_address}:${port_number}" # check whether vllm ray is fully ready n=0 @@ -72,6 +64,17 @@ function start_service() { sleep 5s done sleep 5s + + docker run -d --rm \ + --name="test-comps-vllm-microservice" \ + -p 5030:9000 \ + --ipc=host \ + -e LLM_ENDPOINT=$LLM_ENDPOINT \ + -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN \ + -e LLM_MODEL_ID=$LLM_MODEL_ID \ + -e LOGFLAG=True \ + opea/llm:comps + sleep 20s } function validate_microservice() { diff --git a/tests/llms/test_llms_text-generation_vllm_langchain_openvino.sh b/tests/llms/test_llms_text-generation_vllm_langchain_openvino.sh deleted file mode 100644 index 40d1af965..000000000 --- a/tests/llms/test_llms_text-generation_vllm_langchain_openvino.sh +++ /dev/null @@ -1,127 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -set -x - -WORKPATH="$( cd "$( dirname "$0" )" && pwd )" - -# Define variables -port=5033 -HF_CACHE_DIR=$HOME/.cache/huggingface -DOCKER_IMAGE="vllm-openvino:comps" -CONTAINER_NAME="test-comps-vllm-openvino-container" - -function build_container() { - cd $WORKPATH - git clone https://github.com/vllm-project/vllm.git vllm-openvino - cd ./vllm-openvino/ && git checkout v0.6.1 # something wrong with main branch image build - - docker build --no-cache -t $DOCKER_IMAGE \ - -f Dockerfile.openvino \ - . \ - --build-arg https_proxy=$https_proxy \ - --build-arg http_proxy=$http_proxy - if [ $? -ne 0 ]; then - echo "vllm-openvino built fail" - exit 1 - else - echo "vllm-openvino built successful" - fi - cd $WORKPATH - rm -rf vllm-openvino -} - -# Function to start Docker container -start_container() { - - docker run -d --rm --name=$CONTAINER_NAME \ - -p $port:$port \ - --ipc=host \ - -e HTTPS_PROXY=$https_proxy \ - -e HTTP_PROXY=$https_proxy \ - -v $HF_CACHE_DIR:/root/.cache/huggingface \ - vllm-openvino:comps /bin/bash -c "\ - cd / && \ - export VLLM_CPU_KVCACHE_SPACE=50 && \ - python3 -m vllm.entrypoints.openai.api_server \ - --model \"Intel/neural-chat-7b-v3-3\" \ - --host 0.0.0.0 \ - --port $port" - - # check whether service is fully ready - n=0 - until [[ "$n" -ge 300 ]]; do - docker logs $CONTAINER_NAME > /tmp/$CONTAINER_NAME.log 2>&1 - n=$((n+1)) - if grep -q "Uvicorn running on" /tmp/$CONTAINER_NAME.log; then - break - fi - sleep 3s - done - -} - -# Cleanup Function -cleanup() { - # Stop and remove Docker container and images - cid=$(docker ps -aq --filter "name=$CONTAINER_NAME") - if [[ ! 
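# Illustrative check (not part of the diff) that the vLLM backend started above is serving
# before the opea/llm:comps wrapper is launched; port 5025 and the model id match the
# values used in start_service.
ip_address=$(hostname -I | awk '{print $1}')
curl -s "http://${ip_address}:5025/v1/completions" \
  -H 'Content-Type: application/json' \
  -d '{"model": "Intel/neural-chat-7b-v3-3", "prompt": "What is Deep Learning?", "max_tokens": 32, "temperature": 0}'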
-z "$cid" ]]; then docker stop $cid || docker rm $cid && sleep 1s; fi - docker rmi -f $DOCKER_IMAGE - rm /tmp/$CONTAINER_NAME.log -} - -# Function to test API endpoint -function test_api_endpoint { - local endpoint="$1" - local expected_status="$2" - - # Make the HTTP request - if test "$1" = "v1/completions" - then - local response=$(curl "http://localhost:$port/$endpoint" \ - -H "Content-Type: application/json" \ - -d '{ - "model": "Intel/neural-chat-7b-v3-3", - "prompt": "What is the key advantage of Openvino framework", - "max_tokens": 300, - "temperature": 0.7 - }' \ - --write-out '%{http_code}' \ - --silent \ - --output /dev/null) - else - local response=$(curl "http://localhost:$port/$endpoint" \ - --write-out '%{http_code}' \ - --silent \ - --output /dev/null) - fi - - # Assert the response status code - if [[ "$response" -eq "$expected_status" ]]; then - echo "PASS: $endpoint returned expected status code: $expected_status" - else - echo "FAIL: $endpoint returned unexpected status code: $response (expected: $expected_status)" - docker logs $CONTAINER_NAME - exit 1 - fi -} -# Main function -main() { - - build_container - start_container - - # Sleep to allow the container to start up fully - sleep 10 - # Test the /v1/models API - test_api_endpoint "v1/models" 200 - - # Test the /v1/completions API - test_api_endpoint "v1/completions" 200 - - cleanup -} - -# Call main function -main diff --git a/tests/llms/test_llms_text-generation_vllm_langchain_openvino_on_intel_arc.sh b/tests/llms/test_llms_text-generation_vllm_langchain_openvino_on_intel_arc.sh deleted file mode 100644 index 2b483dff6..000000000 --- a/tests/llms/test_llms_text-generation_vllm_langchain_openvino_on_intel_arc.sh +++ /dev/null @@ -1,128 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -set -x - -WORKPATH="$( cd "$( dirname "$0" )" && pwd )" -DOCKER_FILE="$WORKPATH"/../../comps/llms/text-generation/vllm/langchain/dependency/Dockerfile.intel_gpu - -# Define variables -port=5033 -RENDER_GROUP_ID=110 -DOCKER_IMAGE="vllm-openvino:comps" -CONTAINER_NAME="test-comps-vllm-openvino-container" -HF_CACHE_DIR=$HOME/.cache/huggingface - -function build_container() { - docker build --no-cache -t $DOCKER_IMAGE \ - -f $DOCKER_FILE \ - . \ - --build-arg https_proxy=$https_proxy \ - --build-arg http_proxy=$http_proxy - - if [ $? -ne 0 ]; then - echo "vllm-openvino built fail" - exit 1 - else - echo "vllm-openvino built successful" - fi -} - -# Function to start Docker container -start_container() { - - docker run -d --rm --name=$CONTAINER_NAME \ - -p $port:$port \ - --ipc=host \ - -e HTTPS_PROXY=$https_proxy \ - -e HTTP_PROXY=$https_proxy \ - -v $HF_CACHE_DIR:/root/.cache/huggingface \ - --device=/dev/dri:/dev/dri \ - --group-add $RENDER_GROUP_ID \ - vllm-openvino:comps /bin/bash -c "\ - export VLLM_OPENVINO_DEVICE=GPU && \ - export VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS=ON && \ - python3 -m vllm.entrypoints.openai.api_server \ - --model Intel/neural-chat-7b-v3-3 \ - --host 0.0.0.0 \ - --port $port \ - --max_model_len 8192" - - # check whether service is fully ready - n=0 - until [[ "$n" -ge 300 ]]; do - docker logs $CONTAINER_NAME > /tmp/$CONTAINER_NAME.log 2>&1 - n=$((n+1)) - if grep -q "Uvicorn running on" /tmp/$CONTAINER_NAME.log; then - break - fi - sleep 3s - done - -} - -# Cleanup Function -cleanup() { - # Stop and remove Docker container and images - cid=$(docker ps -aq --filter "name=$CONTAINER_NAME") - if [[ ! 
-z "$cid" ]]; then docker stop $cid || docker rm $cid && sleep 1s; fi - docker rmi -f $DOCKER_IMAGE - rm /tmp/$CONTAINER_NAME.log -} - -# Function to test API endpoint -function test_api_endpoint { - local endpoint="$1" - local expected_status="$2" - - # Make the HTTP request - if test "$1" = "v1/completions" - then - local response=$(curl "http://localhost:$port/$endpoint" \ - -H "Content-Type: application/json" \ - -d '{ - "model": "Intel/neural-chat-7b-v3-3", - "prompt": "What is the key advantage of Openvino framework", - "max_tokens": 300, - "temperature": 0.7 - }' \ - --write-out '%{http_code}' \ - --silent \ - --output /dev/null) - else - local response=$(curl "http://localhost:$port/$endpoint" \ - --write-out '%{http_code}' \ - --silent \ - --output /dev/null) - fi - - # Assert the response status code - if [[ "$response" -eq "$expected_status" ]]; then - echo "PASS: $endpoint returned expected status code: $expected_status" - else - echo "FAIL: $endpoint returned unexpected status code: $response (expected: $expected_status)" - docker logs $CONTAINER_NAME - exit 1 - fi -} - -# Main function -main() { - - build_container - start_container - - # Sleep to allow the container to start up fully - sleep 10 - # Test the /v1/models API - test_api_endpoint "v1/models" 200 - - # Test the /v1/completions API - test_api_endpoint "v1/completions" 200 - - cleanup -} - -# Call main function -main diff --git a/tests/llms/test_llms_text-generation_vllm_llamaindex_on_intel_hpu.sh b/tests/llms/test_llms_text-generation_vllm_llamaindex_on_intel_hpu.sh deleted file mode 100644 index 91a30ed85..000000000 --- a/tests/llms/test_llms_text-generation_vllm_llamaindex_on_intel_hpu.sh +++ /dev/null @@ -1,126 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -set -x - -WORKPATH=$(dirname "$PWD") -ip_address=$(hostname -I | awk '{print $1}') - -function build_docker_images() { - ## Build VLLM docker - cd $WORKPATH - git clone https://github.com/HabanaAI/vllm-fork.git - cd vllm-fork/ - git checkout 3c39626 - docker build --no-cache -f Dockerfile.hpu -t opea/vllm-gaudi:comps --shm-size=128g . - if [ $? -ne 0 ]; then - echo "opea/vllm-gaudi built fail" - exit 1 - else - echo "opea/vllm-gaudi built successful" - fi - - ## Build OPEA microservice docker - cd $WORKPATH - docker build \ - --no-cache -t opea/llm-vllm-llamaindex:comps \ - -f comps/llms/text-generation/vllm/llama_index/Dockerfile . - if [ $? 
-ne 0 ]; then - echo "opea/llm-vllm-llamaindex built fail" - exit 1 - else - echo "opea/llm-vllm-llamaindex built successful" - fi -} - -function start_service() { - export LLM_MODEL="Intel/neural-chat-7b-v3-3" - port_number=5025 - docker run -d --rm \ - --runtime=habana \ - --name="test-comps-vllm-service" \ - -v $PWD/data:/data \ - -p $port_number:80 \ - -e HABANA_VISIBLE_DEVICES=all \ - -e OMPI_MCA_btl_vader_single_copy_mechanism=none \ - --cap-add=sys_nice \ - --ipc=host \ - -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} \ - opea/vllm-gaudi:comps \ - --model $LLM_MODEL --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048 - - export vLLM_ENDPOINT="http://${ip_address}:${port_number}" - docker run -d --rm \ - --name="test-comps-vllm-microservice" \ - -p 5030:9000 \ - --ipc=host \ - -e vLLM_ENDPOINT=$vLLM_ENDPOINT \ - -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN \ - -e LLM_MODEL=$LLM_MODEL \ - opea/llm-vllm-llamaindex:comps - - # check whether vllm ray is fully ready - n=0 - until [[ "$n" -ge 160 ]] || [[ $ready == true ]]; do - docker logs test-comps-vllm-service > ${WORKPATH}/tests/test-comps-vllm-service.log - n=$((n+1)) - if grep -q throughput ${WORKPATH}/tests/test-comps-vllm-service.log; then - break - fi - sleep 5s - done - sleep 5s -} - -function validate_microservice() { - result=$(http_proxy="" curl http://${ip_address}:5025/v1/completions \ - -H "Content-Type: application/json" \ - -d '{ - "model": "Intel/neural-chat-7b-v3-3", - "prompt": "What is Deep Learning?", - "max_tokens": 32, - "temperature": 0 - }') - if [[ $result == *"text"* ]]; then - echo "Result correct." - else - echo "Result wrong. Received was $result" - docker logs test-comps-vllm-service - docker logs test-comps-vllm-microservice - exit 1 - fi - result=$(http_proxy="" curl http://${ip_address}:5030/v1/chat/completions \ - -X POST \ - -d '{"query":"What is Deep Learning?","max_tokens":17,"top_p":0.95,"temperature":0.01,"streaming":false}' \ - -H 'Content-Type: application/json') - if [[ $result == *"text"* ]]; then - echo "Result correct." - else - echo "Result wrong. Received was $result" - docker logs test-comps-vllm-service - docker logs test-comps-vllm-microservice - exit 1 - fi -} - -function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-vllm*") - if [[ ! -z "$cid" ]]; then docker rm $cid -f && sleep 1s; fi -} - -function main() { - - stop_docker - - build_docker_images - start_service - - validate_microservice - - stop_docker - echo y | docker system prune - -} - -main diff --git a/tests/reranks/test_reranks_fastrag.sh b/tests/reranks/test_reranks_fastrag.sh index 2ce978dcb..17bc01503 100644 --- a/tests/reranks/test_reranks_fastrag.sh +++ b/tests/reranks/test_reranks_fastrag.sh @@ -59,4 +59,4 @@ function main() { } -main +# main diff --git a/tests/reranks/test_reranks_mosec_langchain.sh b/tests/reranks/test_reranks_mosec_langchain.sh deleted file mode 100644 index 7d0a1a288..000000000 --- a/tests/reranks/test_reranks_mosec_langchain.sh +++ /dev/null @@ -1,83 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -set -x - -WORKPATH=$(dirname "$PWD") -ip_address=$(hostname -I | awk '{print $1}') - -function build_mosec_docker_images() { - cd $WORKPATH - echo $(pwd) - docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy --no-cache -t opea/reranking-langchain-mosec-endpoint:comps -f comps/reranks/mosec/langchain/dependency/Dockerfile . - if [ $? 
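# Sketch of the cleanup idiom shared by these tests (stop_by_prefix is a hypothetical
# helper, not part of the PR): stop and remove any container whose name matches a test
# prefix, then prune non-interactively. docker system prune -f is equivalent to the
# "echo y | docker system prune" form used above.
stop_by_prefix() {
  local cid
  cid=$(docker ps -aq --filter "name=$1")
  if [ -n "$cid" ]; then docker stop $cid && docker rm $cid; fi
}
# e.g. stop_by_prefix "test-comps-reranking-langchain-mosec-*"
docker system prune -f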
-ne 0 ]; then - echo "opea/reranking-langchain-mosec-endpoint built fail" - exit 1 - else - echo "opea/reranking-langchain-mosec-endpoint built successful" - fi -} - -function build_docker_images() { - cd $WORKPATH - echo $(pwd) - docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy --no-cache -t opea/reranking-langchain-mosec:comps -f comps/reranks/mosec/langchain/Dockerfile . - if [ $? -ne 0 ]; then - echo "opea/reranking-langchain-mosec built fail" - exit 1 - else - echo "opea/reranking-langchain-mosec built successful" - fi -} - -function start_service() { - mosec_endpoint=5006 - model="BAAI/bge-reranker-base" - unset http_proxy - docker run -d --name="test-comps-reranking-langchain-mosec-endpoint" -p $mosec_endpoint:8000 opea/reranking-langchain-mosec-endpoint:comps - export MOSEC_RERANKING_ENDPOINT="http://${ip_address}:${mosec_endpoint}" - mosec_service_port=5007 - docker run -d --name="test-comps-reranking-langchain-mosec-server" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p ${mosec_service_port}:8000 --ipc=host -e MOSEC_RERANKING_ENDPOINT=$MOSEC_RERANKING_ENDPOINT opea/reranking-langchain-mosec:comps - sleep 3m -} - -function validate_microservice() { - mosec_service_port=5007 - result=$(http_proxy="" curl http://${ip_address}:${mosec_service_port}/v1/reranking\ - -X POST \ - -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \ - -H 'Content-Type: application/json') - if [[ $result == *"Human"* ]]; then - echo "Result correct." - else - echo "Result wrong. Received was $result" - docker logs test-comps-reranking-langchain-mosec-endpoint - docker logs test-comps-reranking-langchain-mosec-server - exit 1 - fi -} - -function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-reranking-langchain-mosec-*") - if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi -} - -function main() { - - stop_docker - - build_mosec_docker_images - - build_docker_images - - start_service - - validate_microservice - - stop_docker - echo y | docker system prune - -} - -main diff --git a/tests/reranks/test_reranks_tei.sh b/tests/reranks/test_reranks_opea_tei.sh similarity index 70% rename from tests/reranks/test_reranks_tei.sh rename to tests/reranks/test_reranks_opea_tei.sh index f28a0a189..65acf6830 100644 --- a/tests/reranks/test_reranks_tei.sh +++ b/tests/reranks/test_reranks_opea_tei.sh @@ -6,14 +6,15 @@ set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') + function build_docker_images() { cd $WORKPATH - docker build --no-cache -t opea/reranking-tei:comps -f comps/reranks/tei/Dockerfile . + docker build --no-cache -t opea/reranking:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/src/Dockerfile . if [ $? 
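# Illustrative request (not part of the diff) against the reranking microservice API that
# these tests exercise; port 5007 matches the service ports used in the surrounding scripts.
ip_address=$(hostname -I | awk '{print $1}')
curl -s "http://${ip_address}:5007/v1/reranking" \
  -X POST \
  -H 'Content-Type: application/json' \
  -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}'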
-ne 0 ]; then - echo "opea/reranking-tei built fail" + echo "opea/reranking built fail" exit 1 else - echo "opea/reranking-tei built successful" + echo "opea/reranking built successful" fi } @@ -24,13 +25,13 @@ function start_service() { model=BAAI/bge-reranker-base revision=refs/pr/4 volume=$PWD/data - docker run -d --name="test-comps-reranking-tei-endpoint" -p $tei_endpoint:80 -v $volume:/data --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $model - + docker run -d --name="test-comps-reranking-tei-endpoint" -p $tei_endpoint:80 -v $volume:/data -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $model + sleep 3m export TEI_RERANKING_ENDPOINT="http://${ip_address}:${tei_endpoint}" tei_service_port=5007 unset http_proxy - docker run -d --name="test-comps-reranking-tei-server" -p ${tei_service_port}:8000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TEI_RERANKING_ENDPOINT=$TEI_RERANKING_ENDPOINT -e HF_TOKEN=$HF_TOKEN opea/reranking-tei:comps - sleep 3m + docker run -d --name="test-comps-reranking-tei-server" -e LOGFLAG=True -p ${tei_service_port}:8000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TEI_RERANKING_ENDPOINT=$TEI_RERANKING_ENDPOINT -e HF_TOKEN=$HF_TOKEN -e RERANK_TYPE="tei" opea/reranking:comps + sleep 15 } function validate_microservice() { diff --git a/tests/reranks/test_reranks_videoqna.sh b/tests/reranks/test_reranks_videoqna.sh index 9d6cdf051..e63e13fe0 100755 --- a/tests/reranks/test_reranks_videoqna.sh +++ b/tests/reranks/test_reranks_videoqna.sh @@ -96,4 +96,4 @@ function main() { } -main +# main diff --git a/tests/retrievers/test_retrievers_milvus.sh b/tests/retrievers/test_retrievers_milvus.sh new file mode 100644 index 000000000..2fe6fc0c9 --- /dev/null +++ b/tests/retrievers/test_retrievers_milvus.sh @@ -0,0 +1,96 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') + +function build_docker_images() { + cd $WORKPATH + docker build --no-cache -t opea/retriever-milvus:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/src/Dockerfile . + if [ $? 
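# Optional sanity check of the TEI reranker backend started above, before the
# opea/reranking:comps wrapper is exercised. The /rerank route and its query/texts payload
# follow the ghcr.io/huggingface/text-embeddings-inference API; ${tei_endpoint} is assumed
# to be the port exported in start_service.
ip_address=$(hostname -I | awk '{print $1}')
curl -s "http://${ip_address}:${tei_endpoint}/rerank" \
  -X POST \
  -H 'Content-Type: application/json' \
  -d '{"query":"What is Deep Learning?", "texts":["Deep Learning is not...", "Deep learning is..."]}'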
-ne 0 ]; then + echo "opea/retriever-milvus built fail" + exit 1 + else + echo "opea/retriever-milvus built successful" + fi +} + +function start_service() { + # start milvus vector db + cd $WORKPATH/comps/dataprep/milvus/langchain/ + # wget https://raw.githubusercontent.com/milvus-io/milvus/v2.4.9/configs/milvus.yaml + # wget https://github.com/milvus-io/milvus/releases/download/v2.4.9/milvus-standalone-docker-compose.yml -O docker-compose.yml + # sed '/- \${DOCKER_VOLUME_DIRECTORY:-\.}\/volumes\/milvus:\/var\/lib\/milvus/a \ \ \ \ \ \ - \${DOCKER_VOLUME_DIRECTORY:-\.}\/milvus.yaml:\/milvus\/configs\/milvus.yaml' -i docker-compose.yml + docker compose up -d + + # tei endpoint + tei_endpoint=5014 + model="BAAI/bge-base-en-v1.5" + docker run -d --name="test-comps-retriever-milvus-tei-endpoint" -p $tei_endpoint:80 -v ./data:/data --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $model + export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${tei_endpoint}" + + # milvus retriever + export MILVUS_HOST=${ip_address} + export HUGGINGFACEHUB_API_TOKEN=$HF_TOKEN + retriever_port=5015 + # unset http_proxy + docker run -d --name="test-comps-retriever-milvus-server" -p ${retriever_port}:7000 --ipc=host -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e MILVUS_HOST=$ip_address -e LOGFLAG=true -e RETRIEVER_TYPE="milvus" opea/retriever-milvus:comps + + sleep 1m +} + +function validate_microservice() { + local test_embedding="$1" + + retriever_port=5015 + export PATH="${HOME}/miniforge3/bin:$PATH" + source activate + URL="http://${ip_address}:$retriever_port/v1/retrieval" + + HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "{\"text\":\"test\",\"embedding\":${test_embedding}}" -H 'Content-Type: application/json' "$URL") + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ retriever ] HTTP status is 200. Checking content..." + local CONTENT=$(curl -s -X POST -d "{\"text\":\"test\",\"embedding\":${test_embedding}}" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/retriever.log) + + if echo "$CONTENT" | grep -q "retrieved_docs"; then + echo "[ retriever ] Content is as expected." + else + echo "[ retriever ] Content does not match the expected result: $CONTENT" + docker logs test-comps-retriever-milvus-server >> ${LOG_PATH}/retriever.log + exit 1 + fi + else + echo "[ retriever ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs test-comps-retriever-milvus-server >> ${LOG_PATH}/retriever.log + exit 1 + fi +} + +function stop_docker() { + cid_retrievers=$(docker ps -aq --filter "name=test-comps-retriever-milvus*") + if [[ ! -z "$cid_retrievers" ]]; then + docker stop $cid_retrievers && docker rm $cid_retrievers && sleep 1s + fi + cid=$(docker ps -aq --filter "name=milvus-*") + if [[ ! 
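# Illustrative manual query (not part of the diff) against the Milvus retriever, mirroring
# validate_microservice: a random 768-dimensional embedding is generated the same way
# main() does and posted to the /v1/retrieval route on port 5015.
ip_address=$(hostname -I | awk '{print $1}')
test_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
curl -s -X POST "http://${ip_address}:5015/v1/retrieval" \
  -H 'Content-Type: application/json' \
  -d "{\"text\":\"test\",\"embedding\":${test_embedding}}"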
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi +} + +function main() { + + stop_docker + build_docker_images + + start_service + test_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") + validate_microservice "$test_embedding" + + stop_docker + echo y | docker system prune + +} + +main diff --git a/tests/retrievers/test_retrievers_redis_langchain.sh b/tests/retrievers/test_retrievers_redis.sh similarity index 93% rename from tests/retrievers/test_retrievers_redis_langchain.sh rename to tests/retrievers/test_retrievers_redis.sh index 685d20ba4..b22195d6b 100644 --- a/tests/retrievers/test_retrievers_redis_langchain.sh +++ b/tests/retrievers/test_retrievers_redis.sh @@ -10,7 +10,7 @@ ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH - docker build --no-cache -t opea/retriever-redis:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/redis/langchain/Dockerfile . + docker build --no-cache -t opea/retriever-redis:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/src/Dockerfile . if [ $? -ne 0 ]; then echo "opea/retriever-redis built fail" exit 1 @@ -37,7 +37,7 @@ function start_service() { export HUGGINGFACEHUB_API_TOKEN=$HF_TOKEN retriever_port=5435 # unset http_proxy - docker run -d --name="test-comps-retriever-redis-server" -p ${retriever_port}:7000 --ipc=host -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME opea/retriever-redis:comps + docker run -d --name="test-comps-retriever-redis-server" -p ${retriever_port}:7000 --ipc=host -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e LOGFLAG=true -e RETRIEVER_TYPE="redis" opea/retriever-redis:comps sleep 3m } @@ -52,7 +52,7 @@ function start_multimodal_service() { export INDEX_NAME="rag-redis" retriever_port=5435 unset http_proxy - docker run -d --name="test-comps-retriever-redis-server" -p ${retriever_port}:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e BRIDGE_TOWER_EMBEDDING=true opea/retriever-redis:comps + docker run -d --name="test-comps-retriever-redis-server" -p ${retriever_port}:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e BRIDGE_TOWER_EMBEDDING=true -e LOGFLAG=true -e RETRIEVER_TYPE="redis" opea/retriever-redis:comps sleep 2m }