diff --git a/.github/workflows/docker/compose/llms-compose.yaml b/.github/workflows/docker/compose/llms-compose.yaml
index 6b800a4a1..f50507542 100644
--- a/.github/workflows/docker/compose/llms-compose.yaml
+++ b/.github/workflows/docker/compose/llms-compose.yaml
@@ -11,10 +11,6 @@ services:
     build:
       dockerfile: comps/llms/src/text-generation/Dockerfile.intel_hpu
     image: ${REGISTRY:-opea}/llm-textgen-gaudi:${TAG:-latest}
-  llm-ollama:
-    build:
-      dockerfile: comps/llms/text-generation/ollama/langchain/Dockerfile
-    image: ${REGISTRY:-opea}/llm-ollama:${TAG:-latest}
   llm-docsum:
     build:
       dockerfile: comps/llms/src/doc-summarization/Dockerfile
diff --git a/comps/asr/src/README.md b/comps/asr/src/README.md
index 406b7c541..5fc731eb1 100644
--- a/comps/asr/src/README.md
+++ b/comps/asr/src/README.md
@@ -74,14 +74,14 @@ Alternatively, you can also start the ASR microservice with Docker.
 
 ```bash
 cd ../..
-docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/integrations/dependency/Dockerfile .
+docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/integrations/dependency/whisper/Dockerfile .
 ```
 
 - Gaudi2 HPU
 
 ```bash
 cd ../..
-docker build -t opea/whisper-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/integrations/dependency/Dockerfile.intel_hpu .
+docker build -t opea/whisper-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/integrations/dependency/whisper/Dockerfile.intel_hpu .
 ```
 
 #### 2.1.2 ASR Service Image
diff --git a/comps/finetuning/src/README.md b/comps/finetuning/src/README.md
index ac6bb8cad..55fad973d 100644
--- a/comps/finetuning/src/README.md
+++ b/comps/finetuning/src/README.md
@@ -244,7 +244,7 @@ curl http://${your_ip}:8015/v1/finetune/list_checkpoints -X POST -H "Content-Typ
 
 ### 3.4 Leverage fine-tuned model
 
-After fine-tuning job is done, fine-tuned model can be chosen from listed checkpoints, then the fine-tuned model can be used in other microservices. For example, fine-tuned reranking model can be used in [reranks](../../rerankings/src/README.md) microservice by assign its path to the environment variable `RERANK_MODEL_ID`, fine-tuned embedding model can be used in [embeddings](../../embeddings/src/README.md) microservice by assign its path to the environment variable `model`, LLMs after instruction tuning can be used in [llms](../../llms/text-generation/README.md) microservice by assign its path to the environment variable `your_hf_llm_model`.
+After the fine-tuning job is done, a fine-tuned model can be chosen from the listed checkpoints and used in other microservices. For example, a fine-tuned reranking model can be used in the [reranks](../../rerankings/src/README.md) microservice by assigning its path to the environment variable `RERANK_MODEL_ID`, a fine-tuned embedding model can be used in the [embeddings](../../embeddings/src/README.md) microservice by assigning its path to the environment variable `model`, and an instruction-tuned LLM can be used in the [llms](../../llms/src/text-generation/README.md) microservice by assigning its path to the environment variable `your_hf_llm_model`.
 
 ## 🚀4. Descriptions for Finetuning parameters
 
diff --git a/comps/llms/text-generation/ollama/langchain/README.md b/comps/llms/src/text-generation/README_ollama.md
similarity index 83%
rename from comps/llms/text-generation/ollama/langchain/README.md
rename to comps/llms/src/text-generation/README_ollama.md
index 9700fc6a6..c18c8febc 100644
--- a/comps/llms/text-generation/ollama/langchain/README.md
+++ b/comps/llms/src/text-generation/README_ollama.md
@@ -57,18 +57,18 @@ curl --noproxy "*" http://localhost:11434/api/generate -d '{
 ## Build Docker Image
 
 ```bash
-cd GenAIComps/
-docker build --no-cache -t opea/llm-ollama:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/ollama/langchain/Dockerfile .
+cd ../../../../
+docker build -t opea/llm-textgen:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/text-generation/Dockerfile .
 ```
 
 ## Run the Ollama Microservice
 
 ```bash
-docker run --network host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/llm-ollama:latest
+docker run --network host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e LLM_ENDPOINT="http://localhost:11434" -e LLM_MODEL_ID="llama3" opea/llm-textgen:latest
 ```
 
 ## Consume the Ollama Microservice
 
 ```bash
-curl http://127.0.0.1:9000/v1/chat/completions -X POST -d '{"model": "llama3", "query":"What is Deep Learning?","max_tokens":32,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"stream":true}' -H 'Content-Type: application/json'
+curl http://127.0.0.1:9000/v1/chat/completions -X POST -d '{"messages": [{"role": "user", "content": "What is Deep Learning?"}]}' -H 'Content-Type: application/json'
 ```
diff --git a/comps/llms/text-generation/README.md b/comps/llms/src/text-generation/README_textgen.md
similarity index 100%
rename from comps/llms/text-generation/README.md
rename to comps/llms/src/text-generation/README_textgen.md
diff --git a/comps/llms/text-generation/ollama/langchain/Dockerfile b/comps/llms/text-generation/ollama/langchain/Dockerfile
deleted file mode 100644
index 41e3720cd..000000000
--- a/comps/llms/text-generation/ollama/langchain/Dockerfile
+++ /dev/null
@@ -1,26 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-FROM python:3.11-slim
-
-RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
-    curl \
-    libgl1-mesa-glx \
-    libjemalloc-dev
-
-RUN useradd -m -s /bin/bash user && \
-    mkdir -p /home/user && \
-    chown -R user /home/user/
-
-USER user
-
-COPY comps /home/user/comps
-
-RUN pip install --no-cache-dir --upgrade pip setuptools && \
-    pip install --no-cache-dir -r /home/user/comps/llms/text-generation/ollama/langchain/requirements.txt
-
-ENV PYTHONPATH=$PYTHONPATH:/home/user
-
-WORKDIR /home/user/comps/llms/text-generation/ollama/langchain
-
-ENTRYPOINT ["bash", "entrypoint.sh"]
diff --git a/comps/llms/text-generation/ollama/langchain/__init__.py b/comps/llms/text-generation/ollama/langchain/__init__.py
deleted file mode 100644
index 916f3a44b..000000000
--- a/comps/llms/text-generation/ollama/langchain/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
diff --git a/comps/llms/text-generation/ollama/langchain/entrypoint.sh b/comps/llms/text-generation/ollama/langchain/entrypoint.sh
deleted file mode 100644
index d60eddd36..000000000
--- a/comps/llms/text-generation/ollama/langchain/entrypoint.sh
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/usr/bin/env bash
-
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-pip --no-cache-dir install -r requirements-runtime.txt
-
-python llm.py
diff --git a/comps/llms/text-generation/ollama/langchain/llm.py b/comps/llms/text-generation/ollama/langchain/llm.py
deleted file mode 100644
index a17fa9308..000000000
--- a/comps/llms/text-generation/ollama/langchain/llm.py
+++ /dev/null
@@ -1,60 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-import os
-
-from fastapi.responses import StreamingResponse
-from langchain_community.llms import Ollama
-
-from comps import CustomLogger, GeneratedDoc, LLMParamsDoc, ServiceType, opea_microservices, register_microservice
-
-logger = CustomLogger("llm_ollama")
-logflag = os.getenv("LOGFLAG", False)
-
-
-@register_microservice(
-    name="opea_service@llm_ollama",
-    service_type=ServiceType.LLM,
-    endpoint="/v1/chat/completions",
-    host="0.0.0.0",
-    port=9000,
-)
-async def llm_generate(input: LLMParamsDoc):
-    if logflag:
-        logger.info(input)
-    ollama = Ollama(
-        base_url=ollama_endpoint,
-        model=input.model if input.model else model_name,
-        num_predict=input.max_tokens,
-        top_k=input.top_k,
-        top_p=input.top_p,
-        temperature=input.temperature,
-        repeat_penalty=input.repetition_penalty,
-    )
-    # assuming you have Ollama installed and have llama3 model pulled with `ollama pull llama3`
-    if input.stream:
-
-        async def stream_generator():
-            chat_response = ""
-            async for text in ollama.astream(input.query):
-                chat_response += text
-                chunk_repr = repr(text.encode("utf-8"))
-                if logflag:
-                    logger.info(f"[llm - chat_stream] chunk:{chunk_repr}")
-                yield f"data: {chunk_repr}\n\n"
-            if logflag:
-                logger.info(f"[llm - chat_stream] stream response: {chat_response}")
-            yield "data: [DONE]\n\n"
-
-        return StreamingResponse(stream_generator(), media_type="text/event-stream")
-    else:
-        response = await ollama.ainvoke(input.query)
-        if logflag:
-            logger.info(response)
-        return GeneratedDoc(text=response, prompt=input.query)
-
-
-if __name__ == "__main__":
-    ollama_endpoint = os.getenv("OLLAMA_ENDPOINT", "http://localhost:11434")
-    model_name = os.getenv("OLLAMA_MODEL", "meta-llama/Meta-Llama-3-8B-Instruct")
-    opea_microservices["opea_service@llm_ollama"].start()
diff --git a/comps/llms/text-generation/ollama/langchain/requirements-runtime.txt b/comps/llms/text-generation/ollama/langchain/requirements-runtime.txt
deleted file mode 100644
index 225adde27..000000000
--- a/comps/llms/text-generation/ollama/langchain/requirements-runtime.txt
+++ /dev/null
@@ -1 +0,0 @@
-langserve
diff --git a/comps/llms/text-generation/ollama/langchain/requirements.txt b/comps/llms/text-generation/ollama/langchain/requirements.txt
deleted file mode 100644
index c936696b5..000000000
--- a/comps/llms/text-generation/ollama/langchain/requirements.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-docarray[full]
-fastapi
-huggingface_hub
-langchain
-langchain-community
-opentelemetry-api
-opentelemetry-exporter-otlp
-opentelemetry-sdk
-prometheus-fastapi-instrumentator
-shortuuid
-transformers
-uvicorn
diff --git a/tests/llms/test_llms_text-generation_service_ollama.sh b/tests/llms/test_llms_text-generation_service_ollama.sh
new file mode 100644
index 000000000..4d98ffb2d
--- /dev/null
+++ b/tests/llms/test_llms_text-generation_service_ollama.sh
@@ -0,0 +1,78 @@
+#!/bin/bash
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+set -x
+
+WORKPATH=$(dirname "$PWD")
+LOG_PATH="$WORKPATH/tests"
+ip_address=$(hostname -I | awk '{print $1}')
+ollama_endpoint_port=11435
+llm_port=9000
+
+function build_docker_images() {
+    cd $WORKPATH
+    docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/llm:comps -f comps/llms/src/text-generation/Dockerfile .
+    if [ $? -ne 0 ]; then
+        echo "opea/llm built fail"
+        exit 1
+    else
+        echo "opea/llm built successful"
+    fi
+}
+
+function start_service() {
+    export llm_model=$1
+    docker run -d --name="test-comps-llm-ollama-endpoint" -e https_proxy=$https_proxy -p $ollama_endpoint_port:11434 ollama/ollama
+    export LLM_ENDPOINT="http://${ip_address}:${ollama_endpoint_port}"
+
+    sleep 5s
+    docker exec test-comps-llm-ollama-endpoint ollama pull $llm_model
+    sleep 20s
+
+    unset http_proxy
+    docker run -d --name="test-comps-llm-ollama-server" -p $llm_port:9000 --ipc=host -e LOGFLAG=True -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e LLM_ENDPOINT=$LLM_ENDPOINT -e LLM_MODEL_ID=$llm_model opea/llm:comps
+    sleep 20s
+}
+
+function validate_microservice() {
+    result=$(http_proxy="" curl http://${ip_address}:${llm_port}/v1/chat/completions \
+        -X POST \
+        -d '{"messages": [{"role": "user", "content": "What is Deep Learning?"}]}' \
+        -H 'Content-Type: application/json')
+    if [[ $result == *"content"* ]]; then
+        echo "Result correct."
+    else
+        echo "Result wrong. Received was $result"
+        docker logs test-comps-llm-ollama-endpoint >> ${LOG_PATH}/llm-ollama.log
+        docker logs test-comps-llm-ollama-server >> ${LOG_PATH}/llm-server.log
+        exit 1
+    fi
+}
+
+function stop_docker() {
+    cid=$(docker ps -aq --filter "name=test-comps-llm-ollama*")
+    if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
+}
+
+function main() {
+
+    stop_docker
+    build_docker_images
+
+    pip install --no-cache-dir openai
+
+    llm_models=(
+        llama3.2:1b
+    )
+    for model in "${llm_models[@]}"; do
+        start_service "${model}"
+        validate_microservice
+        stop_docker
+    done
+
+    echo y | docker system prune
+
+}
+
+main
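
The new test installs the `openai` package but exercises the endpoint with `curl`. Since the refactored `llm-textgen` service accepts an OpenAI-style `messages` payload on `/v1/chat/completions`, it should also be consumable with the OpenAI Python client. A minimal sketch, not part of this patch, assuming the container started by `start_service` is reachable on localhost port 9000 and `llama3.2:1b` has been pulled into the Ollama backend:

```python
# Hypothetical client-side check mirroring validate_microservice() above.
# Assumes the llm-textgen container listens on localhost:9000 and the
# Ollama backend already has the llama3.2:1b model pulled.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:9000/v1", api_key="unused")

resp = client.chat.completions.create(
    model="llama3.2:1b",
    messages=[{"role": "user", "content": "What is Deep Learning?"}],
    stream=False,
)
print(resp.choices[0].message.content)
```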