Compare commits

6 Commits

Author SHA1 Message Date
Ying Hu
b10456e42a Update test_compose_vllm_on_xeon.sh
move the vllm-service
2025-03-04 19:11:12 +08:00
pre-commit-ci[bot]
3fb6cb590c [pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
2025-03-04 09:06:37 +00:00
Ying Hu
75ee579021 Create test_compose_vllm_on_xeon.sh for vLLM
Create test_compose_vllm_on_xeon.sh for vLLM
2025-03-04 17:05:30 +08:00
Ying Hu
768f1a45e2 Create compose_vllm.yaml for vLLM
Create compose_vllm.yaml for vLLM
2025-03-04 17:01:41 +08:00
pre-commit-ci[bot]
c4dffdad80 [pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
2025-03-04 08:59:57 +00:00
Ying Hu
2a0c4ccb81 Support vLLM for DBQnA
Support vLLM for DBQnA
1. update Readme
2025-03-04 16:57:49 +08:00
89 changed files with 1185 additions and 1449 deletions

View File

@@ -12,7 +12,6 @@ run_matrix="{\"include\":["
examples=$(printf '%s\n' "${changed_files[@]}" | grep '/' | cut -d'/' -f1 | sort -u)
for example in ${examples}; do
if [[ ! -d $WORKSPACE/$example ]]; then continue; fi
cd $WORKSPACE/$example
if [[ ! $(find . -type f | grep ${test_mode}) ]]; then continue; fi
cd tests

View File

@@ -8,7 +8,7 @@ services:
ports:
- "${AGENTQNA_TGI_SERVICE_PORT-8085}:80"
volumes:
- ${HF_CACHE_DIR:-/var/opea/agent-service/}:/data
- /var/opea/agent-service/:/data
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}

View File

@@ -9,7 +9,7 @@ echo "WORKDIR=${WORKDIR}"
export ip_address=$(hostname -I | awk '{print $1}')
export host_ip=${ip_address}
export HF_CACHE_DIR=${model_cache:-"$WORKDIR/hf_cache"}
export HF_CACHE_DIR=$WORKDIR/hf_cache
if [ ! -d "$HF_CACHE_DIR" ]; then
echo "Creating HF_CACHE directory"
mkdir -p "$HF_CACHE_DIR"

View File

@@ -13,7 +13,7 @@ export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
model="meta-llama/Llama-3.3-70B-Instruct" #"meta-llama/Meta-Llama-3.1-70B-Instruct"
export HF_CACHE_DIR=${model_cache:-"/data2/huggingface"}
export HF_CACHE_DIR=/data2/huggingface
if [ ! -d "$HF_CACHE_DIR" ]; then
HF_CACHE_DIR=$WORKDIR/hf_cache
mkdir -p "$HF_CACHE_DIR"

View File

@@ -11,7 +11,7 @@ export ip_address=$(hostname -I | awk '{print $1}')
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export HF_CACHE_DIR=${model_cache:-"$WORKDIR/hf_cache"}
export HF_CACHE_DIR=$WORKDIR/hf_cache
if [ ! -d "$HF_CACHE_DIR" ]; then
mkdir -p "$HF_CACHE_DIR"
fi

View File

@@ -1,8 +1,48 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
ARG BASE_TAG=latest
FROM opea/comps-base:$BASE_TAG
# Stage 1: base setup used by other stages
FROM python:3.11-slim AS base
# get security updates
RUN apt-get update && apt-get upgrade -y && \
apt-get clean && rm -rf /var/lib/apt/lists/*
ENV HOME=/home/user
RUN useradd -m -s /bin/bash user && \
mkdir -p $HOME && \
chown -R user $HOME
WORKDIR $HOME
# Stage 2: latest GenAIComps sources
FROM base AS git
RUN apt-get update && apt-get install -y --no-install-recommends git
RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git
# Stage 3: common layer shared by services using GenAIComps
FROM base AS comps-base
# copy just relevant parts
COPY --from=git $HOME/GenAIComps/comps $HOME/GenAIComps/comps
COPY --from=git $HOME/GenAIComps/*.* $HOME/GenAIComps/LICENSE $HOME/GenAIComps/
WORKDIR $HOME/GenAIComps
RUN pip install --no-cache-dir --upgrade pip setuptools && \
pip install --no-cache-dir -r $HOME/GenAIComps/requirements.txt
WORKDIR $HOME
ENV PYTHONPATH=$PYTHONPATH:$HOME/GenAIComps
USER user
# Stage 4: unique part
FROM comps-base
COPY ./audioqna.py $HOME/audioqna.py

View File

@@ -1,8 +1,48 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
ARG BASE_TAG=latest
FROM opea/comps-base:$BASE_TAG
# Stage 1: base setup used by other stages
FROM python:3.11-slim AS base
# get security updates
RUN apt-get update && apt-get upgrade -y && \
apt-get clean && rm -rf /var/lib/apt/lists/*
ENV HOME=/home/user
RUN useradd -m -s /bin/bash user && \
mkdir -p $HOME && \
chown -R user $HOME
WORKDIR $HOME
# Stage 2: latest GenAIComps sources
FROM base AS git
RUN apt-get update && apt-get install -y --no-install-recommends git
RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git
# Stage 3: common layer shared by services using GenAIComps
FROM base AS comps-base
# copy just relevant parts
COPY --from=git $HOME/GenAIComps/comps $HOME/GenAIComps/comps
COPY --from=git $HOME/GenAIComps/*.* $HOME/GenAIComps/LICENSE $HOME/GenAIComps/
WORKDIR $HOME/GenAIComps
RUN pip install --no-cache-dir --upgrade pip setuptools && \
pip install --no-cache-dir -r $HOME/GenAIComps/requirements.txt
WORKDIR $HOME
ENV PYTHONPATH=$PYTHONPATH:$HOME/GenAIComps
USER user
# Stage 4: unique part
FROM comps-base
COPY ./audioqna_multilang.py $HOME/audioqna_multilang.py

View File

@@ -16,14 +16,13 @@ SPEECHT5_SERVER_HOST_IP = os.getenv("SPEECHT5_SERVER_HOST_IP", "0.0.0.0")
SPEECHT5_SERVER_PORT = int(os.getenv("SPEECHT5_SERVER_PORT", 7055))
LLM_SERVER_HOST_IP = os.getenv("LLM_SERVER_HOST_IP", "0.0.0.0")
LLM_SERVER_PORT = int(os.getenv("LLM_SERVER_PORT", 3006))
LLM_MODEL_ID = os.getenv("LLM_MODEL_ID", "Intel/neural-chat-7b-v3-3")
def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
if self.services[cur_node].service_type == ServiceType.LLM:
# convert TGI/vLLM to unified OpenAI /v1/chat/completions format
next_inputs = {}
next_inputs["model"] = LLM_MODEL_ID
next_inputs["model"] = "tgi" # specifically clarify the fake model to make the format unified
next_inputs["messages"] = [{"role": "user", "content": inputs["asr_result"]}]
next_inputs["max_tokens"] = llm_parameters_dict["max_tokens"]
next_inputs["top_p"] = llm_parameters_dict["top_p"]

View File

@@ -17,7 +17,6 @@ GPT_SOVITS_SERVER_HOST_IP = os.getenv("GPT_SOVITS_SERVER_HOST_IP", "0.0.0.0")
GPT_SOVITS_SERVER_PORT = int(os.getenv("GPT_SOVITS_SERVER_PORT", 9088))
LLM_SERVER_HOST_IP = os.getenv("LLM_SERVER_HOST_IP", "0.0.0.0")
LLM_SERVER_PORT = int(os.getenv("LLM_SERVER_PORT", 8888))
LLM_MODEL_ID = os.getenv("LLM_MODEL_ID", "Intel/neural-chat-7b-v3-3")
def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
@@ -25,7 +24,7 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
if self.services[cur_node].service_type == ServiceType.LLM:
# convert TGI/vLLM to unified OpenAI /v1/chat/completions format
next_inputs = {}
next_inputs["model"] = LLM_MODEL_ID
next_inputs["model"] = "tgi" # specifically clarify the fake model to make the format unified
next_inputs["messages"] = [{"role": "user", "content": inputs["asr_result"]}]
next_inputs["max_tokens"] = llm_parameters_dict["max_tokens"]
next_inputs["top_p"] = llm_parameters_dict["top_p"]

View File

@@ -69,7 +69,6 @@ services:
- WHISPER_SERVER_PORT=${WHISPER_SERVER_PORT}
- LLM_SERVER_HOST_IP=${LLM_SERVER_HOST_IP}
- LLM_SERVER_PORT=${LLM_SERVER_PORT}
- LLM_MODEL_ID=${LLM_MODEL_ID}
- SPEECHT5_SERVER_HOST_IP=${SPEECHT5_SERVER_HOST_IP}
- SPEECHT5_SERVER_PORT=${SPEECHT5_SERVER_PORT}
ipc: host

View File

@@ -30,7 +30,7 @@ services:
ports:
- "3006:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "${MODEL_CACHE}:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
@@ -61,7 +61,6 @@ services:
- WHISPER_SERVER_PORT=${WHISPER_SERVER_PORT}
- LLM_SERVER_HOST_IP=${LLM_SERVER_HOST_IP}
- LLM_SERVER_PORT=${LLM_SERVER_PORT}
- LLM_MODEL_ID=${LLM_MODEL_ID}
- SPEECHT5_SERVER_HOST_IP=${SPEECHT5_SERVER_HOST_IP}
- SPEECHT5_SERVER_PORT=${SPEECHT5_SERVER_PORT}
ipc: host

View File

@@ -31,7 +31,7 @@ services:
ports:
- "3006:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "${MODEL_CACHE}:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}

View File

@@ -40,7 +40,7 @@ services:
ports:
- "3006:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "${MODEL_CACHE}:/data"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
@@ -82,7 +82,6 @@ services:
- WHISPER_SERVER_PORT=${WHISPER_SERVER_PORT}
- LLM_SERVER_HOST_IP=${LLM_SERVER_HOST_IP}
- LLM_SERVER_PORT=${LLM_SERVER_PORT}
- LLM_MODEL_ID=${LLM_MODEL_ID}
- SPEECHT5_SERVER_HOST_IP=${SPEECHT5_SERVER_HOST_IP}
- SPEECHT5_SERVER_PORT=${SPEECHT5_SERVER_PORT}
ipc: host

View File

@@ -30,7 +30,7 @@ services:
ports:
- "${CHATQNA_TEI_EMBEDDING_PORT}:80"
volumes:
- "${MODEL_CACHE:-/var/opea/chatqna-service/data}:/data"
- "/var/opea/chatqna-service/data:/data"
shm_size: 1g
ipc: host
environment:
@@ -72,7 +72,7 @@ services:
ports:
- "${CHATQNA_TEI_RERANKING_PORT}:80"
volumes:
- "${MODEL_CACHE:-/var/opea/chatqna-service/data}:/data"
- "/var/opea/chatqna-service/data:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
@@ -104,7 +104,7 @@ services:
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
volumes:
- "${MODEL_CACHE:-/var/opea/chatqna-service/data}:/data"
- "/var/opea/chatqna-service/data:/data"
shm_size: 1g
devices:
- /dev/kfd:/dev/kfd

View File

@@ -31,7 +31,7 @@ services:
ports:
- "6006:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "./data:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
@@ -64,7 +64,7 @@ services:
ports:
- "8808:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "./data:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
@@ -80,7 +80,7 @@ services:
ports:
- "9009:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "./data:/data"
shm_size: 128g
environment:
no_proxy: ${no_proxy}

View File

@@ -28,7 +28,7 @@ services:
ports:
- "6006:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "./data:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
@@ -59,7 +59,7 @@ services:
ports:
- "8808:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "./data:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
@@ -75,7 +75,7 @@ services:
ports:
- "9009:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "./data:/data"
shm_size: 128g
environment:
no_proxy: ${no_proxy}

View File

@@ -32,7 +32,7 @@ services:
ports:
- "6040:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "./data:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
@@ -64,7 +64,7 @@ services:
ports:
- "6041:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "./data:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
@@ -80,7 +80,7 @@ services:
ports:
- "6042:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "./data:/data"
shm_size: 128g
environment:
no_proxy: ${no_proxy}

View File

@@ -31,7 +31,7 @@ services:
ports:
- "6006:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "./data:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
@@ -64,7 +64,7 @@ services:
ports:
- "8808:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "./data:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
@@ -80,7 +80,7 @@ services:
ports:
- "9009:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "./data:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}

View File

@@ -31,7 +31,7 @@ services:
ports:
- "6006:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "./data:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
@@ -64,7 +64,7 @@ services:
ports:
- "9009:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "./data:/data"
shm_size: 128g
environment:
no_proxy: ${no_proxy}

View File

@@ -31,7 +31,7 @@ services:
ports:
- "8090:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "./data:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
@@ -62,7 +62,7 @@ services:
ports:
- "8808:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "./data:/data"
runtime: habana
cap_add:
- SYS_NICE
@@ -83,7 +83,7 @@ services:
ports:
- "8007:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "./data:/data"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}

View File

@@ -31,7 +31,7 @@ services:
ports:
- "8088:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "./data:/data"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
@@ -70,7 +70,7 @@ services:
ports:
- "8090:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "./data:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
@@ -103,7 +103,7 @@ services:
ports:
- "8808:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "./data:/data"
runtime: habana
cap_add:
- SYS_NICE
@@ -124,7 +124,7 @@ services:
ports:
- "8008:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "./data:/data"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}

View File

@@ -31,7 +31,7 @@ services:
ports:
- "8090:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "./data:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
@@ -64,7 +64,7 @@ services:
ports:
- "8808:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "./data:/data"
runtime: habana
cap_add:
- SYS_NICE
@@ -85,7 +85,7 @@ services:
ports:
- "8005:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "./data:/data"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}

View File

@@ -31,7 +31,7 @@ services:
ports:
- "8090:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "./data:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
@@ -64,7 +64,7 @@ services:
ports:
- "8007:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "./data:/data"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}

View File

@@ -9,7 +9,6 @@ echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
export MODEL_CACHE=${model_cache:-"./data"}
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"

View File

@@ -9,7 +9,6 @@ echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
export MODEL_CACHE=${model_cache:-"./data"}
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"

View File

@@ -9,7 +9,6 @@ echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
export MODEL_CACHE=${model_cache:-"/var/opea/chatqna-service/data"}
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"

View File

@@ -9,7 +9,6 @@ echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
export MODEL_CACHE=${model_cache:-"./data"}
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"

View File

@@ -9,7 +9,6 @@ echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
export MODEL_CACHE=${model_cache:-"./data"}
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"

View File

@@ -9,7 +9,6 @@ echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
export MODEL_CACHE=${model_cache:-"./data"}
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"

View File

@@ -9,7 +9,6 @@ echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
export MODEL_CACHE=${model_cache:-"./data"}
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"

View File

@@ -9,7 +9,6 @@ echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
export MODEL_CACHE=${model_cache:-"./data"}
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"

View File

@@ -9,7 +9,6 @@ echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
export MODEL_CACHE=${model_cache:-"./data"}
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"

View File

@@ -9,7 +9,6 @@ echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
export MODEL_CACHE=${model_cache:-"./data"}
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"

View File

@@ -8,7 +8,7 @@ services:
ports:
- "8028:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "${MODEL_CACHE}:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}

View File

@@ -8,7 +8,7 @@ services:
ports:
- "8028:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "${MODEL_CACHE}:/data"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}

View File

@@ -2,8 +2,6 @@
This document outlines the deployment process for a CodeTrans application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on an Intel Xeon server. The steps include Docker image creation, container deployment via Docker Compose, and service execution using the `llm` microservice. We will publish the Docker images to Docker Hub soon, which will simplify the deployment process for this service.
The default pipeline deploys with vLLM as the LLM serving component. It also provides the option of using a TGI backend for the LLM microservice; see the [start-microservice-docker-containers](#start-microservice-docker-containers) section on this page and the quick preview below.
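As a quick preview of the commands detailed later in this guide, choosing the serving backend comes down to which compose file you bring up:
```bash
cd GenAIExamples/CodeTrans/docker_compose/intel/cpu/xeon
# vLLM backend (the default)
docker compose -f compose.yaml up -d
# TGI backend
docker compose -f compose_tgi.yaml up -d
```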
## 🚀 Create an AWS Xeon Instance
To run the example on an AWS Xeon instance, start by creating an AWS account if you don't already have one. Then get started with the [EC2 Console](https://console.aws.amazon.com/ec2/v2/home). AWS EC2 M7i, C7i, C7i-flex, and M7i-flex instances, built on Intel Xeon Scalable processors (code-named Sapphire Rapids), are suitable for this task.
@@ -65,37 +63,6 @@ By default, the LLM model is set to a default value as listed below:
Change `LLM_MODEL_ID` below to suit your needs.
Users in China who cannot download models directly from Hugging Face can use [ModelScope](https://www.modelscope.cn/models) or a Hugging Face mirror instead. vLLM/TGI can load models either online or offline, as described below:
1. Online
```bash
export HF_TOKEN=${your_hf_token}
export HF_ENDPOINT="https://hf-mirror.com"
model_name="mistralai/Mistral-7B-Instruct-v0.3"
# Start vLLM LLM Service
docker run -p 8008:80 -v ./data:/data --name vllm-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --shm-size 128g opea/vllm:latest --model $model_name --host 0.0.0.0 --port 80
# Start TGI LLM Service
docker run -p 8008:80 -v ./data:/data --name tgi-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu --model-id $model_name
```
2. Offline
- Search your model name in ModelScope. For example, check [this page](https://www.modelscope.cn/models/rubraAI/Mistral-7B-Instruct-v0.3/files) for model `mistralai/Mistral-7B-Instruct-v0.3`.
- Click the `Download this model` button and choose a method to download the model to your local path `/path/to/model`.
- Run the following command to start the LLM service.
```bash
export HF_TOKEN=${your_hf_token}
export model_path="/path/to/model"
# Start vLLM LLM Service
docker run -p 8008:80 -v $model_path:/data --name vllm-service --shm-size 128g opea/vllm:latest --model /data --host 0.0.0.0 --port 80
# Start TGI LLM Service
docker run -p 8008:80 -v $model_path:/data --name tgi-service --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu --model-id /data
```
### Setup Environment Variables
1. Set the required environment variables:
@@ -128,47 +95,15 @@ For users in China who are unable to download models directly from Huggingface,
```bash
cd GenAIExamples/CodeTrans/docker_compose/intel/cpu/xeon
```
If using vLLM as the LLM serving backend:
```bash
docker compose -f compose.yaml up -d
```
If using TGI as the LLM serving backend:
```bash
docker compose -f compose_tgi.yaml up -d
docker compose up -d
```
### Validate Microservices
1. LLM backend Service
On its first startup, this service takes extra time to download, load, and warm up the model. Once that finishes, the service is ready.
Use the commands below to check whether the LLM serving backend is ready.
1. TGI Service
```bash
# vLLM service
docker logs codetrans-xeon-vllm-service 2>&1 | grep complete
# If the service is ready, you will get the response like below.
INFO: Application startup complete.
```
```bash
# TGI service
docker logs codetrans-xeon-tgi-service | grep Connected
# If the service is ready, you will get the response like below.
2024-09-03T02:47:53.402023Z INFO text_generation_router::server: router/src/server.rs:2311: Connected
```
Then try the `cURL` command below to validate services.
```bash
# either vLLM or TGI service
curl http://${host_ip}:8008/v1/chat/completions \
curl http://${host_ip}:8008/generate \
-X POST \
-d '{"inputs":" ### System: Please translate the following Golang codes into Python codes. ### Original codes: '\'''\'''\''Golang \npackage main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n '\'''\'''\'' ### Translated codes:","parameters":{"max_new_tokens":17, "do_sample": true}}' \
-H 'Content-Type: application/json'

View File

@@ -2,32 +2,31 @@
# SPDX-License-Identifier: Apache-2.0
services:
vllm-service:
image: ${REGISTRY:-opea}/vllm:${TAG:-latest}
container_name: codetrans-xeon-vllm-service
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
container_name: codetrans-tgi-service
ports:
- "8008:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "${MODEL_CACHE}:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
LLM_MODEL_ID: ${LLM_MODEL_ID}
VLLM_TORCH_PROFILER_DIR: "/mnt"
host_ip: ${host_ip}
healthcheck:
test: ["CMD-SHELL", "curl -f http://$host_ip:8008/health || exit 1"]
interval: 10s
timeout: 10s
retries: 100
command: --model $LLM_MODEL_ID --host 0.0.0.0 --port 80
command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
llm:
image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest}
container_name: codetrans-xeon-llm-server
container_name: llm-textgen-server
depends_on:
vllm-service:
tgi-service:
condition: service_healthy
ports:
- "9000:9000"
@@ -36,19 +35,18 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
LLM_ENDPOINT: ${LLM_ENDPOINT}
LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
LLM_MODEL_ID: ${LLM_MODEL_ID}
LLM_COMPONENT_NAME: ${LLM_COMPONENT_NAME}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
codetrans-xeon-backend-server:
image: ${REGISTRY:-opea}/codetrans:${TAG:-latest}
container_name: codetrans-xeon-backend-server
depends_on:
- vllm-service
- tgi-service
- llm
ports:
- "${BACKEND_SERVICE_PORT:-7777}:7777"
- "7777:7777"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
@@ -63,7 +61,7 @@ services:
depends_on:
- codetrans-xeon-backend-server
ports:
- "${FRONTEND_SERVICE_PORT:-5173}:5173"
- "5173:5173"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}

View File

@@ -1,95 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
services:
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
container_name: codetrans-xeon-tgi-service
ports:
- "8008:80"
volumes:
- "${MODEL_CACHE}:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
host_ip: ${host_ip}
healthcheck:
test: ["CMD-SHELL", "curl -f http://$host_ip:8008/health || exit 1"]
interval: 10s
timeout: 10s
retries: 100
command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
llm:
image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest}
container_name: codetrans-xeon-llm-server
depends_on:
tgi-service:
condition: service_healthy
ports:
- "9000:9000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
LLM_ENDPOINT: ${LLM_ENDPOINT}
LLM_MODEL_ID: ${LLM_MODEL_ID}
LLM_COMPONENT_NAME: ${LLM_COMPONENT_NAME}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
codetrans-xeon-backend-server:
image: ${REGISTRY:-opea}/codetrans:${TAG:-latest}
container_name: codetrans-xeon-backend-server
depends_on:
- tgi-service
- llm
ports:
- "${BACKEND_SERVICE_PORT:-7777}:7777"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
ipc: host
restart: always
codetrans-xeon-ui-server:
image: ${REGISTRY:-opea}/codetrans-ui:${TAG:-latest}
container_name: codetrans-xeon-ui-server
depends_on:
- codetrans-xeon-backend-server
ports:
- "${FRONTEND_SERVICE_PORT:-5173}:5173"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- BASE_URL=${BACKEND_SERVICE_ENDPOINT}
ipc: host
restart: always
codetrans-xeon-nginx-server:
image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
container_name: codetrans-xeon-nginx-server
depends_on:
- codetrans-xeon-backend-server
- codetrans-xeon-ui-server
ports:
- "${NGINX_PORT:-80}:80"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- FRONTEND_SERVICE_IP=${FRONTEND_SERVICE_IP}
- FRONTEND_SERVICE_PORT=${FRONTEND_SERVICE_PORT}
- BACKEND_SERVICE_NAME=${BACKEND_SERVICE_NAME}
- BACKEND_SERVICE_IP=${BACKEND_SERVICE_IP}
- BACKEND_SERVICE_PORT=${BACKEND_SERVICE_PORT}
ipc: host
restart: always
networks:
default:
driver: bridge

View File

@@ -2,8 +2,6 @@
This document outlines the deployment process for a CodeTrans application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on an Intel Gaudi server. The steps include Docker image creation, container deployment via Docker Compose, and service execution using the `llm` microservice. We will publish the Docker images to Docker Hub soon, which will simplify the deployment process for this service.
The default pipeline deploys with vLLM as the LLM serving component. It also provides the option of using a TGI backend for the LLM microservice; see the [start-microservice-docker-containers](#start-microservice-docker-containers) section on this page and the quick preview below.
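The same choice applies on Gaudi; as shown later in this guide, each backend has its own compose file:
```bash
cd GenAIExamples/CodeTrans/docker_compose/intel/hpu/gaudi
# vLLM backend (the default)
docker compose -f compose.yaml up -d
# TGI backend
docker compose -f compose_tgi.yaml up -d
```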
## 🚀 Build Docker Images
First of all, you need to build the Docker images locally and install the corresponding Python package. This step can be skipped once the Docker images are published to Docker Hub.
@@ -57,37 +55,6 @@ By default, the LLM model is set to a default value as listed below:
Change `LLM_MODEL_ID` below to suit your needs.
Users in China who cannot download models directly from Hugging Face can use [ModelScope](https://www.modelscope.cn/models) or a Hugging Face mirror instead. vLLM/TGI can load models either online or offline, as described below:
1. Online
```bash
export HF_TOKEN=${your_hf_token}
export HF_ENDPOINT="https://hf-mirror.com"
model_name="mistralai/Mistral-7B-Instruct-v0.3"
# Start vLLM LLM Service
docker run -p 8008:80 -v ./data:/data --name vllm-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --shm-size 128g opea/vllm:latest --model $model_name --host 0.0.0.0 --port 80
# Start TGI LLM Service
docker run -p 8008:80 -v ./data:/data --name tgi-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu --model-id $model_name
```
2. Offline
- Search your model name in ModelScope. For example, check [this page](https://www.modelscope.cn/models/rubraAI/Mistral-7B-Instruct-v0.3/files) for model `mistralai/Mistral-7B-Instruct-v0.3`.
- Click the `Download this model` button and choose a method to download the model to your local path `/path/to/model`.
- Run the following command to start the LLM service.
```bash
export HF_TOKEN=${your_hf_token}
export model_path="/path/to/model"
# Start vLLM LLM Service
docker run -p 8008:80 -v $model_path:/data --name vllm-service --shm-size 128g opea/vllm:latest --model /data --host 0.0.0.0 --port 80
# Start TGI LLM Service
docker run -p 8008:80 -v $model_path:/data --name tgi-service --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu --model-id /data
```
### Setup Environment Variables
1. Set the required environment variables:
@@ -120,43 +87,12 @@ For users in China who are unable to download models directly from Huggingface,
```bash
cd GenAIExamples/CodeTrans/docker_compose/intel/hpu/gaudi
```
If using vLLM as the LLM serving backend:
```bash
docker compose -f compose.yaml up -d
```
If using TGI as the LLM serving backend:
```bash
docker compose -f compose_tgi.yaml up -d
docker compose up -d
```
### Validate Microservices
1. LLM backend Service
On its first startup, this service takes extra time to download, load, and warm up the model. Once that finishes, the service is ready.
Use the commands below to check whether the LLM serving backend is ready.
```bash
# vLLM service
docker logs codetrans-gaudi-vllm-service 2>&1 | grep complete
# If the service is ready, you will get the response like below.
INFO: Application startup complete.
```
```bash
# TGI service
docker logs codetrans-gaudi-tgi-service | grep Connected
# If the service is ready, you will get the response like below.
2024-09-03T02:47:53.402023Z INFO text_generation_router::server: router/src/server.rs:2311: Connected
```
Then try the `cURL` command below to validate services.
1. TGI Service
```bash
curl http://${host_ip}:8008/generate \

View File

@@ -2,38 +2,39 @@
# SPDX-License-Identifier: Apache-2.0
services:
vllm-service:
image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest}
container_name: codetrans-gaudi-vllm-service
tgi-service:
image: ghcr.io/huggingface/tgi-gaudi:2.0.6
container_name: codetrans-tgi-service
ports:
- "8008:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "${MODEL_CACHE}:/data"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
LLM_MODEL_ID: ${LLM_MODEL_ID}
NUM_CARDS: ${NUM_CARDS}
VLLM_TORCH_PROFILER_DIR: "/mnt"
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
ENABLE_HPU_GRAPH: true
LIMIT_HPU_GRAPH: true
USE_FLASH_ATTENTION: true
FLASH_ATTENTION_RECOMPUTE: true
healthcheck:
test: ["CMD-SHELL", "curl -f http://$host_ip:8008/health || exit 1"]
interval: 10s
timeout: 10s
retries: 100
test: ["CMD-SHELL", "sleep 500 && exit 0"]
interval: 1s
timeout: 505s
retries: 1
runtime: habana
cap_add:
- SYS_NICE
ipc: host
command: --model $LLM_MODEL_ID --tensor-parallel-size ${NUM_CARDS} --host 0.0.0.0 --port 80 --block-size ${BLOCK_SIZE} --max-num-seqs ${MAX_NUM_SEQS} --max-seq_len-to-capture ${MAX_SEQ_LEN_TO_CAPTURE}
command: --model-id ${LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048
llm:
image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest}
container_name: codetrans-xeon-llm-server
container_name: llm-textgen-gaudi-server
depends_on:
vllm-service:
tgi-service:
condition: service_healthy
ports:
- "9000:9000"
@@ -42,19 +43,18 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
LLM_ENDPOINT: ${LLM_ENDPOINT}
LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
LLM_MODEL_ID: ${LLM_MODEL_ID}
LLM_COMPONENT_NAME: ${LLM_COMPONENT_NAME}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
codetrans-gaudi-backend-server:
image: ${REGISTRY:-opea}/codetrans:${TAG:-latest}
container_name: codetrans-gaudi-backend-server
depends_on:
- vllm-service
- tgi-service
- llm
ports:
- "${BACKEND_SERVICE_PORT:-7777}:7777"
- "7777:7777"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
@@ -69,7 +69,7 @@ services:
depends_on:
- codetrans-gaudi-backend-server
ports:
- "${FRONTEND_SERVICE_PORT:-5173}:5173"
- "5173:5173"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}

View File

@@ -1,99 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
services:
tgi-service:
image: ghcr.io/huggingface/tgi-gaudi:2.0.6
container_name: codetrans-gaudi-tgi-service
ports:
- "8008:80"
volumes:
- "${MODEL_CACHE}:/data"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
ENABLE_HPU_GRAPH: true
LIMIT_HPU_GRAPH: true
USE_FLASH_ATTENTION: true
FLASH_ATTENTION_RECOMPUTE: true
runtime: habana
cap_add:
- SYS_NICE
ipc: host
command: --model-id ${LLM_MODEL_ID} --max-input-length 2048 --max-total-tokens 4096
llm:
image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest}
container_name: codetrans-gaudi-llm-server
depends_on:
- tgi-service
ports:
- "9000:9000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
LLM_ENDPOINT: ${LLM_ENDPOINT}
LLM_MODEL_ID: ${LLM_MODEL_ID}
LLM_COMPONENT_NAME: ${LLM_COMPONENT_NAME}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
codetrans-gaudi-backend-server:
image: ${REGISTRY:-opea}/codetrans:${TAG:-latest}
container_name: codetrans-gaudi-backend-server
depends_on:
- tgi-service
- llm
ports:
- "${BACKEND_SERVICE_PORT:-7777}:7777"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
ipc: host
restart: always
codetrans-gaudi-ui-server:
image: ${REGISTRY:-opea}/codetrans-ui:${TAG:-latest}
container_name: codetrans-gaudi-ui-server
depends_on:
- codetrans-gaudi-backend-server
ports:
- "${FRONTEND_SERVICE_PORT:-5173}:5173"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- BASE_URL=${BACKEND_SERVICE_ENDPOINT}
ipc: host
restart: always
codetrans-gaudi-nginx-server:
image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
container_name: codetrans-gaudi-nginx-server
depends_on:
- codetrans-gaudi-backend-server
- codetrans-gaudi-ui-server
ports:
- "${NGINX_PORT:-80}:80"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- FRONTEND_SERVICE_IP=${FRONTEND_SERVICE_IP}
- FRONTEND_SERVICE_PORT=${FRONTEND_SERVICE_PORT}
- BACKEND_SERVICE_NAME=${BACKEND_SERVICE_NAME}
- BACKEND_SERVICE_IP=${BACKEND_SERVICE_IP}
- BACKEND_SERVICE_PORT=${BACKEND_SERVICE_PORT}
ipc: host
restart: always
networks:
default:
driver: bridge

View File

@@ -8,12 +8,7 @@ popd > /dev/null
export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"
export LLM_ENDPOINT="http://${host_ip}:8008"
export LLM_COMPONENT_NAME="OpeaTextGenService"
export NUM_CARDS=1
export BLOCK_SIZE=128
export MAX_NUM_SEQS=256
export MAX_SEQ_LEN_TO_CAPTURE=2048
export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
export MEGA_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:7777/v1/codetrans"

View File

@@ -23,18 +23,6 @@ services:
dockerfile: comps/llms/src/text-generation/Dockerfile
extends: codetrans
image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest}
vllm:
build:
context: vllm
dockerfile: Dockerfile.cpu
extends: codetrans
image: ${REGISTRY:-opea}/vllm:${TAG:-latest}
vllm-gaudi:
build:
context: vllm-fork
dockerfile: Dockerfile.hpu
extends: codetrans
image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest}
nginx:
build:
context: GenAIComps

View File

@@ -30,12 +30,12 @@ function build_docker_images() {
cd $WORKPATH/docker_image_build
git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
git clone --depth 1 --branch v0.6.4.post2+Gaudi-1.19.0 https://github.com/HabanaAI/vllm-fork.git
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="codetrans codetrans-ui llm-textgen vllm-gaudi nginx"
service_list="codetrans codetrans-ui llm-textgen nginx"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
docker images && sleep 1s
}
@@ -45,12 +45,7 @@ function start_services() {
export http_proxy=${http_proxy}
export https_proxy=${http_proxy}
export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"
export LLM_ENDPOINT="http://${ip_address}:8008"
export LLM_COMPONENT_NAME="OpeaTextGenService"
export NUM_CARDS=1
export BLOCK_SIZE=128
export MAX_NUM_SEQS=256
export MAX_SEQ_LEN_TO_CAPTURE=2048
export TGI_LLM_ENDPOINT="http://${ip_address}:8008"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export MEGA_SERVICE_HOST_IP=${ip_address}
export LLM_SERVICE_HOST_IP=${ip_address}
@@ -70,15 +65,13 @@ function start_services() {
n=0
until [[ "$n" -ge 100 ]]; do
docker logs codetrans-gaudi-vllm-service > ${LOG_PATH}/vllm_service_start.log 2>&1
if grep -q complete ${LOG_PATH}/vllm_service_start.log; then
docker logs codetrans-tgi-service > ${LOG_PATH}/tgi_service_start.log
if grep -q Connected ${LOG_PATH}/tgi_service_start.log; then
break
fi
sleep 5s
n=$((n+1))
done
sleep 1m
}
function validate_services() {
@@ -110,19 +103,27 @@ function validate_services() {
}
function validate_microservices() {
# tgi llm serving backend
validate_services \
"${ip_address}:8008/generate" \
"generated_text" \
"tgi" \
"codetrans-tgi-service" \
'{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}'
# llm microservice
validate_services \
"${ip_address}:9000/v1/chat/completions" \
"data: " \
"llm" \
"codetrans-xeon-llm-server" \
"llm-textgen-gaudi-server" \
'{"query":" ### System: Please translate the following Golang codes into Python codes. ### Original codes: '\'''\'''\''Golang \npackage main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n '\'''\'''\'' ### Translated codes:"}'
}
function validate_megaservice() {
# Curl the Mega Service
validate_services \
"${ip_address}:${BACKEND_SERVICE_PORT}/v1/codetrans" \
"${ip_address}:7777/v1/codetrans" \
"print" \
"mega-codetrans" \
"codetrans-gaudi-backend-server" \
@@ -130,7 +131,7 @@ function validate_megaservice() {
# test the megaservice via nginx
validate_services \
"${ip_address}:${NGINX_PORT}/v1/codetrans" \
"${ip_address}:80/v1/codetrans" \
"print" \
"mega-codetrans-nginx" \
"codetrans-gaudi-nginx-server" \
@@ -169,7 +170,7 @@ function validate_frontend() {
function stop_docker() {
cd $WORKPATH/docker_compose/intel/hpu/gaudi
docker compose -f compose.yaml stop && docker compose rm -f
docker compose stop && docker compose rm -f
}
function main() {

View File

@@ -30,16 +30,12 @@ function build_docker_images() {
cd $WORKPATH/docker_image_build
git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
git clone https://github.com/vllm-project/vllm.git && cd vllm
VLLM_VER="$(git describe --tags "$(git rev-list --tags --max-count=1)" )"
echo "Check out vLLM tag ${VLLM_VER}"
git checkout ${VLLM_VER} &> /dev/null
cd ../
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="codetrans codetrans-ui llm-textgen vllm nginx"
service_list="codetrans codetrans-ui llm-textgen nginx"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
docker images && sleep 1s
}
@@ -48,8 +44,7 @@ function start_services() {
export http_proxy=${http_proxy}
export https_proxy=${http_proxy}
export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"
export LLM_ENDPOINT="http://${ip_address}:8008"
export LLM_COMPONENT_NAME="OpeaTextGenService"
export TGI_LLM_ENDPOINT="http://${ip_address}:8008"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export MEGA_SERVICE_HOST_IP=${ip_address}
export LLM_SERVICE_HOST_IP=${ip_address}
@@ -65,19 +60,17 @@ function start_services() {
sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
# Start Docker Containers
docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
docker compose up -d > ${LOG_PATH}/start_services_with_compose.log
n=0
until [[ "$n" -ge 100 ]]; do
docker logs codetrans-xeon-vllm-service > ${LOG_PATH}/vllm_service_start.log 2>&1
if grep -q complete ${LOG_PATH}/vllm_service_start.log; then
docker logs codetrans-tgi-service > ${LOG_PATH}/tgi_service_start.log
if grep -q Connected ${LOG_PATH}/tgi_service_start.log; then
break
fi
sleep 5s
n=$((n+1))
done
sleep 1m
}
function validate_services() {
@@ -109,12 +102,20 @@ function validate_services() {
}
function validate_microservices() {
# tgi llm serving backend
validate_services \
"${ip_address}:8008/generate" \
"generated_text" \
"tgi" \
"codetrans-tgi-service" \
'{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}'
# llm microservice
validate_services \
"${ip_address}:9000/v1/chat/completions" \
"data: " \
"llm" \
"codetrans-xeon-llm-server" \
"llm-textgen-server" \
'{"query":" ### System: Please translate the following Golang codes into Python codes. ### Original codes: '\'''\'''\''Golang \npackage main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n '\'''\'''\'' ### Translated codes:"}'
}
@@ -122,7 +123,7 @@ function validate_microservices() {
function validate_megaservice() {
# Curl the Mega Service
validate_services \
"${ip_address}:${BACKEND_SERVICE_PORT}/v1/codetrans" \
"${ip_address}:7777/v1/codetrans" \
"print" \
"mega-codetrans" \
"codetrans-xeon-backend-server" \
@@ -130,7 +131,7 @@ function validate_megaservice() {
# test the megaservice via nginx
validate_services \
"${ip_address}:${NGINX_PORT}/v1/codetrans" \
"${ip_address}:80/v1/codetrans" \
"print" \
"mega-codetrans-nginx" \
"codetrans-xeon-nginx-server" \
@@ -168,7 +169,7 @@ function validate_frontend() {
function stop_docker() {
cd $WORKPATH/docker_compose/intel/cpu/xeon/
docker compose -f compose.yaml stop && docker compose rm -f
docker compose stop && docker compose rm -f
}
function main() {

View File

@@ -1,194 +0,0 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -xe
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
export MODEL_CACHE=${model_cache:-"./data"}
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')
function build_docker_images() {
opea_branch=${opea_branch:-"main"}
# If the opea_branch isn't main, replace the git clone branch in Dockerfile.
if [[ "${opea_branch}" != "main" ]]; then
cd $WORKPATH
OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git"
NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git"
find . -type f -name "Dockerfile*" | while read -r file; do
echo "Processing file: $file"
sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file"
done
fi
cd $WORKPATH/docker_image_build
git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="codetrans codetrans-ui llm-textgen nginx"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
docker images && sleep 1s
}
function start_services() {
cd $WORKPATH/docker_compose/intel/hpu/gaudi/
export http_proxy=${http_proxy}
export https_proxy=${http_proxy}
export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"
export LLM_ENDPOINT="http://${ip_address}:8008"
export LLM_COMPONENT_NAME="OpeaTextGenService"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export MEGA_SERVICE_HOST_IP=${ip_address}
export LLM_SERVICE_HOST_IP=${ip_address}
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:7777/v1/codetrans"
export FRONTEND_SERVICE_IP=${ip_address}
export FRONTEND_SERVICE_PORT=5173
export BACKEND_SERVICE_NAME=codetrans
export BACKEND_SERVICE_IP=${ip_address}
export BACKEND_SERVICE_PORT=7777
export NGINX_PORT=80
export host_ip=${ip_address}
sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
# Start Docker Containers
docker compose -f compose_tgi.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
n=0
until [[ "$n" -ge 100 ]]; do
docker logs codetrans-gaudi-tgi-service > ${LOG_PATH}/tgi_service_start.log
if grep -q Connected ${LOG_PATH}/tgi_service_start.log; then
break
fi
sleep 5s
n=$((n+1))
done
sleep 1m
}
function validate_services() {
local URL="$1"
local EXPECTED_RESULT="$2"
local SERVICE_NAME="$3"
local DOCKER_NAME="$4"
local INPUT_DATA="$5"
local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
if [ "$HTTP_STATUS" -eq 200 ]; then
echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log)
if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then
echo "[ $SERVICE_NAME ] Content is as expected."
else
echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT"
docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
exit 1
fi
else
echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
exit 1
fi
sleep 5s
}
function validate_microservices() {
# tgi llm serving backend
validate_services \
"${ip_address}:8008/generate" \
"generated_text" \
"tgi" \
"codetrans-gaudi-tgi-service" \
'{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}'
# llm microservice
validate_services \
"${ip_address}:9000/v1/chat/completions" \
"data: " \
"llm" \
"codetrans-gaudi-llm-server" \
'{"query":" ### System: Please translate the following Golang codes into Python codes. ### Original codes: '\'''\'''\''Golang \npackage main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n '\'''\'''\'' ### Translated codes:"}'
}
function validate_megaservice() {
# Curl the Mega Service
validate_services \
"${ip_address}:${BACKEND_SERVICE_PORT}/v1/codetrans" \
"print" \
"mega-codetrans" \
"codetrans-gaudi-backend-server" \
'{"language_from": "Golang","language_to": "Python","source_code": "package main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n}"}'
# test the megaservice via nginx
validate_services \
"${ip_address}:${NGINX_PORT}/v1/codetrans" \
"print" \
"mega-codetrans-nginx" \
"codetrans-gaudi-nginx-server" \
'{"language_from": "Golang","language_to": "Python","source_code": "package main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n}"}'
}
function validate_frontend() {
cd $WORKPATH/ui/svelte
local conda_env_name="OPEA_e2e"
export PATH=${HOME}/miniforge3/bin/:$PATH
if conda info --envs | grep -q "$conda_env_name"; then
echo "$conda_env_name exist!"
else
conda create -n ${conda_env_name} python=3.12 -y
fi
source activate ${conda_env_name}
sed -i "s/localhost/$ip_address/g" playwright.config.ts
conda install -c conda-forge nodejs=22.6.0 -y
npm install && npm ci && npx playwright install --with-deps
node -v && npm -v && pip list
exit_status=0
npx playwright test || exit_status=$?
if [ $exit_status -ne 0 ]; then
echo "[TEST INFO]: ---------frontend test failed---------"
exit $exit_status
else
echo "[TEST INFO]: ---------frontend test passed---------"
fi
}
function stop_docker() {
cd $WORKPATH/docker_compose/intel/hpu/gaudi/
docker compose -f compose_tgi.yaml stop && docker compose rm -f
}
function main() {
stop_docker
if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
start_services
validate_microservices
validate_megaservice
validate_frontend
stop_docker
echo y | docker system prune
}
main

View File

@@ -1,194 +0,0 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -xe
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
export MODEL_CACHE=${model_cache:-"./data"}
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')
function build_docker_images() {
opea_branch=${opea_branch:-"main"}
# If the opea_branch isn't main, replace the git clone branch in Dockerfile.
if [[ "${opea_branch}" != "main" ]]; then
cd $WORKPATH
OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git"
NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git"
find . -type f -name "Dockerfile*" | while read -r file; do
echo "Processing file: $file"
sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file"
done
fi
cd $WORKPATH/docker_image_build
git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="codetrans codetrans-ui llm-textgen nginx"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
docker images && sleep 1s
}
function start_services() {
cd $WORKPATH/docker_compose/intel/cpu/xeon/
export http_proxy=${http_proxy}
export https_proxy=${http_proxy}
export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"
export LLM_ENDPOINT="http://${ip_address}:8008"
export LLM_COMPONENT_NAME="OpeaTextGenService"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export MEGA_SERVICE_HOST_IP=${ip_address}
export LLM_SERVICE_HOST_IP=${ip_address}
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:7777/v1/codetrans"
export FRONTEND_SERVICE_IP=${ip_address}
export FRONTEND_SERVICE_PORT=5173
export BACKEND_SERVICE_NAME=codetrans
export BACKEND_SERVICE_IP=${ip_address}
export BACKEND_SERVICE_PORT=7777
export NGINX_PORT=80
export host_ip=${ip_address}
sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
# Start Docker Containers
docker compose -f compose_tgi.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
n=0
until [[ "$n" -ge 100 ]]; do
docker logs codetrans-xeon-tgi-service > ${LOG_PATH}/tgi_service_start.log
if grep -q Connected ${LOG_PATH}/tgi_service_start.log; then
break
fi
sleep 5s
n=$((n+1))
done
sleep 1m
}
function validate_services() {
local URL="$1"
local EXPECTED_RESULT="$2"
local SERVICE_NAME="$3"
local DOCKER_NAME="$4"
local INPUT_DATA="$5"
local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
if [ "$HTTP_STATUS" -eq 200 ]; then
echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log)
if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then
echo "[ $SERVICE_NAME ] Content is as expected."
else
echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT"
docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
exit 1
fi
else
echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
exit 1
fi
sleep 5s
}
function validate_microservices() {
# tgi llm serving backend
validate_services \
"${ip_address}:8008/generate" \
"generated_text" \
"tgi" \
"codetrans-xeon-tgi-service" \
'{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}'
# llm microservice
validate_services \
"${ip_address}:9000/v1/chat/completions" \
"data: " \
"llm" \
"codetrans-xeon-llm-server" \
'{"query":" ### System: Please translate the following Golang codes into Python codes. ### Original codes: '\'''\'''\''Golang \npackage main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n '\'''\'''\'' ### Translated codes:"}'
}
function validate_megaservice() {
# Curl the Mega Service
validate_services \
"${ip_address}:${BACKEND_SERVICE_PORT}/v1/codetrans" \
"print" \
"mega-codetrans" \
"codetrans-xeon-backend-server" \
'{"language_from": "Golang","language_to": "Python","source_code": "package main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n}"}'
# test the megaservice via nginx
validate_services \
"${ip_address}:${NGINX_PORT}/v1/codetrans" \
"print" \
"mega-codetrans-nginx" \
"codetrans-xeon-nginx-server" \
'{"language_from": "Golang","language_to": "Python","source_code": "package main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n}"}'
}
function validate_frontend() {
cd $WORKPATH/ui/svelte
local conda_env_name="OPEA_e2e"
export PATH=${HOME}/miniforge3/bin/:$PATH
if conda info --envs | grep -q "$conda_env_name"; then
echo "$conda_env_name exist!"
else
conda create -n ${conda_env_name} python=3.12 -y
fi
source activate ${conda_env_name}
sed -i "s/localhost/$ip_address/g" playwright.config.ts
conda install -c conda-forge nodejs=22.6.0 -y
npm install && npm ci && npx playwright install --with-deps
node -v && npm -v && pip list
exit_status=0
npx playwright test || exit_status=$?
if [ $exit_status -ne 0 ]; then
echo "[TEST INFO]: ---------frontend test failed---------"
exit $exit_status
else
echo "[TEST INFO]: ---------frontend test passed---------"
fi
}
function stop_docker() {
cd $WORKPATH/docker_compose/intel/cpu/xeon/
docker compose -f compose_tgi.yaml stop && docker compose rm -f
}
function main() {
stop_docker
if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
start_services
validate_microservices
validate_megaservice
validate_frontend
stop_docker
echo y | docker system prune
}
main

View File

@@ -51,16 +51,20 @@ Since the `compose.yaml` will consume some environment variables, you need to se
export host_ip=$(hostname -I | awk '{print $1}')
# Example: no_proxy="localhost,127.0.0.1,192.168.1.1"
export no_proxy=${no_proxy},${host_ip}
export no_proxy=${no_proxy},${host_ip},dbqna-xeon-react-ui-server,text2sql-service,vllm-service,tgi-service
# If you are in a proxy environment, also set the proxy-related environment variables:
export http_proxy=${http_proxy}
export https_proxy=${https_proxy}
# Set other required variables
#TGI Service
export TGI_PORT=8008
export TGI_LLM_ENDPOINT=http://${host_ip}:${TGI_PORT}
#vLLM Service
export LLM_ENDPOINT_PORT=8008
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
export HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"
export POSTGRES_USER=postgres
@@ -89,6 +93,13 @@ cd GenAIExamples/DBQnA/docker_compose/intel/cpu/xeon
docker compose up -d
```
Or use the vLLM service:
```bash
cd GenAIExamples/DBQnA/docker_compose/intel/cpu/xeon
docker compose -f compose_vllm.yaml up -d
```
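Before validating, you can confirm the vLLM backend has finished starting up. A minimal check, mirroring the readiness probe used in the CI test script:
```bash
# the container is named vllm-service in compose_vllm.yaml
docker logs vllm-service 2>&1 | grep complete
# expected once the service is ready:
# INFO:     Application startup complete.
```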
#### 2.2.2 Alternatively, start the microservices by running individual Docker services
**NOTE:** Make sure all of the individual Docker services are down before starting them (see the sketch below).
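A minimal sketch of how to check for and clear leftover containers, assuming the `test-text2sql-*` and `test-dbqna-*` container names used in the commands below:
```bash
# list any containers left over from a previous run
docker ps -a --filter "name=test-text2sql" --filter "name=test-dbqna"
# stop and remove them before starting fresh
docker stop test-text2sql-postgres test-text2sql-tgi-endpoint test-dbqna-react-ui-server 2>/dev/null || true
docker rm test-text2sql-postgres test-text2sql-tgi-endpoint test-dbqna-react-ui-server 2>/dev/null || true
```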
@@ -108,7 +119,7 @@ docker run --name test-text2sql-postgres --ipc=host -e POSTGRES_USER=${POSTGRES_
```bash
docker run -d --name="test-text2sql-tgi-endpoint" --ipc=host -p $TGI_PORT:80 -v ./data:/data --shm-size 1g -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e HF_TOKEN=${HF_TOKEN} -e model=${model} ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id $model
docker run -d --name="test-text2sql-tgi-endpoint" --ipc=host -p $TGI_PORT:80 -v ./data:/data --shm-size 1g -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e HF_TOKEN=${HF_TOKEN} -e model=${model} ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu --model-id $model
```
- Start Text-to-SQL Service
@@ -127,7 +138,9 @@ docker run -d --name="test-dbqna-react-ui-server" --ipc=host -p 5174:80 -e no_pr
## 🚀 Validate Microservices
### 3.1 TGI Service
### 3.1 TGI Service or vLLM Service
TGI Service
```bash
@@ -137,6 +150,13 @@ curl http://${host_ip}:$TGI_PORT/generate \
-H 'Content-Type: application/json'
```
vLLM Service
```bash
curl http://${host_ip}:8008/v1/chat/completions \
-X POST \
-d '{"model":"mistralai/Mistral-7B-Instruct-v0.3", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens":17}' \
-H 'Content-Type: application/json'
```
### 3.2 Postgres Microservice
Once the Text-to-SQL microservice is started, you can use the command below.
@@ -147,7 +167,7 @@ Once Text-to-SQL microservice is started, user can use below command
curl --location http://${host_ip}:9090/v1/postgres/health \
--header 'Content-Type: application/json' \
--data '{"user": "'${POSTGRES_USER}'","password": "'${POSTGRES_PASSWORD}'","host": "'${host_ip}'", "port": "5442", "database": "'${POSTGRES_DB}'"}'
```
#### 3.2.2 Invoke the microservice.

View File

@@ -8,7 +8,7 @@ services:
ports:
- "8008:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "${MODEL_CACHE}:/data"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}

View File

@@ -0,0 +1,67 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
services:
vllm-service:
image: ${REGISTRY:-opea}/vllm:${TAG:-latest}
container_name: vllm-service
ports:
- ${LLM_ENDPOINT_PORT:-8008}:80
volumes:
- "${DATA_PATH:-./data}:/data"
shm_size: 128g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
LLM_MODEL_ID: ${LLM_MODEL_ID}
VLLM_TORCH_PROFILER_DIR: "${VLLM_TORCH_PROFILER_DIR:-/mnt}"
host_ip: ${host_ip}
LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT}
VLLM_SKIP_WARMUP: ${VLLM_SKIP_WARMUP:-false}
healthcheck:
test: ["CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1"]
interval: 10s
timeout: 10s
retries: 100
command: --model $LLM_MODEL_ID --host 0.0.0.0 --port 80
postgres:
image: postgres:latest
container_name: postgres-container
restart: always
environment:
- POSTGRES_USER=${POSTGRES_USER}
- POSTGRES_PASSWORD=${POSTGRES_PASSWORD}
- POSTGRES_DB=${POSTGRES_DB}
ports:
- '5442:5432'
volumes:
- ./chinook.sql:/docker-entrypoint-initdb.d/chinook.sql
text2sql-service:
image: ${REGISTRY:-opea}/text2sql:${TAG:-latest}
container_name: text2sql-service
ports:
- "9090:8080"
environment:
- TGI_LLM_ENDPOINT=${LLM_ENDPOINT}
dbqna-xeon-react-ui-server:
image: ${REGISTRY:-opea}/text2sql-react-ui:${TAG:-latest}
container_name: dbqna-xeon-react-ui-server
depends_on:
- text2sql-service
ports:
- "5174:80"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
ipc: host
restart: always
networks:
default:
driver: bridge
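A minimal launch sequence for this file might look as follows (illustrative values; see the README for the full variable list):
```bash
export host_ip=$(hostname -I | awk '{print $1}')
export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"
export HUGGINGFACEHUB_API_TOKEN=<your_hf_token>
export POSTGRES_USER=postgres POSTGRES_PASSWORD=testpwd POSTGRES_DB=chinook
export LLM_ENDPOINT_PORT=8008
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
docker compose -f compose_vllm.yaml up -d
```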

View File

@@ -0,0 +1,137 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -xe
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
export MODEL_CACHE=${model_cache:-"./data"}
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')
function build_docker_images() {
cd $WORKPATH
git clone https://github.com/vllm-project/vllm.git
cd ./vllm/
VLLM_VER="$(git describe --tags "$(git rev-list --tags --max-count=1)" )"
echo "Check out vLLM tag ${VLLM_VER}"
git checkout ${VLLM_VER} &> /dev/null
docker build --no-cache -f Dockerfile.cpu -t ${REGISTRY:-opea}/vllm:${TAG:-latest} --shm-size=128g .
if [ $? -ne 0 ]; then
echo "opea/vllm built fail"
exit 1
else
echo "opea/vllm built successful"
fi
cd $WORKPATH/docker_image_build
git clone --single-branch --branch "${opea_branch:-"main"}" https://github.com/opea-project/GenAIComps.git
echo "Build all the images with --no-cache, check docker_image_build.log for details... #vllm-service"
service_list="text2sql text2sql-react-ui"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
}
function start_service() {
cd $WORKPATH/docker_compose/intel/cpu/xeon
export model="mistralai/Mistral-7B-Instruct-v0.3"
export LLM_MODEL_ID=${model}
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export POSTGRES_USER=postgres
export POSTGRES_PASSWORD=testpwd
export POSTGRES_DB=chinook
export TEXT2SQL_PORT=9090
export LLM_ENDPOINT_PORT=8008
export LLM_ENDPOINT="http://${ip_address}:${LLM_ENDPOINT_PORT}"
# Start Docker Containers
docker compose -f compose_vllm.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
# check whether vLLM is fully ready.
n=0
until [[ "$n" -ge 100 ]]; do
docker logs vllm-service > ${LOG_PATH}/vllm_service_start.log 2>&1
if grep -q complete ${LOG_PATH}/vllm_service_start.log; then
break
fi
sleep 5s
n=$((n+1))
done
}
function validate_microservice() {
result=$(http_proxy="" curl --connect-timeout 5 --max-time 120000 http://${ip_address}:$TEXT2SQL_PORT/v1/text2sql\
-X POST \
-d '{"input_text": "Find the total number of Albums.","conn_str": {"user": "'${POSTGRES_USER}'","password": "'${POSTGRES_PASSWORD}'","host": "'${ip_address}'", "port": "5442", "database": "'${POSTGRES_DB}'" }}' \
-H 'Content-Type: application/json')
if [[ $result == *"output"* ]]; then
echo $result
echo "Result correct."
else
echo "Result wrong. Received was $result"
docker logs text2sql-service > ${LOG_PATH}/text2sql.log
docker logs vllm-service > ${LOG_PATH}/vllm.log
exit 1
fi
}
function validate_frontend() {
echo "[ TEST INFO ]: --------- frontend test started ---------"
cd $WORKPATH/ui/react
local conda_env_name="OPEA_e2e"
export PATH=${HOME}/miniforge3/bin/:$PATH
if conda info --envs | grep -q "$conda_env_name"; then
echo "$conda_env_name exist!"
else
conda create -n ${conda_env_name} python=3.12 -y
fi
source activate ${conda_env_name}
echo "[ TEST INFO ]: --------- conda env activated ---------"
conda install -c conda-forge nodejs=22.6.0 -y
npm install && npm ci
node -v && npm -v && pip list
exit_status=0
npm run test || exit_status=$?
if [ $exit_status -ne 0 ]; then
echo "[TEST INFO]: ---------frontend test failed---------"
exit $exit_status
else
echo "[TEST INFO]: ---------frontend test passed---------"
fi
}
function stop_docker() {
cd $WORKPATH/docker_compose/intel/cpu/xeon
docker compose -f compose_vllm.yaml stop && docker compose -f compose_vllm.yaml rm -f
}
function main() {
stop_docker
build_docker_images
start_service
validate_microservice
validate_frontend
stop_docker
echo y | docker system prune
}
main

View File

@@ -1,8 +1,48 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
ARG BASE_TAG=latest
FROM opea/comps-base:$BASE_TAG
# Stage 1: base setup used by other stages
FROM python:3.11-slim AS base
# get security updates
RUN apt-get update && apt-get upgrade -y && \
apt-get clean && rm -rf /var/lib/apt/lists/*
ENV HOME=/home/user
RUN useradd -m -s /bin/bash user && \
mkdir -p $HOME && \
chown -R user $HOME
WORKDIR $HOME
# Stage 2: latest GenAIComps sources
FROM base AS git
RUN apt-get update && apt-get install -y --no-install-recommends git
RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git
# Stage 3: common layer shared by services using GenAIComps
FROM base AS comps-base
# copy just relevant parts
COPY --from=git $HOME/GenAIComps/comps $HOME/GenAIComps/comps
COPY --from=git $HOME/GenAIComps/*.* $HOME/GenAIComps/LICENSE $HOME/GenAIComps/
WORKDIR $HOME/GenAIComps
RUN pip install --no-cache-dir --upgrade pip setuptools && \
pip install --no-cache-dir -r $HOME/GenAIComps/requirements.txt
WORKDIR $HOME
ENV PYTHONPATH=$PYTHONPATH:$HOME/GenAIComps
USER user
# Stage 4: unique part
FROM comps-base
COPY ./retrieval_tool.py $HOME/retrieval_tool.py

View File

@@ -38,7 +38,7 @@ services:
ports:
- "6006:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "${MODEL_CACHE}:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
@@ -96,7 +96,7 @@ services:
ports:
- "8808:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "${MODEL_CACHE}:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}

View File

@@ -34,7 +34,7 @@ services:
ports:
- "8090:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "${MODEL_CACHE}:/data"
runtime: habana
cap_add:
- SYS_NICE
@@ -95,7 +95,7 @@ services:
ports:
- "8808:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "${MODEL_CACHE}:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}

View File

@@ -21,7 +21,7 @@ services:
timeout: 10s
retries: 100
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "${MODEL_CACHE}:/data"
shm_size: 1g
command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0 --max-input-length ${MAX_INPUT_TOKENS} --max-total-tokens ${MAX_TOTAL_TOKENS}

View File

@@ -1,8 +1,48 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
ARG BASE_TAG=latest
FROM opea/comps-base:$BASE_TAG
# Stage 1: base setup used by other stages
FROM python:3.11-slim AS base
# get security updates
RUN apt-get update && apt-get upgrade -y && \
apt-get clean && rm -rf /var/lib/apt/lists/*
ENV HOME=/home/user
RUN useradd -m -s /bin/bash user && \
mkdir -p $HOME && \
chown -R user $HOME
WORKDIR $HOME
# Stage 2: latest GenAIComps sources
FROM base AS git
RUN apt-get update && apt-get install -y --no-install-recommends git
RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git
# Stage 3: common layer shared by services using GenAIComps
FROM base AS comps-base
# copy just relevant parts
COPY --from=git $HOME/GenAIComps/comps $HOME/GenAIComps/comps
COPY --from=git $HOME/GenAIComps/*.* $HOME/GenAIComps/LICENSE $HOME/GenAIComps/
WORKDIR $HOME/GenAIComps
RUN pip install --no-cache-dir --upgrade pip setuptools && \
pip install --no-cache-dir -r $HOME/GenAIComps/requirements.txt
WORKDIR $HOME
ENV PYTHONPATH=$PYTHONPATH:$HOME/GenAIComps
USER user
# Stage 4: unique part
FROM comps-base
COPY ./chatqna.py $HOME/chatqna.py

View File

@@ -37,8 +37,7 @@ RUN mkdir -p /home/user/gradio_cache
ENV GRADIO_TEMP_DIR=/home/user/gradio_cache
WORKDIR /home/user/edgecraftrag
RUN pip install --no-cache-dir --upgrade pip setuptools==70.0.0 && \
pip install --no-cache-dir -r requirements.txt
RUN pip install --no-cache-dir -r requirements.txt
WORKDIR /home/user/

View File

@@ -1,6 +1,6 @@
docx2txt
faiss-cpu>=1.8.0.post1
langchain-core>=0.2.29
langchain-core==0.2.29
llama-index>=0.11.0
llama-index-embeddings-openvino>=0.4.0
llama-index-llms-openai-like>=0.2.0
@@ -9,7 +9,7 @@ llama-index-postprocessor-openvino-rerank>=0.3.0
llama-index-readers-file>=0.4.0
llama-index-retrievers-bm25>=0.3.0
llama-index-vector-stores-faiss>=0.2.1
opea-comps>=1.2
opea-comps>=0.9
pillow>=10.4.0
python-docx==1.1.2
unstructured==0.16.11

View File

@@ -15,7 +15,7 @@ RUN mkdir -p /home/user/gradio_cache
ENV GRADIO_TEMP_DIR=/home/user/gradio_cache
WORKDIR /home/user/ui
RUN pip install --no-cache-dir --upgrade pip setuptools==70.0.0 && \
RUN pip install --no-cache-dir --upgrade pip setuptools && \
pip install --no-cache-dir -r requirements.txt
USER user

View File

@@ -1,8 +1,48 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
ARG BASE_TAG=latest
FROM opea/comps-base:$BASE_TAG
# Stage 1: base setup used by other stages
FROM python:3.11-slim AS base
# get security updates
RUN apt-get update && apt-get upgrade -y && \
apt-get clean && rm -rf /var/lib/apt/lists/*
ENV HOME=/home/user
RUN useradd -m -s /bin/bash user && \
mkdir -p $HOME && \
chown -R user $HOME
WORKDIR $HOME
# Stage 2: latest GenAIComps sources
FROM base AS git
RUN apt-get update && apt-get install -y --no-install-recommends git
RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git
# Stage 3: common layer shared by services using GenAIComps
FROM base AS comps-base
# copy just relevant parts
COPY --from=git $HOME/GenAIComps/comps $HOME/GenAIComps/comps
COPY --from=git $HOME/GenAIComps/*.* $HOME/GenAIComps/LICENSE $HOME/GenAIComps/
WORKDIR $HOME/GenAIComps
RUN pip install --no-cache-dir --upgrade pip setuptools && \
pip install --no-cache-dir -r $HOME/GenAIComps/requirements.txt
WORKDIR $HOME
ENV PYTHONPATH=$PYTHONPATH:$HOME/GenAIComps
USER user
# Stage 4: unique part
FROM comps-base
COPY ./faqgen.py $HOME/faqgen.py

View File

@@ -8,7 +8,7 @@ services:
ports:
- ${LLM_ENDPOINT_PORT:-8008}:80
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "${MODEL_CACHE}:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}

File diff suppressed because one or more lines are too long

View File

@@ -72,7 +72,7 @@ Here is an example of `Nike 2023` pdf.
# download pdf file
wget https://raw.githubusercontent.com/opea-project/GenAIComps/v1.1/comps/retrievers/redis/data/nke-10k-2023.pdf
# upload pdf file with dataprep
curl -X POST "http://${host_ip}:11103/v1/dataprep/ingest" \
curl -X POST "http://${host_ip}:6004/v1/dataprep/ingest" \
-H "Content-Type: multipart/form-data" \
-F "files=@./nke-10k-2023.pdf"
```
@@ -80,7 +80,8 @@ curl -X POST "http://${host_ip}:11103/v1/dataprep/ingest" \
```bash
curl http://${host_ip}:8888/v1/graphrag \
-H "Content-Type: application/json" \
-d '{"messages": [{"role": "user","content": "where do Nike subsidiaries operate?
-d '{
"model": "gpt-4o-mini","messages": [{"role": "user","content": "What is the revenue of Nike in 2023?
"}]}'
```
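The megaservice streams its answer as server-sent events (`data: ` lines), so disabling curl's output buffering shows tokens as they arrive (a sketch):
```bash
curl -N http://${host_ip}:8888/v1/graphrag \
-H "Content-Type: application/json" \
-d '{"model": "gpt-4o-mini", "messages": [{"role": "user", "content": "What is the revenue of Nike in 2023?"}]}'
```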

View File

@@ -5,65 +5,52 @@ services:
neo4j-apoc:
image: neo4j:latest
container_name: neo4j-apoc
ports:
- "${NEO4J_PORT1:-7474}:7474"
- "${NEO4J_PORT2:-7687}:7687"
volumes:
- ./data/neo4j/logs:/logs
- ./data/neo4j/config:/config
- ./data/neo4j/data:/data
- ./data/neo4j/plugins:/plugins
- /$HOME/neo4j/logs:/logs
- /$HOME/neo4j/config:/config
- /$HOME/neo4j/data:/data
- /$HOME/neo4j/plugins:/plugins
ipc: host
environment:
- no_proxy=${no_proxy}
- http_proxy=${http_proxy}
- https_proxy=${https_proxy}
- NEO4J_AUTH=${NEO4J_USERNAME}/${NEO4J_PASSWORD}
- NEO4J_PLUGINS=["apoc"]
- NEO4J_apoc_export_file_enabled=true
- NEO4J_apoc_import_file_enabled=true
- NEO4J_apoc_import_file_use__neo4j__config=true
- NEO4J_dbms_security_procedures_unrestricted=apoc.\*
- NEO4J_server_bolt_advertised__address=localhost:${NEO4J_PORT2}
restart: always
healthcheck:
test: wget http://localhost:7474 || exit 1
interval: 5s
timeout: 10s
retries: 20
start_period: 3s
tei-embedding-serving:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: tei-embedding-serving
entrypoint: /bin/sh -c "apt-get update && apt-get install -y curl && text-embeddings-router --json-output --model-id ${EMBEDDING_MODEL_ID} --auto-truncate"
ports:
- "${TEI_EMBEDDER_PORT:-12000}:80"
- "7474:7474"
- "7687:7687"
restart: always
tei-embedding-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: tei-embedding-server
ports:
- "6006:80"
volumes:
- "./data:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
NO_PROXY: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
host_ip: ${host_ip}
HF_TOKEN: ${HF_TOKEN}
healthcheck:
test: ["CMD", "curl", "-f", "http://${host_ip}:${TEI_EMBEDDER_PORT}/health"]
interval: 10s
timeout: 6s
retries: 48
tgi-gaudi-server:
image: ghcr.io/huggingface/tgi-gaudi:2.3.1
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
ipc: host
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
tgi-gaudi-service:
image: ghcr.io/huggingface/tgi-gaudi:2.0.6
container_name: tgi-gaudi-server
ports:
- ${LLM_ENDPOINT_PORT:-8008}:80
- "6005:80"
volumes:
- "${DATA_PATH:-./data}:/data"
- "./data:/data"
environment:
no_proxy: ${no_proxy}
NO_PROXY: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_TOKEN: ${HF_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
@@ -73,44 +60,63 @@ services:
LIMIT_HPU_GRAPH: true
USE_FLASH_ATTENTION: true
FLASH_ATTENTION_RECOMPUTE: true
host_ip: ${host_ip}
LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT}
MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS:-2048}
MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS:-4096}
TEXT_GENERATION_SERVER_IGNORE_EOS_TOKEN: false
runtime: habana
cap_add:
- SYS_NICE
ipc: host
healthcheck:
test: ["CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1"]
interval: 10s
timeout: 10s
retries: 100
command: --model-id ${LLM_MODEL_ID}
command: --model-id ${LLM_MODEL_ID} --max-input-length 6000 --max-total-tokens 8192
dataprep-neo4j-llamaindex:
image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
container_name: dataprep-neo4j-llamaindex
container_name: dataprep-neo4j-server
depends_on:
neo4j-apoc:
condition: service_healthy
tgi-gaudi-server:
condition: service_healthy
tei-embedding-serving:
condition: service_healthy
- neo4j-apoc
- tgi-gaudi-service
- tei-embedding-service
ports:
- "${DATAPREP_PORT:-11103}:5000"
- "6004:5000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
host_ip: ${host_ip}
DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_NEO4J_LLAMAINDEX"
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_TOKEN: ${HF_TOKEN}
NEO4J_URL: ${NEO4J_URL}
NEO4J_USERNAME: ${NEO4J_USERNAME}
NEO4J_PASSWORD: ${NEO4J_PASSWORD}
DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_NEO4J_LLAMAINDEX"
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
OPENAI_API_KEY: ${OPENAI_API_KEY}
OPENAI_EMBEDDING_MODEL: ${OPENAI_EMBEDDING_MODEL}
OPENAI_LLM_MODEL: ${OPENAI_LLM_MODEL}
EMBEDDING_MODEL_ID: ${EMBEDDING_MODEL_ID}
LLM_MODEL_ID: ${LLM_MODEL_ID}
MAX_OUTPUT_TOKENS: ${MAX_OUTPUT_TOKENS}
LOGFLAG: ${LOGFLAG}
restart: unless-stopped
retriever-neo4j-llamaindex:
image: ${REGISTRY:-opea}/retriever:${TAG:-latest}
container_name: retriever-neo4j-server
depends_on:
- neo4j-apoc
- tgi-gaudi-service
- tei-embedding-service
ports:
- "7000:7000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
host_ip: ${host_ip}
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_TOKEN: ${HF_TOKEN}
NEO4J_URI: ${NEO4J_URL}
NEO4J_USERNAME: ${NEO4J_USERNAME}
NEO4J_PASSWORD: ${NEO4J_PASSWORD}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
OPENAI_API_KEY: ${OPENAI_API_KEY}
@@ -118,61 +124,29 @@ services:
OPENAI_LLM_MODEL: ${OPENAI_LLM_MODEL}
EMBEDDING_MODEL_ID: ${EMBEDDING_MODEL_ID}
LLM_MODEL_ID: ${LLM_MODEL_ID}
MAX_OUTPUT_TOKENS: ${MAX_OUTPUT_TOKENS}
LOGFLAG: ${LOGFLAG}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HF_TOKEN: ${HF_TOKEN}
MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS:-4096}
RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_NEO4J"
restart: unless-stopped
retriever-neo4j:
image: ${REGISTRY:-opea}/retriever:${TAG:-latest}
container_name: retriever-neo4j
ports:
- "${RETRIEVER_PORT:-7000}:7000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
LOGFLAG: ${LOGFLAG:-False}
RETRIEVER_COMPONENT_NAME: ${RETRIEVER_COMPONENT_NAME:-OPEA_RETRIEVER_NEO4J}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
EMBEDDING_MODEL_ID: ${EMBEDDING_MODEL_ID}
LLM_MODEL_ID: ${LLM_MODEL_ID}
NEO4J_URI: ${NEO4J_URI}
NEO4J_URL: ${NEO4J_URI}
NEO4J_USERNAME: ${NEO4J_USERNAME}
NEO4J_PASSWORD: ${NEO4J_PASSWORD}
VDMS_USE_CLIP: 0
host_ip: ${host_ip}
depends_on:
neo4j-apoc:
condition: service_healthy
tei-embedding-serving:
condition: service_healthy
tgi-gaudi-server:
condition: service_healthy
graphrag-gaudi-backend-server:
image: ${REGISTRY:-opea}/graphrag:${TAG:-latest}
container_name: graphrag-gaudi-backend-server
depends_on:
- neo4j-apoc
- tei-embedding-serving
- retriever-neo4j
- tgi-gaudi-server
- tei-embedding-service
- retriever-neo4j-llamaindex
- tgi-gaudi-service
ports:
- "8888:8888"
- "${MEGA_SERVICE_PORT:-8888}:8888"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=graphrag-gaudi-backend-server
- RETRIEVER_SERVICE_HOST_IP=retriever-neo4j
- RETRIEVER_SERVICE_HOST_IP=retriever-neo4j-llamaindex
- RETRIEVER_SERVICE_PORT=7000
- LLM_SERVER_HOST_IP=tgi-gaudi-server
- LLM_SERVER_PORT=80
- LLM_SERVER_HOST_IP=tgi-gaudi-service
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
- LLM_MODEL_ID=${LLM_MODEL_ID}
- LOGFLAG=${LOGFLAG}
ipc: host

View File

@@ -10,25 +10,16 @@ pushd "../../../../../" > /dev/null
source .set_env.sh
popd > /dev/null
export TEI_EMBEDDER_PORT=11633
export LLM_ENDPOINT_PORT=11634
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export OPENAI_EMBEDDING_MODEL="text-embedding-3-small"
export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-8B-Instruct"
export OPENAI_LLM_MODEL="gpt-4o"
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}"
export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-8B-Instruct"
export TGI_LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
export NEO4J_PORT1=11631
export NEO4J_PORT2=11632
export NEO4J_URI="bolt://${host_ip}:${NEO4J_PORT2}"
export NEO4J_URL="bolt://${host_ip}:${NEO4J_PORT2}"
export NEO4J_USERNAME="neo4j"
export NEO4J_PASSWORD="neo4jtest"
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006"
export TGI_LLM_ENDPOINT="http://${host_ip}:6005"
export NEO4J_URL="bolt://${host_ip}:7687"
export NEO4J_USERNAME=neo4j
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/ingest"
export LOGFLAG=True
export MAX_INPUT_TOKENS=4096
export MAX_TOTAL_TOKENS=8192
export DATA_PATH="/mnt/nvme2n1/hf_cache"
export DATAPREP_PORT=11103
export RETRIEVER_PORT=11635
export RETRIEVER_SERVICE_PORT=80
export LLM_SERVER_PORT=80
export MAX_OUTPUT_TOKENS=1024
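With these values exported, the stack can be brought up from the same directory (a sketch, assuming this script is saved as `set_env.sh` next to the compose file):
```bash
source ./set_env.sh
docker compose up -d
```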

View File

@@ -12,7 +12,7 @@ export TAG=${IMAGE_TAG}
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
export host_ip=$(hostname -I | awk '{print $1}')
ip_address=$(hostname -I | awk '{print $1}')
function build_docker_images() {
opea_branch=${opea_branch:-"main"}
@@ -33,38 +33,25 @@ function build_docker_images() {
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/tgi-gaudi:2.3.1
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
docker images && sleep 1s
}
function start_services() {
cd $WORKPATH/docker_compose/intel/hpu/gaudi
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export TEI_EMBEDDER_PORT=11633
export LLM_ENDPOINT_PORT=11634
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export OPENAI_EMBEDDING_MODEL="text-embedding-3-small"
export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-8B-Instruct"
export OPENAI_LLM_MODEL="gpt-4o"
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}"
export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-8B-Instruct"
export TGI_LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
export NEO4J_PORT1=11631
export NEO4J_PORT2=11632
export NEO4J_URI="bolt://${host_ip}:${NEO4J_PORT2}"
export NEO4J_URL="bolt://${host_ip}:${NEO4J_PORT2}"
export NEO4J_USERNAME="neo4j"
export NEO4J_PASSWORD="neo4jtest"
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/ingest"
export LOGFLAG=True
export MAX_INPUT_TOKENS=4096
export MAX_TOTAL_TOKENS=8192
export DATAPREP_PORT=11103
export RETRIEVER_PORT=11635
export MEGA_SERVICE_PORT=8888
export NEO4J_URL="bolt://${ip_address}:7687"
export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:6006"
export TGI_LLM_ENDPOINT="http://${ip_address}:6005"
export host_ip=${ip_address}
export LOGFLAG=true
export MAX_OUTPUT_TOKENS="1024"
unset OPENAI_API_KEY
# Start Docker Containers
@@ -129,7 +116,7 @@ function validate_microservices() {
# validate neo4j-apoc
validate_service \
"${host_ip}:${NEO4J_PORT1}" \
"${ip_address}:7474" \
"200 OK" \
"neo4j-apoc" \
"neo4j-apoc" \
@@ -137,46 +124,45 @@ function validate_microservices() {
# tei for embedding service
validate_service \
"${host_ip}:${TEI_EMBEDDER_PORT}/embed" \
"${ip_address}:6006/embed" \
"[[" \
"tei-embedding-service" \
"tei-embedding-serving" \
"tei-embedding-server" \
'{"inputs":"What is Deep Learning?"}'
sleep 1m # the retriever cannot be queried immediately; wait a little longer
# tgi for llm service
validate_service \
"${host_ip}:${LLM_ENDPOINT_PORT}/generate" \
"generated_text" \
"tgi-gaudi-service" \
"tgi-gaudi-server" \
'{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}'
# test /v1/dataprep/ingest graph extraction
echo "Like many companies in the O&G sector, the stock of Chevron (NYSE:CVX) has declined about 10% over the past 90-days despite the fact that Q2 consensus earnings estimates have risen sharply (~25%) during that same time frame. Over the years, Chevron has kept a very strong balance sheet. FirstEnergy (NYSE:FE Get Rating) posted its earnings results on Tuesday. The utilities provider reported $0.53 earnings per share for the quarter, topping the consensus estimate of $0.52 by $0.01, RTT News reports. FirstEnergy had a net margin of 10.85% and a return on equity of 17.17%. The Dáil was almost suspended on Thursday afternoon after Sinn Féin TD John Brady walked across the chamber and placed an on-call pager in front of the Minister for Housing Darragh OBrien during a debate on retained firefighters. Mr OBrien said Mr Brady had taken part in an act of theatre that was obviously choreographed.Around 2,000 retained firefighters around the country staged a second day of industrial action on Tuesday and are due to start all out-strike action from next Tuesday. The mostly part-time workers, who keep the services going outside of Irelands larger urban centres, are taking industrial action in a dispute over pay and working conditions. Speaking in the Dáil, Sinn Féin deputy leader Pearse Doherty said firefighters had marched on Leinster House today and were very angry at the fact the Government will not intervene. Reintroduction of tax relief on mortgages needs to be considered, OBrien says. Martin withdraws comment after saying People Before Profit would put the jackboot on people Taoiseach propagated fears farmers forced to rewet land due to nature restoration law Cairns An intervention is required now. Im asking you to make an improved offer in relation to pay for retained firefighters, Mr Doherty told the housing minister.Im also asking you, and challenging you, to go outside after this Order of Business and meet with the firefighters because they are just fed up to the hilt in relation to what you said.Some of them have handed in their pagers to members of the Opposition and have challenged you to wear the pager for the next number of weeks, put up with an €8,600 retainer and not leave your community for the two and a half kilometres and see how you can stand over those type of pay and conditions. At this point, Mr Brady got up from his seat, walked across the chamber and placed the pager on the desk in front of Mr OBrien. Ceann Comhairle Seán Ó Fearghaíl said the Sinn Féin TD was completely out of order and told him not to carry out a charade in this House, adding it was absolutely outrageous behaviour and not to be encouraged.Mr OBrien said Mr Brady had engaged in an act of theatre here today which was obviously choreographed and was then interrupted with shouts from the Opposition benches. Mr Ó Fearghaíl said he would suspend the House if this racket continues.Mr OBrien later said he said he was confident the dispute could be resolved and he had immense regard for firefighters. The minister said he would encourage the unions to re-engage with the States industrial relations process while also accusing Sinn Féin of using the issue for their own political gain." > $LOG_PATH/dataprep_file.txt
validate_service \
"http://${host_ip}:${DATAPREP_PORT}/v1/dataprep/ingest" \
"http://${ip_address}:6004/v1/dataprep/ingest" \
"Data preparation succeeded" \
"extract_graph_neo4j" \
"dataprep-neo4j-llamaindex"
"dataprep-neo4j-server"
sleep 2m
# retrieval microservice
validate_service \
"${host_ip}:${RETRIEVER_PORT}/v1/retrieval" \
"documents" \
"${ip_address}:7000/v1/retrieval" \
"retrieved_docs" \
"retriever_community_answers_neo4j" \
"retriever-neo4j" \
"{\"messages\": [{\"role\": \"user\",\"content\": \"Who is John Brady and has he had any confrontations?\"}]}"
"retriever-neo4j-server" \
"{\"model\": \"gpt-4o-mini\",\"messages\": [{\"role\": \"user\",\"content\": \"Who is John Brady and has he had any confrontations?\"}]}"
}
# tgi for llm service
validate_service \
"${ip_address}:6005/generate" \
"generated_text" \
"tgi-gaudi-service" \
"tgi-gaudi-server" \
'{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}'
}
function validate_megaservice() {
# Curl the Mega Service
validate_service \
"${host_ip}:${MEGA_SERVICE_PORT}/v1/graphrag" \
"${ip_address}:8888/v1/graphrag" \
"data: " \
"graphrag-megaservice" \
"graphrag-gaudi-backend-server" \
@@ -195,7 +181,7 @@ function validate_frontend() {
fi
source activate ${conda_env_name}
sed -i "s/localhost/$host_ip/g" playwright.config.ts
sed -i "s/localhost/$ip_address/g" playwright.config.ts
conda install -c conda-forge nodejs=22.6.0 -y
npm install && npm ci && npx playwright install --with-deps

View File

@@ -1,23 +1,21 @@
# Finetuning
# Instruction Tuning
This example includes instruction tuning and rerank model finetuning. Instruction tuning is the process of further training LLMs on a dataset consisting of (instruction, output) pairs in a supervised fashion, which bridges the gap between the next-word prediction objective of LLMs and the users' objective of having LLMs adhere to human instructions. Rerank model finetuning is the process of further training a rerank model on a dataset to improve its capability in a specific field. The implementation of this example deploys a Ray cluster for the task.
Instruction tuning is the process of further training LLMs on a dataset consisting of (instruction, output) pairs in a supervised fashion, which bridges the gap between the next-word prediction objective of LLMs and the users' objective of having LLMs adhere to human instructions. This implementation deploys a Ray cluster for the task.
## Deploy Finetuning Service
## Deploy Instruction Tuning Service
### Deploy Finetuning Service on Xeon
### Deploy Instruction Tuning Service on Xeon
Refer to the [Xeon Guide](./docker_compose/intel/cpu/xeon/README.md) for detail.
### Deploy Finetuning Service on Gaudi
### Deploy Instruction Tuning Service on Gaudi
Refer to the [Gaudi Guide](./docker_compose/intel/hpu/gaudi/README.md) for detail.
## Consume Finetuning Service
## Consume Instruction Tuning Service
### 1. Upload a training file
#### Instruction tuning dataset example
Download a training file `alpaca_data.json` and upload it to the server with the command below; the file can be downloaded from [here](https://github.com/tatsu-lab/stanford_alpaca/blob/main/alpaca_data.json):
```bash
@@ -25,19 +23,8 @@ Download a training file `alpaca_data.json` and upload it to the server with bel
curl http://${your_ip}:8015/v1/files -X POST -H "Content-Type: multipart/form-data" -F "file=@./alpaca_data.json" -F purpose="fine-tune"
```
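For reference, each record in an Alpaca-style dataset is an instruction/input/output triple, roughly like the following (illustrative values, not taken from the actual file):
```bash
# print one alpaca-style record (illustrative)
cat <<'EOF'
{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1. Eat a balanced diet. 2. Exercise regularly. 3. Get enough sleep."}
EOF
```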
#### Rerank model finetuning dataset example
Download a toy example training file `toy_finetune_data.jsonl` and upload it to the server with the command below; the file can be downloaded from [here](https://github.com/FlagOpen/FlagEmbedding/blob/JUNJIE99-patch-1/examples/finetune/toy_finetune_data.jsonl):
```bash
# upload a training file
curl http://${your_ip}:8015/v1/files -X POST -H "Content-Type: multipart/form-data" -F "file=@./toy_finetune_data.jsonl" -F purpose="fine-tune"
```
### 2. Create fine-tuning job
#### Instruction tuning
After a training file like `alpaca_data.json` is uploaded, use the following command to launch a finetuning job using `meta-llama/Llama-2-7b-chat-hf` as base model:
```bash
@@ -53,25 +40,6 @@ curl http://${your_ip}:8015/v1/fine_tuning/jobs \
The outputs of the finetune job (adapter_model.safetensors, adapter_config.json, ...) are stored in `/home/user/comps/finetuning/src/output` and other execution logs are stored in `/home/user/ray_results`.
#### Rerank model finetuning
After a training file `toy_finetune_data.jsonl` is uploaded, use the following command to launch a finetuning job using `BAAI/bge-reranker-large` as base model:
```bash
# create a finetuning job
curl http://${your_ip}:8015/v1/fine_tuning/jobs \
-X POST \
-H "Content-Type: application/json" \
-d '{
"training_file": "toy_finetune_data.jsonl",
"model": "BAAI/bge-reranker-large",
"General":{
"task":"rerank",
"lora_config":null
}
}'
```
### 3. Manage fine-tuning job
The commands below show how to list finetuning jobs, retrieve or cancel a specific job, and list a job's checkpoints.

View File

@@ -1,6 +1,6 @@
# Deploy Finetuning Service on Xeon
# Deploy Instruction Tuning Service on Xeon
This document outlines the deployment process for a finetuning service utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice on an Intel Xeon server. The steps include Docker image creation and container deployment. We will publish the Docker images to Docker Hub to simplify the deployment process for this service.
This document outlines the deployment process for an Instruction Tuning Service utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice on an Intel Xeon server. The steps include Docker image creation and container deployment. We will publish the Docker images to Docker Hub to simplify the deployment process for this service.
## 🚀 Build Docker Images

View File

@@ -1,6 +1,6 @@
# Deploy Finetuning Service on Gaudi
# Deploy Instruction Tuning Service on Gaudi
This document outlines the deployment process for a finetuning service utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice on an Intel Gaudi server. The steps include Docker image creation and container deployment. We will publish the Docker images to Docker Hub to simplify the deployment process for this service.
This document outlines the deployment process for an Instruction Tuning Service utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice on an Intel Gaudi server. The steps include Docker image creation and container deployment. We will publish the Docker images to Docker Hub to simplify the deployment process for this service.
## 🚀 Build Docker Images

View File

@@ -0,0 +1,13 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
services:
finetuning:
build:
args:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
no_proxy: ${no_proxy}
context: GenAIComps
dockerfile: comps/finetuning/src/Dockerfile
image: ${REGISTRY:-opea}/finetuning:${TAG:-latest}
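This file is consumed the same way the CI test scripts use it (a sketch; run from the `docker_image_build` directory, which must contain a `GenAIComps` checkout):
```bash
git clone --depth 1 https://github.com/opea-project/GenAIComps.git
docker compose -f build.yaml build finetuning --no-cache
```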

View File

@@ -2,13 +2,13 @@
## Introduction
GenAIExamples are designed to give developers an easy entry into generative AI, featuring microservice-based samples that simplify the processes of deploying, testing, and scaling GenAI applications. All examples are fully compatible with both Docker and Kubernetes, supporting a wide range of hardware platforms such as Gaudi, Xeon, NVIDIA GPUs, and other hardware including AMD GPUs, ensuring flexibility and efficiency for your GenAI adoption.
GenAIExamples are designed to give developers an easy entry into generative AI, featuring microservice-based samples that simplify the processes of deploying, testing, and scaling GenAI applications. All examples are fully compatible with Docker and Kubernetes, supporting a wide range of hardware platforms such as Gaudi, Xeon, and NVIDIA GPUs, and other hardware, ensuring flexibility and efficiency for your GenAI adoption.
## Architecture
[GenAIComps](https://github.com/opea-project/GenAIComps) is a service-based tool that includes microservice components such as llm, embedding, reranking, and so on. Using these components, various examples in GenAIExample can be constructed including ChatQnA, DocSum, etc.
[GenAIComps](https://github.com/opea-project/GenAIComps) is a service-based tool that includes microservice components such as llm, embedding, reranking, and so on. Using these components, various examples in GenAIExample can be constructed, including ChatQnA, DocSum, etc.
[GenAIInfra](https://github.com/opea-project/GenAIInfra) is part of the OPEA containerization and cloud-native suite and enables quick and efficient deployment of GenAIExamples in the cloud.
[GenAIInfra](https://github.com/opea-project/GenAIInfra), part of the OPEA containerization and cloud-native suite, enables quick and efficient deployment of GenAIExamples in the cloud.
[GenAIEval](https://github.com/opea-project/GenAIEval) measures service performance metrics such as throughput, latency, and accuracy for GenAIExamples. This feature helps users compare performance across various hardware configurations easily.
@@ -18,18 +18,18 @@ The GenAIExamples [documentation](https://opea-project.github.io/latest/examples
## Getting Started
GenAIExamples offers flexible deployment options that cater to different user needs, enabling efficient use and deployment in various environments. Three primary methods are presently used to do this: Python startup, Docker Compose, and Kubernetes.
GenAIExamples offers flexible deployment options that cater to different user needs, enabling efficient use and deployment in various environments. Here's a brief overview of the three primary methods: Python startup, Docker Compose, and Kubernetes.
Users can choose the most suitable approach based on ease of setup, scalability needs, and the environment in which they are operating.
### Deployment Guide
Deployment is based on released docker images by default - check [docker image list](./docker_images_list.md) for detailed information. You can also build your own images following instructions.
Deployment is based on released docker images by default, check [docker image list](./docker_images_list.md) for detailed information. You can also build your own images following instructions.
#### Prerequisite
- For Docker Compose-based deployment, you should have docker compose installed. Refer to [docker compose install](https://docs.docker.com/compose/install/) for more information.
- For Kubernetes-based deployment, you can use [Helm](https://helm.sh) or [GMC](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector/README.md)-based deployment.
- For Docker Compose based deployment, you should have docker compose installed. Refer to [docker compose install](https://docs.docker.com/compose/install/).
- For Kubernetes based deployment, you can use [Helm](https://helm.sh) or [GMC](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector/README.md) based deployment.
- You should have a kubernetes cluster ready for use. If not, you can refer to [k8s install](https://github.com/opea-project/docs/tree/main/guide/installation/k8s_install/README.md) to deploy one.
- (Optional) You should have Helm (version >= 3.15) installed if you want to deploy with Helm Charts. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information.
@@ -37,7 +37,7 @@ Deployment is based on released docker images by default - check [docker image l
- Recommended Hardware Reference
Based on different deployment model sizes and performance requirements, you may choose different hardware platforms or cloud instances. Here are some of the reference platforms:
Based on different deployment model sizes and performance requirements, you may choose different hardware platforms or cloud instances. Here are some reference platforms:
| Use Case | Deployment model | Reference Configuration | Hardware access/instances |
| -------- | ------------------------- | -------------------------------------------------------------------- | ---------------------------------------------------------------------------- |
@@ -47,7 +47,7 @@ Deployment is based on released docker images by default - check [docker image l
#### Deploy Examples
> **Note**: Check for [sample guides](https://opea-project.github.io/latest/examples/index.html) first for your use case. If it is not available, then refer to the table below:
> **Note**: Check for [sample guides](https://opea-project.github.io/latest/examples/index.html) first for your use case. If it is not available, then refer to the table below.
| Use Case | Docker Compose<br/>Deployment on Xeon | Docker Compose<br/>Deployment on Gaudi | Docker Compose<br/>Deployment on ROCm | Kubernetes with Helm Charts | Kubernetes with GMC |
| ----------------- | ------------------------------------------------------------------------------ | ---------------------------------------------------------------------------- | ------------------------------------------------------------------------ | ------------------------------------------------------------------- | ------------------------------------------------------------ |

View File

@@ -0,0 +1,61 @@
# Rerank Model Finetuning
Rerank model finetuning is the process of further training a rerank model on a dataset to improve its capability in a specific field.
## Deploy Rerank Model Finetuning Service
### Deploy Rerank Model Finetuning Service on Xeon
Refer to the [Xeon Guide](./docker_compose/intel/cpu/xeon/README.md) for detail.
### Deploy Rerank Model Finetuning Service on Gaudi
Refer to the [Gaudi Guide](./docker_compose/intel/hpu/gaudi/README.md) for detail.
## Consume Rerank Model Finetuning Service
### 1. Upload a training file
Download a toy example training file `toy_finetune_data.jsonl` and upload it to the server with the command below; the file can be downloaded from [here](https://github.com/FlagOpen/FlagEmbedding/blob/master/examples/finetune/toy_finetune_data.jsonl):
```bash
# upload a training file
curl http://${your_ip}:8015/v1/files -X POST -H "Content-Type: multipart/form-data" -F "file=@./toy_finetune_data.jsonl" -F purpose="fine-tune"
```
### 2. Create fine-tuning job
After a training file `toy_finetune_data.jsonl` is uploaded, use the following command to launch a finetuning job using `BAAI/bge-reranker-large` as base model:
```bash
# create a finetuning job
curl http://${your_ip}:8015/v1/fine_tuning/jobs \
-X POST \
-H "Content-Type: application/json" \
-d '{
"training_file": "toy_finetune_data.jsonl",
"model": "BAAI/bge-reranker-large",
"General":{
"task":"rerank",
"lora_config":null
}
}'
```
### 3. Manage fine-tuning job
The commands below show how to list finetuning jobs, retrieve or cancel a specific job, and list a job's checkpoints.
```bash
# list finetuning jobs
curl http://${your_ip}:8015/v1/fine_tuning/jobs -X GET
# retrieve one finetuning job
curl http://${your_ip}:8015/v1/fine_tuning/jobs/retrieve -X POST -H "Content-Type: application/json" -d '{"fine_tuning_job_id": "'${fine_tuning_job_id}'"}'
# cancel one finetuning job
curl http://${your_ip}:8015/v1/fine_tuning/jobs/cancel -X POST -H "Content-Type: application/json" -d '{"fine_tuning_job_id": "'${fine_tuning_job_id}'"}'
# list checkpoints of a finetuning job
curl http://${your_ip}:8015/v1/finetune/list_checkpoints -X POST -H "Content-Type: application/json" -d '{"fine_tuning_job_id": "'${fine_tuning_job_id}'"}'
```
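Each of these calls needs the `fine_tuning_job_id` returned when the job was created; it can be captured with `jq` (a sketch, assuming `jq` is installed):
```bash
# create a job and keep its id for the management commands above
fine_tuning_job_id=$(curl -s http://${your_ip}:8015/v1/fine_tuning/jobs \
-X POST \
-H "Content-Type: application/json" \
-d '{"training_file": "toy_finetune_data.jsonl", "model": "BAAI/bge-reranker-large", "General": {"task": "rerank", "lora_config": null}}' | jq -r '.id')
```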

View File

@@ -0,0 +1,26 @@
# Deploy Rerank Model Finetuning Service on Xeon
This document outlines the deployment process for a rerank model finetuning service utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice on an Intel Xeon server. The steps include Docker image creation and container deployment. We will publish the Docker images to Docker Hub to simplify the deployment process for this service.
## 🚀 Build Docker Images
First of all, you need to build the Docker images locally. This step can be skipped once the Docker images are published to Docker Hub.
### 1. Build Docker Image
Build the Docker image with the command below:
```bash
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
export HF_TOKEN=${your_huggingface_token}
docker build -t opea/finetuning:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy --build-arg HF_TOKEN=$HF_TOKEN -f comps/finetuning/src/Dockerfile .
```
### 2. Run Docker with CLI
Start the Docker container with the command below:
```bash
docker run -d --name="finetuning-server" -p 8015:8015 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/finetuning:latest
```
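Once the container is running, the service can be smoke-tested by listing finetuning jobs (initially an empty list):
```bash
curl http://${your_ip}:8015/v1/fine_tuning/jobs -X GET
```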

View File

@@ -0,0 +1,26 @@
# Deploy Rerank Model Finetuning Service on Gaudi
This document outlines the deployment process for a rerank model finetuning service utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice on an Intel Gaudi server. The steps include Docker image creation and container deployment. We will publish the Docker images to Docker Hub to simplify the deployment process for this service.
## 🚀 Build Docker Images
First of all, you need to build the Docker images locally. This step can be skipped once the Docker images are published to Docker Hub.
### 1. Build Docker Image
Build the Docker image with the command below:
```bash
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
docker build -t opea/finetuning-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/finetuning/src/Dockerfile.intel_hpu .
```
### 2. Run Docker with CLI
Start the Docker container with the command below:
```bash
export HF_TOKEN=${your_huggingface_token}
docker run --runtime=habana -e HABANA_VISIBLE_DEVICES=all -p 8015:8015 -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host -e https_proxy=$https_proxy -e http_proxy=$http_proxy -e no_proxy=$no_proxy -e HF_TOKEN=$HF_TOKEN opea/finetuning-gaudi:latest
```

View File

@@ -0,0 +1,131 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -x
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')
finetuning_service_port=8015
ray_port=8265
service_name=finetuning-gaudi
function build_docker_images() {
cd $WORKPATH/docker_image_build
if [ ! -d "GenAIComps" ] ; then
git clone --depth 1 --branch ${opea_branch:-"main"} https://github.com/opea-project/GenAIComps.git
fi
docker compose -f build.yaml build ${service_name} --no-cache > ${LOG_PATH}/docker_image_build.log
}
function start_service() {
export no_proxy="localhost,127.0.0.1,"${ip_address}
docker run -d --name="finetuning-server" -p $finetuning_service_port:$finetuning_service_port -p $ray_port:$ray_port --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy ${IMAGE_REPO}/finetuning-gaudi:${IMAGE_TAG}
sleep 1m
}
function validate_microservice() {
cd $LOG_PATH
export no_proxy="localhost,127.0.0.1,"${ip_address}
# test /v1/dataprep upload file
URL="http://${ip_address}:$finetuning_service_port/v1/files"
cat <<EOF > test_data.json
{"query": "Five women walk along a beach wearing flip-flops.", "pos": ["Some women with flip-flops on, are walking along the beach"], "neg": ["The 4 women are sitting on the beach.", "There was a reform in 1996.", "She's not going to court to clear her record.", "The man is talking about hawaii.", "A woman is standing outside.", "The battle was over. ", "A group of people plays volleyball."]}
{"query": "A woman standing on a high cliff on one leg looking over a river.", "pos": ["A woman is standing on a cliff."], "neg": ["A woman sits on a chair.", "George Bush told the Republicans there was no way he would let them even consider this foolish idea, against his top advisors advice.", "The family was falling apart.", "no one showed up to the meeting", "A boy is sitting outside playing in the sand.", "Ended as soon as I received the wire.", "A child is reading in her bedroom."]}
{"query": "Two woman are playing instruments; one a clarinet, the other a violin.", "pos": ["Some people are playing a tune."], "neg": ["Two women are playing a guitar and drums.", "A man is skiing down a mountain.", "The fatal dose was not taken when the murderer thought it would be.", "Person on bike", "The girl is standing, leaning against the archway.", "A group of women watch soap operas.", "No matter how old people get they never forget. "]}
{"query": "A girl with a blue tank top sitting watching three dogs.", "pos": ["A girl is wearing blue."], "neg": ["A girl is with three cats.", "The people are watching a funeral procession.", "The child is wearing black.", "Financing is an issue for us in public schools.", "Kids at a pool.", "It is calming to be assaulted.", "I face a serious problem at eighteen years old. "]}
{"query": "A yellow dog running along a forest path.", "pos": ["a dog is running"], "neg": ["a cat is running", "Steele did not keep her original story.", "The rule discourages people to pay their child support.", "A man in a vest sits in a car.", "Person in black clothing, with white bandanna and sunglasses waits at a bus stop.", "Neither the Globe or Mail had comments on the current state of Canada's road system. ", "The Spring Creek facility is old and outdated."]}
{"query": "It sets out essential activities in each phase along with critical factors related to those activities.", "pos": ["Critical factors for essential activities are set out."], "neg": ["It lays out critical activities but makes no provision for critical factors related to those activities.", "People are assembled in protest.", "The state would prefer for you to do that.", "A girl sits beside a boy.", "Two males are performing.", "Nobody is jumping", "Conrad was being plotted against, to be hit on the head."]}
EOF
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F 'file=@./test_data.json' -F purpose="fine-tune" -H 'Content-Type: multipart/form-data' "$URL")
HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://')
RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g')
SERVICE_NAME="finetuning-server - upload - file"
# Parse the JSON response
purpose=$(echo "$RESPONSE_BODY" | jq -r '.purpose')
filename=$(echo "$RESPONSE_BODY" | jq -r '.filename')
# Define expected values
expected_purpose="fine-tune"
expected_filename="test_data.json"
if [ "$HTTP_STATUS" -ne "200" ]; then
echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
docker logs finetuning-server >> ${LOG_PATH}/finetuning-server_upload_file.log
exit 1
else
echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
fi
# Check if the parsed values match the expected values
if [[ "$purpose" != "$expected_purpose" || "$filename" != "$expected_filename" ]]; then
echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY"
docker logs finetuning-server >> ${LOG_PATH}/finetuning-server_upload_file.log
exit 1
else
echo "[ $SERVICE_NAME ] Content is as expected."
fi
# test /v1/fine_tuning/jobs
URL="http://${ip_address}:$finetuning_service_port/v1/fine_tuning/jobs"
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -H 'Content-Type: application/json' -d '{"training_file": "test_data.json","model": "BAAI/bge-reranker-base","General":{"task":"rerank","lora_config":null}}' "$URL")
HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://')
RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g')
SERVICE_NAME="finetuning-server - create finetuning job"
if [ "$HTTP_STATUS" -ne "200" ]; then
echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
docker logs finetuning-server >> ${LOG_PATH}/finetuning-server_create.log
exit 1
else
echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
fi
if [[ "$RESPONSE_BODY" != *'{"id":"ft-job'* ]]; then
echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY"
docker logs finetuning-server >> ${LOG_PATH}/finetuning-server_create.log
exit 1
else
echo "[ $SERVICE_NAME ] Content is as expected."
fi
sleep 3m
docker logs finetuning-server 2>&1 | tee ${LOG_PATH}/finetuning-server_create.log
FINETUNING_LOG=$(grep "succeeded" ${LOG_PATH}/finetuning-server_create.log)
if [[ "$FINETUNING_LOG" != *'succeeded'* ]]; then
echo "Finetuning failed."
RAY_JOBID=$(grep "Submitted Ray job" ${LOG_PATH}/finetuning-server_create.log | sed 's/.*raysubmit/raysubmit/' | cut -d' ' -f 1)
docker exec finetuning-server python -c "import os;os.environ['RAY_ADDRESS']='http://localhost:8265';from ray.job_submission import JobSubmissionClient;client = JobSubmissionClient();print(client.get_job_logs('${RAY_JOBID}'))" 2>&1 | tee ${LOG_PATH}/finetuning.log
exit 1
else
echo "Finetuning succeeded."
fi
}
function stop_docker() {
cid=$(docker ps -aq --filter "name=finetuning-server*")
if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
}
function main() {
stop_docker
build_docker_images
start_service
validate_microservice
stop_docker
echo y | docker system prune
}
main

View File

@@ -0,0 +1,131 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -x
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')
finetuning_service_port=8015
ray_port=8265
service_name=finetuning
function build_docker_images() {
cd $WORKPATH/docker_image_build
if [ ! -d "GenAIComps" ] ; then
git clone --depth 1 --branch ${opea_branch:-"main"} https://github.com/opea-project/GenAIComps.git
fi
docker compose -f build.yaml build ${service_name} --no-cache > ${LOG_PATH}/docker_image_build.log
}
function start_service() {
export no_proxy="localhost,127.0.0.1,"${ip_address}
docker run -d --name="finetuning-server" -p $finetuning_service_port:$finetuning_service_port -p $ray_port:$ray_port --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy ${IMAGE_REPO}/finetuning:${IMAGE_TAG}
sleep 1m
}
function validate_microservice() {
cd $LOG_PATH
export no_proxy="localhost,127.0.0.1,"${ip_address}
# test /v1/dataprep upload file
URL="http://${ip_address}:$finetuning_service_port/v1/files"
cat <<EOF > test_data.json
{"query": "Five women walk along a beach wearing flip-flops.", "pos": ["Some women with flip-flops on, are walking along the beach"], "neg": ["The 4 women are sitting on the beach.", "There was a reform in 1996.", "She's not going to court to clear her record.", "The man is talking about hawaii.", "A woman is standing outside.", "The battle was over. ", "A group of people plays volleyball."]}
{"query": "A woman standing on a high cliff on one leg looking over a river.", "pos": ["A woman is standing on a cliff."], "neg": ["A woman sits on a chair.", "George Bush told the Republicans there was no way he would let them even consider this foolish idea, against his top advisors advice.", "The family was falling apart.", "no one showed up to the meeting", "A boy is sitting outside playing in the sand.", "Ended as soon as I received the wire.", "A child is reading in her bedroom."]}
{"query": "Two woman are playing instruments; one a clarinet, the other a violin.", "pos": ["Some people are playing a tune."], "neg": ["Two women are playing a guitar and drums.", "A man is skiing down a mountain.", "The fatal dose was not taken when the murderer thought it would be.", "Person on bike", "The girl is standing, leaning against the archway.", "A group of women watch soap operas.", "No matter how old people get they never forget. "]}
{"query": "A girl with a blue tank top sitting watching three dogs.", "pos": ["A girl is wearing blue."], "neg": ["A girl is with three cats.", "The people are watching a funeral procession.", "The child is wearing black.", "Financing is an issue for us in public schools.", "Kids at a pool.", "It is calming to be assaulted.", "I face a serious problem at eighteen years old. "]}
{"query": "A yellow dog running along a forest path.", "pos": ["a dog is running"], "neg": ["a cat is running", "Steele did not keep her original story.", "The rule discourages people to pay their child support.", "A man in a vest sits in a car.", "Person in black clothing, with white bandanna and sunglasses waits at a bus stop.", "Neither the Globe or Mail had comments on the current state of Canada's road system. ", "The Spring Creek facility is old and outdated."]}
{"query": "It sets out essential activities in each phase along with critical factors related to those activities.", "pos": ["Critical factors for essential activities are set out."], "neg": ["It lays out critical activities but makes no provision for critical factors related to those activities.", "People are assembled in protest.", "The state would prefer for you to do that.", "A girl sits beside a boy.", "Two males are performing.", "Nobody is jumping", "Conrad was being plotted against, to be hit on the head."]}
EOF
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F 'file=@./test_data.json' -F purpose="fine-tune" -H 'Content-Type: multipart/form-data' "$URL")
HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://')
RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g')
SERVICE_NAME="finetuning-server - upload - file"
# Parse the JSON response
purpose=$(echo "$RESPONSE_BODY" | jq -r '.purpose')
filename=$(echo "$RESPONSE_BODY" | jq -r '.filename')
# Define expected values
expected_purpose="fine-tune"
expected_filename="test_data.json"
if [ "$HTTP_STATUS" -ne "200" ]; then
echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
docker logs finetuning-server >> ${LOG_PATH}/finetuning-server_upload_file.log
exit 1
else
echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
fi
# Check if the parsed values match the expected values
if [[ "$purpose" != "$expected_purpose" || "$filename" != "$expected_filename" ]]; then
echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY"
docker logs finetuning-server >> ${LOG_PATH}/finetuning-server_upload_file.log
exit 1
else
echo "[ $SERVICE_NAME ] Content is as expected."
fi
# test /v1/fine_tuning/jobs
URL="http://${ip_address}:$finetuning_service_port/v1/fine_tuning/jobs"
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -H 'Content-Type: application/json' -d '{"training_file": "test_data.json","model": "BAAI/bge-reranker-base","General":{"task":"rerank","lora_config":null}}' "$URL")
HTTP_STATUS=$(echo "$HTTP_RESPONSE" | tr -d '\n' | sed -e 's/.*HTTPSTATUS://')
RESPONSE_BODY=$(echo "$HTTP_RESPONSE" | sed -e 's/HTTPSTATUS:.*//g')
SERVICE_NAME="finetuning-server - create finetuning job"
if [ "$HTTP_STATUS" -ne "200" ]; then
echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
docker logs finetuning-server >> ${LOG_PATH}/finetuning-server_create.log
exit 1
else
echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
fi
if [[ "$RESPONSE_BODY" != *'{"id":"ft-job'* ]]; then
echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY"
docker logs finetuning-server >> ${LOG_PATH}/finetuning-server_create.log
exit 1
else
echo "[ $SERVICE_NAME ] Content is as expected."
fi
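# A more robust alternative to the fixed sleep below would be to poll the job
# status. A minimal sketch (assumes the server implements the OpenAI-style
# retrieve endpoint and that the job id has been parsed from RESPONSE_BODY);
# it is defined here but not wired into the flow:
wait_for_finetuning_job() {
    local job_id=$1 deadline=$((SECONDS + 600))
    while [ "$SECONDS" -lt "$deadline" ]; do
        local status
        status=$(curl --silent "http://${ip_address}:$finetuning_service_port/v1/fine_tuning/jobs/${job_id}" | jq -r '.status')
        [ "$status" = "succeeded" ] && return 0
        [ "$status" = "failed" ] && return 1
        sleep 10
    done
    return 1
}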
sleep 3m
docker logs finetuning-server 2>&1 | tee ${LOG_PATH}/finetuning-server_create.log
if ! grep -q "succeeded" ${LOG_PATH}/finetuning-server_create.log; then
    echo "Finetuning failed."
    RAY_JOBID=$(grep "Submitted Ray job" ${LOG_PATH}/finetuning-server_create.log | sed 's/.*raysubmit/raysubmit/' | cut -d' ' -f 1)
    docker exec finetuning-server python -c "import os;os.environ['RAY_ADDRESS']='http://localhost:8265';from ray.job_submission import JobSubmissionClient;client = JobSubmissionClient();print(client.get_job_logs('${RAY_JOBID}'))" 2>&1 | tee ${LOG_PATH}/finetuning.log
    exit 1
else
    echo "Finetuning succeeded."
fi
}
function stop_docker() {
    # The name filter does substring matching, so no glob is needed.
    cid=$(docker ps -aq --filter "name=finetuning-server")
    if [[ -n "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
}
function main() {
    stop_docker
    build_docker_images
    start_service
    validate_microservice
    stop_docker
    echo y | docker system prune
}
main

View File

@@ -10,7 +10,7 @@ services:
ports:
- "3001:80"
volumes:
- "${MODEL_PATH:-./data}:/data"
- "./data:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
@@ -56,7 +56,7 @@ services:
ports:
- "3004:80"
volumes:
- "${MODEL_PATH:-./data}:/data"
- "./data:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
@@ -86,7 +86,7 @@ services:
ports:
- "3006:80"
volumes:
- "${MODEL_PATH:-./data}:/data"
- "./data:/data"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}

View File

@@ -9,7 +9,7 @@ services:
ports:
- "3001:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "./data:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
@@ -60,7 +60,7 @@ services:
ports:
- "3004:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "./data:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
@@ -96,7 +96,7 @@ services:
ports:
- "3006:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "./data:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}

View File

@@ -9,7 +9,7 @@ services:
ports:
- "3001:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "./data:/data"
runtime: habana
cap_add:
- SYS_NICE
@@ -67,7 +67,7 @@ services:
ports:
- "3004:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "./data:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
@@ -103,7 +103,7 @@ services:
ports:
- "3006:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "./data:/data"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}

View File

@@ -9,7 +9,6 @@ echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
export MODEL_CACHE=${model_cache:-"./data"}
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"

View File

@@ -9,7 +9,6 @@ echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
export MODEL_PATH=${model_cache:-"./data"}
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"

View File

@@ -9,7 +9,6 @@ echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
export MODEL_CACHE=${model_cache:-"./data"}
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"

View File

@@ -21,7 +21,7 @@ services:
timeout: 10s
retries: 100
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "${MODEL_CACHE}:/data"
shm_size: 1g
command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
llm:
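
Note: with the ./data fallback removed in this hunk, MODEL_CACHE must be
exported before the stack is brought up; a minimal sketch (the path is
illustrative):

export MODEL_CACHE=$HOME/.cache/huggingface/hub
docker compose up -d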

View File

@@ -30,7 +30,7 @@ services:
- SYS_NICE
ipc: host
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "${MODEL_CACHE}:/data"
command: --model-id ${LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048
llm:
image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest}

View File

@@ -1,8 +1,48 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
ARG BASE_TAG=latest
FROM opea/comps-base:$BASE_TAG
# Stage 1: base setup used by other stages
FROM python:3.11-slim AS base
# get security updates
RUN apt-get update && apt-get upgrade -y && \
apt-get clean && rm -rf /var/lib/apt/lists/*
ENV HOME=/home/user
RUN useradd -m -s /bin/bash user && \
mkdir -p $HOME && \
chown -R user $HOME
WORKDIR $HOME
# Stage 2: latest GenAIComps sources
FROM base AS git
RUN apt-get update && apt-get install -y --no-install-recommends git
RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git
# Stage 3: common layer shared by services using GenAIComps
FROM base AS comps-base
# copy just relevant parts
COPY --from=git $HOME/GenAIComps/comps $HOME/GenAIComps/comps
COPY --from=git $HOME/GenAIComps/*.* $HOME/GenAIComps/LICENSE $HOME/GenAIComps/
WORKDIR $HOME/GenAIComps
RUN pip install --no-cache-dir --upgrade pip setuptools && \
pip install --no-cache-dir -r $HOME/GenAIComps/requirements.txt
WORKDIR $HOME
ENV PYTHONPATH=$PYTHONPATH:$HOME/GenAIComps
USER user
# Stage 4: unique part
FROM comps-base
COPY ./videoqna.py $HOME/videoqna.py
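
The hunk above replaces the published opea/comps-base base image with inlined
build stages, so the service image no longer depends on BASE_TAG. A minimal
build sketch (the image tag is illustrative):

docker build -t opea/videoqna:latest .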

View File

@@ -8,7 +8,7 @@ services:
ports:
- "8399:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "${MODEL_CACHE}:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}

View File

@@ -8,7 +8,7 @@ services:
ports:
- "8399:80"
volumes:
- "${MODEL_CACHE:-./data}:/data"
- "${MODEL_CACHE}:/data"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}