remove ray serve (#516)

Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>
XinyaoWa authored 2024-08-02 09:28:36 +08:00, committed by GitHub
parent 09a3196324
commit c71bc68c9c
3 changed files with 1 addition and 476 deletions


@@ -69,20 +69,6 @@ Build microservice docker.
docker build --no-cache -t opea/llm-vllm-ray:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/vllm-ray/docker/Dockerfile.microservice .
```
#### 5.4 Use Ray Serve
Build the Ray Serve Docker image.
```bash
docker build --no-cache -t ray_serve:habana --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/ray_serve/docker/Dockerfile.rayserve .
```
Build the microservice Docker image.
```bash
docker build --no-cache -t opea/llm-ray:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/ray_serve/docker/Dockerfile.microservice .
```
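To confirm both images built successfully, a quick check of the local image list (tags as used above):
```bash
docker images | grep -E 'ray_serve|opea/llm-ray'
```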
### 6. Build Dataprep Image
```bash
@@ -159,7 +145,7 @@ Then run the command `docker images`, you will have the following 8 Docker Image
1. `opea/embedding-tei:latest`
2. `opea/retriever-redis:latest`
3. `opea/reranking-tei:latest`
4. `opea/llm-tgi:latest` or `opea/llm-vllm:latest` or `opea/llm-vllm-ray:latest` or `opea/llm-ray:latest`
4. `opea/llm-tgi:latest` or `opea/llm-vllm:latest` or `opea/llm-vllm-ray:latest`
5. `opea/tei-gaudi:latest`
6. `opea/dataprep-redis:latest`
7. `opea/chatqna:latest` or `opea/chatqna-guardrails:latest`
@@ -192,7 +178,6 @@ export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
export vLLM_LLM_ENDPOINT="http://${host_ip}:8008"
export vLLM_RAY_LLM_ENDPOINT="http://${host_ip}:8008"
export RAY_Serve_LLM_ENDPOINT="http://${host_ip}:8008"
export LLM_SERVICE_PORT=9000
export REDIS_URL="redis://${host_ip}:6379"
export INDEX_NAME="rag-redis"
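These exports assume `host_ip` holds the machine's reachable IP address; one common way to set it (mirroring how the test script below derives `ip_address`):
```bash
export host_ip=$(hostname -I | awk '{print $1}')
```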
@@ -243,12 +228,6 @@ If use vllm-on-ray for llm backend.
docker compose -f compose_vllm_ray.yaml up -d
```
If you use Ray Serve for the LLM backend.
```bash
docker compose -f compose_ray_serve.yaml up -d
```
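Before sending traffic, you can confirm the Ray Serve backend came up; a small sketch (the `Connected` log line is the same readiness signal the test script below waits for):
```bash
docker compose -f compose_ray_serve.yaml ps
docker logs ray-gaudi-server 2>&1 | grep -q Connected && echo "Ray Serve is ready"
```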
If you want to enable the guardrails microservice in the pipeline, use the command below instead:
```bash
@@ -342,13 +321,6 @@ curl http://${your_ip}:8008/v1/chat/completions \
-d '{"model": "${LLM_MODEL_ID}", "messages": [{"role": "user", "content": "What is Deep Learning?"}]}'
```
```bash
#Ray Serve Service
curl http://${your_ip}:8008/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{"model": "${LLM_MODEL_ID_NAME}", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 32 }'
```
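The endpoint follows the OpenAI-style chat-completions schema, so the generated text can be extracted from the JSON response; a sketch assuming `jq` is installed (note the double quotes so `${LLM_MODEL_ID_NAME}` actually expands):
```bash
curl -s http://${your_ip}:8008/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d "{\"model\": \"${LLM_MODEL_ID_NAME}\", \"messages\": [{\"role\": \"user\", \"content\": \"What is Deep Learning?\"}], \"max_tokens\": 32}" \
  | jq -r '.choices[0].message.content'
```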
7. LLM Microservice
```bash


@@ -1,202 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
version: "3.8"

services:
  redis-vector-db:
    image: redis/redis-stack:7.2.0-v9
    container_name: redis-vector-db
    ports:
      - "6379:6379"
      - "8001:8001"

  dataprep-redis-service:
    image: opea/dataprep-redis:latest
    container_name: dataprep-redis-server
    depends_on:
      - redis-vector-db
    ports:
      - "6007:6007"
      - "6008:6008"
      - "6009:6009"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      REDIS_URL: ${REDIS_URL}
      INDEX_NAME: ${INDEX_NAME}

  tei-embedding-service:
    image: opea/tei-gaudi:latest
    container_name: tei-embedding-gaudi-server
    ports:
      - "8090:80"
    volumes:
      - "./data:/data"
    runtime: habana
    cap_add:
      - SYS_NICE
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
      MAX_WARMUP_SEQUENCE_LENGTH: 512
    command: --model-id ${EMBEDDING_MODEL_ID}

  embedding:
    image: opea/embedding-tei:latest
    container_name: embedding-tei-server
    depends_on:
      - tei-embedding-service
    ports:
      - "6000:6000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-embedding-service"
    restart: unless-stopped

  retriever:
    image: opea/retriever-redis:latest
    container_name: retriever-redis-server
    depends_on:
      - redis-vector-db
    ports:
      - "7000:7000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      REDIS_URL: ${REDIS_URL}
      INDEX_NAME: ${INDEX_NAME}
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-retriever-service"
    restart: unless-stopped

  tei-reranking-service:
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
    container_name: tei-reranking-gaudi-server
    ports:
      - "8808:80"
    volumes:
      - "./data:/data"
    shm_size: 1g
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
    command: --model-id ${RERANK_MODEL_ID} --auto-truncate

  reranking:
    image: opea/reranking-tei:latest
    container_name: reranking-tei-gaudi-server
    depends_on:
      - tei-reranking-service
    ports:
      - "8000:8000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-reranking-service"
    restart: unless-stopped

  ray-service:
    image: ray_serve:habana
    container_name: ray-gaudi-server
    ports:
      - "8008:80"
    volumes:
      - "./data:/data"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
      LLM_MODEL: ${LLM_MODEL_ID}
      TRUST_REMOTE_CODE: True
    runtime: habana
    cap_add:
      - SYS_NICE
    ipc: host
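    # Bring up a single-node Ray head, then serve an OpenAI-compatible chat API on port 80 inside the container.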
    command: /bin/bash -c "ray start --head && python api_server_openai.py --port_number 80 --model_id_or_path $LLM_MODEL --chat_processor ChatModelLlama --num_cpus_per_worker 8 --num_hpus_per_worker 1"

  llm:
    image: opea/llm-ray:latest
    container_name: llm-ray-gaudi-server
    depends_on:
      - ray-service
    ports:
      - "9000:9000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      RAY_Serve_ENDPOINT: ${RAY_Serve_LLM_ENDPOINT}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      LLM_MODEL: ${LLM_MODEL_ID}
    restart: unless-stopped

  chaqna-gaudi-backend-server:
    image: opea/chatqna:latest
    container_name: chatqna-gaudi-backend-server
    depends_on:
      - redis-vector-db
      - tei-embedding-service
      - embedding
      - retriever
      - tei-reranking-service
      - reranking
      - ray-service
      - llm
    ports:
      - "8888:8888"
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
      - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
      - EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
      - RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
      - RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP}
      - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
      - LLM_SERVICE_PORT=${LLM_SERVICE_PORT}
    ipc: host
    restart: always

  chaqna-gaudi-ui-server:
    image: opea/chatqna-ui:latest
    container_name: chatqna-gaudi-ui-server
    depends_on:
      - chaqna-gaudi-backend-server
    ports:
      - "5173:5173"
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
      - CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
      - UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT}
      - GET_FILE=${DATAPREP_GET_FILE_ENDPOINT}
      - DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT}
    ipc: host
    restart: always

networks:
  default:
    driver: bridge
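
Before starting the stack, the file and its environment interpolation can be validated without launching anything (standard Compose CLI):
```bash
docker compose -f compose_ray_serve.yaml config --quiet && echo "compose file OK"
```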


@@ -1,245 +0,0 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -e
echo "IMAGE_REPO=${IMAGE_REPO}"
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
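# Use the host's first IPv4 address for all service endpoints below.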
ip_address=$(hostname -I | awk '{print $1}')

function build_docker_images() {
    cd $WORKPATH
    git clone https://github.com/opea-project/GenAIComps.git
    cd GenAIComps

    docker build -t opea/embedding-tei:latest -f comps/embeddings/langchain/docker/Dockerfile .
    docker build -t opea/retriever-redis:latest -f comps/retrievers/langchain/redis/docker/Dockerfile .
    docker build -t opea/reranking-tei:latest -f comps/reranks/tei/docker/Dockerfile .
    docker build -t ray_serve:habana -f comps/llms/text-generation/ray_serve/docker/Dockerfile.rayserve .
    docker build -t opea/llm-ray:latest -f comps/llms/text-generation/ray_serve/docker/Dockerfile.microservice .
    docker build -t opea/dataprep-redis:latest -f comps/dataprep/redis/langchain/docker/Dockerfile .

    # cd ..
    # git clone https://github.com/huggingface/tei-gaudi
    # cd tei-gaudi/
    # docker build --no-cache -f Dockerfile-hpu -t opea/tei-gaudi:latest .
    docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.2

    cd $WORKPATH/docker
    docker build --no-cache -t opea/chatqna:latest -f Dockerfile .

    cd $WORKPATH/docker/ui
    docker build --no-cache -t opea/chatqna-ui:latest -f docker/Dockerfile .

    docker images
}

function start_services() {
    # build tei-gaudi for each test instead of pull from local registry
    cd $WORKPATH
    git clone https://github.com/huggingface/tei-gaudi
    cd tei-gaudi/
    docker build --no-cache -f Dockerfile-hpu -t opea/tei-gaudi:latest .

    cd $WORKPATH/docker/gaudi

    export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
    export RERANK_MODEL_ID="BAAI/bge-reranker-base"
    export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
    export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:8090"
    export TEI_RERANKING_ENDPOINT="http://${ip_address}:8808"
    export RAY_Serve_LLM_ENDPOINT="http://${ip_address}:8008"
    export LLM_SERVICE_PORT=9000
    export REDIS_URL="redis://${ip_address}:6379"
    export INDEX_NAME="rag-redis"
    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
    export MEGA_SERVICE_HOST_IP=${ip_address}
    export EMBEDDING_SERVICE_HOST_IP=${ip_address}
    export RETRIEVER_SERVICE_HOST_IP=${ip_address}
    export RERANK_SERVICE_HOST_IP=${ip_address}
    export LLM_SERVICE_HOST_IP=${ip_address}
    export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/chatqna"
    export DATAPREP_SERVICE_ENDPOINT="http://${ip_address}:6007/v1/dataprep"

    sed -i "s/backend_address/$ip_address/g" $WORKPATH/docker/ui/svelte/.env

    if [[ "$IMAGE_REPO" != "" ]]; then
        # Replace the container name with a test-specific name
        echo "using image repository $IMAGE_REPO and image tag $IMAGE_TAG"
        sed -i "s#image: opea/chatqna:latest#image: opea/chatqna:${IMAGE_TAG}#g" compose_ray_serve.yaml
        sed -i "s#image: opea/chatqna-ui:latest#image: opea/chatqna-ui:${IMAGE_TAG}#g" compose_ray_serve.yaml
        sed -i "s#image: opea/chatqna-conversation-ui:latest#image: opea/chatqna-conversation-ui:${IMAGE_TAG}#g" compose_ray_serve.yaml
        sed -i "s#image: opea/*#image: ${IMAGE_REPO}opea/#g" compose_ray_serve.yaml
        sed -i "s#image: ${IMAGE_REPO}opea/tei-gaudi:latest#image: opea/tei-gaudi:latest#g" compose_ray_serve.yaml
        echo "cat compose_ray_serve.yaml"
        cat compose_ray_serve.yaml
    fi

    # Start Docker Containers
    docker compose -f compose_ray_serve.yaml up -d
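
    # Wait up to ~180s for the Ray head to log "Connected", which this test
    # uses as the readiness signal before any requests are sent.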
    n=0
    until [[ "$n" -ge 180 ]]; do
        docker logs ray-gaudi-server > ray_service_start.log
        if grep -q Connected ray_service_start.log; then
            break
        fi
        sleep 1s
        n=$((n+1))
    done
}
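
# Usage: validate_services URL EXPECTED_RESULT SERVICE_NAME DOCKER_NAME INPUT_DATA
# POSTs INPUT_DATA as JSON to URL, requires HTTP 200 and EXPECTED_RESULT in the
# response body, and dumps the container's logs to $LOG_PATH on any mismatch.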
function validate_services() {
    local URL="$1"
    local EXPECTED_RESULT="$2"
    local SERVICE_NAME="$3"
    local DOCKER_NAME="$4"
    local INPUT_DATA="$5"

    local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
    if [ "$HTTP_STATUS" -eq 200 ]; then
        echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."

        local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log)

        if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then
            echo "[ $SERVICE_NAME ] Content is as expected."
        else
            echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT"
            docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
            exit 1
        fi
    else
        echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
        docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
        exit 1
    fi
    sleep 1s
}

function validate_microservices() {
    # Check if the microservices are running correctly.

    # tei for embedding service
    validate_services \
        "${ip_address}:8090/embed" \
        "\[\[" \
        "tei-embedding" \
        "tei-embedding-gaudi-server" \
        '{"inputs":"What is Deep Learning?"}'

    # embedding microservice
    validate_services \
        "${ip_address}:6000/v1/embeddings" \
        '"text":"What is Deep Learning?","embedding":\[' \
        "embedding" \
        "embedding-tei-server" \
        '{"text":"What is Deep Learning?"}'

    sleep 1m # retrieval can't curl as expected, try to wait for more time

    # retrieval microservice
    test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
    validate_services \
        "${ip_address}:7000/v1/retrieval" \
        " " \
        "retrieval" \
        "retriever-redis-server" \
        "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${test_embedding}}"

    # tei for rerank microservice
    validate_services \
        "${ip_address}:8808/rerank" \
        '{"index":1,"score":' \
        "tei-rerank" \
        "tei-reranking-gaudi-server" \
        '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}'

    # rerank microservice
    validate_services \
        "${ip_address}:8000/v1/reranking" \
        "Deep learning is..." \
        "rerank" \
        "reranking-tei-gaudi-server" \
        '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}'

    # ray for llm service
    validate_services \
        "${ip_address}:8008/v1/chat/completions" \
        "content" \
        "ray-llm" \
        "ray-gaudi-server" \
        '{"model": "neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 32 }'

    # llm microservice
    validate_services \
        "${ip_address}:9000/v1/chat/completions" \
        "data: " \
        "llm" \
        "llm-ray-gaudi-server" \
        '{"query":"What is Deep Learning?"}'
}

function validate_megaservice() {
    # Curl the Mega Service
    validate_services \
        "${ip_address}:8888/v1/chatqna" \
        "billion" \
        "mega-chatqna" \
        "chatqna-gaudi-backend-server" \
        '{"messages": "What is the revenue of Nike in 2023?"}'
}

function validate_frontend() {
    cd $WORKPATH/docker/ui/svelte
    local conda_env_name="OPEA_e2e"
    export PATH=${HOME}/miniforge3/bin/:$PATH
    # conda remove -n ${conda_env_name} --all -y
    # conda create -n ${conda_env_name} python=3.12 -y
    source activate ${conda_env_name}

    sed -i "s/localhost/$ip_address/g" playwright.config.ts

    # conda install -c conda-forge nodejs -y
    npm install && npm ci && npx playwright install --with-deps
    node -v && npm -v && pip list

    exit_status=0
    npx playwright test || exit_status=$?

    if [ $exit_status -ne 0 ]; then
        echo "[TEST INFO]: ---------frontend test failed---------"
        exit $exit_status
    else
        echo "[TEST INFO]: ---------frontend test passed---------"
    fi
}

function stop_docker() {
    cd $WORKPATH/docker/gaudi
    docker compose -f compose_ray_serve.yaml down
}

function main() {
    stop_docker
    if [[ "$IMAGE_REPO" == "" ]]; then build_docker_images; fi

    start_time=$(date +%s)
    start_services
    end_time=$(date +%s)
    duration=$((end_time-start_time))
    echo "Mega service start duration is $duration s"

    validate_microservices
    validate_megaservice
    # validate_frontend

    stop_docker
    echo y | docker system prune
}
main