remove ray serve (#516)

Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>
XinyaoWa authored 2024-08-02 09:28:36 +08:00, committed by GitHub
parent 09a3196324
commit c71bc68c9c
3 changed files with 1 addition and 476 deletions


@@ -69,20 +69,6 @@ Build microservice docker.
docker build --no-cache -t opea/llm-vllm-ray:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/vllm-ray/docker/Dockerfile.microservice .
```
#### 5.4 Use Ray Serve
Build the Ray Serve Docker image.
```bash
docker build --no-cache -t ray_serve:habana --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/ray_serve/docker/Dockerfile.rayserve .
```
Build the microservice Docker image.
```bash
docker build --no-cache -t opea/llm-ray:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/ray_serve/docker/Dockerfile.microservice .
```
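To confirm both images built successfully, a quick check of the local image list (tags as used above):
```bash
docker images | grep -E 'ray_serve|opea/llm-ray'
```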
### 6. Build Dataprep Image
```bash
@@ -159,7 +145,7 @@ Then run the command `docker images`, you will have the following 8 Docker Image
1. `opea/embedding-tei:latest`
2. `opea/retriever-redis:latest`
3. `opea/reranking-tei:latest`
4. `opea/llm-tgi:latest` or `opea/llm-vllm:latest` or `opea/llm-vllm-ray:latest` or `opea/llm-ray:latest`
4. `opea/llm-tgi:latest` or `opea/llm-vllm:latest` or `opea/llm-vllm-ray:latest`
5. `opea/tei-gaudi:latest`
6. `opea/dataprep-redis:latest`
7. `opea/chatqna:latest` or `opea/chatqna-guardrails:latest`
@@ -192,7 +178,6 @@ export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
export vLLM_LLM_ENDPOINT="http://${host_ip}:8008"
export vLLM_RAY_LLM_ENDPOINT="http://${host_ip}:8008"
export RAY_Serve_LLM_ENDPOINT="http://${host_ip}:8008"
export LLM_SERVICE_PORT=9000
export REDIS_URL="redis://${host_ip}:6379"
export INDEX_NAME="rag-redis"
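These exports assume `host_ip` holds the machine's reachable IP address; one common way to set it (mirroring how the test script below derives `ip_address`):
```bash
export host_ip=$(hostname -I | awk '{print $1}')
```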
@@ -243,12 +228,6 @@ If use vllm-on-ray for llm backend.
docker compose -f compose_vllm_ray.yaml up -d
```
If you use Ray Serve for the LLM backend.
```bash
docker compose -f compose_ray_serve.yaml up -d
```
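Before sending traffic, you can confirm the Ray Serve backend came up; a small sketch (the `Connected` log line is the same readiness signal the test script below waits for):
```bash
docker compose -f compose_ray_serve.yaml ps
docker logs ray-gaudi-server 2>&1 | grep -q Connected && echo "Ray Serve is ready"
```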
If you want to enable the guardrails microservice in the pipeline, use the command below instead:
```bash
@@ -342,13 +321,6 @@ curl http://${your_ip}:8008/v1/chat/completions \
-d '{"model": "${LLM_MODEL_ID}", "messages": [{"role": "user", "content": "What is Deep Learning?"}]}'
```
```bash
#Ray Serve Service
curl http://${your_ip}:8008/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{"model": "${LLM_MODEL_ID_NAME}", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 32 }'
```
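The endpoint follows the OpenAI-style chat-completions schema, so the generated text can be extracted from the JSON response; a sketch assuming `jq` is installed (note the double quotes so `${LLM_MODEL_ID_NAME}` actually expands):
```bash
curl -s http://${your_ip}:8008/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d "{\"model\": \"${LLM_MODEL_ID_NAME}\", \"messages\": [{\"role\": \"user\", \"content\": \"What is Deep Learning?\"}], \"max_tokens\": 32}" \
  | jq -r '.choices[0].message.content'
```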
7. LLM Microservice
```bash


@@ -1,202 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
version: "3.8"

services:
  redis-vector-db:
    image: redis/redis-stack:7.2.0-v9
    container_name: redis-vector-db
    ports:
      - "6379:6379"
      - "8001:8001"

  dataprep-redis-service:
    image: opea/dataprep-redis:latest
    container_name: dataprep-redis-server
    depends_on:
      - redis-vector-db
    ports:
      - "6007:6007"
      - "6008:6008"
      - "6009:6009"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      REDIS_URL: ${REDIS_URL}
      INDEX_NAME: ${INDEX_NAME}

  tei-embedding-service:
    image: opea/tei-gaudi:latest
    container_name: tei-embedding-gaudi-server
    ports:
      - "8090:80"
    volumes:
      - "./data:/data"
    runtime: habana
    cap_add:
      - SYS_NICE
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
      MAX_WARMUP_SEQUENCE_LENGTH: 512
    command: --model-id ${EMBEDDING_MODEL_ID}

  embedding:
    image: opea/embedding-tei:latest
    container_name: embedding-tei-server
    depends_on:
      - tei-embedding-service
    ports:
      - "6000:6000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-embedding-service"
    restart: unless-stopped

  retriever:
    image: opea/retriever-redis:latest
    container_name: retriever-redis-server
    depends_on:
      - redis-vector-db
    ports:
      - "7000:7000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      REDIS_URL: ${REDIS_URL}
      INDEX_NAME: ${INDEX_NAME}
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-retriever-service"
    restart: unless-stopped

  tei-reranking-service:
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
    container_name: tei-reranking-gaudi-server
    ports:
      - "8808:80"
    volumes:
      - "./data:/data"
    shm_size: 1g
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
    command: --model-id ${RERANK_MODEL_ID} --auto-truncate

  reranking:
    image: opea/reranking-tei:latest
    container_name: reranking-tei-gaudi-server
    depends_on:
      - tei-reranking-service
    ports:
      - "8000:8000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-reranking-service"
    restart: unless-stopped

  ray-service:
    image: ray_serve:habana
    container_name: ray-gaudi-server
    ports:
      - "8008:80"
    volumes:
      - "./data:/data"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
      LLM_MODEL: ${LLM_MODEL_ID}
      TRUST_REMOTE_CODE: True
    runtime: habana
    cap_add:
      - SYS_NICE
    ipc: host
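    # Bring up a single-node Ray head, then serve an OpenAI-compatible chat API on port 80 inside the container.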
    command: /bin/bash -c "ray start --head && python api_server_openai.py --port_number 80 --model_id_or_path $LLM_MODEL --chat_processor ChatModelLlama --num_cpus_per_worker 8 --num_hpus_per_worker 1"

  llm:
    image: opea/llm-ray:latest
    container_name: llm-ray-gaudi-server
    depends_on:
      - ray-service
    ports:
      - "9000:9000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      RAY_Serve_ENDPOINT: ${RAY_Serve_LLM_ENDPOINT}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      LLM_MODEL: ${LLM_MODEL_ID}
    restart: unless-stopped

  chaqna-gaudi-backend-server:
    image: opea/chatqna:latest
    container_name: chatqna-gaudi-backend-server
    depends_on:
      - redis-vector-db
      - tei-embedding-service
      - embedding
      - retriever
      - tei-reranking-service
      - reranking
      - ray-service
      - llm
    ports:
      - "8888:8888"
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
      - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
      - EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
      - RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
      - RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP}
      - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
      - LLM_SERVICE_PORT=${LLM_SERVICE_PORT}
    ipc: host
    restart: always

  chaqna-gaudi-ui-server:
    image: opea/chatqna-ui:latest
    container_name: chatqna-gaudi-ui-server
    depends_on:
      - chaqna-gaudi-backend-server
    ports:
      - "5173:5173"
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
      - CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
      - UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT}
      - GET_FILE=${DATAPREP_GET_FILE_ENDPOINT}
      - DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT}
    ipc: host
    restart: always

networks:
  default:
    driver: bridge
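
Before starting the stack, the file and its environment interpolation can be validated without launching anything (standard Compose CLI):
```bash
docker compose -f compose_ray_serve.yaml config --quiet && echo "compose file OK"
```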


@@ -1,245 +0,0 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -e
echo "IMAGE_REPO=${IMAGE_REPO}"
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
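# Use the host's first IPv4 address for all service endpoints below.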
ip_address=$(hostname -I | awk '{print $1}')

function build_docker_images() {
    cd $WORKPATH
    git clone https://github.com/opea-project/GenAIComps.git
    cd GenAIComps

    docker build -t opea/embedding-tei:latest -f comps/embeddings/langchain/docker/Dockerfile .
    docker build -t opea/retriever-redis:latest -f comps/retrievers/langchain/redis/docker/Dockerfile .
    docker build -t opea/reranking-tei:latest -f comps/reranks/tei/docker/Dockerfile .
    docker build -t ray_serve:habana -f comps/llms/text-generation/ray_serve/docker/Dockerfile.rayserve .
    docker build -t opea/llm-ray:latest -f comps/llms/text-generation/ray_serve/docker/Dockerfile.microservice .
    docker build -t opea/dataprep-redis:latest -f comps/dataprep/redis/langchain/docker/Dockerfile .

    # cd ..
    # git clone https://github.com/huggingface/tei-gaudi
    # cd tei-gaudi/
    # docker build --no-cache -f Dockerfile-hpu -t opea/tei-gaudi:latest .
    docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.2

    cd $WORKPATH/docker
    docker build --no-cache -t opea/chatqna:latest -f Dockerfile .

    cd $WORKPATH/docker/ui
    docker build --no-cache -t opea/chatqna-ui:latest -f docker/Dockerfile .

    docker images
}

function start_services() {
    # build tei-gaudi for each test instead of pull from local registry
    cd $WORKPATH
    git clone https://github.com/huggingface/tei-gaudi
    cd tei-gaudi/
    docker build --no-cache -f Dockerfile-hpu -t opea/tei-gaudi:latest .

    cd $WORKPATH/docker/gaudi

    export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
    export RERANK_MODEL_ID="BAAI/bge-reranker-base"
    export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
    export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:8090"
    export TEI_RERANKING_ENDPOINT="http://${ip_address}:8808"
    export RAY_Serve_LLM_ENDPOINT="http://${ip_address}:8008"
    export LLM_SERVICE_PORT=9000
    export REDIS_URL="redis://${ip_address}:6379"
    export INDEX_NAME="rag-redis"
    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
    export MEGA_SERVICE_HOST_IP=${ip_address}
    export EMBEDDING_SERVICE_HOST_IP=${ip_address}
    export RETRIEVER_SERVICE_HOST_IP=${ip_address}
    export RERANK_SERVICE_HOST_IP=${ip_address}
    export LLM_SERVICE_HOST_IP=${ip_address}
    export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/chatqna"
    export DATAPREP_SERVICE_ENDPOINT="http://${ip_address}:6007/v1/dataprep"

    sed -i "s/backend_address/$ip_address/g" $WORKPATH/docker/ui/svelte/.env

    if [[ "$IMAGE_REPO" != "" ]]; then
        # Replace the container name with a test-specific name
        echo "using image repository $IMAGE_REPO and image tag $IMAGE_TAG"
        sed -i "s#image: opea/chatqna:latest#image: opea/chatqna:${IMAGE_TAG}#g" compose_ray_serve.yaml
        sed -i "s#image: opea/chatqna-ui:latest#image: opea/chatqna-ui:${IMAGE_TAG}#g" compose_ray_serve.yaml
        sed -i "s#image: opea/chatqna-conversation-ui:latest#image: opea/chatqna-conversation-ui:${IMAGE_TAG}#g" compose_ray_serve.yaml
        sed -i "s#image: opea/*#image: ${IMAGE_REPO}opea/#g" compose_ray_serve.yaml
        sed -i "s#image: ${IMAGE_REPO}opea/tei-gaudi:latest#image: opea/tei-gaudi:latest#g" compose_ray_serve.yaml
        echo "cat compose_ray_serve.yaml"
        cat compose_ray_serve.yaml
    fi

    # Start Docker Containers
    docker compose -f compose_ray_serve.yaml up -d
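
    # Wait up to ~180s for the Ray head to log "Connected", which this test
    # uses as the readiness signal before any requests are sent.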
    n=0
    until [[ "$n" -ge 180 ]]; do
        docker logs ray-gaudi-server > ray_service_start.log
        if grep -q Connected ray_service_start.log; then
            break
        fi
        sleep 1s
        n=$((n+1))
    done
}
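
# Usage: validate_services URL EXPECTED_RESULT SERVICE_NAME DOCKER_NAME INPUT_DATA
# POSTs INPUT_DATA as JSON to URL, requires HTTP 200 and EXPECTED_RESULT in the
# response body, and dumps the container's logs to $LOG_PATH on any mismatch.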
function validate_services() {
    local URL="$1"
    local EXPECTED_RESULT="$2"
    local SERVICE_NAME="$3"
    local DOCKER_NAME="$4"
    local INPUT_DATA="$5"

    local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
    if [ "$HTTP_STATUS" -eq 200 ]; then
        echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."

        local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log)

        if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then
            echo "[ $SERVICE_NAME ] Content is as expected."
        else
            echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT"
            docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
            exit 1
        fi
    else
        echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
        docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
        exit 1
    fi
    sleep 1s
}

function validate_microservices() {
    # Check if the microservices are running correctly.

    # tei for embedding service
    validate_services \
        "${ip_address}:8090/embed" \
        "\[\[" \
        "tei-embedding" \
        "tei-embedding-gaudi-server" \
        '{"inputs":"What is Deep Learning?"}'

    # embedding microservice
    validate_services \
        "${ip_address}:6000/v1/embeddings" \
        '"text":"What is Deep Learning?","embedding":\[' \
        "embedding" \
        "embedding-tei-server" \
        '{"text":"What is Deep Learning?"}'

    sleep 1m # retrieval can't curl as expected, try to wait for more time

    # retrieval microservice
    test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
    validate_services \
        "${ip_address}:7000/v1/retrieval" \
        " " \
        "retrieval" \
        "retriever-redis-server" \
        "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${test_embedding}}"

    # tei for rerank microservice
    validate_services \
        "${ip_address}:8808/rerank" \
        '{"index":1,"score":' \
        "tei-rerank" \
        "tei-reranking-gaudi-server" \
        '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}'

    # rerank microservice
    validate_services \
        "${ip_address}:8000/v1/reranking" \
        "Deep learning is..." \
        "rerank" \
        "reranking-tei-gaudi-server" \
        '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}'

    # ray for llm service
    validate_services \
        "${ip_address}:8008/v1/chat/completions" \
        "content" \
        "ray-llm" \
        "ray-gaudi-server" \
        '{"model": "neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 32 }'

    # llm microservice
    validate_services \
        "${ip_address}:9000/v1/chat/completions" \
        "data: " \
        "llm" \
        "llm-ray-gaudi-server" \
        '{"query":"What is Deep Learning?"}'
}

function validate_megaservice() {
    # Curl the Mega Service
    validate_services \
        "${ip_address}:8888/v1/chatqna" \
        "billion" \
        "mega-chatqna" \
        "chatqna-gaudi-backend-server" \
        '{"messages": "What is the revenue of Nike in 2023?"}'
}

function validate_frontend() {
    cd $WORKPATH/docker/ui/svelte
    local conda_env_name="OPEA_e2e"
    export PATH=${HOME}/miniforge3/bin/:$PATH
    # conda remove -n ${conda_env_name} --all -y
    # conda create -n ${conda_env_name} python=3.12 -y
    source activate ${conda_env_name}

    sed -i "s/localhost/$ip_address/g" playwright.config.ts

    # conda install -c conda-forge nodejs -y
    npm install && npm ci && npx playwright install --with-deps
    node -v && npm -v && pip list

    exit_status=0
    npx playwright test || exit_status=$?

    if [ $exit_status -ne 0 ]; then
        echo "[TEST INFO]: ---------frontend test failed---------"
        exit $exit_status
    else
        echo "[TEST INFO]: ---------frontend test passed---------"
    fi
}

function stop_docker() {
    cd $WORKPATH/docker/gaudi
    docker compose -f compose_ray_serve.yaml down
}

function main() {
    stop_docker
    if [[ "$IMAGE_REPO" == "" ]]; then build_docker_images; fi

    start_time=$(date +%s)
    start_services
    end_time=$(date +%s)
    duration=$((end_time-start_time))
    echo "Mega service start duration is $duration s"

    validate_microservices
    validate_megaservice
    # validate_frontend

    stop_docker
    echo y | docker system prune
}
main