diff --git a/ChatQnA/docker/gaudi/README.md b/ChatQnA/docker/gaudi/README.md
index 43ec84272..fc746d275 100644
--- a/ChatQnA/docker/gaudi/README.md
+++ b/ChatQnA/docker/gaudi/README.md
@@ -69,20 +69,6 @@ Build microservice docker.
 docker build --no-cache -t opea/llm-vllm-ray:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/vllm-ray/docker/Dockerfile.microservice .
 ```
 
-#### 5.4 Use Ray Serve
-
-Build Ray Serve docker.
-
-```bash
-docker build --no-cache -t ray_serve:habana --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/ray_serve/docker/Dockerfile.rayserve .
-```
-
-Build microservice docker.
-
-```bash
-docker build --no-cache -t opea/llm-ray:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/ray_serve/docker/Dockerfile.microservice .
-```
-
 ### 6. Build Dataprep Image
 
 ```bash
@@ -159,7 +145,7 @@ Then run the command `docker images`, you will have the following 8 Docker Image
 1. `opea/embedding-tei:latest`
 2. `opea/retriever-redis:latest`
 3. `opea/reranking-tei:latest`
-4. `opea/llm-tgi:latest` or `opea/llm-vllm:latest` or `opea/llm-vllm-ray:latest` or `opea/llm-ray:latest`
+4. `opea/llm-tgi:latest` or `opea/llm-vllm:latest` or `opea/llm-vllm-ray:latest`
 5. `opea/tei-gaudi:latest`
 6. `opea/dataprep-redis:latest`
 7. `opea/chatqna:latest` or `opea/chatqna-guardrails:latest`
@@ -192,7 +178,6 @@ export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
 export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
 export vLLM_LLM_ENDPOINT="http://${host_ip}:8008"
 export vLLM_RAY_LLM_ENDPOINT="http://${host_ip}:8008"
-export RAY_Serve_LLM_ENDPOINT="http://${host_ip}:8008"
 export LLM_SERVICE_PORT=9000
 export REDIS_URL="redis://${host_ip}:6379"
 export INDEX_NAME="rag-redis"
@@ -243,12 +228,6 @@ If use vllm-on-ray for llm backend.
 docker compose -f compose_vllm_ray.yaml up -d
 ```
 
-If use ray serve for llm backend.
-
-```bash
-docker compose -f compose_ray_serve.yaml up -d
-```
-
 If you want to enable guardrails microservice in the pipeline, please follow the below command instead:
 
 ```bash
@@ -342,13 +321,6 @@ curl http://${your_ip}:8008/v1/chat/completions \
   -d '{"model": "${LLM_MODEL_ID}", "messages": [{"role": "user", "content": "What is Deep Learning?"}]}'
 ```
 
-```bash
-#Ray Serve Service
-curl http://${your_ip}:8008/v1/chat/completions \
-  -H "Content-Type: application/json" \
-  -d '{"model": "${LLM_MODEL_ID_NAME}", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 32 }'
-```
-
 7. LLM Microservice
 
 ```bash
diff --git a/ChatQnA/docker/gaudi/compose_ray_serve.yaml b/ChatQnA/docker/gaudi/compose_ray_serve.yaml
deleted file mode 100644
index 3bc7c9f09..000000000
--- a/ChatQnA/docker/gaudi/compose_ray_serve.yaml
+++ /dev/null
@@ -1,202 +0,0 @@
-
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-version: "3.8"
-
-services:
-  redis-vector-db:
-    image: redis/redis-stack:7.2.0-v9
-    container_name: redis-vector-db
-    ports:
-      - "6379:6379"
-      - "8001:8001"
-  dataprep-redis-service:
-    image: opea/dataprep-redis:latest
-    container_name: dataprep-redis-server
-    depends_on:
-      - redis-vector-db
-    ports:
-      - "6007:6007"
-      - "6008:6008"
-      - "6009:6009"
-    environment:
-      no_proxy: ${no_proxy}
-      http_proxy: ${http_proxy}
-      https_proxy: ${https_proxy}
-      REDIS_URL: ${REDIS_URL}
-      INDEX_NAME: ${INDEX_NAME}
-  tei-embedding-service:
-    image: opea/tei-gaudi:latest
-    container_name: tei-embedding-gaudi-server
-    ports:
-      - "8090:80"
-    volumes:
-      - "./data:/data"
-    runtime: habana
-    cap_add:
-      - SYS_NICE
-    ipc: host
-    environment:
-      no_proxy: ${no_proxy}
-      http_proxy: ${http_proxy}
-      https_proxy: ${https_proxy}
-      HABANA_VISIBLE_DEVICES: all
-      OMPI_MCA_btl_vader_single_copy_mechanism: none
-      MAX_WARMUP_SEQUENCE_LENGTH: 512
-    command: --model-id ${EMBEDDING_MODEL_ID}
-  embedding:
-    image: opea/embedding-tei:latest
-    container_name: embedding-tei-server
-    depends_on:
-      - tei-embedding-service
-    ports:
-      - "6000:6000"
-    ipc: host
-    environment:
-      no_proxy: ${no_proxy}
-      http_proxy: ${http_proxy}
-      https_proxy: ${https_proxy}
-      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
-      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
-      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
-      LANGCHAIN_PROJECT: "opea-embedding-service"
-    restart: unless-stopped
-  retriever:
-    image: opea/retriever-redis:latest
-    container_name: retriever-redis-server
-    depends_on:
-      - redis-vector-db
-    ports:
-      - "7000:7000"
-    ipc: host
-    environment:
-      no_proxy: ${no_proxy}
-      http_proxy: ${http_proxy}
-      https_proxy: ${https_proxy}
-      REDIS_URL: ${REDIS_URL}
-      INDEX_NAME: ${INDEX_NAME}
-      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
-      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
-      LANGCHAIN_PROJECT: "opea-retriever-service"
-    restart: unless-stopped
-  tei-reranking-service:
-    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
-    container_name: tei-reranking-gaudi-server
-    ports:
-      - "8808:80"
-    volumes:
-      - "./data:/data"
-    shm_size: 1g
-    environment:
-      no_proxy: ${no_proxy}
-      http_proxy: ${http_proxy}
-      https_proxy: ${https_proxy}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-      HF_HUB_DISABLE_PROGRESS_BARS: 1
-      HF_HUB_ENABLE_HF_TRANSFER: 0
-    command: --model-id ${RERANK_MODEL_ID} --auto-truncate
-  reranking:
-    image: opea/reranking-tei:latest
-    container_name: reranking-tei-gaudi-server
-    depends_on:
-      - tei-reranking-service
-    ports:
-      - "8000:8000"
-    ipc: host
-    environment:
-      no_proxy: ${no_proxy}
-      http_proxy: ${http_proxy}
-      https_proxy: ${https_proxy}
-      TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-      HF_HUB_DISABLE_PROGRESS_BARS: 1
-      HF_HUB_ENABLE_HF_TRANSFER: 0
-      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
-      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
-      LANGCHAIN_PROJECT: "opea-reranking-service"
-    restart: unless-stopped
-  ray-service:
-    image: ray_serve:habana
-    container_name: ray-gaudi-server
-    ports:
-      - "8008:80"
-    volumes:
-      - "./data:/data"
-    environment:
-      no_proxy: ${no_proxy}
-      http_proxy: ${http_proxy}
-      https_proxy: ${https_proxy}
-      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-      HABANA_VISIBLE_DEVICES: all
-      OMPI_MCA_btl_vader_single_copy_mechanism: none
-      LLM_MODEL: ${LLM_MODEL_ID}
-      TRUST_REMOTE_CODE: True
-    runtime: habana
-    cap_add:
-      - SYS_NICE
-    ipc: host
-    command: /bin/bash -c "ray start --head && python api_server_openai.py --port_number 80 --model_id_or_path $LLM_MODEL --chat_processor ChatModelLlama --num_cpus_per_worker 8 --num_hpus_per_worker 1"
-  llm:
-    image: opea/llm-ray:latest
-    container_name: llm-ray-gaudi-server
-    depends_on:
-      - ray-service
-    ports:
-      - "9000:9000"
-    ipc: host
-    environment:
-      no_proxy: ${no_proxy}
-      http_proxy: ${http_proxy}
-      https_proxy: ${https_proxy}
-      RAY_Serve_ENDPOINT: ${RAY_Serve_LLM_ENDPOINT}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-      LLM_MODEL: ${LLM_MODEL_ID}
-    restart: unless-stopped
-  chaqna-gaudi-backend-server:
-    image: opea/chatqna:latest
-    container_name: chatqna-gaudi-backend-server
-    depends_on:
-      - redis-vector-db
-      - tei-embedding-service
-      - embedding
-      - retriever
-      - tei-reranking-service
-      - reranking
-      - ray-service
-      - llm
-    ports:
-      - "8888:8888"
-    environment:
-      - no_proxy=${no_proxy}
-      - https_proxy=${https_proxy}
-      - http_proxy=${http_proxy}
-      - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
-      - EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
-      - RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
-      - RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP}
-      - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
-      - LLM_SERVICE_PORT=${LLM_SERVICE_PORT}
-    ipc: host
-    restart: always
-  chaqna-gaudi-ui-server:
-    image: opea/chatqna-ui:latest
-    container_name: chatqna-gaudi-ui-server
-    depends_on:
-      - chaqna-gaudi-backend-server
-    ports:
-      - "5173:5173"
-    environment:
-      - no_proxy=${no_proxy}
-      - https_proxy=${https_proxy}
-      - http_proxy=${http_proxy}
-      - CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
-      - UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT}
-      - GET_FILE=${DATAPREP_GET_FILE_ENDPOINT}
-      - DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT}
-    ipc: host
-    restart: always
-
-networks:
-  default:
-    driver: bridge
diff --git a/ChatQnA/tests/test_chatqna_ray_on_gaudi.sh b/ChatQnA/tests/test_chatqna_ray_on_gaudi.sh
deleted file mode 100644
index 30c03c77a..000000000
--- a/ChatQnA/tests/test_chatqna_ray_on_gaudi.sh
+++ /dev/null
@@ -1,245 +0,0 @@
-#!/bin/bash
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-set -e
-echo "IMAGE_REPO=${IMAGE_REPO}"
-
-WORKPATH=$(dirname "$PWD")
-LOG_PATH="$WORKPATH/tests"
-ip_address=$(hostname -I | awk '{print $1}')
-
-function build_docker_images() {
-    cd $WORKPATH
-    git clone https://github.com/opea-project/GenAIComps.git
-    cd GenAIComps
-
-    docker build -t opea/embedding-tei:latest -f comps/embeddings/langchain/docker/Dockerfile .
-    docker build -t opea/retriever-redis:latest -f comps/retrievers/langchain/redis/docker/Dockerfile .
-    docker build -t opea/reranking-tei:latest -f comps/reranks/tei/docker/Dockerfile .
-    docker build -t ray_serve:habana -f comps/llms/text-generation/ray_serve/docker/Dockerfile.rayserve .
-    docker build -t opea/llm-ray:latest -f comps/llms/text-generation/ray_serve/docker/Dockerfile.microservice .
-    docker build -t opea/dataprep-redis:latest -f comps/dataprep/redis/langchain/docker/Dockerfile .
-
-# cd ..
-# git clone https://github.com/huggingface/tei-gaudi
-# cd tei-gaudi/
-# docker build --no-cache -f Dockerfile-hpu -t opea/tei-gaudi:latest .
-
-    docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
-
-    cd $WORKPATH/docker
-    docker build --no-cache -t opea/chatqna:latest -f Dockerfile .
-
-    cd $WORKPATH/docker/ui
-    docker build --no-cache -t opea/chatqna-ui:latest -f docker/Dockerfile .
-
-    docker images
-}
-
-function start_services() {
-    # build tei-gaudi for each test instead of pull from local registry
-    cd $WORKPATH
-    git clone https://github.com/huggingface/tei-gaudi
-    cd tei-gaudi/
-    docker build --no-cache -f Dockerfile-hpu -t opea/tei-gaudi:latest .
-
-    cd $WORKPATH/docker/gaudi
-    export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
-    export RERANK_MODEL_ID="BAAI/bge-reranker-base"
-    export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
-    export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:8090"
-    export TEI_RERANKING_ENDPOINT="http://${ip_address}:8808"
-    export RAY_Serve_LLM_ENDPOINT="http://${ip_address}:8008"
-    export LLM_SERVICE_PORT=9000
-    export REDIS_URL="redis://${ip_address}:6379"
-    export INDEX_NAME="rag-redis"
-    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
-    export MEGA_SERVICE_HOST_IP=${ip_address}
-    export EMBEDDING_SERVICE_HOST_IP=${ip_address}
-    export RETRIEVER_SERVICE_HOST_IP=${ip_address}
-    export RERANK_SERVICE_HOST_IP=${ip_address}
-    export LLM_SERVICE_HOST_IP=${ip_address}
-    export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/chatqna"
-    export DATAPREP_SERVICE_ENDPOINT="http://${ip_address}:6007/v1/dataprep"
-
-    sed -i "s/backend_address/$ip_address/g" $WORKPATH/docker/ui/svelte/.env
-
-    if [[ "$IMAGE_REPO" != "" ]]; then
-        # Replace the container name with a test-specific name
-        echo "using image repository $IMAGE_REPO and image tag $IMAGE_TAG"
-        sed -i "s#image: opea/chatqna:latest#image: opea/chatqna:${IMAGE_TAG}#g" compose_ray_serve.yaml
-        sed -i "s#image: opea/chatqna-ui:latest#image: opea/chatqna-ui:${IMAGE_TAG}#g" compose_ray_serve.yaml
-        sed -i "s#image: opea/chatqna-conversation-ui:latest#image: opea/chatqna-conversation-ui:${IMAGE_TAG}#g" compose_ray_serve.yaml
-        sed -i "s#image: opea/*#image: ${IMAGE_REPO}opea/#g" compose_ray_serve.yaml
-        sed -i "s#image: ${IMAGE_REPO}opea/tei-gaudi:latest#image: opea/tei-gaudi:latest#g" compose_ray_serve.yaml
-        echo "cat compose_ray_serve.yaml"
-        cat compose_ray_serve.yaml
-    fi
-
-    # Start Docker Containers
-    docker compose -f compose_ray_serve.yaml up -d
-    n=0
-    until [[ "$n" -ge 180 ]]; do
-        docker logs ray-gaudi-server > ray_service_start.log
-        if grep -q Connected ray_service_start.log; then
-            break
-        fi
-        sleep 1s
-        n=$((n+1))
-    done
-}
-
-function validate_services() {
-    local URL="$1"
-    local EXPECTED_RESULT="$2"
-    local SERVICE_NAME="$3"
-    local DOCKER_NAME="$4"
-    local INPUT_DATA="$5"
-
-    local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
-    if [ "$HTTP_STATUS" -eq 200 ]; then
-        echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
-
-        local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log)
-
-        if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then
-            echo "[ $SERVICE_NAME ] Content is as expected."
-        else
-            echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT"
-            docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
-            exit 1
-        fi
-    else
-        echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
-        docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
-        exit 1
-    fi
-    sleep 1s
-}
-
-function validate_microservices() {
-    # Check if the microservices are running correctly.
-
-    # tei for embedding service
-    validate_services \
-        "${ip_address}:8090/embed" \
-        "\[\[" \
-        "tei-embedding" \
-        "tei-embedding-gaudi-server" \
-        '{"inputs":"What is Deep Learning?"}'
-
-    # embedding microservice
-    validate_services \
-        "${ip_address}:6000/v1/embeddings" \
-        '"text":"What is Deep Learning?","embedding":\[' \
-        "embedding" \
-        "embedding-tei-server" \
-        '{"text":"What is Deep Learning?"}'
-
-    sleep 1m # retrieval can't curl as expected, try to wait for more time
-
-    # retrieval microservice
-    test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
-    validate_services \
-        "${ip_address}:7000/v1/retrieval" \
-        " " \
-        "retrieval" \
-        "retriever-redis-server" \
-        "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${test_embedding}}"
-
-    # tei for rerank microservice
-    validate_services \
-        "${ip_address}:8808/rerank" \
-        '{"index":1,"score":' \
-        "tei-rerank" \
-        "tei-reranking-gaudi-server" \
-        '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}'
-
-    # rerank microservice
-    validate_services \
-        "${ip_address}:8000/v1/reranking" \
-        "Deep learning is..." \
-        "rerank" \
-        "reranking-tei-gaudi-server" \
-        '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}'
-
-    # ray for llm service
-    validate_services \
-        "${ip_address}:8008/v1/chat/completions" \
-        "content" \
-        "ray-llm" \
-        "ray-gaudi-server" \
-        '{"model": "neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 32 }'
-
-    # llm microservice
-    validate_services \
-        "${ip_address}:9000/v1/chat/completions" \
-        "data: " \
-        "llm" \
-        "llm-ray-gaudi-server" \
-        '{"query":"What is Deep Learning?"}'
-
-}
-
-function validate_megaservice() {
-    # Curl the Mega Service
-    validate_services \
-        "${ip_address}:8888/v1/chatqna" \
-        "billion" \
-        "mega-chatqna" \
-        "chatqna-gaudi-backend-server" \
-        '{"messages": "What is the revenue of Nike in 2023?"}'
-
-}
-
-function validate_frontend() {
-    cd $WORKPATH/docker/ui/svelte
-    local conda_env_name="OPEA_e2e"
-    export PATH=${HOME}/miniforge3/bin/:$PATH
-# conda remove -n ${conda_env_name} --all -y
-# conda create -n ${conda_env_name} python=3.12 -y
-    source activate ${conda_env_name}
-
-    sed -i "s/localhost/$ip_address/g" playwright.config.ts
-
-# conda install -c conda-forge nodejs -y
-    npm install && npm ci && npx playwright install --with-deps
-    node -v && npm -v && pip list
-
-    exit_status=0
-    npx playwright test || exit_status=$?
-
-    if [ $exit_status -ne 0 ]; then
-        echo "[TEST INFO]: ---------frontend test failed---------"
-        exit $exit_status
-    else
-        echo "[TEST INFO]: ---------frontend test passed---------"
-    fi
-}
-
-function stop_docker() {
-    cd $WORKPATH/docker/gaudi
-    docker compose -f compose_ray_serve.yaml down
-}
-
-function main() {
-
-    stop_docker
-    if [[ "$IMAGE_REPO" == "" ]]; then build_docker_images; fi
-    start_time=$(date +%s)
-    start_services
-    end_time=$(date +%s)
-    duration=$((end_time-start_time))
-    echo "Mega service start duration is $duration s"
-
-    validate_microservices
-    validate_megaservice
-    # validate_frontend
-
-    stop_docker
-    echo y | docker system prune
-
-}
-
-main