diff --git a/AudioQnA/audioqna.py b/AudioQnA/audioqna.py
index f74e58053..dcb59633c 100644
--- a/AudioQnA/audioqna.py
+++ b/AudioQnA/audioqna.py
@@ -16,7 +16,7 @@ SPEECHT5_SERVER_HOST_IP = os.getenv("SPEECHT5_SERVER_HOST_IP", "0.0.0.0")
 SPEECHT5_SERVER_PORT = int(os.getenv("SPEECHT5_SERVER_PORT", 7055))
 LLM_SERVER_HOST_IP = os.getenv("LLM_SERVER_HOST_IP", "0.0.0.0")
 LLM_SERVER_PORT = int(os.getenv("LLM_SERVER_PORT", 3006))
-LLM_MODEL_ID = os.getenv("LLM_MODEL_ID", "Intel/neural-chat-7b-v3-3")
+LLM_MODEL_ID = os.getenv("LLM_MODEL_ID", "meta-llama/Meta-Llama-3-8B-Instruct")
 
 
 def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
diff --git a/AudioQnA/audioqna_multilang.py b/AudioQnA/audioqna_multilang.py
index edc14cc93..8f4a65e74 100644
--- a/AudioQnA/audioqna_multilang.py
+++ b/AudioQnA/audioqna_multilang.py
@@ -17,7 +17,7 @@ GPT_SOVITS_SERVER_HOST_IP = os.getenv("GPT_SOVITS_SERVER_HOST_IP", "0.0.0.0")
 GPT_SOVITS_SERVER_PORT = int(os.getenv("GPT_SOVITS_SERVER_PORT", 9088))
 LLM_SERVER_HOST_IP = os.getenv("LLM_SERVER_HOST_IP", "0.0.0.0")
 LLM_SERVER_PORT = int(os.getenv("LLM_SERVER_PORT", 8888))
-LLM_MODEL_ID = os.getenv("LLM_MODEL_ID", "Intel/neural-chat-7b-v3-3")
+LLM_MODEL_ID = os.getenv("LLM_MODEL_ID", "meta-llama/Meta-Llama-3-8B-Instruct")
 
 
 def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
diff --git a/AudioQnA/docker_compose/intel/cpu/xeon/README.md b/AudioQnA/docker_compose/intel/cpu/xeon/README.md
index 3f91c02e0..aabaf3659 100644
--- a/AudioQnA/docker_compose/intel/cpu/xeon/README.md
+++ b/AudioQnA/docker_compose/intel/cpu/xeon/README.md
@@ -2,6 +2,10 @@
 
 This document outlines the deployment process for a AudioQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on Intel Xeon server.
 
+The default pipeline deploys vLLM as the LLM serving component. TGI can be used as the LLM serving backend instead; see the [Start the MegaService](#-start-the-megaservice) section on this page.
+
+Note: The default LLM is `meta-llama/Meta-Llama-3-8B-Instruct`. Before deploying the application, make sure you have either requested and been granted access to it on [Hugging Face](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) or downloaded the model locally from [ModelScope](https://www.modelscope.cn/models).
+
 ## 🚀 Build Docker images
 
 ### 1. Source Code install GenAIComps
@@ -17,9 +21,15 @@ cd GenAIComps
 docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/integrations/dependency/whisper/Dockerfile .
 ```
 
-### 3. Build LLM Image
+### 3. Build vLLM Image
 
-Intel Xeon optimized image hosted in huggingface repo will be used for TGI service: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu (https://github.com/huggingface/text-generation-inference)
+```bash
+git clone https://github.com/vllm-project/vllm.git
+cd ./vllm/
+VLLM_VER="$(git describe --tags "$(git rev-list --tags --max-count=1)" )"
+git checkout ${VLLM_VER}
+docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile.cpu -t opea/vllm:latest --shm-size=128g .
+```
 
 ### 4. Build TTS Image
 
@@ -43,9 +53,10 @@ docker build --no-cache -t opea/audioqna:latest --build-arg https_proxy=$https_p
 Then run the command `docker images`, you will have following images ready:
 
 1. `opea/whisper:latest`
-2. `opea/speecht5:latest`
-3. `opea/audioqna:latest`
-4. `opea/gpt-sovits:latest` (optional)
+2. `opea/vllm:latest`
+3. `opea/speecht5:latest`
+4. `opea/audioqna:latest`
+5. `opea/gpt-sovits:latest` (optional)
 
 ## 🚀 Set the environment variables
 
@@ -55,7 +66,7 @@ Before starting the services with `docker compose`, you have to recheck the foll
 export host_ip= # export host_ip=$(hostname -I | awk '{print $1}')
 export HUGGINGFACEHUB_API_TOKEN=
 
-export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
+export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
 
 export MEGA_SERVICE_HOST_IP=${host_ip}
 export WHISPER_SERVER_HOST_IP=${host_ip}
@@ -73,40 +84,90 @@ export BACKEND_SERVICE_ENDPOINT=http://${host_ip}:3008/v1/audioqna
 
 or use set_env.sh file to setup environment variables.
 
-Note: Please replace with host_ip with your external IP address, do not use localhost.
+Note:
+
+- Please replace `host_ip` with your external IP address; do not use localhost.
+- If you are in a proxy environment, also set the proxy-related environment variables:
+
+```
+export http_proxy="Your_HTTP_Proxy"
+export https_proxy="Your_HTTPs_Proxy"
+# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
+export no_proxy="Your_No_Proxy",${host_ip},whisper-service,speecht5-service,gpt-sovits-service,tgi-service,vllm-service,audioqna-xeon-backend-server,audioqna-xeon-ui-server
+```
 
 ## 🚀 Start the MegaService
 
 ```bash
 cd GenAIExamples/AudioQnA/docker_compose/intel/cpu/xeon/
+```
+
+If using vLLM as the LLM serving backend:
+
+```
 docker compose up -d
 
 # multilang tts (optional)
 docker compose -f compose_multilang.yaml up -d
 ```
 
+If using TGI as the LLM serving backend:
+
+```
+docker compose -f compose_tgi.yaml up -d
+```
+
 ## 🚀 Test MicroServices
 
-```bash
-# whisper service
-wget https://github.com/intel/intel-extension-for-transformers/raw/main/intel_extension_for_transformers/neural_chat/assets/audio/sample.wav
-curl http://${host_ip}:7066/v1/audio/transcriptions \
-  -H "Content-Type: multipart/form-data" \
-  -F file="@./sample.wav" \
-  -F model="openai/whisper-small"
+1. Whisper Service
 
-# tgi service
-curl http://${host_ip}:3006/generate \
-  -X POST \
-  -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
-  -H 'Content-Type: application/json'
+   ```bash
+   wget https://github.com/intel/intel-extension-for-transformers/raw/main/intel_extension_for_transformers/neural_chat/assets/audio/sample.wav
+   curl http://${host_ip}:${WHISPER_SERVER_PORT}/v1/audio/transcriptions \
+     -H "Content-Type: multipart/form-data" \
+     -F file="@./sample.wav" \
+     -F model="openai/whisper-small"
+   ```
 
-# speecht5 service
-curl http://${host_ip}:7055/v1/audio/speech -XPOST -d '{"input": "Who are you?"}' -H 'Content-Type: application/json' --output speech.mp3
+2. LLM backend Service
 
-# gpt-sovits service (optional)
-curl http://${host_ip}:9880/v1/audio/speech -XPOST -d '{"input": "Who are you?"}' -H 'Content-Type: application/json' --output speech.mp3
-```
+   During the first startup, this service takes extra time to download, load, and warm up the model. Once that finishes, the service is ready and the container (`vllm-service` or `tgi-service`) status shown by `docker ps` will be `healthy`. Before that, the status will be `health: starting`.
+
+   Or try the command below to check whether the LLM serving backend is ready.
+
+   ```bash
+   # vLLM service
+   docker logs vllm-service 2>&1 | grep complete
+   # If the service is ready, you will get the response like below.
+   INFO: Application startup complete.
+   ```
+
+   ```bash
+   # TGI service
+   docker logs tgi-service | grep Connected
+   # If the service is ready, you will get the response like below.
+   2024-09-03T02:47:53.402023Z INFO text_generation_router::server: router/src/server.rs:2311: Connected
+   ```
+
+   Then try the `cURL` command below to validate services.
+
+   ```bash
+   # either vLLM or TGI service
+   curl http://${host_ip}:${LLM_SERVER_PORT}/v1/chat/completions \
+     -X POST \
+     -d '{"model": "meta-llama/Meta-Llama-3-8B-Instruct", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens":17}' \
+     -H 'Content-Type: application/json'
+   ```
+
+3. TTS Service
+
+   ```bash
+   # speecht5 service
+   curl http://${host_ip}:${SPEECHT5_SERVER_PORT}/v1/audio/speech -XPOST -d '{"input": "Who are you?"}' -H 'Content-Type: application/json' --output speech.mp3
+
+   # gpt-sovits service (optional)
+   curl http://${host_ip}:${GPT_SOVITS_SERVER_PORT}/v1/audio/speech -XPOST -d '{"input": "Who are you?"}' -H 'Content-Type: application/json' --output speech.mp3
+   ```
 
 ## 🚀 Test MegaService
 
diff --git a/AudioQnA/docker_compose/intel/cpu/xeon/compose.yaml b/AudioQnA/docker_compose/intel/cpu/xeon/compose.yaml
index 3b47780d8..1fe5e6b2a 100644
--- a/AudioQnA/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/AudioQnA/docker_compose/intel/cpu/xeon/compose.yaml
@@ -6,7 +6,7 @@ services:
     image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
     container_name: whisper-service
     ports:
-      - "7066:7066"
+      - ${WHISPER_SERVER_PORT:-7066}:7066
     ipc: host
     environment:
       no_proxy: ${no_proxy}
@@ -17,38 +17,41 @@ services:
     image: ${REGISTRY:-opea}/speecht5:${TAG:-latest}
     container_name: speecht5-service
     ports:
-      - "7055:7055"
+      - ${SPEECHT5_SERVER_PORT:-7055}:7055
     ipc: host
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
     restart: unless-stopped
-  tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
-    container_name: tgi-service
+  vllm-service:
+    image: ${REGISTRY:-opea}/vllm:${TAG:-latest}
+    container_name: vllm-service
     ports:
-      - "3006:80"
+      - ${LLM_SERVER_PORT:-3006}:80
     volumes:
-      - "${MODEL_CACHE:-./data}:/data"
-    shm_size: 1g
+      - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub"
+    shm_size: 128g
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
       HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      LLM_MODEL_ID: ${LLM_MODEL_ID}
+      VLLM_TORCH_PROFILER_DIR: "/mnt"
+      LLM_SERVER_PORT: ${LLM_SERVER_PORT}
     healthcheck:
-      test: ["CMD-SHELL", "curl -f http://$host_ip:3006/health || exit 1"]
+      test: ["CMD-SHELL", "curl -f http://$host_ip:${LLM_SERVER_PORT}/health || exit 1"]
       interval: 10s
       timeout: 10s
       retries: 100
-    command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
+    command: --model ${LLM_MODEL_ID} --host 0.0.0.0 --port 80
   audioqna-xeon-backend-server:
     image: ${REGISTRY:-opea}/audioqna:${TAG:-latest}
     container_name: audioqna-xeon-backend-server
     depends_on:
       - whisper-service
-      - tgi-service
+      - vllm-service
       - speecht5-service
     ports:
       - "3008:8888"
diff --git a/AudioQnA/docker_compose/intel/cpu/xeon/compose_multilang.yaml b/AudioQnA/docker_compose/intel/cpu/xeon/compose_multilang.yaml
index fde5a5690..3aecacf59 100644
--- a/AudioQnA/docker_compose/intel/cpu/xeon/compose_multilang.yaml
+++ b/AudioQnA/docker_compose/intel/cpu/xeon/compose_multilang.yaml
@@ -6,7 +6,7 @@ services:
     image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
     container_name: whisper-service
     ports:
-      - "7066:7066"
+      - ${WHISPER_SERVER_PORT:-7066}:7066
     ipc: host
     environment:
       no_proxy: ${no_proxy}
@@ -18,27 +18,35 @@ services:
     image: ${REGISTRY:-opea}/gpt-sovits:${TAG:-latest}
     container_name: gpt-sovits-service
     ports:
-      - "9880:9880"
+      - ${GPT_SOVITS_SERVER_PORT:-9880}:9880
     ipc: host
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
     restart: unless-stopped
-  tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
-    container_name: tgi-service
+  vllm-service:
+    image: ${REGISTRY:-opea}/vllm:${TAG:-latest}
+    container_name: vllm-service
     ports:
-      - "3006:80"
+      - ${LLM_SERVER_PORT:-3006}:80
     volumes:
-      - "${MODEL_CACHE:-./data}:/data"
-    shm_size: 1g
+      - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub"
+    shm_size: 128g
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
       HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-    command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
+      LLM_MODEL_ID: ${LLM_MODEL_ID}
+      VLLM_TORCH_PROFILER_DIR: "/mnt"
+      LLM_SERVER_PORT: ${LLM_SERVER_PORT}
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://$host_ip:${LLM_SERVER_PORT}/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
+    command: --model ${LLM_MODEL_ID} --host 0.0.0.0 --port 80
   audioqna-xeon-backend-server:
     image: ${REGISTRY:-opea}/audioqna-multilang:${TAG:-latest}
     container_name: audioqna-xeon-backend-server
diff --git a/AudioQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml b/AudioQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml
new file mode 100644
index 000000000..d421f488f
--- /dev/null
+++ b/AudioQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml
@@ -0,0 +1,87 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+services:
+  whisper-service:
+    image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
+    container_name: whisper-service
+    ports:
+      - ${WHISPER_SERVER_PORT:-7066}:7066
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+    restart: unless-stopped
+  speecht5-service:
+    image: ${REGISTRY:-opea}/speecht5:${TAG:-latest}
+    container_name: speecht5-service
+    ports:
+      - ${SPEECHT5_SERVER_PORT:-7055}:7055
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+    restart: unless-stopped
+  tgi-service:
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    container_name: tgi-service
+    ports:
+      - ${LLM_SERVER_PORT:-3006}:80
+    volumes:
+      - "${MODEL_CACHE:-./data}:/data"
+    shm_size: 1g
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      LLM_SERVER_PORT: ${LLM_SERVER_PORT}
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://$host_ip:${LLM_SERVER_PORT}/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
+    command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
+  audioqna-xeon-backend-server:
+    image: ${REGISTRY:-opea}/audioqna:${TAG:-latest}
+    container_name: audioqna-xeon-backend-server
+    depends_on:
+      - whisper-service
+      - tgi-service
+      - speecht5-service
+    ports:
+      - "3008:8888"
+    environment:
+      - no_proxy=${no_proxy}
+      - https_proxy=${https_proxy}
+      - http_proxy=${http_proxy}
+      - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
+      - WHISPER_SERVER_HOST_IP=${WHISPER_SERVER_HOST_IP}
+      - WHISPER_SERVER_PORT=${WHISPER_SERVER_PORT}
+      - LLM_SERVER_HOST_IP=${LLM_SERVER_HOST_IP}
+      - LLM_SERVER_PORT=${LLM_SERVER_PORT}
+      - LLM_MODEL_ID=${LLM_MODEL_ID}
+      - SPEECHT5_SERVER_HOST_IP=${SPEECHT5_SERVER_HOST_IP}
+      - SPEECHT5_SERVER_PORT=${SPEECHT5_SERVER_PORT}
+    ipc: host
+    restart: always
+  audioqna-xeon-ui-server:
+    image: ${REGISTRY:-opea}/audioqna-ui:${TAG:-latest}
+    container_name: audioqna-xeon-ui-server
+    depends_on:
+      - audioqna-xeon-backend-server
+    ports:
+      - "5173:5173"
+    environment:
+      - no_proxy=${no_proxy}
+      - https_proxy=${https_proxy}
+      - http_proxy=${http_proxy}
+      - CHAT_URL=${BACKEND_SERVICE_ENDPOINT}
+    ipc: host
+    restart: always
+
+networks:
+  default:
+    driver: bridge
diff --git a/AudioQnA/docker_compose/intel/cpu/xeon/set_env.sh b/AudioQnA/docker_compose/intel/cpu/xeon/set_env.sh
index e98f6e04e..adc652f16 100644
--- a/AudioQnA/docker_compose/intel/cpu/xeon/set_env.sh
+++ b/AudioQnA/docker_compose/intel/cpu/xeon/set_env.sh
@@ -8,7 +8,7 @@ export host_ip=$(hostname -I | awk '{print $1}')
 export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
 
 #
-export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
+export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
 
 export MEGA_SERVICE_HOST_IP=${host_ip}
 export WHISPER_SERVER_HOST_IP=${host_ip}
diff --git a/AudioQnA/docker_compose/intel/hpu/gaudi/README.md b/AudioQnA/docker_compose/intel/hpu/gaudi/README.md
index b60253a14..602b99ea2 100644
--- a/AudioQnA/docker_compose/intel/hpu/gaudi/README.md
+++ b/AudioQnA/docker_compose/intel/hpu/gaudi/README.md
@@ -2,6 +2,10 @@
 
 This document outlines the deployment process for a AudioQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on Intel Gaudi server.
 
+The default pipeline deploys vLLM as the LLM serving component. TGI can be used as the LLM serving backend instead; see the [Start the MegaService](#-start-the-megaservice) section on this page.
+
+Note: The default LLM is `meta-llama/Meta-Llama-3-8B-Instruct`. Before deploying the application, make sure you have either requested and been granted access to it on [Hugging Face](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) or downloaded the model locally from [ModelScope](https://www.modelscope.cn/models).
+
 ## 🚀 Build Docker images
 
 ### 1. Source Code install GenAIComps
@@ -17,9 +21,15 @@ cd GenAIComps
 docker build -t opea/whisper-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/integrations/dependency/whisper/Dockerfile.intel_hpu .
 ```
 
-### 3. Build LLM Image
+### 3. Build vLLM Image
 
-Intel Xeon optimized image hosted in huggingface repo will be used for TGI service: ghcr.io/huggingface/tgi-gaudi:2.0.6 (https://github.com/huggingface/tgi-gaudi)
+```bash
+git clone https://github.com/HabanaAI/vllm-fork.git
+cd vllm-fork/
+VLLM_VER=$(git describe --tags "$(git rev-list --tags --max-count=1)")
+git checkout ${VLLM_VER}
+docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile.hpu -t opea/vllm-gaudi:latest --shm-size=128g .
+```
 
 ### 4. Build TTS Image
 
@@ -40,8 +48,9 @@ docker build --no-cache -t opea/audioqna:latest --build-arg https_proxy=$https_p
 Then run the command `docker images`, you will have following images ready:
 
 1. `opea/whisper-gaudi:latest`
-2. `opea/speecht5-gaudi:latest`
-3. `opea/audioqna:latest`
+2. `opea/vllm-gaudi:latest`
+3. `opea/speecht5-gaudi:latest`
+4. `opea/audioqna:latest`
 
 ## 🚀 Set the environment variables
 
@@ -51,7 +60,12 @@ Before starting the services with `docker compose`, you have to recheck the foll
 export host_ip= # export host_ip=$(hostname -I | awk '{print $1}')
 export HUGGINGFACEHUB_API_TOKEN=
 
-export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
+export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
+# set vLLM parameters
+export NUM_CARDS=1
+export BLOCK_SIZE=128
+export MAX_NUM_SEQS=256
+export MAX_SEQ_LEN_TO_CAPTURE=2048
 
 export MEGA_SERVICE_HOST_IP=${host_ip}
 export WHISPER_SERVER_HOST_IP=${host_ip}
@@ -65,37 +79,90 @@ export LLM_SERVER_PORT=3006
 
 export BACKEND_SERVICE_ENDPOINT=http://${host_ip}:3008/v1/audioqna
 ```
+
+or use set_env.sh file to setup environment variables.
+
+Note:
+
+- Please replace `host_ip` with your external IP address; do not use localhost.
+- If you are in a proxy environment, also set the proxy-related environment variables:
+
+```
+export http_proxy="Your_HTTP_Proxy"
+export https_proxy="Your_HTTPs_Proxy"
+# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
+export no_proxy="Your_No_Proxy",${host_ip},whisper-service,speecht5-service,tgi-service,vllm-service,audioqna-gaudi-backend-server,audioqna-gaudi-ui-server
+```
 
 ## 🚀 Start the MegaService
 
 > **_NOTE:_** Users will need at least three Gaudi cards for AudioQnA.
 
 ```bash
 cd GenAIExamples/AudioQnA/docker_compose/intel/hpu/gaudi/
+```
+
+If using vLLM as the LLM serving backend:
+
+```
 docker compose up -d
 ```
 
+If using TGI as the LLM serving backend:
+
+```
+docker compose -f compose_tgi.yaml up -d
+```
+
 ## 🚀 Test MicroServices
 
-```bash
-# whisper service
-curl http://${host_ip}:7066/v1/asr \
-  -X POST \
-  -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
-  -H 'Content-Type: application/json'
+1. Whisper Service
 
-# tgi service
-curl http://${host_ip}:3006/generate \
-  -X POST \
-  -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
-  -H 'Content-Type: application/json'
+   ```bash
+   curl http://${host_ip}:${WHISPER_SERVER_PORT}/v1/asr \
+     -X POST \
+     -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
+     -H 'Content-Type: application/json'
+   ```
 
-# speecht5 service
-curl http://${host_ip}:7055/v1/tts \
-  -X POST \
-  -d '{"text": "Who are you?"}' \
-  -H 'Content-Type: application/json'
+2. LLM backend Service
 
-```
+   During the first startup, this service takes extra time to download, load, and warm up the model. Once that finishes, the service is ready and the container (`vllm-gaudi-service` or `tgi-gaudi-service`) status shown by `docker ps` will be `healthy`. Before that, the status will be `health: starting`.
+
+   Or try the command below to check whether the LLM serving backend is ready.
+
+   ```bash
+   # vLLM service
+   docker logs vllm-gaudi-service 2>&1 | grep complete
+   # If the service is ready, you will get the response like below.
+   INFO: Application startup complete.
+   ```
+
+   ```bash
+   # TGI service
+   docker logs tgi-gaudi-service | grep Connected
+   # If the service is ready, you will get the response like below.
+   2024-09-03T02:47:53.402023Z INFO text_generation_router::server: router/src/server.rs:2311: Connected
+   ```
+
+   Then try the `cURL` command below to validate services.
+
+   ```bash
+   # either vLLM or TGI service
+   curl http://${host_ip}:${LLM_SERVER_PORT}/v1/chat/completions \
+     -X POST \
+     -d '{"model": "meta-llama/Meta-Llama-3-8B-Instruct", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens":17}' \
+     -H 'Content-Type: application/json'
+   ```
+
+3. TTS Service
+
+   ```bash
+   # speecht5 service
+   curl http://${host_ip}:${SPEECHT5_SERVER_PORT}/v1/tts \
+     -X POST \
+     -d '{"text": "Who are you?"}' \
+     -H 'Content-Type: application/json'
+   ```
 
 ## 🚀 Test MegaService
 
diff --git a/AudioQnA/docker_compose/intel/hpu/gaudi/compose.yaml b/AudioQnA/docker_compose/intel/hpu/gaudi/compose.yaml
index 9e43a355b..db93cd822 100644
--- a/AudioQnA/docker_compose/intel/hpu/gaudi/compose.yaml
+++ b/AudioQnA/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -6,7 +6,7 @@ services:
     image: ${REGISTRY:-opea}/whisper-gaudi:${TAG:-latest}
     container_name: whisper-service
     ports:
-      - "7066:7066"
+      - ${WHISPER_SERVER_PORT:-7066}:7066
     ipc: host
     environment:
       no_proxy: ${no_proxy}
@@ -22,7 +22,7 @@ services:
     image: ${REGISTRY:-opea}/speecht5-gaudi:${TAG:-latest}
     container_name: speecht5-service
     ports:
-      - "7055:7055"
+      - ${SPEECHT5_SERVER_PORT:-7055}:7055
     ipc: host
     environment:
       no_proxy: ${no_proxy}
@@ -34,28 +34,27 @@ services:
     cap_add:
       - SYS_NICE
     restart: unless-stopped
-  tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.3.1
-    container_name: tgi-gaudi-server
+  vllm-service:
+    image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest}
+    container_name: vllm-gaudi-service
     ports:
-      - "3006:80"
+      - ${LLM_SERVER_PORT:-3006}:80
     volumes:
-      - "${MODEL_CACHE:-./data}:/data"
+      - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub"
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
       HF_HUB_DISABLE_PROGRESS_BARS: 1
       HF_HUB_ENABLE_HF_TRANSFER: 0
       HABANA_VISIBLE_DEVICES: all
       OMPI_MCA_btl_vader_single_copy_mechanism: none
-      ENABLE_HPU_GRAPH: true
-      LIMIT_HPU_GRAPH: true
-      USE_FLASH_ATTENTION: true
-      FLASH_ATTENTION_RECOMPUTE: true
+      LLM_MODEL_ID: ${LLM_MODEL_ID}
+      VLLM_TORCH_PROFILER_DIR: "/mnt"
+      LLM_SERVER_PORT: ${LLM_SERVER_PORT}
     healthcheck:
-      test: ["CMD-SHELL", "curl -f http://$host_ip:3006/health || exit 1"]
+      test: ["CMD-SHELL", "curl -f http://$host_ip:${LLM_SERVER_PORT}/health || exit 1"]
       interval: 10s
       timeout: 10s
       retries: 100
@@ -63,13 +62,13 @@ services:
     cap_add:
       - SYS_NICE
     ipc: host
-    command: --model-id ${LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048
+    command: --model ${LLM_MODEL_ID} --tensor-parallel-size ${NUM_CARDS} --host 0.0.0.0 --port 80 --block-size ${BLOCK_SIZE} --max-num-seqs ${MAX_NUM_SEQS} --max-seq_len-to-capture ${MAX_SEQ_LEN_TO_CAPTURE}
   audioqna-gaudi-backend-server:
     image: ${REGISTRY:-opea}/audioqna:${TAG:-latest}
     container_name: audioqna-gaudi-backend-server
     depends_on:
       - whisper-service
-      - tgi-service
+      - vllm-service
       - speecht5-service
     ports:
       - "3008:8888"
diff --git a/AudioQnA/docker_compose/intel/hpu/gaudi/compose_tgi.yaml b/AudioQnA/docker_compose/intel/hpu/gaudi/compose_tgi.yaml
new file mode 100644
index 000000000..f14bd8cb9
--- /dev/null
+++ b/AudioQnA/docker_compose/intel/hpu/gaudi/compose_tgi.yaml
@@ -0,0 +1,108 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+services:
+  whisper-service:
+    image: ${REGISTRY:-opea}/whisper-gaudi:${TAG:-latest}
+    container_name: whisper-service
+    ports:
+      - ${WHISPER_SERVER_PORT:-7066}:7066
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      HABANA_VISIBLE_DEVICES: all
+      OMPI_MCA_btl_vader_single_copy_mechanism: none
+    runtime: habana
+    cap_add:
+      - SYS_NICE
+    restart: unless-stopped
+  speecht5-service:
+    image: ${REGISTRY:-opea}/speecht5-gaudi:${TAG:-latest}
+    container_name: speecht5-service
+    ports:
+      - ${SPEECHT5_SERVER_PORT:-7055}:7055
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      HABANA_VISIBLE_DEVICES: all
+      OMPI_MCA_btl_vader_single_copy_mechanism: none
+    runtime: habana
+    cap_add:
+      - SYS_NICE
+    restart: unless-stopped
+  tgi-service:
+    image: ghcr.io/huggingface/tgi-gaudi:2.3.1
+    container_name: tgi-gaudi-service
+    ports:
+      - ${LLM_SERVER_PORT:-3006}:80
+    volumes:
+      - "${MODEL_CACHE:-./data}:/data"
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_HUB_DISABLE_PROGRESS_BARS: 1
+      HF_HUB_ENABLE_HF_TRANSFER: 0
+      HABANA_VISIBLE_DEVICES: all
+      OMPI_MCA_btl_vader_single_copy_mechanism: none
+      ENABLE_HPU_GRAPH: true
+      LIMIT_HPU_GRAPH: true
+      USE_FLASH_ATTENTION: true
+      FLASH_ATTENTION_RECOMPUTE: true
+      LLM_SERVER_PORT: ${LLM_SERVER_PORT}
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://$host_ip:${LLM_SERVER_PORT}/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
+    runtime: habana
+    cap_add:
+      - SYS_NICE
+    ipc: host
+    command: --model-id ${LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048
+  audioqna-gaudi-backend-server:
+    image: ${REGISTRY:-opea}/audioqna:${TAG:-latest}
+    container_name: audioqna-gaudi-backend-server
+    depends_on:
+      - whisper-service
+      - tgi-service
+      - speecht5-service
+    ports:
+      - "3008:8888"
+    environment:
+      - no_proxy=${no_proxy}
+      - https_proxy=${https_proxy}
+      - http_proxy=${http_proxy}
+      - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
+      - WHISPER_SERVER_HOST_IP=${WHISPER_SERVER_HOST_IP}
+      - WHISPER_SERVER_PORT=${WHISPER_SERVER_PORT}
+      - LLM_SERVER_HOST_IP=${LLM_SERVER_HOST_IP}
+      - LLM_SERVER_PORT=${LLM_SERVER_PORT}
+      - LLM_MODEL_ID=${LLM_MODEL_ID}
+      - SPEECHT5_SERVER_HOST_IP=${SPEECHT5_SERVER_HOST_IP}
+      - SPEECHT5_SERVER_PORT=${SPEECHT5_SERVER_PORT}
+    ipc: host
+    restart: always
+  audioqna-gaudi-ui-server:
+    image: ${REGISTRY:-opea}/audioqna-ui:${TAG:-latest}
+    container_name: audioqna-gaudi-ui-server
+    depends_on:
+      - audioqna-gaudi-backend-server
+    ports:
+      - "5173:5173"
+    environment:
+      - no_proxy=${no_proxy}
+      - https_proxy=${https_proxy}
+      - http_proxy=${http_proxy}
+      - CHAT_URL=${BACKEND_SERVICE_ENDPOINT}
+    ipc: host
+    restart: always
+
+networks:
+  default:
+    driver: bridge
diff --git a/AudioQnA/docker_compose/intel/hpu/gaudi/set_env.sh b/AudioQnA/docker_compose/intel/hpu/gaudi/set_env.sh
index e98f6e04e..179a8c2a2 100644
--- a/AudioQnA/docker_compose/intel/hpu/gaudi/set_env.sh
+++ b/AudioQnA/docker_compose/intel/hpu/gaudi/set_env.sh
@@ -8,7 +8,13 @@ export host_ip=$(hostname -I | awk '{print $1}')
 export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
 
 #
-export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
+export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
+
+# set vLLM parameters
+export NUM_CARDS=1
+export BLOCK_SIZE=128
+export MAX_NUM_SEQS=256
+export MAX_SEQ_LEN_TO_CAPTURE=2048
 
 export MEGA_SERVICE_HOST_IP=${host_ip}
 export WHISPER_SERVER_HOST_IP=${host_ip}
diff --git a/AudioQnA/docker_image_build/build.yaml b/AudioQnA/docker_image_build/build.yaml
index bc9f67d9c..71bb44c81 100644
--- a/AudioQnA/docker_image_build/build.yaml
+++ b/AudioQnA/docker_image_build/build.yaml
@@ -71,3 +71,15 @@ services:
       dockerfile: comps/tts/src/integrations/dependency/gpt-sovits/Dockerfile
     extends: audioqna
     image: ${REGISTRY:-opea}/gpt-sovits:${TAG:-latest}
+  vllm:
+    build:
+      context: vllm
+      dockerfile: Dockerfile.cpu
+    extends: audioqna
+    image: ${REGISTRY:-opea}/vllm:${TAG:-latest}
+  vllm-gaudi:
+    build:
+      context: vllm-fork
+      dockerfile: Dockerfile.hpu
+    extends: audioqna
+    image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest}
diff --git a/AudioQnA/tests/test_compose_on_gaudi.sh b/AudioQnA/tests/test_compose_on_gaudi.sh
index fe5cff379..1e356750e 100644
--- a/AudioQnA/tests/test_compose_on_gaudi.sh
+++ b/AudioQnA/tests/test_compose_on_gaudi.sh
@@ -31,18 +31,27 @@ function build_docker_images() {
 
     cd $WORKPATH/docker_image_build
     git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
+    git clone https://github.com/HabanaAI/vllm-fork.git
+    cd vllm-fork/
+    VLLM_VER=$(git describe --tags "$(git rev-list --tags --max-count=1)")
+    echo "Check out vLLM tag ${VLLM_VER}"
+    git checkout ${VLLM_VER} &> /dev/null && cd ../
+
     echo "Build all the images with --no-cache, check docker_image_build.log for details..."
-    service_list="audioqna audioqna-ui whisper-gaudi speecht5-gaudi"
+    service_list="audioqna audioqna-ui whisper-gaudi speecht5-gaudi vllm-gaudi"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
-    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
 
     docker images && sleep 1s
 }
 
 function start_services() {
     cd $WORKPATH/docker_compose/intel/hpu/gaudi
     export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
-    export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
+    export LLM_MODEL_ID=meta-llama/Meta-Llama-3-8B-Instruct
+    export NUM_CARDS=1
+    export BLOCK_SIZE=128
+    export MAX_NUM_SEQS=256
+    export MAX_SEQ_LEN_TO_CAPTURE=2048
 
     export MEGA_SERVICE_HOST_IP=${ip_address}
     export WHISPER_SERVER_HOST_IP=${ip_address}
@@ -61,8 +70,8 @@ function start_services() {
     docker compose up -d > ${LOG_PATH}/start_services_with_compose.log
     n=0
     until [[ "$n" -ge 200 ]]; do
-        docker logs tgi-gaudi-server > $LOG_PATH/tgi_service_start.log
-        if grep -q Connected $LOG_PATH/tgi_service_start.log; then
+        docker logs vllm-gaudi-service > $LOG_PATH/vllm_service_start.log 2>&1
+        if grep -q complete $LOG_PATH/vllm_service_start.log; then
             break
         fi
         sleep 5s
@@ -86,7 +95,7 @@ function validate_megaservice() {
     # always print the log
     docker logs whisper-service > $LOG_PATH/whisper-service.log
     docker logs speecht5-service > $LOG_PATH/tts-service.log
-    docker logs tgi-gaudi-server > $LOG_PATH/tgi-gaudi-server.log
+    docker logs vllm-gaudi-service > $LOG_PATH/vllm-gaudi-service.log
     docker logs audioqna-gaudi-backend-server > $LOG_PATH/audioqna-gaudi-backend-server.log
     echo "$response" | sed 's/^"//;s/"$//' | base64 -d > speech.mp3
 
@@ -126,7 +135,7 @@ function validate_megaservice() {
 
 function stop_docker() {
     cd $WORKPATH/docker_compose/intel/hpu/gaudi
-    docker compose stop && docker compose rm -f
+    docker compose -f compose.yaml stop && docker compose rm -f
 }
 
 function main() {
diff --git a/AudioQnA/tests/test_compose_on_xeon.sh b/AudioQnA/tests/test_compose_on_xeon.sh
index 11a86ba5c..b1ff1164d 100644
--- a/AudioQnA/tests/test_compose_on_xeon.sh
+++ b/AudioQnA/tests/test_compose_on_xeon.sh
@@ -31,18 +31,23 @@ function build_docker_images() {
 
     cd $WORKPATH/docker_image_build
     git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
+    git clone https://github.com/vllm-project/vllm.git
+    cd ./vllm/
+    VLLM_VER="$(git describe --tags "$(git rev-list --tags --max-count=1)" )"
+    echo "Check out vLLM tag ${VLLM_VER}"
+    git checkout ${VLLM_VER} &> /dev/null && cd ../
+
     echo "Build all the images with --no-cache, check docker_image_build.log for details..."
-    service_list="audioqna audioqna-ui whisper speecht5"
+    service_list="audioqna audioqna-ui whisper speecht5 vllm"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
-    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
 
    docker images && sleep 1s
 }
 
 function start_services() {
     cd $WORKPATH/docker_compose/intel/cpu/xeon/
     export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
-    export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
+    export LLM_MODEL_ID=meta-llama/Meta-Llama-3-8B-Instruct
 
     export MEGA_SERVICE_HOST_IP=${ip_address}
     export WHISPER_SERVER_HOST_IP=${ip_address}
@@ -62,8 +67,8 @@ function start_services() {
     docker compose up -d > ${LOG_PATH}/start_services_with_compose.log
     n=0
     until [[ "$n" -ge 200 ]]; do
-        docker logs tgi-service > $LOG_PATH/tgi_service_start.log
-        if grep -q Connected $LOG_PATH/tgi_service_start.log; then
+        docker logs vllm-service > $LOG_PATH/vllm_service_start.log 2>&1
+        if grep -q complete $LOG_PATH/vllm_service_start.log; then
             break
         fi
         sleep 5s
@@ -77,7 +82,7 @@ function validate_megaservice() {
     # always print the log
     docker logs whisper-service > $LOG_PATH/whisper-service.log
     docker logs speecht5-service > $LOG_PATH/tts-service.log
-    docker logs tgi-service > $LOG_PATH/tgi-service.log
+    docker logs vllm-service > $LOG_PATH/vllm-service.log
     docker logs audioqna-xeon-backend-server > $LOG_PATH/audioqna-xeon-backend-server.log
     echo "$response" | sed 's/^"//;s/"$//' | base64 -d > speech.mp3
 
@@ -117,7 +122,7 @@ function validate_megaservice() {
 
 function stop_docker() {
     cd $WORKPATH/docker_compose/intel/cpu/xeon/
-    docker compose stop && docker compose rm -f
+    docker compose -f compose.yaml stop && docker compose rm -f
 }
 
 function main() {
diff --git a/AudioQnA/tests/test_compose_tgi_on_gaudi.sh b/AudioQnA/tests/test_compose_tgi_on_gaudi.sh
new file mode 100644
index 000000000..5a046adfd
--- /dev/null
+++ b/AudioQnA/tests/test_compose_tgi_on_gaudi.sh
@@ -0,0 +1,146 @@
+#!/bin/bash
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+set -e
+IMAGE_REPO=${IMAGE_REPO:-"opea"}
+IMAGE_TAG=${IMAGE_TAG:-"latest"}
+echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
+echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
+export REGISTRY=${IMAGE_REPO}
+export TAG=${IMAGE_TAG}
+export MODEL_CACHE=${model_cache:-"./data"}
+
+WORKPATH=$(dirname "$PWD")
+LOG_PATH="$WORKPATH/tests"
+ip_address=$(hostname -I | awk '{print $1}')
+
+function build_docker_images() {
+    opea_branch=${opea_branch:-"main"}
+    # If the opea_branch isn't main, replace the git clone branch in Dockerfile.
+    if [[ "${opea_branch}" != "main" ]]; then
+        cd $WORKPATH
+        OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git"
+        NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git"
+        find . -type f -name "Dockerfile*" | while read -r file; do
+            echo "Processing file: $file"
+            sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file"
+        done
+    fi
+
+    cd $WORKPATH/docker_image_build
+    git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
+
+    echo "Build all the images with --no-cache, check docker_image_build.log for details..."
+    service_list="audioqna audioqna-ui whisper-gaudi speecht5-gaudi"
+    docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
+
+    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
+    docker images && sleep 1s
+}
+
+function start_services() {
+    cd $WORKPATH/docker_compose/intel/hpu/gaudi
+    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+    export LLM_MODEL_ID=meta-llama/Meta-Llama-3-8B-Instruct
+
+    export MEGA_SERVICE_HOST_IP=${ip_address}
+    export WHISPER_SERVER_HOST_IP=${ip_address}
+    export SPEECHT5_SERVER_HOST_IP=${ip_address}
+    export LLM_SERVER_HOST_IP=${ip_address}
+
+    export WHISPER_SERVER_PORT=7066
+    export SPEECHT5_SERVER_PORT=7055
+    export LLM_SERVER_PORT=3006
+
+    export BACKEND_SERVICE_ENDPOINT=http://${ip_address}:3008/v1/audioqna
+    export host_ip=${ip_address}
+    # sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
+
+    # Start Docker Containers
+    docker compose -f compose_tgi.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
+    n=0
+    until [[ "$n" -ge 200 ]]; do
+        docker logs tgi-gaudi-service > $LOG_PATH/tgi_service_start.log
+        if grep -q Connected $LOG_PATH/tgi_service_start.log; then
+            break
+        fi
+        sleep 5s
+        n=$((n+1))
+    done
+
+    n=0
+    until [[ "$n" -ge 100 ]]; do
+        docker logs whisper-service > $LOG_PATH/whisper_service_start.log
+        if grep -q "Uvicorn server setup on port" $LOG_PATH/whisper_service_start.log; then
+            break
+        fi
+        sleep 5s
+        n=$((n+1))
+    done
+}
+
+
+function validate_megaservice() {
+    response=$(http_proxy="" curl http://${ip_address}:3008/v1/audioqna -XPOST -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64}' -H 'Content-Type: application/json')
+    # always print the log
+    docker logs whisper-service > $LOG_PATH/whisper-service.log
+    docker logs speecht5-service > $LOG_PATH/tts-service.log
+    docker logs tgi-gaudi-service > $LOG_PATH/tgi-gaudi-service.log
+    docker logs audioqna-gaudi-backend-server > $LOG_PATH/audioqna-gaudi-backend-server.log
+    echo "$response" | sed 's/^"//;s/"$//' | base64 -d > speech.mp3
+
+    if [[ $(file speech.mp3) == *"RIFF"* ]]; then
+        echo "Result correct."
+    else
+        echo "Result wrong."
+        exit 1
+    fi
+
+}
+
+#function validate_frontend() {
+#    cd $WORKPATH/ui/svelte
+#    local conda_env_name="OPEA_e2e"
+#    export PATH=${HOME}/miniforge3/bin/:$PATH
+##    conda remove -n ${conda_env_name} --all -y
+##    conda create -n ${conda_env_name} python=3.12 -y
+#    source activate ${conda_env_name}
+#
+#    sed -i "s/localhost/$ip_address/g" playwright.config.ts
+#
+##    conda install -c conda-forge nodejs=22.6.0 -y
+#    npm install && npm ci && npx playwright install --with-deps
+#    node -v && npm -v && pip list
+#
+#    exit_status=0
+#    npx playwright test || exit_status=$?
+#
+#    if [ $exit_status -ne 0 ]; then
+#        echo "[TEST INFO]: ---------frontend test failed---------"
+#        exit $exit_status
+#    else
+#        echo "[TEST INFO]: ---------frontend test passed---------"
+#    fi
+#}
+
+function stop_docker() {
+    cd $WORKPATH/docker_compose/intel/hpu/gaudi
+    docker compose -f compose_tgi.yaml stop && docker compose rm -f
+}
+
+function main() {
+
+    stop_docker
+    if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
+    start_services
+
+    validate_megaservice
+    # validate_frontend
+
+    stop_docker
+    echo y | docker system prune
+
+}
+
+main
diff --git a/AudioQnA/tests/test_compose_tgi_on_xeon.sh b/AudioQnA/tests/test_compose_tgi_on_xeon.sh
new file mode 100644
index 000000000..d735c87b9
--- /dev/null
+++ b/AudioQnA/tests/test_compose_tgi_on_xeon.sh
@@ -0,0 +1,137 @@
+#!/bin/bash
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+set -e
+IMAGE_REPO=${IMAGE_REPO:-"opea"}
+IMAGE_TAG=${IMAGE_TAG:-"latest"}
+echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
+echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
+export REGISTRY=${IMAGE_REPO}
+export TAG=${IMAGE_TAG}
+export MODEL_CACHE=${model_cache:-"./data"}
+
+WORKPATH=$(dirname "$PWD")
+LOG_PATH="$WORKPATH/tests"
+ip_address=$(hostname -I | awk '{print $1}')
+
+function build_docker_images() {
+    opea_branch=${opea_branch:-"main"}
+    # If the opea_branch isn't main, replace the git clone branch in Dockerfile.
+    if [[ "${opea_branch}" != "main" ]]; then
+        cd $WORKPATH
+        OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git"
+        NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git"
+        find . -type f -name "Dockerfile*" | while read -r file; do
+            echo "Processing file: $file"
+            sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file"
+        done
+    fi
+
+    cd $WORKPATH/docker_image_build
+    git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
+
+    echo "Build all the images with --no-cache, check docker_image_build.log for details..."
+    service_list="audioqna audioqna-ui whisper speecht5"
+    docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
+
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    docker images && sleep 1s
+}
+
+function start_services() {
+    cd $WORKPATH/docker_compose/intel/cpu/xeon/
+    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+    export LLM_MODEL_ID=meta-llama/Meta-Llama-3-8B-Instruct
+
+    export MEGA_SERVICE_HOST_IP=${ip_address}
+    export WHISPER_SERVER_HOST_IP=${ip_address}
+    export SPEECHT5_SERVER_HOST_IP=${ip_address}
+    export LLM_SERVER_HOST_IP=${ip_address}
+
+    export WHISPER_SERVER_PORT=7066
+    export SPEECHT5_SERVER_PORT=7055
+    export LLM_SERVER_PORT=3006
+
+    export BACKEND_SERVICE_ENDPOINT=http://${ip_address}:3008/v1/audioqna
+    export host_ip=${ip_address}
+
+    # sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
+
+    # Start Docker Containers
+    docker compose -f compose_tgi.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
+    n=0
+    until [[ "$n" -ge 200 ]]; do
+        docker logs tgi-service > $LOG_PATH/tgi_service_start.log
+        if grep -q Connected $LOG_PATH/tgi_service_start.log; then
+            break
+        fi
+        sleep 5s
+        n=$((n+1))
+    done
+}
+
+
+function validate_megaservice() {
+    response=$(http_proxy="" curl http://${ip_address}:3008/v1/audioqna -XPOST -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64}' -H 'Content-Type: application/json')
+    # always print the log
+    docker logs whisper-service > $LOG_PATH/whisper-service.log
+    docker logs speecht5-service > $LOG_PATH/tts-service.log
+    docker logs tgi-service > $LOG_PATH/tgi-service.log
+    docker logs audioqna-xeon-backend-server > $LOG_PATH/audioqna-xeon-backend-server.log
+    echo "$response" | sed 's/^"//;s/"$//' | base64 -d > speech.mp3
+
+    if [[ $(file speech.mp3) == *"RIFF"* ]]; then
+        echo "Result correct."
+    else
+        echo "Result wrong."
+        exit 1
+    fi
+
+}
+
+#function validate_frontend() {
+#    cd $WORKPATH/ui/svelte
+#    local conda_env_name="OPEA_e2e"
+#    export PATH=${HOME}/miniforge3/bin/:$PATH
+##    conda remove -n ${conda_env_name} --all -y
+##    conda create -n ${conda_env_name} python=3.12 -y
+#    source activate ${conda_env_name}
+#
+#    sed -i "s/localhost/$ip_address/g" playwright.config.ts
+#
+##    conda install -c conda-forge nodejs=22.6.0 -y
+#    npm install && npm ci && npx playwright install --with-deps
+#    node -v && npm -v && pip list
+#
+#    exit_status=0
+#    npx playwright test || exit_status=$?
+#
+#    if [ $exit_status -ne 0 ]; then
+#        echo "[TEST INFO]: ---------frontend test failed---------"
+#        exit $exit_status
+#    else
+#        echo "[TEST INFO]: ---------frontend test passed---------"
+#    fi
+#}
+
+function stop_docker() {
+    cd $WORKPATH/docker_compose/intel/cpu/xeon/
+    docker compose -f compose_tgi.yaml stop && docker compose rm -f
+}
+
+function main() {
+
+    stop_docker
+    if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
+    start_services
+
+    validate_megaservice
+    # validate_frontend
+
+    stop_docker
+    echo y | docker system prune
+
+}
+
+main
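Editor's note: for readers deploying this patch, a quick manual smoke test of the MegaService can mirror the `validate_megaservice` functions above. This is a minimal sketch, not part of the patch, assuming the backend is published on port 3008 and `host_ip` is set as described in the READMEs; the request payload and base64 decoding come directly from the test scripts.

```bash
# Send a short base64-encoded WAV clip to the AudioQnA backend and decode the
# base64-encoded audio string returned in the response into speech.mp3.
curl http://${host_ip}:3008/v1/audioqna \
  -X POST \
  -H 'Content-Type: application/json' \
  -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64}' \
  | sed 's/^"//;s/"$//' | base64 -d > speech.mp3

# As in the tests above, a valid result is RIFF-format audio.
file speech.mp3
```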