Compare commits


6 Commits

Author         SHA1         Message                                        Date
xiguiw         a03feb700b   Merge branch 'main' into update_vLLM           2025-05-16 11:18:10 +08:00
xiguiw         94222d5783   Merge branch 'main' into update_vLLM           2025-05-16 09:04:30 +08:00
CICD-at-OPEA   274af9eabc   Update vLLM version to v0.9.0                  2025-05-15 22:41:49 +00:00
                            Signed-off-by: CICD-at-OPEA <CICD@opea.dev>
CICD-at-OPEA   238fb52a92   Update vLLM version to v0.8.5                  2025-05-13 22:42:16 +00:00
                            Signed-off-by: CICD-at-OPEA <CICD@opea.dev>
Ying Hu        4a17638b5c   Merge branch 'main' into update_vLLM           2025-05-13 16:00:56 +08:00
CICD-at-OPEA   2160d43a32   Update vLLM version to v0.8.5                  2025-05-08 08:37:52 +00:00
                            Signed-off-by: CICD-at-OPEA <CICD@opea.dev>
229 changed files with 1462 additions and 1540 deletions
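
Across the diff, two patterns repeat: the pinned vLLM release moves from v0.8.3 to v0.9.0, and references to the `HF_TOKEN` shell variable and the `secrets.HF_TOKEN` CI secret are replaced with `HUGGINGFACEHUB_API_TOKEN`. A minimal sketch of what a user exports before running the updated compose setups (the token value is a placeholder):

```bash
# After this change the compose files and scripts read HUGGINGFACEHUB_API_TOKEN
# directly instead of deriving it from HF_TOKEN, so one export is enough.
export HUGGINGFACEHUB_API_TOKEN="<your Hugging Face token>"  # placeholder value
```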

View File

@@ -1,5 +1,5 @@
# Copyright (C) 2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
export VLLM_VER=v0.8.3
export VLLM_VER=v0.9.0
export VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0
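
The same version bump is applied to the test scripts later in this diff, which clone vLLM and check out the pinned tag; a sketch of that checkout flow as those scripts perform it:

```bash
# Clone vLLM and check out the pinned release before building, as the updated
# build_docker_images() functions in this diff do.
git clone https://github.com/vllm-project/vllm.git
cd ./vllm/
VLLM_VER=v0.9.0
echo "Check out vLLM tag ${VLLM_VER}"
git checkout ${VLLM_VER} &> /dev/null && cd ../
```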

View File

@@ -55,7 +55,7 @@ jobs:
- name: Run tests
id: run-test
env:
HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HF_TOKEN }}
HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
run: |

View File

@@ -165,8 +165,8 @@ jobs:
env:
GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HF_TOKEN }}
HFTOKEN: ${{ secrets.HF_TOKEN }}
HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
HFTOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
value_file: ${{ matrix.value_file }}
run: |
set -xe
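
In CI the token is now injected from the `HUGGINGFACEHUB_API_TOKEN` repository secret; when running the same test scripts outside the workflow, the variable has to be exported by hand. A sketch based on the test README removed further down in this diff:

```bash
# Local equivalent of the workflow's env block (illustrative token value).
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
bash test_compose_on_xeon.sh
```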

View File

@@ -160,8 +160,8 @@ jobs:
- name: Run test
shell: bash
env:
HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HF_TOKEN }}
HF_TOKEN: ${{ secrets.HF_TOKEN }}
HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
HF_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
PINECONE_KEY: ${{ secrets.PINECONE_KEY }}
@@ -204,10 +204,6 @@ jobs:
if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
echo "Cleaning up images ..."
df -h
sleep 1
docker system df
sleep 1
if [[ "${{ inputs.hardware }}" == "xeon"* ]]; then
docker system prune -a -f
else
@@ -217,13 +213,7 @@ jobs:
docker images --filter reference="opea/comps-base" -q | xargs -r docker rmi && sleep 1s
docker system prune -f
fi
sleep 5
docker images
sleep 1
df -h
sleep 1
docker system df
sleep 1
- name: Publish pipeline artifact
if: ${{ !cancelled() }}

View File

@@ -80,7 +80,6 @@ jobs:
- name: Checking Relative Path Validity
run: |
cd ${{github.workspace}}
delay=15
fail="FALSE"
repo_name=${{ github.event.pull_request.head.repo.full_name }}
branch="https://github.com/$repo_name/blob/${{ github.event.pull_request.head.ref }}"
@@ -112,15 +111,14 @@ jobs:
if [[ "$png_line" == *#* ]]; then
if [ -n "changed_files" ] && echo "$changed_files" | grep -q "^${refer_path}$"; then
url_dev=$branch$(echo "$real_path" | sed 's|.*/GenAIExamples||')$png_path
sleep $delay
response=$(curl -I -L -s -o /dev/null -w "%{http_code}" "$url_dev")
if [ "$response" -ne 200 ]; then
echo "**********Validation failed ($response), try again**********"
echo "**********Validation failed, try again**********"
response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url_dev")
if [ "$response_retry" -eq 200 ]; then
echo "*****Retry successfully*****"
else
echo "Invalid path ($response_retry) from ${{github.workspace}}/$refer_path: $png_path"
echo "Invalid path from ${{github.workspace}}/$refer_path: $png_path"
fail="TRUE"
fi
else

View File

@@ -13,8 +13,8 @@ services:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: "http://${ip_address}:${TGI_SERVICE_PORT}"
HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
shm_size: 32g
devices:
- /dev/kfd:/dev/kfd
@@ -42,7 +42,7 @@ services:
with_memory: false
recursion_limit: ${recursion_limit_worker}
llm_engine: tgi
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
llm_endpoint_url: ${LLM_ENDPOINT_URL}
model: ${LLM_MODEL_ID}
temperature: ${temperature}
@@ -76,7 +76,7 @@ services:
use_hints: false
recursion_limit: ${recursion_limit_worker}
llm_engine: vllm
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
llm_endpoint_url: ${LLM_ENDPOINT_URL}
model: ${LLM_MODEL_ID}
temperature: ${temperature}
@@ -104,7 +104,7 @@ services:
with_memory: true
recursion_limit: ${recursion_limit_supervisor}
llm_engine: tgi
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
llm_endpoint_url: ${LLM_ENDPOINT_URL}
model: ${LLM_MODEL_ID}
temperature: ${temperature}

View File

@@ -10,8 +10,8 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HF_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
WILM_USE_TRITON_FLASH_ATTENTION: 0
@@ -46,7 +46,7 @@ services:
with_memory: false
recursion_limit: ${recursion_limit_worker}
llm_engine: vllm
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
llm_endpoint_url: ${LLM_ENDPOINT_URL}
model: ${LLM_MODEL_ID}
temperature: ${temperature}
@@ -80,7 +80,7 @@ services:
use_hints: false
recursion_limit: ${recursion_limit_worker}
llm_engine: vllm
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
llm_endpoint_url: ${LLM_ENDPOINT_URL}
model: ${LLM_MODEL_ID}
temperature: ${temperature}
@@ -108,7 +108,7 @@ services:
with_memory: true
recursion_limit: ${recursion_limit_supervisor}
llm_engine: vllm
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
llm_endpoint_url: ${LLM_ENDPOINT_URL}
model: ${LLM_MODEL_ID}
temperature: ${temperature}

View File

@@ -19,7 +19,7 @@ export CRAG_SERVER_PORT="18114"
export WORKPATH=$(dirname "$PWD")
export WORKDIR=${WORKPATH}/../../../
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export HF_CACHE_DIR="./data"
export MODEL_CACHE="./data"
@@ -39,7 +39,7 @@ export CRAG_SERVER=http://${ip_address}:${CRAG_SERVER_PORT}
export WORKER_AGENT_URL="http://${ip_address}:${WORKER_RAG_AGENT_PORT}/v1/chat/completions"
export SQL_AGENT_URL="http://${ip_address}:${WORKER_SQL_AGENT_PORT}/v1/chat/completions"
export HF_CACHE_DIR=${HF_CACHE_DIR}
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export no_proxy=${no_proxy}
export http_proxy=${http_proxy}
export https_proxy=${https_proxy}

View File

@@ -19,7 +19,7 @@ export CRAG_SERVER_PORT="18114"
export WORKPATH=$(dirname "$PWD")
export WORKDIR=${WORKPATH}/../../../
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export VLLM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export HF_CACHE_DIR="./data"
export MODEL_CACHE="./data"
@@ -40,7 +40,7 @@ export CRAG_SERVER=http://${ip_address}:${CRAG_SERVER_PORT}
export WORKER_AGENT_URL="http://${ip_address}:${WORKER_RAG_AGENT_PORT}/v1/chat/completions"
export SQL_AGENT_URL="http://${ip_address}:${WORKER_SQL_AGENT_PORT}/v1/chat/completions"
export HF_CACHE_DIR=${HF_CACHE_DIR}
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export no_proxy=${no_proxy}
export http_proxy=${http_proxy}
export https_proxy=${https_proxy}

View File

@@ -20,8 +20,8 @@ export CRAG_SERVER_PORT="18114"
export WORKPATH=$(dirname "$PWD")
export WORKDIR=${WORKPATH}/../../../
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export VLLM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export HF_CACHE_DIR="./data"
export MODEL_CACHE="./data"
@@ -42,7 +42,7 @@ export CRAG_SERVER=http://${ip_address}:${CRAG_SERVER_PORT}
export WORKER_AGENT_URL="http://${ip_address}:${WORKER_RAG_AGENT_PORT}/v1/chat/completions"
export SQL_AGENT_URL="http://${ip_address}:${WORKER_SQL_AGENT_PORT}/v1/chat/completions"
export HF_CACHE_DIR=${HF_CACHE_DIR}
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export no_proxy=${no_proxy}
export http_proxy=${http_proxy}
export https_proxy=${https_proxy}

View File

@@ -33,7 +33,7 @@ fi
# retriever
export host_ip=$(hostname -I | awk '{print $1}')
export HF_CACHE_DIR=${HF_CACHE_DIR}
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export no_proxy=${no_proxy}
export http_proxy=${http_proxy}
export https_proxy=${https_proxy}

View File

@@ -16,7 +16,7 @@ services:
with_memory: false
recursion_limit: ${recursion_limit_worker}
llm_engine: vllm
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
llm_endpoint_url: ${LLM_ENDPOINT_URL}
model: ${LLM_MODEL_ID}
temperature: ${temperature}
@@ -50,7 +50,7 @@ services:
use_hints: false
recursion_limit: ${recursion_limit_worker}
llm_engine: vllm
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
llm_endpoint_url: ${LLM_ENDPOINT_URL}
model: ${LLM_MODEL_ID}
temperature: ${temperature}
@@ -79,7 +79,7 @@ services:
with_memory: true
recursion_limit: ${recursion_limit_supervisor}
llm_engine: vllm
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
llm_endpoint_url: ${LLM_ENDPOINT_URL}
model: ${LLM_MODEL_ID}
temperature: ${temperature}
@@ -122,7 +122,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HF_TOKEN}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
LLM_MODEL_ID: ${LLM_MODEL_ID}

View File

@@ -16,8 +16,8 @@ export ip_address=$(hostname -I | awk '{print $1}')
# LLM related environment variables
export HF_CACHE_DIR=${HF_CACHE_DIR}
ls $HF_CACHE_DIR
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
export HF_TOKEN=${HF_TOKEN}
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export LLM_MODEL_ID="meta-llama/Llama-3.3-70B-Instruct"
export NUM_SHARDS=4
export LLM_ENDPOINT_URL="http://${ip_address}:8086"
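
Several set_env.sh scripts touched by this change guard against a missing token before exporting it; a sketch of that guard, mirroring the check used elsewhere in this diff:

```bash
# Fail early if the token is not set, as other set_env.sh scripts in this diff do.
if [ -z "${HUGGINGFACEHUB_API_TOKEN}" ]; then
    echo "Error: HUGGINGFACEHUB_API_TOKEN is not set. Please set HUGGINGFACEHUB_API_TOKEN."
fi
```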

View File

@@ -13,7 +13,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all

View File

@@ -3,7 +3,7 @@
host_ip=$(hostname -I | awk '{print $1}')
export HF_CACHE_DIR=${HF_CACHE_DIR}
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export no_proxy=${no_proxy}
export http_proxy=${http_proxy}
export https_proxy=${https_proxy}

View File

@@ -20,7 +20,7 @@ function start_retrieval_tool() {
cd $WORKPATH/../DocIndexRetriever/docker_compose/intel/cpu/xeon
host_ip=$(hostname -I | awk '{print $1}')
export HF_CACHE_DIR=${HF_CACHE_DIR}
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export no_proxy=${no_proxy}
export http_proxy=${http_proxy}
export https_proxy=${https_proxy}

View File

@@ -11,8 +11,8 @@ export ip_address=$(hostname -I | awk '{print $1}')
export host_ip=$ip_address
echo "ip_address=${ip_address}"
export TOOLSET_PATH=$WORKPATH/tools/
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
HF_TOKEN=${HF_TOKEN}
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
model="meta-llama/Llama-3.3-70B-Instruct" #"meta-llama/Meta-Llama-3.1-70B-Instruct"
export HF_CACHE_DIR=${model_cache:-"/data2/huggingface"}

View File

@@ -7,7 +7,7 @@ WORKPATH=$(dirname "$PWD")
export WORKDIR=$WORKPATH/../../
echo "WORKDIR=${WORKDIR}"
export ip_address=$(hostname -I | awk '{print $1}')
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
export no_proxy="$no_proxy,rag-agent-endpoint,sql-agent-endpoint,react-agent-endpoint,agent-ui,vllm-gaudi-server,jaeger,grafana,prometheus,127.0.0.1,localhost,0.0.0.0,$ip_address"
IMAGE_REPO=${IMAGE_REPO:-"opea"}

View File

@@ -9,7 +9,7 @@ ls $WORKPATH
export WORKDIR=$WORKPATH/../../
echo "WORKDIR=${WORKDIR}"
export ip_address=$(hostname -I | awk '{print $1}')
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export TOOLSET_PATH=$WORKPATH/tools/
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}

View File

@@ -8,7 +8,7 @@ WORKPATH=$(dirname "$PWD")
export WORKDIR=${WORKPATH}/../../
echo "WORKDIR=${WORKDIR}"
export ip_address=$(hostname -I | awk '{print $1}')
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export TOOLSET_PATH=$WORKPATH/tools/
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}

View File

@@ -40,7 +40,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HF_TOKEN}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${LLM_MODEL_ID}

View File

@@ -35,8 +35,8 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HF_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
WILM_USE_TRITON_FLASH_ATTENTION: 0

View File

@@ -7,7 +7,7 @@
# export host_ip=<your External Public IP> # export host_ip=$(hostname -I | awk '{print $1}')
export host_ip=${ip_address}
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
# <token>
export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3

View File

@@ -8,7 +8,7 @@
export host_ip=${ip_address}
export external_host_ip=${ip_address}
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export HF_CACHE_DIR="./data"
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export VLLM_SERVICE_PORT="8081"

View File

@@ -36,7 +36,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HF_TOKEN}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
LLM_MODEL_ID: ${LLM_MODEL_ID}
VLLM_TORCH_PROFILER_DIR: "/mnt"
LLM_SERVER_PORT: ${LLM_SERVER_PORT}

View File

@@ -40,7 +40,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HF_TOKEN}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
LLM_MODEL_ID: ${LLM_MODEL_ID}
VLLM_TORCH_PROFILER_DIR: "/mnt"
LLM_SERVER_PORT: ${LLM_SERVER_PORT}

View File

@@ -36,7 +36,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HF_TOKEN}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
LLM_SERVER_PORT: ${LLM_SERVER_PORT}
healthcheck:
test: ["CMD-SHELL", "curl -f http://$host_ip:${LLM_SERVER_PORT}/health || exit 1"]
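
The compose healthcheck above polls the serving endpoint; the same endpoint can be probed manually once the stack is up, assuming `host_ip` and `LLM_SERVER_PORT` are exported as in the surrounding set_env scripts:

```bash
# Manual version of the compose healthcheck (assumes host_ip and LLM_SERVER_PORT
# are already exported, as the set_env scripts in this diff do).
curl -f "http://${host_ip}:${LLM_SERVER_PORT}/health" && echo "LLM server is healthy"
```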

View File

@@ -5,7 +5,7 @@
# export host_ip=<your External Public IP>
export host_ip=$(hostname -I | awk '{print $1}')
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
# <token>
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"

View File

@@ -45,7 +45,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HF_TOKEN}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all

View File

@@ -45,7 +45,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HF_TOKEN}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all

View File

@@ -5,7 +5,7 @@
# export host_ip=<your External Public IP>
export host_ip=$(hostname -I | awk '{print $1}')
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
# <token>
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"

View File

@@ -27,7 +27,7 @@ function build_docker_images() {
git clone https://github.com/vllm-project/vllm.git
cd ./vllm/
VLLM_VER="v0.8.3"
VLLM_VER=v0.9.0
echo "Check out vLLM tag ${VLLM_VER}"
git checkout ${VLLM_VER} &> /dev/null && cd ../

View File

@@ -27,7 +27,7 @@ function build_docker_images() {
git clone https://github.com/vllm-project/vllm.git
cd ./vllm/
VLLM_VER="v0.8.3"
VLLM_VER=v0.9.0
echo "Check out vLLM tag ${VLLM_VER}"
git checkout ${VLLM_VER} &> /dev/null && cd ../

View File

@@ -68,7 +68,7 @@ Then run the command `docker images`, and you will have the following images ready:
Before starting the services with `docker compose`, you have to recheck the following environment variables.
```bash
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export host_ip=$(hostname -I | awk '{print $1}')
export TGI_SERVICE_PORT=3006

View File

@@ -52,8 +52,8 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
shm_size: 1g
devices:
- /dev/kfd:/dev/kfd

View File

@@ -3,7 +3,7 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export OPENAI_API_KEY=${OPENAI_API_KEY}
export host_ip=$(hostname -I | awk '{print $1}')
@@ -41,7 +41,7 @@ export FACE="/home/user/comps/animation/src/assets/img/avatar5.png"
# export AUDIO='assets/audio/eg3_ref.wav' # audio file path is optional, will use base64str in the post request as input if is 'None'
export AUDIO='None'
export FACESIZE=96
export OUTFILE="./outputs/result.mp4"
export OUTFILE="/outputs/result.mp4"
export GFPGAN_MODEL_VERSION=1.4 # latest version, can roll back to v1.3 if needed
export UPSCALE_FACTOR=1
export FPS=5
export FPS=10

View File

@@ -37,7 +37,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HF_TOKEN}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
healthcheck:
test: ["CMD-SHELL", "curl -f http://${host_ip}:3006/health || exit 1"]
interval: 10s

View File

@@ -5,32 +5,3 @@
pushd "../../../../../" > /dev/null
source .set_env.sh
popd > /dev/null
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
export host_ip=$(hostname -I | awk '{print $1}')
export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
export WAV2LIP_ENDPOINT=http://$host_ip:7860
export MEGA_SERVICE_HOST_IP=${host_ip}
export WHISPER_SERVER_HOST_IP=${host_ip}
export WHISPER_SERVER_PORT=7066
export SPEECHT5_SERVER_HOST_IP=${host_ip}
export SPEECHT5_SERVER_PORT=7055
export LLM_SERVER_HOST_IP=${host_ip}
export LLM_SERVER_PORT=3006
export ANIMATION_SERVICE_HOST_IP=${host_ip}
export ANIMATION_SERVICE_PORT=3008
export MEGA_SERVICE_PORT=8888
export DEVICE="cpu"
export WAV2LIP_PORT=7860
export INFERENCE_MODE='wav2lip+gfpgan'
export CHECKPOINT_PATH='/usr/local/lib/python3.11/site-packages/Wav2Lip/checkpoints/wav2lip_gan.pth'
export FACE="/home/user/comps/animation/src/assets/img/avatar5.png"
# export AUDIO='assets/audio/eg3_ref.wav' # audio file path is optional, will use base64str in the post request as input if is 'None'
export AUDIO='None'
export FACESIZE=96
export OUTFILE="/outputs/result.mp4"
export GFPGAN_MODEL_VERSION=1.4 # latest version, can roll back to v1.3 if needed
export UPSCALE_FACTOR=1
export FPS=10

View File

@@ -48,7 +48,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all

View File

@@ -5,35 +5,3 @@
pushd "../../../../../" > /dev/null
source .set_env.sh
popd > /dev/null
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
export host_ip=$(hostname -I | awk '{print $1}')
export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
export WAV2LIP_ENDPOINT=http://$host_ip:7860
export MEGA_SERVICE_HOST_IP=${host_ip}
export WHISPER_SERVER_HOST_IP=${host_ip}
export WHISPER_SERVER_PORT=7066
export SPEECHT5_SERVER_HOST_IP=${host_ip}
export SPEECHT5_SERVER_PORT=7055
export LLM_SERVER_HOST_IP=${host_ip}
export LLM_SERVER_PORT=3006
export ANIMATION_SERVICE_HOST_IP=${host_ip}
export ANIMATION_SERVICE_PORT=3008
export MEGA_SERVICE_PORT=8888
export DEVICE="hpu"
export WAV2LIP_PORT=7860
export INFERENCE_MODE='wav2lip+gfpgan'
export CHECKPOINT_PATH='/usr/local/lib/python3.10/dist-packages/Wav2Lip/checkpoints/wav2lip_gan.pth'
export FACE="/home/user/comps/animation/src/assets/img/avatar1.jpg"
# export AUDIO='assets/audio/eg3_ref.wav' # audio file path is optional, will use base64str in the post request as input if is 'None'
export AUDIO='None'
export FACESIZE=96
export OUTFILE="/outputs/result.mp4"
export GFPGAN_MODEL_VERSION=1.4 # latest version, can roll back to v1.3 if needed
export UPSCALE_FACTOR=1
export FPS=10

View File

@@ -1,27 +0,0 @@
# AvatarChatbot E2E test scripts
## Set the required environment variable
```bash
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
```
## Run test
On Intel Xeon with TGI:
```bash
bash test_compose_on_xeon.sh
```
On Intel Gaudi with TGI:
```bash
bash test_compose_on_gaudi.sh
```
On AMD ROCm with TGI:
```bash
bash test_compose_on_rocm.sh
```

View File

@@ -45,7 +45,37 @@ function build_docker_images() {
function start_services() {
cd $WORKPATH/docker_compose/intel/hpu/gaudi
source set_env.sh
export HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN
export host_ip=$(hostname -I | awk '{print $1}')
export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
export WAV2LIP_ENDPOINT=http://$host_ip:7860
export MEGA_SERVICE_HOST_IP=${host_ip}
export WHISPER_SERVER_HOST_IP=${host_ip}
export WHISPER_SERVER_PORT=7066
export SPEECHT5_SERVER_HOST_IP=${host_ip}
export SPEECHT5_SERVER_PORT=7055
export LLM_SERVER_HOST_IP=${host_ip}
export LLM_SERVER_PORT=3006
export ANIMATION_SERVICE_HOST_IP=${host_ip}
export ANIMATION_SERVICE_PORT=3008
export MEGA_SERVICE_PORT=8888
export DEVICE="hpu"
export WAV2LIP_PORT=7860
export INFERENCE_MODE='wav2lip+gfpgan'
export CHECKPOINT_PATH='/usr/local/lib/python3.10/dist-packages/Wav2Lip/checkpoints/wav2lip_gan.pth'
export FACE="/home/user/comps/animation/src/assets/img/avatar1.jpg"
# export AUDIO='assets/audio/eg3_ref.wav' # audio file path is optional, will use base64str in the post request as input if is 'None'
export AUDIO='None'
export FACESIZE=96
export OUTFILE="/outputs/result.mp4"
export GFPGAN_MODEL_VERSION=1.4 # latest version, can roll back to v1.3 if needed
export UPSCALE_FACTOR=1
export FPS=10
# Start Docker Containers
docker compose up -d > ${LOG_PATH}/start_services_with_compose.log

View File

@@ -2,7 +2,7 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -xe
set -e
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
@@ -42,8 +42,48 @@ function build_docker_images() {
function start_services() {
cd $WORKPATH/docker_compose/amd/gpu/rocm
export HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN
export OPENAI_API_KEY=$OPENAI_API_KEY
source set_env.sh
export host_ip=${ip_address}
export TGI_SERVICE_PORT=3006
export TGI_LLM_ENDPOINT=http://${host_ip}:${TGI_SERVICE_PORT}
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export ASR_ENDPOINT=http://${host_ip}:7066
export TTS_ENDPOINT=http://${host_ip}:7055
export WAV2LIP_ENDPOINT=http://${host_ip}:7860
export MEGA_SERVICE_HOST_IP=${host_ip}
export ASR_SERVICE_HOST_IP=${host_ip}
export TTS_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export ANIMATION_SERVICE_HOST_IP=${host_ip}
export WHISPER_SERVER_HOST_IP=${host_ip}
export WHISPER_SERVER_PORT=7066
export SPEECHT5_SERVER_HOST_IP=${host_ip}
export SPEECHT5_SERVER_PORT=7055
export MEGA_SERVICE_PORT=8888
export ASR_SERVICE_PORT=3001
export TTS_SERVICE_PORT=3002
export LLM_SERVICE_PORT=3006
export ANIMATION_SERVICE_PORT=3008
export DEVICE="cpu"
export WAV2LIP_PORT=7860
export INFERENCE_MODE='wav2lip+gfpgan'
export CHECKPOINT_PATH='/usr/local/lib/python3.11/site-packages/Wav2Lip/checkpoints/wav2lip_gan.pth'
export FACE="/home/user/comps/animation/src/assets/img/avatar5.png"
# export AUDIO='assets/audio/eg3_ref.wav' # audio file path is optional, will use base64str in the post request as input if is 'None'
export AUDIO='None'
export FACESIZE=96
export OUTFILE="./outputs/result.mp4"
export GFPGAN_MODEL_VERSION=1.4 # latest version, can roll back to v1.3 if needed
export UPSCALE_FACTOR=1
export FPS=5
# Start Docker Containers
docker compose up -d --force-recreate

View File

@@ -45,7 +45,37 @@ function build_docker_images() {
function start_services() {
cd $WORKPATH/docker_compose/intel/cpu/xeon
source set_env.sh
export HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN
export host_ip=$(hostname -I | awk '{print $1}')
export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
export WAV2LIP_ENDPOINT=http://$host_ip:7860
export MEGA_SERVICE_HOST_IP=${host_ip}
export WHISPER_SERVER_HOST_IP=${host_ip}
export WHISPER_SERVER_PORT=7066
export SPEECHT5_SERVER_HOST_IP=${host_ip}
export SPEECHT5_SERVER_PORT=7055
export LLM_SERVER_HOST_IP=${host_ip}
export LLM_SERVER_PORT=3006
export ANIMATION_SERVICE_HOST_IP=${host_ip}
export ANIMATION_SERVICE_PORT=3008
export MEGA_SERVICE_PORT=8888
export DEVICE="cpu"
export WAV2LIP_PORT=7860
export INFERENCE_MODE='wav2lip+gfpgan'
export CHECKPOINT_PATH='/usr/local/lib/python3.11/site-packages/Wav2Lip/checkpoints/wav2lip_gan.pth'
export FACE="/home/user/comps/animation/src/assets/img/avatar5.png"
# export AUDIO='assets/audio/eg3_ref.wav' # audio file path is optional, will use base64str in the post request as input if is 'None'
export AUDIO='None'
export FACESIZE=96
export OUTFILE="/outputs/result.mp4"
export GFPGAN_MODEL_VERSION=1.4 # latest version, can roll back to v1.3 if needed
export UPSCALE_FACTOR=1
export FPS=10
# Start Docker Containers
docker compose up -d
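
After `docker compose up -d`, a quick way to confirm the stack came up is to list the services and tail their logs; this is an illustrative sanity check, not a step shown in this hunk:

```bash
# Illustrative sanity check; the test script's actual follow-up steps are not
# shown in this hunk.
docker compose ps
docker compose logs --tail=20
```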

View File

@@ -16,7 +16,7 @@ services:
- chatqna-redis-vector-db
- chatqna-tei-embedding-service
ports:
- "${CHATQNA_REDIS_DATAPREP_PORT:-18103}:5000"
- "${CHATQNA_REDIS_DATAPREP_PORT}:5000"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}

View File

@@ -16,7 +16,7 @@ services:
- chatqna-redis-vector-db
- chatqna-tei-embedding-service
ports:
- "${CHATQNA_REDIS_DATAPREP_PORT:-18103}:5000"
- "${CHATQNA_REDIS_DATAPREP_PORT}:5000"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}

View File

@@ -16,7 +16,7 @@ services:
- chatqna-redis-vector-db
- chatqna-tei-embedding-service
ports:
- "${CHATQNA_REDIS_DATAPREP_PORT:-18103}:5000"
- "${CHATQNA_REDIS_DATAPREP_PORT}:5000"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}

View File

@@ -16,7 +16,7 @@ services:
- chatqna-redis-vector-db
- chatqna-tei-embedding-service
ports:
- "${CHATQNA_REDIS_DATAPREP_PORT:-18103}:5000"
- "${CHATQNA_REDIS_DATAPREP_PORT:-5000}:5000"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}

View File

@@ -2,17 +2,17 @@
# Copyright (C) 2025 Advanced Micro Devices, Inc.
export HOST_IP=${ip_address}
export HOST_IP_EXTERNAL=${ip_address}
export HOST_IP=''
export HOST_IP_EXTERNAL=''
export CHATQNA_EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export CHATQNA_LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
export CHATQNA_RERANK_MODEL_ID="BAAI/bge-reranker-base"
export CHATQNA_BACKEND_SERVICE_PORT=8888
export CHATQNA_FRONTEND_SERVICE_PORT=5173
export CHATQNA_NGINX_PORT=80
export CHATQNA_BACKEND_SERVICE_PORT=18102
export CHATQNA_FRONTEND_SERVICE_PORT=18101
export CHATQNA_NGINX_PORT=18104
export CHATQNA_REDIS_DATAPREP_PORT=18103
export CHATQNA_REDIS_RETRIEVER_PORT=7000
export CHATQNA_REDIS_VECTOR_INSIGHT_PORT=8001

View File

@@ -2,18 +2,18 @@
# Copyright (C) 2025 Advanced Micro Devices, Inc.
export HOST_IP=${ip_address}
export HOST_IP_EXTERNAL=${ip_address}
export HOST_IP=''
export HOST_IP_EXTERNAL=''
export CHATQNA_EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export CHATQNA_LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
export CHATQNA_RERANK_MODEL_ID="BAAI/bge-reranker-base"
export CHATQNA_BACKEND_SERVICE_PORT=8888
export CHATQNA_FRONTEND_SERVICE_PORT=5173
export CHATQNA_BACKEND_SERVICE_PORT=18102
export CHATQNA_FRONTEND_SERVICE_PORT=18101
export CHATQNA_LLM_FAQGEN_PORT=18011
export CHATQNA_NGINX_PORT=80
export CHATQNA_NGINX_PORT=18104
export CHATQNA_REDIS_DATAPREP_PORT=18103
export CHATQNA_REDIS_RETRIEVER_PORT=7000
export CHATQNA_REDIS_VECTOR_INSIGHT_PORT=8001

View File

@@ -2,18 +2,18 @@
# Copyright (C) 2025 Advanced Micro Devices, Inc.
export HOST_IP=${ip_address}
export HOST_IP_EXTERNAL=${ip_address}
export HOST_IP=''
export HOST_IP_EXTERNAL=''
export CHATQNA_EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export CHATQNA_LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
export CHATQNA_RERANK_MODEL_ID="BAAI/bge-reranker-base"
export CHATQNA_BACKEND_SERVICE_PORT=8888
export CHATQNA_FRONTEND_SERVICE_PORT=5173
export CHATQNA_BACKEND_SERVICE_PORT=18102
export CHATQNA_FRONTEND_SERVICE_PORT=18101
export CHATQNA_LLM_FAQGEN_PORT=18011
export CHATQNA_NGINX_PORT=80
export CHATQNA_NGINX_PORT=18104
export CHATQNA_REDIS_DATAPREP_PORT=18103
export CHATQNA_REDIS_RETRIEVER_PORT=7000
export CHATQNA_REDIS_VECTOR_INSIGHT_PORT=8001

View File

@@ -2,17 +2,17 @@
# Copyright (C) 2025 Advanced Micro Devices, Inc.
export HOST_IP=${ip_address}
export HOST_IP_EXTERNAL=${ip_address}
export HOST_IP=''
export HOST_IP_EXTERNAL=''
export CHATQNA_EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export CHATQNA_LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
export CHATQNA_RERANK_MODEL_ID="BAAI/bge-reranker-base"
export CHATQNA_BACKEND_SERVICE_PORT=8888
export CHATQNA_FRONTEND_SERVICE_PORT=5173
export CHATQNA_NGINX_PORT=80
export CHATQNA_BACKEND_SERVICE_PORT=18102
export CHATQNA_FRONTEND_SERVICE_PORT=18101
export CHATQNA_NGINX_PORT=18104
export CHATQNA_REDIS_DATAPREP_PORT=18103
export CHATQNA_REDIS_RETRIEVER_PORT=7000
export CHATQNA_REDIS_VECTOR_INSIGHT_PORT=8001

View File

@@ -183,7 +183,7 @@ export https_proxy=${your_http_proxy}
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export INDEX_NAME="rag-redis"
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export OLLAMA_HOST=${host_ip}
export OLLAMA_MODEL="llama3.2"
```
@@ -194,7 +194,7 @@ export OLLAMA_MODEL="llama3.2"
set EMBEDDING_MODEL_ID=BAAI/bge-base-en-v1.5
set RERANK_MODEL_ID=BAAI/bge-reranker-base
set INDEX_NAME=rag-redis
set HUGGINGFACEHUB_API_TOKEN=%HF_TOKEN%
set HUGGINGFACEHUB_API_TOKEN=%HUGGINGFACEHUB_API_TOKEN%
set OLLAMA_HOST=host.docker.internal
set OLLAMA_MODEL="llama3.2"
```

View File

@@ -24,7 +24,7 @@ services:
REDIS_HOST: redis-vector-db
INDEX_NAME: ${INDEX_NAME}
TEI_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
interval: 10s
@@ -60,7 +60,7 @@ services:
REDIS_HOST: redis-vector-db
INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
LOGFLAG: ${LOGFLAG}
RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
restart: unless-stopped
@@ -76,7 +76,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate

View File

@@ -9,7 +9,7 @@ popd > /dev/null
export host_ip=$(hostname -I | awk '{print $1}')
if [ -z "${HF_TOKEN}" ]; then
if [ -z "${HUGGINGFACEHUB_API_TOKEN}" ]; then
echo "Error: HUGGINGFACEHUB_API_TOKEN is not set. Please set HUGGINGFACEHUB_API_TOKEN."
fi
@@ -17,7 +17,7 @@ if [ -z "${host_ip}" ]; then
echo "Error: host_ip is not set. Please set host_ip first."
fi
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export INDEX_NAME="rag-redis"

View File

@@ -31,7 +31,7 @@ services:
REDIS_HOST: redis-vector-db
INDEX_NAME: ${INDEX_NAME}
TEI_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
interval: 10s
@@ -67,7 +67,7 @@ services:
REDIS_HOST: redis-vector-db
INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
LOGFLAG: ${LOGFLAG}
RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
restart: unless-stopped
@@ -83,7 +83,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
@@ -99,7 +99,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HF_TOKEN}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
LLM_MODEL_ID: ${LLM_MODEL_ID}
VLLM_TORCH_PROFILER_DIR: "/mnt"
VLLM_CPU_KVCACHE_SPACE: 40

View File

@@ -24,7 +24,7 @@ services:
REDIS_HOST: redis-vector-db
INDEX_NAME: ${INDEX_NAME}
TEI_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
interval: 10s
@@ -60,7 +60,7 @@ services:
REDIS_HOST: redis-vector-db
INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
LOGFLAG: ${LOGFLAG}
RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
restart: unless-stopped
@@ -76,7 +76,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate

View File

@@ -24,7 +24,7 @@ services:
REDIS_HOST: redis-vector-db
INDEX_NAME: ${INDEX_NAME}
TEI_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
interval: 10s
@@ -60,7 +60,7 @@ services:
REDIS_HOST: redis-vector-db
INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
LOGFLAG: ${LOGFLAG}
RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
restart: unless-stopped
@@ -76,7 +76,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate

View File

@@ -35,7 +35,7 @@ services:
DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_MARIADBVECTOR"
MARIADB_CONNECTION_URL: mariadb+mariadbconnector://${MARIADB_USER}:${MARIADB_PASSWORD}@mariadb-server:3306/${MARIADB_DATABASE}
TEI_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
interval: 10s
@@ -69,7 +69,7 @@ services:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
MARIADB_CONNECTION_URL: mariadb+mariadbconnector://${MARIADB_USER}:${MARIADB_PASSWORD}@mariadb-server:3306/${MARIADB_DATABASE}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
LOGFLAG: ${LOGFLAG}
RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_MARIADBVECTOR"
restart: unless-stopped
@@ -85,7 +85,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
@@ -101,7 +101,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HF_TOKEN}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
LLM_MODEL_ID: ${LLM_MODEL_ID}
VLLM_TORCH_PROFILER_DIR: "/mnt"
VLLM_CPU_KVCACHE_SPACE: 40

View File

@@ -75,7 +75,7 @@ services:
MILVUS_HOST: ${host_ip}
MILVUS_PORT: 19530
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
EMBEDDING_MODEL_ID: ${EMBEDDING_MODEL_ID}
LOGFLAG: ${LOGFLAG}
healthcheck:
@@ -107,7 +107,7 @@ services:
MILVUS_HOST: ${host_ip}
MILVUS_PORT: 19530
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
LOGFLAG: ${LOGFLAG}
RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_MILVUS"
restart: unless-stopped
@@ -138,7 +138,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
@@ -155,7 +155,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HF_TOKEN}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
LLM_MODEL_ID: ${LLM_MODEL_ID}
VLLM_TORCH_PROFILER_DIR: "/mnt"
healthcheck:

View File

@@ -20,7 +20,7 @@ services:
PINECONE_INDEX_NAME: ${PINECONE_INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_PINECONE"
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
@@ -55,7 +55,7 @@ services:
PINECONE_INDEX_NAME: ${PINECONE_INDEX_NAME}
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
LOGFLAG: ${LOGFLAG}
RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_PINECONE"
restart: unless-stopped
@@ -71,7 +71,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
@@ -87,7 +87,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HF_TOKEN}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
LLM_MODEL_ID: ${LLM_MODEL_ID}
VLLM_TORCH_PROFILER_DIR: "/mnt"
command: --model $LLM_MODEL_ID --host 0.0.0.0 --port 80

View File

@@ -24,7 +24,7 @@ services:
QDRANT_PORT: 6333
QDRANT_INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_QDRANT"
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
@@ -76,7 +76,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
@@ -92,7 +92,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HF_TOKEN}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
LLM_MODEL_ID: ${LLM_MODEL_ID}
VLLM_TORCH_PROFILER_DIR: "/mnt"
command: --model $LLM_MODEL_ID --host 0.0.0.0 --port 80

View File

@@ -31,7 +31,7 @@ services:
REDIS_HOST: redis-vector-db
INDEX_NAME: ${INDEX_NAME}
TEI_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
tei-embedding-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: tei-embedding-server
@@ -61,7 +61,7 @@ services:
REDIS_HOST: redis-vector-db
INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
LOGFLAG: ${LOGFLAG}
RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
restart: unless-stopped
@@ -77,7 +77,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate

View File

@@ -24,7 +24,7 @@ services:
REDIS_HOST: redis-vector-db
INDEX_NAME: ${INDEX_NAME}
TEI_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
interval: 10s
@@ -60,7 +60,7 @@ services:
REDIS_HOST: redis-vector-db
INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
LOGFLAG: ${LOGFLAG}
RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
restart: unless-stopped
@@ -76,7 +76,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
@@ -92,7 +92,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HF_TOKEN}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0

View File

@@ -24,7 +24,7 @@ services:
REDIS_HOST: redis-vector-db
INDEX_NAME: ${INDEX_NAME}
TEI_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
interval: 10s
@@ -60,7 +60,7 @@ services:
REDIS_HOST: redis-vector-db
INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
LOGFLAG: ${LOGFLAG}
RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
restart: unless-stopped
@@ -76,7 +76,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HF_TOKEN}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
LLM_MODEL_ID: ${LLM_MODEL_ID}
VLLM_TORCH_PROFILER_DIR: "/mnt"
command: --model $LLM_MODEL_ID --host 0.0.0.0 --port 80

View File

@@ -1,8 +1,6 @@
# Copyright (C) 2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
if ls *.json 1> /dev/null 2>&1; then
rm *.json
fi
rm *.json
wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/chatqna_megaservice_grafana.json
wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/qdrant_grafana.json
wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/milvus_grafana.json

View File

@@ -7,9 +7,6 @@ pushd "../../../../../" > /dev/null
source .set_env.sh
popd > /dev/null
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
export HF_TOKEN=${HF_TOKEN}
export host_ip=${ip_address}
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"

View File

@@ -7,7 +7,7 @@ pushd "../../../../../" > /dev/null
source .set_env.sh
popd > /dev/null
if [ -z "${HF_TOKEN}" ]; then
if [ -z "${HUGGINGFACEHUB_API_TOKEN}" ]; then
echo "Error: HUGGINGFACEHUB_API_TOKEN is not set. Please set HUGGINGFACEHUB_API_TOKEN."
fi
@@ -15,7 +15,7 @@ export host_ip=$(hostname -I | awk '{print $1}')
export MARIADB_DATABASE="vectordb"
export MARIADB_USER="chatqna"
export MARIADB_PASSWORD="password"
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"

View File

@@ -43,7 +43,7 @@ Some HuggingFace resources, such as some models, are only accessible if you have
### Configure the Deployment Environment
To set up environment variables for deploying ChatQnA services, source the _setup_env.sh_ script in this directory (If using faqgen or guardrails, source the _set_env_faqgen.sh_):
To set up environment variables for deploying ChatQnA services, source the _setup_env.sh_ script in this directory:
```
source ./set_env.sh

View File

@@ -31,7 +31,7 @@ services:
REDIS_HOST: redis-vector-db
INDEX_NAME: ${INDEX_NAME}
TEI_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
interval: 10s
@@ -67,7 +67,7 @@ services:
REDIS_HOST: redis-vector-db
INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
tei-reranking-service:
image: ghcr.io/huggingface/tei-gaudi:1.5.0
@@ -101,7 +101,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HF_TOKEN}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
LLM_MODEL_ID: ${LLM_MODEL_ID}

View File

@@ -24,7 +24,7 @@ services:
REDIS_HOST: redis-vector-db
INDEX_NAME: ${INDEX_NAME}
TEI_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
LOGFLAG: ${LOGFLAG}
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
@@ -61,7 +61,7 @@ services:
REDIS_HOST: redis-vector-db
INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
tei-reranking-service:
image: ghcr.io/huggingface/tei-gaudi:1.5.0

View File

@@ -24,7 +24,7 @@ services:
REDIS_HOST: redis-vector-db
INDEX_NAME: ${INDEX_NAME}
TEI_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
LOGFLAG: ${LOGFLAG}
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
@@ -61,7 +61,7 @@ services:
REDIS_HOST: redis-vector-db
INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
tei-reranking-service:
image: ghcr.io/huggingface/tei-gaudi:1.5.0
@@ -95,7 +95,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all

View File

@@ -24,7 +24,7 @@ services:
REDIS_HOST: redis-vector-db
INDEX_NAME: ${INDEX_NAME}
TEI_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
interval: 10s
@@ -42,7 +42,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HF_TOKEN}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
GURADRAILS_MODEL_ID: ${GURADRAILS_MODEL_ID}
@@ -73,7 +73,7 @@ services:
https_proxy: ${https_proxy}
SAFETY_GUARD_MODEL_ID: ${GURADRAILS_MODEL_ID}
SAFETY_GUARD_ENDPOINT: http://vllm-guardrails-service:80
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
tei-embedding-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
@@ -104,7 +104,7 @@ services:
REDIS_HOST: redis-vector-db
INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
LOGFLAG: ${LOGFLAG}
RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
restart: unless-stopped
@@ -140,7 +140,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HF_TOKEN}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
LLM_MODEL_ID: ${LLM_MODEL_ID}

View File

@@ -24,7 +24,7 @@ services:
REDIS_HOST: redis-vector-db
INDEX_NAME: ${INDEX_NAME}
TEI_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
interval: 10s
@@ -60,7 +60,7 @@ services:
REDIS_HOST: redis-vector-db
INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
LOGFLAG: ${LOGFLAG}
RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
restart: unless-stopped
@@ -96,7 +96,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all

View File

@@ -24,7 +24,7 @@ services:
REDIS_HOST: redis-vector-db
INDEX_NAME: ${INDEX_NAME}
TEI_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
interval: 10s
@@ -60,7 +60,7 @@ services:
REDIS_HOST: redis-vector-db
INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
LOGFLAG: ${LOGFLAG}
RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
restart: unless-stopped
@@ -75,7 +75,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HF_TOKEN}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
LLM_MODEL_ID: ${LLM_MODEL_ID}

View File

@@ -123,7 +123,7 @@ View the docker input parameters in `./ChatQnA/docker_compose/intel/hpu/gaudi/co
environment:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
ENABLE_HPU_GRAPH: true

View File

@@ -4,20 +4,12 @@
# SPDX-License-Identifier: Apache-2.0
# Function to prompt for input and set environment variables
NON_INTERACTIVE=${NON_INTERACTIVE:-false}
prompt_for_env_var() {
local var_name="$1"
local prompt_message="$2"
local default_value="$3"
local mandatory="$4"
if [[ "$NON_INTERACTIVE" == "true" ]]; then
echo "Non-interactive environment detected. Setting $var_name to default: $default_value"
export "$var_name"="$default_value"
return
fi
if [[ "$mandatory" == "true" ]]; then
while [[ -z "$value" ]]; do
read -p "$prompt_message [default: \"${default_value}\"]: " value
@@ -42,7 +34,7 @@ popd > /dev/null
# Prompt the user for each required environment variable
prompt_for_env_var "EMBEDDING_MODEL_ID" "Enter the EMBEDDING_MODEL_ID" "BAAI/bge-base-en-v1.5" false
prompt_for_env_var "HUGGINGFACEHUB_API_TOKEN" "Enter the HUGGINGFACEHUB_API_TOKEN" "${HF_TOKEN}" true
prompt_for_env_var "HUGGINGFACEHUB_API_TOKEN" "Enter the HUGGINGFACEHUB_API_TOKEN" "" true
prompt_for_env_var "RERANK_MODEL_ID" "Enter the RERANK_MODEL_ID" "BAAI/bge-reranker-base" false
prompt_for_env_var "LLM_MODEL_ID" "Enter the LLM_MODEL_ID" "meta-llama/Meta-Llama-3-8B-Instruct" false
prompt_for_env_var "INDEX_NAME" "Enter the INDEX_NAME" "rag-redis" false
@@ -50,40 +42,34 @@ prompt_for_env_var "NUM_CARDS" "Enter the number of Gaudi devices" "1" false
prompt_for_env_var "host_ip" "Enter the host_ip" "$(curl ifconfig.me)" false
#Query for enabling http_proxy
prompt_for_env_var "http_proxy" "Enter the http_proxy." "${http_proxy}" false
prompt_for_env_var "http_proxy" "Enter the http_proxy." "" false
#Query for enabling https_proxy
prompt_for_env_var "http_proxy" "Enter the http_proxy." "${https_proxy}" false
prompt_for_env_var "https_proxy" "Enter the https_proxy." "" false
#Query for enabling no_proxy
prompt_for_env_var "no_proxy" "Enter the no_proxy." "${no_proxy}" false
prompt_for_env_var "no_proxy" "Enter the no_proxy." "" false
# Query for enabling logging
if [[ "$NON_INTERACTIVE" == "true" ]]; then
# Query for enabling logging
prompt_for_env_var "LOGFLAG" "Enable logging? (yes/no): " "true" false
export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+')
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://$JAEGER_IP:4317
export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces
telemetry_flag=true
read -p "Enable logging? (yes/no): " logging && logging=$(echo "$logging" | tr '[:upper:]' '[:lower:]')
if [[ "$logging" == "yes" || "$logging" == "y" ]]; then
export LOGFLAG=true
else
# Query for enabling logging
read -p "Enable logging? (yes/no): " logging && logging=$(echo "$logging" | tr '[:upper:]' '[:lower:]')
if [[ "$logging" == "yes" || "$logging" == "y" ]]; then
export LOGFLAG=true
else
export LOGFLAG=false
fi
# Query for enabling OpenTelemetry Tracing Endpoint
read -p "Enable OpenTelemetry Tracing Endpoint? (yes/no): " telemetry && telemetry=$(echo "$telemetry" | tr '[:upper:]' '[:lower:]')
if [[ "$telemetry" == "yes" || "$telemetry" == "y" ]]; then
export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+')
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://$JAEGER_IP:4317
export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces
telemetry_flag=true
else
telemetry_flag=false
fi
export LOGFLAG=false
fi
# Query for enabling OpenTelemetry Tracing Endpoint
read -p "Enable OpenTelemetry Tracing Endpoint? (yes/no): " telemetry && telemetry=$(echo "$telemetry" | tr '[:upper:]' '[:lower:]')
if [[ "$telemetry" == "yes" || "$telemetry" == "y" ]]; then
export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+')
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://$JAEGER_IP:4317
export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces
telemetry_flag=true
pushd "grafana/dashboards" > /dev/null
source download_opea_dashboard.sh
popd > /dev/null
else
telemetry_flag=false
fi
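Note (not part of the diff): a quick sanity check of the derived telemetry settings, assuming the same interface-detection command succeeds on the host.
```bash
# Confirm the auto-detected source IP and the endpoints built from it.
ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+'
echo "$OTEL_EXPORTER_OTLP_TRACES_ENDPOINT"   # grpc://<JAEGER_IP>:4317
echo "$TELEMETRY_ENDPOINT"                   # http://<JAEGER_IP>:4318/v1/traces
```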
# Generate the .env file
@@ -92,7 +78,7 @@ cat <<EOF > .env
# Set all required ENV values
export TAG=${TAG}
export EMBEDDING_MODEL_ID=${EMBEDDING_MODEL_ID}
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
export HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN
export RERANK_MODEL_ID=${RERANK_MODEL_ID}
export LLM_MODEL_ID=${LLM_MODEL_ID}
export INDEX_NAME=${INDEX_NAME}
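Note (not part of the diff): a hedged usage sketch of the generated file, assuming the heredoc above writes .env into the deployment directory and the default compose.yaml is used.
```bash
# Re-apply the saved settings in a fresh shell, then start the stack.
source .env
docker compose up -d
```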

View File

@@ -1,32 +0,0 @@
#!/usr/bin/env bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
pushd "../../../../../" > /dev/null
source .set_env.sh
popd > /dev/null
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
export HF_TOKEN=${HF_TOKEN}
export host_ip=${ip_address}
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
export INDEX_NAME="rag-redis"
export NUM_CARDS=1
export VLLM_SKIP_WARMUP=true
export LOGFLAG=True
export http_proxy=${http_proxy}
export https_proxy=${https_proxy}
export no_proxy="${ip_address},redis-vector-db,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm-service,guardrails,llm-faqgen,chatqna-gaudi-backend-server,chatqna-gaudi-ui-server,chatqna-gaudi-nginx-server"
export LLM_ENDPOINT_PORT=8010
export LLM_SERVER_PORT=9001
export CHATQNA_BACKEND_PORT=8888
export CHATQNA_REDIS_VECTOR_PORT=6377
export CHATQNA_REDIS_VECTOR_INSIGHT_PORT=8006
export CHATQNA_FRONTEND_SERVICE_PORT=5175
export NGINX_PORT=80
export FAQGen_COMPONENT_NAME="OpeaFaqGenvLLM"
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"

View File

@@ -24,7 +24,7 @@ services:
REDIS_HOST: redis-vector-db
INDEX_NAME: ${INDEX_NAME}
TEI_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
interval: 10s
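Note (not part of the diff): a manual probe of the same health endpoint, assuming the container is reachable under the dataprep-redis-service name used in the no_proxy lists elsewhere in this diff and has curl available.
```bash
# Exit code 0 means the dataprep service reports healthy.
docker exec dataprep-redis-service curl -sf http://localhost:5000/v1/health_check
```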
@@ -76,7 +76,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
@@ -98,7 +98,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HF_TOKEN}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
ipc: host

View File

@@ -1,123 +0,0 @@
# ChatQnA E2E test scripts
## Set the required environment variable
```bash
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
```
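Several compose files and test scripts in this diff also read `HF_TOKEN` directly, so it may help (an assumption, not a documented requirement) to export both names:
```bash
export HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
```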
## Run test
On Intel Xeon with TGI:
```bash
bash test_compose_tgi_on_xeon.sh
```
On Intel Xeon with vLLM:
```bash
bash test_compose_on_xeon.sh
```
On Intel Xeon with MariaDB Vector:
```bash
bash test_compose_mariadb_on_xeon.sh
```
On Intel Xeon with Pinecone:
```bash
bash test_compose_pinecone_on_xeon.sh
```
On Intel Xeon with Milvus:
```bash
bash test_compose_milvus_on_xeon.sh
```
On Intel Xeon with Qdrant:
```bash
bash test_compose_qdrant_on_xeon.sh
```
On Intel Xeon without Rerank:
```bash
bash test_compose_without_rerank_on_xeon.sh
```
On Intel Gaudi with TGI:
```bash
bash test_compose_tgi_on_gaudi.sh
```
On Intel Gaudi with vLLM:
```bash
bash test_compose_on_gaudi.sh
```
On Intel Gaudi with Guardrails:
```bash
bash test_compose_guardrails_on_gaudi.sh
```
On Intel Gaudi without Rerank:
```bash
bash test_compose_without_rerank_on_gaudi.sh
```
On AMD ROCm with TGI:
```bash
bash test_compose_on_rocm.sh
```
On AMD ROCm with vLLM:
```bash
bash test_compose_vllm_on_rocm.sh
```
Test FAQ Generation On Intel Xeon with TGI:
```bash
bash test_compose_faqgen_tgi_on_xeon.sh
```
Test FAQ Generation On Intel Xeon with vLLM:
```bash
bash test_compose_faqgen_on_xeon.sh
```
Test FAQ Generation On Intel Gaudi with TGI:
```bash
bash test_compose_faqgen_tgi_on_gaudi.sh
```
Test FAQ Generation On Intel Gaudi with vLLM:
```bash
bash test_compose_faqgen_on_gaudi.sh
```
Test FAQ Generation On AMD ROCm with TGI:
```bash
bash test_compose_faqgen_on_rocm.sh
```
Test FAQ Generation On AMD ROCm with vLLM:
```bash
bash test_compose_faqgen_vllm_on_rocm.sh
```

View File

@@ -36,7 +36,27 @@ function build_docker_images() {
function start_services() {
cd $WORKPATH/docker_compose/intel/hpu/gaudi
source set_env_faqgen.sh
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
export NUM_CARDS=1
export INDEX_NAME="rag-redis"
export host_ip=${ip_address}
export LLM_ENDPOINT_PORT=8010
export LLM_SERVER_PORT=9001
export CHATQNA_BACKEND_PORT=8888
export CHATQNA_REDIS_VECTOR_PORT=6377
export CHATQNA_REDIS_VECTOR_INSIGHT_PORT=8006
export CHATQNA_FRONTEND_SERVICE_PORT=5175
export NGINX_PORT=80
export FAQGen_COMPONENT_NAME="OpeaFaqGenvLLM"
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
export HF_TOKEN=${HF_TOKEN}
export VLLM_SKIP_WARMUP=true
export LOGFLAG=True
export http_proxy=${http_proxy}
export https_proxy=${https_proxy}
export no_proxy="${ip_address},redis-vector-db,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm-service,guardrails,llm-faqgen,chatqna-gaudi-backend-server,chatqna-gaudi-ui-server,chatqna-gaudi-nginx-server"
# Start Docker Containers
docker compose -f compose_faqgen.yaml up -d > ${LOG_PATH}/start_services_with_compose.log

View File

@@ -15,7 +15,44 @@ WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')
source $WORKPATH/docker_compose/amd/gpu/rocm/set_env_faqgen.sh
export HOST_IP=${ip_address}
export HOST_IP_EXTERNAL=${ip_address}
export CHATQNA_EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export CHATQNA_LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
export CHATQNA_RERANK_MODEL_ID="BAAI/bge-reranker-base"
export CHATQNA_BACKEND_SERVICE_PORT=8888
export CHATQNA_FRONTEND_SERVICE_PORT=5173
export CHATQNA_LLM_FAQGEN_PORT=18011
export CHATQNA_NGINX_PORT=80
export CHATQNA_REDIS_DATAPREP_PORT=18103
export CHATQNA_REDIS_RETRIEVER_PORT=7000
export CHATQNA_REDIS_VECTOR_INSIGHT_PORT=8001
export CHATQNA_REDIS_VECTOR_PORT=6379
export CHATQNA_TEI_EMBEDDING_PORT=18090
export CHATQNA_TEI_RERANKING_PORT=18808
export CHATQNA_TGI_SERVICE_PORT=18008
export CHATQNA_BACKEND_SERVICE_ENDPOINT="http://${HOST_IP_EXTERNAL}:${CHATQNA_BACKEND_SERVICE_PORT}/v1/chatqna"
export CHATQNA_BACKEND_SERVICE_IP=${HOST_IP}
export CHATQNA_DATAPREP_DELETE_FILE_ENDPOINT="http://${HOST_IP_EXTERNAL}:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/delete"
export CHATQNA_DATAPREP_GET_FILE_ENDPOINT="http://${HOST_IP_EXTERNAL}:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/get"
export CHATQNA_DATAPREP_SERVICE_ENDPOINT="http://${HOST_IP_EXTERNAL}:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/ingest"
export CHATQNA_EMBEDDING_SERVICE_HOST_IP=${HOST_IP}
export CHATQNA_FRONTEND_SERVICE_IP=${HOST_IP}
export CHATQNA_LLM_SERVICE_HOST_IP=${HOST_IP}
export CHATQNA_LLM_ENDPOINT="http://${HOST_IP}:${CHATQNA_TGI_SERVICE_PORT}"
export CHATQNA_MEGA_SERVICE_HOST_IP=${HOST_IP}
export CHATQNA_REDIS_URL="redis://${HOST_IP}:${CHATQNA_REDIS_VECTOR_PORT}"
export CHATQNA_RERANK_SERVICE_HOST_IP=${HOST_IP}
export CHATQNA_RETRIEVER_SERVICE_HOST_IP=${HOST_IP}
export CHATQNA_TEI_EMBEDDING_ENDPOINT="http://${HOST_IP}:${CHATQNA_TEI_EMBEDDING_PORT}"
export CHATQNA_BACKEND_SERVICE_NAME=chatqna
export CHATQNA_INDEX_NAME="rag-redis"
export FAQGen_COMPONENT_NAME="OpeaFaqGenTgi"
export PATH="~/miniconda3/bin:$PATH"

View File

@@ -24,7 +24,7 @@ function build_docker_images() {
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
popd && sleep 1s
git clone https://github.com/vllm-project/vllm.git && cd vllm
VLLM_VER="v0.8.3"
VLLM_VER=v0.9.0
echo "Check out vLLM tag ${VLLM_VER}"
git checkout ${VLLM_VER} &> /dev/null && cd ../
@@ -37,16 +37,26 @@ function build_docker_images() {
function start_services() {
cd $WORKPATH/docker_compose/intel/cpu/xeon
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
export INDEX_NAME="rag-redis"
export host_ip=${ip_address}
export LLM_ENDPOINT_PORT=8010
export LLM_SERVER_PORT=9001
export CHATQNA_BACKEND_PORT=8888
export CHATQNA_REDIS_VECTOR_PORT=6377
export CHATQNA_REDIS_VECTOR_INSIGHT_PORT=8006
export CHATQNA_FRONTEND_SERVICE_PORT=5175
export NGINX_PORT=80
export FAQGen_COMPONENT_NAME="OpeaFaqGenvLLM"
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
export HF_TOKEN=${HF_TOKEN}
export VLLM_SKIP_WARMUP=true
export LOGFLAG=True
export http_proxy=${http_proxy}
export https_proxy=${https_proxy}
export no_proxy="${ip_address},redis-vector-db,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm-service,guardrails,llm-faqgen,chatqna-xeon-backend-server,chatqna-xeon-ui-server,chatqna-xeon-nginx-server"
source set_env.sh
# Start Docker Containers
docker compose -f compose_faqgen.yaml up -d > ${LOG_PATH}/start_services_with_compose.log

View File

@@ -33,8 +33,25 @@ function build_docker_images() {
function start_services() {
cd $WORKPATH/docker_compose/intel/hpu/gaudi
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
export INDEX_NAME="rag-redis"
export host_ip=${ip_address}
export LLM_ENDPOINT_PORT=8010
export LLM_SERVER_PORT=9001
export CHATQNA_BACKEND_PORT=8888
export CHATQNA_REDIS_VECTOR_PORT=6377
export CHATQNA_REDIS_VECTOR_INSIGHT_PORT=8006
export CHATQNA_FRONTEND_SERVICE_PORT=5175
export NGINX_PORT=80
export FAQGen_COMPONENT_NAME="OpeaFaqGenTgi"
source set_env_faqgen.sh
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
export HF_TOKEN=${HF_TOKEN}
export LOGFLAG=True
export http_proxy=${http_proxy}
export https_proxy=${https_proxy}
export no_proxy="${ip_address},redis-vector-db,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm-service,guardrails,llm-faqgen,chatqna-gaudi-backend-server,chatqna-gaudi-ui-server,chatqna-gaudi-nginx-server"
# Start Docker Containers
docker compose -f compose_faqgen_tgi.yaml up -d > ${LOG_PATH}/start_services_with_compose.log

View File

@@ -24,7 +24,7 @@ function build_docker_images() {
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
popd && sleep 1s
git clone https://github.com/vllm-project/vllm.git && cd vllm
VLLM_VER="v0.8.3"
VLLM_VER=v0.9.0
echo "Check out vLLM tag ${VLLM_VER}"
git checkout ${VLLM_VER} &> /dev/null && cd ../
@@ -37,16 +37,25 @@ function build_docker_images() {
function start_services() {
cd $WORKPATH/docker_compose/intel/cpu/xeon
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
export INDEX_NAME="rag-redis"
export host_ip=${ip_address}
export LLM_ENDPOINT_PORT=8010
export LLM_SERVER_PORT=9001
export CHATQNA_BACKEND_PORT=8888
export CHATQNA_REDIS_VECTOR_PORT=6377
export CHATQNA_REDIS_VECTOR_INSIGHT_PORT=8006
export CHATQNA_FRONTEND_SERVICE_PORT=5175
export NGINX_PORT=80
export FAQGen_COMPONENT_NAME="OpeaFaqGenTgi"
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
export HF_TOKEN=${HF_TOKEN}
export LOGFLAG=True
export http_proxy=${http_proxy}
export https_proxy=${https_proxy}
export no_proxy="${ip_address},redis-vector-db,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm-service,guardrails,llm-faqgen,chatqna-xeon-backend-server,chatqna-xeon-ui-server,chatqna-xeon-nginx-server"
source set_env.sh
# Start Docker Containers
docker compose -f compose_faqgen_tgi.yaml up -d > ${LOG_PATH}/start_services_with_compose.log

View File

@@ -14,7 +14,41 @@ WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')
source $WORKPATH/docker_compose/amd/gpu/rocm/set_env_faqgen_vllm.sh
export HOST_IP=${ip_address}
export HOST_IP_EXTERNAL=${ip_address}
export CHATQNA_EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export CHATQNA_LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
export CHATQNA_RERANK_MODEL_ID="BAAI/bge-reranker-base"
export CHATQNA_BACKEND_SERVICE_PORT=8888
export CHATQNA_FRONTEND_SERVICE_PORT=5173
export CHATQNA_LLM_FAQGEN_PORT=18011
export CHATQNA_NGINX_PORT=80
export CHATQNA_REDIS_DATAPREP_PORT=18103
export CHATQNA_REDIS_RETRIEVER_PORT=7000
export CHATQNA_REDIS_VECTOR_INSIGHT_PORT=8001
export CHATQNA_REDIS_VECTOR_PORT=6379
export CHATQNA_TEI_EMBEDDING_PORT=18090
export CHATQNA_TEI_RERANKING_PORT=18808
export CHATQNA_VLLM_SERVICE_PORT=18008
export CHATQNA_BACKEND_SERVICE_ENDPOINT="http://${HOST_IP_EXTERNAL}:${CHATQNA_BACKEND_SERVICE_PORT}/v1/chatqna"
export CHATQNA_BACKEND_SERVICE_IP=${HOST_IP_EXTERNAL}
export CHATQNA_DATAPREP_DELETE_FILE_ENDPOINT="http://${HOST_IP_EXTERNAL}:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/delete"
export CHATQNA_DATAPREP_GET_FILE_ENDPOINT="http://${HOST_IP_EXTERNAL}:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/get"
export CHATQNA_DATAPREP_SERVICE_ENDPOINT="http://${HOST_IP_EXTERNAL}:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/ingest"
export CHATQNA_FRONTEND_SERVICE_IP=${HOST_IP}
export CHATQNA_MEGA_SERVICE_HOST_IP=${HOST_IP}
export CHATQNA_REDIS_URL="redis://${HOST_IP}:${CHATQNA_REDIS_VECTOR_PORT}"
export CHATQNA_TEI_EMBEDDING_ENDPOINT="http://${HOST_IP}:${CHATQNA_TEI_EMBEDDING_PORT}"
export LLM_ENDPOINT="http://${HOST_IP}:${CHATQNA_VLLM_SERVICE_PORT}"
export CHATQNA_BACKEND_SERVICE_NAME=chatqna
export CHATQNA_INDEX_NAME="rag-redis"
export CHATQNA_TYPE="CHATQNA_FAQGEN"
export FAQGen_COMPONENT_NAME="OpeaFaqGenvLLM"
function build_docker_images() {
opea_branch=${opea_branch:-"main"}

View File

@@ -2,7 +2,7 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -xe
set -e
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
@@ -36,8 +36,14 @@ function build_docker_images() {
function start_services() {
cd $WORKPATH/docker_compose/intel/hpu/gaudi
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
export NUM_CARDS=1
export INDEX_NAME="rag-redis"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export host_ip=${ip_address}
export GURADRAILS_MODEL_ID="meta-llama/Meta-Llama-Guard-2-8B"
source set_env_faqgen.sh
# Start Docker Containers
docker compose -f compose_guardrails.yaml up -d > ${LOG_PATH}/start_services_with_compose.log

View File

@@ -2,7 +2,7 @@
# Copyright (C) 2025 MariaDB Foundation
# SPDX-License-Identifier: Apache-2.0
set -xe
set -e
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
@@ -24,7 +24,7 @@ function build_docker_images() {
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
popd && sleep 1s
git clone https://github.com/vllm-project/vllm.git && cd vllm
VLLM_VER="v0.8.3"
VLLM_VER=v0.9.0
echo "Check out vLLM tag ${VLLM_VER}"
git checkout ${VLLM_VER} &> /dev/null
# make sure NOT change the pwd
@@ -39,8 +39,14 @@ function build_docker_images() {
function start_services() {
cd $WORKPATH/docker_compose/intel/cpu/xeon
export MARIADB_DATABASE="vectordb"
export MARIADB_USER="chatqna"
export MARIADB_PASSWORD="test"
source set_env_mariadb.sh
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export host_ip=${ip_address}
# Start Docker Containers
docker compose -f compose_mariadb.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
@@ -134,7 +140,7 @@ function validate_megaservice() {
function stop_docker() {
cd $WORKPATH/docker_compose/intel/cpu/xeon
docker compose -f compose_mariadb.yaml down
docker compose down
}
function main() {

View File

@@ -2,7 +2,7 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -xe
set -e
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
@@ -25,7 +25,7 @@ function build_docker_images() {
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
popd && sleep 1s
git clone https://github.com/vllm-project/vllm.git && cd vllm
VLLM_VER="v0.8.3"
VLLM_VER=v0.9.0
echo "Check out vLLM tag ${VLLM_VER}"
git checkout ${VLLM_VER} &> /dev/null
# make sure NOT change the pwd
@@ -39,8 +39,11 @@ function build_docker_images() {
}
function start_services() {
cd $WORKPATH/docker_compose/intel/cpu/xeon/
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export LOGFLAG=true
source set_env.sh
# Start Docker Containers
docker compose -f compose_milvus.yaml up -d > ${LOG_PATH}/start_services_with_compose.log

View File

@@ -2,7 +2,7 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -xe
set -e
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
@@ -36,10 +36,16 @@ function build_docker_images() {
function start_services() {
cd $WORKPATH/docker_compose/intel/hpu/gaudi
export NON_INTERACTIVE=true
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
export NUM_CARDS=1
export INDEX_NAME="rag-redis"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export host_ip=${ip_address}
export telemetry=yes
source set_env.sh
export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+')
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://$JAEGER_IP:4317
export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces
# Start Docker Containers
docker compose -f compose.yaml -f compose.telemetry.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
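Note (not part of the diff): with multiple `-f` files, Docker Compose merges them in order, so the telemetry overlay only layers the tracing settings on top of the base file. A hedged teardown sketch for the same merged project:
```bash
docker compose -f compose.yaml -f compose.telemetry.yaml down --remove-orphans
```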

View File

@@ -15,7 +15,41 @@ WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')
source $WORKPATH/docker_compose/amd/gpu/rocm/set_env.sh
export HOST_IP=${ip_address}
export HOST_IP_EXTERNAL=${ip_address}
export CHATQNA_EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export CHATQNA_LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
export CHATQNA_RERANK_MODEL_ID="BAAI/bge-reranker-base"
export CHATQNA_BACKEND_SERVICE_PORT=8888
export CHATQNA_FRONTEND_SERVICE_PORT=5173
export CHATQNA_NGINX_PORT=80
export CHATQNA_REDIS_DATAPREP_PORT=18103
export CHATQNA_REDIS_RETRIEVER_PORT=7000
export CHATQNA_REDIS_VECTOR_INSIGHT_PORT=8001
export CHATQNA_REDIS_VECTOR_PORT=6379
export CHATQNA_TEI_EMBEDDING_PORT=18090
export CHATQNA_TEI_RERANKING_PORT=18808
export CHATQNA_TGI_SERVICE_PORT=18008
export CHATQNA_BACKEND_SERVICE_ENDPOINT="http://${HOST_IP_EXTERNAL}:${CHATQNA_BACKEND_SERVICE_PORT}/v1/chatqna"
export CHATQNA_BACKEND_SERVICE_IP=${HOST_IP}
export CHATQNA_DATAPREP_DELETE_FILE_ENDPOINT="http://${HOST_IP_EXTERNAL}:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/delete"
export CHATQNA_DATAPREP_GET_FILE_ENDPOINT="http://${HOST_IP_EXTERNAL}:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/get"
export CHATQNA_DATAPREP_SERVICE_ENDPOINT="http://${HOST_IP_EXTERNAL}:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/ingest"
export CHATQNA_EMBEDDING_SERVICE_HOST_IP=${HOST_IP}
export CHATQNA_FRONTEND_SERVICE_IP=${HOST_IP}
export CHATQNA_LLM_SERVICE_HOST_IP=${HOST_IP}
export CHATQNA_MEGA_SERVICE_HOST_IP=${HOST_IP}
export CHATQNA_REDIS_URL="redis://${HOST_IP}:${CHATQNA_REDIS_VECTOR_PORT}"
export CHATQNA_RERANK_SERVICE_HOST_IP=${HOST_IP}
export CHATQNA_RETRIEVER_SERVICE_HOST_IP=${HOST_IP}
export CHATQNA_TEI_EMBEDDING_ENDPOINT="http://${HOST_IP}:${CHATQNA_TEI_EMBEDDING_PORT}"
export CHATQNA_BACKEND_SERVICE_NAME=chatqna
export CHATQNA_INDEX_NAME="rag-redis"
export PATH="~/miniconda3/bin:$PATH"

View File

@@ -2,7 +2,7 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -xe
set -e
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
@@ -24,7 +24,7 @@ function build_docker_images() {
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
popd && sleep 1s
git clone https://github.com/vllm-project/vllm.git && cd vllm
VLLM_VER="v0.8.3"
VLLM_VER=v0.9.0
echo "Check out vLLM tag ${VLLM_VER}"
git checkout ${VLLM_VER} &> /dev/null
# make sure NOT change the pwd
@@ -40,7 +40,15 @@ function build_docker_images() {
function start_services() {
cd $WORKPATH/docker_compose/intel/cpu/xeon
source set_env.sh
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
export INDEX_NAME="rag-redis"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export host_ip=${ip_address}
export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+')
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://$JAEGER_IP:4317
export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces
# Start Docker Containers
docker compose -f compose.yaml -f compose.telemetry.yaml up -d > ${LOG_PATH}/start_services_with_compose.log

View File

@@ -2,7 +2,7 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -xe
set -e
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
@@ -25,7 +25,7 @@ function build_docker_images() {
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
popd && sleep 1s
git clone https://github.com/vllm-project/vllm.git && cd vllm
VLLM_VER="v0.8.3"
VLLM_VER=v0.9.0
echo "Check out vLLM tag ${VLLM_VER}"
git checkout ${VLLM_VER} &> /dev/null
# Not change the pwd
@@ -41,11 +41,14 @@ function build_docker_images() {
function start_services() {
cd $WORKPATH/docker_compose/intel/cpu/xeon/
export no_proxy=${no_proxy},${ip_address}
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
export PINECONE_API_KEY=${PINECONE_KEY_LANGCHAIN_TEST}
export PINECONE_INDEX_NAME="langchain-test"
export INDEX_NAME="langchain-test"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export LOGFLAG=true
source set_env.sh
# Start Docker Containers
docker compose -f compose_pinecone.yaml up -d > ${LOG_PATH}/start_services_with_compose.log

View File

@@ -2,7 +2,7 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -xe
set -e
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
@@ -24,7 +24,7 @@ function build_docker_images() {
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
popd && sleep 1s
git clone https://github.com/vllm-project/vllm.git && cd vllm
VLLM_VER="v0.8.3"
VLLM_VER=v0.9.0
echo "Check out vLLM tag ${VLLM_VER}"
git checkout ${VLLM_VER} &> /dev/null
# Not change the pwd
@@ -40,8 +40,11 @@ function build_docker_images() {
function start_services() {
cd $WORKPATH/docker_compose/intel/cpu/xeon
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
export INDEX_NAME="rag-qdrant"
source set_env.sh
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env

View File

@@ -2,7 +2,7 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -xe
set -e
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
@@ -32,10 +32,15 @@ function build_docker_images() {
function start_services() {
cd $WORKPATH/docker_compose/intel/hpu/gaudi
export NON_INTERACTIVE=true
export host_ip=${ip_address}
export telemetry=yes
source set_env.sh
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
export NUM_CARDS=1
export INDEX_NAME="rag-redis"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+')
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://$JAEGER_IP:4317
export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces
# Start Docker Containers
docker compose -f compose_tgi.yaml -f compose_tgi.telemetry.yaml up -d > ${LOG_PATH}/start_services_with_compose.log

View File

@@ -2,7 +2,7 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -xe
set -e
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
@@ -33,7 +33,14 @@ function build_docker_images() {
function start_services() {
cd $WORKPATH/docker_compose/intel/cpu/xeon
source set_env.sh
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
export INDEX_NAME="rag-redis"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+')
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://$JAEGER_IP:4317
export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces
# Start Docker Containers
docker compose -f compose_tgi.yaml -f compose_tgi.telemetry.yaml up -d > ${LOG_PATH}/start_services_with_compose.log

View File

@@ -2,7 +2,7 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -xe
set -e
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
@@ -14,7 +14,42 @@ WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')
source $WORKPATH/docker_compose/amd/gpu/rocm/set_env_vllm.sh
export HOST_IP=${ip_address}
export HOST_IP_EXTERNAL=${ip_address}
export CHATQNA_EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export CHATQNA_LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
export CHATQNA_RERANK_MODEL_ID="BAAI/bge-reranker-base"
export CHATQNA_BACKEND_SERVICE_PORT=8888
export CHATQNA_FRONTEND_SERVICE_PORT=5173
export CHATQNA_NGINX_PORT=80
export CHATQNA_REDIS_DATAPREP_PORT=18103
export CHATQNA_REDIS_RETRIEVER_PORT=7000
export CHATQNA_REDIS_VECTOR_INSIGHT_PORT=8001
export CHATQNA_REDIS_VECTOR_PORT=6379
export CHATQNA_TEI_EMBEDDING_PORT=18090
export CHATQNA_TEI_RERANKING_PORT=18808
export CHATQNA_VLLM_SERVICE_PORT=18008
export CHATQNA_BACKEND_SERVICE_ENDPOINT="http://${HOST_IP_EXTERNAL}:${CHATQNA_BACKEND_SERVICE_PORT}/v1/chatqna"
export CHATQNA_BACKEND_SERVICE_IP=${HOST_IP_EXTERNAL}
export CHATQNA_DATAPREP_DELETE_FILE_ENDPOINT="http://${HOST_IP_EXTERNAL}:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/delete"
export CHATQNA_DATAPREP_GET_FILE_ENDPOINT="http://${HOST_IP_EXTERNAL}:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/get"
export CHATQNA_DATAPREP_SERVICE_ENDPOINT="http://${HOST_IP_EXTERNAL}:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/ingest"
export CHATQNA_EMBEDDING_SERVICE_HOST_IP=${HOST_IP}
export CHATQNA_FRONTEND_SERVICE_IP=${HOST_IP}
export CHATQNA_LLM_SERVICE_HOST_IP=${HOST_IP}
export CHATQNA_MEGA_SERVICE_HOST_IP=${HOST_IP}
export CHATQNA_REDIS_URL="redis://${HOST_IP}:${CHATQNA_REDIS_VECTOR_PORT}"
export CHATQNA_RERANK_SERVICE_HOST_IP=${HOST_IP}
export CHATQNA_RETRIEVER_SERVICE_HOST_IP=${HOST_IP}
export CHATQNA_TEI_EMBEDDING_ENDPOINT="http://${HOST_IP}:${CHATQNA_TEI_EMBEDDING_PORT}"
export CHATQNA_BACKEND_SERVICE_NAME=chatqna
export CHATQNA_INDEX_NAME="rag-redis"
function build_docker_images() {
opea_branch=${opea_branch:-"main"}

View File

@@ -2,7 +2,7 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -xe
set -e
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
@@ -36,8 +36,11 @@ function build_docker_images() {
function start_services() {
cd $WORKPATH/docker_compose/intel/hpu/gaudi
export NON_INTERACTIVE=true
source set_env.sh
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
export NUM_CARDS=1
export INDEX_NAME="rag-redis"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
# Start Docker Containers
docker compose -f compose_without_rerank.yaml up -d > ${LOG_PATH}/start_services_with_compose.log

View File

@@ -2,7 +2,7 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -xe
set -e
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
@@ -25,7 +25,7 @@ function build_docker_images() {
popd && sleep 1s
git clone https://github.com/vllm-project/vllm.git && cd vllm
VLLM_VER="v0.8.3"
VLLM_VER=v0.9.0
echo "Check out vLLM tag ${VLLM_VER}"
git checkout ${VLLM_VER} &> /dev/null
# Not change the pwd
@@ -41,7 +41,10 @@ function build_docker_images() {
function start_services() {
cd $WORKPATH/docker_compose/intel/cpu/xeon
source set_env.sh
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
export INDEX_NAME="rag-redis"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
# Start Docker Containers
docker compose -f compose_without_rerank.yaml up -d > ${LOG_PATH}/start_services_with_compose.log

View File

@@ -1,9 +1,8 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
ARG IMAGE_REPO=opea
ARG BASE_TAG=latest
FROM $IMAGE_REPO/comps-base:$BASE_TAG
FROM opea/comps-base:$BASE_TAG
COPY ./codegen.py $HOME/codegen.py

Some files were not shown because too many files have changed in this diff.