Compare commits
xuehao/sco...HF-TOKEN (2 commits: 3e29a76fc4, c70b021689)
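Every hunk in this comparison makes the same substitution: references to the legacy `HUGGINGFACEHUB_API_TOKEN` secret and environment variable are repointed at the new `HF_TOKEN` secret. As a minimal sketch (not part of the diff; the token value is a placeholder), the equivalent local setup keeps the legacy name alive by deriving it from the new one:

```bash
# Set the new canonical token once (placeholder value; use your own token).
export HF_TOKEN="hf_xxxxxxxxxxxxxxxx"
# Derive the legacy name for components that still read it.
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
```

This mirrors the compatibility pattern used throughout the diff, where both names stay exported but both now resolve to `HF_TOKEN`.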
.github/workflows/_gmc-e2e.yml (vendored, 2 changes)
@@ -55,7 +55,7 @@ jobs:
 - name: Run tests
   id: run-test
   env:
-    HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
+    HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HF_TOKEN }}
     GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
     GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
   run: |
.github/workflows/_helm-e2e.yml (vendored, 4 changes)
@@ -165,8 +165,8 @@ jobs:
   env:
     GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
     GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
-    HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
-    HFTOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
+    HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HF_TOKEN }}
+    HFTOKEN: ${{ secrets.HF_TOKEN }}
     value_file: ${{ matrix.value_file }}
   run: |
     set -xe
.github/workflows/_run-docker-compose.yml (vendored, 4 changes)
@@ -160,8 +160,8 @@ jobs:
 - name: Run test
   shell: bash
   env:
-    HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
-    HF_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
+    HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HF_TOKEN }}
+    HF_TOKEN: ${{ secrets.HF_TOKEN }}
     GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
     GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
     PINECONE_KEY: ${{ secrets.PINECONE_KEY }}
@@ -13,8 +13,8 @@ services:
 http_proxy: ${http_proxy}
 https_proxy: ${https_proxy}
 TGI_LLM_ENDPOINT: "http://${ip_address}:${TGI_SERVICE_PORT}"
-HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 shm_size: 32g
 devices:
 - /dev/kfd:/dev/kfd
@@ -42,7 +42,7 @@ services:
 with_memory: false
 recursion_limit: ${recursion_limit_worker}
 llm_engine: tgi
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 llm_endpoint_url: ${LLM_ENDPOINT_URL}
 model: ${LLM_MODEL_ID}
 temperature: ${temperature}
@@ -76,7 +76,7 @@ services:
 use_hints: false
 recursion_limit: ${recursion_limit_worker}
 llm_engine: vllm
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 llm_endpoint_url: ${LLM_ENDPOINT_URL}
 model: ${LLM_MODEL_ID}
 temperature: ${temperature}
@@ -104,7 +104,7 @@ services:
 with_memory: true
 recursion_limit: ${recursion_limit_supervisor}
 llm_engine: tgi
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 llm_endpoint_url: ${LLM_ENDPOINT_URL}
 model: ${LLM_MODEL_ID}
 temperature: ${temperature}
@@ -10,8 +10,8 @@ services:
 no_proxy: ${no_proxy}
 http_proxy: ${http_proxy}
 https_proxy: ${https_proxy}
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
+HF_TOKEN: ${HF_TOKEN}
 HF_HUB_DISABLE_PROGRESS_BARS: 1
 HF_HUB_ENABLE_HF_TRANSFER: 0
 WILM_USE_TRITON_FLASH_ATTENTION: 0
@@ -46,7 +46,7 @@ services:
 with_memory: false
 recursion_limit: ${recursion_limit_worker}
 llm_engine: vllm
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 llm_endpoint_url: ${LLM_ENDPOINT_URL}
 model: ${LLM_MODEL_ID}
 temperature: ${temperature}
@@ -80,7 +80,7 @@ services:
 use_hints: false
 recursion_limit: ${recursion_limit_worker}
 llm_engine: vllm
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 llm_endpoint_url: ${LLM_ENDPOINT_URL}
 model: ${LLM_MODEL_ID}
 temperature: ${temperature}
@@ -108,7 +108,7 @@ services:
 with_memory: true
 recursion_limit: ${recursion_limit_supervisor}
 llm_engine: vllm
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 llm_endpoint_url: ${LLM_ENDPOINT_URL}
 model: ${LLM_MODEL_ID}
 temperature: ${temperature}
@@ -19,7 +19,7 @@ export CRAG_SERVER_PORT="18114"

 export WORKPATH=$(dirname "$PWD")
 export WORKDIR=${WORKPATH}/../../../
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
 export HF_CACHE_DIR="./data"
 export MODEL_CACHE="./data"
@@ -39,7 +39,7 @@ export CRAG_SERVER=http://${ip_address}:${CRAG_SERVER_PORT}
 export WORKER_AGENT_URL="http://${ip_address}:${WORKER_RAG_AGENT_PORT}/v1/chat/completions"
 export SQL_AGENT_URL="http://${ip_address}:${WORKER_SQL_AGENT_PORT}/v1/chat/completions"
 export HF_CACHE_DIR=${HF_CACHE_DIR}
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 export no_proxy=${no_proxy}
 export http_proxy=${http_proxy}
 export https_proxy=${https_proxy}
@@ -19,7 +19,7 @@ export CRAG_SERVER_PORT="18114"

 export WORKPATH=$(dirname "$PWD")
 export WORKDIR=${WORKPATH}/../../../
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 export VLLM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
 export HF_CACHE_DIR="./data"
 export MODEL_CACHE="./data"
@@ -40,7 +40,7 @@ export CRAG_SERVER=http://${ip_address}:${CRAG_SERVER_PORT}
 export WORKER_AGENT_URL="http://${ip_address}:${WORKER_RAG_AGENT_PORT}/v1/chat/completions"
 export SQL_AGENT_URL="http://${ip_address}:${WORKER_SQL_AGENT_PORT}/v1/chat/completions"
 export HF_CACHE_DIR=${HF_CACHE_DIR}
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 export no_proxy=${no_proxy}
 export http_proxy=${http_proxy}
 export https_proxy=${https_proxy}
@@ -20,8 +20,8 @@ export CRAG_SERVER_PORT="18114"

 export WORKPATH=$(dirname "$PWD")
 export WORKDIR=${WORKPATH}/../../../
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 export VLLM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
 export HF_CACHE_DIR="./data"
 export MODEL_CACHE="./data"
@@ -42,7 +42,7 @@ export CRAG_SERVER=http://${ip_address}:${CRAG_SERVER_PORT}
 export WORKER_AGENT_URL="http://${ip_address}:${WORKER_RAG_AGENT_PORT}/v1/chat/completions"
 export SQL_AGENT_URL="http://${ip_address}:${WORKER_SQL_AGENT_PORT}/v1/chat/completions"
 export HF_CACHE_DIR=${HF_CACHE_DIR}
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 export no_proxy=${no_proxy}
 export http_proxy=${http_proxy}
 export https_proxy=${https_proxy}
@@ -33,7 +33,7 @@ fi
 # retriever
 export host_ip=$(hostname -I | awk '{print $1}')
 export HF_CACHE_DIR=${HF_CACHE_DIR}
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 export no_proxy=${no_proxy}
 export http_proxy=${http_proxy}
 export https_proxy=${https_proxy}
@@ -16,7 +16,7 @@ services:
 with_memory: false
 recursion_limit: ${recursion_limit_worker}
 llm_engine: vllm
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 llm_endpoint_url: ${LLM_ENDPOINT_URL}
 model: ${LLM_MODEL_ID}
 temperature: ${temperature}
@@ -50,7 +50,7 @@ services:
 use_hints: false
 recursion_limit: ${recursion_limit_worker}
 llm_engine: vllm
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 llm_endpoint_url: ${LLM_ENDPOINT_URL}
 model: ${LLM_MODEL_ID}
 temperature: ${temperature}
@@ -79,7 +79,7 @@ services:
 with_memory: true
 recursion_limit: ${recursion_limit_supervisor}
 llm_engine: vllm
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 llm_endpoint_url: ${LLM_ENDPOINT_URL}
 model: ${LLM_MODEL_ID}
 temperature: ${temperature}
@@ -122,7 +122,7 @@ services:
 no_proxy: ${no_proxy}
 http_proxy: ${http_proxy}
 https_proxy: ${https_proxy}
-HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HF_TOKEN: ${HF_TOKEN}
 HABANA_VISIBLE_DEVICES: all
 OMPI_MCA_btl_vader_single_copy_mechanism: none
 LLM_MODEL_ID: ${LLM_MODEL_ID}
@@ -16,8 +16,8 @@ export ip_address=$(hostname -I | awk '{print $1}')
 # LLM related environment variables
 export HF_CACHE_DIR=${HF_CACHE_DIR}
 ls $HF_CACHE_DIR
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
-export HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
+export HF_TOKEN=${HF_TOKEN}
 export LLM_MODEL_ID="meta-llama/Llama-3.3-70B-Instruct"
 export NUM_SHARDS=4
 export LLM_ENDPOINT_URL="http://${ip_address}:8086"
@@ -13,7 +13,7 @@ services:
 no_proxy: ${no_proxy}
 http_proxy: ${http_proxy}
 https_proxy: ${https_proxy}
-HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
 HF_HUB_DISABLE_PROGRESS_BARS: 1
 HF_HUB_ENABLE_HF_TRANSFER: 0
 HABANA_VISIBLE_DEVICES: all
@@ -3,7 +3,7 @@

 host_ip=$(hostname -I | awk '{print $1}')
 export HF_CACHE_DIR=${HF_CACHE_DIR}
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 export no_proxy=${no_proxy}
 export http_proxy=${http_proxy}
 export https_proxy=${https_proxy}
@@ -20,7 +20,7 @@ function start_retrieval_tool() {
 cd $WORKPATH/../DocIndexRetriever/docker_compose/intel/cpu/xeon
 host_ip=$(hostname -I | awk '{print $1}')
 export HF_CACHE_DIR=${HF_CACHE_DIR}
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 export no_proxy=${no_proxy}
 export http_proxy=${http_proxy}
 export https_proxy=${https_proxy}
@@ -11,8 +11,8 @@ export ip_address=$(hostname -I | awk '{print $1}')
 export host_ip=$ip_address
 echo "ip_address=${ip_address}"
 export TOOLSET_PATH=$WORKPATH/tools/
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
-HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
+HF_TOKEN=${HF_TOKEN}
 model="meta-llama/Llama-3.3-70B-Instruct" #"meta-llama/Meta-Llama-3.1-70B-Instruct"

 export HF_CACHE_DIR=${model_cache:-"/data2/huggingface"}
@@ -7,7 +7,7 @@ WORKPATH=$(dirname "$PWD")
 export WORKDIR=$WORKPATH/../../
 echo "WORKDIR=${WORKDIR}"
 export ip_address=$(hostname -I | awk '{print $1}')
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
 export no_proxy="$no_proxy,rag-agent-endpoint,sql-agent-endpoint,react-agent-endpoint,agent-ui,vllm-gaudi-server,jaeger,grafana,prometheus,127.0.0.1,localhost,0.0.0.0,$ip_address"
 IMAGE_REPO=${IMAGE_REPO:-"opea"}
@@ -9,7 +9,7 @@ ls $WORKPATH
 export WORKDIR=$WORKPATH/../../
 echo "WORKDIR=${WORKDIR}"
 export ip_address=$(hostname -I | awk '{print $1}')
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 export TOOLSET_PATH=$WORKPATH/tools/
 IMAGE_REPO=${IMAGE_REPO:-"opea"}
 IMAGE_TAG=${IMAGE_TAG:-"latest"}
@@ -8,7 +8,7 @@ WORKPATH=$(dirname "$PWD")
 export WORKDIR=${WORKPATH}/../../
 echo "WORKDIR=${WORKDIR}"
 export ip_address=$(hostname -I | awk '{print $1}')
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 export TOOLSET_PATH=$WORKPATH/tools/
 IMAGE_REPO=${IMAGE_REPO:-"opea"}
 IMAGE_TAG=${IMAGE_TAG:-"latest"}
@@ -40,7 +40,7 @@ services:
 no_proxy: ${no_proxy}
 http_proxy: ${http_proxy}
 https_proxy: ${https_proxy}
-HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HF_TOKEN: ${HF_TOKEN}
 HF_HUB_DISABLE_PROGRESS_BARS: 1
 HF_HUB_ENABLE_HF_TRANSFER: 0
 command: --model-id ${LLM_MODEL_ID}
@@ -35,8 +35,8 @@ services:
 no_proxy: ${no_proxy}
 http_proxy: ${http_proxy}
 https_proxy: ${https_proxy}
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
+HF_TOKEN: ${HF_TOKEN}
 HF_HUB_DISABLE_PROGRESS_BARS: 1
 HF_HUB_ENABLE_HF_TRANSFER: 0
 WILM_USE_TRITON_FLASH_ATTENTION: 0
@@ -7,7 +7,7 @@
 # export host_ip=<your External Public IP> # export host_ip=$(hostname -I | awk '{print $1}')

 export host_ip=${ip_address}
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 # <token>

 export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
@@ -8,7 +8,7 @@

 export host_ip=${ip_address}
 export external_host_ip=${ip_address}
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 export HF_CACHE_DIR="./data"
 export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
 export VLLM_SERVICE_PORT="8081"
@@ -36,7 +36,7 @@ services:
 no_proxy: ${no_proxy}
 http_proxy: ${http_proxy}
 https_proxy: ${https_proxy}
-HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HF_TOKEN: ${HF_TOKEN}
 LLM_MODEL_ID: ${LLM_MODEL_ID}
 VLLM_TORCH_PROFILER_DIR: "/mnt"
 LLM_SERVER_PORT: ${LLM_SERVER_PORT}
@@ -40,7 +40,7 @@ services:
 no_proxy: ${no_proxy}
 http_proxy: ${http_proxy}
 https_proxy: ${https_proxy}
-HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HF_TOKEN: ${HF_TOKEN}
 LLM_MODEL_ID: ${LLM_MODEL_ID}
 VLLM_TORCH_PROFILER_DIR: "/mnt"
 LLM_SERVER_PORT: ${LLM_SERVER_PORT}
@@ -36,7 +36,7 @@ services:
 no_proxy: ${no_proxy}
 http_proxy: ${http_proxy}
 https_proxy: ${https_proxy}
-HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HF_TOKEN: ${HF_TOKEN}
 LLM_SERVER_PORT: ${LLM_SERVER_PORT}
 healthcheck:
 test: ["CMD-SHELL", "curl -f http://$host_ip:${LLM_SERVER_PORT}/health || exit 1"]
@@ -5,7 +5,7 @@

 # export host_ip=<your External Public IP>
 export host_ip=$(hostname -I | awk '{print $1}')
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 # <token>

 export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
@@ -45,7 +45,7 @@ services:
 no_proxy: ${no_proxy}
 http_proxy: ${http_proxy}
 https_proxy: ${https_proxy}
-HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HF_TOKEN: ${HF_TOKEN}
 HF_HUB_DISABLE_PROGRESS_BARS: 1
 HF_HUB_ENABLE_HF_TRANSFER: 0
 HABANA_VISIBLE_DEVICES: all
@@ -45,7 +45,7 @@ services:
 no_proxy: ${no_proxy}
 http_proxy: ${http_proxy}
 https_proxy: ${https_proxy}
-HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HF_TOKEN: ${HF_TOKEN}
 HF_HUB_DISABLE_PROGRESS_BARS: 1
 HF_HUB_ENABLE_HF_TRANSFER: 0
 HABANA_VISIBLE_DEVICES: all
@@ -5,7 +5,7 @@

 # export host_ip=<your External Public IP>
 export host_ip=$(hostname -I | awk '{print $1}')
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 # <token>

 export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
@@ -68,7 +68,7 @@ Then run the command `docker images`, you will have following images ready:
 Before starting the services with `docker compose`, you have to recheck the following environment variables.

 ```bash
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 export host_ip=$(hostname -I | awk '{print $1}')

 export TGI_SERVICE_PORT=3006
@@ -52,8 +52,8 @@ services:
 no_proxy: ${no_proxy}
 http_proxy: ${http_proxy}
 https_proxy: ${https_proxy}
-HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 shm_size: 1g
 devices:
 - /dev/kfd:/dev/kfd
@@ -3,7 +3,7 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0

-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 export OPENAI_API_KEY=${OPENAI_API_KEY}
 export host_ip=$(hostname -I | awk '{print $1}')

@@ -37,7 +37,7 @@ services:
 no_proxy: ${no_proxy}
 http_proxy: ${http_proxy}
 https_proxy: ${https_proxy}
-HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HF_TOKEN: ${HF_TOKEN}
 healthcheck:
 test: ["CMD-SHELL", "curl -f http://${host_ip}:3006/health || exit 1"]
 interval: 10s
@@ -6,7 +6,7 @@ pushd "../../../../../" > /dev/null
 source .set_env.sh
 popd > /dev/null

-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 export host_ip=$(hostname -I | awk '{print $1}')
 export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
 export WAV2LIP_ENDPOINT=http://$host_ip:7860
@@ -48,7 +48,7 @@ services:
 no_proxy: ${no_proxy}
 http_proxy: ${http_proxy}
 https_proxy: ${https_proxy}
-HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
 HF_HUB_DISABLE_PROGRESS_BARS: 1
 HF_HUB_ENABLE_HF_TRANSFER: 0
 HABANA_VISIBLE_DEVICES: all
@@ -6,7 +6,7 @@ pushd "../../../../../" > /dev/null
 source .set_env.sh
 popd > /dev/null

-export HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 export host_ip=$(hostname -I | awk '{print $1}')

 export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
@@ -6,7 +6,7 @@ export HOST_IP=${ip_address}
 export HOST_IP_EXTERNAL=${ip_address}

 export CHATQNA_EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
-export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 export CHATQNA_LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
 export CHATQNA_RERANK_MODEL_ID="BAAI/bge-reranker-base"

@@ -6,7 +6,7 @@ export HOST_IP=${ip_address}
 export HOST_IP_EXTERNAL=${ip_address}

 export CHATQNA_EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
-export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 export CHATQNA_LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
 export CHATQNA_RERANK_MODEL_ID="BAAI/bge-reranker-base"

@@ -6,7 +6,7 @@ export HOST_IP=${ip_address}
 export HOST_IP_EXTERNAL=${ip_address}

 export CHATQNA_EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
-export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 export CHATQNA_LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
 export CHATQNA_RERANK_MODEL_ID="BAAI/bge-reranker-base"

@@ -6,7 +6,7 @@ export HOST_IP=${ip_address}
 export HOST_IP_EXTERNAL=${ip_address}

 export CHATQNA_EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
-export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 export CHATQNA_LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
 export CHATQNA_RERANK_MODEL_ID="BAAI/bge-reranker-base"

@@ -183,7 +183,7 @@ export https_proxy=${your_http_proxy}
 export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
 export RERANK_MODEL_ID="BAAI/bge-reranker-base"
 export INDEX_NAME="rag-redis"
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 export OLLAMA_HOST=${host_ip}
 export OLLAMA_MODEL="llama3.2"
 ```
@@ -194,7 +194,7 @@ export OLLAMA_MODEL="llama3.2"
 set EMBEDDING_MODEL_ID=BAAI/bge-base-en-v1.5
 set RERANK_MODEL_ID=BAAI/bge-reranker-base
 set INDEX_NAME=rag-redis
-set HUGGINGFACEHUB_API_TOKEN=%HUGGINGFACEHUB_API_TOKEN%
+set HUGGINGFACEHUB_API_TOKEN=%HF_TOKEN%
 set OLLAMA_HOST=host.docker.internal
 set OLLAMA_MODEL="llama3.2"
 ```
@@ -24,7 +24,7 @@ services:
 REDIS_HOST: redis-vector-db
 INDEX_NAME: ${INDEX_NAME}
 TEI_ENDPOINT: http://tei-embedding-service:80
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 healthcheck:
 test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
 interval: 10s
@@ -60,7 +60,7 @@ services:
 REDIS_HOST: redis-vector-db
 INDEX_NAME: ${INDEX_NAME}
 TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 LOGFLAG: ${LOGFLAG}
 RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
 restart: unless-stopped
@@ -76,7 +76,7 @@ services:
 no_proxy: ${no_proxy}
 http_proxy: ${http_proxy}
 https_proxy: ${https_proxy}
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 HF_HUB_DISABLE_PROGRESS_BARS: 1
 HF_HUB_ENABLE_HF_TRANSFER: 0
 command: --model-id ${RERANK_MODEL_ID} --auto-truncate
@@ -9,7 +9,7 @@ popd > /dev/null

 export host_ip=$(hostname -I | awk '{print $1}')

-if [ -z "${HUGGINGFACEHUB_API_TOKEN}" ]; then
+if [ -z "${HF_TOKEN}" ]; then
 echo "Error: HUGGINGFACEHUB_API_TOKEN is not set. Please set HUGGINGFACEHUB_API_TOKEN."
 fi
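Note that the hunk above switches the emptiness check to `HF_TOKEN` while the echoed message still names the old variable. A consistent guard might look like the following sketch (the explicit `exit 1` is an assumption; the excerpt does not show how the script terminates):

```bash
# Hypothetical consistent guard: test and report the same variable name.
if [ -z "${HF_TOKEN}" ]; then
    echo "Error: HF_TOKEN is not set. Please set HF_TOKEN."
    exit 1  # assumed behavior; not shown in the original script
fi
```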
@@ -17,7 +17,7 @@ if [ -z "${host_ip}" ]; then
 echo "Error: host_ip is not set. Please set host_ip first."
 fi

-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
 export RERANK_MODEL_ID="BAAI/bge-reranker-base"
 export INDEX_NAME="rag-redis"
@@ -31,7 +31,7 @@ services:
 REDIS_HOST: redis-vector-db
 INDEX_NAME: ${INDEX_NAME}
 TEI_ENDPOINT: http://tei-embedding-service:80
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 healthcheck:
 test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
 interval: 10s
@@ -67,7 +67,7 @@ services:
 REDIS_HOST: redis-vector-db
 INDEX_NAME: ${INDEX_NAME}
 TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 LOGFLAG: ${LOGFLAG}
 RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
 restart: unless-stopped
@@ -83,7 +83,7 @@ services:
 no_proxy: ${no_proxy}
 http_proxy: ${http_proxy}
 https_proxy: ${https_proxy}
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 HF_HUB_DISABLE_PROGRESS_BARS: 1
 HF_HUB_ENABLE_HF_TRANSFER: 0
 command: --model-id ${RERANK_MODEL_ID} --auto-truncate
@@ -99,7 +99,7 @@ services:
 no_proxy: ${no_proxy}
 http_proxy: ${http_proxy}
 https_proxy: ${https_proxy}
-HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HF_TOKEN: ${HF_TOKEN}
 LLM_MODEL_ID: ${LLM_MODEL_ID}
 VLLM_TORCH_PROFILER_DIR: "/mnt"
 VLLM_CPU_KVCACHE_SPACE: 40
@@ -24,7 +24,7 @@ services:
 REDIS_HOST: redis-vector-db
 INDEX_NAME: ${INDEX_NAME}
 TEI_ENDPOINT: http://tei-embedding-service:80
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 healthcheck:
 test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
 interval: 10s
@@ -60,7 +60,7 @@ services:
 REDIS_HOST: redis-vector-db
 INDEX_NAME: ${INDEX_NAME}
 TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 LOGFLAG: ${LOGFLAG}
 RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
 restart: unless-stopped
@@ -76,7 +76,7 @@ services:
 no_proxy: ${no_proxy}
 http_proxy: ${http_proxy}
 https_proxy: ${https_proxy}
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 HF_HUB_DISABLE_PROGRESS_BARS: 1
 HF_HUB_ENABLE_HF_TRANSFER: 0
 command: --model-id ${RERANK_MODEL_ID} --auto-truncate
@@ -24,7 +24,7 @@ services:
 REDIS_HOST: redis-vector-db
 INDEX_NAME: ${INDEX_NAME}
 TEI_ENDPOINT: http://tei-embedding-service:80
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 healthcheck:
 test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
 interval: 10s
@@ -60,7 +60,7 @@ services:
 REDIS_HOST: redis-vector-db
 INDEX_NAME: ${INDEX_NAME}
 TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 LOGFLAG: ${LOGFLAG}
 RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
 restart: unless-stopped
@@ -76,7 +76,7 @@ services:
 no_proxy: ${no_proxy}
 http_proxy: ${http_proxy}
 https_proxy: ${https_proxy}
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 HF_HUB_DISABLE_PROGRESS_BARS: 1
 HF_HUB_ENABLE_HF_TRANSFER: 0
 command: --model-id ${RERANK_MODEL_ID} --auto-truncate
@@ -35,7 +35,7 @@ services:
 DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_MARIADBVECTOR"
 MARIADB_CONNECTION_URL: mariadb+mariadbconnector://${MARIADB_USER}:${MARIADB_PASSWORD}@mariadb-server:3306/${MARIADB_DATABASE}
 TEI_ENDPOINT: http://tei-embedding-service:80
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 healthcheck:
 test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
 interval: 10s
@@ -69,7 +69,7 @@ services:
 http_proxy: ${http_proxy}
 https_proxy: ${https_proxy}
 MARIADB_CONNECTION_URL: mariadb+mariadbconnector://${MARIADB_USER}:${MARIADB_PASSWORD}@mariadb-server:3306/${MARIADB_DATABASE}
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 LOGFLAG: ${LOGFLAG}
 RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_MARIADBVECTOR"
 restart: unless-stopped
@@ -85,7 +85,7 @@ services:
 no_proxy: ${no_proxy}
 http_proxy: ${http_proxy}
 https_proxy: ${https_proxy}
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 HF_HUB_DISABLE_PROGRESS_BARS: 1
 HF_HUB_ENABLE_HF_TRANSFER: 0
 command: --model-id ${RERANK_MODEL_ID} --auto-truncate
@@ -101,7 +101,7 @@ services:
 no_proxy: ${no_proxy}
 http_proxy: ${http_proxy}
 https_proxy: ${https_proxy}
-HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HF_TOKEN: ${HF_TOKEN}
 LLM_MODEL_ID: ${LLM_MODEL_ID}
 VLLM_TORCH_PROFILER_DIR: "/mnt"
 VLLM_CPU_KVCACHE_SPACE: 40
@@ -75,7 +75,7 @@ services:
 MILVUS_HOST: ${host_ip}
 MILVUS_PORT: 19530
 TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 EMBEDDING_MODEL_ID: ${EMBEDDING_MODEL_ID}
 LOGFLAG: ${LOGFLAG}
 healthcheck:
@@ -107,7 +107,7 @@ services:
 MILVUS_HOST: ${host_ip}
 MILVUS_PORT: 19530
 TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 LOGFLAG: ${LOGFLAG}
 RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_MILVUS"
 restart: unless-stopped
@@ -138,7 +138,7 @@ services:
 no_proxy: ${no_proxy}
 http_proxy: ${http_proxy}
 https_proxy: ${https_proxy}
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 HF_HUB_DISABLE_PROGRESS_BARS: 1
 HF_HUB_ENABLE_HF_TRANSFER: 0
 command: --model-id ${RERANK_MODEL_ID} --auto-truncate
@@ -155,7 +155,7 @@ services:
 no_proxy: ${no_proxy}
 http_proxy: ${http_proxy}
 https_proxy: ${https_proxy}
-HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HF_TOKEN: ${HF_TOKEN}
 LLM_MODEL_ID: ${LLM_MODEL_ID}
 VLLM_TORCH_PROFILER_DIR: "/mnt"
 healthcheck:
@@ -20,7 +20,7 @@ services:
 PINECONE_INDEX_NAME: ${PINECONE_INDEX_NAME}
 TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
 LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_PINECONE"
 healthcheck:
 test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
@@ -55,7 +55,7 @@ services:
 PINECONE_INDEX_NAME: ${PINECONE_INDEX_NAME}
 LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
 TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 LOGFLAG: ${LOGFLAG}
 RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_PINECONE"
 restart: unless-stopped
@@ -71,7 +71,7 @@ services:
 no_proxy: ${no_proxy}
 http_proxy: ${http_proxy}
 https_proxy: ${https_proxy}
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 HF_HUB_DISABLE_PROGRESS_BARS: 1
 HF_HUB_ENABLE_HF_TRANSFER: 0
 command: --model-id ${RERANK_MODEL_ID} --auto-truncate
@@ -87,7 +87,7 @@ services:
 no_proxy: ${no_proxy}
 http_proxy: ${http_proxy}
 https_proxy: ${https_proxy}
-HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HF_TOKEN: ${HF_TOKEN}
 LLM_MODEL_ID: ${LLM_MODEL_ID}
 VLLM_TORCH_PROFILER_DIR: "/mnt"
 command: --model $LLM_MODEL_ID --host 0.0.0.0 --port 80
@@ -24,7 +24,7 @@ services:
 QDRANT_PORT: 6333
 QDRANT_INDEX_NAME: ${INDEX_NAME}
 TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_QDRANT"
 healthcheck:
 test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
@@ -76,7 +76,7 @@ services:
 no_proxy: ${no_proxy}
 http_proxy: ${http_proxy}
 https_proxy: ${https_proxy}
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 HF_HUB_DISABLE_PROGRESS_BARS: 1
 HF_HUB_ENABLE_HF_TRANSFER: 0
 command: --model-id ${RERANK_MODEL_ID} --auto-truncate
@@ -92,7 +92,7 @@ services:
 no_proxy: ${no_proxy}
 http_proxy: ${http_proxy}
 https_proxy: ${https_proxy}
-HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HF_TOKEN: ${HF_TOKEN}
 LLM_MODEL_ID: ${LLM_MODEL_ID}
 VLLM_TORCH_PROFILER_DIR: "/mnt"
 command: --model $LLM_MODEL_ID --host 0.0.0.0 --port 80
@@ -31,7 +31,7 @@ services:
 REDIS_HOST: redis-vector-db
 INDEX_NAME: ${INDEX_NAME}
 TEI_ENDPOINT: http://tei-embedding-service:80
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 tei-embedding-service:
 image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
 container_name: tei-embedding-server
@@ -61,7 +61,7 @@ services:
 REDIS_HOST: redis-vector-db
 INDEX_NAME: ${INDEX_NAME}
 TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 LOGFLAG: ${LOGFLAG}
 RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
 restart: unless-stopped
@@ -77,7 +77,7 @@ services:
 no_proxy: ${no_proxy}
 http_proxy: ${http_proxy}
 https_proxy: ${https_proxy}
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 HF_HUB_DISABLE_PROGRESS_BARS: 1
 HF_HUB_ENABLE_HF_TRANSFER: 0
 command: --model-id ${RERANK_MODEL_ID} --auto-truncate
@@ -24,7 +24,7 @@ services:
 REDIS_HOST: redis-vector-db
 INDEX_NAME: ${INDEX_NAME}
 TEI_ENDPOINT: http://tei-embedding-service:80
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 healthcheck:
 test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
 interval: 10s
@@ -60,7 +60,7 @@ services:
 REDIS_HOST: redis-vector-db
 INDEX_NAME: ${INDEX_NAME}
 TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 LOGFLAG: ${LOGFLAG}
 RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
 restart: unless-stopped
@@ -76,7 +76,7 @@ services:
 no_proxy: ${no_proxy}
 http_proxy: ${http_proxy}
 https_proxy: ${https_proxy}
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 HF_HUB_DISABLE_PROGRESS_BARS: 1
 HF_HUB_ENABLE_HF_TRANSFER: 0
 command: --model-id ${RERANK_MODEL_ID} --auto-truncate
@@ -92,7 +92,7 @@ services:
 no_proxy: ${no_proxy}
 http_proxy: ${http_proxy}
 https_proxy: ${https_proxy}
-HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HF_TOKEN: ${HF_TOKEN}
 HF_HUB_DISABLE_PROGRESS_BARS: 1
 HF_HUB_ENABLE_HF_TRANSFER: 0
 command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
@@ -24,7 +24,7 @@ services:
 REDIS_HOST: redis-vector-db
 INDEX_NAME: ${INDEX_NAME}
 TEI_ENDPOINT: http://tei-embedding-service:80
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 healthcheck:
 test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
 interval: 10s
@@ -60,7 +60,7 @@ services:
 REDIS_HOST: redis-vector-db
 INDEX_NAME: ${INDEX_NAME}
 TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 LOGFLAG: ${LOGFLAG}
 RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
 restart: unless-stopped
@@ -76,7 +76,7 @@ services:
 no_proxy: ${no_proxy}
 http_proxy: ${http_proxy}
 https_proxy: ${https_proxy}
-HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HF_TOKEN: ${HF_TOKEN}
 LLM_MODEL_ID: ${LLM_MODEL_ID}
 VLLM_TORCH_PROFILER_DIR: "/mnt"
 command: --model $LLM_MODEL_ID --host 0.0.0.0 --port 80
@@ -7,7 +7,7 @@ pushd "../../../../../" > /dev/null
 source .set_env.sh
 popd > /dev/null

-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 export HF_TOKEN=${HF_TOKEN}
 export host_ip=${ip_address}
 export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
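Because scripts like this one export both `HUGGINGFACEHUB_API_TOKEN` and `HF_TOKEN` from the same source, a quick sanity check is to confirm the two names agree inside a running container. A sketch (the container name is illustrative, not taken from this diff):

```bash
# Verify both token variables resolve to the same value in a service container.
docker exec codegen-llm-server /bin/sh -c \
  'test "$HF_TOKEN" = "$HUGGINGFACEHUB_API_TOKEN" && echo "token names agree"'
```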
@@ -7,7 +7,7 @@ pushd "../../../../../" > /dev/null
 source .set_env.sh
 popd > /dev/null

-if [ -z "${HUGGINGFACEHUB_API_TOKEN}" ]; then
+if [ -z "${HF_TOKEN}" ]; then
 echo "Error: HUGGINGFACEHUB_API_TOKEN is not set. Please set HUGGINGFACEHUB_API_TOKEN."
 fi
@@ -15,7 +15,7 @@ export host_ip=$(hostname -I | awk '{print $1}')
 export MARIADB_DATABASE="vectordb"
 export MARIADB_USER="chatqna"
 export MARIADB_PASSWORD="password"
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
 export RERANK_MODEL_ID="BAAI/bge-reranker-base"
 export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
@@ -31,7 +31,7 @@ services:
 REDIS_HOST: redis-vector-db
 INDEX_NAME: ${INDEX_NAME}
 TEI_ENDPOINT: http://tei-embedding-service:80
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 healthcheck:
 test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
 interval: 10s
@@ -67,7 +67,7 @@ services:
 REDIS_HOST: redis-vector-db
 INDEX_NAME: ${INDEX_NAME}
 TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 restart: unless-stopped
 tei-reranking-service:
 image: ghcr.io/huggingface/tei-gaudi:1.5.0
@@ -101,7 +101,7 @@ services:
 no_proxy: ${no_proxy}
 http_proxy: ${http_proxy}
 https_proxy: ${https_proxy}
-HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HF_TOKEN: ${HF_TOKEN}
 HABANA_VISIBLE_DEVICES: all
 OMPI_MCA_btl_vader_single_copy_mechanism: none
 LLM_MODEL_ID: ${LLM_MODEL_ID}
@@ -24,7 +24,7 @@ services:
 REDIS_HOST: redis-vector-db
 INDEX_NAME: ${INDEX_NAME}
 TEI_ENDPOINT: http://tei-embedding-service:80
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 LOGFLAG: ${LOGFLAG}
 healthcheck:
 test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
@@ -61,7 +61,7 @@ services:
 REDIS_HOST: redis-vector-db
 INDEX_NAME: ${INDEX_NAME}
 TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 restart: unless-stopped
 tei-reranking-service:
 image: ghcr.io/huggingface/tei-gaudi:1.5.0
@@ -24,7 +24,7 @@ services:
 REDIS_HOST: redis-vector-db
 INDEX_NAME: ${INDEX_NAME}
 TEI_ENDPOINT: http://tei-embedding-service:80
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 LOGFLAG: ${LOGFLAG}
 healthcheck:
 test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
@@ -61,7 +61,7 @@ services:
 REDIS_HOST: redis-vector-db
 INDEX_NAME: ${INDEX_NAME}
 TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 restart: unless-stopped
 tei-reranking-service:
 image: ghcr.io/huggingface/tei-gaudi:1.5.0
@@ -95,7 +95,7 @@ services:
 no_proxy: ${no_proxy}
 http_proxy: ${http_proxy}
 https_proxy: ${https_proxy}
-HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
 HF_HUB_DISABLE_PROGRESS_BARS: 1
 HF_HUB_ENABLE_HF_TRANSFER: 0
 HABANA_VISIBLE_DEVICES: all
@@ -24,7 +24,7 @@ services:
 REDIS_HOST: redis-vector-db
 INDEX_NAME: ${INDEX_NAME}
 TEI_ENDPOINT: http://tei-embedding-service:80
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 healthcheck:
 test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
 interval: 10s
@@ -42,7 +42,7 @@ services:
 no_proxy: ${no_proxy}
 http_proxy: ${http_proxy}
 https_proxy: ${https_proxy}
-HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HF_TOKEN: ${HF_TOKEN}
 HABANA_VISIBLE_DEVICES: all
 OMPI_MCA_btl_vader_single_copy_mechanism: none
 GURADRAILS_MODEL_ID: ${GURADRAILS_MODEL_ID}
@@ -73,7 +73,7 @@ services:
 https_proxy: ${https_proxy}
 SAFETY_GUARD_MODEL_ID: ${GURADRAILS_MODEL_ID}
 SAFETY_GUARD_ENDPOINT: http://vllm-guardrails-service:80
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 restart: unless-stopped
 tei-embedding-service:
 image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
@@ -104,7 +104,7 @@ services:
 REDIS_HOST: redis-vector-db
 INDEX_NAME: ${INDEX_NAME}
 TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 LOGFLAG: ${LOGFLAG}
 RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
 restart: unless-stopped
@@ -140,7 +140,7 @@ services:
 no_proxy: ${no_proxy}
 http_proxy: ${http_proxy}
 https_proxy: ${https_proxy}
-HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HF_TOKEN: ${HF_TOKEN}
 HABANA_VISIBLE_DEVICES: all
 OMPI_MCA_btl_vader_single_copy_mechanism: none
 LLM_MODEL_ID: ${LLM_MODEL_ID}
@@ -24,7 +24,7 @@ services:
 REDIS_HOST: redis-vector-db
 INDEX_NAME: ${INDEX_NAME}
 TEI_ENDPOINT: http://tei-embedding-service:80
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 healthcheck:
 test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
 interval: 10s
@@ -60,7 +60,7 @@ services:
 REDIS_HOST: redis-vector-db
 INDEX_NAME: ${INDEX_NAME}
 TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 LOGFLAG: ${LOGFLAG}
 RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
 restart: unless-stopped
@@ -96,7 +96,7 @@ services:
 no_proxy: ${no_proxy}
 http_proxy: ${http_proxy}
 https_proxy: ${https_proxy}
-HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
 HF_HUB_DISABLE_PROGRESS_BARS: 1
 HF_HUB_ENABLE_HF_TRANSFER: 0
 HABANA_VISIBLE_DEVICES: all
@@ -24,7 +24,7 @@ services:
 REDIS_HOST: redis-vector-db
 INDEX_NAME: ${INDEX_NAME}
 TEI_ENDPOINT: http://tei-embedding-service:80
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 healthcheck:
 test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
 interval: 10s
@@ -60,7 +60,7 @@ services:
 REDIS_HOST: redis-vector-db
 INDEX_NAME: ${INDEX_NAME}
 TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 LOGFLAG: ${LOGFLAG}
 RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
 restart: unless-stopped
@@ -75,7 +75,7 @@ services:
 no_proxy: ${no_proxy}
 http_proxy: ${http_proxy}
 https_proxy: ${https_proxy}
-HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HF_TOKEN: ${HF_TOKEN}
 HABANA_VISIBLE_DEVICES: all
 OMPI_MCA_btl_vader_single_copy_mechanism: none
 LLM_MODEL_ID: ${LLM_MODEL_ID}
@@ -123,7 +123,7 @@ View the docker input parameters in `./ChatQnA/docker_compose/intel/hpu/gaudi/co
 environment:
   http_proxy: ${http_proxy}
   https_proxy: ${https_proxy}
-  HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+  HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
   HABANA_VISIBLE_DEVICES: all
   OMPI_MCA_btl_vader_single_copy_mechanism: none
   ENABLE_HPU_GRAPH: true
@@ -92,7 +92,7 @@ cat <<EOF > .env
 # Set all required ENV values
 export TAG=${TAG}
 export EMBEDDING_MODEL_ID=${EMBEDDING_MODEL_ID}
-export HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 export RERANK_MODEL_ID=${RERANK_MODEL_ID}
 export LLM_MODEL_ID=${LLM_MODEL_ID}
 export INDEX_NAME=${INDEX_NAME}
@@ -7,7 +7,7 @@ pushd "../../../../../" > /dev/null
 source .set_env.sh
 popd > /dev/null

-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 export HF_TOKEN=${HF_TOKEN}
 export host_ip=${ip_address}
 export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
@@ -24,7 +24,7 @@ services:
 REDIS_HOST: redis-vector-db
 INDEX_NAME: ${INDEX_NAME}
 TEI_ENDPOINT: http://tei-embedding-service:80
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 healthcheck:
 test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
 interval: 10s
@@ -76,7 +76,7 @@ services:
 no_proxy: ${no_proxy}
 http_proxy: ${http_proxy}
 https_proxy: ${https_proxy}
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 HF_HUB_DISABLE_PROGRESS_BARS: 1
 HF_HUB_ENABLE_HF_TRANSFER: 0
 command: --model-id ${RERANK_MODEL_ID} --auto-truncate
@@ -98,7 +98,7 @@ services:
 no_proxy: ${no_proxy}
 http_proxy: ${http_proxy}
 https_proxy: ${https_proxy}
-HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HF_TOKEN: ${HF_TOKEN}
 HF_HUB_DISABLE_PROGRESS_BARS: 1
 HF_HUB_ENABLE_HF_TRANSFER: 0
 ipc: host
@@ -5,14 +5,14 @@
 # SPDX-License-Identifier: Apache-2.0

 ### The IP address or domain name of the server on which the application is running
-export HOST_IP=''
-export EXTERNAL_HOST_IP=''
+export HOST_IP=${ip_address}
+export EXTERNAL_HOST_IP=${ip_address}

 ### The port of the TGI service. On this port, the TGI service will accept connections
 export CODEGEN_TGI_SERVICE_PORT=8028

 ### A token for accessing repositories with models
-export CODEGEN_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export CODEGEN_HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}

 ### Model ID
 export CODEGEN_LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
@@ -27,7 +27,7 @@ export CODEGEN_TGI_LLM_ENDPOINT="http://${HOST_IP}:${CODEGEN_TGI_SERVICE_PORT}"
 export CODEGEN_MEGA_SERVICE_HOST_IP=${HOST_IP}

 ### The port for CodeGen backend service
-export CODEGEN_BACKEND_SERVICE_PORT=18150
+export CODEGEN_BACKEND_SERVICE_PORT=7778

 ### The URL of CodeGen backend service, used by the frontend service
 export CODEGEN_BACKEND_SERVICE_URL="http://${EXTERNAL_HOST_IP}:${CODEGEN_BACKEND_SERVICE_PORT}/v1/codegen"
@@ -36,4 +36,4 @@ export CODEGEN_BACKEND_SERVICE_URL="http://${EXTERNAL_HOST_IP}:${CODEGEN_BACKEND
 export CODEGEN_LLM_SERVICE_HOST_IP=${HOST_IP}

 ### The CodeGen service UI port
-export CODEGEN_UI_SERVICE_PORT=18151
+export CODEGEN_UI_SERVICE_PORT=5173
@@ -5,15 +5,15 @@
 # SPDX-License-Identifier: Apache-2.0

 ### The IP address or domain name of the server on which the application is running
-export HOST_IP=''
-export EXTERNAL_HOST_IP=''
+export HOST_IP=${ip_address}
+export EXTERNAL_HOST_IP=${ip_address}

 ### The port of the vLLM service. On this port, the TGI service will accept connections
 export CODEGEN_VLLM_SERVICE_PORT=8028
 export CODEGEN_VLLM_ENDPOINT="http://${HOST_IP}:${CODEGEN_VLLM_SERVICE_PORT}"

 ### A token for accessing repositories with models
-export CODEGEN_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export CODEGEN_HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}

 ### Model ID
 export CODEGEN_LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
@@ -25,7 +25,7 @@ export CODEGEN_LLM_SERVICE_PORT=9000
 export CODEGEN_MEGA_SERVICE_HOST_IP=${HOST_IP}

 ### The port for CodeGen backend service
-export CODEGEN_BACKEND_SERVICE_PORT=18150
+export CODEGEN_BACKEND_SERVICE_PORT=7778

 ### The URL of CodeGen backend service, used by the frontend service
 export CODEGEN_BACKEND_SERVICE_URL="http://${EXTERNAL_HOST_IP}:${CODEGEN_BACKEND_SERVICE_PORT}/v1/codegen"
@@ -34,4 +34,4 @@ export CODEGEN_BACKEND_SERVICE_URL="http://${EXTERNAL_HOST_IP}:${CODEGEN_BACKEND
 export CODEGEN_LLM_SERVICE_HOST_IP=${HOST_IP}

 ### The CodeGen service UI port
-export CODEGEN_UI_SERVICE_PORT=18151
+export CODEGEN_UI_SERVICE_PORT=5173
@@ -6,22 +6,10 @@ This README provides instructions for deploying the CodeGen application using Do
(Table-of-contents change; the extraction did not preserve which entries were removed versus kept. The entries shown in this hunk are:)
 - [Overview](#overview)
 - [Prerequisites](#prerequisites)
 - [Quick Start](#quick-start)
 - [Available Deployment Options](#available-deployment-options)
 - [Default: vLLM-based Deployment (`--profile codegen-xeon-vllm`)](#default-vllm-based-deployment---profile-codegen-xeon-vllm)
 - [TGI-based Deployment (`--profile codegen-xeon-tgi`)](#tgi-based-deployment---profile-codegen-xeon-tgi)
 - [Configuration Parameters](#configuration-parameters)
 - [Environment Variables](#environment-variables)
 - [Compose Profiles](#compose-profiles)
 - [Quick Start Deployment](#quick-start-deployment)
 - [Building Custom Images (Optional)](#building-custom-images-optional)
 - [Validate Services](#validate-services)
 - [Check Container Status](#check-container-status)
 - [Run Validation Script/Commands](#run-validation-scriptcommands)
 - [Accessing the User Interface (UI)](#accessing-the-user-interface-ui)
 - [Gradio UI (Default)](#gradio-ui-default)
 - [Svelte UI (Optional)](#svelte-ui-optional)
 - [React UI (Optional)](#react-ui-optional)
 - [VS Code Extension (Optional)](#vs-code-extension-optional)
 - [Troubleshooting](#troubleshooting)
 - [Stopping the Application](#stopping-the-application)
 - [Next Steps](#next-steps)
@@ -43,38 +31,37 @@ This guide focuses on running the pre-configured CodeGen service using Docker Co
    cd GenAIExamples/CodeGen/docker_compose/intel/cpu/xeon
    ```

-## Quick Start
+## Quick Start Deployment

 This uses the default vLLM-based deployment profile (`codegen-xeon-vllm`).

 1. **Configure Environment:**
    Set required environment variables in your shell:

-   ```bash
-   # Replace with your host's external IP address (do not use localhost or 127.0.0.1)
-   export host_ip="your_external_ip_address"
-   # Replace with your Hugging Face Hub API token
-   export HUGGINGFACEHUB_API_TOKEN="your_huggingface_token"
+   ```bash
+   # Replace with your host's external IP address (do not use localhost or 127.0.0.1)
+   export HOST_IP="your_external_ip_address"
+   # Replace with your Hugging Face Hub API token
+   export HUGGINGFACEHUB_API_TOKEN="your_huggingface_token"

-   # Optional: Configure proxy if needed
-   # export http_proxy="your_http_proxy"
-   # export https_proxy="your_https_proxy"
-   # export no_proxy="localhost,127.0.0.1,${host_ip}" # Add other hosts if necessary
-   source ../../../set_env.sh
-   ```
+   # Optional: Configure proxy if needed
+   # export http_proxy="your_http_proxy"
+   # export https_proxy="your_https_proxy"
+   # export no_proxy="localhost,127.0.0.1,${HOST_IP}" # Add other hosts if necessary
+   source ../../set_env.sh
+   ```

-   _Note: The compose file might read additional variables from set_env.sh. Ensure all required variables like ports (`LLM_SERVICE_PORT`, `MEGA_SERVICE_PORT`, etc.) are set if not using defaults from the compose file._
-   like
+   _Note: The compose file might read additional variables from set_env.sh. Ensure all required variables like ports (`LLM_SERVICE_PORT`, `MEGA_SERVICE_PORT`, etc.) are set if not using defaults from the compose file._

-   ```
-   export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-32B-Instruct"
-   ```
-   For instance, edit the set_env.sh to change the LLM model
-
-   can be changed to small model if needed
-
-   ```
-   export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
-   ```
+   ```
+   export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
+   ```
+
+   can be changed to other model if needed
+
+   ```
+   export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-32B-Instruct"
+   ```

 2. **Start Services (vLLM Profile):**
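Following the README's own commands, a minimal start-and-check sequence for the default profile might be (the service name passed to `logs -f` is one of the services the README lists for this profile):

```bash
# Start the default vLLM-based profile and confirm the containers are up.
docker compose --profile codegen-xeon-vllm up -d
docker compose ps
# Watch a service while models download on first start.
docker compose logs -f codegen-vllm-server
```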
@@ -85,45 +72,45 @@ This uses the default vLLM-based deployment profile (`codegen-xeon-vllm`).
|
||||
3. **Validate:**
|
||||
Wait several minutes for models to download (especially the first time) and services to initialize. Check container logs (`docker compose logs -f <service_name>`) or proceed to the validation steps below.

## Available Deployment Options
### Available Deployment Options

The `compose.yaml` file uses Docker Compose profiles to select the LLM serving backend.

### Default: vLLM-based Deployment (`--profile codegen-xeon-vllm`)
#### Default: vLLM-based Deployment (`--profile codegen-xeon-vllm`)

- **Profile:** `codegen-xeon-vllm`
- **Description:** Uses vLLM optimized for Intel CPUs as the LLM serving engine. This is the default profile used in the Quick Start.
- **Services Deployed:** `codegen-vllm-server`, `codegen-llm-server`, `codegen-tei-embedding-server`, `codegen-retriever-server`, `redis-vector-db`, `codegen-dataprep-server`, `codegen-backend-server`, `codegen-gradio-ui-server`.

### TGI-based Deployment (`--profile codegen-xeon-tgi`)
#### TGI-based Deployment (`--profile codegen-xeon-tgi`)

- **Profile:** `codegen-xeon-tgi`
- **Description:** Uses Hugging Face Text Generation Inference (TGI) optimized for Intel CPUs as the LLM serving engine.
- **Services Deployed:** `codegen-tgi-server`, `codegen-llm-server`, `codegen-tei-embedding-server`, `codegen-retriever-server`, `redis-vector-db`, `codegen-dataprep-server`, `codegen-backend-server`, `codegen-gradio-ui-server`.
- **To Run:**
  ```bash
  # Ensure environment variables (host_ip, HUGGINGFACEHUB_API_TOKEN) are set
  # Ensure environment variables (HOST_IP, HUGGINGFACEHUB_API_TOKEN) are set
  docker compose --profile codegen-xeon-tgi up -d
  ```

## Configuration Parameters
### Configuration Parameters

### Environment Variables
#### Environment Variables

Key parameters are configured via environment variables set before running `docker compose up`.

| Environment Variable | Description | Default (Set Externally) |
| :--- | :--- | :--- |
| `host_ip` | External IP address of the host machine. **Required.** | `your_external_ip_address` |
| `HUGGINGFACEHUB_API_TOKEN` | Your Hugging Face Hub token for model access. **Required.** | `your_huggingface_token` |
| `LLM_MODEL_ID` | Hugging Face model ID for the CodeGen LLM (used by TGI/vLLM service). Configured within `compose.yaml` environment. | `Qwen/Qwen2.5-Coder-7B-Instruct` |
| `EMBEDDING_MODEL_ID` | Hugging Face model ID for the embedding model (used by TEI service). Configured within `compose.yaml` environment. | `BAAI/bge-base-en-v1.5` |
| `LLM_ENDPOINT` | Internal URL for the LLM serving endpoint (used by `codegen-llm-server`). Configured in `compose.yaml`. | `http://codegen-tgi-server:80/generate` or `http://codegen-vllm-server:8000/v1/chat/completions` |
| `TEI_EMBEDDING_ENDPOINT` | Internal URL for the Embedding service. Configured in `compose.yaml`. | `http://codegen-tei-embedding-server:80/embed` |
| `DATAPREP_ENDPOINT` | Internal URL for the Data Preparation service. Configured in `compose.yaml`. | `http://codegen-dataprep-server:80/dataprep` |
| `BACKEND_SERVICE_ENDPOINT` | External URL for the CodeGen Gateway (MegaService). Derived from `host_ip` and port `7778`. | `http://${host_ip}:7778/v1/codegen` |
| `*_PORT` (Internal) | Internal container ports (e.g., `80`, `6379`). Defined in `compose.yaml`. | N/A |
| `http_proxy` / `https_proxy` / `no_proxy` | Network proxy settings (if required). | `""` |

| Environment Variable | Description | Default (Set Externally) |
| :--- | :--- | :--- |
| `HOST_IP` | External IP address of the host machine. **Required.** | `your_external_ip_address` |
| `HUGGINGFACEHUB_API_TOKEN` | Your Hugging Face Hub token for model access. **Required.** | `your_huggingface_token` |
| `LLM_MODEL_ID` | Hugging Face model ID for the CodeGen LLM (used by TGI/vLLM service). Configured within `compose.yaml` environment. | `Qwen/Qwen2.5-Coder-7B-Instruct` |
| `EMBEDDING_MODEL_ID` | Hugging Face model ID for the embedding model (used by TEI service). Configured within `compose.yaml` environment. | `BAAI/bge-base-en-v1.5` |
| `LLM_ENDPOINT` | Internal URL for the LLM serving endpoint (used by `codegen-llm-server`). Configured in `compose.yaml`. | `http://codegen-vllm\|tgi-server:9000/v1/chat/completions` |
| `TEI_EMBEDDING_ENDPOINT` | Internal URL for the Embedding service. Configured in `compose.yaml`. | `http://codegen-tei-embedding-server:80/embed` |
| `DATAPREP_ENDPOINT` | Internal URL for the Data Preparation service. Configured in `compose.yaml`. | `http://codegen-dataprep-server:80/dataprep` |
| `BACKEND_SERVICE_ENDPOINT` | External URL for the CodeGen Gateway (MegaService). Derived from `HOST_IP` and port `7778`. | `http://${HOST_IP}:7778/v1/codegen` |
| `*_PORT` (Internal) | Internal container ports (e.g., `80`, `6379`). Defined in `compose.yaml`. | N/A |
| `http_proxy` / `https_proxy` / `no_proxy` | Network proxy settings (if required). | `""` |

Most of these parameters are defined in `set_env.sh`; you can either modify that file or override individual variables by exporting them in your shell.
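
A minimal override sketch (variable and profile names from the table above): load the defaults first, then replace what you need before starting the stack.

```bash
# Load defaults, then override the model before bringing the stack up
source CodeGen/docker_compose/set_env.sh
export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-32B-Instruct"
docker compose --profile codegen-xeon-vllm up -d
```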

@@ -131,7 +118,7 @@ Most of these parameters are in `set_env.sh`, you can either modify this file or

source CodeGen/docker_compose/set_env.sh
```

### Compose Profiles
#### Compose Profiles

Docker Compose profiles (`codegen-xeon-vllm`, `codegen-xeon-tgi`) control which LLM serving backend (vLLM or TGI) and its associated dependencies are started. Only one profile should typically be active.
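
To switch backends, bring the active profile down before starting the other one (a sketch using the profile names above):

```bash
# Stop the vLLM-based stack, then start the TGI-based one
docker compose --profile codegen-xeon-vllm down
docker compose --profile codegen-xeon-tgi up -d
```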

@@ -161,23 +148,23 @@ Check logs for specific services: `docker compose logs <service_name>`

### Run Validation Script/Commands

Use `curl` commands to test the main service endpoints. Ensure `host_ip` is correctly set in your environment.
Use `curl` commands to test the main service endpoints. Ensure `HOST_IP` is correctly set in your environment.

1. **Validate LLM Serving Endpoint (example for vLLM, mapped to host port `9000` in the default configuration):**

   ```bash
   # This command structure targets the OpenAI-compatible vLLM endpoint
   curl http://${host_ip}:9000/v1/chat/completions \
   curl http://${HOST_IP}:9000/v1/chat/completions \
     -X POST \
     -H 'Content-Type: application/json' \
     -d '{"model": "Qwen/Qwen2.5-Coder-32B-Instruct", "messages": [{"role": "user", "content": "Implement a basic Python class"}], "max_tokens":32}'
     -d '{"model": "Qwen/Qwen2.5-Coder-7B-Instruct", "messages": [{"role": "user", "content": "Implement a basic Python class"}], "max_tokens":32}'
   ```

   - **Expected Output:** A JSON response with generated code in `choices[0].message.content`.
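
To pull only the generated text out of that response (assuming the OpenAI-style schema noted above, and that `jq` is installed on the host):

```bash
# Extract choices[0].message.content from the JSON response
curl -s http://${HOST_IP}:9000/v1/chat/completions \
  -X POST \
  -H 'Content-Type: application/json' \
  -d '{"model": "Qwen/Qwen2.5-Coder-7B-Instruct", "messages": [{"role": "user", "content": "Implement a basic Python class"}], "max_tokens":32}' \
  | jq -r '.choices[0].message.content'
```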

2. **Validate CodeGen Gateway (MegaService on default port 7778):**
   ```bash
   curl http://${host_ip}:7778/v1/codegen \
   curl http://${HOST_IP}:7778/v1/codegen \
     -H "Content-Type: application/json" \
     -d '{"messages": "Write a Python function that adds two numbers."}'
   ```

@@ -190,7 +177,7 @@ Multiple UI options can be configured via the `compose.yaml`.

### Gradio UI (Default)

Access the default Gradio UI by navigating to:
`http://{host_ip}:5173`
`http://{HOST_IP}:5173`
_(Port `5173` is the default host mapping for `codegen-gradio-ui-server`)_



@@ -200,7 +187,7 @@ _(Port `5173` is the default host mapping for `codegen-gradio-ui-server`)_

1. Modify `compose.yaml`: Comment out the `codegen-gradio-ui-server` service and uncomment/add the `codegen-xeon-ui-server` (Svelte) service definition, ensuring the port mapping is correct (e.g., `"- 5173:5173"`).
2. Restart Docker Compose: `docker compose --profile <profile_name> up -d`
3. Access: `http://{host_ip}:5173` (or the host port you mapped).
3. Access: `http://{HOST_IP}:5173` (or the host port you mapped).



@@ -208,7 +195,7 @@ _(Port `5173` is the default host mapping for `codegen-gradio-ui-server`)_

1. Modify `compose.yaml`: Comment out the default UI service and uncomment/add the `codegen-xeon-react-ui-server` definition, ensuring correct port mapping (e.g., `"- 5174:80"`).
2. Restart Docker Compose: `docker compose --profile <profile_name> up -d`
3. Access: `http://{host_ip}:5174` (or the host port you mapped).
3. Access: `http://{HOST_IP}:5174` (or the host port you mapped).



@@ -218,7 +205,7 @@ Users can interact with the backend service using the `Neural Copilot` VS Code e

1. **Install:** Find and install `Neural Copilot` from the VS Code Marketplace.
   
2. **Configure:** Set the "Service URL" in the extension settings to your CodeGen backend endpoint: `http://${host_ip}:7778/v1/codegen` (use the correct port if changed).
2. **Configure:** Set the "Service URL" in the extension settings to your CodeGen backend endpoint: `http://${HOST_IP}:7778/v1/codegen` (use the correct port if changed).
   
3. **Usage:**
   - **Inline Suggestion:** Type a comment describing the code you want (e.g., `# Python function to read a file`) and wait for suggestions.

@@ -229,7 +216,7 @@ Users can interact with the backend service using the `Neural Copilot` VS Code e

## Troubleshooting

- **Model Download Issues:** Check `HUGGINGFACEHUB_API_TOKEN`. Ensure internet connectivity or correct proxy settings. Check logs of `tgi-service`/`vllm-service` and `tei-embedding-server`. Gated models need prior Hugging Face access.
- **Connection Errors:** Verify `host_ip` is correct and accessible. Check `docker ps` for port mappings. Ensure `no_proxy` includes `host_ip` if using a proxy. Check logs of the service failing to connect (e.g., `codegen-backend-server` logs if it can't reach `codegen-llm-server`).
- **Connection Errors:** Verify `HOST_IP` is correct and accessible. Check `docker ps` for port mappings. Ensure `no_proxy` includes `HOST_IP` if using a proxy. Check logs of the service failing to connect (e.g., `codegen-backend-server` logs if it can't reach `codegen-llm-server`).
- **"Container name is in use"**: Stop existing containers (`docker compose down`) or change `container_name` in `compose.yaml`.
- **Resource Issues:** CodeGen models can be memory-intensive. Monitor host RAM usage. Increase Docker resources if needed.
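
For the resource issues above, one quick way to watch per-container memory on the host is plain Docker tooling (no project-specific flags assumed):

```bash
# One-shot snapshot of CPU and memory usage per running container
docker stats --no-stream
```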


@@ -17,7 +17,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_TOKEN: ${HF_TOKEN}
host_ip: ${host_ip}
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"]
@@ -39,7 +39,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_TOKEN: ${HF_TOKEN}
host_ip: ${host_ip}
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"]
@@ -56,7 +56,7 @@ services:
https_proxy: ${https_proxy}
LLM_ENDPOINT: ${LLM_ENDPOINT}
LLM_MODEL_ID: ${LLM_MODEL_ID}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
restart: unless-stopped
llm-tgi-service:
extends: llm-base
@@ -140,7 +140,7 @@ services:
REDIS_URL: ${REDIS_URL}
REDIS_HOST: ${host_ip}
INDEX_NAME: ${INDEX_NAME}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
LOGFLAG: true
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
@@ -162,7 +162,7 @@ services:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
host_ip: ${host_ip}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_TOKEN: ${HF_TOKEN}
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:80/health"]
interval: 10s
@@ -202,7 +202,7 @@ services:
REDIS_RETRIEVER_PORT: ${REDIS_RETRIEVER_PORT}
INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
LOGFLAG: ${LOGFLAG}
RETRIEVER_COMPONENT_NAME: ${RETRIEVER_COMPONENT_NAME:-OPEA_RETRIEVER_REDIS}
restart: unless-stopped

@@ -59,7 +59,7 @@ services:
REDIS_URL: ${REDIS_URL}
REDIS_HOST: ${host_ip}
INDEX_NAME: ${INDEX_NAME}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
LOGFLAG: true
restart: unless-stopped
tei-embedding-serving:
@@ -76,7 +76,7 @@ services:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
host_ip: ${host_ip}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_TOKEN: ${HF_TOKEN}
healthcheck:
test: ["CMD", "curl", "-f", "http://${host_ip}:${TEI_EMBEDDER_PORT}/health"]
interval: 10s
@@ -116,7 +116,7 @@ services:
REDIS_RETRIEVER_PORT: ${REDIS_RETRIEVER_PORT}
INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
LOGFLAG: ${LOGFLAG}
RETRIEVER_COMPONENT_NAME: ${RETRIEVER_COMPONENT_NAME:-OPEA_RETRIEVER_REDIS}
restart: unless-stopped

@@ -6,23 +6,10 @@ This README provides instructions for deploying the CodeGen application using Do

- [Overview](#overview)
- [Prerequisites](#prerequisites)
- [Quick Start](#quick-start)
- [Available Deployment Options](#available-deployment-options)
- [Default: vLLM-based Deployment (`--profile codegen-gaudi-vllm`)](#default-vllm-based-deployment---profile-codegen-gaudi-vllm)
- [TGI-based Deployment (`--profile codegen-gaudi-tgi`)](#tgi-based-deployment---profile-codegen-gaudi-tgi)
- [Configuration Parameters](#configuration-parameters)
- [Environment Variables](#environment-variables)
- [Compose Profiles](#compose-profiles)
- [Docker Compose Gaudi Configuration](#docker-compose-gaudi-configuration)
- [Quick Start Deployment](#quick-start-deployment)
- [Building Custom Images (Optional)](#building-custom-images-optional)
- [Validate Services](#validate-services)
- [Check Container Status](#check-container-status)
- [Run Validation Script/Commands](#run-validation-scriptcommands)
- [Accessing the User Interface (UI)](#accessing-the-user-interface-ui)
- [Gradio UI (Default)](#gradio-ui-default)
- [Svelte UI (Optional)](#svelte-ui-optional)
- [React UI (Optional)](#react-ui-optional)
- [VS Code Extension (Optional)](#vs-code-extension-optional)
- [Troubleshooting](#troubleshooting)
- [Stopping the Application](#stopping-the-application)
- [Next Steps](#next-steps)

@@ -44,7 +31,7 @@ This guide focuses on running the pre-configured CodeGen service using Docker Co
cd GenAIExamples/CodeGen/docker_compose/intel/hpu/gaudi
```

## Quick Start
## Quick Start Deployment

This uses the default vLLM-based deployment profile (`codegen-gaudi-vllm`).

@@ -53,30 +40,30 @@ This uses the default vLLM-based deployment profile (`codegen-gaudi-vllm`).

```bash
# Replace with your host's external IP address (do not use localhost or 127.0.0.1)
export host_ip="your_external_ip_address"
export HOST_IP="your_external_ip_address"
# Replace with your Hugging Face Hub API token
export HUGGINGFACEHUB_API_TOKEN="your_huggingface_token"

# Optional: Configure proxy if needed
# export http_proxy="your_http_proxy"
# export https_proxy="your_https_proxy"
# export no_proxy="localhost,127.0.0.1,${host_ip}" # Add other hosts if necessary
source ../../../set_env.sh
# export no_proxy="localhost,127.0.0.1,${HOST_IP}" # Add other hosts if necessary
source ../../set_env.sh
```

_Note: The compose file might read additional variables from set_env.sh. Ensure all required variables like ports (`LLM_SERVICE_PORT`, `MEGA_SERVICE_PORT`, etc.) are set if not using defaults from the compose file._

For instance, edit set_env.sh to change the LLM model. The default is:

```
export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
```

This can be changed to another model if needed:

```
export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-32B-Instruct"
```
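
After editing the model ID, re-source the script and restart the stack so the serving container picks up the change (a sketch; the path and profile name come from this README):

```bash
# Reload the updated defaults and recreate the affected services
source ../../set_env.sh
docker compose --profile codegen-gaudi-vllm up -d
```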

2. **Start Services (vLLM Profile):**

```bash
@@ -105,7 +92,7 @@ The `compose.yaml` file uses Docker Compose profiles to select the LLM serving b
- **Other Services:** Same CPU-based services as the vLLM profile.
- **To Run:**
  ```bash
  # Ensure environment variables (host_ip, HUGGINGFACEHUB_API_TOKEN) are set
  # Ensure environment variables (HOST_IP, HUGGINGFACEHUB_API_TOKEN) are set
  docker compose --profile codegen-gaudi-tgi up -d
  ```
@@ -115,18 +102,18 @@ The `compose.yaml` file uses Docker Compose profiles to select the LLM serving b
Key parameters are configured via environment variables set before running `docker compose up`.

| Environment Variable | Description | Default (Set Externally) |
| :--- | :--- | :--- |
| `host_ip` | External IP address of the host machine. **Required.** | `your_external_ip_address` |
| `HUGGINGFACEHUB_API_TOKEN` | Your Hugging Face Hub token for model access. **Required.** | `your_huggingface_token` |
| `LLM_MODEL_ID` | Hugging Face model ID for the CodeGen LLM (used by TGI/vLLM service). Configured within `compose.yaml` environment. | `Qwen/Qwen2.5-Coder-32B-Instruct` |
| `EMBEDDING_MODEL_ID` | Hugging Face model ID for the embedding model (used by TEI service). Configured within `compose.yaml` environment. | `BAAI/bge-base-en-v1.5` |
| `LLM_ENDPOINT` | Internal URL for the LLM serving endpoint (used by `codegen-llm-server`). Configured in `compose.yaml`. | `http://codegen-tgi-server:80/generate` or `http://codegen-vllm-server:8000/v1/chat/completions` |
| `TEI_EMBEDDING_ENDPOINT` | Internal URL for the Embedding service. Configured in `compose.yaml`. | `http://codegen-tei-embedding-server:80/embed` |
| `DATAPREP_ENDPOINT` | Internal URL for the Data Preparation service. Configured in `compose.yaml`. | `http://codegen-dataprep-server:80/dataprep` |
| `BACKEND_SERVICE_ENDPOINT` | External URL for the CodeGen Gateway (MegaService). Derived from `host_ip` and port `7778`. | `http://${host_ip}:7778/v1/codegen` |
| `*_PORT` (Internal) | Internal container ports (e.g., `80`, `6379`). Defined in `compose.yaml`. | N/A |
| `http_proxy` / `https_proxy` / `no_proxy` | Network proxy settings (if required). | `""` |

| Environment Variable | Description | Default (Set Externally) |
| :--- | :--- | :--- |
| `HOST_IP` | External IP address of the host machine. **Required.** | `your_external_ip_address` |
| `HUGGINGFACEHUB_API_TOKEN` | Your Hugging Face Hub token for model access. **Required.** | `your_huggingface_token` |
| `LLM_MODEL_ID` | Hugging Face model ID for the CodeGen LLM (used by TGI/vLLM service). Configured within `compose.yaml` environment. | `Qwen/Qwen2.5-Coder-7B-Instruct` |
| `EMBEDDING_MODEL_ID` | Hugging Face model ID for the embedding model (used by TEI service). Configured within `compose.yaml` environment. | `BAAI/bge-base-en-v1.5` |
| `LLM_ENDPOINT` | Internal URL for the LLM serving endpoint (used by `llm-codegen-vllm-server`). Configured in `compose.yaml`. | `http://codegen-vllm\|tgi-server:9000/v1/chat/completions` |
| `TEI_EMBEDDING_ENDPOINT` | Internal URL for the Embedding service. Configured in `compose.yaml`. | `http://codegen-tei-embedding-server:80/embed` |
| `DATAPREP_ENDPOINT` | Internal URL for the Data Preparation service. Configured in `compose.yaml`. | `http://codegen-dataprep-server:80/dataprep` |
| `BACKEND_SERVICE_ENDPOINT` | External URL for the CodeGen Gateway (MegaService). Derived from `HOST_IP` and port `7778`. | `http://${HOST_IP}:7778/v1/codegen` |
| `*_PORT` (Internal) | Internal container ports (e.g., `80`, `6379`). Defined in `compose.yaml`. | N/A |
| `http_proxy` / `https_proxy` / `no_proxy` | Network proxy settings (if required). | `""` |

Most of these parameters are defined in `set_env.sh`; you can either modify that file or override individual variables by exporting them in your shell.

@@ -181,21 +168,21 @@ Check logs: `docker compose logs <service_name>`. Pay attention to `vllm-gaudi-s

### Run Validation Script/Commands

Use `curl` commands targeting the main service endpoints. Ensure `host_ip` is correctly set.
Use `curl` commands targeting the main service endpoints. Ensure `HOST_IP` is correctly set.

1. **Validate LLM Serving Endpoint (example for vLLM, mapped to host port `9000` in the default configuration):**

   ```bash
   # This command structure targets the OpenAI-compatible vLLM endpoint
   curl http://${host_ip}:9000/v1/chat/completions \
   curl http://${HOST_IP}:9000/v1/chat/completions \
     -X POST \
     -H 'Content-Type: application/json' \
     -d '{"model": "Qwen/Qwen2.5-Coder-32B-Instruct", "messages": [{"role": "user", "content": "Implement a basic Python class"}], "max_tokens":32}'
     -d '{"model": "Qwen/Qwen2.5-Coder-7B-Instruct", "messages": [{"role": "user", "content": "Implement a basic Python class"}], "max_tokens":32}'
   ```

2. **Validate CodeGen Gateway (MegaService, default host port 7778):**
   ```bash
   curl http://${host_ip}:7778/v1/codegen \
   curl http://${HOST_IP}:7778/v1/codegen \
     -H "Content-Type: application/json" \
     -d '{"messages": "Implement a sorting algorithm in Python."}'
   ```

@@ -208,7 +195,7 @@ UI options are similar to the Xeon deployment.

### Gradio UI (Default)

Access the default Gradio UI:
`http://{host_ip}:5173`
`http://{HOST_IP}:5173`
_(Port `5173` is the default host mapping)_



@@ -217,17 +204,17 @@ _(Port `5173` is the default host mapping)_

1. Modify `compose.yaml`: Swap Gradio service for Svelte (`codegen-gaudi-ui-server`), check port map (e.g., `5173:5173`).
2. Restart: `docker compose --profile <profile_name> up -d`
3. Access: `http://{host_ip}:5173`
3. Access: `http://{HOST_IP}:5173`

### React UI (Optional)

1. Modify `compose.yaml`: Swap Gradio service for React (`codegen-gaudi-react-ui-server`), check port map (e.g., `5174:80`).
2. Restart: `docker compose --profile <profile_name> up -d`
3. Access: `http://{host_ip}:5174`
3. Access: `http://{HOST_IP}:5174`

### VS Code Extension (Optional)

Use the `Neural Copilot` extension configured with the CodeGen backend URL: `http://${host_ip}:7778/v1/codegen`. (See Xeon README for detailed setup screenshots).
Use the `Neural Copilot` extension configured with the CodeGen backend URL: `http://${HOST_IP}:7778/v1/codegen`. (See Xeon README for detailed setup screenshots).

## Troubleshooting

@@ -237,7 +224,7 @@ Use the `Neural Copilot` extension configured with the CodeGen backend URL: `htt
- Verify `runtime: habana` and volume mounts in `compose.yaml`.
- Gaudi initialization can take significant time and memory. Monitor resource usage.
- **Model Download Issues:** Check `HUGGINGFACEHUB_API_TOKEN`, internet access, proxy settings. Check LLM service logs.
- **Connection Errors:** Verify `host_ip`, ports, and proxy settings. Use `docker ps` and check service logs.
- **Connection Errors:** Verify `HOST_IP`, ports, and proxy settings. Use `docker ps` and check service logs.

## Stopping the Application
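
A minimal sketch, assuming the stack was started with the default Gaudi vLLM profile:

```bash
# Stop and remove the containers started under the active profile
docker compose --profile codegen-gaudi-vllm down
```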


@@ -17,7 +17,7 @@ services:
https_proxy: ${https_proxy}
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
ENABLE_HPU_GRAPH: true
LIMIT_HPU_GRAPH: true
USE_FLASH_ATTENTION: true
@@ -46,7 +46,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_TOKEN: ${HF_TOKEN}
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
VLLM_SKIP_WARMUP: ${VLLM_SKIP_WARMUP:-false}
@@ -71,7 +71,7 @@ services:
https_proxy: ${https_proxy}
LLM_ENDPOINT: ${LLM_ENDPOINT}
LLM_MODEL_ID: ${LLM_MODEL_ID}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
restart: unless-stopped
llm-tgi-service:
extends: llm-base
@@ -156,7 +156,7 @@ services:
REDIS_URL: ${REDIS_URL}
REDIS_HOST: ${host_ip}
INDEX_NAME: ${INDEX_NAME}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
LOGFLAG: true
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
@@ -178,7 +178,7 @@ services:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
host_ip: ${host_ip}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_TOKEN: ${HF_TOKEN}
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:80/health"]
interval: 10s
@@ -218,7 +218,7 @@ services:
REDIS_RETRIEVER_PORT: ${REDIS_RETRIEVER_PORT}
INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
LOGFLAG: ${LOGFLAG}
RETRIEVER_COMPONENT_NAME: ${RETRIEVER_COMPONENT_NAME:-OPEA_RETRIEVER_REDIS}
restart: unless-stopped
51 CodeGen/docker_compose/intel/set_env.sh Normal file
@@ -0,0 +1,51 @@
#!/usr/bin/env bash

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
pushd "../../" > /dev/null
source .set_env.sh
popd > /dev/null

export HOST_IP=$(hostname -I | awk '{print $1}')
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
if [ -z "${HF_TOKEN}" ]; then
echo "Error: HF_TOKEN is not set. Please set HF_TOKEN first."
fi

if [ -z "${HOST_IP}" ]; then
echo "Error: HOST_IP is not set. Please set HOST_IP first."
fi

export no_proxy=${no_proxy},${HOST_IP}
export http_proxy=${http_proxy}
export https_proxy=${https_proxy}

export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
export LLM_SERVICE_PORT=9000
export LLM_ENDPOINT="http://${HOST_IP}:8028"
export LLM_SERVICE_HOST_IP=${HOST_IP}
export TGI_LLM_ENDPOINT="http://${HOST_IP}:8028"

export MEGA_SERVICE_PORT=7778
export MEGA_SERVICE_HOST_IP=${HOST_IP}
export BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:7778/v1/codegen"

export REDIS_DB_PORT=6379
export REDIS_INSIGHTS_PORT=8001
export REDIS_RETRIEVER_PORT=7000
export REDIS_URL="redis://${HOST_IP}:${REDIS_DB_PORT}"
export RETRIEVAL_SERVICE_HOST_IP=${HOST_IP}
export RETRIEVER_COMPONENT_NAME="OPEA_RETRIEVER_REDIS"
export INDEX_NAME="CodeGen"

export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export EMBEDDER_PORT=6000
export TEI_EMBEDDER_PORT=8090
export TEI_EMBEDDING_HOST_IP=${HOST_IP}
export TEI_EMBEDDING_ENDPOINT="http://${HOST_IP}:${TEI_EMBEDDER_PORT}"

export DATAPREP_REDIS_PORT=6007
export DATAPREP_ENDPOINT="http://${HOST_IP}:${DATAPREP_REDIS_PORT}/v1/dataprep"
export LOGFLAG=false
export MODEL_CACHE=${model_cache:-"./data"}
export NUM_CARDS=1
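
A minimal usage sketch for this new shared script from one of the platform directories (paths from the READMEs above; the token value is a placeholder):

```bash
# HF_TOKEN must be exported before sourcing, since the script derives
# HUGGINGFACEHUB_API_TOKEN from it
export HF_TOKEN="your_huggingface_token"
cd GenAIExamples/CodeGen/docker_compose/intel/hpu/gaudi
source ../../set_env.sh
```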

@@ -1,50 +0,0 @@
#!/usr/bin/env bash

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
pushd "../../" > /dev/null
source .set_env.sh
popd > /dev/null

export host_ip=$(hostname -I | awk '{print $1}')
if [ -z "${HUGGINGFACEHUB_API_TOKEN}" ]; then
echo "Error: HUGGINGFACEHUB_API_TOKEN is not set. Please set HUGGINGFACEHUB_API_TOKEN"
fi

if [ -z "${host_ip}" ]; then
echo "Error: host_ip is not set. Please set host_ip first."
fi

export no_proxy=${no_proxy},${host_ip}
export http_proxy=${http_proxy}
export https_proxy=${https_proxy}

export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-32B-Instruct"
export LLM_SERVICE_PORT=9000
export LLM_ENDPOINT="http://${host_ip}:8028"
export LLM_SERVICE_HOST_IP=${host_ip}
export TGI_LLM_ENDPOINT="http://${host_ip}:8028"

export MEGA_SERVICE_PORT=7778
export MEGA_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:7778/v1/codegen"

export REDIS_DB_PORT=6379
export REDIS_INSIGHTS_PORT=8001
export REDIS_RETRIEVER_PORT=7000
export REDIS_URL="redis://${host_ip}:${REDIS_DB_PORT}"
export RETRIEVAL_SERVICE_HOST_IP=${host_ip}
export RETRIEVER_COMPONENT_NAME="OPEA_RETRIEVER_REDIS"
export INDEX_NAME="CodeGen"

export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export EMBEDDER_PORT=6000
export TEI_EMBEDDER_PORT=8090
export TEI_EMBEDDING_HOST_IP=${host_ip}
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}"

export DATAPREP_REDIS_PORT=6007
export DATAPREP_ENDPOINT="http://${host_ip}:${DATAPREP_REDIS_PORT}/v1/dataprep"
export LOGFLAG=false
export MODEL_CACHE="./data"
export NUM_CARDS=1

33 CodeGen/tests/README.md Normal file
@@ -0,0 +1,33 @@
# CodeGen E2E test scripts

## Set the required environment variable

```bash
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
```

## Run test

On Intel Xeon with TGI:

```bash
bash test_compose_on_xeon.sh
```

On Intel Gaudi with TGI:

```bash
bash test_compose_on_gaudi.sh
```

On AMD ROCm with TGI:

```bash
bash test_compose_on_rocm.sh
```

On AMD ROCm with vLLM:

```bash
bash test_compose_vllm_on_rocm.sh
```

@@ -10,21 +10,11 @@ echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
export MODEL_CACHE=${model_cache:-"./data"}
export REDIS_DB_PORT=6379
export REDIS_INSIGHTS_PORT=8001
export REDIS_RETRIEVER_PORT=7000
export EMBEDDER_PORT=6000
export TEI_EMBEDDER_PORT=8090
export DATAPREP_REDIS_PORT=6007

WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')

export http_proxy=${http_proxy}
export https_proxy=${https_proxy}
export no_proxy=${no_proxy},${ip_address}

source $WORKPATH/docker_compose/intel/set_env.sh
function build_docker_images() {
opea_branch=${opea_branch:-"main"}

@@ -54,28 +44,6 @@ function start_services() {

cd $WORKPATH/docker_compose/intel/hpu/gaudi

export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
export LLM_ENDPOINT="http://${ip_address}:8028"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export MEGA_SERVICE_PORT=7778
export MEGA_SERVICE_HOST_IP=${ip_address}
export LLM_SERVICE_HOST_IP=${ip_address}
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:${MEGA_SERVICE_PORT}/v1/codegen"
export NUM_CARDS=1
export host_ip=${ip_address}

export REDIS_URL="redis://${host_ip}:${REDIS_DB_PORT}"
export RETRIEVAL_SERVICE_HOST_IP=${host_ip}
export RETRIEVER_COMPONENT_NAME="OPEA_RETRIEVER_REDIS"
export INDEX_NAME="CodeGen"

export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export TEI_EMBEDDING_HOST_IP=${host_ip}
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}"
export DATAPREP_ENDPOINT="http://${host_ip}:${DATAPREP_REDIS_PORT}/v1/dataprep"

export INDEX_NAME="CodeGen"

# Start Docker Containers
docker compose --profile ${compose_profile} up -d | tee ${LOG_PATH}/start_services_with_compose.log


@@ -35,18 +35,7 @@ function build_docker_images() {

function start_services() {
cd $WORKPATH/docker_compose/amd/gpu/rocm/

export CODEGEN_LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
export CODEGEN_TGI_SERVICE_PORT=8028
export CODEGEN_TGI_LLM_ENDPOINT="http://${ip_address}:${CODEGEN_TGI_SERVICE_PORT}"
export CODEGEN_LLM_SERVICE_PORT=9000
export CODEGEN_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export CODEGEN_MEGA_SERVICE_HOST_IP=${ip_address}
export CODEGEN_LLM_SERVICE_HOST_IP=${ip_address}
export CODEGEN_BACKEND_SERVICE_PORT=7778
export CODEGEN_BACKEND_SERVICE_URL="http://${ip_address}:${CODEGEN_BACKEND_SERVICE_PORT}/v1/codegen"
export CODEGEN_UI_SERVICE_PORT=5173
export HOST_IP=${ip_address}
source set_env.sh

sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env


@@ -10,20 +10,11 @@ echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
export MODEL_CACHE=${model_cache:-"./data"}
export REDIS_DB_PORT=6379
export REDIS_INSIGHTS_PORT=8001
export REDIS_RETRIEVER_PORT=7000
export EMBEDDER_PORT=6000
export TEI_EMBEDDER_PORT=8090
export DATAPREP_REDIS_PORT=6007

WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')

export http_proxy=${http_proxy}
export https_proxy=${https_proxy}
export no_proxy=${no_proxy},${ip_address}
source $WORKPATH/docker_compose/intel/set_env.sh

function build_docker_images() {
opea_branch=${opea_branch:-"main"}
@@ -56,25 +47,6 @@ function start_services() {

cd $WORKPATH/docker_compose/intel/cpu/xeon/

export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
export LLM_ENDPOINT="http://${ip_address}:8028"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export MEGA_SERVICE_PORT=7778
export MEGA_SERVICE_HOST_IP=${ip_address}
export LLM_SERVICE_HOST_IP=${ip_address}
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:${MEGA_SERVICE_PORT}/v1/codegen"
export host_ip=${ip_address}

export REDIS_URL="redis://${host_ip}:${REDIS_DB_PORT}"
export RETRIEVAL_SERVICE_HOST_IP=${host_ip}
export RETRIEVER_COMPONENT_NAME="OPEA_RETRIEVER_REDIS"
export INDEX_NAME="CodeGen"

export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export TEI_EMBEDDING_HOST_IP=${host_ip}
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}"
export DATAPREP_ENDPOINT="http://${host_ip}:${DATAPREP_REDIS_PORT}/v1/dataprep"

# Start Docker Containers
docker compose --profile ${compose_profile} up -d > ${LOG_PATH}/start_services_with_compose.log


@@ -34,18 +34,7 @@ function build_docker_images() {

function start_services() {
cd $WORKPATH/docker_compose/amd/gpu/rocm/

export CODEGEN_LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
export CODEGEN_VLLM_SERVICE_PORT=8028
export CODEGEN_VLLM_ENDPOINT="http://${ip_address}:${CODEGEN_VLLM_SERVICE_PORT}"
export CODEGEN_LLM_SERVICE_PORT=9000
export CODEGEN_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export CODEGEN_MEGA_SERVICE_HOST_IP=${ip_address}
export CODEGEN_LLM_SERVICE_HOST_IP=${ip_address}
export CODEGEN_BACKEND_SERVICE_PORT=7778
export CODEGEN_BACKEND_SERVICE_URL="http://${ip_address}:${CODEGEN_BACKEND_SERVICE_PORT}/v1/codegen"
export CODEGEN_UI_SERVICE_PORT=5173
export HOST_IP=${ip_address}
source set_env_vllm.sh

sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env


@@ -21,7 +21,7 @@ export CODETRANS_TGI_SERVICE_PORT=8008
export CODETRANS_TGI_LLM_ENDPOINT="http://${HOST_IP}:${CODETRANS_TGI_SERVICE_PORT}"

### A token for accessing repositories with models
export CODETRANS_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export CODETRANS_HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}

### The port of the LLM service. On this port, the LLM service will accept connections
export CODETRANS_LLM_SERVICE_PORT=9000

@@ -21,7 +21,7 @@ export CODETRANS_VLLM_SERVICE_PORT=8008
export CODETRANS_LLM_ENDPOINT="http://${HOST_IP}:${CODETRANS_VLLM_SERVICE_PORT}"

### A token for accessing repositories with models
export CODETRANS_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export CODETRANS_HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}

### The port of the LLM service. On this port, the LLM service will accept connections
export CODETRANS_LLM_SERVICE_PORT=9000

@@ -14,7 +14,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_TOKEN: ${HF_TOKEN}
LLM_MODEL_ID: ${LLM_MODEL_ID}
VLLM_TORCH_PROFILER_DIR: "/mnt"
healthcheck:
@@ -39,7 +39,7 @@ services:
LLM_ENDPOINT: ${LLM_ENDPOINT}
LLM_MODEL_ID: ${LLM_MODEL_ID}
LLM_COMPONENT_NAME: ${LLM_COMPONENT_NAME}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_TOKEN: ${HF_TOKEN}
restart: unless-stopped
codetrans-xeon-backend-server:
image: ${REGISTRY:-opea}/codetrans:${TAG:-latest}

@@ -14,7 +14,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_TOKEN: ${HF_TOKEN}
host_ip: ${host_ip}
healthcheck:
test: ["CMD-SHELL", "curl -f http://$host_ip:8008/health || exit 1"]
@@ -38,7 +38,7 @@ services:
LLM_ENDPOINT: ${LLM_ENDPOINT}
LLM_MODEL_ID: ${LLM_MODEL_ID}
LLM_COMPONENT_NAME: ${LLM_COMPONENT_NAME}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
restart: unless-stopped
codetrans-xeon-backend-server:
image: ${REGISTRY:-opea}/codetrans:${TAG:-latest}

@@ -13,7 +13,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_TOKEN: ${HF_TOKEN}
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
LLM_MODEL_ID: ${LLM_MODEL_ID}
@@ -45,7 +45,7 @@ services:
LLM_ENDPOINT: ${LLM_ENDPOINT}
LLM_MODEL_ID: ${LLM_MODEL_ID}
LLM_COMPONENT_NAME: ${LLM_COMPONENT_NAME}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_TOKEN: ${HF_TOKEN}
restart: unless-stopped
codetrans-gaudi-backend-server:
image: ${REGISTRY:-opea}/codetrans:${TAG:-latest}

@@ -13,7 +13,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
@@ -42,7 +42,7 @@ services:
LLM_ENDPOINT: ${LLM_ENDPOINT}
LLM_MODEL_ID: ${LLM_MODEL_ID}
LLM_COMPONENT_NAME: ${LLM_COMPONENT_NAME}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
restart: unless-stopped
codetrans-gaudi-backend-server:
image: ${REGISTRY:-opea}/codetrans:${TAG:-latest}

@@ -38,7 +38,7 @@ function build_docker_images() {

function start_services() {
cd $WORKPATH/docker_compose/intel
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
export NGINX_PORT=80
export host_ip=${ip_address}
source set_env.sh

@@ -40,7 +40,7 @@ function build_docker_images() {

function start_services() {
cd $WORKPATH/docker_compose/intel
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}

export NGINX_PORT=80
export host_ip=${ip_address}

@@ -35,7 +35,7 @@ function build_docker_images() {

function start_services() {
cd $WORKPATH/docker_compose/intel
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}

export NGINX_PORT=80
export host_ip=${ip_address}

@@ -35,7 +35,7 @@ function build_docker_images() {

function start_services() {
cd $WORKPATH/docker_compose/intel
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}

export NGINX_PORT=80
export host_ip=${ip_address}

@@ -9,7 +9,7 @@ source .set_env.sh
popd > /dev/null

export host_ip=${ip_address}
export DBQNA_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export DBQNA_HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
export DBQNA_TGI_SERVICE_PORT=8008
export DBQNA_TGI_LLM_ENDPOINT="http://${host_ip}:${DBQNA_TGI_SERVICE_PORT}"
export DBQNA_LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"

@@ -61,7 +61,7 @@ export https_proxy=${https_proxy}

export TGI_PORT=8008
export TGI_LLM_ENDPOINT=http://${host_ip}:${TGI_PORT}
export HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export HF_TOKEN=${HF_TOKEN}
export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"
export POSTGRES_USER=postgres
export POSTGRES_PASSWORD=testpwd
@@ -108,7 +108,7 @@ docker run --name test-text2sql-postgres --ipc=host -e POSTGRES_USER=${POSTGRES_

```bash

docker run -d --name="test-text2sql-tgi-endpoint" --ipc=host -p $TGI_PORT:80 -v ./data:/data --shm-size 1g -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e HF_TOKEN=${HF_TOKEN} -e model=${model} ghcr.io/huggingface/text-generation-inference:2.4.1 --model-id $model
docker run -d --name="test-text2sql-tgi-endpoint" --ipc=host -p $TGI_PORT:80 -v ./data:/data --shm-size 1g -e HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} -e HF_TOKEN=${HF_TOKEN} -e model=${model} ghcr.io/huggingface/text-generation-inference:2.4.1 --model-id $model
```

- Start Text-to-SQL Service

@@ -13,8 +13,8 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
shm_size: 1g
command: --model-id ${LLM_MODEL_ID}


@@ -9,8 +9,8 @@ popd > /dev/null

export host_ip=${ip_address}
export no_proxy=$no_proxy,$host_ip,dbqna-xeon-react-ui-server,text2sql-service,tgi-service,postgres-container
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
export HF_TOKEN=${HF_TOKEN}
export POSTGRES_USER=postgres
export POSTGRES_PASSWORD=testpwd
export POSTGRES_DB=chinook

@@ -28,7 +28,7 @@ services:
REDIS_HOST: ${REDIS_HOST}
INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
LOGFLAG: ${LOGFLAG}
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
@@ -49,7 +49,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
host_ip: ${host_ip}
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"]
@@ -69,7 +69,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_TOKEN: ${HF_TOKEN}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
LOGFLAG: ${LOGFLAG}
restart: unless-stopped
@@ -87,7 +87,7 @@ services:
https_proxy: ${https_proxy}
REDIS_URL: ${REDIS_URL}
INDEX_NAME: ${INDEX_NAME}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
LOGFLAG: ${LOGFLAG}
RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
@@ -105,7 +105,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
host_ip: ${host_ip}
@@ -129,7 +129,7 @@ services:
https_proxy: ${https_proxy}
RERANK_TYPE: ${RERANK_TYPE}
TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
LOGFLAG: ${LOGFLAG}

@@ -76,7 +76,7 @@ services:
DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_MILVUS"
MILVUS_HOST: ${MILVUS_HOST}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
LOGFLAG: ${LOGFLAG}
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
@@ -107,7 +107,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
host_ip: ${host_ip}
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"]
@@ -130,7 +130,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_TOKEN: ${HF_TOKEN}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
LOGFLAG: ${LOGFLAG}
restart: unless-stopped
@@ -148,7 +148,7 @@ services:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
MILVUS_HOST: ${host_ip}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
LOGFLAG: ${LOGFLAG}
RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_MILVUS"
@@ -167,7 +167,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
host_ip: ${host_ip}
@@ -194,7 +194,7 @@ services:
https_proxy: ${https_proxy}
RERANK_TYPE: ${RERANK_TYPE}
TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
LOGFLAG: ${LOGFLAG}

@@ -25,7 +25,7 @@ services:
REDIS_HOST: redis-vector-db
INDEX_NAME: ${INDEX_NAME:-rag-redis}
TEI_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
LOGFLAG: ${LOGFLAG}
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
@@ -46,7 +46,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
host_ip: ${host_ip}
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"]
@@ -66,7 +66,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_TOKEN: ${HF_TOKEN}
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
LOGFLAG: ${LOGFLAG}
restart: unless-stopped
@@ -84,7 +84,7 @@ services:
https_proxy: ${https_proxy}
REDIS_URL: redis://redis-vector-db:6379
INDEX_NAME: ${INDEX_NAME:-rag-redis}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
LOGFLAG: ${LOGFLAG}
RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"

@@ -12,7 +12,7 @@ export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:6006"
export TEI_RERANKING_ENDPOINT="http://${ip_address}:8808"
export TGI_LLM_ENDPOINT="http://${ip_address}:8008"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
export MEGA_SERVICE_HOST_IP=${ip_address}
export EMBEDDING_SERVICE_HOST_IP=${ip_address}
export RETRIEVER_SERVICE_HOST_IP=${ip_address}

@@ -28,7 +28,7 @@ services:
REDIS_URL: ${REDIS_URL}
INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
interval: 10s
@@ -76,7 +76,7 @@ services:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_TOKEN: ${HF_TOKEN}
LOGFLAG: ${LOGFLAG}
restart: unless-stopped
retriever:
@@ -96,7 +96,7 @@ services:
LOGFLAG: ${LOGFLAG}
RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
restart: unless-stopped
tei-reranking-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.6
@@ -111,7 +111,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
host_ip: ${host_ip}
@@ -135,7 +135,7 @@ services:
https_proxy: ${https_proxy}
RERANK_TYPE: ${RERANK_TYPE}
TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
LOGFLAG: ${LOGFLAG}

@@ -76,7 +76,7 @@ services:
DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_MILVUS"
MILVUS_HOST: ${MILVUS_HOST}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
LOGFLAG: ${LOGFLAG}
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
@@ -136,7 +136,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_TOKEN: ${HF_TOKEN}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
LOGFLAG: ${LOGFLAG}
restart: unless-stopped
@@ -154,7 +154,7 @@ services:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
MILVUS_HOST: ${host_ip}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
LOGFLAG: ${LOGFLAG}
RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_MILVUS"
@@ -173,7 +173,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
host_ip: ${host_ip}
@@ -200,7 +200,7 @@ services:
https_proxy: ${https_proxy}
RERANK_TYPE: ${RERANK_TYPE}
TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
LOGFLAG: ${LOGFLAG}

@@ -11,7 +11,7 @@ export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:8090"
export TEI_RERANKING_ENDPOINT="http://${ip_address}:8808"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
export MEGA_SERVICE_HOST_IP=${ip_address}
export EMBEDDING_SERVICE_HOST_IP=${ip_address}
export RETRIEVER_SERVICE_HOST_IP=${ip_address}
Some files were not shown because too many files have changed in this diff.