Provide unified scalable deployment and benchmarking support for exam… (#1315 )

Signed-off-by: Cathy Zhang <cathy.zhang@intel.com> Signed-off-by: letonghan <letong.han@intel.com> Co-authored-by: letonghan <letong.han@intel.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> (cherry picked from commit ed163087ba)
Freeze OPEA images tag
2025-01-24 22:55:38 +08:00 · 2025-01-24 08:31:22 +00:00
115 changed files with 190 additions and 25866 deletions
--- a/.github/workflows/_helm-e2e.yml
+++ b/.github/workflows/_helm-e2e.yml
@@ -97,7 +97,6 @@ jobs:

  helm-test:
    needs: [get-test-case]
-    if: ${{ fromJSON(needs.get-test-case.outputs.value_files).length != 0 }}
    strategy:
      matrix:
        value_file: ${{ fromJSON(needs.get-test-case.outputs.value_files) }}
--- a/.github/workflows/_run-docker-compose.yml
+++ b/.github/workflows/_run-docker-compose.yml
@@ -91,7 +91,6 @@ jobs:

  compose-test:
    needs: [get-test-case]
-    if: ${{ fromJSON(needs.get-test-case.outputs.test_cases).length != 0 || needs.get-test-case.outputs.test_cases != '' }}
    strategy:
      matrix:
        test_case: ${{ fromJSON(needs.get-test-case.outputs.test_cases) }}
--- a/.github/workflows/manual-docker-publish.yml
+++ b/.github/workflows/manual-docker-publish.yml
@@ -41,11 +41,9 @@ jobs:

  publish:
    needs: [get-image-list]
-    if: ${{ needs.get-image-list.outputs.matrix != '' }}
    strategy:
      matrix:
        image: ${{ fromJSON(needs.get-image-list.outputs.matrix) }}
-      fail-fast: false
    runs-on: "docker-build-${{ inputs.node }}"
    steps:
      - uses: docker/login-action@v3.2.0
--- a/.github/workflows/manual-docker-scan.yml
+++ b/.github/workflows/manual-docker-scan.yml
@@ -47,7 +47,6 @@ jobs:
  scan-docker:
    needs: get-image-list
    runs-on: "docker-build-${{ inputs.node }}"
-    if: ${{ fromJSON(needs.get-image-list.outputs.matrix).length != 0 }}
    strategy:
      matrix:
        image: ${{ fromJson(needs.get-image-list.outputs.matrix) }}
--- a/.github/workflows/manual-example-workflow.yml
+++ b/.github/workflows/manual-example-workflow.yml
@@ -76,7 +76,7 @@ jobs:

  build-deploy-gmc:
    needs: [get-test-matrix]
-    if: ${{ fromJSON(inputs.deploy_gmc) }} && ${{ fromJSON(needs.get-test-matrix.outputs.nodes).length != 0 }}
+    if: ${{ fromJSON(inputs.deploy_gmc) }}
    strategy:
      matrix:
        node: ${{ fromJson(needs.get-test-matrix.outputs.nodes) }}
@@ -90,7 +90,7 @@ jobs:

  run-examples:
    needs: [get-test-matrix, build-deploy-gmc]
-    if: always() && ${{ fromJSON(needs.get-test-matrix.outputs.examples).length != 0 }}
+    if: always()
    strategy:
      matrix:
        example: ${{ fromJson(needs.get-test-matrix.outputs.examples) }}
--- a/.github/workflows/manual-image-build.yml
+++ b/.github/workflows/manual-image-build.yml
@@ -51,7 +51,6 @@ jobs:

  image-build:
    needs: get-test-matrix
-    if: ${{ fromJSON(needs.get-test-matrix.outputs.nodes).length != 0 }}
    strategy:
      matrix:
        node: ${{ fromJson(needs.get-test-matrix.outputs.nodes) }}
--- a/.github/workflows/manual-reset-local-registry.yml
+++ b/.github/workflows/manual-reset-local-registry.yml
@@ -33,7 +33,6 @@ jobs:

  clean-up:
    needs: get-build-matrix
-    if: ${{ fromJSON(needs.get-build-matrix.outputs.nodes).length != 0 }}
    strategy:
      matrix:
        node: ${{ fromJson(needs.get-build-matrix.outputs.nodes) }}
@@ -48,7 +47,6 @@ jobs:

  build:
    needs: [get-build-matrix, clean-up]
-    if: ${{ fromJSON(needs.get-build-matrix.outputs.nodes).length != 0 }}
    strategy:
      matrix:
        example: ${{ fromJson(needs.get-build-matrix.outputs.examples) }}
--- a/.github/workflows/nightly-docker-build-publish.yml
+++ b/.github/workflows/nightly-docker-build-publish.yml
@@ -34,7 +34,6 @@ jobs:

  build-and-test:
    needs: get-build-matrix
-    if: ${{ needs.get-build-matrix.outputs.examples_json != '' }}
    strategy:
      matrix:
        example: ${{ fromJSON(needs.get-build-matrix.outputs.examples_json) }}
@@ -54,11 +53,9 @@ jobs:

  publish:
    needs: [get-build-matrix, get-image-list, build-and-test]
-    if: ${{ needs.get-image-list.outputs.matrix != '' }}
    strategy:
      matrix:
        image: ${{ fromJSON(needs.get-image-list.outputs.matrix) }}
-      fail-fast: false
    runs-on: "docker-build-gaudi"
    steps:
      - uses: docker/login-action@v3.2.0
--- a/.github/workflows/pr-chart-e2e.yml
+++ b/.github/workflows/pr-chart-e2e.yml
@@ -65,7 +65,7 @@ jobs:

  helm-chart-test:
    needs: [job1]
-    if: always() && ${{ fromJSON(needs.job1.outputs.run_matrix).length != 0 }}
+    if: always() && ${{ needs.job1.outputs.run_matrix.example.length > 0 }}
    uses: ./.github/workflows/_helm-e2e.yml
    strategy:
      matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }}
--- a/.github/workflows/pr-docker-compose-e2e.yml
+++ b/.github/workflows/pr-docker-compose-e2e.yml
@@ -28,14 +28,14 @@ jobs:
    if: ${{ !github.event.pull_request.draft }}
    uses: ./.github/workflows/_get-test-matrix.yml
    with:
-      diff_excluded_files: '\.md|\.txt|kubernetes|gmc|assets|benchmark' #\.github|
+      diff_excluded_files: '\.github|\.md|\.txt|kubernetes|gmc|assets|benchmark'

  example-test:
    needs: [get-test-matrix]
-    if: ${{ fromJSON(needs.get-test-matrix.outputs.run_matrix).length != 0 }}
    strategy:
      matrix: ${{ fromJSON(needs.get-test-matrix.outputs.run_matrix) }}
      fail-fast: false
+    if: ${{ !github.event.pull_request.draft }}
    uses: ./.github/workflows/_run-docker-compose.yml
    with:
      registry: "opea"
--- a/.github/workflows/push-image-build.yml
+++ b/.github/workflows/push-image-build.yml
@@ -24,7 +24,6 @@ jobs:

  image-build:
    needs: job1
-    if: ${{ fromJSON(needs.job1.outputs.run_matrix).length != 0 }}
    strategy:
      matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }}
      fail-fast: false
--- a/AgentQnA/ui/svelte/README.md
+++ b/AgentQnA/ui/svelte/README.md
@@ -18,7 +18,7 @@ Here're some of the project's features:
 2. cd command to the current folder.

   ```
-   cd AgentQnA/ui/svelte
+   cd AgentQnA/ui
   ```

 3. Modify the required .env variables.
@@ -41,7 +41,7 @@ Here're some of the project's features:
  npm run dev
  ```

- The application will be available at `http://localhost:5173`.
+- The application will be available at `http://localhost:3000`.

 5. **For Docker Setup:**

@@ -54,7 +54,7 @@ Here're some of the project's features:
 - Run the Docker container:

  ```
-  docker run -d -p 5173:5173 --name agent-ui opea:agent-ui
+  docker run -d -p 3000:3000 --name agent-ui opea:agent-ui
  ```

- The application will be available at `http://localhost:5173`.
+- The application will be available at `http://localhost:3000`.
--- a/AudioQnA/docker_compose/intel/cpu/xeon/README.md
+++ b/AudioQnA/docker_compose/intel/cpu/xeon/README.md
@@ -49,7 +49,7 @@ Before starting the services with `docker compose`, you have to recheck the foll

 ```bash
 export host_ip=<your External Public IP>    # export host_ip=$(hostname -I | awk '{print $1}')
-export HF_TOKEN=<your HF token>
+export HUGGINGFACEHUB_API_TOKEN=<your HF token>

 export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3

--- a/AudioQnA/docker_compose/intel/cpu/xeon/set_env.sh
+++ b/AudioQnA/docker_compose/intel/cpu/xeon/set_env.sh
@@ -5,11 +5,7 @@

 # export host_ip=<your External Public IP>
 export host_ip=$(hostname -I | awk '{print $1}')
-
-if [ -z "$HF_TOKEN" ]; then
-    echo "Error: The HF_TOKEN environment variable is **NOT** set. Please set it"
-    return -1
-fi
+export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
 # <token>

 export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
--- a/AudioQnA/docker_compose/intel/hpu/gaudi/README.md
+++ b/AudioQnA/docker_compose/intel/hpu/gaudi/README.md
@@ -49,7 +49,7 @@ Before starting the services with `docker compose`, you have to recheck the foll

 ```bash
 export host_ip=<your External Public IP>    # export host_ip=$(hostname -I | awk '{print $1}')
-export HF_TOKEN=<your HF token>
+export HUGGINGFACEHUB_API_TOKEN=<your HF token>

 export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3

--- a/AudioQnA/docker_compose/intel/hpu/gaudi/compose.yaml
+++ b/AudioQnA/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -45,8 +45,7 @@ services:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
-      HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
-      HF_TOKEN: ${HF_TOKEN}
+      HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      HABANA_VISIBLE_DEVICES: all
--- a/AudioQnA/docker_compose/intel/hpu/gaudi/set_env.sh
+++ b/AudioQnA/docker_compose/intel/hpu/gaudi/set_env.sh
@@ -5,13 +5,7 @@

 # export host_ip=<your External Public IP>
 export host_ip=$(hostname -I | awk '{print $1}')
-
-if [ -z "$HF_TOKEN" ]; then
-    echo "Error: The HF_TOKEN environment variable is **NOT** set. Please set it"
-    return -1
-fi
-
-export HF_TOKEN=${HF_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
 # <token>

 export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
--- a/AvatarChatbot/docker_compose/amd/gpu/rocm/README.md
+++ b/AvatarChatbot/docker_compose/amd/gpu/rocm/README.md
@@ -1,209 +0,0 @@
-# Build Mega Service of AvatarChatbot on AMD GPU
-
-This document outlines the deployment process for a AvatarChatbot application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on Intel Xeon server.
-
-## 🚀 Build Docker images
-
-### 1. Source Code install GenAIComps
-
-```bash
-git clone https://github.com/opea-project/GenAIComps.git
-cd GenAIComps
-```
-
-### 2. Build ASR Image
-
-```bash
-docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/integrations/dependency/whisper/Dockerfile .
-
-
-docker build -t opea/asr:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/Dockerfile .
-```
-
-### 3. Build LLM Image
-
-```bash
-docker build --no-cache -t opea/llm-textgen:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/text-generation/Dockerfile .
-```
-
-### 4. Build TTS Image
-
-```bash
-docker build -t opea/speecht5:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/src/integrations/dependency/speecht5/Dockerfile .
-
-docker build -t opea/tts:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/src/Dockerfile .
-```
-
-### 5. Build Animation Image
-
-```bash
-docker build -t opea/wav2lip:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/third_parties/wav2lip/src/Dockerfile .
-
-docker build -t opea/animation:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/animation/src/Dockerfile .
-```
-
-### 6. Build MegaService Docker Image
-
-To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `audioqna.py` Python script. Build the MegaService Docker image using the command below:
-
-```bash
-git clone https://github.com/opea-project/GenAIExamples.git
-cd GenAIExamples/AvatarChatbot/
-docker build --no-cache -t opea/avatarchatbot:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
-```
-
-Then run the command `docker images`, you will have following images ready:
-
-1. `opea/whisper:latest`
-2. `opea/asr:latest`
-3. `opea/llm-tgi:latest`
-4. `opea/speecht5:latest`
-5. `opea/tts:latest`
-6. `opea/wav2lip:latest`
-7. `opea/animation:latest`
-8. `opea/avatarchatbot:latest`
-
-## 🚀 Set the environment variables
-
-Before starting the services with `docker compose`, you have to recheck the following environment variables.
-
-```bash
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
-export host_ip=$(hostname -I | awk '{print $1}')
-
-export TGI_SERVICE_PORT=3006
-export TGI_LLM_ENDPOINT=http://${host_ip}:${TGI_SERVICE_PORT}
-export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
-
-export ASR_ENDPOINT=http://${host_ip}:7066
-export TTS_ENDPOINT=http://${host_ip}:7055
-export WAV2LIP_ENDPOINT=http://${host_ip}:7860
-
-export MEGA_SERVICE_HOST_IP=${host_ip}
-export ASR_SERVICE_HOST_IP=${host_ip}
-export TTS_SERVICE_HOST_IP=${host_ip}
-export LLM_SERVICE_HOST_IP=${host_ip}
-export ANIMATION_SERVICE_HOST_IP=${host_ip}
-
-export MEGA_SERVICE_PORT=8888
-export ASR_SERVICE_PORT=3001
-export TTS_SERVICE_PORT=3002
-export LLM_SERVICE_PORT=3007
-export ANIMATION_SERVICE_PORT=3008
-
-export DEVICE="cpu"
-export WAV2LIP_PORT=7860
-export INFERENCE_MODE='wav2lip+gfpgan'
-export CHECKPOINT_PATH='/usr/local/lib/python3.11/site-packages/Wav2Lip/checkpoints/wav2lip_gan.pth'
-export FACE="assets/img/avatar5.png"
-# export AUDIO='assets/audio/eg3_ref.wav' # audio file path is optional, will use base64str in the post request as input if is 'None'
-export AUDIO='None'
-export FACESIZE=96
-export OUTFILE="/outputs/result.mp4"
-export GFPGAN_MODEL_VERSION=1.4 # latest version, can roll back to v1.3 if needed
-export UPSCALE_FACTOR=1
-export FPS=10
-```
-
-Warning!!! - The Wav2lip service works in this solution using only the CPU. To use AMD GPUs and achieve operational performance, the Wav2lip image needs to be modified to adapt to AMD hardware and the ROCm framework.
-
-## 🚀 Start the MegaService
-
-```bash
-cd GenAIExamples/AvatarChatbot/docker_compose/intel/cpu/xeon/
-docker compose -f compose.yaml up -d
-```
-
-## 🚀 Test MicroServices
-
-```bash
-# whisper service
-curl http://${host_ip}:7066/v1/asr \
-  -X POST \
-  -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
-  -H 'Content-Type: application/json'
-
-# asr microservice
-curl http://${host_ip}:3001/v1/audio/transcriptions \
-  -X POST \
-  -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
-  -H 'Content-Type: application/json'
-
-# tgi service
-curl http://${host_ip}:3006/generate \
-  -X POST \
-  -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
-  -H 'Content-Type: application/json'
-
-# llm microservice
-curl http://${host_ip}:3007/v1/chat/completions\
-  -X POST \
-  -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \
-  -H 'Content-Type: application/json'
-
-# speecht5 service
-curl http://${host_ip}:7055/v1/tts \
-  -X POST \
-  -d '{"text": "Who are you?"}' \
-  -H 'Content-Type: application/json'
-
-# tts microservice
-curl http://${host_ip}:3002/v1/audio/speech \
-  -X POST \
-  -d '{"text": "Who are you?"}' \
-  -H 'Content-Type: application/json'
-
-# wav2lip service
-cd ../../../..
-curl http://${host_ip}:7860/v1/wav2lip \
-  -X POST \
-  -d @assets/audio/sample_minecraft.json \
-  -H 'Content-Type: application/json'
-
-# animation microservice
-curl http://${host_ip}:3008/v1/animation \
-  -X POST \
-  -d @assets/audio/sample_question.json \
-  -H "Content-Type: application/json"
-
-```
-
-## 🚀 Test MegaService
-
-```bash
-curl http://${host_ip}:3009/v1/avatarchatbot \
-  -X POST \
-  -d @assets/audio/sample_whoareyou.json \
-  -H 'Content-Type: application/json'
-```
-
-If the megaservice is running properly, you should see the following output:
-
-```bash
-"/outputs/result.mp4"
-```
-
-The output file will be saved in the current working directory, as `${PWD}` is mapped to `/outputs` inside the wav2lip-service Docker container.
-
-## Gradio UI
-
-```bash
-cd $WORKPATH/GenAIExamples/AvatarChatbot
-python3 ui/gradio/app_gradio_demo_avatarchatbot.py
-```
-
-The UI can be viewed at http://${host_ip}:7861  
-<img src="../../../../assets/img/UI.png" alt="UI Example" width="60%">  
-In the current version v1.0, you need to set the avatar figure image/video and the DL model choice in the environment variables before starting AvatarChatbot backend service and running the UI. Please just customize the audio question in the UI.  
-\*\* We will enable change of avatar figure between runs in v2.0
-
-## Troubleshooting
-
-```bash
-cd GenAIExamples/AvatarChatbot/tests
-export IMAGE_REPO="opea"
-export IMAGE_TAG="latest"
-export HUGGINGFACEHUB_API_TOKEN=<your_hf_token>
-
-test_avatarchatbot_on_xeon.sh
-```
--- a/AvatarChatbot/docker_compose/amd/gpu/rocm/compose.yaml
+++ b/AvatarChatbot/docker_compose/amd/gpu/rocm/compose.yaml
@@ -1,158 +0,0 @@
-
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-services:
-  whisper-service:
-    image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
-    container_name: whisper-service
-    ports:
-      - "7066:7066"
-    ipc: host
-    environment:
-      no_proxy: ${no_proxy}
-      http_proxy: ${http_proxy}
-      https_proxy: ${https_proxy}
-    restart: unless-stopped
-  asr:
-    image: ${REGISTRY:-opea}/asr:${TAG:-latest}
-    container_name: asr-service
-    ports:
-      - "3001:9099"
-    ipc: host
-    environment:
-      ASR_ENDPOINT: ${ASR_ENDPOINT}
-  speecht5-service:
-    image: ${REGISTRY:-opea}/speecht5:${TAG:-latest}
-    container_name: speecht5-service
-    ports:
-      - "7055:7055"
-    ipc: host
-    environment:
-      no_proxy: ${no_proxy}
-      http_proxy: ${http_proxy}
-      https_proxy: ${https_proxy}
-    restart: unless-stopped
-  tts:
-    image: ${REGISTRY:-opea}/tts:${TAG:-latest}
-    container_name: tts-service
-    ports:
-      - "3002:9088"
-    ipc: host
-    environment:
-      TTS_ENDPOINT: ${TTS_ENDPOINT}
-  tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
-    container_name: tgi-service
-    ports:
-      - "${TGI_SERVICE_PORT:-3006}:80"
-    volumes:
-      - "./data:/data"
-    environment:
-      no_proxy: ${no_proxy}
-      http_proxy: ${http_proxy}
-      https_proxy: ${https_proxy}
-      HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-    shm_size: 1g
-    devices:
-      - /dev/kfd:/dev/kfd
-      - /dev/dri/:/dev/dri/
-    cap_add:
-      - SYS_PTRACE
-    group_add:
-      - video
-    security_opt:
-      - seccomp:unconfined
-    ipc: host
-    command: --model-id ${LLM_MODEL_ID} --max-input-length 4096 --max-total-tokens 8192
-  llm:
-    image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest}
-    container_name: llm-tgi-server
-    depends_on:
-      - tgi-service
-    ports:
-      - "3007:9000"
-    ipc: host
-    environment:
-      no_proxy: ${no_proxy}
-      http_proxy: ${http_proxy}
-      https_proxy: ${https_proxy}
-      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
-      LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-      OPENAI_API_KEY: ${OPENAI_API_KEY}
-    restart: unless-stopped
-  wav2lip-service:
-    image: ${REGISTRY:-opea}/wav2lip:${TAG:-latest}
-    container_name: wav2lip-service
-    ports:
-      - "7860:7860"
-    ipc: host
-    volumes:
-      - ${PWD}:/outputs
-    environment:
-      no_proxy: ${no_proxy}
-      http_proxy: ${http_proxy}
-      https_proxy: ${https_proxy}
-      DEVICE: ${DEVICE}
-      INFERENCE_MODE: ${INFERENCE_MODE}
-      CHECKPOINT_PATH: ${CHECKPOINT_PATH}
-      FACE: ${FACE}
-      AUDIO: ${AUDIO}
-      FACESIZE: ${FACESIZE}
-      OUTFILE: ${OUTFILE}
-      GFPGAN_MODEL_VERSION: ${GFPGAN_MODEL_VERSION}
-      UPSCALE_FACTOR: ${UPSCALE_FACTOR}
-      FPS: ${FPS}
-      WAV2LIP_PORT: ${WAV2LIP_PORT}
-    restart: unless-stopped
-  animation:
-    image: ${REGISTRY:-opea}/animation:${TAG:-latest}
-    container_name: animation-server
-    ports:
-      - "3008:9066"
-    ipc: host
-    environment:
-      no_proxy: ${no_proxy}
-      http_proxy: ${http_proxy}
-      https_proxy: ${https_proxy}
-      WAV2LIP_ENDPOINT: ${WAV2LIP_ENDPOINT}
-    restart: unless-stopped
-  avatarchatbot-backend-server:
-    image: ${REGISTRY:-opea}/avatarchatbot:${TAG:-latest}
-    container_name: avatarchatbot-backend-server
-    depends_on:
-      - asr
-      - llm
-      - tts
-      - animation
-    ports:
-      - "3009:8888"
-    environment:
-      no_proxy: ${no_proxy}
-      https_proxy: ${https_proxy}
-      http_proxy: ${http_proxy}
-      MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP}
-      MEGA_SERVICE_PORT: ${MEGA_SERVICE_PORT}
-      ASR_SERVICE_HOST_IP: ${ASR_SERVICE_HOST_IP}
-      ASR_SERVICE_PORT: ${ASR_SERVICE_PORT}
-      LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP}
-      LLM_SERVICE_PORT: ${LLM_SERVICE_PORT}
-      LLM_SERVER_HOST_IP: ${LLM_SERVICE_HOST_IP}
-      LLM_SERVER_PORT: ${LLM_SERVICE_PORT}
-      TTS_SERVICE_HOST_IP: ${TTS_SERVICE_HOST_IP}
-      TTS_SERVICE_PORT: ${TTS_SERVICE_PORT}
-      ANIMATION_SERVICE_HOST_IP: ${ANIMATION_SERVICE_HOST_IP}
-      ANIMATION_SERVICE_PORT: ${ANIMATION_SERVICE_PORT}
-      WHISPER_SERVER_HOST_IP: ${WHISPER_SERVER_HOST_IP}
-      WHISPER_SERVER_PORT: ${WHISPER_SERVER_PORT}
-      SPEECHT5_SERVER_HOST_IP: ${SPEECHT5_SERVER_HOST_IP}
-      SPEECHT5_SERVER_PORT: ${SPEECHT5_SERVER_PORT}
-    ipc: host
-    restart: always
-
-networks:
-  default:
-    driver: bridge
--- a/AvatarChatbot/docker_compose/amd/gpu/rocm/set_env.sh
+++ b/AvatarChatbot/docker_compose/amd/gpu/rocm/set_env.sh
@@ -1,47 +0,0 @@
-#!/usr/bin/env bash
-
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
-export OPENAI_API_KEY=${OPENAI_API_KEY}
-export host_ip=$(hostname -I | awk '{print $1}')
-
-export TGI_SERVICE_PORT=3006
-export TGI_LLM_ENDPOINT=http://${host_ip}:${TGI_SERVICE_PORT}
-export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
-
-export ASR_ENDPOINT=http://${host_ip}:7066
-export TTS_ENDPOINT=http://${host_ip}:7055
-export WAV2LIP_ENDPOINT=http://${host_ip}:7860
-
-export WHISPER_SERVER_HOST_IP=${host_ip}
-export WHISPER_SERVER_PORT=7066
-
-export SPEECHT5_SERVER_HOST_IP=${host_ip}
-export SPEECHT5_SERVER_PORT=7055
-
-export MEGA_SERVICE_HOST_IP=${host_ip}
-export ASR_SERVICE_HOST_IP=${host_ip}
-export TTS_SERVICE_HOST_IP=${host_ip}
-export LLM_SERVICE_HOST_IP=${host_ip}
-export ANIMATION_SERVICE_HOST_IP=${host_ip}
-
-export MEGA_SERVICE_PORT=8888
-export ASR_SERVICE_PORT=3001
-export TTS_SERVICE_PORT=3002
-export LLM_SERVICE_PORT=3007
-export ANIMATION_SERVICE_PORT=3008
-
-export DEVICE="cpu"
-export WAV2LIP_PORT=7860
-export INFERENCE_MODE='wav2lip+gfpgan'
-export CHECKPOINT_PATH='/usr/local/lib/python3.11/site-packages/Wav2Lip/checkpoints/wav2lip_gan.pth'
-export FACE="/home/user/comps/animation/src/assets/img/avatar5.png"
-# export AUDIO='assets/audio/eg3_ref.wav' # audio file path is optional, will use base64str in the post request as input if is 'None'
-export AUDIO='None'
-export FACESIZE=96
-export OUTFILE="/outputs/result.mp4"
-export GFPGAN_MODEL_VERSION=1.4 # latest version, can roll back to v1.3 if needed
-export UPSCALE_FACTOR=1
-export FPS=10
--- a/AvatarChatbot/docker_compose/intel/cpu/xeon/README.md
+++ b/AvatarChatbot/docker_compose/intel/cpu/xeon/README.md
@@ -58,7 +58,7 @@ Then run the command `docker images`, you will have following images ready:
 Before starting the services with `docker compose`, you have to recheck the following environment variables.

 ```bash
-export HF_TOKEN=<your_hf_token>
+export HUGGINGFACEHUB_API_TOKEN=<your_hf_token>
 export host_ip=$(hostname -I | awk '{print $1}')

 export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
@@ -173,7 +173,7 @@ In the current version v1.0, you need to set the avatar figure image/video and t
 cd GenAIExamples/AvatarChatbot/tests
 export IMAGE_REPO="opea"
 export IMAGE_TAG="latest"
-export HF_TOKEN=<your_hf_token>
+export HUGGINGFACEHUB_API_TOKEN=<your_hf_token>

 test_avatarchatbot_on_xeon.sh
 ```
--- a/AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml
@@ -37,7 +37,7 @@ services:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
-      HF_TOKEN: ${HF_TOKEN}
+      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://${host_ip}:3006/health || exit 1"]
      interval: 10s
--- a/AvatarChatbot/docker_compose/intel/hpu/gaudi/README.md
+++ b/AvatarChatbot/docker_compose/intel/hpu/gaudi/README.md
@@ -58,7 +58,7 @@ Then run the command `docker images`, you will have following images ready:
 Before starting the services with `docker compose`, you have to recheck the following environment variables.

 ```bash
-export HF_TOKEN=<your_hf_token>
+export HUGGINGFACEHUB_API_TOKEN=<your_hf_token>
 export host_ip=$(hostname -I | awk '{print $1}')

 export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
@@ -183,7 +183,7 @@ In the current version v1.0, you need to set the avatar figure image/video and t
 cd GenAIExamples/AvatarChatbot/tests
 export IMAGE_REPO="opea"
 export IMAGE_TAG="latest"
-export HF_TOKEN=<your_hf_token>
+export HUGGINGFACEHUB_API_TOKEN=<your_hf_token>

 test_avatarchatbot_on_gaudi.sh
 ```
--- a/AvatarChatbot/docker_compose/intel/hpu/gaudi/compose.yaml
+++ b/AvatarChatbot/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -38,7 +38,7 @@ services:
      - SYS_NICE
    restart: unless-stopped
  tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.3.1
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
    container_name: tgi-gaudi-server
    ports:
      - "3006:80"
@@ -48,8 +48,7 @@ services:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
-      HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
-      HF_TOKEN: ${HF_TOKEN}
+      HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      HABANA_VISIBLE_DEVICES: all
--- a/AvatarChatbot/tests/test_compose_on_rocm.sh
+++ b/AvatarChatbot/tests/test_compose_on_rocm.sh
@@ -1,170 +0,0 @@
-#!/bin/bash
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-set -e
-IMAGE_REPO=${IMAGE_REPO:-"opea"}
-IMAGE_TAG=${IMAGE_TAG:-"latest"}
-echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
-echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
-export REGISTRY=${IMAGE_REPO}
-export TAG=${IMAGE_TAG}
-
-WORKPATH=$(dirname "$PWD")
-LOG_PATH="$WORKPATH/tests"
-if ls $LOG_PATH/*.log 1> /dev/null 2>&1; then
-    rm $LOG_PATH/*.log
-    echo "Log files removed."
-else
-    echo "No log files to remove."
-fi
-ip_address=$(hostname -I | awk '{print $1}')
-
-
-function build_docker_images() {
-    cd $WORKPATH/docker_image_build
-    git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
-
-    echo "Build all the images with --no-cache, check docker_image_build.log for details..."
-    service_list="avatarchatbot whisper asr llm-textgen speecht5 tts wav2lip animation"
-    docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
-
-    docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
-
-    docker images && sleep 3s
-}
-
-
-function start_services() {
-    cd $WORKPATH/docker_compose/amd/gpu/rocm
-
-    export HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN
-    export OPENAI_API_KEY=$OPENAI_API_KEY
-    export host_ip=${ip_address}
-
-    export TGI_SERVICE_PORT=3006
-    export TGI_LLM_ENDPOINT=http://${host_ip}:${TGI_SERVICE_PORT}
-    export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
-
-    export ASR_ENDPOINT=http://${host_ip}:7066
-    export TTS_ENDPOINT=http://${host_ip}:7055
-    export WAV2LIP_ENDPOINT=http://${host_ip}:7860
-
-    export MEGA_SERVICE_HOST_IP=${host_ip}
-    export ASR_SERVICE_HOST_IP=${host_ip}
-    export TTS_SERVICE_HOST_IP=${host_ip}
-    export LLM_SERVICE_HOST_IP=${host_ip}
-    export ANIMATION_SERVICE_HOST_IP=${host_ip}
-    export WHISPER_SERVER_HOST_IP=${host_ip}
-    export WHISPER_SERVER_PORT=7066
-
-    export SPEECHT5_SERVER_HOST_IP=${host_ip}
-    export SPEECHT5_SERVER_PORT=7055
-
-    export MEGA_SERVICE_PORT=8888
-    export ASR_SERVICE_PORT=3001
-    export TTS_SERVICE_PORT=3002
-    export LLM_SERVICE_PORT=3007
-    export ANIMATION_SERVICE_PORT=3008
-
-    export DEVICE="cpu"
-    export WAV2LIP_PORT=7860
-    export INFERENCE_MODE='wav2lip+gfpgan'
-    export CHECKPOINT_PATH='/usr/local/lib/python3.11/site-packages/Wav2Lip/checkpoints/wav2lip_gan.pth'
-    export FACE="/home/user/comps/animation/src/assets/img/avatar5.png"
-    # export AUDIO='assets/audio/eg3_ref.wav' # audio file path is optional, will use base64str in the post request as input if is 'None'
-    export AUDIO='None'
-    export FACESIZE=96
-    export OUTFILE="./outputs/result.mp4"
-    export GFPGAN_MODEL_VERSION=1.4 # latest version, can roll back to v1.3 if needed
-    export UPSCALE_FACTOR=1
-    export FPS=5
-
-    # Start Docker Containers
-    docker compose up -d --force-recreate
-
-    echo "Check tgi-service status"
-
-    n=0
-    until [[ "$n" -ge 100 ]]; do
-       docker logs tgi-service > $LOG_PATH/tgi_service_start.log
-       if grep -q Connected $LOG_PATH/tgi_service_start.log; then
-           break
-       fi
-       sleep 5s
-       n=$((n+1))
-    done
-    echo "tgi-service are up and running"
-    sleep 5s
-
-    echo "Check wav2lip-service status"
-
-    n=0
-    until [[ "$n" -ge 100 ]]; do
-       docker logs wav2lip-service >& $LOG_PATH/wav2lip-service_start.log
-       if grep -q "Application startup complete" $LOG_PATH/wav2lip-service_start.log; then
-           break
-       fi
-       sleep 5s
-       n=$((n+1))
-    done
-    echo "wav2lip-service are up and running"
-    sleep 5s
-}
-
-
-function validate_megaservice() {
-    cd $WORKPATH
-    ls
-    result=$(http_proxy="" curl http://${ip_address}:3009/v1/avatarchatbot -X POST -d @assets/audio/sample_whoareyou.json -H 'Content-Type: application/json')
-    echo "result is === $result"
-    if [[ $result == *"mp4"* ]]; then
-        echo "Result correct."
-    else
-        docker logs whisper-service > $LOG_PATH/whisper-service.log
-        docker logs asr-service > $LOG_PATH/asr-service.log
-        docker logs speecht5-service > $LOG_PATH/speecht5-service.log
-        docker logs tts-service > $LOG_PATH/tts-service.log
-        docker logs tgi-service > $LOG_PATH/tgi-service.log
-        docker logs llm-tgi-server > $LOG_PATH/llm-tgi-server.log
-        docker logs wav2lip-service > $LOG_PATH/wav2lip-service.log
-        docker logs animation-server > $LOG_PATH/animation-server.log
-
-        echo "Result wrong."
-        exit 1
-    fi
-
-}
-
-
-#function validate_frontend() {
-
-#}
-
-
-function stop_docker() {
-    cd $WORKPATH/docker_compose/amd/gpu/rocm
-    docker compose down && docker compose rm -f
-}
-
-
-function main() {
-
-    echo $OPENAI_API_KEY
-    echo $OPENAI_KEY
-
-    stop_docker
-    if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
-    start_services
-    # validate_microservices
-    sleep 30
-    validate_megaservice
-    # validate_frontend
-    stop_docker
-
-    echo y | docker system prune
-
-}
-
-
-main
--- a/ChatQnA/README.md
+++ b/ChatQnA/README.md
@@ -357,17 +357,8 @@ Users could also get the external IP via below command.
 ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+'
 ```

-Access the Jaeger dashboard UI at http://{EXTERNAL_IP}:16686
-
 For TGI serving on Gaudi, users could see different services like opea, TEI and TGI.
 ![Screenshot from 2024-12-27 11-58-18](https://github.com/user-attachments/assets/6126fa70-e830-4780-bd3f-83cb6eff064e)

 Here is a screenshot for one tracing of TGI serving request.
 ![Screenshot from 2024-12-27 11-26-25](https://github.com/user-attachments/assets/3a7c51c6-f422-41eb-8e82-c3df52cd48b8)
-
-There are also OPEA related tracings. Users could understand the time breakdown of each service request by looking into each opea:schedule operation.
-![image](https://github.com/user-attachments/assets/6137068b-b374-4ff8-b345-993343c0c25f)
-
-There could be async function such as `llm/MicroService_asyn_generate` and user needs to check the trace of the async function in another operation like
-opea:llm_generate_stream.
-![image](https://github.com/user-attachments/assets/a973d283-198f-4ce2-a7eb-58515b77503e)
--- a/ChatQnA/docker_compose/intel/cpu/aipc/README.md
+++ b/ChatQnA/docker_compose/intel/cpu/aipc/README.md
@@ -105,7 +105,7 @@ export https_proxy=${your_http_proxy}
 export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
 export RERANK_MODEL_ID="BAAI/bge-reranker-base"
 export INDEX_NAME="rag-redis"
-export HF_TOKEN=${your_hf_api_token}
+export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
 export OLLAMA_HOST=${host_ip}
 export OLLAMA_MODEL="llama3.2"
 ```
@@ -116,7 +116,7 @@ export OLLAMA_MODEL="llama3.2"
 set EMBEDDING_MODEL_ID=BAAI/bge-base-en-v1.5
 set RERANK_MODEL_ID=BAAI/bge-reranker-base
 set INDEX_NAME=rag-redis
-set HF_TOKEN=%your_hf_api_token%
+set HUGGINGFACEHUB_API_TOKEN=%your_hf_api_token%
 set OLLAMA_HOST=host.docker.internal
 set OLLAMA_MODEL="llama3.2"
 ```
--- a/ChatQnA/docker_compose/intel/cpu/aipc/compose.yaml
+++ b/ChatQnA/docker_compose/intel/cpu/aipc/compose.yaml
@@ -24,8 +24,7 @@ services:
      REDIS_HOST: redis-vector-db
      INDEX_NAME: ${INDEX_NAME}
      TEI_ENDPOINT: http://tei-embedding-service:80
-      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-      HF_TOKEN: ${HF_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
  tei-embedding-service:
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
    container_name: tei-embedding-server
@@ -55,8 +54,7 @@ services:
      REDIS_HOST: redis-vector-db
      INDEX_NAME: ${INDEX_NAME}
      TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
-      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-      HF_TOKEN: ${HF_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      LOGFLAG: ${LOGFLAG}
      RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
    restart: unless-stopped
@@ -72,8 +70,7 @@ services:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
-      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-      HF_TOKEN: ${HF_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
    command: --model-id ${RERANK_MODEL_ID} --auto-truncate
--- a/ChatQnA/docker_compose/intel/cpu/aipc/set_env.sh
+++ b/ChatQnA/docker_compose/intel/cpu/aipc/set_env.sh
@@ -7,11 +7,15 @@ pushd "../../../../../" > /dev/null
 source .set_env.sh
 popd > /dev/null

-if [ -z "${HF_TOKEN}" ]; then
-    echo "Error: HF_TOKEN is not set. Please set HF_TOKEN."
+if [ -z "${your_hf_api_token}" ]; then
+    echo "Error: HUGGINGFACEHUB_API_TOKEN is not set. Please set your_hf_api_token."
 fi

-export host_ip=$(hostname -I | awk '{print $1}')
+if [ -z "${host_ip}" ]; then
+    echo "Error: host_ip is not set. Please set host_ip first."
+fi
+
+export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
 export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
 export RERANK_MODEL_ID="BAAI/bge-reranker-base"
 export INDEX_NAME="rag-redis"
--- a/ChatQnA/docker_compose/intel/cpu/xeon/README.md
+++ b/ChatQnA/docker_compose/intel/cpu/xeon/README.md
@@ -21,7 +21,7 @@ To set up environment variables for deploying ChatQnA services, follow these ste
   ```bash
   # Example: host_ip="192.168.1.1"
   export host_ip="External_Public_IP"
-   export HF_TOKEN="Your_Huggingface_API_Token"
+   export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
   ```

 2. If you are in a proxy environment, also set the proxy-related environment variables:
@@ -228,7 +228,7 @@ For users in China who are unable to download models directly from Huggingface,
   ```bash
   # Example: host_ip="192.168.1.1"
   export host_ip="External_Public_IP"
-   export HF_TOKEN="Your_Huggingface_API_Token"
+   export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
   # Example: NGINX_PORT=80
   export NGINX_PORT=${your_nginx_port}
   ```
--- a/ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml
@@ -24,8 +24,7 @@ services:
      REDIS_HOST: redis-vector-db
      INDEX_NAME: ${INDEX_NAME}
      TEI_ENDPOINT: http://tei-embedding-service:80
-      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-      HF_TOKEN: ${HF_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
  tei-embedding-service:
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
    container_name: tei-embedding-server
@@ -55,8 +54,7 @@ services:
      REDIS_HOST: redis-vector-db
      INDEX_NAME: ${INDEX_NAME}
      TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
-      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-      HF_TOKEN: ${HF_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      LOGFLAG: ${LOGFLAG}
      RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
    restart: unless-stopped
@@ -72,8 +70,7 @@ services:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
-      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-      HF_TOKEN: ${HF_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
    command: --model-id ${RERANK_MODEL_ID} --auto-truncate
--- a/ChatQnA/docker_compose/intel/cpu/xeon/set_env.sh
+++ b/ChatQnA/docker_compose/intel/cpu/xeon/set_env.sh
@@ -7,11 +7,6 @@ pushd "../../../../../" > /dev/null
 source .set_env.sh
 popd > /dev/null

-if [ -z "$HF_TOKEN" ]; then
-    echo "Error: The HF_TOKEN environment variable is **NOT** set. Please set it"
-    return -1
-fi
-
 export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
 export RERANK_MODEL_ID="BAAI/bge-reranker-base"
 export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
--- a/ChatQnA/docker_compose/intel/hpu/gaudi/README.md
+++ b/ChatQnA/docker_compose/intel/hpu/gaudi/README.md
@@ -10,7 +10,7 @@ Quick Start:
 2. Run Docker Compose.
 3. Consume the ChatQnA Service.

-Note: The default LLM is `meta-llama/Meta-Llama-3-8B-Instruct`. Before deploying the application, please make sure either you've requested and been granted the access to it on [Huggingface](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) or you've downloaded the model locally from [ModelScope](https://www.modelscope.cn/models). We now support running the latest DeepSeek models, including [deepseek-ai/DeepSeek-R1-Distill-Llama-70B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-70B) and [deepseek-ai/DeepSeek-R1-Distill-Qwen-32B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B) on Gaudi accelerators. To run `deepseek-ai/DeepSeek-R1-Distill-Llama-70B`, update the `LLM_MODEL_ID` and configure `NUM_CARDS` to 8 in the [set_env.sh](./set_env.sh) script. To run `deepseek-ai/DeepSeek-R1-Distill-Qwen-32B`, update the `LLM_MODEL_ID` and configure `NUM_CARDS` to 4 in the [set_env.sh](./set_env.sh) script.
+Note: The default LLM is `meta-llama/Meta-Llama-3-8B-Instruct`. Before deploying the application, make sure that either you have requested and been granted access to it on [Hugging Face](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) or you have downloaded the model locally from [ModelScope](https://www.modelscope.cn/models).

 ## Quick Start: 1.Setup Environment Variable

@@ -21,7 +21,7 @@ To set up environment variables for deploying ChatQnA services, follow these ste
   ```bash
   # Example: host_ip="192.168.1.1"
   export host_ip="External_Public_IP"
-   export HF_TOKEN="Your_Huggingface_API_Token"
+   export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
   ```

 2. If you are in a proxy environment, also set the proxy-related environment variables:
@@ -197,9 +197,9 @@ For users in China who are unable to download models directly from Huggingface,
   export HF_ENDPOINT="https://hf-mirror.com"
   model_name="meta-llama/Meta-Llama-3-8B-Instruct"
   # Start vLLM LLM Service
-   docker run -p 8007:80 -v ./data:/data --name vllm-gaudi-server -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e HF_TOKEN=$HF_TOKEN -e VLLM_TORCH_PROFILER_DIR="/mnt" --cap-add=sys_nice --ipc=host opea/vllm-gaudi:latest --model $model_name --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
+   docker run -p 8007:80 -v ./data:/data --name vllm-gaudi-server -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e VLLM_TORCH_PROFILER_DIR="/mnt" --cap-add=sys_nice --ipc=host opea/vllm-gaudi:latest --model $model_name --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
   # Start TGI LLM Service
-   docker run -p 8005:80 -v ./data:/data --name tgi-gaudi-server -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e HF_TOKEN=$HF_TOKEN -e ENABLE_HPU_GRAPH=true -e LIMIT_HPU_GRAPH=true -e USE_FLASH_ATTENTION=true -e FLASH_ATTENTION_RECOMPUTE=true --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.6 --model-id $model_name --max-input-tokens 1024 --max-total-tokens 2048
+   docker run -p 8005:80 -v ./data:/data --name tgi-gaudi-server -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e ENABLE_HPU_GRAPH=true -e LIMIT_HPU_GRAPH=true -e USE_FLASH_ATTENTION=true -e FLASH_ATTENTION_RECOMPUTE=true --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.6 --model-id $model_name --max-input-tokens 1024 --max-total-tokens 2048
   ```

 2. Offline
@@ -214,9 +214,9 @@ For users in China who are unable to download models directly from Huggingface,
     export HF_TOKEN=${your_hf_token}
     export model_path="/path/to/model"
     # Start vLLM LLM Service
-     docker run -p 8007:80 -v $model_path:/data --name vllm-gaudi-server --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e HF_TOKEN=$HF_TOKEN -e VLLM_TORCH_PROFILER_DIR="/mnt" --cap-add=sys_nice --ipc=host opea/vllm-gaudi:latest --model /data --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
+     docker run -p 8007:80 -v $model_path:/data --name vllm-gaudi-server --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e VLLM_TORCH_PROFILER_DIR="/mnt" --cap-add=sys_nice --ipc=host opea/vllm-gaudi:latest --model /data --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
     # Start TGI LLM Service
-     docker run -p 8005:80 -v $model_path:/data --name tgi-gaudi-server --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e HF_TOKEN=$HF_TOKEN -e ENABLE_HPU_GRAPH=true -e LIMIT_HPU_GRAPH=true -e USE_FLASH_ATTENTION=true -e FLASH_ATTENTION_RECOMPUTE=true --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.6 --model-id /data --max-input-tokens 1024 --max-total-tokens 2048
+     docker run -p 8005:80 -v $model_path:/data --name tgi-gaudi-server --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e ENABLE_HPU_GRAPH=true -e LIMIT_HPU_GRAPH=true -e USE_FLASH_ATTENTION=true -e FLASH_ATTENTION_RECOMPUTE=true --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.6 --model-id /data --max-input-tokens 1024 --max-total-tokens 2048
     ```

 ### Setup Environment Variables
@@ -226,7 +226,7 @@ For users in China who are unable to download models directly from Huggingface,
   ```bash
   # Example: host_ip="192.168.1.1"
   export host_ip="External_Public_IP"
-   export HF_TOKEN="Your_Huggingface_API_Token"
+   export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
   # Example: NGINX_PORT=80
   export NGINX_PORT=${your_nginx_port}
   ```
--- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml
+++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -24,8 +24,7 @@ services:
      REDIS_HOST: redis-vector-db
      INDEX_NAME: ${INDEX_NAME}
      TEI_ENDPOINT: http://tei-embedding-service:80
-      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-      HF_TOKEN: ${HF_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
  tei-embedding-service:
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
    container_name: tei-embedding-gaudi-server
@@ -55,8 +54,7 @@ services:
      REDIS_HOST: redis-vector-db
      INDEX_NAME: ${INDEX_NAME}
      TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
-      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-      HF_TOKEN: ${HF_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
    restart: unless-stopped
  tei-reranking-service:
    image: ghcr.io/huggingface/tei-gaudi:1.5.0
@@ -90,11 +88,10 @@ services:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
-      HF_TOKEN: ${HF_TOKEN}
+      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
      LLM_MODEL_ID: ${LLM_MODEL_ID}
-      NUM_CARDS: ${NUM_CARDS}
      VLLM_TORCH_PROFILER_DIR: "/mnt"
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://$host_ip:8007/health || exit 1"]
@@ -105,7 +102,7 @@ services:
    cap_add:
      - SYS_NICE
    ipc: host
-    command: --model ${LLM_MODEL_ID} --tensor-parallel-size ${NUM_CARDS} --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
+    command: --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
  chatqna-gaudi-backend-server:
    image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
    container_name: chatqna-gaudi-backend-server
--- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml
+++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml
@@ -133,13 +133,12 @@ services:
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
      LLM_MODEL_ID: ${LLM_MODEL_ID}
-      NUM_CARDS: ${NUM_CARDS}
      VLLM_TORCH_PROFILER_DIR: "/mnt"
    runtime: habana
    cap_add:
      - SYS_NICE
    ipc: host
-    command: --model ${LLM_MODEL_ID} --tensor-parallel-size ${NUM_CARDS} --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
+    command: --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
  chatqna-gaudi-backend-server:
    image: ${REGISTRY:-opea}/chatqna-guardrails:${TAG:-latest}
    container_name: chatqna-gaudi-guardrails-server
--- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_tgi.yaml
+++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_tgi.yaml
@@ -101,12 +101,11 @@ services:
      LIMIT_HPU_GRAPH: true
      USE_FLASH_ATTENTION: true
      FLASH_ATTENTION_RECOMPUTE: true
-      NUM_CARDS: ${NUM_CARDS}
    runtime: habana
    cap_add:
      - SYS_NICE
    ipc: host
-    command: --model-id ${LLM_MODEL_ID} --num-shard ${NUM_CARDS} --max-input-length 2048 --max-total-tokens 4096 --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
+    command: --model-id ${LLM_MODEL_ID} --max-input-length 2048 --max-total-tokens 4096 --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
  jaeger:
    image: jaegertracing/all-in-one:latest
    container_name: jaeger
--- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_without_rerank.yaml
+++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_without_rerank.yaml
@@ -73,13 +73,12 @@ services:
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
      LLM_MODEL_ID: ${LLM_MODEL_ID}
-      NUM_CARDS: ${NUM_CARDS}
      VLLM_TORCH_PROFILER_DIR: "/mnt"
    runtime: habana
    cap_add:
      - SYS_NICE
    ipc: host
-    command: --model ${LLM_MODEL_ID} --tensor-parallel-size ${NUM_CARDS} --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
+    command: --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
  chatqna-gaudi-backend-server:
    image: ${REGISTRY:-opea}/chatqna-without-rerank:${TAG:-latest}
    container_name: chatqna-gaudi-backend-server
--- a/ChatQnA/docker_compose/intel/hpu/gaudi/set_env.sh
+++ b/ChatQnA/docker_compose/intel/hpu/gaudi/set_env.sh
@@ -6,16 +6,11 @@ pushd "../../../../../" > /dev/null
 source .set_env.sh
 popd > /dev/null

-if [ -z "$HF_TOKEN" ]; then
-    echo "Error: The HF_TOKEN environment variable is **NOT** set. Please set it"
-    return -1
-fi

 export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
 export RERANK_MODEL_ID="BAAI/bge-reranker-base"
 export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
 export INDEX_NAME="rag-redis"
-export NUM_CARDS=1
 # Set it as a non-null string, such as true, if you want to enable logging facility,
 # otherwise, keep it as "" to disable it.
 export LOGFLAG=""
--- a/ChatQnA/tests/test_compose_guardrails_on_gaudi.sh
+++ b/ChatQnA/tests/test_compose_guardrails_on_gaudi.sh
@@ -47,7 +47,6 @@ function start_services() {
    export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
    export RERANK_MODEL_ID="BAAI/bge-reranker-base"
    export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
-    export NUM_CARDS=1
    export INDEX_NAME="rag-redis"
    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
    export GURADRAILS_MODEL_ID="meta-llama/Meta-Llama-Guard-2-8B"
--- a/ChatQnA/tests/test_compose_on_gaudi.sh
+++ b/ChatQnA/tests/test_compose_on_gaudi.sh
@@ -45,7 +45,6 @@ function start_services() {
    export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
    export RERANK_MODEL_ID="BAAI/bge-reranker-base"
    export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
-    export NUM_CARDS=1
    export INDEX_NAME="rag-redis"
    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
    export host_ip=${ip_address}
--- a/ChatQnA/tests/test_compose_tgi_on_gaudi.sh
+++ b/ChatQnA/tests/test_compose_tgi_on_gaudi.sh
@@ -46,7 +46,6 @@ function start_services() {
    export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
    export RERANK_MODEL_ID="BAAI/bge-reranker-base"
    export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
-    export NUM_CARDS=1
    export INDEX_NAME="rag-redis"
    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
    export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+')
--- a/ChatQnA/tests/test_compose_without_rerank_on_gaudi.sh
+++ b/ChatQnA/tests/test_compose_without_rerank_on_gaudi.sh
@@ -45,7 +45,6 @@ function start_services() {
    cd $WORKPATH/docker_compose/intel/hpu/gaudi
    export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
    export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
-    export NUM_CARDS=1
    export INDEX_NAME="rag-redis"
    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}

--- a/CodeGen/docker_compose/intel/cpu/xeon/README.md
+++ b/CodeGen/docker_compose/intel/cpu/xeon/README.md
@@ -101,7 +101,7 @@ export http_proxy=${your_http_proxy}
 export https_proxy=${your_http_proxy}
 export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
 export TGI_LLM_ENDPOINT="http://${host_ip}:8028"
-export HF_TOKEN=${your_hf_api_token}
+export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
 export MEGA_SERVICE_HOST_IP=${host_ip}
 export LLM_SERVICE_HOST_IP=${host_ip}
 export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:7778/v1/codegen"
--- a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml
@@ -14,7 +14,7 @@ services:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
-      HF_TOKEN: ${HF_TOKEN}
+      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      host_ip: ${host_ip}
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://$host_ip:8028/health || exit 1"]
@@ -37,8 +37,7 @@ services:
      https_proxy: ${https_proxy}
      LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
      LLM_MODEL_ID: ${LLM_MODEL_ID}
-      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-      HF_TOKEN: ${HF_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
    restart: unless-stopped
  codegen-xeon-backend-server:
    image: ${REGISTRY:-opea}/codegen:${TAG:-latest}
--- a/CodeGen/docker_compose/intel/hpu/gaudi/README.md
+++ b/CodeGen/docker_compose/intel/hpu/gaudi/README.md
@@ -87,7 +87,7 @@ export http_proxy=${your_http_proxy}
 export https_proxy=${your_http_proxy}
 export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
 export TGI_LLM_ENDPOINT="http://${host_ip}:8028"
-export HF_TOKEN=${your_hf_api_token}
+export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
 export MEGA_SERVICE_HOST_IP=${host_ip}
 export LLM_SERVICE_HOST_IP=${host_ip}
 export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:7778/v1/codegen"
--- a/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml
+++ b/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -15,8 +15,7 @@ services:
      https_proxy: ${https_proxy}
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
-      HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
-      HF_TOKEN: ${HF_TOKEN}
+      HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      ENABLE_HPU_GRAPH: true
      LIMIT_HPU_GRAPH: true
      USE_FLASH_ATTENTION: true
@@ -46,8 +45,7 @@ services:
      https_proxy: ${https_proxy}
      LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
      LLM_MODEL_ID: ${LLM_MODEL_ID}
-      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-      HF_TOKEN: ${HF_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
    restart: unless-stopped
  codegen-gaudi-backend-server:
    image: ${REGISTRY:-opea}/codegen:${TAG:-latest}
--- a/CodeGen/docker_compose/set_env.sh
+++ b/CodeGen/docker_compose/set_env.sh
@@ -6,12 +6,7 @@ pushd "../../" > /dev/null
 source .set_env.sh
 popd > /dev/null

-if [ -z "$HF_TOKEN" ]; then
-    echo "Error: The HF_TOKEN environment variable is **NOT** set. Please set it"
-    return -1
-fi

-export host_ip=$(hostname -I | awk '{print $1}')
 export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
 export TGI_LLM_ENDPOINT="http://${host_ip}:8028"
 export MEGA_SERVICE_HOST_IP=${host_ip}
--- a/CodeTrans/docker_compose/intel/cpu/xeon/README.md
+++ b/CodeTrans/docker_compose/intel/cpu/xeon/README.md
@@ -72,7 +72,7 @@ Change the `LLM_MODEL_ID` below for your needs.
   export host_ip="External_Public_IP"
   # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
   export no_proxy="Your_No_Proxy"
-   export HF_TOKEN="Your_Huggingface_API_Token"
+   export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
   # Example: NGINX_PORT=80
   export NGINX_PORT=${your_nginx_port}
   ```
--- a/CodeTrans/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/CodeTrans/docker_compose/intel/cpu/xeon/compose.yaml
@@ -14,7 +14,7 @@ services:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
-      HF_TOKEN: ${HF_TOKEN}
+      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      host_ip: ${host_ip}
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://$host_ip:8008/health || exit 1"]
@@ -37,8 +37,7 @@ services:
      https_proxy: ${https_proxy}
      LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
      LLM_MODEL_ID: ${LLM_MODEL_ID}
-      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-      HF_TOKEN: ${HF_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
    restart: unless-stopped
  codetrans-xeon-backend-server:
    image: ${REGISTRY:-opea}/codetrans:${TAG:-latest}
--- a/CodeTrans/docker_compose/intel/hpu/gaudi/README.md
+++ b/CodeTrans/docker_compose/intel/hpu/gaudi/README.md
@@ -64,7 +64,7 @@ Change the `LLM_MODEL_ID` below for your needs.
   export host_ip="External_Public_IP"
   # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
   export no_proxy="Your_No_Proxy"
-   export HF_TOKEN="Your_Huggingface_API_Token"
+   export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
   # Example: NGINX_PORT=80
   export NGINX_PORT=${your_nginx_port}
   ```
--- a/CodeTrans/docker_compose/intel/hpu/gaudi/compose.yaml
+++ b/CodeTrans/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -3,7 +3,7 @@

 services:
  tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.3.1
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
    container_name: codetrans-tgi-service
    ports:
      - "8008:80"
@@ -15,8 +15,7 @@ services:
      https_proxy: ${https_proxy}
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
-      HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
-      HF_TOKEN: ${HF_TOKEN}
+      HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      ENABLE_HPU_GRAPH: true
      LIMIT_HPU_GRAPH: true
      USE_FLASH_ATTENTION: true
@@ -46,8 +45,7 @@ services:
      https_proxy: ${https_proxy}
      LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
      LLM_MODEL_ID: ${LLM_MODEL_ID}
-      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-      HF_TOKEN: ${HF_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
    restart: unless-stopped
  codetrans-gaudi-backend-server:
    image: ${REGISTRY:-opea}/codetrans:${TAG:-latest}
--- a/CodeTrans/docker_compose/set_env.sh
+++ b/CodeTrans/docker_compose/set_env.sh
@@ -6,10 +6,6 @@ pushd "../../" > /dev/null
 source .set_env.sh
 popd > /dev/null

-if [ -z "$HF_TOKEN" ]; then
-    echo "Error: The HF_TOKEN environment variable is **NOT** set. Please set it"
-    return -1
-fi

 export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"
 export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
--- a/DBQnA/docker_compose/amd/gpu/rocm/README.md
+++ b/DBQnA/docker_compose/amd/gpu/rocm/README.md
@@ -1,126 +0,0 @@
-# Deploy on AMD GPU
-
-This document outlines the deployment process for DBQnA application which helps generating a SQL query and its output given a NLP question, utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on an AMD GPU. The steps include Docker image creation, container deployment via Docker Compose, and service execution to integrate microservices. We will publish the Docker images to Docker Hub soon, which will simplify the deployment process for this service.
-
-## 🚀 Build Docker Images
-
-First of all, you need to build Docker Images locally. This step can be ignored once the Docker images are published to Docker hub.
-
-### 1.1 Build Text to SQL service Image
-
-```bash
-git clone https://github.com/opea-project/GenAIComps.git
-cd GenAIComps
-docker build --no-cache -t opea/texttosql:latest -f comps/text2sql/src/Dockerfile .
-```
-
-### 1.2 Build react UI Docker Image
-
-Build the frontend Docker image based on react framework via below command:
-
-```bash
-cd GenAIExamples/DBQnA/ui
-docker build --no-cache -t opea/dbqna-react-ui:latest --build-arg texttosql_url=$textToSql_host:$textToSql_port/v1 -f docker/Dockerfile.react .
-```
-
-Attention! Replace $textToSql_host and $textToSql_port with your own value.
-
-Then run the command `docker images`, you will have the following Docker Images:
-
-1. `opea/texttosql:latest`
-2. `opea/dbqna-react-ui:latest`
-
-## 🚀 Start Microservices
-
-### Required Models
-
-We set default model as "mistralai/Mistral-7B-Instruct-v0.3", change "LLM_MODEL_ID" in following Environment Variables setting if you want to use other models.
-
-If use gated models, you also need to provide [huggingface token](https://huggingface.co/docs/hub/security-tokens) to "HUGGINGFACEHUB_API_TOKEN" environment variable.
-
-### 2.1 Setup Environment Variables
-
-Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below.
-
-```bash
-export host_ip="host_ip_address_or_dns_name"
-export DBQNA_HUGGINGFACEHUB_API_TOKEN=""
-export DBQNA_TGI_SERVICE_PORT=8008
-export DBQNA_TGI_LLM_ENDPOINT="http://${host_ip}:${DBQNA_TGI_SERVICE_PORT}"
-export DBQNA_LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"
-export MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"
-export POSTGRES_USER="postgres"
-export POSTGRES_PASSWORD="testpwd"
-export POSTGRES_DB="chinook"
-export DBQNA_TEXT_TO_SQL_PORT=18142
-export DBQNA_UI_PORT=18143
-```
-
-Note: Please replace with `host_ip_address_or_dns_name` with your external IP address or DNS name, do not use localhost.
-
-### 2.2 Start Microservice Docker Containers
-
-There are 2 options to start the microservice
-
-#### 2.2.1 Start the microservice using docker compose
-
-```bash
-cd GenAIExamples/DBQnA/docker_compose/amd/gpu/rocm
-docker compose up -d
-```
-
-## 🚀 Validate Microservices
-
-### 3.1 TGI Service
-
-```bash
-curl http://${host_ip}:$DBQNA_TGI_SERVICE_PORT/generate \
-    -X POST \
-    -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
-    -H 'Content-Type: application/json'
-```
-
-### 3.2 Postgres Microservice
-
-Once Text-to-SQL microservice is started, user can use below command
-
-#### 3.2.1 Test the Database connection
-
-```bash
-curl --location http://${host_ip}:${DBQNA_TEXT_TO_SQL_PORT}/v1/postgres/health \
-    --header 'Content-Type: application/json' \
-    --data '{"user": "'${POSTGRES_USER}'","password": "'${POSTGRES_PASSWORD}'","host": "'${host_ip}'", "port": "5442", "database": "'${POSTGRES_DB}'"}'
-```
-
-#### 3.2.2 Invoke the microservice.
-
-```bash
-curl http://${host_ip}:${DBQNA_TEXT_TO_SQL_PORT}/v1/texttosql \
-    -X POST \
-    -d '{"input_text": "Find the total number of Albums.","conn_str": {"user": "'${POSTGRES_USER}'","password": "'${POSTGRES_PASSWORD}'","host": "'${host_ip}'", "port": "5442", "database": "'${POSTGRES_DB}'"}}' \
-    -H 'Content-Type: application/json'
-```
-
-### 3.3 Frontend validation
-
-We test the API in frontend validation to check if API returns HTTP_STATUS: 200 and validates if API response returns SQL query and output
-
-The test is present in App.test.tsx under react root folder ui/react/
-
-Command to run the test
-
-```bash
-npm run test
-```
-
-## 🚀 Launch the React UI
-
-Open this URL `http://${host_ip}:${DBQNA_UI_PORT}` in your browser to access the frontend.
-
-![project-screenshot](../../../../assets/img/dbQnA_ui_init.png)
-
-Test DB Connection
-![project-screenshot](../../../../assets/img/dbQnA_ui_successful_db_connection.png)
-
-Create SQL query and output for given NLP question
-![project-screenshot](../../../../assets/img/dbQnA_ui_succesful_sql_output_generation.png)
--- a/DBQnA/docker_compose/amd/gpu/rocm/chinook.sql
+++ b/DBQnA/docker_compose/amd/gpu/rocm/chinook.sql
--- a/DBQnA/docker_compose/amd/gpu/rocm/compose.yaml
+++ b/DBQnA/docker_compose/amd/gpu/rocm/compose.yaml
@@ -1,75 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-# SPDX-License-Identifier: Apache-2.0
-
-version: "3.8"
-
-services:
-  dbqna-tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
-    container_name: dbqna-tgi-service
-    ports:
-      - "${DBQNA_TGI_SERVICE_PORT:-8008}:80"
-    volumes:
-      - "./data:/data"
-    environment:
-      no_proxy: ${no_proxy}
-      http_proxy: ${http_proxy}
-      https_proxy: ${https_proxy}
-      TGI_SERVICE_PORT: ${DBQNA_TGI_SERVICE_PORT}
-      MODEL_ID: ${DBQNA_LLM_MODEL_ID}
-      HUGGING_FACE_HUB_TOKEN: ${DBQNA_HUGGINGFACEHUB_API_TOKEN}
-      HUGGINGFACEHUB_API_TOKEN: ${DBQNA_HUGGINGFACEHUB_API_TOKEN}
-    shm_size: 1g
-    devices:
-      - /dev/kfd:/dev/kfd
-      - /dev/dri/:/dev/dri/
-    cap_add:
-      - SYS_PTRACE
-    group_add:
-      - video
-    security_opt:
-      - seccomp:unconfined
-    ipc: host
-    command: --model-id ${MODEL_ID} --max-input-length 2048 --max-total-tokens 4096
-
-  postgres:
-    image: postgres:latest
-    container_name: postgres-container
-    restart: always
-    environment:
-      POSTGRES_USER: ${POSTGRES_USER}
-      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
-      POSTGRES_DB: ${POSTGRES_DB}
-    ports:
-      - '5442:5432'
-    volumes:
-      - ./chinook.sql:/docker-entrypoint-initdb.d/chinook.sql
-
-  text2sql:
-    image: opea/text2sql:latest
-    container_name: text2sql
-    ports:
-      - "${DBQNA_TEXT_TO_SQL_PORT:-9090}:8080"
-    environment:
-      TGI_LLM_ENDPOINT: ${DBQNA_TGI_LLM_ENDPOINT}
-
-  text2sql-react-ui:
-    image: opea/text2sql-react-ui:latest
-    container_name: text2sql-react-ui
-    depends_on:
-      - text2sql
-    ports:
-      - "${DBQNA_UI_PORT:-5174}:80"
-    environment:
-      no_proxy: ${no_proxy}
-      https_proxy: ${https_proxy}
-      http_proxy: ${http_proxy}
-      texttosql_port: ${texttosql_port}
-    ipc: host
-    restart: always
-
-networks:
-  default:
-    driver: bridge
--- a/DBQnA/docker_compose/amd/gpu/rocm/set_env.sh
+++ b/DBQnA/docker_compose/amd/gpu/rocm/set_env.sh
@@ -1,16 +0,0 @@
-#!/usr/bin/env bash
-
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-export host_ip=""
-export DBQNA_HUGGINGFACEHUB_API_TOKEN=""
-export DBQNA_TGI_SERVICE_PORT=8008
-export DBQNA_TGI_LLM_ENDPOINT="http://${host_ip}:${DBQNA_TGI_SERVICE_PORT}"
-export DBQNA_LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"
-export MODEL_ID=${DBQNA_LLM_MODEL_ID}
-export POSTGRES_USER="postgres"
-export POSTGRES_PASSWORD="testpwd"
-export POSTGRES_DB="chinook"
-export DBQNA_TEXT_TO_SQL_PORT=9090
-export DBQNA_UI_PORT=5174
--- a/DBQnA/docker_compose/intel/cpu/xeon/README.md
+++ b/DBQnA/docker_compose/intel/cpu/xeon/README.md
@@ -36,7 +36,7 @@ Then run the command `docker images`, you will have the following Docker Images:

 We set default model as "mistralai/Mistral-7B-Instruct-v0.3", change "LLM_MODEL_ID" in following Environment Variables setting if you want to use other models.

-If use gated models, you also need to provide [huggingface token](https://huggingface.co/docs/hub/security-tokens) to "HF_TOKEN" environment variable.
+If you use gated models, you also need to provide a [Hugging Face token](https://huggingface.co/docs/hub/security-tokens) via the "HUGGINGFACEHUB_API_TOKEN" environment variable.

 ### 2.1 Setup Environment Variables

@@ -57,7 +57,7 @@ export https_proxy=${your_http_proxy}

 export TGI_PORT=8008
 export TGI_LLM_ENDPOINT=http://${your_ip}:${TGI_PORT}
-export HF_TOKEN=${HF_TOKEN}
+export HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
 export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"
 export POSTGRES_USER=postgres
 export POSTGRES_PASSWORD=testpwd
@@ -97,7 +97,7 @@ docker run --name test-text2sql-postgres --ipc=host -e POSTGRES_USER=${POSTGRES_

 ```bash

-docker run -d --name="test-text2sql-tgi-endpoint" --ipc=host -p $TGI_PORT:80 -v ./data:/data --shm-size 1g -e HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} -e HF_TOKEN=${HF_TOKEN} -e model=${model} ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id $model
+docker run -d --name="test-text2sql-tgi-endpoint" --ipc=host -p $TGI_PORT:80 -v ./data:/data --shm-size 1g -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e HF_TOKEN=${HF_TOKEN} -e model=${model} ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id $model
 ```

 - Start Text-to-SQL Service
--- a/DBQnA/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/DBQnA/docker_compose/intel/cpu/xeon/compose.yaml
@@ -15,8 +15,8 @@ services:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
-      HF_TOKEN: ${HF_TOKEN}
-      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
+      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
    shm_size: 1g
    command: --model-id ${LLM_MODEL_ID}

--- a/DBQnA/docker_compose/set_env.sh
+++ b/DBQnA/docker_compose/set_env.sh
@@ -6,11 +6,6 @@ pushd "../../" > /dev/null
 source .set_env.sh
 popd > /dev/null

-if [ -z "$HF_TOKEN" ]; then
-    echo "Error: The HF_TOKEN environment variable is **NOT** set. Please set it"
-    return -1
-fi
-
 export TGI_PORT=8008
 export TGI_LLM_ENDPOINT="http://${your_ip}:${TGI_PORT}"
 export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"
--- a/DBQnA/docker_image_build/build.yaml
+++ b/DBQnA/docker_image_build/build.yaml
@@ -19,5 +19,4 @@ services:
        http_proxy: ${http_proxy}
        https_proxy: ${https_proxy}
        no_proxy: ${no_proxy}
-        texttosql_url: ${build_texttosql_url}
    image: ${REGISTRY:-opea}/text2sql-react-ui:${TAG:-latest}
--- a/DBQnA/tests/test_compose_on_rocm.sh
+++ b/DBQnA/tests/test_compose_on_rocm.sh
@@ -1,120 +0,0 @@
-#!/bin/bash
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-set -xe
-
-WORKPATH=$(dirname "$PWD")
-LOG_PATH="$WORKPATH/tests"
-ip_address=$(hostname -I | awk '{print $1}')
-tgi_port=8008
-tgi_volume=$WORKPATH/data
-
-export host_ip=${ip_address}
-export DBQNA_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
-export DBQNA_TGI_SERVICE_PORT=8008
-export DBQNA_TGI_LLM_ENDPOINT="http://${host_ip}:${DBQNA_TGI_SERVICE_PORT}"
-export DBQNA_LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"
-export MODEL_ID=${DBQNA_LLM_MODEL_ID}
-export POSTGRES_USER="postgres"
-export POSTGRES_PASSWORD="testpwd"
-export POSTGRES_DB="chinook"
-export DBQNA_TEXT_TO_SQL_PORT=9090
-export DBQNA_UI_PORT=5174
-export build_texttosql_url="${ip_address}:${DBQNA_TEXT_TO_SQL_PORT}/v1"
-
-function build_docker_images() {
-    cd "$WORKPATH"/docker_image_build
-    git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
-
-    echo "Build all the images with --no-cache, check docker_image_build.log for details..."
-    service_list="text2sql text2sql-react-ui"
-
-    docker compose -f build.yaml build ${service_list} --no-cache > "${LOG_PATH}"/docker_image_build.log
-    docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
-    docker images && sleep 1s
-}
-
-function start_service() {
-    cd "$WORKPATH"/docker_compose/amd/gpu/rocm
-    # Start Docker Containers
-    docker compose up -d > "${LOG_PATH}"/start_services_with_compose.log
-    n=0
-    until [[ "$n" -ge 100 ]]; do
-        docker logs dbqna-tgi-service > "${LOG_PATH}"/tgi_service_start.log
-        if grep -q Connected "${LOG_PATH}"/tgi_service_start.log; then
-            break
-        fi
-        sleep 5s
-        n=$((n+1))
-    done
-}
-
-function validate_microservice() {
-    result=$(http_proxy="" curl --connect-timeout 5 --max-time 120000 http://${ip_address}:${DBQNA_TEXT_TO_SQL_PORT}/v1/text2sql \
-        -X POST \
-        -d '{"input_text": "Find the total number of Albums.","conn_str": {"user": "'${POSTGRES_USER}'","password": "'${POSTGRES_PASSWORD}'","host": "'${ip_address}'", "port": "5442", "database": "'${POSTGRES_DB}'" }}' \
-        -H 'Content-Type: application/json')
-
-    if [[ $result == *"output"* ]]; then
-        echo $result
-        echo "Result correct."
-    else
-        echo "Result wrong. Received was $result"
-        docker logs text2sql > ${LOG_PATH}/text2sql.log
-        docker logs dbqna-tgi-service > ${LOG_PATH}/tgi.log
-        exit 1
-    fi
-
-}
-
-function validate_frontend() {
-    echo "[ TEST INFO ]: --------- frontend test started ---------"
-    cd $WORKPATH/ui/react
-    local conda_env_name="OPEA_e2e"
-    export PATH=${HOME}/miniconda3/bin/:$PATH
-    if conda info --envs | grep -q "$conda_env_name"; then
-        echo "$conda_env_name exist!"
-    else
-        conda create -n ${conda_env_name} python=3.12 -y
-    fi
-
-    source activate ${conda_env_name}
-    echo "[ TEST INFO ]: --------- conda env activated ---------"
-
-    conda install -c conda-forge nodejs=22.6.0 -y
-    npm install && npm ci
-    node -v && npm -v && pip list
-
-    exit_status=0
-    npm run test || exit_status=$?
-
-    if [ $exit_status -ne 0 ]; then
-        echo "[TEST INFO]: ---------frontend test failed---------"
-        exit $exit_status
-    else
-        echo "[TEST INFO]: ---------frontend test passed---------"
-    fi
-}
-
-function stop_docker() {
-    cd $WORKPATH/docker_compose/amd/gpu/rocm/
-    docker compose stop && docker compose rm -f
-}
-
-function main() {
-
-    stop_docker
-
-    build_docker_images
-    start_service
-    sleep 10s
-    validate_microservice
-    validate_frontend
-
-    stop_docker
-    echo y | docker system prune
-
-}
-
-main
--- a/DBQnA/ui/docker/Dockerfile.react
+++ b/DBQnA/ui/docker/Dockerfile.react
@@ -3,13 +3,8 @@

 # Stage 1: Build the React application using Node.js
 # Use Node 20.11.1 as the base image for the build step
-
 FROM node:20.11.1 AS vite-app

-ARG texttosql_url
-
-ENV TEXT_TO_SQL_URL=$texttosql_url
-
 WORKDIR /usr/app/react

 COPY react /usr/app/react
@@ -21,10 +16,6 @@ RUN ["npm", "run", "build"]

 FROM nginx:alpine

-ARG texttosql_url
-
-ENV TEXT_TO_SQL_URL=$texttosql_url
-
 EXPOSE 80

 COPY --from=vite-app /usr/app/react/dist /usr/share/nginx/html
--- a/DBQnA/ui/react/.env
+++ b/DBQnA/ui/react/.env
@@ -1 +1 @@
-VITE_TEXT_TO_SQL_URL=${TEXT_TO_SQL_URL}
+VITE_TEXT_TO_SQL_URL=http://${HOSTNAME}:9090/v1
--- a/DBQnA/ui/react/src/App.test.tsx
+++ b/DBQnA/ui/react/src/App.test.tsx
@@ -26,7 +26,7 @@ test('testing api with dynamic host', async () => {
  const formData = {
    user: 'postgres',
    database: 'chinook',
-    host: host,
+    host: host,  // Dynamic IP
    password: 'testpwd',
    port: '5442',
  };
--- a/DBQnA/ui/react/vite.config.ts
+++ b/DBQnA/ui/react/vite.config.ts
@@ -24,7 +24,7 @@ export default defineConfig({
  },
  define: {
    // Dynamically set the hostname for the VITE_TEXT_TO_SQL_URL
-    "import.meta.env.VITE_TEXT_TO_SQL_URL": JSON.stringify(`http://${process.env.TEXT_TO_SQL_URL}`),
+    "import.meta.env.VITE_TEXT_TO_SQL_URL": JSON.stringify(`http://${os.hostname()}:9090/v1`),
    "import.meta.env": process.env,
  },
 });
--- a/DocIndexRetriever/docker_compose/intel/cpu/xeon/README.md
+++ b/DocIndexRetriever/docker_compose/intel/cpu/xeon/README.md
@@ -43,7 +43,7 @@ docker build --no-cache -t opea/doc-index-retriever:latest --build-arg https_pro

 ```bash
 export host_ip="YOUR IP ADDR"
-export HF_TOKEN=${your_hf_api_token}
+export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
 export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
 export RERANK_MODEL_ID="BAAI/bge-reranker-base"
 export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006"
@@ -67,7 +67,7 @@ In that case, start Docker Containers with compose_without_rerank.yaml

 ```bash
 export host_ip="YOUR IP ADDR"
-export HF_TOKEN=${your_hf_api_token}
+export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
 export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
 cd GenAIExamples/DocIndexRetriever/intel/cpu/xoen/
 docker compose -f compose_without_rerank.yaml up -d
--- a/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml
@@ -27,8 +27,7 @@ services:
      REDIS_HOST: ${REDIS_HOST}
      INDEX_NAME: ${INDEX_NAME}
      TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
-      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-      HF_TOKEN: ${HF_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      LOGFLAG: ${LOGFLAG}
  tei-embedding-service:
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
@@ -43,8 +42,7 @@ services:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
-      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-      HF_TOKEN: ${HF_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      host_ip: ${host_ip}
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://$host_ip:6006/health || exit 1"]
@@ -64,7 +62,7 @@ services:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
-      HF_TOKEN: ${HF_TOKEN}
+      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
      LOGFLAG: ${LOGFLAG}
    restart: unless-stopped
@@ -82,8 +80,7 @@ services:
      https_proxy: ${https_proxy}
      REDIS_URL: ${REDIS_URL}
      INDEX_NAME: ${INDEX_NAME}
-      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-      HF_TOKEN: ${HF_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
      LOGFLAG: ${LOGFLAG}
      RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
@@ -101,8 +98,7 @@ services:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
-      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-      HF_TOKEN: ${HF_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      host_ip: ${host_ip}
@@ -126,8 +122,7 @@ services:
      https_proxy: ${https_proxy}
      RERANK_TYPE: ${RERANK_TYPE}
      TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
-      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-      HF_TOKEN: ${HF_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      LOGFLAG: ${LOGFLAG}
--- a/DocIndexRetriever/docker_compose/intel/cpu/xeon/set_env.sh
+++ b/DocIndexRetriever/docker_compose/intel/cpu/xeon/set_env.sh
@@ -5,24 +5,3 @@
 pushd "../../../../../" > /dev/null
 source .set_env.sh
 popd > /dev/null
-
-if [ -z "$HF_TOKEN" ]; then
-    echo "Error: The HF_TOKEN environment variable is **NOT** set. Please set it"
-    return -1
-fi
-
-export host_ip=$(hostname -I | awk '{print $1}')
-export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
-export RERANK_MODEL_ID="BAAI/bge-reranker-base"
-export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:8090"
-export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
-export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
-export REDIS_URL="redis://${host_ip}:6379"
-export INDEX_NAME="rag-redis"
-export MEGA_SERVICE_HOST_IP=${host_ip}
-export EMBEDDING_SERVICE_HOST_IP=${host_ip}
-export RETRIEVER_SERVICE_HOST_IP=${host_ip}
-export RERANK_SERVICE_HOST_IP=${host_ip}
-export LLM_SERVICE_HOST_IP=${host_ip}
-export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8000/v1/retrievaltool"
-export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest"
--- a/DocIndexRetriever/docker_compose/intel/hpu/gaudi/README.md
+++ b/DocIndexRetriever/docker_compose/intel/hpu/gaudi/README.md
@@ -43,7 +43,7 @@ docker build --no-cache -t opea/doc-index-retriever:latest --build-arg https_pro

 ```bash
 export host_ip="YOUR IP ADDR"
-export HF_TOKEN=${your_hf_api_token}
+export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
 export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
 export RERANK_MODEL_ID="BAAI/bge-reranker-base"
 export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:8090"
--- a/DocIndexRetriever/docker_compose/intel/hpu/gaudi/compose.yaml
+++ b/DocIndexRetriever/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -26,10 +26,9 @@ services:
      REDIS_URL: ${REDIS_URL}
      INDEX_NAME: ${INDEX_NAME}
      TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
-      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-      HF_TOKEN: ${HF_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
  tei-embedding-service:
-    image: ghcr.io/huggingface/tei-gaudi:1.5.2
+    image: ghcr.io/huggingface/tei-gaudi:1.5.0
    entrypoint: /bin/sh -c "apt-get update && apt-get install -y curl && text-embeddings-router --json-output --model-id ${EMBEDDING_MODEL_ID} --auto-truncate"
    container_name: tei-embedding-gaudi-server
    ports:
@@ -69,7 +68,7 @@ services:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
-      HF_TOKEN: ${HF_TOKEN}
+      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      LOGFLAG: ${LOGFLAG}
    restart: unless-stopped
  retriever:
@@ -102,8 +101,7 @@ services:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
-      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-      HHF_TOKE: ${HF_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      host_ip: ${host_ip}
@@ -127,8 +125,7 @@ services:
      https_proxy: ${https_proxy}
      RERANK_TYPE: ${RERANK_TYPE}
      TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
-      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-      HHF_TOKE: ${HF_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      LOGFLAG: ${LOGFLAG}
--- a/DocIndexRetriever/docker_compose/intel/hpu/gaudi/set_env.sh
+++ b/DocIndexRetriever/docker_compose/intel/hpu/gaudi/set_env.sh
@@ -5,24 +5,3 @@
 pushd "../../../../../" > /dev/null
 source .set_env.sh
 popd > /dev/null
-
-if [ -z "$HF_TOKEN" ]; then
-    echo "Error: The HF_TOKEN environment variable is **NOT** set. Please set it"
-    return -1
-fi
-
-export host_ip=$(hostname -I | awk '{print $1}')
-export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
-export RERANK_MODEL_ID="BAAI/bge-reranker-base"
-export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:8090"
-export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
-export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
-export REDIS_URL="redis://${host_ip}:6379"
-export INDEX_NAME="rag-redis"
-export MEGA_SERVICE_HOST_IP=${host_ip}
-export EMBEDDING_SERVICE_HOST_IP=${host_ip}
-export RETRIEVER_SERVICE_HOST_IP=${host_ip}
-export RERANK_SERVICE_HOST_IP=${host_ip}
-export LLM_SERVICE_HOST_IP=${host_ip}
-export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8000/v1/retrievaltool"
-export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest"
--- a/DocSum/docker_compose/intel/cpu/xeon/README.md
+++ b/DocSum/docker_compose/intel/cpu/xeon/README.md
@@ -83,7 +83,7 @@ Default model is "Intel/neural-chat-7b-v3-3". Change "LLM_MODEL_ID" environment
 export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
 ```

-When using gated models, you also need to provide [HuggingFace token](https://huggingface.co/docs/hub/security-tokens) to "HF_TOKEN" environment variable.
+When using gated models, you also need to provide a [Hugging Face token](https://huggingface.co/docs/hub/security-tokens) via the "HUGGINGFACEHUB_API_TOKEN" environment variable.

 ### Setup Environment Variable

@@ -96,7 +96,7 @@ To set up environment variables for deploying Document Summarization services, f
   export host_ip="External_Public_IP"
   # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
   export no_proxy="Your_No_Proxy"
-   export HF_TOKEN="Your_Huggingface_API_Token"
+   export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
   ```

 2. If you are in a proxy environment, also set the proxy-related environment variables:
--- a/DocSum/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/DocSum/docker_compose/intel/cpu/xeon/compose.yaml
@@ -12,8 +12,7 @@ services:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
-      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-      HF_TOKEN: ${HF_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      host_ip: ${host_ip}
      LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT}
    healthcheck:
@@ -40,8 +39,7 @@ services:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      LLM_ENDPOINT: ${LLM_ENDPOINT}
-      HF_TOKEN: ${HF_TOKEN}
-      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS}
      MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS}
      LLM_MODEL_ID: ${LLM_MODEL_ID}
--- a/DocSum/docker_compose/intel/hpu/gaudi/README.md
+++ b/DocSum/docker_compose/intel/hpu/gaudi/README.md
@@ -75,7 +75,7 @@ Default model is "Intel/neural-chat-7b-v3-3". Change "LLM_MODEL_ID" environment
 export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
 ```

-When using gated models, you also need to provide [HuggingFace token](https://huggingface.co/docs/hub/security-tokens) to "HF_TOKEN" environment variable.
+When using gated models, you also need to provide a [Hugging Face token](https://huggingface.co/docs/hub/security-tokens) via the "HUGGINGFACEHUB_API_TOKEN" environment variable.

 ### Setup Environment Variable

@@ -88,7 +88,7 @@ To set up environment variables for deploying Document Summarization services, f
   export host_ip="External_Public_IP"
   # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
   export no_proxy="Your_No_Proxy"
-   export HF_TOKEN="Your_Huggingface_API_Token"
+   export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
   ```

 2. If you are in a proxy environment, also set the proxy-related environment variables:
--- a/DocSum/docker_compose/intel/hpu/gaudi/compose.yaml
+++ b/DocSum/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -8,12 +8,12 @@ services:
    ports:
      - ${LLM_ENDPOINT_PORT:-8008}:80
    volumes:
-      - "${DATA_PATH:-./data}:/data"
+      - "${DATA_PATH:-data}:/data"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
-      HF_TOKEN: ${HF_TOKEN}
+      HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      HABANA_VISIBLE_DEVICES: all
@@ -48,7 +48,7 @@ services:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
-      HF_TOKEN: ${HF_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS}
      MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS}
      LLM_ENDPOINT: ${LLM_ENDPOINT}
--- a/DocSum/docker_compose/set_env.sh
+++ b/DocSum/docker_compose/set_env.sh
@@ -9,12 +9,6 @@ popd > /dev/null
 export MAX_INPUT_TOKENS=1024
 export MAX_TOTAL_TOKENS=2048

-if [ -z "$HF_TOKEN" ]; then
-    echo "Error: The HF_TOKEN environment variable is **NOT** set. Please set it"
-    return -1
-fi
-
-export host_ip=$(hostname -I | awk '{print $1}')
 export no_proxy="${no_proxy},${host_ip}"
 export MEGA_SERVICE_HOST_IP=${host_ip}
 export LLM_SERVICE_HOST_IP=${host_ip}
--- a/DocSum/tests/test_compose_on_gaudi.sh
+++ b/DocSum/tests/test_compose_on_gaudi.sh
@@ -17,7 +17,7 @@ export TAG=${IMAGE_TAG}
 export MAX_INPUT_TOKENS=2048
 export MAX_TOTAL_TOKENS=4096
 export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
-export HF_TOKEN=${HF_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
 export MEGA_SERVICE_HOST_IP=${host_ip}
 export LLM_SERVICE_HOST_IP=${host_ip}
 export ASR_SERVICE_HOST_IP=${host_ip}
--- a/DocSum/tests/test_compose_on_xeon.sh
+++ b/DocSum/tests/test_compose_on_xeon.sh
@@ -17,7 +17,7 @@ export TAG=${IMAGE_TAG}
 export MAX_INPUT_TOKENS=2048
 export MAX_TOTAL_TOKENS=4096
 export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
-export HF_TOKEN=${HF_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
 export MEGA_SERVICE_HOST_IP=${host_ip}
 export LLM_SERVICE_HOST_IP=${host_ip}
 export ASR_SERVICE_HOST_IP=${host_ip}
--- a/DocSum/ui/gradio/docsum_ui_gradio.py
+++ b/DocSum/ui/gradio/docsum_ui_gradio.py
@@ -313,8 +313,6 @@ app = gr.mount_gradio_app(app, demo, path="/")
 if __name__ == "__main__":
    import argparse

-    import nltk
-
    parser = argparse.ArgumentParser()
    parser.add_argument("--host", type=str, default="0.0.0.0")
    parser.add_argument("--port", type=int, default=5173)
@@ -322,8 +320,4 @@ if __name__ == "__main__":
    args = parser.parse_args()
    logger.info(">>> Starting server at %s:%d", args.host, args.port)

-    # Needed for UnstructuredURLLoader when reading content from a URL
-    nltk.download("punkt_tab")
-    nltk.download("averaged_perceptron_tagger_eng")
-
    uvicorn.run(app, host=args.host, port=args.port)
--- a/FaqGen/docker_compose/intel/cpu/xeon/README.md
+++ b/FaqGen/docker_compose/intel/cpu/xeon/README.md
@@ -64,7 +64,7 @@ Then run the command `docker images`, you will have the following Docker Images:

 We set default model as "meta-llama/Meta-Llama-3-8B-Instruct", change "LLM_MODEL_ID" in following Environment Variables setting if you want to use other models.

-If use gated models, you also need to provide [huggingface token](https://huggingface.co/docs/hub/security-tokens) to "HF_TOKEN" environment variable.
+If you use gated models, you also need to provide a [Hugging Face token](https://huggingface.co/docs/hub/security-tokens) via the "HUGGINGFACEHUB_API_TOKEN" environment variable.

 ### Setup Environment Variables

@@ -79,7 +79,7 @@ export LLM_ENDPOINT_PORT=8008
 export LLM_SERVICE_PORT=9000
 export FAQGen_COMPONENT_NAME="OpeaFaqGenTgi"
 export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
-export HF_TOKEN=${your_hf_api_token}
+export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
 export MEGA_SERVICE_HOST_IP=${host_ip}
 export LLM_SERVICE_HOST_IP=${host_ip}
 export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
--- a/FaqGen/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/FaqGen/docker_compose/intel/cpu/xeon/compose.yaml
@@ -14,8 +14,7 @@ services:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
-      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-      HF_TOKEN: ${HF_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      host_ip: ${host_ip}
      LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT}
    healthcheck:
@@ -39,8 +38,7 @@ services:
      https_proxy: ${https_proxy}
      LLM_ENDPOINT: ${LLM_ENDPOINT}
      LLM_MODEL_ID: ${LLM_MODEL_ID}
-      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-      HF_TOKEN: ${HF_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      FAQGen_COMPONENT_NAME: ${FAQGen_COMPONENT_NAME}
      LOGFLAG: ${LOGFLAG:-False}
    restart: unless-stopped
--- a/FaqGen/docker_compose/intel/hpu/gaudi/README.md
+++ b/FaqGen/docker_compose/intel/hpu/gaudi/README.md
@@ -17,7 +17,7 @@ To set up environment variables for deploying ChatQnA services, follow these ste
   ```bash
   # Example: host_ip="192.168.1.1"
   export host_ip=$(hostname -I | awk '{print $1}')
-   export HF_TOKEN="Your_Huggingface_API_Token"
+   export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
   ```

 2. If you are in a proxy environment, also set the proxy-related environment variables:
@@ -144,7 +144,7 @@ Then run the command `docker images`, you will have the following Docker Images:

 We set default model as "meta-llama/Meta-Llama-3-8B-Instruct", change "LLM_MODEL_ID" in following Environment Variables setting if you want to use other models.

-If use gated models, you also need to provide [huggingface token](https://huggingface.co/docs/hub/security-tokens) to "HF_TOKEN" environment variable.
+If you use gated models, you also need to provide a [Hugging Face token](https://huggingface.co/docs/hub/security-tokens) via the "HUGGINGFACEHUB_API_TOKEN" environment variable.

 ### Setup Environment Variables

@@ -159,7 +159,7 @@ export LLM_ENDPOINT_PORT=8008
 export LLM_SERVICE_PORT=9000
 export FAQGen_COMPONENT_NAME="OpeaFaqGenTgi"
 export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
-export HF_TOKEN=${your_hf_api_token}
+export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
 export MEGA_SERVICE_HOST_IP=${host_ip}
 export LLM_SERVICE_HOST_IP=${host_ip}
 export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
--- a/FaqGen/docker_compose/intel/hpu/gaudi/compose.yaml
+++ b/FaqGen/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -8,13 +8,12 @@ services:
    ports:
      - ${LLM_ENDPOINT_PORT:-8008}:80
    volumes:
-      - "${DATA_PATH:-./data}:/data"
+      - "${DATA_PATH:-data}:/data"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
-      HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
-      HF_TOKEN: ${HF_TOKEN}
+      HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      HABANA_VISIBLE_DEVICES: all
@@ -52,8 +51,7 @@ services:
      https_proxy: ${https_proxy}
      LLM_ENDPOINT: ${LLM_ENDPOINT}
      LLM_MODEL_ID: ${LLM_MODEL_ID}
-      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-      HF_TOKEN: ${HF_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      FAQGen_COMPONENT_NAME: ${FAQGen_COMPONENT_NAME}
      LOGFLAG: ${LOGFLAG:-False}
    restart: unless-stopped
--- a/FaqGen/kubernetes/gmc/README.md
+++ b/FaqGen/kubernetes/gmc/README.md
--- a/GraphRAG/docker_compose/intel/hpu/gaudi/compose.yaml
+++ b/GraphRAG/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -35,12 +35,11 @@ services:
      NO_PROXY: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
-      HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
-      HF_TOKEN: ${HF_TOKEN}
+      HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
    ipc: host
    command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
  tgi-gaudi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.3.1
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
    container_name: tgi-gaudi-server
    ports:
      - "6005:80"
--- a/GraphRAG/docker_compose/intel/hpu/gaudi/set_env.sh
+++ b/GraphRAG/docker_compose/intel/hpu/gaudi/set_env.sh
@@ -10,12 +10,6 @@ pushd "../../../../../" > /dev/null
 source .set_env.sh
 popd > /dev/null

-if [ -z "$HF_TOKEN" ]; then
-    echo "Error: The HF_TOKEN environment variable is **NOT** set. Please set it"
-    return -1
-fi
-
-export host_ip=$(hostname -I | awk '{print $1}')
 export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
 export OPENAI_EMBEDDING_MODEL="text-embedding-3-small"
 export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-8B-Instruct"
--- a/MultimodalQnA/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/MultimodalQnA/docker_compose/intel/cpu/xeon/compose.yaml
@@ -36,8 +36,7 @@ services:
      DATAPREP_MMR_PORT: ${DATAPREP_MMR_PORT}
      INDEX_NAME: ${INDEX_NAME}
      LVM_ENDPOINT: "http://${LVM_SERVICE_HOST_IP}:${LVM_PORT}/v1/lvm"
-      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-      HF_TOKEN: ${HF_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      MULTIMODAL_DATAPREP: true
      DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_MULTIMODALREDIS"
    restart: unless-stopped
--- a/MultimodalQnA/docker_compose/intel/cpu/xeon/set_env.sh
+++ b/MultimodalQnA/docker_compose/intel/cpu/xeon/set_env.sh
@@ -6,11 +6,6 @@ pushd "../../../../../" > /dev/null
 source .set_env.sh
 popd > /dev/null

-if [ -z "$HF_TOKEN" ]; then
-    echo "Error: The HF_TOKEN environment variable is **NOT** set. Please set it"
-    return -1
-fi
-
 export host_ip=$(hostname -I | awk '{print $1}')

 export no_proxy=${your_no_proxy}
--- a/MultimodalQnA/docker_compose/intel/hpu/gaudi/compose.yaml
+++ b/MultimodalQnA/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -38,8 +38,7 @@ services:
      DATAPREP_MMR_PORT: ${DATAPREP_MMR_PORT}
      INDEX_NAME: ${INDEX_NAME}
      LVM_ENDPOINT: "http://${LVM_SERVICE_HOST_IP}:${LVM_PORT}/v1/lvm"
-      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-      HF_TOKEN: ${HF_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      MULTIMODAL_DATAPREP: true
      DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_MULTIMODALREDIS"
    restart: unless-stopped
--- a/MultimodalQnA/docker_compose/intel/hpu/gaudi/set_env.sh
+++ b/MultimodalQnA/docker_compose/intel/hpu/gaudi/set_env.sh
@@ -6,11 +6,6 @@ pushd "../../../../../" > /dev/null
 source .set_env.sh
 popd > /dev/null

-if [ -z "$HF_TOKEN" ]; then
-    echo "Error: The HF_TOKEN environment variable is **NOT** set. Please set it"
-    return -1
-fi
-
 export host_ip=$(hostname -I | awk '{print $1}')

 export MM_EMBEDDING_SERVICE_HOST_IP=${host_ip}
--- a/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md
+++ b/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md
@@ -143,7 +143,7 @@ export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
 export TGI_LLM_ENDPOINT="http://${host_ip}:9009"
 export REDIS_URL="redis://${host_ip}:6379"
 export INDEX_NAME="rag-redis"
-export HF_TOKEN=${your_hf_api_token}
+export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
 export MEGA_SERVICE_HOST_IP=${host_ip}
 export EMBEDDING_SERVICE_HOST_IP=${host_ip}
 export RETRIEVER_SERVICE_HOST_IP=${host_ip}
--- a/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml
@@ -65,7 +65,7 @@ services:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
-      HF_TOKEN: ${HF_TOKEN}
+      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      LOGFLAG: ${LOGFLAG}
    restart: unless-stopped
  retriever:
--- a/README.md
+++ b/README.md
@@ -50,7 +50,7 @@ Deployment are based on released docker images by default, check [docker image l
 | CodeTrans         | [Xeon Instructions](CodeTrans/docker_compose/intel/cpu/xeon/README.md)         | [Gaudi Instructions](CodeTrans/docker_compose/intel/hpu/gaudi/README.md)     | [ROCm Instructions](CodeTrans/docker_compose/amd/gpu/rocm/README.md)     | [CodeTrans with Helm Charts](CodeTrans/kubernetes/helm/README.md)   | [CodeTrans with GMC](CodeTrans/kubernetes/gmc/README.md)     |
 | DocSum            | [Xeon Instructions](DocSum/docker_compose/intel/cpu/xeon/README.md)            | [Gaudi Instructions](DocSum/docker_compose/intel/hpu/gaudi/README.md)        | [ROCm Instructions](DocSum/docker_compose/amd/gpu/rocm/README.md)        | [DocSum with Helm Charts](DocSum/kubernetes/helm/README.md)         | [DocSum with GMC](DocSum/kubernetes/gmc/README.md)           |
 | SearchQnA         | [Xeon Instructions](SearchQnA/docker_compose/intel/cpu/xeon/README.md)         | [Gaudi Instructions](SearchQnA/docker_compose/intel/hpu/gaudi/README.md)     | Not Supported                                                            | [SearchQnA with Helm Charts](SearchQnA/kubernetes/helm/README.md)   | [SearchQnA with GMC](SearchQnA/kubernetes/gmc/README.md)     |
-| FaqGen            | [Xeon Instructions](FaqGen/docker_compose/intel/cpu/xeon/README.md)            | [Gaudi Instructions](FaqGen/docker_compose/intel/hpu/gaudi/README.md)        | [ROCm Instructions](FaqGen/docker_compose/amd/gpu/rocm/README.md)        | [FaqGen with Helm Charts](FaqGen/kubernetes/helm/README.md)         | Not supported                                                |
+| FaqGen            | [Xeon Instructions](FaqGen/docker_compose/intel/cpu/xeon/README.md)            | [Gaudi Instructions](FaqGen/docker_compose/intel/hpu/gaudi/README.md)        | [ROCm Instructions](FaqGen/docker_compose/amd/gpu/rocm/README.md)        | [FaqGen with Helm Charts](FaqGen/kubernetes/helm/README.md)         | [FaqGen with GMC](FaqGen/kubernetes/gmc/README.md)           |
 | Translation       | [Xeon Instructions](Translation/docker_compose/intel/cpu/xeon/README.md)       | [Gaudi Instructions](Translation/docker_compose/intel/hpu/gaudi/README.md)   | [ROCm Instructions](Translation/docker_compose/amd/gpu/rocm/README.md)   | Not Supported                                                       | [Translation with GMC](Translation/kubernetes/gmc/README.md) |
 | AudioQnA          | [Xeon Instructions](AudioQnA/docker_compose/intel/cpu/xeon/README.md)          | [Gaudi Instructions](AudioQnA/docker_compose/intel/hpu/gaudi/README.md)      | [ROCm Instructions](AudioQnA/docker_compose/amd/gpu/rocm/README.md)      | [AudioQnA with Helm Charts](AudioQnA/kubernetes/helm/README.md)     | [AudioQnA with GMC](AudioQnA/kubernetes/gmc/README.md)       |
 | VisualQnA         | [Xeon Instructions](VisualQnA/docker_compose/intel/cpu/xeon/README.md)         | [Gaudi Instructions](VisualQnA/docker_compose/intel/hpu/gaudi/README.md)     | [ROCm Instructions](VisualQnA/docker_compose/amd/gpu/rocm/README.md)     | [VisualQnA with Helm Charts](VisualQnA/kubernetes/helm/README.md)   | [VisualQnA with GMC](VisualQnA/kubernetes/gmc/README.md)     |
--- a/RerankFinetuning/docker_image_build/build.yaml
+++ b/RerankFinetuning/docker_image_build/build.yaml
@@ -11,12 +11,3 @@ services:
      context: GenAIComps
      dockerfile: comps/finetuning/src/Dockerfile
    image: ${REGISTRY:-opea}/finetuning:${TAG:-latest}
-  finetuning-gaudi:
-    build:
-      args:
-        http_proxy: ${http_proxy}
-        https_proxy: ${https_proxy}
-        no_proxy: ${no_proxy}
-      context: GenAIComps
-      dockerfile: comps/finetuning/src/Dockerfile.intel_hpu
-    image: ${REGISTRY:-opea}/finetuning-gaudi:${TAG:-latest}
--- a/RerankFinetuning/tests/test_compose_on_gaudi.sh
+++ b/RerankFinetuning/tests/test_compose_on_gaudi.sh
@@ -1,131 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-set -x
-IMAGE_REPO=${IMAGE_REPO:-"opea"}
-IMAGE_TAG=${IMAGE_TAG:-"latest"}
-echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
-echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
-export REGISTRY=${IMAGE_REPO}
-export TAG=${IMAGE_TAG}
-
-WORKPATH=$(dirname "$PWD")
-LOG_PATH="$WORKPATH/tests"
-ip_address=$(hostname -I | awk '{print $1}')
-finetuning_service_port=8015
-ray_port=8265
-service_name=finetuning-gaudi
-
-function build_docker_images() {
-    cd $WORKPATH/docker_image_build
-    if [ ! -d "GenAIComps" ] ; then
-        git clone --depth 1 --branch ${opea_branch:-"main"} https://github.com/opea-project/GenAIComps.git
-    fi
-    docker compose -f build.yaml build ${service_name} --no-cache > ${LOG_PATH}/docker_image_build.log
-}
-
-function start_service() {
-    export no_proxy="localhost,127.0.0.1,"${ip_address}
-    docker run -d --name="finetuning-server" -p $finetuning_service_port:$finetuning_service_port -p $ray_port:$ray_port --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy ${IMAGE_REPO}/finetuning-gaudi:${IMAGE_TAG}
-    sleep 1m
-}
-
-function validate_microservice() {
-    cd $LOG_PATH
-    export no_proxy="localhost,127.0.0.1,"${ip_address}
-
-    # test /v1/dataprep upload file
-    URL="http://${ip_address}:$finetuning_service_port/v1/files"
-    cat <<EOF > test_data.json
-{"query": "Five women walk along a beach wearing flip-flops.", "pos": ["Some women with flip-flops on, are walking along the beach"], "neg": ["The 4 women are sitting on the beach.", "There was a reform in 1996.", "She's not going to court to clear her record.", "The man is talking about hawaii.", "A woman is standing outside.", "The battle was over. ", "A group of people plays volleyball."]}
-{"query": "A woman standing on a high cliff on one leg looking over a river.", "pos": ["A woman is standing on a cliff."], "neg": ["A woman sits on a chair.", "George Bush told the Republicans there was no way he would let them even consider this foolish idea, against his top advisors advice.", "The family was falling apart.", "no one showed up to the meeting", "A boy is sitting outside playing in the sand.", "Ended as soon as I received the wire.", "A child is reading in her bedroom."]}
-{"query": "Two woman are playing instruments; one a clarinet, the other a violin.", "pos": ["Some people are playing a tune."], "neg": ["Two women are playing a guitar and drums.", "A man is skiing down a mountain.", "The fatal dose was not taken when the murderer thought it would be.", "Person on bike", "The girl is standing, leaning against the archway.", "A group of women watch soap operas.", "No matter how old people get they never forget. "]}
-{"query": "A girl with a blue tank top sitting watching three dogs.", "pos": ["A girl is wearing blue."], "neg": ["A girl is with three cats.", "The people are watching a funeral procession.", "The child is wearing black.", "Financing is an issue for us in public schools.", "Kids at a pool.", "It is calming to be assaulted.", "I face a serious problem at eighteen years old. "]}
-{"query": "A yellow dog running along a forest path.", "pos": ["a dog is running"], "neg": ["a cat is running", "Steele did not keep her original story.", "The rule discourages people to pay their child support.", "A man in a vest sits in a car.", "Person in black clothing, with white bandanna and sunglasses waits at a bus stop.", "Neither the Globe or Mail had comments on the current state of Canada's road system. ", "The Spring Creek facility is old and outdated."]}
-{"query": "It sets out essential activities in each phase along with critical factors related to those activities.", "pos": ["Critical factors for essential activities are set out."], "neg": ["It lays out critical activities but makes no provision for critical factors related to those activities.", "People are assembled in protest.", "The state would prefer for you to do that.", "A girl sits beside a boy.", "Two males are performing.", "Nobody is jumping", "Conrad was being plotted against, to be hit on the head."]}
-EOF
-    HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F 'file=@./test_data.json' -F purpose="fine-tune" -H 'Content-Type: multipart/form-data' "$URL")
-    HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://')
-    RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g')
-    SERVICE_NAME="finetuning-server - upload - file"
-
-    # Parse the JSON response
-    purpose=$(echo "$RESPONSE_BODY" | jq -r '.purpose')
-    filename=$(echo "$RESPONSE_BODY" | jq -r '.filename')
-
-    # Define expected values
-    expected_purpose="fine-tune"
-    expected_filename="test_data.json"
-
-    if [ "$HTTP_STATUS" -ne "200" ]; then
-        echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
-        docker logs finetuning-server >> ${LOG_PATH}/finetuning-server_upload_file.log
-        exit 1
-    else
-        echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
-    fi
-    # Check if the parsed values match the expected values
-    if [[ "$purpose" != "$expected_purpose" || "$filename" != "$expected_filename" ]]; then
-        echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY"
-        docker logs finetuning-server >> ${LOG_PATH}/finetuning-server_upload_file.log
-        exit 1
-    else
-        echo "[ $SERVICE_NAME ] Content is as expected."
-    fi
-
-    # test /v1/fine_tuning/jobs
-    URL="http://${ip_address}:$finetuning_service_port/v1/fine_tuning/jobs"
-    HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -H 'Content-Type: application/json' -d '{"training_file": "test_data.json","model": "BAAI/bge-reranker-base","General":{"task":"rerank","lora_config":null}}' "$URL")
-    HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://')
-    RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g')
-    SERVICE_NAME="finetuning-server - create finetuning job"
-
-    if [ "$HTTP_STATUS" -ne "200" ]; then
-        echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
-        docker logs finetuning-server >> ${LOG_PATH}/finetuning-server_create.log
-        exit 1
-    else
-        echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
-    fi
-    if [[ "$RESPONSE_BODY" != *'{"id":"ft-job'* ]]; then
-        echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY"
-        docker logs finetuning-server >> ${LOG_PATH}/finetuning-server_create.log
-        exit 1
-    else
-        echo "[ $SERVICE_NAME ] Content is as expected."
-    fi
-
-    sleep 3m
-
-    docker logs finetuning-server 2>&1 | tee ${LOG_PATH}/finetuning-server_create.log
-    FINETUNING_LOG=$(grep "succeeded" ${LOG_PATH}/finetuning-server_create.log)
-    if [[ "$FINETUNING_LOG" != *'succeeded'* ]]; then
-        echo "Finetuning failed."
-        RAY_JOBID=$(grep "Submitted Ray job" ${LOG_PATH}/finetuning-server_create.log | sed 's/.*raysubmit/raysubmit/' | cut -d' ' -f 1)
-        docker exec finetuning-server python -c "import os;os.environ['RAY_ADDRESS']='http://localhost:8265';from ray.job_submission import JobSubmissionClient;client = JobSubmissionClient();print(client.get_job_logs('${RAY_JOBID}'))" 2>&1 | tee ${LOG_PATH}/finetuning.log
-        exit 1
-    else
-        echo "Finetuning succeeded."
-    fi
-}
-
-function stop_docker() {
-    cid=$(docker ps -aq --filter "name=finetuning-server*")
-    if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
-}
-
-function main() {
-
-    stop_docker
-
-    build_docker_images
-    start_service
-
-    validate_microservice
-
-    stop_docker
-    echo y | docker system prune
-
-}
-
-main
--- a/RerankFinetuning/tests/test_compose_on_xeon.sh
+++ b/RerankFinetuning/tests/test_compose_on_xeon.sh
@@ -14,14 +14,13 @@ LOG_PATH="$WORKPATH/tests"
 ip_address=$(hostname -I | awk '{print $1}')
 finetuning_service_port=8015
 ray_port=8265
-service_name=finetuning

 function build_docker_images() {
    cd $WORKPATH/docker_image_build
    if [ ! -d "GenAIComps" ] ; then
        git clone --depth 1 --branch ${opea_branch:-"main"} https://github.com/opea-project/GenAIComps.git
    fi
-    docker compose -f build.yaml build ${service_name} --no-cache > ${LOG_PATH}/docker_image_build.log
+    docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log
 }

 function start_service() {
@@ -95,18 +94,7 @@ EOF
        echo "[ $SERVICE_NAME ] Content is as expected."
    fi

-    sleep 3m
-
-    docker logs finetuning-server 2>&1 | tee ${LOG_PATH}/finetuning-server_create.log
-    FINETUNING_LOG=$(grep "succeeded" ${LOG_PATH}/finetuning-server_create.log)
-    if [[ "$FINETUNING_LOG" != *'succeeded'* ]]; then
-        echo "Finetuning failed."
-        RAY_JOBID=$(grep "Submitted Ray job" ${LOG_PATH}/finetuning-server_create.log | sed 's/.*raysubmit/raysubmit/' | cut -d' ' -f 1)
-        docker exec finetuning-server python -c "import os;os.environ['RAY_ADDRESS']='http://localhost:8265';from ray.job_submission import JobSubmissionClient;client = JobSubmissionClient();print(client.get_job_logs('${RAY_JOBID}'))" 2>&1 | tee ${LOG_PATH}/finetuning.log
-        exit 1
-    else
-        echo "Finetuning succeeded."
-    fi
+    sleep 1s
 }

 function stop_docker() {
--- a/SearchQnA/docker_compose/amd/gpu/rocm/README.md
+++ b/SearchQnA/docker_compose/amd/gpu/rocm/README.md
@@ -1,179 +0,0 @@
-# Build and deploy SearchQnA Application on AMD GPU (ROCm)
-
-## Build images
-
-### Build Embedding Image
-
-```bash
-git clone https://github.com/opea-project/GenAIComps.git
-cd GenAIComps
-docker build --no-cache -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/src/Dockerfile .
-```
-
-### Build Retriever Image
-
-```bash
-docker build --no-cache -t opea/web-retriever-chroma:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/web_retrievers/src/Dockerfile .
-```
-
-### Build Rerank Image
-
-```bash
-docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/rerankings/src/Dockerfile .
-```
-
-### Build the LLM Docker Image
-
-```bash
-docker build -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/text-generation/Dockerfile .
-```
-
-### Build the MegaService Docker Image
-
-```bash
-git clone https://github.com/opea-project/GenAIExamples.git
-cd GenAIExamples/SearchQnA
-docker build --no-cache -t opea/searchqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
-```
-
-### Build the UI Docker Image
-
-```bash
-cd GenAIExamples/SearchQnA/ui
-docker build --no-cache -t opea/opea/searchqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile .
-```
-
-## Deploy SearchQnA Application
-
-### Features of Docker compose for AMD GPUs
-
-1. Added forwarding of GPU devices to the container TGI service with instructions:
-
-```yaml
-shm_size: 1g
-devices:
-  - /dev/kfd:/dev/kfd
-  - /dev/dri/:/dev/dri/
-cap_add:
-  - SYS_PTRACE
-group_add:
-  - video
-security_opt:
-  - seccomp:unconfined
-```
-
-In this case, all GPUs are thrown. To reset a specific GPU, you need to use specific device names cardN and renderN.
-
-For example:
-
-```yaml
-shm_size: 1g
-devices:
-  - /dev/kfd:/dev/kfd
-  - /dev/dri/card0:/dev/dri/card0
-  - /dev/dri/render128:/dev/dri/render128
-cap_add:
-  - SYS_PTRACE
-group_add:
-  - video
-security_opt:
-  - seccomp:unconfined
-```
-
-To find out which GPU device IDs cardN and renderN correspond to the same GPU, use the GPU driver utility
-
-### Go to the directory with the Docker compose file
-
-```bash
-cd GenAIExamples/SearchQnA/docker_compose/amd/gpu/rocm
-```
-
-### Set environments
-
-In the file "GenAIExamples/SearchQnA/docker_compose/amd/gpu/rocm/set_env.sh " it is necessary to set the required values. Parameter assignments are specified in the comments for each variable setting command
-
-```bash
-chmod +x set_env.sh
-. set_env.sh
-```
-
-### Run services
-
-```
-docker compose up -d
-```
-
-# Validate the MicroServices and MegaService
-
-## Validate TEI service
-
-```bash
-curl http://${SEARCH_HOST_IP}:3001/embed \
-    -X POST \
-    -d '{"inputs":"What is Deep Learning?"}' \
-    -H 'Content-Type: application/json'
-```
-
-## Validate Embedding service
-
-```bash
-curl http://${SEARCH_HOST_IP}:3002/v1/embeddings\
-  -X POST \
-  -d '{"text":"hello"}' \
-  -H 'Content-Type: application/json'
-```
-
-## Validate Web Retriever service
-
-```bash
-export your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
-curl http://${SEARCH_HOST_IP}:3003/v1/web_retrieval \
-  -X POST \
-  -d "{\"text\":\"What is the 2024 holiday schedule?\",\"embedding\":${your_embedding}}" \
-  -H 'Content-Type: application/json'
-```
-
-## Validate TEI Reranking service
-
-```bash
-curl http://${SEARCH_HOST_IP}:3004/rerank \
-    -X POST \
-    -d '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' \
-    -H 'Content-Type: application/json'
-```
-
-## Validate Reranking service
-
-```bash
-curl http://${SEARCH_HOST_IP}:3005/v1/reranking\
-  -X POST \
-  -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
-  -H 'Content-Type: application/json'
-```
-
-## Validate TGI service
-
-```bash
-curl http://${SEARCH_HOST_IP}:3006/generate \
-  -X POST \
-  -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
-  -H 'Content-Type: application/json'
-```
-
-## Validate LLM service
-
-```bash
-curl http://${SEARCH_HOST_IP}:3007/v1/chat/completions\
-  -X POST \
-  -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
-  -H 'Content-Type: application/json'
-```
-
-## Validate MegaService
-
-```bash
-curl http://${SEARCH_HOST_IP}:3008/v1/searchqna -H "Content-Type: application/json" -d '{
-     "messages": "What is the latest news? Give me also the source link.",
-     "stream": "True"
-     }'
-```
--- a/SearchQnA/docker_compose/amd/gpu/rocm/compose.yaml
+++ b/SearchQnA/docker_compose/amd/gpu/rocm/compose.yaml
@@ -1,173 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-# SPDX-License-Identifier: Apache-2.0
-
-services:
-  search-tei-embedding-service:
-    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
-    container_name: search-tei-embedding-server
-    ports:
-      - "3001:80"
-    volumes:
-      - "./data:/data"
-    shm_size: 1g
-    environment:
-      no_proxy: ${no_proxy}
-      http_proxy: ${http_proxy}
-      https_proxy: ${https_proxy}
-      HF_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
-      HUGGING_FACE_HUB_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
-      HUGGINGFACEHUB_API_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
-    command: --model-id ${SEARCH_EMBEDDING_MODEL_ID} --auto-truncate
-  search-embedding:
-    image: ${REGISTRY:-opea}/embedding:${TAG:-latest}
-    container_name: search-embedding-server
-    depends_on:
-      - search-tei-embedding-service
-    ports:
-      - "3002:6000"
-    ipc: host
-    environment:
-      no_proxy: ${no_proxy}
-      http_proxy: ${http_proxy}
-      https_proxy: ${https_proxy}
-      TEI_EMBEDDING_HOST_IP: ${SEARCH_HOST_IP}
-      TEI_EMBEDDING_ENDPOINT: ${SEARCH_TEI_EMBEDDING_ENDPOINT}
-      HF_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
-    restart: unless-stopped
-  search-web-retriever:
-    image: ${REGISTRY:-opea}/web-retriever:${TAG:-latest}
-    container_name: search-web-retriever-server
-    ports:
-      - "3003:7077"
-    ipc: host
-    environment:
-      no_proxy: ${no_proxy}
-      http_proxy: ${http_proxy}
-      https_proxy: ${https_proxy}
-      TEI_EMBEDDING_ENDPOINT: ${SEARCH_TEI_EMBEDDING_ENDPOINT}
-      GOOGLE_API_KEY: ${SEARCH_GOOGLE_API_KEY}
-      GOOGLE_CSE_ID: ${SEARCH_GOOGLE_CSE_ID}
-    restart: unless-stopped
-  search-tei-reranking-service:
-    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
-    container_name: search-tei-reranking-server
-    ports:
-      - "3004:80"
-    volumes:
-      - "./data:/data"
-    shm_size: 1g
-    environment:
-      no_proxy: ${no_proxy}
-      http_proxy: ${http_proxy}
-      https_proxy: ${https_proxy}
-    command: --model-id ${SEARCH_RERANK_MODEL_ID} --auto-truncate
-  search-reranking:
-    image: ${REGISTRY:-opea}/reranking:${TAG:-latest}
-    container_name: search-reranking-server
-    depends_on:
-      - search-tei-reranking-service
-    ports:
-      - "3005:8000"
-    ipc: host
-    environment:
-      no_proxy: ${no_proxy}
-      http_proxy: ${http_proxy}
-      https_proxy: ${https_proxy}
-      TEI_RERANKING_ENDPOINT: ${SEARCH_TEI_RERANKING_ENDPOINT}
-      HF_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
-      HUGGING_FACE_HUB_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
-      HUGGINGFACEHUB_API_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
-    restart: unless-stopped
-  search-tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
-    container_name: search-tgi-service
-    ports:
-      - "3006:80"
-    volumes:
-      - "./data:/data"
-    environment:
-      no_proxy: ${no_proxy}
-      http_proxy: ${http_proxy}
-      https_proxy: ${https_proxy}
-      HUGGING_FACE_HUB_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
-      HUGGINGFACEHUB_API_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
-    shm_size: 1g
-    devices:
-      - /dev/kfd:/dev/kfd
-      - /dev/dri/:/dev/dri/
-    cap_add:
-      - SYS_PTRACE
-    group_add:
-      - video
-    security_opt:
-      - seccomp:unconfined
-    ipc: host
-    command: --model-id ${SEARCH_LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048
-  search-llm:
-    image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest}
-    container_name: search-llm-server
-    depends_on:
-      - search-tgi-service
-    ports:
-      - "3007:9000"
-    ipc: host
-    environment:
-      no_proxy: ${no_proxy}
-      http_proxy: ${http_proxy}
-      https_proxy: ${https_proxy}
-      TGI_LLM_ENDPOINT: ${SEARCH_TGI_LLM_ENDPOINT}
-      HUGGINGFACEHUB_API_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
-      LLM_ENDPOINT: ${SEARCH_TGI_LLM_ENDPOINT}
-      LLM_MODEL_ID: ${SEARCH_LLM_MODEL_ID}
-      LLM_MODEL: ${SEARCH_LLM_MODEL_ID}
-      HF_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
-      OPENAI_API_KEY: ${SEARCH_OPENAI_API_KEY}
-    restart: unless-stopped
-  search-backend-server:
-    image: ${REGISTRY:-opea}/searchqna:${TAG:-latest}
-    container_name: search-backend-server
-    depends_on:
-      - search-tei-embedding-service
-      - search-embedding
-      - search-web-retriever
-      - search-tei-reranking-service
-      - search-reranking
-      - search-tgi-service
-      - search-llm
-    ports:
-      - "${SEARCH_BACKEND_SERVICE_PORT:-3008}:8888"
-    environment:
-      - no_proxy=${no_proxy}
-      - https_proxy=${https_proxy}
-      - http_proxy=${http_proxy}
-      - MEGA_SERVICE_HOST_IP=${SEARCH_MEGA_SERVICE_HOST_IP}
-      - EMBEDDING_SERVICE_HOST_IP=${SEARCH_EMBEDDING_SERVICE_HOST_IP}
-      - WEB_RETRIEVER_SERVICE_HOST_IP=${SEARCH_WEB_RETRIEVER_SERVICE_HOST_IP}
-      - RERANK_SERVICE_HOST_IP=${SEARCH_RERANK_SERVICE_HOST_IP}
-      - LLM_SERVICE_HOST_IP=${SEARCH_LLM_SERVICE_HOST_IP}
-      - EMBEDDING_SERVICE_PORT=${SEARCH_EMBEDDING_SERVICE_PORT}
-      - WEB_RETRIEVER_SERVICE_PORT=${SEARCH_WEB_RETRIEVER_SERVICE_PORT}
-      - RERANK_SERVICE_PORT=${SEARCH_RERANK_SERVICE_PORT}
-      - LLM_SERVICE_PORT=${SEARCH_LLM_SERVICE_PORT}
-    ipc: host
-    restart: always
-  search-ui-server:
-    image: ${REGISTRY:-opea}/searchqna-ui:${TAG:-latest}
-    container_name: search-ui-server
-    depends_on:
-      - search-backend-server
-    ports:
-      - "${SEARCH_FRONTEND_SERVICE_PORT:-5173}:5173"
-    environment:
-      - no_proxy=${no_proxy}
-      - https_proxy=${https_proxy}
-      - http_proxy=${http_proxy}
-      - BACKEND_BASE_URL=${SEARCH_BACKEND_SERVICE_ENDPOINT}
-    ipc: host
-    restart: always
-
-networks:
-  default:
-    driver: bridge
--- a/SearchQnA/docker_compose/amd/gpu/rocm/set_env.sh
+++ b/SearchQnA/docker_compose/amd/gpu/rocm/set_env.sh
@@ -1,36 +0,0 @@
-#!/usr/bin/env bash
-
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-# SPDX-License-Identifier: Apache-2.0
-
-export SEARCH_HOST_IP=10.53.22.29
-export SEARCH_EXTERNAL_HOST_IP=68.69.180.77
-export SEARCH_EMBEDDING_MODEL_ID='BAAI/bge-base-en-v1.5'
-export SEARCH_TEI_EMBEDDING_ENDPOINT=http://${SEARCH_HOST_IP}:3001
-export SEARCH_RERANK_MODEL_ID='BAAI/bge-reranker-base'
-export SEARCH_TEI_RERANKING_ENDPOINT=http://${SEARCH_HOST_IP}:3004
-export SEARCH_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
-export SEARCH_OPENAI_API_KEY=${OPENAI_API_KEY}
-
-export SEARCH_TGI_LLM_ENDPOINT=http://${SEARCH_HOST_IP}:3006
-export SEARCH_LLM_MODEL_ID='Intel/neural-chat-7b-v3-3'
-
-export SEARCH_MEGA_SERVICE_HOST_IP=${SEARCH_EXTERNAL_HOST_IP}
-export SEARCH_EMBEDDING_SERVICE_HOST_IP=${SEARCH_HOST_IP}
-export SEARCH_WEB_RETRIEVER_SERVICE_HOST_IP=${SEARCH_HOST_IP}
-export SEARCH_RERANK_SERVICE_HOST_IP=${SEARCH_HOST_IP}
-export SEARCH_LLM_SERVICE_HOST_IP=${SEARCH_HOST_IP}
-
-export SEARCH_EMBEDDING_SERVICE_PORT=3002
-export SEARCH_WEB_RETRIEVER_SERVICE_PORT=3003
-export SEARCH_RERANK_SERVICE_PORT=3005
-export SEARCH_LLM_SERVICE_PORT=3007
-
-export SEARCH_FRONTEND_SERVICE_PORT=18143
-export SEARCH_BACKEND_SERVICE_PORT=18142
-export SEARCH_BACKEND_SERVICE_ENDPOINT=http://${SEARCH_EXTERNAL_HOST_IP}:${SEARCH_BACKEND_SERVICE_PORT}/v1/searchqna
-
-export SEARCH_GOOGLE_API_KEY=${GOOGLE_API_KEY}
-export SEARCH_GOOGLE_CSE_ID=${GOOGLE_CSE_ID}
--- a/SearchQnA/docker_compose/intel/cpu/xeon/README.md
+++ b/SearchQnA/docker_compose/intel/cpu/xeon/README.md
@@ -66,7 +66,7 @@ Before starting the services with `docker compose`, you have to recheck the foll
 export host_ip=<your External Public IP>    # export host_ip=$(hostname -I | awk '{print $1}')
 export GOOGLE_CSE_ID=<your cse id>
 export GOOGLE_API_KEY=<your google api key>
-export HF_TOKEN=<your HF token>
+export HUGGINGFACEHUB_API_TOKEN=<your HF token>

 export EMBEDDING_MODEL_ID=BAAI/bge-base-en-v1.5
 export TEI_EMBEDDING_ENDPOINT=http://${host_ip}:3001
--- a/Show More
+++ b/Show More