update secrets token name

Signed-off-by: ZePan110 <ze.pan@intel.com>
Integrate CodeGen set_env to ut scripts. (#1976 )
2025-05-21 17:12:00 +08:00 · 2025-05-21 12:58:07 +08:00 · 2025-05-20 14:29:23 +08:00 · 2025-05-20 14:15:46 +08:00 · 2025-05-20 13:43:24 +08:00 · 2025-05-20 13:42:18 +08:00
607 changed files with 27087 additions and 14487 deletions
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -1,16 +1,18 @@
-* liang1.lv@intel.com feng.tian@intel.com suyue.chen@intel.com
+# Code owners will review PRs within their respective folders.
+
+* liang1.lv@intel.com feng.tian@intel.com suyue.chen@intel.com kaokao.lv@intel.com minmin.hou@intel.com rita.brugarolas.brufau@intel.com
 /.github/ suyue.chen@intel.com ze.pan@intel.com
-/AgentQnA/ kaokao.lv@intel.com minmin.hou@intel.com
+/AgentQnA/ abolfazl.shahbazi@intel.com kaokao.lv@intel.com minmin.hou@intel.com
 /AudioQnA/ sihan.chen@intel.com wenjiao.yue@intel.com
 /AvatarChatbot/ chun.tao@intel.com kaokao.lv@intel.com
 /ChatQnA/ liang1.lv@intel.com letong.han@intel.com
-/CodeGen/ liang1.lv@intel.com xinyao.wang@intel.com
-/CodeTrans/ sihan.chen@intel.com xinyao.wang@intel.com
+/CodeGen/ liang1.lv@intel.com
+/CodeTrans/ sihan.chen@intel.com
 /DBQnA/ supriya.krishnamurthi@intel.com liang1.lv@intel.com
-/DocIndexRetriever/ kaokao.lv@intel.com chendi.xue@intel.com
-/DocSum/ letong.han@intel.com xinyao.wang@intel.com
+/DocIndexRetriever/ abolfazl.shahbazi@intel.com kaokao.lv@intel.com chendi.xue@intel.com
+/DocSum/ letong.han@intel.com
 /EdgeCraftRAG/ yongbo.zhu@intel.com mingyuan.qi@intel.com
-/FaqGen/ yogesh.pandey@intel.com xinyao.wang@intel.com
+/FinanceAgent/ abolfazl.shahbazi@intel.com kaokao.lv@intel.com minmin.hou@intel.com rita.brugarolas.brufau@intel.com
 /GraphRAG/ rita.brugarolas.brufau@intel.com abolfazl.shahbazi@intel.com
 /InstructionTuning/ xinyu.ye@intel.com kaokao.lv@intel.com
 /MultimodalQnA/ melanie.h.buehler@intel.com tiep.le@intel.com
@@ -19,5 +21,6 @@
 /SearchQnA/ sihan.chen@intel.com letong.han@intel.com
 /Text2Image/ wenjiao.yue@intel.com xinyu.ye@intel.com
 /Translation/ liang1.lv@intel.com sihan.chen@intel.com
-/VideoQnA/ huiling.bao@intel.com xinyao.wang@intel.com
-/VisualQnA/ liang1.lv@intel.com sihan.chen@intel.com
+/VideoQnA/ huiling.bao@intel.com
+/VisualQnA/ liang1.lv@intel.com sihan.chen@intel.com
+/WorkflowExecAgent/ joshua.jian.ern.liew@intel.com kaokao.lv@intel.com
--- a/.github/ISSUE_TEMPLATE/1_bug_template.yml
+++ b/.github/ISSUE_TEMPLATE/1_bug_template.yml
@@ -32,6 +32,7 @@ body:
        - Mac
        - BSD
        - Other (Please let us know in description)
+        - N/A
    validations:
      required: true

@@ -56,6 +57,7 @@ body:
        - GPU-Nvidia
        - GPU-AMD
        - GPU-other (Please let us know in description)
+        - N/A
    validations:
      required: true

@@ -67,6 +69,7 @@ body:
        - label: Pull docker images from hub.docker.com
        - label: Build docker images from source
        - label: Other
+        - label: N/A
    validations:
      required: true

@@ -80,6 +83,7 @@ body:
        - label: Kubernetes Helm Charts
        - label: Kubernetes GMC
        - label: Other
+        - label: N/A
    validations:
      required: true

@@ -91,6 +95,7 @@ body:
        - Single Node
        - Multiple Nodes
        - Other
+        - N/A
      default: 0
    validations:
      required: true
--- a/.github/ISSUE_TEMPLATE/2_feature_template.yml
+++ b/.github/ISSUE_TEMPLATE/2_feature_template.yml
@@ -32,6 +32,7 @@ body:
        - Mac
        - BSD
        - Other (Please let us know in description)
+        - N/A
    validations:
      required: true

@@ -56,6 +57,7 @@ body:
        - GPU-Nvidia
        - GPU-AMD
        - GPU-other (Please let us know in description)
+        - N/A
    validations:
      required: true

@@ -67,6 +69,7 @@ body:
        - Single Node
        - Multiple Nodes
        - Other
+        - N/A
      default: 0
    validations:
      required: true
--- a/.github/env/_build_image.sh
+++ b/.github/env/_build_image.sh
@@ -0,0 +1,5 @@
+# Copyright (C) 2025 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+export VLLM_VER=v0.8.3
+export VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0
--- a/.github/workflows/_build_comps_base_image.yml
+++ b/.github/workflows/_build_comps_base_image.yml
@@ -35,9 +35,9 @@ jobs:
      - name: Check if job should be skipped
        id: check-skip
        run: |
-          should_skip=false
-          if [[ "${{ inputs.node }}" == "gaudi3" || "${{ inputs.node }}" == "rocm" || "${{ inputs.node }}" == "arc" ]]; then
-            should_skip=true
+          should_skip=true
+          if [[ "${{ inputs.node }}" == "gaudi" || "${{ inputs.node }}" == "xeon" ]]; then
+            should_skip=false
          fi
          echo "should_skip=$should_skip"
          echo "should_skip=$should_skip" >> $GITHUB_OUTPUT
--- a/.github/workflows/_build_image.yml
+++ b/.github/workflows/_build_image.yml
@@ -42,9 +42,9 @@ jobs:
      - name: Check if job should be skipped
        id: check-skip
        run: |
-          should_skip=false
-          if [[ "${{ inputs.node }}" == "gaudi3" || "${{ inputs.node }}" == "rocm" || "${{ inputs.node }}" == "arc" ]]; then
-            should_skip=true
+          should_skip=true
+          if [[ "${{ inputs.node }}" == "gaudi" || "${{ inputs.node }}" == "xeon" ]]; then
+            should_skip=false
          fi
          echo "should_skip=$should_skip"
          echo "should_skip=$should_skip" >> $GITHUB_OUTPUT
@@ -75,19 +75,12 @@ jobs:
        run: |
          cd ${{ github.workspace }}/${{ inputs.example }}/docker_image_build
          docker_compose_path=${{ github.workspace }}/${{ inputs.example }}/docker_image_build/build.yaml
+          source ${{ github.workspace }}/.github/env/_build_image.sh
          if [[ $(grep -c "vllm:" ${docker_compose_path}) != 0 ]]; then
-              git clone https://github.com/vllm-project/vllm.git && cd vllm
-              # Get the latest tag
-              VLLM_VER=$(git describe --tags "$(git rev-list --tags --max-count=1)")
-              echo "Check out vLLM tag ${VLLM_VER}"
-              git checkout ${VLLM_VER} &> /dev/null && cd ../
+              git clone -b ${VLLM_VER} --single-branch https://github.com/vllm-project/vllm.git
          fi
          if [[ $(grep -c "vllm-gaudi:" ${docker_compose_path}) != 0 ]]; then
-              git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork
-              # Get the latest tag
-              VLLM_VER=$(git describe --tags "$(git rev-list --tags --max-count=1)")
-              echo "Check out vLLM tag ${VLLM_VER}"
-              git checkout ${VLLM_VER} &> /dev/null && cd ../
+              git clone -b ${VLLM_FORK_VER} --single-branch https://github.com/HabanaAI/vllm-fork.git
          fi
          git clone --depth 1 --branch ${{ inputs.opea_branch }} https://github.com/opea-project/GenAIComps.git
          cd GenAIComps && git rev-parse HEAD && cd ../
--- a/.github/workflows/_example-workflow.yml
+++ b/.github/workflows/_example-workflow.yml
@@ -76,6 +76,7 @@ jobs:
      example: ${{ inputs.example }}
      hardware: ${{ inputs.node }}
      use_model_cache: ${{ inputs.use_model_cache }}
+      opea_branch: ${{ inputs.opea_branch }}
    secrets: inherit


--- a/.github/workflows/_gmc-e2e.yml
+++ b/.github/workflows/_gmc-e2e.yml
@@ -55,7 +55,7 @@ jobs:
      - name: Run tests
        id: run-test
        env:
-          HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
+          HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HF_TOKEN }}
          GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
          GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
        run: |
--- a/.github/workflows/_helm-e2e.yml
+++ b/.github/workflows/_helm-e2e.yml
@@ -2,7 +2,9 @@
 # SPDX-License-Identifier: Apache-2.0

 name: Helm Chart E2e Test For Call
-permissions: read-all
+permissions:
+  contents: read
+
 on:
  workflow_call:
    inputs:
@@ -81,6 +83,10 @@ jobs:
                if [[ "${{ inputs.hardware }}" == "gaudi" ]]; then
                  value_files="${value_files}\"${filename}\","
                fi
+              elif [[ "$filename" == *"rocm"* ]]; then
+                if [[ "${{ inputs.hardware }}" == "rocm" ]]; then
+                  value_files="${value_files}\"${filename}\","
+                fi
              elif [[ "$filename" == *"nv"* ]]; then
                continue
              else
@@ -131,24 +137,36 @@ jobs:
        env:
          example: ${{ inputs.example }}
        run: |
-          CHART_NAME="${example,,}"  # CodeGen
-          echo "CHART_NAME=$CHART_NAME" >> $GITHUB_ENV
-          echo "RELEASE_NAME=${CHART_NAME}$(date +%Y%m%d%H%M%S)" >> $GITHUB_ENV
-          echo "NAMESPACE=${CHART_NAME}-$(head -c 4 /dev/urandom | xxd -p)" >> $GITHUB_ENV
-          echo "ROLLOUT_TIMEOUT_SECONDS=600s" >> $GITHUB_ENV
-          echo "TEST_TIMEOUT_SECONDS=600s" >> $GITHUB_ENV
-          echo "KUBECTL_TIMEOUT_SECONDS=60s" >> $GITHUB_ENV
-          echo "should_cleanup=false" >> $GITHUB_ENV
-          echo "skip_validate=false" >> $GITHUB_ENV
-          echo "CHART_FOLDER=${example}/kubernetes/helm" >> $GITHUB_ENV
+          if [[ ! "$example" =~ ^[a-zA-Z0-9]{1,20}$ ]]; then  # one check: rejects dots, hyphens, slashes and empty input
+            echo "Error: Invalid input - only 1-20 alphanumeric characters (a-z, A-Z, 0-9) allowed"
+            exit 1
+          fi
+          # SAFE_PREFIX="kb-"
+          CHART_NAME="${SAFE_PREFIX}$(echo "$example" | tr '[:upper:]' '[:lower:]')"
+          RAND_SUFFIX=$(openssl rand -hex 2 | tr -dc 'a-f0-9')
+
+          cat <<EOF >> $GITHUB_ENV
+          CHART_NAME=${CHART_NAME}
+          RELEASE_NAME=${CHART_NAME}-$(date +%s)
+          NAMESPACE=ns-${CHART_NAME}-${RAND_SUFFIX}
+          ROLLOUT_TIMEOUT_SECONDS=600s
+          TEST_TIMEOUT_SECONDS=600s
+          KUBECTL_TIMEOUT_SECONDS=60s
+          should_cleanup=false
+          skip_validate=false
+          CHART_FOLDER=${example}/kubernetes/helm
+          EOF
+
+          echo "Generated safe variables:" >> $GITHUB_STEP_SUMMARY
+          echo "- CHART_NAME: ${CHART_NAME}" >> $GITHUB_STEP_SUMMARY

      - name: Helm install
        id: install
        env:
          GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
          GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
-          HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
-          HFTOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
+          HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HF_TOKEN }}
+          HFTOKEN: ${{ secrets.HF_TOKEN }}
          value_file: ${{ matrix.value_file }}
        run: |
          set -xe
--- a/.github/workflows/_run-docker-compose.yml
+++ b/.github/workflows/_run-docker-compose.yml
@@ -32,6 +32,10 @@ on:
        required: false
        type: boolean
        default: false
+      opea_branch:
+        default: "main"
+        required: false
+        type: string
 jobs:
  get-test-case:
    runs-on: ubuntu-latest
@@ -64,8 +68,10 @@ jobs:
          cd ${{ github.workspace }}/${{ inputs.example }}/tests
          run_test_cases=""

-          if [ "${{ inputs.hardware }}" == "gaudi2" ] || [ "${{ inputs.hardware }}" == "gaudi3" ]; then
+          if [[ "${{ inputs.hardware }}" == "gaudi"* ]]; then
            hardware="gaudi"
+          elif [[ "${{ inputs.hardware }}" == "xeon"* ]]; then
+            hardware="xeon"
          else
            hardware="${{ inputs.hardware }}"
          fi
@@ -116,13 +122,17 @@ jobs:
        run: |
          sudo rm -rf ${{github.workspace}}/* || true

-          # clean up containers use ports
-          cid=$(docker ps --format '{{.Names}} : {{.Ports}}' | grep -v ' : $' | grep -v 5000 | awk -F' : ' '{print $1}')
+          echo "Cleaning up containers using ports..."
+          cid=$(docker ps --format '{{.Names}} : {{.Ports}}' | grep -v ' : $' | grep -v 0.0.0.0:5000 | awk -F' : ' '{print $1}')
          if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
-
          docker system prune -f
-          docker rmi $(docker images --filter reference="*/*/*:latest" -q) || true
-          docker rmi $(docker images --filter reference="*/*:ci" -q) || true
+
+          echo "Cleaning up images ..."
+          docker images --filter reference="*/*/*:latest" -q | xargs -r docker rmi && sleep 1s
+          docker images --filter reference="*/*:ci" -q | xargs -r docker rmi && sleep 1s
+          docker images --filter reference="*:5000/*/*" -q | xargs -r docker rmi && sleep 1s
+          docker images --filter reference="opea/comps-base" -q | xargs -r docker rmi && sleep 1s
+          docker images

      - name: Checkout out Repo
        uses: actions/checkout@v4
@@ -141,11 +151,17 @@ jobs:
          bash ${{ github.workspace }}/.github/workflows/scripts/docker_compose_clean_up.sh "ports"
          docker ps

+      - name: Log in DockerHub
+        uses: docker/login-action@v3.2.0
+        with:
+          username: ${{ secrets.DOCKERHUB_USER }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+
      - name: Run test
        shell: bash
        env:
-          HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
-          HF_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
+          HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HF_TOKEN }}
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
          GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
          PINECONE_KEY: ${{ secrets.PINECONE_KEY }}
@@ -153,8 +169,11 @@ jobs:
          SDK_BASE_URL: ${{ secrets.SDK_BASE_URL }}
          SERVING_TOKEN: ${{ secrets.SERVING_TOKEN }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          FINNHUB_API_KEY: ${{ secrets.FINNHUB_API_KEY }}
+          FINANCIAL_DATASETS_API_KEY: ${{ secrets.FINANCIAL_DATASETS_API_KEY }}
          IMAGE_REPO: ${{ inputs.registry }}
          IMAGE_TAG: ${{ inputs.tag }}
+          opea_branch: ${{ inputs.opea_branch }}
          example: ${{ inputs.example }}
          hardware: ${{ inputs.hardware }}
          test_case: ${{ matrix.test_case }}
@@ -167,30 +186,48 @@ jobs:
              export model_cache="/data2/hf_model"
            else
              echo "Model cache directory /data2/hf_model does not exist"
-              export model_cache="~/.cache/huggingface/hub"
+              export model_cache="$HOME/.cache/huggingface/hub"
+            fi
+            if [[ "$test_case" == *"rocm"* ]]; then
+              export model_cache="/var/lib/GenAI/data"
            fi
          fi
          if [ -f "${test_case}" ]; then timeout 60m bash "${test_case}"; else echo "Test script {${test_case}} not found, skip test!"; fi

      - name: Clean up container after test
-        shell: bash
-        if: cancelled() || failure()
+        if: always()
        run: |
-          cd ${{ github.workspace }}/${{ inputs.example }}
-          export test_case=${{ matrix.test_case }}
-          export hardware=${{ inputs.hardware }}
-          bash ${{ github.workspace }}/.github/workflows/scripts/docker_compose_clean_up.sh "containers"
+          set -x

-          # clean up containers use ports
-          cid=$(docker ps --format '{{.Names}} : {{.Ports}}' | grep -v ' : $' | grep -v 5000 | awk -F' : ' '{print $1}')
+          echo "Cleaning up containers using ports..."
+          cid=$(docker ps --format '{{.Names}} : {{.Ports}}' | grep -v ' : $' | grep -v 0.0.0.0:5000 | awk -F' : ' '{print $1}')
          if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi

-          docker system prune -f
-          docker rmi $(docker images --filter reference="*:5000/*/*" -q) || true
+          echo "Cleaning up images ..."
+          df -h
+          sleep 1
+          docker system df
+          sleep 1
+          if [[ "${{ inputs.hardware }}" == "xeon"* ]]; then
+              docker system prune -a -f
+          else
+              docker images --filter reference="*/*/*:latest" -q | xargs -r docker rmi && sleep 1s
+              docker images --filter reference="*/*:ci" -q | xargs -r docker rmi && sleep 1s
+              docker images --filter reference="*:5000/*/*" -q | xargs -r docker rmi && sleep 1s
+              docker images --filter reference="opea/comps-base" -q | xargs -r docker rmi && sleep 1s
+              docker system prune -f
+          fi
+          sleep 5
+          docker images
+          sleep 1
+          df -h
+          sleep 1
+          docker system df
+          sleep 1

      - name: Publish pipeline artifact
        if: ${{ !cancelled() }}
        uses: actions/upload-artifact@v4
        with:
-          name: ${{ inputs.example }}_${{ matrix.test_case }}
+          name: ${{ inputs.hardware }}_${{ inputs.example }}_${{ matrix.test_case }}
          path: ${{ github.workspace }}/${{ inputs.example }}/tests/*.log
--- a/.github/workflows/daily-update-vllm-version.yml
+++ b/.github/workflows/daily-update-vllm-version.yml
@@ -0,0 +1,94 @@
+# Copyright (C) 2025 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+name: Daily update vLLM & vLLM-fork version
+
+on:
+  schedule:
+    - cron: "30 22 * * *"
+  workflow_dispatch:
+
+env:
+  BRANCH_NAME: "update"
+  USER_NAME: "CICD-at-OPEA"
+  USER_EMAIL: "CICD@opea.dev"
+
+jobs:
+  freeze-tag:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        include:
+          - repo: vLLM
+            repo_name: vllm-project/vllm
+            ver_name: VLLM_VER
+          - repo: vLLM-fork
+            repo_name: HabanaAI/vllm-fork
+            ver_name: VLLM_FORK_VER
+      fail-fast: false
+    permissions:
+      contents: write
+      pull-requests: write
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          ref: ${{ github.ref }}
+
+      - name: Set up Git
+        run: |
+          git config --global user.name ${{ env.USER_NAME }}
+          git config --global user.email ${{ env.USER_EMAIL }}
+          git remote set-url origin https://${{ env.USER_NAME }}:"${{ secrets.ACTION_TOKEN }}"@github.com/${{ github.repository }}.git
+          git fetch
+
+          if git ls-remote https://github.com/${{ github.repository }}.git "refs/heads/${{ env.BRANCH_NAME }}_${{ matrix.repo }}" | grep -q "refs/heads/${{ env.BRANCH_NAME }}_${{ matrix.repo }}"; then
+            echo "branch ${{ env.BRANCH_NAME }}_${{ matrix.repo }} exists"
+            git checkout ${{ env.BRANCH_NAME }}_${{ matrix.repo }}
+          else
+            echo "branch ${{ env.BRANCH_NAME }}_${{ matrix.repo }} not exists"
+            git checkout -b ${{ env.BRANCH_NAME }}_${{ matrix.repo }}
+            git push origin ${{ env.BRANCH_NAME }}_${{ matrix.repo }}
+            echo "branch ${{ env.BRANCH_NAME }}_${{ matrix.repo }} created successfully"
+          fi
+
+      - name: Run script
+        run: |
+          latest_vllm_ver=$(curl -fsS "https://api.github.com/repos/${{ matrix.repo_name }}/tags" | jq -r '.[0].name // empty')  # -r: raw string, empty when no tag
+          if [[ -z "$latest_vllm_ver" ]]; then echo "Failed to resolve the latest tag of ${{ matrix.repo_name }}" >&2; exit 1; fi  # never write a bogus version
+          echo "latest_vllm_ver=${latest_vllm_ver}" >> "$GITHUB_ENV"
+          find . -type f -name "*.sh" -exec sed -i "s/${{ matrix.ver_name }}=.*/${{ matrix.ver_name }}=${latest_vllm_ver}/" {} \;
+
+      - name: Commit changes
+        run: |
+          git add .
+          if git diff-index --quiet HEAD --; then
+            echo "No changes detected, skipping commit."
+            exit 1
+          else
+            git commit -s -m "Update ${{ matrix.repo }} version to ${latest_vllm_ver}"
+            git push --set-upstream origin ${{ env.BRANCH_NAME }}_${{ matrix.repo }}
+          fi
+
+      - name: Create Pull Request
+        env:
+          GH_TOKEN: ${{ secrets.ACTION_TOKEN }}
+        run: |
+          pr_count=$(curl -H "Authorization: token ${{ secrets.ACTION_TOKEN }}" -s "https://api.github.com/repos/${{ github.repository }}/pulls?state=open&head=${{ github.repository_owner }}:${{ env.BRANCH_NAME }}_${{ matrix.repo }}" | jq '. | length')
+          if [ "${pr_count:-0}" -gt 0 ]; then  # an open PR from the update branch already exists: refresh it
+            echo "Pull Request exists"
+            pr_number=$(curl -H "Authorization: token ${{ secrets.ACTION_TOKEN }}" -s "https://api.github.com/repos/${{ github.repository }}/pulls?state=open&head=${{ github.repository_owner }}:${{ env.BRANCH_NAME }}_${{ matrix.repo }}" | jq '.[0].number')
+            gh pr edit ${pr_number} \
+              --title "Update ${{ matrix.repo }} version to ${latest_vllm_ver}" \
+              --body "Update ${{ matrix.repo }} version to ${latest_vllm_ver}"
+            echo "Pull Request updated successfully"
+          else
+            echo "Pull Request does not exists..."
+            gh pr create \
+              -B main \
+              -H ${{ env.BRANCH_NAME }}_${{ matrix.repo }} \
+              --title "Update ${{ matrix.repo }} version to ${latest_vllm_ver}" \
+              --body "Update ${{ matrix.repo }} version to ${latest_vllm_ver}"
+            echo "Pull Request created successfully"
+          fi
--- a/.github/workflows/daily_check_issue_and_pr.yml
+++ b/.github/workflows/daily_check_issue_and_pr.yml
@@ -26,3 +26,4 @@ jobs:
          close-pr-message: "This PR was closed because it has been stalled for 7 days with no activity."
          repo-token: ${{ secrets.ACTION_TOKEN }}
          start-date: "2025-03-01T00:00:00Z"
+          exempt-issue-labels: "Backlog"
--- a/.github/workflows/dockerhub-description.yml
+++ b/.github/workflows/dockerhub-description.yml
--- a/.github/workflows/manual-example-workflow.yml
+++ b/.github/workflows/manual-example-workflow.yml
@@ -7,7 +7,7 @@ on:
    inputs:
      nodes:
        default: "gaudi,xeon"
-        description: "Hardware to run test gaudi,gaudi3,xeon,rocm,arc"
+        description: "Hardware to run test gaudi,xeon,rocm,arc,gaudi3,xeon-gnr"
        required: true
        type: string
      examples:
--- a/.github/workflows/nightly-docker-build-publish.yml
+++ b/.github/workflows/nightly-docker-build-publish.yml
@@ -5,7 +5,7 @@ name: Nightly build/publish latest docker images

 on:
  schedule:
-    - cron: "30 14 * * *" # UTC time
+    - cron: "30 14 * * 1-5" # UTC time
  workflow_dispatch:

 env:
@@ -38,8 +38,21 @@ jobs:
    with:
      node: gaudi

-  build-and-test:
-    needs: get-build-matrix
+  build-images:
+    needs: [get-build-matrix, build-comps-base]
+    strategy:
+      matrix:
+        example: ${{ fromJSON(needs.get-build-matrix.outputs.examples_json) }}
+      fail-fast: false
+    uses: ./.github/workflows/_build_image.yml
+    with:
+      node: gaudi
+      example: ${{ matrix.example }}
+      inject_commit: true
+    secrets: inherit
+
+  test-example:
+    needs: [get-build-matrix]
    if: ${{ needs.get-build-matrix.outputs.examples_json != '' }}
    strategy:
      matrix:
@@ -47,21 +60,22 @@ jobs:
      fail-fast: false
    uses: ./.github/workflows/_example-workflow.yml
    with:
-      node: gaudi
+      node: xeon
+      build: false
      example: ${{ matrix.example }}
      test_compose: true
      inject_commit: true
    secrets: inherit

  get-image-list:
-    needs: get-build-matrix
+    needs: [get-build-matrix]
    uses: ./.github/workflows/_get-image-list.yml
    with:
      examples: ${{ needs.get-build-matrix.outputs.EXAMPLES }}

  publish:
-    needs: [get-build-matrix, get-image-list, build-and-test]
-    if: always() && ${{ needs.get-image-list.outputs.matrix != '' }}
+    needs: [get-build-matrix, get-image-list, build-images]
+    if: always()
    strategy:
      matrix:
        image: ${{ fromJSON(needs.get-image-list.outputs.matrix) }}
--- a/.github/workflows/pr-chart-e2e.yml
+++ b/.github/workflows/pr-chart-e2e.yml
@@ -19,6 +19,9 @@ concurrency:
 jobs:
  job1:
    name: Get-Test-Matrix
+    permissions:
+      contents: read
+      pull-requests: read
    runs-on: ubuntu-latest
    outputs:
      run_matrix: ${{ steps.get-test-matrix.outputs.run_matrix }}
@@ -46,6 +49,8 @@ jobs:
              example=$(echo "$values_file" | cut -d'/' -f1) # CodeGen
              if [[ "$valuefile" == *"gaudi"* ]]; then
                hardware="gaudi"
+              elif [[ "$valuefile" == *"rocm"* ]]; then
+                hardware="rocm"
              elif [[ "$valuefile" == *"nv"* ]]; then
                continue
              else
--- a/.github/workflows/pr-link-path-scan.yml
+++ b/.github/workflows/pr-link-path-scan.yml
@@ -23,6 +23,7 @@ jobs:
      - name: Check the Validity of Hyperlinks
        run: |
          cd ${{github.workspace}}
+          delay=15
          fail="FALSE"
          merged_commit=$(git log -1 --format='%H')
          changed_files="$(git diff --name-status --diff-filter=ARM ${{ github.event.pull_request.base.sha }} ${merged_commit} | awk '/\.md$/ {print $NF}')"
@@ -35,15 +36,20 @@ jobs:
                  # echo $url_line
                  url=$(echo "$url_line"|cut -d '(' -f2 | cut -d ')' -f1|sed 's/\.git$//')
                  path=$(echo "$url_line"|cut -d':' -f1 | cut -d'/' -f2-)
-                  response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url")|| true
-                  if [ "$response" -ne 200 ]; then
-                    echo "**********Validation failed, try again**********"
-                    response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url")
-                    if [ "$response_retry" -eq 200 ]; then
-                      echo "*****Retry successfully*****"
-                    else
-                      echo "Invalid link from ${{github.workspace}}/$path: $url"
-                      fail="TRUE"
+                  if [[ "$url" == "https://platform.openai.com/api-keys"* ]]; then
+                    echo "Link $url from ${{github.workspace}}/$path needs to be verified by a real person."
+                  else
+                    sleep $delay
+                    response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url")|| true
+                    if [ "$response" -ne 200 ]; then
+                      echo "**********Validation failed ($response), try again**********"
+                      response_retry=$(curl -L -s -o /dev/null -w "%{http_code}" "$url")|| true  # same flags as the first attempt; a transport error must not abort the scan
+                      if [ "$response_retry" -eq 200 ]; then
+                        echo "*****Retry successfully*****"
+                      else
+                        echo "Invalid link ($response_retry) from ${{github.workspace}}/$path: $url"
+                        fail="TRUE"
+                      fi
+                    fi
                  fi
                done
@@ -74,6 +80,7 @@ jobs:
      - name: Checking Relative Path Validity
        run: |
          cd ${{github.workspace}}
+          delay=15
          fail="FALSE"
          repo_name=${{ github.event.pull_request.head.repo.full_name }}
          branch="https://github.com/$repo_name/blob/${{ github.event.pull_request.head.ref }}"
@@ -105,14 +112,15 @@ jobs:
                if [[ "$png_line" == *#* ]]; then
                  if [ -n "changed_files" ] && echo "$changed_files" | grep -q "^${refer_path}$"; then
                    url_dev=$branch$(echo "$real_path" | sed 's|.*/GenAIExamples||')$png_path
+                    sleep $delay
                    response=$(curl -I -L -s -o /dev/null -w "%{http_code}" "$url_dev")
                    if [ "$response" -ne 200 ]; then
-                      echo "**********Validation failed, try again**********"
+                      echo "**********Validation failed ($response), try again**********"
                      response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url_dev")
                      if [ "$response_retry" -eq 200 ]; then
                        echo "*****Retry successfully*****"
                      else
-                        echo "Invalid path from ${{github.workspace}}/$refer_path: $png_path"
+                        echo "Invalid path ($response_retry) from ${{github.workspace}}/$refer_path: $png_path"
                        fail="TRUE"
                      fi
                    else
--- a/.github/workflows/scripts/codeScan/hadolint.sh
+++ b/.github/workflows/scripts/codeScan/hadolint.sh
@@ -7,7 +7,7 @@ source /GenAIExamples/.github/workflows/scripts/change_color
 log_dir=/GenAIExamples/.github/workflows/scripts/codeScan
 ERROR_WARN=false

-find . -type f \( -name "Dockerfile*" \) -print -exec hadolint --ignore DL3006 --ignore DL3007 --ignore DL3008 --ignore DL3013 {} \; > ${log_dir}/hadolint.log
+find . -type f \( -name "Dockerfile*" \) -print -exec hadolint --ignore DL3006 --ignore DL3007 --ignore DL3008 --ignore DL3013 --ignore DL3018 --ignore DL3016 {} \; > ${log_dir}/hadolint.log

 if [[ $(grep -c "error" ${log_dir}/hadolint.log) != 0 ]]; then
    $BOLD_RED && echo "Error!! Please Click on the artifact button to download and check error details." && $RESET
--- a/.github/workflows/weekly-example-test.yml
+++ b/.github/workflows/weekly-example-test.yml
@@ -0,0 +1,55 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+name: Weekly test all examples on multiple HWs
+
+on:
+  schedule:
+    - cron: "30 2 * * 6" # UTC time
+  workflow_dispatch:
+
+env:
+  EXAMPLES: ${{ vars.NIGHTLY_RELEASE_EXAMPLES }}
+  NODES: "gaudi,xeon,rocm,arc"
+
+jobs:
+  get-test-matrix:
+    runs-on: ubuntu-latest
+    outputs:
+      examples: ${{ steps.get-matrix.outputs.examples }}
+      nodes: ${{ steps.get-matrix.outputs.nodes }}
+    steps:
+      - name: Create Matrix
+        id: get-matrix
+        run: |
+          examples=($(echo ${EXAMPLES} | tr ',' ' '))
+          examples_json=$(printf '%s\n' "${examples[@]}" | sort -u | jq -R '.' | jq -sc '.')
+          echo "examples=$examples_json" >> $GITHUB_OUTPUT
+          nodes=($(echo ${NODES} | tr ',' ' '))
+          nodes_json=$(printf '%s\n' "${nodes[@]}" | sort -u | jq -R '.' | jq -sc '.')
+          echo "nodes=$nodes_json" >> $GITHUB_OUTPUT
+
+  build-comps-base:
+    needs: [get-test-matrix]
+    strategy:
+      matrix:
+        node: ${{ fromJson(needs.get-test-matrix.outputs.nodes) }}
+    uses: ./.github/workflows/_build_comps_base_image.yml
+    with:
+      node: ${{ matrix.node }}
+
+  run-examples:
+    needs: [get-test-matrix, build-comps-base]
+    strategy:
+      matrix:
+        example: ${{ fromJson(needs.get-test-matrix.outputs.examples) }}
+        node: ${{ fromJson(needs.get-test-matrix.outputs.nodes) }}
+      fail-fast: false
+    uses: ./.github/workflows/_example-workflow.yml
+    with:
+      node: ${{ matrix.node }}
+      example: ${{ matrix.example }}
+      build: true
+      test_compose: true
+      test_helmchart: true
+    secrets: inherit
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -74,7 +74,7 @@ repos:
        name: Unused noqa

  - repo: https://github.com/pycqa/isort
-    rev: 5.13.2
+    rev: 6.0.1
    hooks:
      - id: isort

@@ -100,7 +100,7 @@ repos:
          - prettier@3.2.5

  - repo: https://github.com/psf/black.git
-    rev: 24.10.0
+    rev: 25.1.0
    hooks:
      - id: black
        files: (.*\.py)$
@@ -114,7 +114,7 @@ repos:
          - black==24.10.0

  - repo: https://github.com/codespell-project/codespell
-    rev: v2.3.0
+    rev: v2.4.1
    hooks:
      - id: codespell
        args: [-w]
@@ -122,7 +122,7 @@ repos:
          - tomli

  - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.8.6
+    rev: v0.11.4
    hooks:
      - id: ruff
        args: [--fix, --exit-non-zero-on-fix, --no-cache]
--- a/AgentQnA/README.md
+++ b/AgentQnA/README.md
@@ -4,9 +4,10 @@

 1. [Overview](#overview)
 2. [Deploy with Docker](#deploy-with-docker)
-3. [Launch the UI](#launch-the-ui)
+3. [How to interact with the agent system with UI](#how-to-interact-with-the-agent-system-with-ui)
 4. [Validate Services](#validate-services)
 5. [Register Tools](#how-to-register-other-tools-with-the-ai-agent)
+6. [Monitoring and Tracing](#monitor-and-tracing)

 ## Overview

@@ -98,7 +99,7 @@ flowchart LR

 #### First, clone the `GenAIExamples` repo.

-```
+```bash
 export WORKDIR=<your-work-directory>
 cd $WORKDIR
 git clone https://github.com/opea-project/GenAIExamples.git
@@ -108,7 +109,7 @@ git clone https://github.com/opea-project/GenAIExamples.git

 ##### For proxy environments only

-```
+```bash
 export http_proxy="Your_HTTP_Proxy"
 export https_proxy="Your_HTTPs_Proxy"
 # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
@@ -117,14 +118,24 @@ export no_proxy="Your_No_Proxy"

 ##### For using open-source llms

-```
+Set up a [HuggingFace](https://huggingface.co/) account and generate a [user access token](https://huggingface.co/docs/transformers.js/en/guides/private#step-1-generating-a-user-access-token).
+
+Then set an environment variable with the token and another for a directory to download the models:
+
+```bash
 export HUGGINGFACEHUB_API_TOKEN=<your-HF-token>
-export HF_CACHE_DIR=<directory-where-llms-are-downloaded> #so that no need to redownload every time
+export HF_CACHE_DIR=<directory-where-llms-are-downloaded> #  to avoid redownloading models
 ```

-##### [Optional] OPANAI_API_KEY to use OpenAI models
+##### [Optional] OPENAI_API_KEY to use OpenAI models or Intel® AI for Enterprise Inference

-```
+To use OpenAI models, generate a key following these [instructions](https://platform.openai.com/api-keys).
+
+To use a remote server running Intel® AI for Enterprise Inference, contact the cloud service provider or owner of the on-prem machine for a key to access the desired model on the server.
+
+Then set the environment variable `OPENAI_API_KEY` with the key contents:
+
+```bash
 export OPENAI_API_KEY=<your-openai-key>
 ```

@@ -132,33 +143,33 @@ export OPENAI_API_KEY=<your-openai-key>

 ##### Gaudi

-```
+```bash
 source $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi/set_env.sh
 ```

 ##### Xeon

-```
+```bash
 source $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/cpu/xeon/set_env.sh
 ```

+For running
+
 ### 2. Launch the multi-agent system. </br>

-Two options are provided for the `llm_engine` of the agents: 1. open-source LLMs on Gaudi, 2. OpenAI models via API calls.
+The whole system can be launched with Docker Compose, which includes microservices for the LLM, agents, UI, retrieval tool, vector database, dataprep, and telemetry. The deployment is split across 3 docker compose files so that users can pick and choose: a retrieval tool other than the `DocIndexRetriever` example provided in our GenAIExamples repo can be used, and the telemetry containers do not have to be launched.

-#### Gaudi
+#### Launch on Gaudi

-On Gaudi, `meta-llama/Meta-Llama-3.1-70B-Instruct` will be served using vllm.
-By default, both the RAG agent and SQL agent will be launched to support the React Agent.  
-The React Agent requires the DocIndexRetriever's [`compose.yaml`](../DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml) file, so two `compose.yaml` files need to be run with docker compose to start the multi-agent system.
-
-> **Note**: To enable the web search tool, skip this step and proceed to the "[Optional] Web Search Tool Support" section.
+On Gaudi, `meta-llama/Llama-3.3-70B-Instruct` will be served using vLLM. The command below will launch the multi-agent system with the `DocIndexRetriever` as the retrieval tool for the Worker RAG agent.

 ```bash
 cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi/
 docker compose -f $WORKDIR/GenAIExamples/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml -f compose.yaml up -d
 ```

+> **Note**: To enable the web search tool, skip this step and proceed to the "[Optional] Web Search Tool Support" section.
+
 To enable Open Telemetry Tracing, compose.telemetry.yaml file need to be merged along with default compose.yaml file.
 Gaudi example with Open Telemetry feature:

@@ -183,18 +194,39 @@ docker compose -f $WORKDIR/GenAIExamples/DocIndexRetriever/docker_compose/intel/

 </details>

-#### Xeon
+#### Launch on Xeon

-On Xeon, only OpenAI models are supported.
-By default, both the RAG Agent and SQL Agent will be launched to support the React Agent.  
-The React Agent requires the DocIndexRetriever's [`compose.yaml`](../DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml) file, so two `compose yaml` files need to be run with docker compose to start the multi-agent system.
+On Xeon, OpenAI models and models deployed on a remote server are supported. Both methods require an API key.

 ```bash
 export OPENAI_API_KEY=<your-openai-key>
 cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/cpu/xeon
+```
+
+##### OpenAI Models
+
+The command below will launch the multi-agent system with the `DocIndexRetriever` as the retrieval tool for the Worker RAG agent.
+
+```bash
 docker compose -f $WORKDIR/GenAIExamples/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml -f compose_openai.yaml up -d
 ```

+##### Models on Remote Server
+
+When models are deployed on a remote server with Intel® AI for Enterprise Inference, a base URL and an API key are required to access them. To run the Agent microservice on Xeon while using models deployed on a remote server, add `compose_remote.yaml` to the `docker compose` command and set additional environment variables.
+
+###### Notes
+
+- `OPENAI_API_KEY` is already set in a previous step.
+- `model` is used to overwrite the value set for this environment variable in `set_env.sh`.
+- `LLM_ENDPOINT_URL` is the base URL provided by the owner of the on-prem machine or the cloud service provider. It follows the format `https://<DNS>`, for example `https://api.inference.example.com`.
+
+```bash
+export model=<name-of-model-card>
+export LLM_ENDPOINT_URL=<http-endpoint-of-remote-server>
+docker compose -f $WORKDIR/GenAIExamples/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml -f compose_openai.yaml -f compose_remote.yaml up -d
+```
+
 ### 3. Ingest Data into the vector database

 The `run_ingest_data.sh` script will use an example jsonl file to ingest example documents into a vector database. Other ways to ingest data and other types of documents supported can be found in the OPEA dataprep microservice located in the opea-project/GenAIComps repo.
@@ -206,11 +238,25 @@ bash run_ingest_data.sh

 > **Note**: This is a one-time operation.

-## Launch the UI
+## How to interact with the agent system with UI

-Open a web browser to http://localhost:5173 to access the UI. Ensure the environment variable `AGENT_URL` is set to http://$ip_address:9090/v1/chat/completions in [ui/svelte/.env](./ui/svelte/.env) or else the UI may not work properly.
+The UI microservice is launched in the previous step with the other microservices.
+To access the UI, open a web browser to `http://${ip_address}:5173`. Note that `ip_address` here is the host IP of the UI microservice.

-The AgentQnA UI can be deployed locally or using Docker. To customize deployment, refer to the [AgentQnA UI Guide](./ui/svelte/README.md).
+1. Click on the arrow above `Get started`. Create an admin account with a name, email, and password.
+2. Add an OpenAI-compatible API endpoint. In the upper right, click on the circle button with the user's initial, go to `Admin Settings`->`Connections`. Under `Manage OpenAI API Connections`, click on the `+` to add a connection. Fill in these fields:
+
+- **URL**: `http://${ip_address}:9090/v1`, do not forget the `v1`
+- **Key**: any value
+- **Model IDs**: any name, e.g. `opea-agent`, then press `+` to add it
+
+Click "Save".
+
+![opea-agent-setting](assets/img/opea-agent-setting.png)
+
+3. Test the OPEA agent with the UI. Return to `New Chat` and ensure the model (e.g. `opea-agent`) is selected near the upper left. Enter any prompt to interact with the agent.
+
+![opea-agent-test](assets/img/opea-agent-test.png)

 ## [Optional] Deploy using Helm Charts

@@ -249,3 +295,8 @@ python $WORKDIR/GenAIExamples/AgentQnA/tests/test.py --agent_role "supervisor" -
 ## How to register other tools with the AI agent

 The [tools](./tools) folder contains YAML and Python files for additional tools for the supervisor and worker agents. Refer to the "Provide your own tools" section in the instructions [here](https://github.com/opea-project/GenAIComps/tree/main/comps/agent/src/README.md) to add tools and customize the AI agents.
+
+## Monitor and Tracing
+
+Follow [OpenTelemetry OPEA Guide](https://opea-project.github.io/latest/tutorial/OpenTelemetry/OpenTelemetry_OPEA_Guide.html) to understand how to use OpenTelemetry tracing and metrics in OPEA.  
+For AgentQnA-specific tracing and metrics monitoring, follow the [OpenTelemetry on AgentQnA](https://opea-project.github.io/latest/tutorial/OpenTelemetry/deploy/AgentQnA.html) section.
--- a/AgentQnA/assets/img/opea-agent-setting.png
+++ b/AgentQnA/assets/img/opea-agent-setting.png
--- a/AgentQnA/assets/img/opea-agent-test.png
+++ b/AgentQnA/assets/img/opea-agent-test.png
--- a/AgentQnA/docker_compose/amd/gpu/rocm/compose.yaml
+++ b/AgentQnA/docker_compose/amd/gpu/rocm/compose.yaml
@@ -13,8 +13,8 @@ services:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TGI_LLM_ENDPOINT: "http://${ip_address}:${TGI_SERVICE_PORT}"
-      HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
    shm_size: 32g
    devices:
      - /dev/kfd:/dev/kfd
@@ -29,7 +29,7 @@ services:
    command: --model-id ${LLM_MODEL_ID} --max-input-length 4096 --max-total-tokens 8192

  worker-rag-agent:
-    image: opea/agent:latest
+    image: ${REGISTRY:-opea}/agent:${TAG:-latest}
    container_name: rag-agent-endpoint
    volumes:
      - "${TOOLSET_PATH}:/home/user/tools/"
@@ -42,7 +42,7 @@ services:
      with_memory: false
      recursion_limit: ${recursion_limit_worker}
      llm_engine: tgi
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
      llm_endpoint_url: ${LLM_ENDPOINT_URL}
      model: ${LLM_MODEL_ID}
      temperature: ${temperature}
@@ -60,7 +60,7 @@ services:
      port: 9095

  worker-sql-agent:
-    image: opea/agent:latest
+    image: ${REGISTRY:-opea}/agent:${TAG:-latest}
    container_name: sql-agent-endpoint
    volumes:
      - "${WORKDIR}/tests/Chinook_Sqlite.sqlite:/home/user/chinook-db/Chinook_Sqlite.sqlite:rw"
@@ -76,7 +76,7 @@ services:
      use_hints: false
      recursion_limit: ${recursion_limit_worker}
      llm_engine: vllm
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
      llm_endpoint_url: ${LLM_ENDPOINT_URL}
      model: ${LLM_MODEL_ID}
      temperature: ${temperature}
@@ -89,7 +89,7 @@ services:
      port: 9096

  supervisor-react-agent:
-    image: opea/agent:latest
+    image: ${REGISTRY:-opea}/agent:${TAG:-latest}
    container_name: react-agent-endpoint
    depends_on:
      - worker-rag-agent
@@ -104,7 +104,7 @@ services:
      with_memory: true
      recursion_limit: ${recursion_limit_supervisor}
      llm_engine: tgi
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
      llm_endpoint_url: ${LLM_ENDPOINT_URL}
      model: ${LLM_MODEL_ID}
      temperature: ${temperature}
--- a/AgentQnA/docker_compose/amd/gpu/rocm/compose_vllm.yaml
+++ b/AgentQnA/docker_compose/amd/gpu/rocm/compose_vllm.yaml
@@ -10,8 +10,8 @@ services:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      WILM_USE_TRITON_FLASH_ATTENTION: 0
@@ -33,7 +33,7 @@ services:
    ipc: host

  worker-rag-agent:
-    image: opea/agent:latest
+    image: ${REGISTRY:-opea}/agent:${TAG:-latest}
    container_name: rag-agent-endpoint
    volumes:
      - ${TOOLSET_PATH}:/home/user/tools/
@@ -46,7 +46,7 @@ services:
      with_memory: false
      recursion_limit: ${recursion_limit_worker}
      llm_engine: vllm
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
      llm_endpoint_url: ${LLM_ENDPOINT_URL}
      model: ${LLM_MODEL_ID}
      temperature: ${temperature}
@@ -64,7 +64,7 @@ services:
      port: 9095

  worker-sql-agent:
-    image: opea/agent:latest
+    image: ${REGISTRY:-opea}/agent:${TAG:-latest}
    container_name: sql-agent-endpoint
    volumes:
      - "${WORKDIR}/tests/Chinook_Sqlite.sqlite:/home/user/chinook-db/Chinook_Sqlite.sqlite:rw"
@@ -80,7 +80,7 @@ services:
      use_hints: false
      recursion_limit: ${recursion_limit_worker}
      llm_engine: vllm
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
      llm_endpoint_url: ${LLM_ENDPOINT_URL}
      model: ${LLM_MODEL_ID}
      temperature: ${temperature}
@@ -93,7 +93,7 @@ services:
      port: 9096

  supervisor-react-agent:
-    image: opea/agent:latest
+    image: ${REGISTRY:-opea}/agent:${TAG:-latest}
    container_name: react-agent-endpoint
    depends_on:
      - worker-rag-agent
@@ -108,7 +108,7 @@ services:
      with_memory: true
      recursion_limit: ${recursion_limit_supervisor}
      llm_engine: vllm
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
      llm_endpoint_url: ${LLM_ENDPOINT_URL}
      model: ${LLM_MODEL_ID}
      temperature: ${temperature}
--- a/AgentQnA/docker_compose/amd/gpu/rocm/launch_agent_service_tgi_rocm.sh
+++ b/AgentQnA/docker_compose/amd/gpu/rocm/launch_agent_service_tgi_rocm.sh
@@ -19,7 +19,7 @@ export CRAG_SERVER_PORT="18114"

 export WORKPATH=$(dirname "$PWD")
 export WORKDIR=${WORKPATH}/../../../
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
 export HF_CACHE_DIR="./data"
 export MODEL_CACHE="./data"
@@ -39,7 +39,7 @@ export CRAG_SERVER=http://${ip_address}:${CRAG_SERVER_PORT}
 export WORKER_AGENT_URL="http://${ip_address}:${WORKER_RAG_AGENT_PORT}/v1/chat/completions"
 export SQL_AGENT_URL="http://${ip_address}:${WORKER_SQL_AGENT_PORT}/v1/chat/completions"
 export HF_CACHE_DIR=${HF_CACHE_DIR}
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 export no_proxy=${no_proxy}
 export http_proxy=${http_proxy}
 export https_proxy=${https_proxy}
--- a/AgentQnA/docker_compose/amd/gpu/rocm/launch_agent_service_vllm_rocm.sh
+++ b/AgentQnA/docker_compose/amd/gpu/rocm/launch_agent_service_vllm_rocm.sh
@@ -19,7 +19,7 @@ export CRAG_SERVER_PORT="18114"

 export WORKPATH=$(dirname "$PWD")
 export WORKDIR=${WORKPATH}/../../../
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 export VLLM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
 export HF_CACHE_DIR="./data"
 export MODEL_CACHE="./data"
@@ -40,7 +40,7 @@ export CRAG_SERVER=http://${ip_address}:${CRAG_SERVER_PORT}
 export WORKER_AGENT_URL="http://${ip_address}:${WORKER_RAG_AGENT_PORT}/v1/chat/completions"
 export SQL_AGENT_URL="http://${ip_address}:${WORKER_SQL_AGENT_PORT}/v1/chat/completions"
 export HF_CACHE_DIR=${HF_CACHE_DIR}
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 export no_proxy=${no_proxy}
 export http_proxy=${http_proxy}
 export https_proxy=${https_proxy}
--- a/AgentQnA/docker_compose/amd/gpu/rocm/stop_agent_service_vllm_rocm.sh
+++ b/AgentQnA/docker_compose/amd/gpu/rocm/stop_agent_service_vllm_rocm.sh
@@ -20,8 +20,8 @@ export CRAG_SERVER_PORT="18114"

 export WORKPATH=$(dirname "$PWD")
 export WORKDIR=${WORKPATH}/../../../
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 export VLLM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
 export HF_CACHE_DIR="./data"
 export MODEL_CACHE="./data"
@@ -42,7 +42,7 @@ export CRAG_SERVER=http://${ip_address}:${CRAG_SERVER_PORT}
 export WORKER_AGENT_URL="http://${ip_address}:${WORKER_RAG_AGENT_PORT}/v1/chat/completions"
 export SQL_AGENT_URL="http://${ip_address}:${WORKER_SQL_AGENT_PORT}/v1/chat/completions"
 export HF_CACHE_DIR=${HF_CACHE_DIR}
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 export no_proxy=${no_proxy}
 export http_proxy=${http_proxy}
 export https_proxy=${https_proxy}
--- a/AgentQnA/docker_compose/intel/cpu/xeon/compose_openai.yaml
+++ b/AgentQnA/docker_compose/intel/cpu/xeon/compose_openai.yaml
@@ -103,10 +103,8 @@ services:
  agent-ui:
    image: opea/agent-ui
    container_name: agent-ui
-    volumes:
-      - ${WORKDIR}/GenAIExamples/AgentQnA/ui/svelte/.env:/home/user/svelte/.env # test db
    ports:
-      - "5173:5173"
+      - "5173:8080"
    ipc: host

 networks:
--- a/AgentQnA/docker_compose/intel/cpu/xeon/compose_remote.yaml
+++ b/AgentQnA/docker_compose/intel/cpu/xeon/compose_remote.yaml
@@ -0,0 +1,18 @@
+# Copyright (C) 2025 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+services:
+  worker-rag-agent:
+    environment:
+      llm_endpoint_url: ${LLM_ENDPOINT_URL}
+      api_key: ${OPENAI_API_KEY}
+
+  worker-sql-agent:
+    environment:
+      llm_endpoint_url: ${LLM_ENDPOINT_URL}
+      api_key: ${OPENAI_API_KEY}
+
+  supervisor-react-agent:
+    environment:
+      llm_endpoint_url: ${LLM_ENDPOINT_URL}
+      api_key: ${OPENAI_API_KEY}
--- a/AgentQnA/docker_compose/intel/cpu/xeon/set_env.sh
+++ b/AgentQnA/docker_compose/intel/cpu/xeon/set_env.sh
@@ -33,7 +33,7 @@ fi
 # retriever
 export host_ip=$(hostname -I | awk '{print $1}')
 export HF_CACHE_DIR=${HF_CACHE_DIR}
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 export no_proxy=${no_proxy}
 export http_proxy=${http_proxy}
 export https_proxy=${https_proxy}
--- a/AgentQnA/docker_compose/intel/hpu/gaudi/compose.yaml
+++ b/AgentQnA/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -3,7 +3,7 @@

 services:
  worker-rag-agent:
-    image: opea/agent:latest
+    image: ${REGISTRY:-opea}/agent:${TAG:-latest}
    container_name: rag-agent-endpoint
    volumes:
      - ${TOOLSET_PATH}:/home/user/tools/
@@ -16,7 +16,7 @@ services:
      with_memory: false
      recursion_limit: ${recursion_limit_worker}
      llm_engine: vllm
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
      llm_endpoint_url: ${LLM_ENDPOINT_URL}
      model: ${LLM_MODEL_ID}
      temperature: ${temperature}
@@ -34,7 +34,7 @@ services:
      port: 9095

  worker-sql-agent:
-    image: opea/agent:latest
+    image: ${REGISTRY:-opea}/agent:${TAG:-latest}
    container_name: sql-agent-endpoint
    volumes:
      - ${WORKDIR}/GenAIExamples/AgentQnA/tests:/home/user/chinook-db # test db
@@ -50,7 +50,7 @@ services:
      use_hints: false
      recursion_limit: ${recursion_limit_worker}
      llm_engine: vllm
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
      llm_endpoint_url: ${LLM_ENDPOINT_URL}
      model: ${LLM_MODEL_ID}
      temperature: ${temperature}
@@ -63,7 +63,7 @@ services:
      port: 9096

  supervisor-react-agent:
-    image: opea/agent:latest
+    image: ${REGISTRY:-opea}/agent:${TAG:-latest}
    container_name: react-agent-endpoint
    depends_on:
      - worker-rag-agent
@@ -79,7 +79,7 @@ services:
      with_memory: true
      recursion_limit: ${recursion_limit_supervisor}
      llm_engine: vllm
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
      llm_endpoint_url: ${LLM_ENDPOINT_URL}
      model: ${LLM_MODEL_ID}
      temperature: ${temperature}
@@ -104,14 +104,12 @@ services:
      - "8080:8000"
    ipc: host
  agent-ui:
-    image: opea/agent-ui
+    image: ${REGISTRY:-opea}/agent-ui:${TAG:-latest}
    container_name: agent-ui
-    volumes:
-      - ${WORKDIR}/GenAIExamples/AgentQnA/ui/svelte/.env:/home/user/svelte/.env
    environment:
      host_ip: ${host_ip}
    ports:
-      - "5173:5173"
+      - "5173:8080"
    ipc: host
  vllm-service:
    image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest}
@@ -119,12 +117,12 @@ services:
    ports:
      - "8086:8000"
    volumes:
-      - "./data:/data"
+      - "${MODEL_CACHE:-./data}:/data"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
-      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
      LLM_MODEL_ID: ${LLM_MODEL_ID}
@@ -140,4 +138,4 @@ services:
    cap_add:
      - SYS_NICE
    ipc: host
-    command: --model $LLM_MODEL_ID --tensor-parallel-size 4 --host 0.0.0.0 --port 8000 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 16384
+    command: --model $LLM_MODEL_ID --tensor-parallel-size 4 --host 0.0.0.0 --port 8000 --block-size 128 --max-num-seqs 256 --max-seq-len-to-capture 16384
--- a/AgentQnA/docker_compose/intel/hpu/gaudi/set_env.sh
+++ b/AgentQnA/docker_compose/intel/hpu/gaudi/set_env.sh
@@ -16,8 +16,8 @@ export ip_address=$(hostname -I | awk '{print $1}')
 # LLM related environment variables
 export HF_CACHE_DIR=${HF_CACHE_DIR}
 ls $HF_CACHE_DIR
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
-export HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
+export HF_TOKEN=${HF_TOKEN}
 export LLM_MODEL_ID="meta-llama/Llama-3.3-70B-Instruct"
 export NUM_SHARDS=4
 export LLM_ENDPOINT_URL="http://${ip_address}:8086"
@@ -42,7 +42,7 @@ if [ ! -f $WORKDIR/GenAIExamples/AgentQnA/tests/Chinook_Sqlite.sqlite ]; then
 fi

 # configure agent ui
-echo "AGENT_URL = 'http://$ip_address:9090/v1/chat/completions'" | tee ${WORKDIR}/GenAIExamples/AgentQnA/ui/svelte/.env
+# echo "AGENT_URL = 'http://$ip_address:9090/v1/chat/completions'" | tee ${WORKDIR}/GenAIExamples/AgentQnA/ui/svelte/.env

 # retriever
 export host_ip=$(hostname -I | awk '{print $1}')
--- a/AgentQnA/docker_compose/intel/hpu/gaudi/tgi_gaudi.yaml
+++ b/AgentQnA/docker_compose/intel/hpu/gaudi/tgi_gaudi.yaml
@@ -13,7 +13,7 @@ services:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
-      HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      HABANA_VISIBLE_DEVICES: all
--- a/AgentQnA/docker_image_build/build.yaml
+++ b/AgentQnA/docker_image_build/build.yaml
@@ -17,12 +17,15 @@ services:
      dockerfile: ./docker/Dockerfile
    extends: agent
    image: ${REGISTRY:-opea}/agent-ui:${TAG:-latest}
+  vllm-gaudi:
+    build:
+      context: vllm-fork
+      dockerfile: Dockerfile.hpu
+    extends: agent
+    image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest}
  vllm-rocm:
    build:
-      args:
-        http_proxy: ${http_proxy}
-        https_proxy: ${https_proxy}
-        no_proxy: ${no_proxy}
      context: GenAIComps
      dockerfile: comps/third_parties/vllm/src/Dockerfile.amd_gpu
+    extends: agent
    image: ${REGISTRY:-opea}/vllm-rocm:${TAG:-latest}
--- a/AgentQnA/kubernetes/helm/cpu-values.yaml
+++ b/AgentQnA/kubernetes/helm/cpu-values.yaml
@@ -0,0 +1,22 @@
+# Copyright (C) 2025 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+tgi:
+  enabled: false
+vllm:
+  enabled: true
+  LLM_MODEL_ID: "meta-llama/Meta-Llama-3-8B-Instruct"
+  extraCmdArgs: ["--max-seq-len-to-capture", "16384", "--enable-auto-tool-choice", "--tool-call-parser", "llama3_json"]
+
+supervisor:
+  llm_endpoint_url: http://{{ .Release.Name }}-vllm
+  llm_engine: vllm
+  model: "meta-llama/Meta-Llama-3-8B-Instruct"
+ragagent:
+  llm_endpoint_url: http://{{ .Release.Name }}-vllm
+  llm_engine: vllm
+  model: "meta-llama/Meta-Llama-3-8B-Instruct"
+sqlagent:
+  llm_endpoint_url: http://{{ .Release.Name }}-vllm
+  llm_engine: vllm
+  model: "meta-llama/Meta-Llama-3-8B-Instruct"
--- a/AgentQnA/kubernetes/helm/gaudi-values.yaml
+++ b/AgentQnA/kubernetes/helm/gaudi-values.yaml
@@ -4,13 +4,32 @@
 # Accelerate inferencing in heaviest components to improve performance
 # by overriding their subchart values

+tgi:
+  enabled: false
 vllm:
  enabled: true
+  accelDevice: "gaudi"
  image:
    repository: opea/vllm-gaudi
+  resources:
+    limits:
+      habana.ai/gaudi: 4
+  LLM_MODEL_ID: "meta-llama/Llama-3.3-70B-Instruct"
+  OMPI_MCA_btl_vader_single_copy_mechanism: none
+  PT_HPU_ENABLE_LAZY_COLLECTIVES: true
+  VLLM_SKIP_WARMUP: true
+  shmSize: 16Gi
+  extraCmdArgs: ["--tensor-parallel-size", "4", "--max-seq-len-to-capture", "16384", "--enable-auto-tool-choice", "--tool-call-parser", "llama3_json"]
+
 supervisor:
  llm_endpoint_url: http://{{ .Release.Name }}-vllm
+  llm_engine: vllm
+  model: "meta-llama/Llama-3.3-70B-Instruct"
 ragagent:
  llm_endpoint_url: http://{{ .Release.Name }}-vllm
+  llm_engine: vllm
+  model: "meta-llama/Llama-3.3-70B-Instruct"
 sqlagent:
  llm_endpoint_url: http://{{ .Release.Name }}-vllm
+  llm_engine: vllm
+  model: "meta-llama/Llama-3.3-70B-Instruct"
--- a/AgentQnA/retrieval_tool/launch_retrieval_tool.sh
+++ b/AgentQnA/retrieval_tool/launch_retrieval_tool.sh
@@ -3,7 +3,7 @@

 host_ip=$(hostname -I | awk '{print $1}')
 export HF_CACHE_DIR=${HF_CACHE_DIR}
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 export no_proxy=${no_proxy}
 export http_proxy=${http_proxy}
 export https_proxy=${https_proxy}
--- a/AgentQnA/retrieval_tool/run_ingest_data.sh
+++ b/AgentQnA/retrieval_tool/run_ingest_data.sh
@@ -1,7 +1,22 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0

+host_ip=$(hostname -I | awk '{print $1}')
+port=6007
 FILEDIR=${WORKDIR}/GenAIExamples/AgentQnA/example_data/
 FILENAME=test_docs_music.jsonl

-python3 index_data.py --filedir ${FILEDIR} --filename ${FILENAME} --host_ip $host_ip
+# The AgentQnA ingestion script requires the following packages
+packages=("requests" "tqdm")
+
+# Check if packages are installed
+for package in "${packages[@]}"; do
+  if pip freeze | grep -q "$package="; then
+    echo "$package is installed"
+  else
+    echo "$package is not installed"
+    pip install --no-cache-dir "$package"
+  fi
+done
+
+python3 index_data.py --filedir ${FILEDIR} --filename ${FILENAME} --host_ip $host_ip --port $port
--- a/AgentQnA/tests/_test_compose_openai_on_xeon.sh
+++ b/AgentQnA/tests/_test_compose_openai_on_xeon.sh
@@ -31,7 +31,7 @@ function stop_retrieval_tool() {
 }

 echo "=================== #1 Building docker images===================="
-bash step1_build_images.sh
+bash step1_build_images.sh xeon
 echo "=================== #1 Building docker images completed===================="

 echo "=================== #2 Start retrieval tool===================="
--- a/AgentQnA/tests/step1_build_images.sh
+++ b/AgentQnA/tests/step1_build_images.sh
@@ -15,42 +15,52 @@ function get_genai_comps() {
    fi
 }

-
 function build_docker_images_for_retrieval_tool(){
    cd $WORKDIR/GenAIExamples/DocIndexRetriever/docker_image_build/
    get_genai_comps
    echo "Build all the images with --no-cache..."
-    service_list="doc-index-retriever dataprep embedding retriever reranking"
-    docker compose -f build.yaml build ${service_list} --no-cache
-    docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.6
-
+    docker compose -f build.yaml build --no-cache
    docker images && sleep 1s
 }

-function build_agent_docker_image() {
+function build_agent_docker_image_xeon() {
    cd $WORKDIR/GenAIExamples/AgentQnA/docker_image_build/
    get_genai_comps
+
    echo "Build agent image with --no-cache..."
-    docker compose -f build.yaml build --no-cache
+    service_list="agent agent-ui"
+    docker compose -f build.yaml build ${service_list} --no-cache
 }

-function build_vllm_docker_image() {
-    echo "Building the vllm docker image"
-    cd $WORKPATH
-    echo $WORKPATH
-    if [ ! -d "./vllm-fork" ]; then
-        git clone https://github.com/HabanaAI/vllm-fork.git
-    fi
-    cd ./vllm-fork
-    VLLM_VER=$(git describe --tags "$(git rev-list --tags --max-count=1)")
-    git checkout ${VLLM_VER} &> /dev/null
-    docker build --no-cache -f Dockerfile.hpu -t opea/vllm-gaudi:ci --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy
-    if [ $? -ne 0 ]; then
-        echo "opea/vllm-gaudi:ci failed"
-        exit 1
-    else
-        echo "opea/vllm-gaudi:ci successful"
-    fi
+function build_agent_docker_image_gaudi_vllm() {
+    cd $WORKDIR/GenAIExamples/AgentQnA/docker_image_build/
+    get_genai_comps
+
+    git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork
+    VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0
+    git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../
+
+    echo "Build agent image with --no-cache..."
+    service_list="agent agent-ui vllm-gaudi"
+    docker compose -f build.yaml build ${service_list} --no-cache
+}
+
+function build_agent_docker_image_rocm() {
+    cd $WORKDIR/GenAIExamples/AgentQnA/docker_image_build/
+    get_genai_comps
+
+    echo "Build agent image with --no-cache..."
+    service_list="agent agent-ui"
+    docker compose -f build.yaml build ${service_list} --no-cache
+}
+
+function build_agent_docker_image_rocm_vllm() {
+    cd $WORKDIR/GenAIExamples/AgentQnA/docker_image_build/
+    get_genai_comps
+
+    echo "Build agent image with --no-cache..."
+    service_list="agent agent-ui vllm-rocm"
+    docker compose -f build.yaml build ${service_list} --no-cache
 }


@@ -59,15 +69,32 @@ function main() {
    build_docker_images_for_retrieval_tool
    echo "==================== Build docker images for retrieval tool completed ===================="

-    echo "==================== Build agent docker image ===================="
-    build_agent_docker_image
-    echo "==================== Build agent docker image completed ===================="
+    sleep 3s

-    echo "==================== Build vllm docker image ===================="
-    build_vllm_docker_image
-    echo "==================== Build vllm docker image completed ===================="
+    case $1 in
+        "rocm")
+            echo "==================== Build agent docker image for ROCm ===================="
+            build_agent_docker_image_rocm
+            ;;
+        "rocm_vllm")
+            echo "==================== Build agent docker image for ROCm VLLM ===================="
+            build_agent_docker_image_rocm_vllm
+            ;;
+        "gaudi_vllm")
+            echo "==================== Build agent docker image for Gaudi ===================="
+            build_agent_docker_image_gaudi_vllm
+            ;;
+        "xeon")
+            echo "==================== Build agent docker image for Xeon ===================="
+            build_agent_docker_image_xeon
+            ;;
+        *)
+            echo "Invalid argument"
+            exit 1
+            ;;
+    esac

    docker image ls | grep vllm
 }

-main
+main $1
--- a/AgentQnA/tests/step1_build_images_rocm_vllm.sh
+++ b/AgentQnA/tests/step1_build_images_rocm_vllm.sh
@@ -1,64 +0,0 @@
-#!/bin/bash
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-set -e
-export WORKPATH=$(dirname "$PWD")
-export WORKDIR=${WORKPATH}/../../
-echo "WORKDIR=${WORKDIR}"
-export ip_address=$(hostname -I | awk '{print $1}')
-
-
-function get_genai_comps() {
-    if [ ! -d "GenAIComps" ] ; then
-        git clone --depth 1 --branch ${opea_branch:-"main"} https://github.com/opea-project/GenAIComps.git
-    fi
-}
-
-
-function build_docker_images_for_retrieval_tool(){
-    cd $WORKPATH/../DocIndexRetriever/docker_image_build/
-    get_genai_comps
-    echo "Build all the images with --no-cache..."
-    service_list="doc-index-retriever dataprep embedding retriever reranking"
-    docker compose -f build.yaml build ${service_list} --no-cache
-    docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
-
-    docker images && sleep 3s
-}
-
-function build_agent_docker_image() {
-    cd $WORKPATH/docker_image_build/
-    get_genai_comps
-    echo "Build agent image with --no-cache..."
-    docker compose -f build.yaml build --no-cache
-
-    docker images && sleep 3s
-}
-
-#function build_vllm_docker_image() {
-#    echo "Building the vllm docker image"
-#    cd $WORKPATH/
-#    docker build --no-cache -t opea/llm-vllm-rocm:ci -f Dockerfile-vllm-rocm .
-#
-#    docker images && sleep 3s
-#}
-
-
-function main() {
-    echo "==================== Build docker images for retrieval tool ===================="
-    build_docker_images_for_retrieval_tool
-    echo "==================== Build docker images for retrieval tool completed ===================="
-
-    echo "==================== Build agent docker image ===================="
-    build_agent_docker_image
-    echo "==================== Build agent docker image completed ===================="
-
-#    echo "==================== Build vllm docker image ===================="
-#    build_vllm_docker_image
-#    echo "==================== Build vllm docker image completed ===================="
-
-    docker image ls | grep vllm
-}
-
-main
--- a/AgentQnA/tests/step2_start_retrieval_tool_rocm_vllm.sh
+++ b/AgentQnA/tests/step2_start_retrieval_tool_rocm_vllm.sh
@@ -20,7 +20,7 @@ function start_retrieval_tool() {
    cd $WORKPATH/../DocIndexRetriever/docker_compose/intel/cpu/xeon
    host_ip=$(hostname -I | awk '{print $1}')
    export HF_CACHE_DIR=${HF_CACHE_DIR}
-    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+    export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
    export no_proxy=${no_proxy}
    export http_proxy=${http_proxy}
    export https_proxy=${https_proxy}
--- a/AgentQnA/tests/step4_launch_and_validate_agent_gaudi.sh
+++ b/AgentQnA/tests/step4_launch_and_validate_agent_gaudi.sh
@@ -8,9 +8,11 @@ WORKPATH=$(dirname "$PWD")
 export WORKDIR=$WORKPATH/../../
 echo "WORKDIR=${WORKDIR}"
 export ip_address=$(hostname -I | awk '{print $1}')
+export host_ip=$ip_address
+echo "ip_address=${ip_address}"
 export TOOLSET_PATH=$WORKPATH/tools/
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
-HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
+HF_TOKEN=${HF_TOKEN}
 model="meta-llama/Llama-3.3-70B-Instruct" #"meta-llama/Meta-Llama-3.1-70B-Instruct"

 export HF_CACHE_DIR=${model_cache:-"/data2/huggingface"}
@@ -24,12 +26,12 @@ ls $HF_CACHE_DIR
 vllm_port=8086
 vllm_volume=${HF_CACHE_DIR}

-function start_tgi(){
-    echo "Starting tgi-gaudi server"
+
+function start_agent_service() {
+    echo "Starting agent service"
    cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi
    source set_env.sh
-    docker compose -f $WORKDIR/GenAIExamples/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml -f compose.yaml tgi_gaudi.yaml -f compose.telemetry.yaml up -d
-
+    docker compose -f compose.yaml up -d
 }

 function start_all_services() {
@@ -69,7 +71,6 @@ function download_chinook_data(){
    cp chinook-database/ChinookDatabase/DataSources/Chinook_Sqlite.sqlite $WORKDIR/GenAIExamples/AgentQnA/tests/
 }

-
 function validate() {
    local CONTENT="$1"
    local EXPECTED_RESULT="$2"
@@ -138,24 +139,6 @@ function remove_chinook_data(){
    echo "Chinook data removed!"
 }

-export host_ip=$ip_address
-echo "ip_address=${ip_address}"
-
-
-function validate() {
-    local CONTENT="$1"
-    local EXPECTED_RESULT="$2"
-    local SERVICE_NAME="$3"
-
-    if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then
-        echo "[ $SERVICE_NAME ] Content is as expected: $CONTENT"
-        echo 0
-    else
-        echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT"
-        echo 1
-    fi
-}
-
 function ingest_data_and_validate() {
    echo "Ingesting data"
    cd $WORKDIR/GenAIExamples/AgentQnA/retrieval_tool/
--- a/AgentQnA/tests/test_compose_on_gaudi.sh
+++ b/AgentQnA/tests/test_compose_on_gaudi.sh
@@ -7,34 +7,44 @@ WORKPATH=$(dirname "$PWD")
 export WORKDIR=$WORKPATH/../../
 echo "WORKDIR=${WORKDIR}"
 export ip_address=$(hostname -I | awk '{print $1}')
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
 export no_proxy="$no_proxy,rag-agent-endpoint,sql-agent-endpoint,react-agent-endpoint,agent-ui,vllm-gaudi-server,jaeger,grafana,prometheus,127.0.0.1,localhost,0.0.0.0,$ip_address"
+IMAGE_REPO=${IMAGE_REPO:-"opea"}
+IMAGE_TAG=${IMAGE_TAG:-"latest"}
+echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
+echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
+export REGISTRY=${IMAGE_REPO}
+export TAG=${IMAGE_TAG}
+export MODEL_CACHE=${model_cache:-"./data"}


-function get_genai_comps() {
-    if [ ! -d "GenAIComps" ] ; then
-        git clone --depth 1 --branch ${opea_branch:-"main"} https://github.com/opea-project/GenAIComps.git
-    fi
-}
-
-
-function build_agent_docker_image() {
-    cd $WORKDIR/GenAIExamples/AgentQnA/docker_image_build/
-    get_genai_comps
-    echo "Build agent image with --no-cache..."
-    docker compose -f build.yaml build --no-cache
-}
-
 function stop_crag() {
    cid=$(docker ps -aq --filter "name=kdd-cup-24-crag-service")
    echo "Stopping container kdd-cup-24-crag-service with cid $cid"
    if [[ ! -z "$cid" ]]; then docker rm $cid -f && sleep 1s; fi
 }

-function stop_agent_docker() {
+function stop_agent_containers() {  # force-remove every container named in the Gaudi compose.yaml
    cd $WORKPATH/docker_compose/intel/hpu/gaudi/
-    docker compose -f $WORKDIR/GenAIExamples/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml -f compose.yaml down
+    container_list=$(cat compose.yaml | grep container_name | cut -d':' -f2)  # text after the first ':' on each container_name line
+    for container_name in $container_list; do
+        cid=$(docker ps -aq --filter "name=$container_name")  # -a: include stopped containers as well
+        echo "Stopping container $container_name"
+        if [[ ! -z "$cid" ]]; then docker rm $cid -f && sleep 1s; fi  # nothing to remove when no container matched
+    done
+}
+
+# Force-remove every container whose container_name is declared in
+# compose.telemetry.yaml, whether it is currently running or stopped.
+function stop_telemetry_containers(){
+    cd $WORKPATH/docker_compose/intel/hpu/gaudi/
+    container_list=$(cat compose.telemetry.yaml | grep container_name | cut -d':' -f2)
+    for container_name in $container_list; do
+        cid=$(docker ps -aq --filter "name=$container_name")
+        echo "Stopping container $container_name"
+        if [[ ! -z "$cid" ]]; then docker rm $cid -f && sleep 1s; fi
+    done
 }

 function stop_llm(){
@@ -68,25 +78,31 @@ function stop_retrieval_tool() {
    done
 }
 echo "workpath: $WORKPATH"
-echo "=================== Stop containers ===================="
+echo "::group::=================== Stop containers ===================="
+stop_llm
 stop_crag
-stop_agent_docker
+stop_agent_containers
+stop_retrieval_tool
+stop_telemetry_containers
+echo "::endgroup::"

 cd $WORKPATH/tests

-echo "=================== #1 Building docker images===================="
-build_agent_docker_image
-echo "=================== #1 Building docker images completed===================="
+echo "::group::=================== Building docker images===================="
+bash step1_build_images.sh gaudi_vllm > docker_image_build.log
+echo "::endgroup::"

-echo "=================== #4 Start agent, API server, retrieval, and ingest data===================="
-bash $WORKPATH/tests/step4_launch_and_validate_agent_gaudi.sh
-echo "=================== #4 Agent, retrieval test passed ===================="
+echo "::group::=================== Start agent, API server, retrieval, and ingest data===================="
+bash step4_launch_and_validate_agent_gaudi.sh
+echo "::endgroup::"

-echo "=================== #5 Stop agent and API server===================="
+echo "::group::=================== Stop agent and API server===================="
+stop_llm
 stop_crag
-stop_agent_docker
-echo "=================== #5 Agent and API server stopped===================="
-
+stop_agent_containers
+stop_retrieval_tool
+stop_telemetry_containers
 echo y | docker system prune
+echo "::endgroup::"

 echo "ALL DONE!!"
--- a/AgentQnA/tests/test_compose_on_rocm.sh
+++ b/AgentQnA/tests/test_compose_on_rocm.sh
@@ -9,9 +9,15 @@ ls $WORKPATH
 export WORKDIR=$WORKPATH/../../
 echo "WORKDIR=${WORKDIR}"
 export ip_address=$(hostname -I | awk '{print $1}')
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 export TOOLSET_PATH=$WORKPATH/tools/
-export MODEL_CACHE="./data"
+IMAGE_REPO=${IMAGE_REPO:-"opea"}
+IMAGE_TAG=${IMAGE_TAG:-"latest"}
+echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
+echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
+export REGISTRY=${IMAGE_REPO}
+export TAG=${IMAGE_TAG}
+export MODEL_CACHE=${model_cache:-"./data"}

 function stop_crag() {
    cid=$(docker ps -aq --filter "name=kdd-cup-24-crag-service")
@@ -37,34 +43,35 @@ function stop_retrieval_tool() {
    done
 }
 echo "workpath: $WORKPATH"
-echo "=================== Stop containers ===================="
+echo "::group::=================== Stop containers ===================="
 stop_crag
 stop_agent_docker
 stop_retrieval_tool
+echo "::endgroup::=================== Stop containers completed ===================="

 cd $WORKPATH/tests

-echo "=================== #1 Building docker images===================="
-bash step1_build_images.sh
-echo "=================== #1 Building docker images completed===================="
+echo "::group::=================== #1 Building docker images===================="
+bash step1_build_images.sh rocm > docker_image_build.log
+echo "::endgroup::=================== #1 Building docker images completed===================="

-echo "=================== #2 Start retrieval tool===================="
+echo "::group::=================== #2 Start retrieval tool===================="
 bash step2_start_retrieval_tool.sh
-echo "=================== #2 Retrieval tool started===================="
+echo "::endgroup::=================== #2 Retrieval tool started===================="

-echo "=================== #3 Ingest data and validate retrieval===================="
+echo "::group::=================== #3 Ingest data and validate retrieval===================="
 bash step3_ingest_data_and_validate_retrieval.sh
-echo "=================== #3 Data ingestion and validation completed===================="
+echo "::endgroup::=================== #3 Data ingestion and validation completed===================="

-echo "=================== #4 Start agent and API server===================="
+echo "::group::=================== #4 Start agent and API server===================="
 bash step4a_launch_and_validate_agent_tgi_on_rocm.sh
-echo "=================== #4 Agent test passed ===================="
+echo "::endgroup::=================== #4 Agent test passed ===================="

-echo "=================== #5 Stop agent and API server===================="
+echo "::group::=================== #5 Stop agent and API server===================="
 stop_crag
 stop_agent_docker
 stop_retrieval_tool
-echo "=================== #5 Agent and API server stopped===================="
+echo "::endgroup::=================== #5 Agent and API server stopped===================="

 echo y | docker system prune

--- a/AgentQnA/tests/test_compose_vllm_on_rocm.sh
+++ b/AgentQnA/tests/test_compose_vllm_on_rocm.sh
@@ -5,13 +5,18 @@
 set -e

 WORKPATH=$(dirname "$PWD")
-export LOG_PATH=${WORKPATH}
 export WORKDIR=${WORKPATH}/../../
 echo "WORKDIR=${WORKDIR}"
 export ip_address=$(hostname -I | awk '{print $1}')
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 export TOOLSET_PATH=$WORKPATH/tools/
-export MODEL_CACHE="./data"
+IMAGE_REPO=${IMAGE_REPO:-"opea"}
+IMAGE_TAG=${IMAGE_TAG:-"latest"}
+echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
+echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
+export REGISTRY=${IMAGE_REPO}
+export TAG=${IMAGE_TAG}
+export MODEL_CACHE=${model_cache:-"./data"}

 function stop_crag() {
    cid=$(docker ps -aq --filter "name=kdd-cup-24-crag-service")
@@ -32,34 +37,35 @@ function stop_retrieval_tool() {
 }

 echo "workpath: $WORKPATH"
-echo "=================== Stop containers ===================="
+echo "::group::=================== Stop containers ===================="
 stop_crag
 stop_agent_docker
 stop_retrieval_tool
+echo "::endgroup::"

 cd $WORKPATH/tests

-echo "=================== #1 Building docker images===================="
-bash step1_build_images_rocm_vllm.sh
-echo "=================== #1 Building docker images completed===================="
+echo "::group::=================== #1 Building docker images===================="
+bash step1_build_images.sh rocm_vllm > docker_image_build.log
+echo "::endgroup::=================== #1 Building docker images completed===================="

-echo "=================== #2 Start retrieval tool===================="
+echo "::group::=================== #2 Start retrieval tool===================="
 bash step2_start_retrieval_tool_rocm_vllm.sh
-echo "=================== #2 Retrieval tool started===================="
+echo "::endgroup::=================== #2 Retrieval tool started===================="

-echo "=================== #3 Ingest data and validate retrieval===================="
+echo "::group::=================== #3 Ingest data and validate retrieval===================="
 bash step3_ingest_data_and_validate_retrieval_rocm_vllm.sh
-echo "=================== #3 Data ingestion and validation completed===================="
+echo "::endgroup::=================== #3 Data ingestion and validation completed===================="

-echo "=================== #4 Start agent and API server===================="
+echo "::group::=================== #4 Start agent and API server===================="
 bash step4_launch_and_validate_agent_rocm_vllm.sh
-echo "=================== #4 Agent test passed ===================="
+echo "::endgroup::=================== #4 Agent test passed ===================="

-echo "=================== #5 Stop agent and API server===================="
+echo "::group::=================== #5 Stop agent and API server===================="
 stop_crag
 stop_agent_docker
 stop_retrieval_tool
-echo "=================== #5 Agent and API server stopped===================="
+echo "::endgroup::=================== #5 Agent and API server stopped===================="

 echo y | docker system prune

--- a/AgentQnA/tools/worker_agent_tools.py
+++ b/AgentQnA/tools/worker_agent_tools.py
@@ -12,7 +12,7 @@ def search_knowledge_base(query: str) -> str:
    print(url)
    proxies = {"http": ""}
    payload = {
-        "text": query,
+        "messages": query,
    }
    response = requests.post(url, json=payload, proxies=proxies)
    print(response)
--- a/AgentQnA/ui/docker/Dockerfile
+++ b/AgentQnA/ui/docker/Dockerfile
@@ -1,26 +1,203 @@
-# Copyright (C) 2024 Intel Corporation
+# syntax=docker/dockerfile:1
+# Copyright (C) 2025 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0

-# Use node 20.11.1 as the base image
-FROM node:20.11.1
+# Initialize device type args
+# use build args in the docker build command with --build-arg="BUILDARG=true"
+ARG USE_CUDA=false
+ARG USE_OLLAMA=false
+# Tested with cu117 for CUDA 11 and cu121 for CUDA 12 (default)
+ARG USE_CUDA_VER=cu121
+# any sentence transformer model; models to use can be found at https://huggingface.co/models?library=sentence-transformers
+# Leaderboard: https://huggingface.co/spaces/mteb/leaderboard
+# for better performance and multilingual support use "intfloat/multilingual-e5-large" (~2.5GB) or "intfloat/multilingual-e5-base" (~1.5GB)
+# IMPORTANT: If you switch the embedding model away from the default (sentence-transformers/all-MiniLM-L6-v2), or back to it, RAG Chat can no longer use the documents previously loaded in the WebUI! You need to re-embed them.
+ARG USE_EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
+ARG USE_RERANKING_MODEL=""

-# Update package manager and install Git
-RUN apt-get update -y && apt-get install -y git
+# Tiktoken encoding name; models to use can be found at https://huggingface.co/models?library=tiktoken
+ARG USE_TIKTOKEN_ENCODING_NAME="cl100k_base"

-# Copy the front-end code repository
-COPY svelte /home/user/svelte
+ARG BUILD_HASH=dev-build
+# Override at your own risk - non-root configurations are untested
+ARG UID=0
+ARG GID=0

-# Set the working directory
-WORKDIR /home/user/svelte
+######## WebUI frontend ########
+FROM --platform=$BUILDPLATFORM node:22-alpine3.20 AS build
+ARG BUILD_HASH

-# Install front-end dependencies
-RUN npm install
+WORKDIR /app

-# Build the front-end application
+COPY open_webui_patches /app/patches
+ARG WEBUI_VERSION=v0.5.20
+RUN apk add --no-cache git
+
+# Clone code and use patch
+RUN git config --global user.name "opea" && \
+    git config --global user.email "" && \
+    git clone https://github.com/open-webui/open-webui.git
+
+WORKDIR /app/open-webui
+
+RUN git checkout ${WEBUI_VERSION} && git am /app/patches/*.patch
+
+WORKDIR /app
+
+RUN mv open-webui/* . && rm -fr open-webui && ls -lrth /app/backend/
+
+RUN npm install onnxruntime-node --onnxruntime-node-install-cuda=skip
+RUN apk update && \
+    apk add --no-cache wget && \
+    wget https://github.com/microsoft/onnxruntime/releases/download/v1.20.1/onnxruntime-linux-x64-gpu-1.20.1.tgz
+
+ENV APP_BUILD_HASH=${BUILD_HASH}
 RUN npm run build

-# Expose the port of the front-end application
-EXPOSE 5173
+######## WebUI backend ########
+FROM python:3.11-slim-bookworm AS base

-# Run the front-end application in preview mode
-CMD ["npm", "run", "preview", "--", "--port", "5173", "--host", "0.0.0.0"]
+# Use args
+ARG USE_CUDA
+ARG USE_OLLAMA
+ARG USE_CUDA_VER
+ARG USE_EMBEDDING_MODEL
+ARG USE_RERANKING_MODEL
+ARG UID
+ARG GID
+
+## Basis ##
+ENV ENV=prod \
+    PORT=8080 \
+    # pass build args to the build
+    USE_OLLAMA_DOCKER=${USE_OLLAMA} \
+    USE_CUDA_DOCKER=${USE_CUDA} \
+    USE_CUDA_DOCKER_VER=${USE_CUDA_VER} \
+    USE_EMBEDDING_MODEL_DOCKER=${USE_EMBEDDING_MODEL} \
+    USE_RERANKING_MODEL_DOCKER=${USE_RERANKING_MODEL}
+
+## Basis URL Config ##
+ENV OLLAMA_BASE_URL="/ollama" \
+    OPENAI_API_BASE_URL=""
+
+## API Key and Security Config ##
+ENV OPENAI_API_KEY="" \
+    WEBUI_SECRET_KEY="" \
+    SCARF_NO_ANALYTICS=true \
+    DO_NOT_TRACK=true \
+    ANONYMIZED_TELEMETRY=false
+
+#### Other models #########################################################
+## Whisper STT (speech-to-text) model settings ##
+ENV WHISPER_MODEL="base" \
+    WHISPER_MODEL_DIR="/app/backend/data/cache/whisper/models"
+
+## RAG Embedding model settings ##
+ENV RAG_EMBEDDING_MODEL="$USE_EMBEDDING_MODEL_DOCKER" \
+    RAG_RERANKING_MODEL="$USE_RERANKING_MODEL_DOCKER" \
+    SENTENCE_TRANSFORMERS_HOME="/app/backend/data/cache/embedding/models"
+
+## Tiktoken model settings ##
+ENV TIKTOKEN_ENCODING_NAME="cl100k_base" \
+    TIKTOKEN_CACHE_DIR="/app/backend/data/cache/tiktoken"
+
+## Hugging Face download cache ##
+ENV HF_HOME="/app/backend/data/cache/embedding/models"
+
+## Torch Extensions ##
+# ENV TORCH_EXTENSIONS_DIR="/.cache/torch_extensions"
+
+#### Other models ##########################################################
+
+COPY --from=build /app/backend /app/backend
+
+WORKDIR /app/backend
+
+
+ENV HOME=/root
+# Create user and group if not root
+RUN if [ $UID -ne 0 ]; then \
+    if [ $GID -ne 0 ]; then \
+    addgroup --gid $GID app; \
+    fi; \
+    adduser --uid $UID --gid $GID --home $HOME --disabled-password --no-create-home app; \
+    fi
+
+RUN mkdir -p $HOME/.cache/chroma
+RUN printf 00000000-0000-0000-0000-000000000000 > $HOME/.cache/chroma/telemetry_user_id
+
+# Make sure the user has access to the app and root directory
+RUN chown -R $UID:$GID /app $HOME
+
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+
+RUN if [ "$USE_OLLAMA" = "true" ]; then \
+    apt-get update && \
+    # Install pandoc and netcat
+    apt-get install -y --no-install-recommends git build-essential pandoc netcat-openbsd curl && \
+    apt-get install -y --no-install-recommends gcc python3-dev && \
+    # for RAG OCR
+    apt-get install -y --no-install-recommends ffmpeg libsm6 libxext6 && \
+    # install helper tools
+    apt-get install -y --no-install-recommends curl jq && \
+    # install ollama
+    curl -fsSL https://ollama.com/install.sh | sh && \
+    # cleanup
+    rm -rf /var/lib/apt/lists/*; \
+    else \
+    apt-get update && \
+    # Install pandoc, netcat and gcc
+    apt-get install -y --no-install-recommends git build-essential pandoc gcc netcat-openbsd curl jq && \
+    apt-get install -y --no-install-recommends gcc python3-dev && \
+    # for RAG OCR
+    apt-get install -y --no-install-recommends ffmpeg libsm6 libxext6 && \
+    # cleanup
+    rm -rf /var/lib/apt/lists/*; \
+    fi
+
+# install python dependencies
+# COPY --chown=$UID:$GID ./backend/requirements.txt ./requirements.txt
+# RUN cp /app/backend/requirements.txt ./requirements.txt
+
+RUN pip3 install --no-cache-dir uv && \
+    if [ "$USE_CUDA" = "true" ]; then \
+    # If you use CUDA the whisper and embedding model will be downloaded on first use
+    pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/$USE_CUDA_DOCKER_VER --no-cache-dir && \
+    uv pip install --system -r requirements.txt --no-cache-dir && \
+    python -c "import os; from sentence_transformers import SentenceTransformer; SentenceTransformer(os.environ['RAG_EMBEDDING_MODEL'], device='cpu')" && \
+    python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])"; \
+    python -c "import os; import tiktoken; tiktoken.get_encoding(os.environ['TIKTOKEN_ENCODING_NAME'])"; \
+    else \
+    pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir && \
+    uv pip install --system -r requirements.txt --no-cache-dir && \
+    python -c "import os; from sentence_transformers import SentenceTransformer; SentenceTransformer(os.environ['RAG_EMBEDDING_MODEL'], device='cpu')" && \
+    python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])"; \
+    python -c "import os; import tiktoken; tiktoken.get_encoding(os.environ['TIKTOKEN_ENCODING_NAME'])"; \
+    fi; \
+    chown -R $UID:$GID /app/backend/data/
+
+
+
+# copy embedding weight from build
+# RUN mkdir -p /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2
+# COPY --from=build /app/onnx /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx
+
+# copy built frontend files
+COPY --chown=$UID:$GID --from=build /app/build /app/build
+COPY --chown=$UID:$GID --from=build /app/CHANGELOG.md /app/CHANGELOG.md
+COPY --chown=$UID:$GID --from=build /app/package.json /app/package.json
+
+# copy backend files
+# COPY --chown=$UID:$GID ./backend .
+
+EXPOSE 8080
+
+HEALTHCHECK CMD curl --silent --fail http://localhost:${PORT:-8080}/health | jq -ne 'input.status == true' || exit 1
+
+USER $UID:$GID
+
+ARG BUILD_HASH
+ENV WEBUI_BUILD_VERSION=${BUILD_HASH}
+ENV DOCKER=true
+
+CMD [ "bash", "start.sh"]
--- a/AgentQnA/ui/open_webui_patches/0001-compatible-opea-agent-tool-content.patch
+++ b/AgentQnA/ui/open_webui_patches/0001-compatible-opea-agent-tool-content.patch
@@ -1,17 +1,26 @@
-From 799dcc304b3aecf2e2969df47c8dcac16d2267b0 Mon Sep 17 00:00:00 2001
+From d90ba418f866bc11848d7d6507aabc6b5e8cc3e2 Mon Sep 17 00:00:00 2001
 From: lkk12014402 <kaokao.lv@intel.com>
-Date: Fri, 4 Apr 2025 07:40:30 +0000
-Subject: [PATCH] deal opea agent tool content.
+Date: Mon, 7 Apr 2025 07:22:53 +0000
+Subject: [PATCH] compatible opea agent tool content

 ---
- backend/open_webui/utils/middleware.py | 54 ++++++++++++++++++++++++++
- 1 file changed, 54 insertions(+)
+ backend/open_webui/utils/middleware.py | 56 ++++++++++++++++++++++++++
+ 1 file changed, 56 insertions(+)

 diff --git a/backend/open_webui/utils/middleware.py b/backend/open_webui/utils/middleware.py
-index 289d887df..afa0edf1e 100644
+index 289d887df..fddbe8ee1 100644
 --- a/backend/open_webui/utils/middleware.py
 +++ b/backend/open_webui/utils/middleware.py
-@@ -1486,6 +1486,60 @@ async def process_chat_response(
+@@ -1465,6 +1465,8 @@ async def process_chat_response(
+                 async def stream_body_handler(response):
+                     nonlocal content
+                     nonlocal content_blocks
+                    nonlocal events
+                    sources = []
+ 
+                     response_tool_calls = []
+ 
+@@ -1486,6 +1488,60 @@ async def process_chat_response(
                         try:
                             data = json.loads(data)
 
--- a/AgentQnA/ui/open_webui_patches/0002-update-agent-icloud-upload-feature.patch
+++ b/AgentQnA/ui/open_webui_patches/0002-update-agent-icloud-upload-feature.patch
@@ -0,0 +1,531 @@
+From 8ad31e50644eab3c9e698d7828b1857919887841 Mon Sep 17 00:00:00 2001
+From: lkk12014402 <kaokao.lv@intel.com>
+Date: Tue, 8 Apr 2025 03:38:09 +0000
+Subject: [PATCH 2/2] update agent icloud upload feature
+
+---
+ src/lib/apis/knowledge/index.ts               |  60 +++++++
+ .../admin/Settings/Connections.svelte         |  50 +++++-
+ .../components/icons/UploadCloudIcon.svelte   |  18 ++
+ src/lib/components/workspace/Knowledge.svelte |  57 +++++-
+ .../KnowledgeBase/AddIcloudContentMenu.svelte | 164 ++++++++++++++++++
+ .../KnowledgeBase/IcloudFiles.svelte          |  37 ++++
+ src/lib/i18n/locales/zh-CN/translation.json   |  15 +-
+ 7 files changed, 396 insertions(+), 5 deletions(-)
+ create mode 100644 src/lib/components/icons/UploadCloudIcon.svelte
+ create mode 100644 src/lib/components/workspace/Knowledge/KnowledgeBase/AddIcloudContentMenu.svelte
+ create mode 100644 src/lib/components/workspace/Knowledge/KnowledgeBase/IcloudFiles.svelte
+
+diff --git a/src/lib/apis/knowledge/index.ts b/src/lib/apis/knowledge/index.ts
+index c5fad1323..32be528a7 100644
+--- a/src/lib/apis/knowledge/index.ts
+++ b/src/lib/apis/knowledge/index.ts
+@@ -345,3 +345,63 @@ export const deleteKnowledgeById = async (token: string, id: string) => {
+ 
+ 	return res;
+ };
+
+export const getIcloudFiles = async (ICLOUD_BASE_URLS: string) => {
+	let error = null;
+
+	const res = await fetch(`${ICLOUD_BASE_URLS}/dataprep/get`, {
+		method: 'POST',
+		headers: {
+			Accept: 'application/json',
+			'Content-Type': 'application/json',
+		}
+	})
+		.then(async (res) => {
+			if (!res.ok) throw await res.json();
+			return res.json();
+		})
+		.then((json) => {
+			return json;
+		})
+		.catch((err) => {
+			error = err.detail;
+
+			console.log(err);
+			return null;
+		});
+
+	if (error) {
+		throw error;
+	}
+
+	return res;
+};
+
+export const updateIcloudFiles = async (ICLOUD_BASE_URLS: string, formData: any) => {
+	let error = null;
+
+	const res = await fetch(`${ICLOUD_BASE_URLS}/dataprep/ingest`, {
+		method: 'POST',
+		body: formData
+	})
+		.then(async (res) => {
+			if (!res.ok) throw await res.json();
+			return res.json();
+		})
+		.then((json) => {
+			return json;
+		})
+		.catch((err) => {
+			error = err.detail;
+
+			console.log(err);
+			return null;
+		});
+
+	if (error) {
+		throw error;
+	}
+
+	return res;
+};
+
+diff --git a/src/lib/components/admin/Settings/Connections.svelte b/src/lib/components/admin/Settings/Connections.svelte
+index 2fcfadaec..3237744d5 100644
+--- a/src/lib/components/admin/Settings/Connections.svelte
+++ b/src/lib/components/admin/Settings/Connections.svelte
+@@ -47,6 +47,9 @@
+ 	let showAddOpenAIConnectionModal = false;
+ 	let showAddOllamaConnectionModal = false;
+ 
+	let ENABLE_ICLOUD_API: null | boolean = (localStorage.getItem('ENABLE_ICLOUD_API') === "enable");
+	let ICLOUD_BASE_URL = localStorage.getItem('ICLOUD_BASE_URL') || '';
+
+ 	const updateOpenAIHandler = async () => {
+ 		if (ENABLE_OPENAI_API !== null) {
+ 			// Remove trailing slashes
+@@ -193,10 +196,22 @@
+ 		}
+ 	});
+ 
+	const updateIcloudHandler = async () => {
+		if (ENABLE_ICLOUD_API) {
+			localStorage.setItem('ICLOUD_BASE_URL', ICLOUD_BASE_URL);
+			localStorage.setItem('ENABLE_ICLOUD_API', "enable");
+		} else {
+			localStorage.setItem('ICLOUD_BASE_URL', '');
+			localStorage.setItem('ENABLE_ICLOUD_API', "");
+		}
+		toast.success($i18n.t('Icloud API settings updated'));
+	};
+
+ 	const submitHandler = async () => {
+ 		updateOpenAIHandler();
+ 		updateOllamaHandler();
+ 		updateDirectConnectionsHandler();
+		updateIcloudHandler();
+ 
+ 		dispatch('save');
+ 	};
+@@ -301,7 +316,7 @@
+ 				</div>
+ 
+ 				{#if ENABLE_OLLAMA_API}
+-					<hr class=" border-gray-100 dark:border-gray-850 my-2" />
+					<hr class=" border-gray-100 dark:border-gray-850" />
+ 
+ 					<div class="">
+ 						<div class="flex justify-between items-center">
+@@ -358,6 +373,39 @@
+ 				{/if}
+ 			</div>
+ 
+			<hr class=" border-gray-50 dark:border-gray-850" />
+
+			<div class="pr-1.5 my-2">
+				<div class="flex justify-between items-center text-sm">
+					<div class="font-medium">{$i18n.t('Icloud File API')}</div>
+
+					<div class="mt-1">
+						<Switch 
+							bind:state={ENABLE_ICLOUD_API} 
+							on:change={async () => {
+								updateIcloudHandler();
+							}}
+						/>
+					</div>
+				</div>
+
+				{#if ENABLE_ICLOUD_API}
+					<hr class=" border-gray-50 dark:border-gray-850 my-2" />
+
+					<div class="">
+						<div class="flex w-full gap-1.5">
+							<div class="flex-1 flex flex-col gap-1.5">
+								<input
+									class="w-full text-sm bg-transparent outline-none"
+									placeholder={$i18n.t('Enter Icloud URL(e.g.') + 'http://localhost:6007/v1)'}
+									bind:value={ICLOUD_BASE_URL}
+								/>
+							</div>
+						</div>
+					</div>
+				{/if}
+			</div>
+
+ 			<hr class=" border-gray-100 dark:border-gray-850" />
+ 
+ 			<div class="pr-1.5 my-2">
+diff --git a/src/lib/components/icons/UploadCloudIcon.svelte b/src/lib/components/icons/UploadCloudIcon.svelte
+new file mode 100644
+index 000000000..eed3bd582
+--- /dev/null
+++ b/src/lib/components/icons/UploadCloudIcon.svelte
+@@ -0,0 +1,18 @@
+<script lang="ts">
+	export let className = 'w-4 h-4';
+</script>
+
+<svg
+	t="1744007283647"
+	viewBox="0 0 1491 1024"
+	version="1.1"
+	xmlns="http://www.w3.org/2000/svg"
+	p-id="1630"
+	class = {className}
+	><path
+		d="M546.047379 263.651842s-90.221363-91.423424-212.63125-16.762074c-109.521121 71.300031-90.154581 201.768179-90.154582 201.76818S0 498.498962 0 759.902727c5.431535 261.003078 264.186314 263.674325 264.186314 263.674326l388.443814 0.422947V744.565318H466.355181l279.434681-279.412421 279.390161 279.412421h-186.297208V1024l377.157796-0.422947s240.812904 0.222604 274.648698-248.092052c16.094262-271.576764-232.754643-325.113003-232.754643-325.113003S1286.205362 48.327085 936.761752 2.470681C637.181417-29.740104 546.047379 263.651842 546.047379 263.651842z"
+		fill="#507BFC"
+		p-id="1631"
+	></path></svg
+>
+
+diff --git a/src/lib/components/workspace/Knowledge.svelte b/src/lib/components/workspace/Knowledge.svelte
+index 57d45312d..43a1f305e 100644
+--- a/src/lib/components/workspace/Knowledge.svelte
+++ b/src/lib/components/workspace/Knowledge.svelte
+@@ -13,7 +13,8 @@
+ 	import {
+ 		getKnowledgeBases,
+ 		deleteKnowledgeById,
+-		getKnowledgeBaseList
+		getKnowledgeBaseList,
+		getIcloudFiles
+ 	} from '$lib/apis/knowledge';
+ 
+ 	import { goto } from '$app/navigation';
+@@ -26,6 +27,11 @@
+ 	import Spinner from '../common/Spinner.svelte';
+ 	import { capitalizeFirstLetter } from '$lib/utils';
+ 	import Tooltip from '../common/Tooltip.svelte';
+	import AddIcloudConnectionModal from '$lib/components/workspace/Knowledge/KnowledgeBase/AddIcloudContentMenu.svelte';
+	import IcloudFiles from '$lib/components/workspace/Knowledge/KnowledgeBase/IcloudFiles.svelte';
+
+	let showAddTextContentModal = false;
+	let IcloudFile = [];
+ 
+ 	let loaded = false;
+ 
+@@ -65,9 +71,26 @@
+ 	};
+ 
+ 	onMount(async () => {
+		await updateIcloudFiles();
+
+ 		knowledgeBases = await getKnowledgeBaseList(localStorage.token);
+ 		loaded = true;
+ 	});
+
+	/**
+	 * Refresh `IcloudFile` from the endpoint stored under the
+	 * `ICLOUD_BASE_URL` localStorage key.
+	 *
+	 * Does nothing when no endpoint is stored. A failed request is reported
+	 * through a toast and leaves the current list untouched.
+	 */
+	async function updateIcloudFiles() {
+		const baseUrl = localStorage.getItem('ICLOUD_BASE_URL') || '';
+		console.log('ICLOUD_BASE_URL', baseUrl);
+
+		// Guard: without a stored endpoint there is nothing to fetch.
+		if (baseUrl === '') {
+			return;
+		}
+
+		// The catch handler returns undefined, so a failure skips the assignment below.
+		const fetched = await getIcloudFiles(baseUrl).catch((err) => {
+			toast.error(`${err}`);
+		});
+
+		if (fetched) {
+			IcloudFile = fetched;
+		}
+	}
+ </script>
+ 
+ <svelte:head>
+@@ -187,11 +210,39 @@
+ 		{/each}
+ 	</div>
+ 
+-	<div class=" text-gray-500 text-xs mt-1 mb-2">
+-		ⓘ {$i18n.t("Use '#' in the prompt input to load and include your knowledge.")}
+	<div class="flex justify-between items-center">
+		<div class="flex md:self-center text-xl font-medium px-0.5 items-center">
+			{$i18n.t('Icloud Knowledge')}
+			<div class="flex self-center w-[1px] h-6 mx-2.5 bg-gray-50 dark:bg-gray-850" />
+			<span class="text-lg font-medium text-gray-500 dark:text-gray-300">{IcloudFile.length}</span>
+		</div>
+		<div>
+			<button
+				class=" px-2 py-2 rounded-xl hover:bg-gray-700/10 dark:hover:bg-gray-100/10 dark:text-gray-300 dark:hover:text-white transition font-medium text-sm flex items-center space-x-1"
+				aria-label={$i18n.t('Upload to Icloud')}
+				on:click={() => {
+					showAddTextContentModal = !showAddTextContentModal;
+				}}
+			>
+				<Plus className="size-3.5" />
+			</button>
+		</div>
+	</div>
+	<hr class="border-gray-100 dark:border-gray-850 my-2" />
+	<div class=" flex overflow-y-auto w-full h-[15rem] scrollbar-hidden text-xs">
+		<IcloudFiles files={IcloudFile} />
+ 	</div>
+ {:else}
+ 	<div class="w-full h-full flex justify-center items-center">
+ 		<Spinner />
+ 	</div>
+ {/if}
+
+<AddIcloudConnectionModal
+	bind:show={showAddTextContentModal}
+	on:updateIcloudFile={async (e) => {
+		if (e.detail.status) {
+			await updateIcloudFiles();
+		}
+	}}
+/>
+diff --git a/src/lib/components/workspace/Knowledge/KnowledgeBase/AddIcloudContentMenu.svelte b/src/lib/components/workspace/Knowledge/KnowledgeBase/AddIcloudContentMenu.svelte
+new file mode 100644
+index 000000000..fb906a0d3
+--- /dev/null
+++ b/src/lib/components/workspace/Knowledge/KnowledgeBase/AddIcloudContentMenu.svelte
+@@ -0,0 +1,164 @@
+<script lang="ts">
+	import { toast } from 'svelte-sonner';
+	import { getContext, onMount, createEventDispatcher } from 'svelte';
+	import Modal from '$lib/components/common/Modal.svelte';
+	import UploadCloudIcon from '$lib/components/icons/UploadCloudIcon.svelte';
+	import Spinner from '$lib/components/common/Spinner.svelte';
+	import { updateIcloudFiles } from '$lib/apis/knowledge';
+
+	const i18n = getContext('i18n');
+	const dispatch = createEventDispatcher();
+
+	export let show = false;
+
+	let url = '';
+
+	let loading = false;
+
+	let selectedFile = null;
+
+	// Change handler for the file input: remember the first chosen file.
+	function handleFileSelect(event) {
+		const { files } = event.target;
+		selectedFile = files[0];
+	}
+
+	/**
+	 * Split a comma-separated string into the entries that parse as URLs.
+	 *
+	 * Each entry has surrounding whitespace and any wrapping single or double
+	 * quotes removed before it is validated with the `URL` constructor.
+	 * Entries that fail to parse are dropped.
+	 *
+	 * @param normalizedInput Raw text, e.g. `"http://a.com", "http://b.com"`.
+	 * @returns The cleaned entries that are valid URLs, in input order.
+	 */
+	function parseAndValidateUrls(normalizedInput: string): string[] {
+		return normalizedInput
+			.split(',')
+			.map((candidate) => {
+				// Trim BEFORE stripping quotes: after splitting on ',' an entry usually
+				// starts with a space, which would stop the ^-anchored quote pattern
+				// from matching and leave a stray leading quote on the URL.
+				const processed = candidate
+					.trim()
+					.replace(/^["']+|["']+$/g, '')
+					.trim();
+
+				try {
+					// Constructed only for validation; the original text is what is returned.
+					new URL(processed);
+					return processed;
+				} catch {
+					return null;
+				}
+			})
+			.filter((url): url is string => url !== null);
+	}
+
+	/**
+	 * Validate the modal inputs and upload either the entered URL list or the
+	 * selected file to the endpoint stored under `ICLOUD_BASE_URL`.
+	 *
+	 * Exactly one of `url` / `selectedFile` must be provided. On success an
+	 * `updateIcloudFile` event with `{ status: true }` is dispatched. Every
+	 * exit path clears `loading` so the confirm button is rendered again.
+	 */
+	async function submitHandler() {
+		loading = true;
+
+		if (!url && !selectedFile) {
+			loading = false;
+			show = false;
+
+			toast.error($i18n.t('URL or File are required'));
+			return;
+		}
+		if (url && selectedFile) {
+			loading = false;
+			show = false;
+
+			toast.error($i18n.t('Upload file or enter URL'));
+			url = '';
+			selectedFile = null;
+			return;
+		}
+
+		const ICLOUD_BASE_URL = localStorage.getItem('ICLOUD_BASE_URL') || '';
+		if (ICLOUD_BASE_URL === '') {
+			// No endpoint configured: clear the spinner and say so, instead of
+			// returning with `loading` stuck at true. The modal and the user's
+			// input are kept so the upload can be retried after configuring it.
+			// NOTE(review): this message has no translation entry yet; presumably
+			// the i18n layer falls back to the key text - confirm.
+			loading = false;
+			toast.error($i18n.t('Icloud API URL is not configured'));
+			return;
+		}
+
+		const formData = new FormData();
+		if (url) {
+			formData.append('link_list', JSON.stringify(parseAndValidateUrls(url)));
+		}
+		if (selectedFile) {
+			formData.append('files', selectedFile, selectedFile.name);
+		}
+
+		// The catch handler returns undefined, so a failed upload skips the success branch.
+		const res = await updateIcloudFiles(ICLOUD_BASE_URL, formData).catch((e) => {
+			toast.error(`${e}`);
+		});
+
+		if (res) {
+			toast.success($i18n.t('Upload Succeed'));
+			dispatch('updateIcloudFile', { status: true });
+		}
+
+		// Reset the form and close the modal whether or not the upload succeeded.
+		url = '';
+		selectedFile = null;
+		loading = false;
+		show = false;
+	}
+</script>
+
+<Modal size="sm" bind:show>
+	<div class="flex flex-col justify-end">
+		<div class=" flex justify-between dark:text-gray-100 px-5 pt-4 pb-2">
+			<div class="flex-col text-lg font-medium self-center font-primary">
+				{$i18n.t('Upload Icloud file')}
+				<span class="text-sm text-gray-500">- {$i18n.t('choose URL or local file')}</span>
+			</div>
+
+			<button
+				class="self-center"
+				on:click={() => {
+					show = false;
+				}}
+			>
+				<svg
+					xmlns="http://www.w3.org/2000/svg"
+					viewBox="0 0 20 20"
+					fill="currentColor"
+					class="w-5 h-5"
+				>
+					<path
+						d="M6.28 5.22a.75.75 0 00-1.06 1.06L8.94 10l-3.72 3.72a.75.75 0 101.06 1.06L10 11.06l3.72 3.72a.75.75 0 101.06-1.06L11.06 10l3.72-3.72a.75.75 0 00-1.06-1.06L10 8.94 6.28 5.22z"
+					/>
+				</svg>
+			</button>
+		</div>
+
+		<div class="flex flex-col md:flex-row w-full px-4 pb-4 md:space-x-4 dark:text-gray-200">
+			<div class=" flex flex-col w-full sm:flex-row sm:justify-center sm:space-x-6">
+				<div class="flex items-center w-full">
+					<div class="flex-1 min-w-0 mr-2">
+						<div class="flex flex-col w-full my-8 mx-2">
+							<input
+								class="w-full text-sm bg-transparent placeholder:text-gray-300 outline-none border-b-solid border-b-2 border-blue-500 rounded p-2"
+								type="text"
+								bind:value={url}
+								placeholder={$i18n.t('Upload from URL')}
+							/>
+						</div>
+					</div>
+
+					<div class="flex-none w-[1px] h-[60%] mx-2.5 bg-gray-300"></div>
+
+					<div class="flex-1 min-w-0">
+						<input type="file" id="fileInput" hidden on:change={handleFileSelect} />
+
+						<label
+							for="fileInput"
+							class="cursor-pointer flex flex-col items-center hover:bg-gray-100 rounded-lg p-2 transition-colors"
+						>
+							<UploadCloudIcon className="w-12 h-12 text-gray-500" />
+							<div class="text-xs text-gray-500 pt-2">
+								{selectedFile ? selectedFile.name : '点击上传文件'}
+							</div>
+						</label>
+					</div>
+				</div>
+			</div>
+		</div>
+		{#if loading}
+			<Spinner className="my-4 size-4" />
+		{:else}
+			<button
+				class="bg-blue-500 hover:bg-blue-700 text-white font-bold py-3 px-4 rounded text-sm"
+				on:click={(e) => {
+					e.preventDefault();
+					submitHandler();
+				}}
+			>
+				{$i18n.t('Upload Confirm')}
+			</button>
+		{/if}
+	</div>
+</Modal>
+
+diff --git a/src/lib/components/workspace/Knowledge/KnowledgeBase/IcloudFiles.svelte b/src/lib/components/workspace/Knowledge/KnowledgeBase/IcloudFiles.svelte
+new file mode 100644
+index 000000000..d6490dce2
+--- /dev/null
+++ b/src/lib/components/workspace/Knowledge/KnowledgeBase/IcloudFiles.svelte
+@@ -0,0 +1,37 @@
+<script lang="ts">
+	// Declared as a prop but not referenced anywhere in this component's markup.
+	export let selectedFileId = null;
+	// Entries to list; the markup renders one row per entry and reads `file.name`.
+	export let files = [];
+
+	// Declared as a prop but not referenced anywhere in this component's markup.
+	export let small = false;
+</script>
+
+<div class="max-h-full flex flex-col w-full">
+	{#each files as file}
+		<div class="mt-1 px-2 flex hover:bg-gray-50 transition">
+			<div class="p-3 bg-black/20 dark:bg-white/10 text-white rounded-xl my-2">
+				<svg
+					xmlns="http://www.w3.org/2000/svg"
+					viewBox="0 0 24 24"
+					fill="currentColor"
+					class=" size-3"
+				>
+					<path
+						fill-rule="evenodd"
+						d="M5.625 1.5c-1.036 0-1.875.84-1.875 1.875v17.25c0 1.035.84 1.875 1.875 1.875h12.75c1.035 0 1.875-.84 1.875-1.875V12.75A3.75 3.75 0 0 0 16.5 9h-1.875a1.875 1.875 0 0 1-1.875-1.875V5.25A3.75 3.75 0 0 0 9 1.5H5.625ZM7.5 15a.75.75 0 0 1 .75-.75h7.5a.75.75 0 0 1 0 1.5h-7.5A.75.75 0 0 1 7.5 15Zm.75 2.25a.75.75 0 0 0 0 1.5H12a.75.75 0 0 0 0-1.5H8.25Z"
+						clip-rule="evenodd"
+					/>
+					<path
+						d="M12.971 1.816A5.23 5.23 0 0 1 14.25 5.25v1.875c0 .207.168.375.375.375H16.5a5.23 5.23 0 0 1 3.434 1.279 9.768 9.768 0 0 0-6.963-6.963Z"
+					/>
+				</svg>
+			</div>
+
+			<div class="flex flex-col justify-center -space-y-0.5 px-2.5 w-full">
+				<div class=" dark:text-gray-100 text-sm font-medium line-clamp-1 mb-1">
+					{file.name}
+				</div>
+			</div>
+		</div>
+	{/each}
+</div>
+
+diff --git a/src/lib/i18n/locales/zh-CN/translation.json b/src/lib/i18n/locales/zh-CN/translation.json
+index ebb53a1b5..d6b72e04d 100644
+--- a/src/lib/i18n/locales/zh-CN/translation.json
+++ b/src/lib/i18n/locales/zh-CN/translation.json
+@@ -1174,5 +1174,18 @@
+ 	"Your entire contribution will go directly to the plugin developer; Open WebUI does not take any percentage. However, the chosen funding platform might have its own fees.": "您的全部捐款将直接给到插件开发者，Open WebUI 不会收取任何比例。但众筹平台可能会有服务费、抽成。",
+ 	"Youtube": "YouTube",
+ 	"Youtube Language": "Youtube 语言",
+-	"Youtube Proxy URL": "Youtube 代理 URL"
+	"Youtube Proxy URL": "Youtube 代理 URL",
+	"Upload Icloud file": "上传到云端",
+	"choose URL or local file": "选择URL或本地文件",
+	"Upload from URL": "从URL上传",
+	"Upload Confirm": "确认上传",
+	"URL or File are required": "未上传文件",
+	"Upload file or enter URL": "文件与URL不能同时提交",
+	"Icloud File": "云端文件",
+	"Icloud File API": "云端存储API",
+	"Enter Icloud URL(e.g.": "输入云端存储URL（例如.",
+	"Upload to Icloud": "上传到云端",
+	"Icloud Knowledge": "云端数据库",
+	"Upload Succeed": "上传文件成功",
+	"Icloud API settings updated": "云端存储API设置已更新"
+ }
+-- 
+2.34.1
+
--- a/AgentQnA/ui/open_webui_patches/0003-update-build-script.patch
+++ b/AgentQnA/ui/open_webui_patches/0003-update-build-script.patch
@@ -0,0 +1,56 @@
+From ebf3218eef81897b536521e2140bdd9176f3ace3 Mon Sep 17 00:00:00 2001
+From: lkk12014402 <kaokao.lv@intel.com>
+Date: Tue, 8 Apr 2025 07:13:20 +0000
+Subject: [PATCH 3/3] update build script
+
+---
+ hatch_build.py | 23 ++++++++++++++++++-----
+ 1 file changed, 18 insertions(+), 5 deletions(-)
+
+diff --git a/hatch_build.py b/hatch_build.py
+index 8ddaf0749..e15d6e99d 100644
+--- a/hatch_build.py
+++ b/hatch_build.py
+@@ -3,21 +3,34 @@ import os
+ import shutil
+ import subprocess
+ from sys import stderr
+-
+ 
+ from hatchling.builders.hooks.plugin.interface import BuildHookInterface
+-
+-
+ 
+ 
+ class CustomBuildHook(BuildHookInterface):
+     def initialize(self, version, build_data):
+         super().initialize(version, build_data)
+-        stderr.write(">>> Building Open Webui frontend\n")
+        stderr.write(">>> Building DCAI小智 frontend\n")
+         npm = shutil.which("npm")
+         if npm is None:
+             raise RuntimeError(
+-                "NodeJS `npm` is required for building Open Webui but it was not found"
+                "NodeJS `npm` is required for building DCAI小智 but it was not found"
+             )
+        stderr.write("### Installing onnxruntime-node\n")
+        subprocess.run([npm, "install", "onnxruntime-node", "--onnxruntime-node-install-cuda=skip"], check=True)  # noqa: S603
+       
+        stderr.write("### Installing huggingface/transformers.js\n")
+        subprocess.run([npm, "i", "@huggingface/transformers"], check=True)  # noqa: S603
+       
+        ort_version = "1.20.1"
+        ort_url = f"https://github.com/microsoft/onnxruntime/releases/download/v{ort_version}/onnxruntime-linux-x64-gpu-{ort_version}.tgz"
+       
+        stderr.write(f"### Downloading onnxruntime binaries from {ort_url}\n")
+        subprocess.run(["curl", "-L", ort_url, "-o", f"onnxruntime-linux-x64-gpu-{ort_version}.tgz"], check=True)  # noqa: S603
+       
+         stderr.write("### npm install\n")
+         subprocess.run([npm, "install"], check=True)  # noqa: S603
+ 
+         stderr.write("\n### npm run build\n")
+         os.environ["APP_BUILD_HASH"] = version
+         subprocess.run([npm, "run", "build"], check=True)  # noqa: S603
+-- 
+2.34.1
+
--- a/AgentQnA/ui/open_webui_patches/0004-enhance-tool-formating.patch
+++ b/AgentQnA/ui/open_webui_patches/0004-enhance-tool-formating.patch
@@ -0,0 +1,31 @@
+From 36d61dab9306cb8f12c4497a32781d84f8cfb2e7 Mon Sep 17 00:00:00 2001
+From: lkk12014402 <kaokao.lv@intel.com>
+Date: Tue, 8 Apr 2025 07:22:36 +0000
+Subject: [PATCH 4/4] enhance tool formatting
+
+---
+ backend/open_webui/utils/middleware.py | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/backend/open_webui/utils/middleware.py b/backend/open_webui/utils/middleware.py
+index fddbe8ee1..9e44ed91a 100644
+--- a/backend/open_webui/utils/middleware.py
+++ b/backend/open_webui/utils/middleware.py
+@@ -1142,12 +1142,12 @@ async def process_chat_response(
+                                 result_display_content = f"{result_display_content}\n> {tool_name}: {result.get('content', '')}"
+ 
+                             if not raw:
+-                                content = f'{content}\n<details type="tool_calls" done="true" content="{html.escape(json.dumps(block_content))}" results="{html.escape(json.dumps(results))}">\n<summary>Tool Executed</summary>\n{result_display_content}\n</details>\n'
+                                content = f'{content}\n<details type="tool_calls" done="true" content="{html.escape(json.dumps(block_content))}" results="{html.escape(json.dumps(results))}">\n<summary> Tool: {tool_call.get('function', {}).get('name', '')} Executed</summary>\n{result_display_content}\n</details>\n'
+                         else:
+                             tool_calls_display_content = ""
+ 
+                             for tool_call in block_content:
+-                                tool_calls_display_content = f"{tool_calls_display_content}\n> Executing {tool_call.get('function', {}).get('name', '')}"
+                                tool_calls_display_content = f"{tool_calls_display_content}\n> Executing Tool: {tool_call.get('function', {}).get('name', '')}"
+ 
+                             if not raw:
+                                 content = f'{content}\n<details type="tool_calls" done="false" content="{html.escape(json.dumps(block_content))}">\n<summary>Tool Executing...</summary>\n{tool_calls_display_content}\n</details>\n'
+-- 
+2.34.1
+
--- a/AgentQnA/ui/open_webui_patches/0005-fix-tool-call-typo.patch
+++ b/AgentQnA/ui/open_webui_patches/0005-fix-tool-call-typo.patch
@@ -0,0 +1,25 @@
+From 4723fb2df86df3e1c300f12fc0649823ea1a753b Mon Sep 17 00:00:00 2001
+From: lkk12014402 <kaokao.lv@intel.com>
+Date: Tue, 8 Apr 2025 08:09:36 +0000
+Subject: [PATCH 5/5] fix tool call typo.
+
+---
+ backend/open_webui/utils/middleware.py | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/backend/open_webui/utils/middleware.py b/backend/open_webui/utils/middleware.py
+index 9e44ed91a..82aed5346 100644
+--- a/backend/open_webui/utils/middleware.py
+++ b/backend/open_webui/utils/middleware.py
+@@ -1142,7 +1142,7 @@ async def process_chat_response(
+                                 result_display_content = f"{result_display_content}\n> {tool_name}: {result.get('content', '')}"
+ 
+                             if not raw:
+-                                content = f'{content}\n<details type="tool_calls" done="true" content="{html.escape(json.dumps(block_content))}" results="{html.escape(json.dumps(results))}">\n<summary> Tool: {tool_call.get('function', {}).get('name', '')} Executed</summary>\n{result_display_content}\n</details>\n'
+                                content = f'{content}\n<details type="tool_calls" done="true" content="{html.escape(json.dumps(block_content))}" results="{html.escape(json.dumps(results))}">\n<summary> Tool: {tool_call.get("function", {}).get("name", "")} Executed</summary>\n{result_display_content}\n</details>\n'
+                         else:
+                             tool_calls_display_content = ""
+ 
+-- 
+2.34.1
+
--- a/AudioQnA/Dockerfile
+++ b/AudioQnA/Dockerfile
@@ -1,8 +1,9 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0

+ARG IMAGE_REPO=opea
 ARG BASE_TAG=latest
-FROM opea/comps-base:$BASE_TAG
+FROM $IMAGE_REPO/comps-base:$BASE_TAG

 COPY ./audioqna.py $HOME/audioqna.py

--- a/AudioQnA/Dockerfile.multilang
+++ b/AudioQnA/Dockerfile.multilang
@@ -1,8 +1,9 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0

+ARG IMAGE_REPO=opea
 ARG BASE_TAG=latest
-FROM opea/comps-base:$BASE_TAG
+FROM $IMAGE_REPO/comps-base:$BASE_TAG

 COPY ./audioqna_multilang.py $HOME/audioqna_multilang.py

--- a/AudioQnA/README.md
+++ b/AudioQnA/README.md
@@ -2,6 +2,13 @@

 AudioQnA is an example that demonstrates the integration of Generative AI (GenAI) models for performing question-answering (QnA) on audio files, with the added functionality of Text-to-Speech (TTS) for generating spoken responses. The example showcases how to convert audio input to text using Automatic Speech Recognition (ASR), generate answers to user queries using a language model, and then convert those answers back to speech using Text-to-Speech (TTS).

+## Table of Contents
+
+1. [Architecture](#architecture)
+2. [Deployment Options](#deployment-options)
+
+## Architecture
+
 The AudioQnA example is implemented using the component-level microservices defined in [GenAIComps](https://github.com/opea-project/GenAIComps). The flow chart below shows the information flow between different microservices for this example.

 ```mermaid
@@ -59,37 +66,13 @@ flowchart LR

 ```

-## Deploy AudioQnA Service
+## Deployment Options

-The AudioQnA service can be deployed on either Intel Gaudi2 or Intel Xeon Scalable Processor.
+The table below lists currently available deployment options. They outline in detail the implementation of this example on selected hardware.

-### Deploy AudioQnA on Gaudi
-
-Refer to the [Gaudi Guide](./docker_compose/intel/hpu/gaudi/README.md) for instructions on deploying AudioQnA on Gaudi.
-
-### Deploy AudioQnA on Xeon
-
-Refer to the [Xeon Guide](./docker_compose/intel/cpu/xeon/README.md) for instructions on deploying AudioQnA on Xeon.
-
-## Deploy using Helm Chart
-
-Refer to the [AudioQnA helm chart](./kubernetes/helm/README.md) for instructions on deploying AudioQnA on Kubernetes.
-
-## Supported Models
-
-### ASR
-
-The default model is [openai/whisper-small](https://huggingface.co/openai/whisper-small). It also supports all models in the Whisper family, such as `openai/whisper-large-v3`, `openai/whisper-medium`, `openai/whisper-base`, `openai/whisper-tiny`, etc.
-
-To replace the model, please edit the `compose.yaml` and add the `command` line to pass the name of the model you want to use:
-
-```yaml
-services:
-  whisper-service:
-    ...
-    command: --model_name_or_path openai/whisper-tiny
-```
-
-### TTS
-
-The default model is [microsoft/SpeechT5](https://huggingface.co/microsoft/speecht5_tts). We currently do not support replacing the model. More models under the commercial license will be added in the future.
+| Category               | Deployment Option | Description                                                      |
+| ---------------------- | ----------------- | ---------------------------------------------------------------- |
+| On-premise Deployments | Docker compose    | [AudioQnA deployment on Xeon](./docker_compose/intel/cpu/xeon)   |
+|                        |                   | [AudioQnA deployment on Gaudi](./docker_compose/intel/hpu/gaudi) |
+|                        |                   | [AudioQnA deployment on AMD ROCm](./docker_compose/amd/gpu/rocm) |
+|                        | Kubernetes        | [Helm Charts](./kubernetes/helm)                                 |
--- a/AudioQnA/README_miscellaneous.md
+++ b/AudioQnA/README_miscellaneous.md
@@ -0,0 +1,42 @@
+# AudioQnA Docker Image Build
+
+## Table of Contents
+
+1. [Build MegaService Docker Image](#build-megaservice-docker-image)
+2. [Build UI Docker Image](#build-ui-docker-image)
+3. [Generate a HuggingFace Access Token](#generate-a-huggingface-access-token)
+4. [Troubleshooting](#troubleshooting)
+
+## Build MegaService Docker Image
+
+To construct the Megaservice of AudioQnA, the [GenAIExamples](https://github.com/opea-project/GenAIExamples.git) repository is utilized. Build the Megaservice Docker image using the command below:
+
+```bash
+git clone https://github.com/opea-project/GenAIExamples.git
+cd GenAIExamples/AudioQnA
+docker build --no-cache -t opea/audioqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
+```
+
+## Build UI Docker Image
+
+Build the frontend Docker image using the command below:
+
+```bash
+cd GenAIExamples/AudioQnA/ui
+docker build -t opea/audioqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile .
+```
+
+## Generate a HuggingFace Access Token
+
+Some HuggingFace resources, such as some models, are only accessible if the developer has an access token. In the absence of a HuggingFace access token, the developer can create one by first creating an account by following the steps provided at [HuggingFace](https://huggingface.co/) and then generating a [user access token](https://huggingface.co/docs/transformers.js/en/guides/private#step-1-generating-a-user-access-token).
+
+## Troubleshooting
+
+1. If you get errors like "Access Denied", [validate the microservices](https://github.com/opea-project/GenAIExamples/tree/main/AudioQnA/docker_compose/intel/cpu/xeon/README.md#validate-microservices) first. A simple example:
+
+   ```bash
+   curl http://${host_ip}:7055/v1/audio/speech -XPOST -d '{"input": "Who are you?"}' -H 'Content-Type: application/json' --output speech.mp3
+   ```
+
+2. (Docker only) If all microservices work well, check port ${host_ip}:7777; it may already be allocated by another user, in which case you can change the port in `compose.yaml`.
+3. (Docker only) If you get errors like "The container name is in use", change container name in `compose.yaml`.
--- a/AudioQnA/docker_compose/amd/gpu/rocm/README.md
+++ b/AudioQnA/docker_compose/amd/gpu/rocm/README.md
@@ -1,120 +1,59 @@
-# Build Mega Service of AudioQnA on AMD ROCm GPU
+# Deploying AudioQnA on AMD ROCm GPU

-This document outlines the deployment process for a AudioQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice
-pipeline on server on AMD ROCm GPU platform.
+This document outlines the single-node deployment process for an AudioQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservices on a server with AMD ROCm processing accelerators. The steps include pulling Docker images, deploying containers via Docker Compose, and running the services using the `llm` microservice.

-## Build Docker Images
+Note: The default LLM is `Intel/neural-chat-7b-v3-3`. Before deploying the application, please make sure that you have either requested and been granted access to it on [Huggingface](https://huggingface.co/Intel/neural-chat-7b-v3-3) or downloaded the model locally from [ModelScope](https://www.modelscope.cn/models).

-### 1. Build Docker Image
+## Table of Contents

- #### Create application install directory and go to it:
+1. [AudioQnA Quick Start Deployment](#audioqna-quick-start-deployment)
+2. [AudioQnA Docker Compose Files](#audioqna-docker-compose-files)
+3. [Validate Microservices](#validate-microservices)
+4. [Conclusion](#conclusion)

-  ```bash
-  mkdir ~/audioqna-install && cd audioqna-install
-  ```
+## AudioQnA Quick Start Deployment

- #### Clone the repository GenAIExamples (the default repository branch "main" is used here):
+This section describes how to quickly deploy and test the AudioQnA service manually on an AMD ROCm platform. The basic steps are:

-  ```bash
-  git clone https://github.com/opea-project/GenAIExamples.git
-  ```
+1. [Access the Code](#access-the-code)
+2. [Configure the Deployment Environment](#configure-the-deployment-environment)
+3. [Deploy the Services Using Docker Compose](#deploy-the-services-using-docker-compose)
+4. [Check the Deployment Status](#check-the-deployment-status)
+5. [Validate the Pipeline](#validate-the-pipeline)
+6. [Cleanup the Deployment](#cleanup-the-deployment)

-  If you need to use a specific branch/tag of the GenAIExamples repository, then (v1.3 replace with its own value):
+### Access the Code

-  ```bash
-  git clone https://github.com/opea-project/GenAIExamples.git && cd GenAIExamples && git checkout v1.3
-  ```
-
-  We remind you that when using a specific version of the code, you need to use the README from this version:
-
- #### Go to build directory:
-
-  ```bash
-  cd ~/audioqna-install/GenAIExamples/AudioQnA/docker_image_build
-  ```
-
- Cleaning up the GenAIComps repository if it was previously cloned in this directory.
-  This is necessary if the build was performed earlier and the GenAIComps folder exists and is not empty:
-
-  ```bash
-  echo Y | rm -R GenAIComps
-  ```
-
- #### Clone the repository GenAIComps (the default repository branch "main" is used here):
+Clone the GenAIExamples repository and access the AudioQnA AMD ROCm platform Docker Compose files and supporting scripts:

 ```bash
-git clone https://github.com/opea-project/GenAIComps.git
-cd GenAIComps
+git clone https://github.com/opea-project/GenAIExamples.git
+cd GenAIExamples/AudioQnA
 ```

-We remind you that when using a specific version of the code, you need to use the README from this version.
+Then check out a released version, such as v1.3:

- #### Setting the list of images for the build (from the build file.yaml)
+```bash
+git checkout v1.3
+```

-  If you want to deploy a vLLM-based or TGI-based application, then the set of services is installed as follows:
+### Configure the Deployment Environment

-  #### vLLM-based application
+#### Docker Compose GPU Configuration

-  ```bash
-  service_list="vllm-rocm whisper speecht5 audioqna audioqna-ui"
-  ```
+Consult the section on [AudioQnA Service configuration](#audioqna-configuration) for information on how service specific configuration parameters affect deployments.

-  #### TGI-based application
-
-  ```bash
-  service_list="whisper speecht5 audioqna audioqna-ui"
-  ```
-
- #### Optional. Pull TGI Docker Image (Do this if you want to use TGI)
-
-  ```bash
-  docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
-  ```
-
- #### Build Docker Images
-
-  ```bash
-  docker compose -f build.yaml build ${service_list} --no-cache
-  ```
-
-  After the build, we check the list of images with the command:
-
-  ```bash
-  docker image ls
-  ```
-
-  The list of images should include:
-
-  ##### vLLM-based application:
-
-  - opea/vllm-rocm:latest
-    - opea/whisper:latest
-    - opea/speecht5:latest
-    - opea/audioqna:latest
-
-  ##### TGI-based application:
-
-  - ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
-    - opea/whisper:latest
-    - opea/speecht5:latest
-    - opea/audioqna:latest
-
---
-
-## Deploy the AudioQnA Application
-
-### Docker Compose Configuration for AMD GPUs
-
-To enable GPU support for AMD GPUs, the following configuration is added to the Docker Compose file:
-
- compose_vllm.yaml - for vLLM-based application
- compose.yaml - for TGI-based
+To enable GPU support for AMD GPUs, the following configuration is added to the Docker Compose files (`compose.yaml`, `compose_vllm.yaml`) for the LLM serving container:

 ```yaml
+# Example for vLLM service in compose_vllm.yaml
+# Note: Modern docker compose might use deploy.resources syntax instead.
+# Check your docker version and compose file.
 shm_size: 1g
 devices:
  - /dev/kfd:/dev/kfd
  - /dev/dri/:/dev/dri/
+# - /dev/dri/render128:/dev/dri/render128
 cap_add:
  - SYS_PTRACE
 group_add:
@@ -123,131 +62,161 @@ security_opt:
  - seccomp:unconfined
 ```

-This configuration forwards all available GPUs to the container. To use a specific GPU, specify its `cardN` and `renderN` device IDs. For example:
+#### Environment Variables (`set_env*.sh`)

-```yaml
-shm_size: 1g
-devices:
-  - /dev/kfd:/dev/kfd
-  - /dev/dri/card0:/dev/dri/card0
-  - /dev/dri/render128:/dev/dri/render128
-cap_add:
-  - SYS_PTRACE
-group_add:
-  - video
-security_opt:
-  - seccomp:unconfined
-```
+These scripts (`set_env_vllm.sh` for vLLM, `set_env.sh` for TGI) configure crucial parameters passed to the containers.

-**How to Identify GPU Device IDs:**
-Use AMD GPU driver utilities to determine the correct `cardN` and `renderN` IDs for your GPU.
+To set up environment variables for deploying AudioQnA services, set up some parameters specific to the deployment environment and source the `set_env.sh` script in this directory:

-### Set deploy environment variables
-
-#### Setting variables in the operating system environment:
-
-##### Set variable HUGGINGFACEHUB_API_TOKEN:
+For TGI inference usage:

 ```bash
-### Replace the string 'your_huggingfacehub_token' with your HuggingFacehub repository access token.
-export HUGGINGFACEHUB_API_TOKEN='your_huggingfacehub_token'
+export host_ip="External_Public_IP"           # ip address of the node
+export HF_TOKEN="Your_HuggingFace_API_Token"
+export http_proxy="Your_HTTP_Proxy"           # http proxy if any
+export https_proxy="Your_HTTPs_Proxy"         # https proxy if any
+export no_proxy=localhost,127.0.0.1,$host_ip,whisper-service,speecht5-service,vllm-service,tgi-service,audioqna-xeon-backend-server,audioqna-xeon-ui-server  # additional no proxies if needed
+export NGINX_PORT=${your_nginx_port}          # your usable port for nginx, 80 for example
+source ./set_env.sh
 ```

-#### Set variables value in set_env\*\*\*\*.sh file:
-
-Go to Docker Compose directory:
+For vLLM inference usage:

 ```bash
-cd ~/audioqna-install/GenAIExamples/AudioQnA/docker_compose/amd/gpu/rocm
+export host_ip="External_Public_IP"           # ip address of the node
+export HF_TOKEN="Your_HuggingFace_API_Token"
+export http_proxy="Your_HTTP_Proxy"           # http proxy if any
+export https_proxy="Your_HTTPs_Proxy"         # https proxy if any
+export no_proxy=localhost,127.0.0.1,$host_ip,whisper-service,speecht5-service,vllm-service,tgi-service,audioqna-xeon-backend-server,audioqna-xeon-ui-server  # additional no proxies if needed
+export NGINX_PORT=${your_nginx_port}          # your usable port for nginx, 80 for example
+source ./set_env_vllm.sh
 ```

-The example uses the Nano text editor. You can use any convenient text editor:
+### Deploy the Services Using Docker Compose

-#### If you use vLLM
-
-```bash
-nano set_env_vllm.sh
-```
-
-#### If you use TGI
-
-```bash
-nano set_env.sh
-```
-
-If you are in a proxy environment, also set the proxy-related environment variables:
-
-```bash
-export http_proxy="Your_HTTP_Proxy"
-export https_proxy="Your_HTTPs_Proxy"
-```
-
-Set the values of the variables:
-
- **HOST_IP, HOST_IP_EXTERNAL** - These variables are used to configure the name/address of the service in the operating system environment for the application services to interact with each other and with the outside world.
-
-  If your server uses only an internal address and is not accessible from the Internet, then the values for these two variables will be the same and the value will be equal to the server's internal name/address.
-
-  If your server uses only an external, Internet-accessible address, then the values for these two variables will be the same and the value will be equal to the server's external name/address.
-
-  If your server is located on an internal network, has an internal address, but is accessible from the Internet via a proxy/firewall/load balancer, then the HOST_IP variable will have a value equal to the internal name/address of the server, and the EXTERNAL_HOST_IP variable will have a value equal to the external name/address of the proxy/firewall/load balancer behind which the server is located.
-
-  We set these values in the file set_env\*\*\*\*.sh
-
- **Variables with names like "**\*\*\*\*\*\*\_PORT"\*\* - These variables set the IP port numbers for establishing network connections to the application services.
-  The values shown in the file set_env.sh or set_env_vllm they are the values used for the development and testing of the application, as well as configured for the environment in which the development is performed. These values must be configured in accordance with the rules of network access to your environment's server, and must not overlap with the IP ports of other applications that are already in use.
-
-#### Set variables with script set_env\*\*\*\*.sh
-
-#### If you use vLLM
-
-```bash
-. set_env_vllm.sh
-```
-
-#### If you use TGI
-
-```bash
-. set_env.sh
-```
-
-### Start the services:
-
-#### If you use vLLM
-
-```bash
-docker compose -f compose_vllm.yaml up -d
-```
-
-#### If you use TGI
+To deploy the AudioQnA services, execute the `docker compose up` command with the appropriate arguments. For a default deployment, execute the command below. It uses the 'compose.yaml' file.
+
+For TGI inference deployment:

 ```bash
+cd docker_compose/amd/gpu/rocm
 docker compose -f compose.yaml up -d
 ```

-All containers should be running and should not restart:
+For vLLM inference deployment:

-##### If you use vLLM:
+```bash
+cd docker_compose/amd/gpu/rocm
+docker compose -f compose_vllm.yaml up -d
+```

- audioqna-vllm-service
- whisper-service
- speecht5-service
- audioqna-backend-server
- audioqna-ui-server
+> **Note**: developers should build the docker image from source when:
+>
+> - Developing off the git main branch (as the container's ports in the repo may be different from the published docker image).
+> - Unable to download the docker image.
+> - Using a specific version of the Docker image.

-##### If you use TGI:
+Please refer to the table below to build different microservices from source:

- audioqna-tgi-service
- whisper-service
- speecht5-service
- audioqna-backend-server
- audioqna-ui-server
+| Microservice | Deployment Guide                                                                                                                  |
+| ------------ | --------------------------------------------------------------------------------------------------------------------------------- |
+| vLLM         | [vLLM build guide](https://github.com/opea-project/GenAIComps/tree/main/comps/third_parties/vllm#build-docker)                    |
+| LLM          | [LLM build guide](https://github.com/opea-project/GenAIComps/tree/main/comps/llms)                                                |
+| WHISPER      | [Whisper build guide](https://github.com/opea-project/GenAIComps/tree/main/comps/asr/src#211-whisper-server-image)                |
+| SPEECHT5     | [SpeechT5 build guide](https://github.com/opea-project/GenAIComps/tree/main/comps/tts/src#211-speecht5-server-image)              |
+| GPT-SOVITS   | [GPT-SOVITS build guide](https://github.com/opea-project/GenAIComps/tree/main/comps/third_parties/gpt-sovits/src#build-the-image) |
+| MegaService  | [MegaService build guide](../../../../README_miscellaneous.md#build-megaservice-docker-image)                                     |
+| UI           | [Basic UI build guide](../../../../README_miscellaneous.md#build-ui-docker-image)                                                 |

---
+### Check the Deployment Status

-## Validate the Services
+After running docker compose, check if all the containers launched via docker compose have started:

-### 1. Validate the vLLM/TGI Service
+#### For TGI inference deployment
+
+```bash
+docker ps -a
+```
+
+For the default deployment, the following 5 containers should have started:
+
+```
+CONTAINER ID   IMAGE                                                      COMMAND                  CREATED          STATUS          PORTS                                         NAMES
+d8007690868d   opea/audioqna:latest                                       "python audioqna.py"     21 seconds ago   Up 19 seconds   0.0.0.0:3008->8888/tcp, [::]:3008->8888/tcp   audioqna-rocm-backend-server
+87ba9a1d56ae   ghcr.io/huggingface/text-generation-inference:2.4.1-rocm   "/tgi-entrypoint.sh …"   21 seconds ago   Up 20 seconds   0.0.0.0:3006->80/tcp, [::]:3006->80/tcp       tgi-service
+59e869acd742   opea/speecht5:latest                                       "python speecht5_ser…"   21 seconds ago   Up 20 seconds   0.0.0.0:7055->7055/tcp, :::7055->7055/tcp     speecht5-service
+0143267a4327   opea/whisper:latest                                        "python whisper_serv…"   21 seconds ago   Up 20 seconds   0.0.0.0:7066->7066/tcp, :::7066->7066/tcp     whisper-service
+```
+
+#### For vLLM inference deployment
+
+```bash
+docker ps -a
+```
+
+For the default deployment, the following 5 containers should have started:
+
+```
+CONTAINER ID   IMAGE                     COMMAND                  CREATED          STATUS          PORTS                                           NAMES
+f3e6893a69fa   opea/audioqna-ui:latest   "docker-entrypoint.s…"   37 seconds ago   Up 35 seconds   0.0.0.0:18039->5173/tcp, [::]:18039->5173/tcp   audioqna-ui-server
+f943e5cd21e9   opea/audioqna:latest      "python audioqna.py"     37 seconds ago   Up 35 seconds   0.0.0.0:18038->8888/tcp, [::]:18038->8888/tcp   audioqna-backend-server
+074e8c418f52   opea/speecht5:latest      "python speecht5_ser…"   37 seconds ago   Up 36 seconds   0.0.0.0:7055->7055/tcp, :::7055->7055/tcp       speecht5-service
+77abe498e427   opea/vllm-rocm:latest     "python3 /workspace/…"   37 seconds ago   Up 36 seconds   0.0.0.0:8081->8011/tcp, [::]:8081->8011/tcp     audioqna-vllm-service
+9074a95bb7a6   opea/whisper:latest       "python whisper_serv…"   37 seconds ago   Up 36 seconds   0.0.0.0:7066->7066/tcp, :::7066->7066/tcp       whisper-service
+```
+
+If any issues are encountered during deployment, refer to the [Troubleshooting](../../../../README_miscellaneous.md#troubleshooting) section.
+
+### Validate the Pipeline
+
+Once the AudioQnA services are running, test the pipeline using the following command:
+
+```bash
+# Test the AudioQnA megaservice by recording a .wav file, encoding the file into the base64 format, and then sending the base64 string to the megaservice endpoint.
+# The megaservice will return a spoken response as a base64 string. To listen to the response, decode the base64 string and save it as a .wav file.
+wget https://github.com/intel/intel-extension-for-transformers/raw/refs/heads/main/intel_extension_for_transformers/neural_chat/assets/audio/sample_2.wav
+base64_audio=$(base64 -w 0 sample_2.wav)
+
+# if you are using speecht5 as the tts service, voice can be "default" or "male"
+# if you are using gpt-sovits for the tts service, you can set the reference audio following https://github.com/opea-project/GenAIComps/blob/main/comps/third_parties/gpt-sovits/src/README.md
+
+curl http://${host_ip}:3008/v1/audioqna \
+  -X POST \
+  -H "Content-Type: application/json" \
+  -d "{\"audio\": \"${base64_audio}\", \"max_tokens\": 64, \"voice\": \"default\"}" \
+  | sed 's/^"//;s/"$//' | base64 -d > output.wav
+```
+
+**Note** : Access the AudioQnA UI by web browser through this URL: `http://${host_ip}:5173`. Please confirm the `5173` port is opened in the firewall. To validate each microservice used in the pipeline refer to the [Validate Microservices](#validate-microservices) section.
+
+### Cleanup the Deployment
+
+To stop the containers associated with the deployment, execute the following command:
+
+#### If you use vLLM
+
+```bash
+cd ~/audioqna-install/GenAIExamples/AudioQnA/docker_compose/amd/gpu/rocm
+docker compose -f compose_vllm.yaml down
+```
+
+#### If you use TGI
+
+```bash
+cd ~/audioqna-install/GenAIExamples/AudioQnA/docker_compose/amd/gpu/rocm
+docker compose -f compose.yaml down
+```
+
+## AudioQnA Docker Compose Files
+
+In the context of deploying an AudioQnA pipeline on an AMD GPU (ROCm) platform, we can pick and choose different large language model serving frameworks, or single English TTS/multi-language TTS component. The table below outlines the various configurations that are available as part of the application. These configurations can be used as templates and can be extended to different components available in [GenAIComps](https://github.com/opea-project/GenAIComps.git).
+
+| File                                     | Description                                                                               |
+| ---------------------------------------- | ----------------------------------------------------------------------------------------- |
+| [compose_vllm.yaml](./compose_vllm.yaml) | Default compose file using vLLM as the serving framework                                  |
+| [compose.yaml](./compose.yaml)           | The LLM serving framework is TGI. All other configurations remain the same as the default |
+
+### Validate the vLLM/TGI Service

 #### If you use vLLM:

@@ -313,7 +282,7 @@ Checking the response from the service. The response should be similar to JSON:
 If the service response has a meaningful response in the value of the "generated_text" key,
 then we consider the TGI service to be successfully launched

-### 2. Validate MegaServices
+### Validate MegaServices

 Test the AudioQnA megaservice by recording a .wav file, encoding the file into the base64 format, and then sending the
 base64 string to the megaservice endpoint. The megaservice will return a spoken response as a base64 string. To listen
@@ -327,7 +296,7 @@ curl http://${host_ip}:3008/v1/audioqna \
  -H 'Content-Type: application/json' | sed 's/^"//;s/"$//' | base64 -d > output.wav
 ```

-### 3. Validate MicroServices
+### Validate MicroServices

 ```bash
 # whisper service
@@ -343,18 +312,6 @@ curl http://${host_ip}:7055/v1/tts \
  -H 'Content-Type: application/json'
 ```

-### 4. Stop application
+## Conclusion

-#### If you use vLLM
-
-```bash
-cd ~/audioqna-install/GenAIExamples/AudioQnA/docker_compose/amd/gpu/rocm
-docker compose -f compose_vllm.yaml down
-```
-
-#### If you use TGI
-
-```bash
-cd ~/audioqna-install/GenAIExamples/AudioQnA/docker_compose/amd/gpu/rocm
-docker compose -f compose.yaml down
-```
+This guide should enable developers to deploy the default configuration or any of the other compose yaml files for different configurations. It also highlights the configurable parameters that can be set before deployment.
--- a/AudioQnA/docker_compose/amd/gpu/rocm/compose.yaml
+++ b/AudioQnA/docker_compose/amd/gpu/rocm/compose.yaml
@@ -30,7 +30,7 @@ services:
    ports:
      - "3006:80"
    volumes:
-     - "./data:/data"
+     - "${MODEL_CACHE:-./data}:/data"
    shm_size: 1g
    devices:
      - /dev/kfd:/dev/kfd
@@ -40,7 +40,7 @@ services:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
-      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
    command: --model-id ${LLM_MODEL_ID}
--- a/AudioQnA/docker_compose/amd/gpu/rocm/compose_vllm.yaml
+++ b/AudioQnA/docker_compose/amd/gpu/rocm/compose_vllm.yaml
@@ -35,8 +35,8 @@ services:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      WILM_USE_TRITON_FLASH_ATTENTION: 0
--- a/AudioQnA/docker_compose/amd/gpu/rocm/set_env.sh
+++ b/AudioQnA/docker_compose/amd/gpu/rocm/set_env.sh
@@ -6,8 +6,8 @@

 # export host_ip=<your External Public IP>    # export host_ip=$(hostname -I | awk '{print $1}')

-export host_ip="192.165.1.21"
-export HUGGINGFACEHUB_API_TOKEN=${YOUR_HUGGINGFACEHUB_API_TOKEN}
+export host_ip=${ip_address}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 # <token>

 export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
--- a/AudioQnA/docker_compose/amd/gpu/rocm/set_env_vllm.sh
+++ b/AudioQnA/docker_compose/amd/gpu/rocm/set_env_vllm.sh
@@ -6,9 +6,9 @@

 # export host_ip=<your External Public IP>    # export host_ip=$(hostname -I | awk '{print $1}')

-export host_ip=""
-export external_host_ip=""
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export host_ip=${ip_address}
+export external_host_ip=${ip_address}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 export HF_CACHE_DIR="./data"
 export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
 export VLLM_SERVICE_PORT="8081"
--- a/AudioQnA/docker_compose/intel/cpu/xeon/README.md
+++ b/AudioQnA/docker_compose/intel/cpu/xeon/README.md
@@ -1,123 +1,146 @@
-# Build Mega Service of AudioQnA on Xeon
+# Deploying AudioQnA on Intel® Xeon® Processors

-This document outlines the deployment process for a AudioQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on Intel Xeon server.
-
-The default pipeline deploys with vLLM as the LLM serving component. It also provides options of using TGI backend for LLM microservice, please refer to [Start the MegaService](#-start-the-megaservice) section in this page.
+This document outlines the single-node deployment process for an AudioQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservices on an Intel Xeon server. The steps include pulling Docker images, container deployment via Docker Compose, and service execution using microservices `llm`.

 Note: The default LLM is `meta-llama/Meta-Llama-3-8B-Instruct`. Before deploying the application, please make sure either you've requested and been granted the access to it on [Huggingface](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) or you've downloaded the model locally from [ModelScope](https://www.modelscope.cn/models).

-## 🚀 Build Docker images
+## Table of Contents

-### 1. Source Code install GenAIComps
+1. [AudioQnA Quick Start Deployment](#audioqna-quick-start-deployment)
+2. [AudioQnA Docker Compose Files](#audioqna-docker-compose-files)
+3. [Validate Microservices](#validate-microservices)
+4. [Conclusion](#conclusion)

-```bash
-git clone https://github.com/opea-project/GenAIComps.git
-cd GenAIComps
-```
+## AudioQnA Quick Start Deployment

-### 2. Build ASR Image
+This section describes how to quickly deploy and test the AudioQnA service manually on an Intel® Xeon® processor. The basic steps are:

-```bash
-docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/integrations/dependency/whisper/Dockerfile .
-```
+1. [Access the Code](#access-the-code)
+2. [Configure the Deployment Environment](#configure-the-deployment-environment)
+3. [Deploy the Services Using Docker Compose](#deploy-the-services-using-docker-compose)
+4. [Check the Deployment Status](#check-the-deployment-status)
+5. [Validate the Pipeline](#validate-the-pipeline)
+6. [Cleanup the Deployment](#cleanup-the-deployment)

-### 3. Build vLLM Image
+### Access the Code

-```bash
-git clone https://github.com/vllm-project/vllm.git
-cd ./vllm/
-VLLM_VER="$(git describe --tags "$(git rev-list --tags --max-count=1)" )"
-git checkout ${VLLM_VER}
-docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile.cpu -t opea/vllm:latest --shm-size=128g .
-```
-
-### 4. Build TTS Image
-
-```bash
-docker build -t opea/speecht5:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/src/integrations/dependency/speecht5/Dockerfile .
-
-# multilang tts (optional)
-docker build -t opea/gpt-sovits:latest --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy -f comps/tts/src/integrations/dependency/gpt-sovits/Dockerfile .
-```
-
-### 5. Build MegaService Docker Image
-
-To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `audioqna.py` Python script. Build the MegaService Docker image using the command below:
+Clone the GenAIExamples repository and access the AudioQnA Intel® Xeon® platform Docker Compose files and supporting scripts:

 ```bash
 git clone https://github.com/opea-project/GenAIExamples.git
-cd GenAIExamples/AudioQnA/
-docker build --no-cache -t opea/audioqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
+cd GenAIExamples/AudioQnA
 ```

-Then run the command `docker images`, you will have following images ready:
-
-1. `opea/whisper:latest`
-2. `opea/vllm:latest`
-3. `opea/speecht5:latest`
-4. `opea/audioqna:latest`
-5. `opea/gpt-sovits:latest` (optional)
-
-## 🚀 Set the environment variables
-
-Before starting the services with `docker compose`, you have to recheck the following environment variables.
+Then check out a released version, such as v1.2:

 ```bash
-export host_ip=<your External Public IP>    # export host_ip=$(hostname -I | awk '{print $1}')
-export HUGGINGFACEHUB_API_TOKEN=<your HF token>
-
-export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
-
-export MEGA_SERVICE_HOST_IP=${host_ip}
-export WHISPER_SERVER_HOST_IP=${host_ip}
-export SPEECHT5_SERVER_HOST_IP=${host_ip}
-export LLM_SERVER_HOST_IP=${host_ip}
-export GPT_SOVITS_SERVER_HOST_IP=${host_ip}
-
-export WHISPER_SERVER_PORT=7066
-export SPEECHT5_SERVER_PORT=7055
-export GPT_SOVITS_SERVER_PORT=9880
-export LLM_SERVER_PORT=3006
-
-export BACKEND_SERVICE_ENDPOINT=http://${host_ip}:3008/v1/audioqna
+git checkout v1.2
 ```

-or use set_env.sh file to setup environment variables.
+### Configure the Deployment Environment

-Note:
-
- Please replace with host_ip with your external IP address, do not use localhost.
- If you are in a proxy environment, also set the proxy-related environment variables:
-
-```
-export http_proxy="Your_HTTP_Proxy"
-export https_proxy="Your_HTTPs_Proxy"
-# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
-export no_proxy="Your_No_Proxy",${host_ip},whisper-service,speecht5-service,gpt-sovits-service,tgi-service,vllm-service,audioqna-xeon-backend-server,audioqna-xeon-ui-server
-```
-
-## 🚀 Start the MegaService
+To set up environment variables for deploying AudioQnA services, set up some parameters specific to the deployment environment and source the `set_env.sh` script in this directory:

 ```bash
-cd GenAIExamples/AudioQnA/docker_compose/intel/cpu/xeon/
+export host_ip="External_Public_IP"           # ip address of the node
+export HF_TOKEN="Your_HuggingFace_API_Token"
+export http_proxy="Your_HTTP_Proxy"           # http proxy if any
+export https_proxy="Your_HTTPs_Proxy"         # https proxy if any
+export no_proxy=localhost,127.0.0.1,$host_ip,whisper-service,speecht5-service,vllm-service,tgi-service,audioqna-xeon-backend-server,audioqna-xeon-ui-server  # additional no proxies if needed
+export NGINX_PORT=${your_nginx_port}          # your usable port for nginx, 80 for example
+source ./set_env.sh
 ```

-If use vLLM as the LLM serving backend:
+Consult the section on [AudioQnA Service configuration](#audioqna-configuration) for information on how service specific configuration parameters affect deployments.

-```
-docker compose up -d
+### Deploy the Services Using Docker Compose

-# multilang tts (optional)
-docker compose -f compose_multilang.yaml up -d
+To deploy the AudioQnA services, execute the `docker compose up` command with the appropriate arguments. For a default deployment, execute the command below. It uses the 'compose.yaml' file.
+
+```bash
+cd docker_compose/intel/cpu/xeon
+docker compose -f compose.yaml up -d
 ```

-If use TGI as the LLM serving backend:
+> **Note**: developers should build the docker image from source when:
+>
+> - Developing off the git main branch (as the container's ports in the repo may be different from the published docker image).
+> - Unable to download the docker image.
+> - Using a specific version of the Docker image.

-```
-docker compose -f compose_tgi.yaml up -d
+Please refer to the table below to build different microservices from source:
+
+| Microservice | Deployment Guide                                                                                                                  |
+| ------------ | --------------------------------------------------------------------------------------------------------------------------------- |
+| vLLM         | [vLLM build guide](https://github.com/opea-project/GenAIComps/tree/main/comps/third_parties/vllm#build-docker)                    |
+| LLM          | [LLM build guide](https://github.com/opea-project/GenAIComps/tree/main/comps/llms)                                                |
+| WHISPER      | [Whisper build guide](https://github.com/opea-project/GenAIComps/tree/main/comps/asr/src#211-whisper-server-image)                |
+| SPEECHT5     | [SpeechT5 build guide](https://github.com/opea-project/GenAIComps/tree/main/comps/tts/src#211-speecht5-server-image)              |
+| GPT-SOVITS   | [GPT-SOVITS build guide](https://github.com/opea-project/GenAIComps/tree/main/comps/third_parties/gpt-sovits/src#build-the-image) |
+| MegaService  | [MegaService build guide](../../../../README_miscellaneous.md#build-megaservice-docker-image)                                     |
+| UI           | [Basic UI build guide](../../../../README_miscellaneous.md#build-ui-docker-image)                                                 |
+
+### Check the Deployment Status
+
+After running docker compose, check if all the containers launched via docker compose have started:
+
+```bash
+docker ps -a
 ```

-## 🚀 Test MicroServices
+For the default deployment, the following 5 containers should have started:
+
+```
+1c67e44c39d2   opea/audioqna-ui:latest   "docker-entrypoint.s…"   About a minute ago   Up About a minute             0.0.0.0:5173->5173/tcp, :::5173->5173/tcp   audioqna-xeon-ui-server
+833a42677247   opea/audioqna:latest      "python audioqna.py"     About a minute ago   Up About a minute             0.0.0.0:3008->8888/tcp, :::3008->8888/tcp   audioqna-xeon-backend-server
+5dc4eb9bf499   opea/speecht5:latest      "python speecht5_ser…"   About a minute ago   Up About a minute             0.0.0.0:7055->7055/tcp, :::7055->7055/tcp   speecht5-service
+814e6efb1166   opea/vllm:latest          "python3 -m vllm.ent…"   About a minute ago   Up About a minute (healthy)   0.0.0.0:3006->80/tcp, :::3006->80/tcp       vllm-service
+46f7a00f4612   opea/whisper:latest       "python whisper_serv…"   About a minute ago   Up About a minute             0.0.0.0:7066->7066/tcp, :::7066->7066/tcp   whisper-service
+```
+
+If any issues are encountered during deployment, refer to the [Troubleshooting](../../../../README_miscellaneous.md#troubleshooting) section.
+
+### Validate the Pipeline
+
+Once the AudioQnA services are running, test the pipeline using the following command:
+
+```bash
+# Test the AudioQnA megaservice by recording a .wav file, encoding the file into the base64 format, and then sending the base64 string to the megaservice endpoint.
+# The megaservice will return a spoken response as a base64 string. To listen to the response, decode the base64 string and save it as a .wav file.
+wget https://github.com/intel/intel-extension-for-transformers/raw/refs/heads/main/intel_extension_for_transformers/neural_chat/assets/audio/sample_2.wav
+base64_audio=$(base64 -w 0 sample_2.wav)
+
+# if you are using speecht5 as the tts service, voice can be "default" or "male"
+# if you are using gpt-sovits for the tts service, you can set the reference audio following https://github.com/opea-project/GenAIComps/blob/main/comps/third_parties/gpt-sovits/src/README.md
+
+curl http://${host_ip}:3008/v1/audioqna \
+  -X POST \
+  -H "Content-Type: application/json" \
+  -d "{\"audio\": \"${base64_audio}\", \"max_tokens\": 64, \"voice\": \"default\"}" \
+  | sed 's/^"//;s/"$//' | base64 -d > output.wav
+```
+
+**Note** : Access the AudioQnA UI by web browser through this URL: `http://${host_ip}:5173`. Please confirm the `5173` port is opened in the firewall. To validate each microservice used in the pipeline refer to the [Validate Microservices](#validate-microservices) section.
+
+### Cleanup the Deployment
+
+To stop the containers associated with the deployment, execute the following command:
+
+```bash
+docker compose -f compose.yaml down
+```
+
+## AudioQnA Docker Compose Files
+
+In the context of deploying an AudioQnA pipeline on an Intel® Xeon® platform, we can pick and choose different large language model serving frameworks, or single English TTS/multi-language TTS component. The table below outlines the various configurations that are available as part of the application. These configurations can be used as templates and can be extended to different components available in [GenAIComps](https://github.com/opea-project/GenAIComps.git).
+
+| File                                               | Description                                                                               |
+| -------------------------------------------------- | ----------------------------------------------------------------------------------------- |
+| [compose.yaml](./compose.yaml)                     | Default compose file using vLLM as the serving framework                                  |
+| [compose_tgi.yaml](./compose_tgi.yaml)             | The LLM serving framework is TGI. All other configurations remain the same as the default |
+| [compose_multilang.yaml](./compose_multilang.yaml) | The TTS component is GPT-SoVITS. All other configurations remain the same as the default  |
+
+## Validate MicroServices

 1. Whisper Service

@@ -161,7 +184,7 @@ docker compose -f compose_tgi.yaml up -d

 3. TTS Service

-   ```
+   ```bash
   # speecht5 service
   curl http://${host_ip}:${SPEECHT5_SERVER_PORT}/v1/audio/speech -XPOST -d '{"input": "Who are you?"}' -H 'Content-Type: application/json' --output speech.mp3

@@ -169,17 +192,6 @@ docker compose -f compose_tgi.yaml up -d
   curl http://${host_ip}:${GPT_SOVITS_SERVER_PORT}/v1/audio/speech -XPOST -d '{"input": "Who are you?"}' -H 'Content-Type: application/json' --output speech.mp3
   ```

-## 🚀 Test MegaService
+## Conclusion

-Test the AudioQnA megaservice by recording a .wav file, encoding the file into the base64 format, and then sending the
-base64 string to the megaservice endpoint. The megaservice will return a spoken response as a base64 string. To listen
-to the response, decode the base64 string and save it as a .wav file.
-
-```bash
-# if you are using speecht5 as the tts service, voice can be "default" or "male"
-# if you are using gpt-sovits for the tts service, you can set the reference audio following https://github.com/opea-project/GenAIComps/blob/main/comps/tts/src/integrations/dependency/gpt-sovits/README.md
-curl http://${host_ip}:3008/v1/audioqna \
-  -X POST \
-  -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64, "voice":"default"}' \
-  -H 'Content-Type: application/json' | sed 's/^"//;s/"$//' | base64 -d > output.wav
-```
+This guide should enable developers to deploy the default configuration or any of the other compose yaml files for different configurations. It also highlights the configurable parameters that can be set before deployment.
--- a/AudioQnA/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/AudioQnA/docker_compose/intel/cpu/xeon/compose.yaml
@@ -36,7 +36,7 @@ services:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
-      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
      LLM_MODEL_ID: ${LLM_MODEL_ID}
      VLLM_TORCH_PROFILER_DIR: "/mnt"
      LLM_SERVER_PORT: ${LLM_SERVER_PORT}
--- a/AudioQnA/docker_compose/intel/cpu/xeon/compose_multilang.yaml
+++ b/AudioQnA/docker_compose/intel/cpu/xeon/compose_multilang.yaml
@@ -24,6 +24,9 @@ services:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
+      llm_download: ${llm_download:-True}
+    # volumes:
+    #  - ./pretrained_models/:/home/user/GPT-SoVITS/GPT_SoVITS/pretrained_models/
    restart: unless-stopped
  vllm-service:
    image: ${REGISTRY:-opea}/vllm:${TAG:-latest}
@@ -37,7 +40,7 @@ services:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
-      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
      LLM_MODEL_ID: ${LLM_MODEL_ID}
      VLLM_TORCH_PROFILER_DIR: "/mnt"
      LLM_SERVER_PORT: ${LLM_SERVER_PORT}
--- a/AudioQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml
+++ b/AudioQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml
@@ -36,7 +36,7 @@ services:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
-      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
      LLM_SERVER_PORT: ${LLM_SERVER_PORT}
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://$host_ip:${LLM_SERVER_PORT}/health || exit 1"]
--- a/AudioQnA/docker_compose/intel/cpu/xeon/set_env.sh
+++ b/AudioQnA/docker_compose/intel/cpu/xeon/set_env.sh
@@ -5,7 +5,7 @@

 # export host_ip=<your External Public IP>
 export host_ip=$(hostname -I | awk '{print $1}')
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 # <token>

 export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
@@ -14,7 +14,8 @@ export MEGA_SERVICE_HOST_IP=${host_ip}
 export WHISPER_SERVER_HOST_IP=${host_ip}
 export SPEECHT5_SERVER_HOST_IP=${host_ip}
 export LLM_SERVER_HOST_IP=${host_ip}
-
+export GPT_SOVITS_SERVER_HOST_IP=${host_ip}
+export GPT_SOVITS_SERVER_PORT=9880
 export WHISPER_SERVER_PORT=7066
 export SPEECHT5_SERVER_PORT=7055
 export LLM_SERVER_PORT=3006
--- a/AudioQnA/docker_compose/intel/hpu/gaudi/README.md
+++ b/AudioQnA/docker_compose/intel/hpu/gaudi/README.md
@@ -1,145 +1,170 @@
-# Build Mega Service of AudioQnA on Gaudi
+# Deploying AudioQnA on Intel® Gaudi® Processors

-This document outlines the deployment process for a AudioQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on Intel Gaudi server.
-
-The default pipeline deploys with vLLM as the LLM serving component. It also provides options of using TGI backend for LLM microservice, please refer to [Start the MegaService](#-start-the-megaservice) section in this page.
+This document outlines the single-node deployment process for an AudioQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservices on an Intel Gaudi server. The steps include pulling Docker images, deploying containers via Docker Compose, and running the service using the `llm` microservice.

 Note: The default LLM is `meta-llama/Meta-Llama-3-8B-Instruct`. Before deploying the application, please make sure either you've requested and been granted the access to it on [Huggingface](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) or you've downloaded the model locally from [ModelScope](https://www.modelscope.cn/models).

-## 🚀 Build Docker images
+## Table of Contents

-### 1. Source Code install GenAIComps
+1. [AudioQnA Quick Start Deployment](#audioqna-quick-start-deployment)
+2. [AudioQnA Docker Compose Files](#audioqna-docker-compose-files)
+3. [Validate Microservices](#validate-microservices)
+4. [Conclusion](#conclusion)

-```bash
-git clone https://github.com/opea-project/GenAIComps.git
-cd GenAIComps
-```
+## AudioQnA Quick Start Deployment

-### 2. Build ASR Image
+This section describes how to quickly deploy and test the AudioQnA service manually on an Intel® Gaudi® processor. The basic steps are:

-```bash
-docker build -t opea/whisper-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/integrations/dependency/whisper/Dockerfile.intel_hpu .
-```
+1. [Access the Code](#access-the-code)
+2. [Configure the Deployment Environment](#configure-the-deployment-environment)
+3. [Deploy the Services Using Docker Compose](#deploy-the-services-using-docker-compose)
+4. [Check the Deployment Status](#check-the-deployment-status)
+5. [Validate the Pipeline](#validate-the-pipeline)
+6. [Cleanup the Deployment](#cleanup-the-deployment)

-### 3. Build vLLM Image
+### Access the Code

-git clone https://github.com/HabanaAI/vllm-fork.git
-cd vllm-fork/
-VLLM_VER=$(git describe --tags "$(git rev-list --tags --max-count=1)")
-git checkout ${VLLM_VER}
-docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile.hpu -t opea/vllm-gaudi:latest --shm-size=128g .
-
-### 4. Build TTS Image
-
-```bash
-docker build -t opea/speecht5-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/src/integrations/dependency/speecht5/Dockerfile.intel_hpu .
-```
-
-### 5. Build MegaService Docker Image
-
-To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `audioqna.py` Python script. Build the MegaService Docker image using the command below:
+Clone the GenAIExamples repository and access the AudioQnA Intel® Gaudi® platform Docker Compose files and supporting scripts:

 ```bash
 git clone https://github.com/opea-project/GenAIExamples.git
-cd GenAIExamples/AudioQnA/
-docker build --no-cache -t opea/audioqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
+cd GenAIExamples/AudioQnA
 ```

-Then run the command `docker images`, you will have following images ready:
-
-1. `opea/whisper-gaudi:latest`
-2. `opea/vllm-gaudi:latest`
-3. `opea/speecht5-gaudi:latest`
-4. `opea/audioqna:latest`
-
-## 🚀 Set the environment variables
-
-Before starting the services with `docker compose`, you have to recheck the following environment variables.
+Then check out a released version, such as v1.2:

 ```bash
-export host_ip=<your External Public IP>    # export host_ip=$(hostname -I | awk '{print $1}')
-export HUGGINGFACEHUB_API_TOKEN=<your HF token>
-
-export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
-# set vLLM parameters
-export NUM_CARDS=1
-export BLOCK_SIZE=128
-export MAX_NUM_SEQS=256
-export MAX_SEQ_LEN_TO_CAPTURE=2048
-
-export MEGA_SERVICE_HOST_IP=${host_ip}
-export WHISPER_SERVER_HOST_IP=${host_ip}
-export SPEECHT5_SERVER_HOST_IP=${host_ip}
-export LLM_SERVER_HOST_IP=${host_ip}
-
-export WHISPER_SERVER_PORT=7066
-export SPEECHT5_SERVER_PORT=7055
-export LLM_SERVER_PORT=3006
-
-export BACKEND_SERVICE_ENDPOINT=http://${host_ip}:3008/v1/audioqna
+git checkout v1.2
 ```

-or use set_env.sh file to setup environment variables.
+### Configure the Deployment Environment

-Note:
-
- Please replace with host_ip with your external IP address, do not use localhost.
- If you are in a proxy environment, also set the proxy-related environment variables:
-
-```
-export http_proxy="Your_HTTP_Proxy"
-export https_proxy="Your_HTTPs_Proxy"
-# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
-export no_proxy="Your_No_Proxy",${host_ip},whisper-service,speecht5-service,tgi-service,vllm-service,audioqna-gaudi-backend-server,audioqna-gaudi-ui-server
-```
-
-## 🚀 Start the MegaService
-
-> **_NOTE:_** Users will need at least three Gaudi cards for AudioQnA.
+To set up environment variables for deploying AudioQnA services, set up some parameters specific to the deployment environment and source the `set_env.sh` script in this directory:

 ```bash
-cd GenAIExamples/AudioQnA/docker_compose/intel/hpu/gaudi/
+export host_ip="External_Public_IP"           # ip address of the node
+export HF_TOKEN="Your_HuggingFace_API_Token"
+export http_proxy="Your_HTTP_Proxy"           # http proxy if any
+export https_proxy="Your_HTTPs_Proxy"         # https proxy if any
+export no_proxy=localhost,127.0.0.1,$host_ip,whisper-service,speecht5-service,vllm-service,tgi-service,audioqna-gaudi-backend-server,audioqna-gaudi-ui-server  # additional no proxies if needed
+export NGINX_PORT=${your_nginx_port}          # your usable port for nginx, 80 for example
+source ./docker_compose/intel/hpu/gaudi/set_env.sh
 ```

-If use vLLM as the LLM serving backend:
+Consult the section on [AudioQnA Service configuration](#audioqna-configuration) for information on how service specific configuration parameters affect deployments.

-```
-docker compose up -d
+### Deploy the Services Using Docker Compose
+
+To deploy the AudioQnA services, execute the `docker compose up` command with the appropriate arguments. For a default deployment, execute the command below. It uses the 'compose.yaml' file.
+
+```bash
+cd docker_compose/intel/hpu/gaudi
+docker compose -f compose.yaml up -d
 ```

-If use TGI as the LLM serving backend:
+> **Note**: developers should build the Docker image from source when:
+>
+> - Developing off the git main branch (as the container's ports in the repo may be different from the published Docker image).
+> - Unable to download the Docker image.
+> - Using a specific version of the Docker image.

-```
-docker compose -f compose_tgi.yaml up -d
+Please refer to the table below to build different microservices from source:
+
+| Microservice | Deployment Guide                                                                                                     |
+| ------------ | -------------------------------------------------------------------------------------------------------------------- |
+| vLLM-gaudi   | [vLLM build guide](https://github.com/opea-project/GenAIComps/tree/main/comps/third_parties/vllm#build-docker-1)     |
+| LLM          | [LLM build guide](https://github.com/opea-project/GenAIComps/tree/main/comps/llms)                                   |
+| WHISPER      | [Whisper build guide](https://github.com/opea-project/GenAIComps/tree/main/comps/asr/src#211-whisper-server-image)   |
+| SPEECHT5     | [SpeechT5 build guide](https://github.com/opea-project/GenAIComps/tree/main/comps/tts/src#211-speecht5-server-image) |
+| MegaService  | [MegaService build guide](../../../../README_miscellaneous.md#build-megaservice-docker-image)                        |
+| UI           | [Basic UI build guide](../../../../README_miscellaneous.md#build-ui-docker-image)                                    |
+
+### Check the Deployment Status
+
+After running docker compose, check if all the containers launched via docker compose have started:
+
+```bash
+docker ps -a
 ```

-## 🚀 Test MicroServices
+For the default deployment, the following 5 containers should have started:
+
+```
+23f27dab14a5   opea/whisper-gaudi:latest                                                                   "python whisper_serv…"   18 minutes ago   Up 18 minutes             0.0.0.0:7066->7066/tcp, :::7066->7066/tcp                                              whisper-service
+629da06b7fb2   opea/audioqna-ui:latest                                                                     "docker-entrypoint.s…"   19 minutes ago   Up 18 minutes             0.0.0.0:5173->5173/tcp, :::5173->5173/tcp                                              audioqna-gaudi-ui-server
+8a74d9806b87   opea/audioqna:latest                                                                        "python audioqna.py"     19 minutes ago   Up 18 minutes             0.0.0.0:3008->8888/tcp, [::]:3008->8888/tcp                                            audioqna-gaudi-backend-server
+29324430f42e   opea/vllm-gaudi:latest                                                                      "python3 -m vllm.ent…"   19 minutes ago   Up 19 minutes (healthy)   0.0.0.0:3006->80/tcp, [::]:3006->80/tcp                                                vllm-gaudi-service
+dbd585f0a95a   opea/speecht5-gaudi:latest                                                                  "python speecht5_ser…"   19 minutes ago   Up 19 minutes             0.0.0.0:7055->7055/tcp, :::7055->7055/tcp                                              speecht5-service
+```
+
+If any issues are encountered during deployment, refer to the [Troubleshooting](../../../../README_miscellaneous.md#troubleshooting) section.
+
+### Validate the Pipeline
+
+Once the AudioQnA services are running, test the pipeline using the following command:
+
+```bash
+# Test the AudioQnA megaservice by recording a .wav file, encoding the file into the base64 format, and then sending the base64 string to the megaservice endpoint.
+# The megaservice will return a spoken response as a base64 string. To listen to the response, decode the base64 string and save it as a .wav file.
+wget https://github.com/intel/intel-extension-for-transformers/raw/refs/heads/main/intel_extension_for_transformers/neural_chat/assets/audio/sample_2.wav
+base64_audio=$(base64 -w 0 sample_2.wav)
+
+# if you are using speecht5 as the tts service, voice can be "default" or "male"
+
+curl http://${host_ip}:3008/v1/audioqna \
+  -X POST \
+  -H "Content-Type: application/json" \
+  -d "{\"audio\": \"${base64_audio}\", \"max_tokens\": 64, \"voice\": \"default\"}" \
+  | sed 's/^"//;s/"$//' | base64 -d > output.wav
+```
+
+**Note**: Access the AudioQnA UI in a web browser at `http://${host_ip}:5173`. Please confirm that port `5173` is open in the firewall. To validate each microservice used in the pipeline, refer to the [Validate Microservices](#validate-microservices) section.
+
+### Cleanup the Deployment
+
+To stop the containers associated with the deployment, execute the following command:
+
+```bash
+docker compose -f compose.yaml down
+```
+
+## AudioQnA Docker Compose Files
+
+In the context of deploying an AudioQnA pipeline on an Intel® Gaudi® platform, we can pick and choose different large language model serving frameworks. The table below outlines the various configurations that are available as part of the application. These configurations can be used as templates and can be extended to different components available in [GenAIComps](https://github.com/opea-project/GenAIComps.git).
+
+| File                                   | Description                                                                               |
+| -------------------------------------- | ----------------------------------------------------------------------------------------- |
+| [compose.yaml](./compose.yaml)         | Default compose file using vLLM as the LLM serving framework                              |
+| [compose_tgi.yaml](./compose_tgi.yaml) | The LLM serving framework is TGI. All other configurations remain the same as the default |
+
+## Validate Microservices

 1. Whisper Service

   ```bash
-   curl http://${host_ip}:${WHISPER_SERVER_PORT}/v1/asr \
-     -X POST \
-     -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
-     -H 'Content-Type: application/json'
+   wget https://github.com/intel/intel-extension-for-transformers/raw/main/intel_extension_for_transformers/neural_chat/assets/audio/sample.wav
+   curl http://${host_ip}:${WHISPER_SERVER_PORT}/v1/audio/transcriptions \
+     -H "Content-Type: multipart/form-data" \
+     -F file="@./sample.wav" \
+     -F model="openai/whisper-small"
   ```

 2. LLM backend Service

-   In the first startup, this service will take more time to download, load and warm up the model. After it's finished, the service will be ready and the container (`vllm-gaudi-service` or `tgi-gaudi-service`) status shown via `docker ps` will be `healthy`. Before that, the status will be `health: starting`.
+   On first startup, this service will take more time to download, load and warm up the model. After it's finished, the service will be ready and the container (`vllm-gaudi-service` or `tgi-gaudi-service`) status shown via `docker ps` will be `healthy`. Before that, the status will be `health: starting`.

   Or try the command below to check whether the LLM serving is ready.

   ```bash
   # vLLM service
-   docker logs vllm-gaudi-service 2>&1 | grep complete
+   docker logs vllm-gaudi-service 2>&1 | grep complete
   # If the service is ready, you will get the response like below.
   INFO:     Application startup complete.
   ```

   ```bash
   # TGI service
-   docker logs tgi-gaudi-service | grep Connected
+   docker logs tgi-gaudi-service | grep Connected
   # If the service is ready, you will get the response like below.
   2024-09-03T02:47:53.402023Z  INFO text_generation_router::server: router/src/server.rs:2311: Connected
   ```
@@ -156,24 +181,11 @@ docker compose -f compose_tgi.yaml up -d

 3. TTS Service

-   ```
+   ```bash
   # speecht5 service
-   curl http://${host_ip}:${SPEECHT5_SERVER_PORT}/v1/tts
-     -X POST \
-     -d '{"text": "Who are you?"}' \
-     -H 'Content-Type: application/json'
+   curl http://${host_ip}:${SPEECHT5_SERVER_PORT}/v1/audio/speech -XPOST -d '{"input": "Who are you?"}' -H 'Content-Type: application/json' --output speech.mp3
   ```

-## 🚀 Test MegaService
+## Conclusion

-Test the AudioQnA megaservice by recording a .wav file, encoding the file into the base64 format, and then sending the
-base64 string to the megaservice endpoint. The megaservice will return a spoken response as a base64 string. To listen
-to the response, decode the base64 string and save it as a .wav file.
-
-```bash
-# voice can be "default" or "male"
-curl http://${host_ip}:3008/v1/audioqna \
-  -X POST \
-  -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64, "voice":"default"}' \
-  -H 'Content-Type: application/json' | sed 's/^"//;s/"$//' | base64 -d > output.wav
-```
+This guide should enable developers to deploy the default configuration or any of the other compose yaml files for different configurations. It also highlights the configurable parameters that can be set before deployment.
--- a/AudioQnA/docker_compose/intel/hpu/gaudi/compose.yaml
+++ b/AudioQnA/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -45,7 +45,7 @@ services:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
-      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      HABANA_VISIBLE_DEVICES: all
@@ -62,7 +62,7 @@ services:
    cap_add:
      - SYS_NICE
    ipc: host
-    command: --model ${LLM_MODEL_ID} --tensor-parallel-size ${NUM_CARDS} --host 0.0.0.0 --port 80 --block-size ${BLOCK_SIZE} --max-num-seqs ${MAX_NUM_SEQS} --max-seq_len-to-capture ${MAX_SEQ_LEN_TO_CAPTURE}
+    command: --model ${LLM_MODEL_ID} --tensor-parallel-size ${NUM_CARDS} --host 0.0.0.0 --port 80 --block-size ${BLOCK_SIZE} --max-num-seqs ${MAX_NUM_SEQS} --max-seq-len-to-capture ${MAX_SEQ_LEN_TO_CAPTURE}
  audioqna-gaudi-backend-server:
    image: ${REGISTRY:-opea}/audioqna:${TAG:-latest}
    container_name: audioqna-gaudi-backend-server
--- a/AudioQnA/docker_compose/intel/hpu/gaudi/compose_tgi.yaml
+++ b/AudioQnA/docker_compose/intel/hpu/gaudi/compose_tgi.yaml
@@ -45,7 +45,7 @@ services:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
-      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      HABANA_VISIBLE_DEVICES: all
--- a/AudioQnA/docker_compose/intel/hpu/gaudi/set_env.sh
+++ b/AudioQnA/docker_compose/intel/hpu/gaudi/set_env.sh
@@ -5,7 +5,7 @@

 # export host_ip=<your External Public IP>
 export host_ip=$(hostname -I | awk '{print $1}')
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 # <token>

 export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
--- a/AudioQnA/docker_image_build/build.yaml
+++ b/AudioQnA/docker_image_build/build.yaml
@@ -5,6 +5,8 @@ services:
  audioqna:
    build:
      args:
+        IMAGE_REPO: ${REGISTRY}
+        BASE_TAG: ${TAG}
        http_proxy: ${http_proxy}
        https_proxy: ${https_proxy}
        no_proxy: ${no_proxy}
@@ -26,13 +28,13 @@ services:
  whisper-gaudi:
    build:
      context: GenAIComps
-      dockerfile: comps/asr/src/integrations/dependency/whisper/Dockerfile.intel_hpu
+      dockerfile: comps/third_parties/whisper/src/Dockerfile.intel_hpu
    extends: audioqna
    image: ${REGISTRY:-opea}/whisper-gaudi:${TAG:-latest}
  whisper:
    build:
      context: GenAIComps
-      dockerfile: comps/asr/src/integrations/dependency/whisper/Dockerfile
+      dockerfile: comps/third_parties/whisper/src/Dockerfile
    extends: audioqna
    image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
  asr:
@@ -50,13 +52,13 @@ services:
  speecht5-gaudi:
    build:
      context: GenAIComps
-      dockerfile: comps/tts/src/integrations/dependency/speecht5/Dockerfile.intel_hpu
+      dockerfile: comps/third_parties/speecht5/src/Dockerfile.intel_hpu
    extends: audioqna
    image: ${REGISTRY:-opea}/speecht5-gaudi:${TAG:-latest}
  speecht5:
    build:
      context: GenAIComps
-      dockerfile: comps/tts/src/integrations/dependency/speecht5/Dockerfile
+      dockerfile: comps/third_parties/speecht5/src/Dockerfile
    extends: audioqna
    image: ${REGISTRY:-opea}/speecht5:${TAG:-latest}
  tts:
@@ -68,13 +70,13 @@ services:
  gpt-sovits:
    build:
      context: GenAIComps
-      dockerfile: comps/tts/src/integrations/dependency/gpt-sovits/Dockerfile
+      dockerfile: comps/third_parties/gpt-sovits/src/Dockerfile
    extends: audioqna
    image: ${REGISTRY:-opea}/gpt-sovits:${TAG:-latest}
  vllm:
    build:
      context: vllm
-      dockerfile: Dockerfile.cpu
+      dockerfile: docker/Dockerfile.cpu
    extends: audioqna
    image: ${REGISTRY:-opea}/vllm:${TAG:-latest}
  vllm-gaudi:
@@ -85,10 +87,7 @@ services:
    image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest}
  vllm-rocm:
    build:
-      args:
-        http_proxy: ${http_proxy}
-        https_proxy: ${https_proxy}
-        no_proxy: ${no_proxy}
      context: GenAIComps
      dockerfile: comps/third_parties/vllm/src/Dockerfile.amd_gpu
+    extends: audioqna
    image: ${REGISTRY:-opea}/vllm-rocm:${TAG:-latest}
--- a/AudioQnA/kubernetes/helm/cpu-multilang-values.yaml
+++ b/AudioQnA/kubernetes/helm/cpu-multilang-values.yaml
@@ -0,0 +1,15 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+tgi:
+  enabled: false
+vllm:
+  enabled: true
+
+speecht5:
+  enabled: false
+gpt-sovits:
+  enabled: true
+
+image:
+  repository: opea/audioqna-multilang
--- a/AudioQnA/kubernetes/helm/cpu-tgi-values.yaml
+++ b/AudioQnA/kubernetes/helm/cpu-tgi-values.yaml
@@ -0,0 +1,12 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+tgi:
+  enabled: true
+vllm:
+  enabled: false
+
+speecht5:
+  enabled: true
+gpt-sovits:
+  enabled: false
--- a/AudioQnA/kubernetes/helm/cpu-values.yaml
+++ b/AudioQnA/kubernetes/helm/cpu-values.yaml
@@ -2,4 +2,11 @@
 # SPDX-License-Identifier: Apache-2.0

 tgi:
-  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
+  enabled: false
+vllm:
+  enabled: true
+
+speecht5:
+  enabled: true
+gpt-sovits:
+  enabled: false
--- a/AudioQnA/kubernetes/helm/gaudi-tgi-values.yaml
+++ b/AudioQnA/kubernetes/helm/gaudi-tgi-values.yaml
@@ -0,0 +1,49 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+tgi:
+  enabled: true
+  accelDevice: "gaudi"
+  image:
+    repository: ghcr.io/huggingface/tgi-gaudi
+    tag: "2.3.1"
+  resources:
+    limits:
+      habana.ai/gaudi: 1
+  MAX_INPUT_LENGTH: "1024"
+  MAX_TOTAL_TOKENS: "2048"
+  CUDA_GRAPHS: ""
+  HF_HUB_DISABLE_PROGRESS_BARS: 1
+  HF_HUB_ENABLE_HF_TRANSFER: 0
+  ENABLE_HPU_GRAPH: true
+  LIMIT_HPU_GRAPH: true
+  USE_FLASH_ATTENTION: true
+  FLASH_ATTENTION_RECOMPUTE: true
+  readinessProbe:
+    initialDelaySeconds: 5
+    periodSeconds: 5
+    timeoutSeconds: 1
+  startupProbe:
+    initialDelaySeconds: 5
+    periodSeconds: 5
+    timeoutSeconds: 1
+    failureThreshold: 120
+vllm:
+  enabled: false
+
+whisper:
+  image:
+    repository: opea/whisper-gaudi
+  resources:
+    limits:
+      habana.ai/gaudi: 1
+
+speecht5:
+  enabled: true
+  image:
+    repository: opea/speecht5-gaudi
+  resources:
+    limits:
+      habana.ai/gaudi: 1
+gpt-sovits:
+  enabled: false
--- a/AudioQnA/kubernetes/helm/gaudi-values.yaml
+++ b/AudioQnA/kubernetes/helm/gaudi-values.yaml
@@ -2,35 +2,27 @@
 # SPDX-License-Identifier: Apache-2.0

 tgi:
+  enabled: false
+vllm:
+  enabled: true
  accelDevice: "gaudi"
  image:
-    repository: ghcr.io/huggingface/tgi-gaudi
-    tag: "2.3.1"
+    repository: opea/vllm-gaudi
+  startupProbe:
+    failureThreshold: 360
+
+  PT_HPU_ENABLE_LAZY_COLLECTIVES: "true"
+  OMPI_MCA_btl_vader_single_copy_mechanism: "none"
+
  resources:
    limits:
      habana.ai/gaudi: 1
-  MAX_INPUT_LENGTH: "1024"
-  MAX_TOTAL_TOKENS: "2048"
-  CUDA_GRAPHS: ""
-  HF_HUB_DISABLE_PROGRESS_BARS: 1
-  HF_HUB_ENABLE_HF_TRANSFER: 0
-  ENABLE_HPU_GRAPH: true
-  LIMIT_HPU_GRAPH: true
-  USE_FLASH_ATTENTION: true
-  FLASH_ATTENTION_RECOMPUTE: true
-  livenessProbe:
-    initialDelaySeconds: 5
-    periodSeconds: 5
-    timeoutSeconds: 1
-  readinessProbe:
-    initialDelaySeconds: 5
-    periodSeconds: 5
-    timeoutSeconds: 1
-  startupProbe:
-    initialDelaySeconds: 5
-    periodSeconds: 5
-    timeoutSeconds: 1
-    failureThreshold: 120
+  extraCmdArgs: [
+    "--tensor-parallel-size", "1",
+    "--block-size", "128",
+    "--max-num-seqs", "256",
+    "--max-seq-len-to-capture", "2048"
+  ]

 whisper:
  image:
@@ -40,8 +32,11 @@ whisper:
      habana.ai/gaudi: 1

 speecht5:
+  enabled: true
  image:
    repository: opea/speecht5-gaudi
  resources:
    limits:
      habana.ai/gaudi: 1
+gpt-sovits:
+  enabled: false
--- a/AudioQnA/tests/README.md
+++ b/AudioQnA/tests/README.md
@@ -0,0 +1,45 @@
+# AudioQnA E2E test scripts
+
+## Set the required environment variable
+
+```bash
+export HF_TOKEN="Your_Huggingface_API_Token"
+```
+
+## Run test
+
+On Intel Xeon with TGI:
+
+```bash
+bash test_compose_tgi_on_xeon.sh
+```
+
+On Intel Xeon with vLLM:
+
+```bash
+bash test_compose_on_xeon.sh
+```
+
+On Intel Gaudi with TGI:
+
+```bash
+bash test_compose_tgi_on_gaudi.sh
+```
+
+On Intel Gaudi with vLLM:
+
+```bash
+bash test_compose_on_gaudi.sh
+```
+
+On AMD ROCm with TGI:
+
+```bash
+bash test_compose_on_rocm.sh
+```
+
+On AMD ROCm with vLLM:
+
+```bash
+bash test_compose_vllm_on_rocm.sh
+```
--- a/AudioQnA/tests/test_compose_multilang_on_xeon.sh
+++ b/AudioQnA/tests/test_compose_multilang_on_xeon.sh
@@ -17,23 +17,17 @@ ip_address=$(hostname -I | awk '{print $1}')

 function build_docker_images() {
    opea_branch=${opea_branch:-"main"}
-    # If the opea_branch isn't main, replace the git clone branch in Dockerfile.
-    if [[ "${opea_branch}" != "main" ]]; then
-        cd $WORKPATH
-        OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git"
-        NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git"
-        find . -type f -name "Dockerfile*" | while read -r file; do
-            echo "Processing file: $file"
-            sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file"
-        done
-    fi

    cd $WORKPATH/docker_image_build
    git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
+    pushd GenAIComps
+    echo "GenAIComps test commit is $(git rev-parse HEAD)"
+    docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
+    popd && sleep 1s

    git clone https://github.com/vllm-project/vllm.git
    cd ./vllm/
-    VLLM_VER="$(git describe --tags "$(git rev-list --tags --max-count=1)" )"
+    VLLM_VER="v0.8.3"
    echo "Check out vLLM tag ${VLLM_VER}"
    git checkout ${VLLM_VER} &> /dev/null && cd ../

@@ -46,21 +40,8 @@ function build_docker_images() {

 function start_services() {
    cd $WORKPATH/docker_compose/intel/cpu/xeon/
-    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
-    export LLM_MODEL_ID=meta-llama/Meta-Llama-3-8B-Instruct
-
-    export MEGA_SERVICE_HOST_IP=${ip_address}
-    export WHISPER_SERVER_HOST_IP=${ip_address}
-    export GPT_SOVITS_SERVER_HOST_IP=${ip_address}
-    export LLM_SERVER_HOST_IP=${ip_address}
-
-    export WHISPER_SERVER_PORT=7066
-    export GPT_SOVITS_SERVER_PORT=9880
-    export LLM_SERVER_PORT=3006
-
-    export BACKEND_SERVICE_ENDPOINT=http://${ip_address}:3008/v1/audioqna
    export host_ip=${ip_address}
-
+    source set_env.sh
    # sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env

    # Start Docker Containers
@@ -103,14 +84,26 @@ function stop_docker() {

 function main() {

+    echo "::group::stop_docker"
    stop_docker
+    echo "::endgroup::"
+
+    echo "::group::build_docker_images"
    if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
+    echo "::endgroup::"
+
+    echo "::group::start_services"
    start_services
+    echo "::endgroup::"

+    echo "::group::validate_megaservice"
    validate_megaservice
+    echo "::endgroup::"

+    echo "::group::stop_docker"
    stop_docker
-    echo y | docker system prune
+    docker system prune -f
+    echo "::endgroup::"

 }

--- a/AudioQnA/tests/test_compose_on_gaudi.sh
+++ b/AudioQnA/tests/test_compose_on_gaudi.sh
@@ -17,25 +17,19 @@ ip_address=$(hostname -I | awk '{print $1}')

 function build_docker_images() {
    opea_branch=${opea_branch:-"main"}
-    # If the opea_branch isn't main, replace the git clone branch in Dockerfile.
-    if [[ "${opea_branch}" != "main" ]]; then
-        cd $WORKPATH
-        OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git"
-        NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git"
-        find . -type f -name "Dockerfile*" | while read -r file; do
-            echo "Processing file: $file"
-            sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file"
-        done
-    fi

    cd $WORKPATH/docker_image_build
    git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
+    pushd GenAIComps
+    echo "GenAIComps test commit is $(git rev-parse HEAD)"
+    docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
+    popd && sleep 1s

    git clone https://github.com/HabanaAI/vllm-fork.git
    cd vllm-fork/
-    VLLM_VER=$(git describe --tags "$(git rev-list --tags --max-count=1)")
-    echo "Check out vLLM tag ${VLLM_VER}"
-    git checkout ${VLLM_VER} &> /dev/null && cd ../
+    VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0
+    echo "Check out vLLM tag ${VLLM_FORK_VER}"
+    git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../

    echo "Build all the images with --no-cache, check docker_image_build.log for details..."
    service_list="audioqna audioqna-ui whisper-gaudi speecht5-gaudi vllm-gaudi"
@@ -46,24 +40,8 @@ function build_docker_images() {

 function start_services() {
    cd $WORKPATH/docker_compose/intel/hpu/gaudi
-    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
-    export LLM_MODEL_ID=meta-llama/Meta-Llama-3-8B-Instruct
-    export NUM_CARDS=1
-    export BLOCK_SIZE=128
-    export MAX_NUM_SEQS=256
-    export MAX_SEQ_LEN_TO_CAPTURE=2048
-
-    export MEGA_SERVICE_HOST_IP=${ip_address}
-    export WHISPER_SERVER_HOST_IP=${ip_address}
-    export SPEECHT5_SERVER_HOST_IP=${ip_address}
-    export LLM_SERVER_HOST_IP=${ip_address}
-
-    export WHISPER_SERVER_PORT=7066
-    export SPEECHT5_SERVER_PORT=7055
-    export LLM_SERVER_PORT=3006
-
-    export BACKEND_SERVICE_ENDPOINT=http://${ip_address}:3008/v1/audioqna
    export host_ip=${ip_address}
+    source set_env.sh
    # sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env

    # Start Docker Containers
@@ -105,34 +83,8 @@ function validate_megaservice() {
        echo "Result wrong."
        exit 1
    fi
-
 }

-#function validate_frontend() {
-#    cd $WORKPATH/ui/svelte
-#    local conda_env_name="OPEA_e2e"
-#    export PATH=${HOME}/miniforge3/bin/:$PATH
-##    conda remove -n ${conda_env_name} --all -y
-##    conda create -n ${conda_env_name} python=3.12 -y
-#    source activate ${conda_env_name}
-#
-#    sed -i "s/localhost/$ip_address/g" playwright.config.ts
-#
-##    conda install -c conda-forge nodejs=22.6.0 -y
-#    npm install && npm ci && npx playwright install --with-deps
-#    node -v && npm -v && pip list
-#
-#    exit_status=0
-#    npx playwright test || exit_status=$?
-#
-#    if [ $exit_status -ne 0 ]; then
-#        echo "[TEST INFO]: ---------frontend test failed---------"
-#        exit $exit_status
-#    else
-#        echo "[TEST INFO]: ---------frontend test passed---------"
-#    fi
-#}
-
 function stop_docker() {
    cd $WORKPATH/docker_compose/intel/hpu/gaudi
    docker compose -f compose.yaml stop && docker compose rm -f
@@ -140,15 +92,26 @@ function stop_docker() {

 function main() {

+    echo "::group::stop_docker"
    stop_docker
+    echo "::endgroup::"
+
+    echo "::group::build_docker_images"
    if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
+    echo "::endgroup::"
+
+    echo "::group::start_services"
    start_services
+    echo "::endgroup::"

+    echo "::group::validate_megaservice"
    validate_megaservice
-    # validate_frontend
+    echo "::endgroup::"

+    echo "::group::stop_docker"
    stop_docker
-    echo y | docker system prune
+    docker system prune -f
+    echo "::endgroup::"

 }

--- a/AudioQnA/tests/test_compose_on_rocm.sh
+++ b/AudioQnA/tests/test_compose_on_rocm.sh
@@ -9,6 +9,7 @@ echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
 echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
 export REGISTRY=${IMAGE_REPO}
 export TAG=${IMAGE_TAG}
+export MODEL_CACHE=${model_cache:-"/var/lib/GenAI/data"}

 WORKPATH=$(dirname "$PWD")
 LOG_PATH="$WORKPATH/tests"
@@ -17,46 +18,24 @@ export PATH="~/miniconda3/bin:$PATH"

 function build_docker_images() {
    opea_branch=${opea_branch:-"main"}
-    # If the opea_branch isn't main, replace the git clone branch in Dockerfile.
-    if [[ "${opea_branch}" != "main" ]]; then
-        cd $WORKPATH
-        OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git"
-        NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git"
-        find . -type f -name "Dockerfile*" | while read -r file; do
-            echo "Processing file: $file"
-            sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file"
-        done
-    fi

    cd $WORKPATH/docker_image_build
    git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
+    pushd GenAIComps
+    echo "GenAIComps test commit is $(git rev-parse HEAD)"
+    docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
+    popd && sleep 1s

    echo "Build all the images with --no-cache, check docker_image_build.log for details..."
    service_list="audioqna audioqna-ui whisper speecht5"
    docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
-    echo "docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-rocm"
-    docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
+
    docker images && sleep 1s
 }

 function start_services() {
    cd $WORKPATH/docker_compose/amd/gpu/rocm/
-    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
-    export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
-
-    export MEGA_SERVICE_HOST_IP=${ip_address}
-    export WHISPER_SERVER_HOST_IP=${ip_address}
-    export SPEECHT5_SERVER_HOST_IP=${ip_address}
-    export LLM_SERVER_HOST_IP=${ip_address}
-
-    export WHISPER_SERVER_PORT=7066
-    export SPEECHT5_SERVER_PORT=7055
-    export LLM_SERVER_PORT=3006
-
-    export BACKEND_SERVICE_ENDPOINT=http://${ip_address}:3008/v1/audioqna
-
-    # sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
-
+    source set_env.sh
    # Start Docker Containers
    docker compose up -d > ${LOG_PATH}/start_services_with_compose.log
    n=0
@@ -87,32 +66,6 @@ function validate_megaservice() {

 }

-#function validate_frontend() {
-# Frontend tests are currently disabled
-#    cd $WORKPATH/ui/svelte
-#    local conda_env_name="OPEA_e2e"
-#    export PATH=${HOME}/miniforge3/bin/:$PATH
-##    conda remove -n ${conda_env_name} --all -y
-##    conda create -n ${conda_env_name} python=3.12 -y
-#    source activate ${conda_env_name}
-#
-#    sed -i "s/localhost/$ip_address/g" playwright.config.ts
-#
-##    conda install -c conda-forge nodejs -y
-#    npm install && npm ci && npx playwright install --with-deps
-#    node -v && npm -v && pip list
-#
-#    exit_status=0
-#    npx playwright test || exit_status=$?
-#
-#    if [ $exit_status -ne 0 ]; then
-#        echo "[TEST INFO]: ---------frontend test failed---------"
-#        exit $exit_status
-#    else
-#        echo "[TEST INFO]: ---------frontend test passed---------"
-#    fi
-#}
-
 function stop_docker() {
    cd $WORKPATH/docker_compose/amd/gpu/rocm/
    docker compose stop && docker compose rm -f
@@ -120,16 +73,26 @@ function stop_docker() {

 function main() {

+    echo "::group::stop_docker"
    stop_docker
+    echo "::endgroup::"
+
+    echo "::group::build_docker_images"
    if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
+    echo "::endgroup::"
+
+    echo "::group::start_services"
    start_services
+    echo "::endgroup::"

+    echo "::group::validate_megaservice"
    validate_megaservice
-    # Frontend tests are currently disabled
-    # validate_frontend
+    echo "::endgroup::"

+    echo "::group::stop_docker"
    stop_docker
-    echo y | docker system prune
+    docker system prune -f
+    echo "::endgroup::"

 }

--- a/AudioQnA/tests/test_compose_on_xeon.sh
+++ b/AudioQnA/tests/test_compose_on_xeon.sh
@@ -17,23 +17,17 @@ ip_address=$(hostname -I | awk '{print $1}')

 function build_docker_images() {
    opea_branch=${opea_branch:-"main"}
-    # If the opea_branch isn't main, replace the git clone branch in Dockerfile.
-    if [[ "${opea_branch}" != "main" ]]; then
-        cd $WORKPATH
-        OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git"
-        NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git"
-        find . -type f -name "Dockerfile*" | while read -r file; do
-            echo "Processing file: $file"
-            sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file"
-        done
-    fi

    cd $WORKPATH/docker_image_build
    git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
+    pushd GenAIComps
+    echo "GenAIComps test commit is $(git rev-parse HEAD)"
+    docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
+    popd && sleep 1s

    git clone https://github.com/vllm-project/vllm.git
    cd ./vllm/
-    VLLM_VER="$(git describe --tags "$(git rev-list --tags --max-count=1)" )"
+    VLLM_VER="v0.8.3"
    echo "Check out vLLM tag ${VLLM_VER}"
    git checkout ${VLLM_VER} &> /dev/null && cd ../

@@ -46,21 +40,8 @@ function build_docker_images() {

 function start_services() {
    cd $WORKPATH/docker_compose/intel/cpu/xeon/
-    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
-    export LLM_MODEL_ID=meta-llama/Meta-Llama-3-8B-Instruct
-
-    export MEGA_SERVICE_HOST_IP=${ip_address}
-    export WHISPER_SERVER_HOST_IP=${ip_address}
-    export SPEECHT5_SERVER_HOST_IP=${ip_address}
-    export LLM_SERVER_HOST_IP=${ip_address}
-
-    export WHISPER_SERVER_PORT=7066
-    export SPEECHT5_SERVER_PORT=7055
-    export LLM_SERVER_PORT=3006
-
-    export BACKEND_SERVICE_ENDPOINT=http://${ip_address}:3008/v1/audioqna
    export host_ip=${ip_address}
-
+    source set_env.sh
    # sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env

    # Start Docker Containers
@@ -95,31 +76,6 @@ function validate_megaservice() {

 }

-#function validate_frontend() {
-#    cd $WORKPATH/ui/svelte
-#    local conda_env_name="OPEA_e2e"
-#    export PATH=${HOME}/miniforge3/bin/:$PATH
-##    conda remove -n ${conda_env_name} --all -y
-##    conda create -n ${conda_env_name} python=3.12 -y
-#    source activate ${conda_env_name}
-#
-#    sed -i "s/localhost/$ip_address/g" playwright.config.ts
-#
-##    conda install -c conda-forge nodejs=22.6.0 -y
-#    npm install && npm ci && npx playwright install --with-deps
-#    node -v && npm -v && pip list
-#
-#    exit_status=0
-#    npx playwright test || exit_status=$?
-#
-#    if [ $exit_status -ne 0 ]; then
-#        echo "[TEST INFO]: ---------frontend test failed---------"
-#        exit $exit_status
-#    else
-#        echo "[TEST INFO]: ---------frontend test passed---------"
-#    fi
-#}
-
 function stop_docker() {
    cd $WORKPATH/docker_compose/intel/cpu/xeon/
    docker compose -f compose.yaml stop && docker compose rm -f
@@ -127,15 +83,26 @@ function stop_docker() {

 function main() {

+    echo "::group::stop_docker"
    stop_docker
+    echo "::endgroup::"
+
+    echo "::group::build_docker_images"
    if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
+    echo "::endgroup::"
+
+    echo "::group::start_services"
    start_services
+    echo "::endgroup::"

+    echo "::group::validate_megaservice"
    validate_megaservice
-    # validate_frontend
+    echo "::endgroup::"

+    echo "::group::stop_docker"
    stop_docker
-    echo y | docker system prune
+    docker system prune -f
+    echo "::endgroup::"

 }

--- a/AudioQnA/tests/test_compose_tgi_on_gaudi.sh
+++ b/AudioQnA/tests/test_compose_tgi_on_gaudi.sh
@@ -17,46 +17,25 @@ ip_address=$(hostname -I | awk '{print $1}')

 function build_docker_images() {
    opea_branch=${opea_branch:-"main"}
-    # If the opea_branch isn't main, replace the git clone branch in Dockerfile.
-    if [[ "${opea_branch}" != "main" ]]; then
-        cd $WORKPATH
-        OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git"
-        NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git"
-        find . -type f -name "Dockerfile*" | while read -r file; do
-            echo "Processing file: $file"
-            sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file"
-        done
-    fi

    cd $WORKPATH/docker_image_build
    git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
+    pushd GenAIComps
+    echo "GenAIComps test commit is $(git rev-parse HEAD)"
+    docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
+    popd && sleep 1s

    echo "Build all the images with --no-cache, check docker_image_build.log for details..."
    service_list="audioqna audioqna-ui whisper-gaudi speecht5-gaudi"
    docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

-    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
    docker images && sleep 1s
 }

 function start_services() {
    cd $WORKPATH/docker_compose/intel/hpu/gaudi
-    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
-    export LLM_MODEL_ID=meta-llama/Meta-Llama-3-8B-Instruct
-
-    export MEGA_SERVICE_HOST_IP=${ip_address}
-    export WHISPER_SERVER_HOST_IP=${ip_address}
-    export SPEECHT5_SERVER_HOST_IP=${ip_address}
-    export LLM_SERVER_HOST_IP=${ip_address}
-
-    export WHISPER_SERVER_PORT=7066
-    export SPEECHT5_SERVER_PORT=7055
-    export LLM_SERVER_PORT=3006
-
-    export BACKEND_SERVICE_ENDPOINT=http://${ip_address}:3008/v1/audioqna
    export host_ip=${ip_address}
-    # sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
-
+    source set_env.sh
    # Start Docker Containers
    docker compose -f compose_tgi.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
    n=0
@@ -99,31 +78,6 @@ function validate_megaservice() {

 }

-#function validate_frontend() {
-#    cd $WORKPATH/ui/svelte
-#    local conda_env_name="OPEA_e2e"
-#    export PATH=${HOME}/miniforge3/bin/:$PATH
-##    conda remove -n ${conda_env_name} --all -y
-##    conda create -n ${conda_env_name} python=3.12 -y
-#    source activate ${conda_env_name}
-#
-#    sed -i "s/localhost/$ip_address/g" playwright.config.ts
-#
-##    conda install -c conda-forge nodejs=22.6.0 -y
-#    npm install && npm ci && npx playwright install --with-deps
-#    node -v && npm -v && pip list
-#
-#    exit_status=0
-#    npx playwright test || exit_status=$?
-#
-#    if [ $exit_status -ne 0 ]; then
-#        echo "[TEST INFO]: ---------frontend test failed---------"
-#        exit $exit_status
-#    else
-#        echo "[TEST INFO]: ---------frontend test passed---------"
-#    fi
-#}
-
 function stop_docker() {
    cd $WORKPATH/docker_compose/intel/hpu/gaudi
    docker compose -f compose_tgi.yaml stop && docker compose rm -f
@@ -131,15 +85,26 @@ function stop_docker() {

 function main() {

+    echo "::group::stop_docker"
    stop_docker
+    echo "::endgroup::"
+
+    echo "::group::build_docker_images"
    if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
+    echo "::endgroup::"
+
+    echo "::group::start_services"
    start_services
+    echo "::endgroup::"

+    echo "::group::validate_megaservice"
    validate_megaservice
-    # validate_frontend
+    echo "::endgroup::"

+    echo "::group::stop_docker"
    stop_docker
-    echo y | docker system prune
+    docker system prune -f
+    echo "::endgroup::"

 }

--- a/AudioQnA/tests/test_compose_tgi_on_xeon.sh
+++ b/AudioQnA/tests/test_compose_tgi_on_xeon.sh
@@ -17,47 +17,25 @@ ip_address=$(hostname -I | awk '{print $1}')

 function build_docker_images() {
    opea_branch=${opea_branch:-"main"}
-    # If the opea_branch isn't main, replace the git clone branch in Dockerfile.
-    if [[ "${opea_branch}" != "main" ]]; then
-        cd $WORKPATH
-        OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git"
-        NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git"
-        find . -type f -name "Dockerfile*" | while read -r file; do
-            echo "Processing file: $file"
-            sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file"
-        done
-    fi

    cd $WORKPATH/docker_image_build
    git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
+    pushd GenAIComps
+    echo "GenAIComps test commit is $(git rev-parse HEAD)"
+    docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
+    popd && sleep 1s

    echo "Build all the images with --no-cache, check docker_image_build.log for details..."
    service_list="audioqna audioqna-ui whisper speecht5"
    docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

-    docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
    docker images && sleep 1s
 }

 function start_services() {
    cd $WORKPATH/docker_compose/intel/cpu/xeon/
-    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
-    export LLM_MODEL_ID=meta-llama/Meta-Llama-3-8B-Instruct
-
-    export MEGA_SERVICE_HOST_IP=${ip_address}
-    export WHISPER_SERVER_HOST_IP=${ip_address}
-    export SPEECHT5_SERVER_HOST_IP=${ip_address}
-    export LLM_SERVER_HOST_IP=${ip_address}
-
-    export WHISPER_SERVER_PORT=7066
-    export SPEECHT5_SERVER_PORT=7055
-    export LLM_SERVER_PORT=3006
-
-    export BACKEND_SERVICE_ENDPOINT=http://${ip_address}:3008/v1/audioqna
    export host_ip=${ip_address}
-
-    # sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
-
+    source set_env.sh
    # Start Docker Containers
    docker compose -f compose_tgi.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
    n=0
@@ -90,31 +68,6 @@ function validate_megaservice() {

 }

-#function validate_frontend() {
-#    cd $WORKPATH/ui/svelte
-#    local conda_env_name="OPEA_e2e"
-#    export PATH=${HOME}/miniforge3/bin/:$PATH
-##    conda remove -n ${conda_env_name} --all -y
-##    conda create -n ${conda_env_name} python=3.12 -y
-#    source activate ${conda_env_name}
-#
-#    sed -i "s/localhost/$ip_address/g" playwright.config.ts
-#
-##    conda install -c conda-forge nodejs=22.6.0 -y
-#    npm install && npm ci && npx playwright install --with-deps
-#    node -v && npm -v && pip list
-#
-#    exit_status=0
-#    npx playwright test || exit_status=$?
-#
-#    if [ $exit_status -ne 0 ]; then
-#        echo "[TEST INFO]: ---------frontend test failed---------"
-#        exit $exit_status
-#    else
-#        echo "[TEST INFO]: ---------frontend test passed---------"
-#    fi
-#}
-
 function stop_docker() {
    cd $WORKPATH/docker_compose/intel/cpu/xeon/
    docker compose -f compose_tgi.yaml stop && docker compose rm -f
@@ -122,15 +75,26 @@ function stop_docker() {

 function main() {

+    echo "::group::stop_docker"
    stop_docker
+    echo "::endgroup::"
+
+    echo "::group::build_docker_images"
    if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
+    echo "::endgroup::"
+
+    echo "::group::start_services"
    start_services
+    echo "::endgroup::"

+    echo "::group::validate_megaservice"
    validate_megaservice
-    # validate_frontend
+    echo "::endgroup::"

+    echo "::group::stop_docker"
    stop_docker
-    echo y | docker system prune
+    docker system prune -f
+    echo "::endgroup::"

 }

--- a/AudioQnA/tests/test_compose_vllm_on_rocm.sh
+++ b/AudioQnA/tests/test_compose_vllm_on_rocm.sh
@@ -17,19 +17,13 @@ export PATH="~/miniconda3/bin:$PATH"

 function build_docker_images() {
    opea_branch=${opea_branch:-"main"}
-    # If the opea_branch isn't main, replace the git clone branch in Dockerfile.
-    if [[ "${opea_branch}" != "main" ]]; then
-        cd $WORKPATH
-        OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git"
-        NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git"
-        find . -type f -name "Dockerfile*" | while read -r file; do
-            echo "Processing file: $file"
-            sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file"
-        done
-    fi

    cd $WORKPATH/docker_image_build
    git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
+    pushd GenAIComps
+    echo "GenAIComps test commit is $(git rev-parse HEAD)"
+    docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
+    popd && sleep 1s

    echo "Build all the images with --no-cache, check docker_image_build.log for details..."
    service_list="audioqna audioqna-ui whisper speecht5 vllm-rocm"
@@ -39,27 +33,7 @@ function build_docker_images() {

 function start_services() {
    cd $WORKPATH/docker_compose/amd/gpu/rocm/
-
-    export host_ip=${ip_address}
-    export external_host_ip=${ip_address}
-    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
-    export HF_CACHE_DIR="./data"
-    export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
-    export VLLM_SERVICE_PORT="8081"
-
-    export MEGA_SERVICE_HOST_IP=${host_ip}
-    export WHISPER_SERVER_HOST_IP=${host_ip}
-    export SPEECHT5_SERVER_HOST_IP=${host_ip}
-    export LLM_SERVER_HOST_IP=${host_ip}
-
-    export WHISPER_SERVER_PORT=7066
-    export SPEECHT5_SERVER_PORT=7055
-    export LLM_SERVER_PORT=${VLLM_SERVICE_PORT}
-    export BACKEND_SERVICE_PORT=3008
-    export FRONTEND_SERVICE_PORT=5173
-
-    export BACKEND_SERVICE_ENDPOINT=http://${external_host_ip}:${BACKEND_SERVICE_PORT}/v1/audioqna
-
+    source set_env_vllm.sh
    sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env

    # Start Docker Containers
@@ -92,32 +66,6 @@ function validate_megaservice() {

 }

-#function validate_frontend() {
-## Frontend tests are currently disabled
-#    cd $WORKPATH/ui/svelte
-#    local conda_env_name="OPEA_e2e"
-#    export PATH=${HOME}/miniforge3/bin/:$PATH
-##    conda remove -n ${conda_env_name} --all -y
-##    conda create -n ${conda_env_name} python=3.12 -y
-#    source activate ${conda_env_name}
-#
-#    sed -i "s/localhost/$ip_address/g" playwright.config.ts
-#
-##    conda install -c conda-forge nodejs -y
-#    npm install && npm ci && npx playwright install --with-deps
-#    node -v && npm -v && pip list
-#
-#    exit_status=0
-#    npx playwright test || exit_status=$?
-#
-#    if [ $exit_status -ne 0 ]; then
-#        echo "[TEST INFO]: ---------frontend test failed---------"
-#        exit $exit_status
-#    else
-#        echo "[TEST INFO]: ---------frontend test passed---------"
-#    fi
-#}
-
 function stop_docker() {
    cd $WORKPATH/docker_compose/amd/gpu/rocm/
    docker compose -f compose_vllm.yaml stop && docker compose -f compose_vllm.yaml rm -f
@@ -125,16 +73,26 @@ function stop_docker() {

 function main() {

+    echo "::group::stop_docker"
    stop_docker
+    echo "::endgroup::"
+
+    echo "::group::build_docker_images"
    if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
+    echo "::endgroup::"
+
+    echo "::group::start_services"
    start_services
+    echo "::endgroup::"

+    echo "::group::validate_megaservice"
    validate_megaservice
-    # Frontend tests are currently disabled
-    # validate_frontend
+    echo "::endgroup::"

+    echo "::group::stop_docker"
    stop_docker
-    echo y | docker system prune
+    docker system prune -f
+    echo "::endgroup::"

 }

--- a/AudioQnA/tests/test_gmc_on_gaudi.sh
+++ b/AudioQnA/tests/test_gmc_on_gaudi.sh
@@ -2,6 +2,8 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0

+# ===== Deprecated =====
+
 set -xe
 USER_ID=$(whoami)
 LOG_PATH=/home/$(whoami)/logs
--- a/AudioQnA/tests/test_gmc_on_xeon.sh
+++ b/AudioQnA/tests/test_gmc_on_xeon.sh
@@ -2,6 +2,8 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0

+# ===== Deprecated =====
+
 set -xe
 USER_ID=$(whoami)
 LOG_PATH=/home/$(whoami)/logs
--- a/AvatarChatbot/Dockerfile
+++ b/AvatarChatbot/Dockerfile
@@ -1,48 +1,9 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0

-# Stage 1: base setup used by other stages
-FROM python:3.11-slim AS base
-
-# get security updates
-RUN apt-get update && apt-get upgrade -y && \
-    apt-get clean && rm -rf /var/lib/apt/lists/*
-
-ENV HOME=/home/user
-
-RUN useradd -m -s /bin/bash user && \
-    mkdir -p $HOME && \
-    chown -R user $HOME
-
-WORKDIR $HOME
-
-
-# Stage 2: latest GenAIComps sources
-FROM base AS git
-
-RUN apt-get update && apt-get install -y --no-install-recommends git
-RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git
-
-
-# Stage 3: common layer shared by services using GenAIComps
-FROM base AS comps-base
-
-# copy just relevant parts
-COPY --from=git $HOME/GenAIComps/comps $HOME/GenAIComps/comps
-COPY --from=git $HOME/GenAIComps/*.* $HOME/GenAIComps/LICENSE $HOME/GenAIComps/
-
-WORKDIR $HOME/GenAIComps
-RUN pip install --no-cache-dir --upgrade pip setuptools && \
-    pip install --no-cache-dir -r $HOME/GenAIComps/requirements.txt
-WORKDIR $HOME
-
-ENV PYTHONPATH=$PYTHONPATH:$HOME/GenAIComps
-
-USER user
-
-
-# Stage 4: unique part
-FROM comps-base
+ARG IMAGE_REPO=opea
+ARG BASE_TAG=latest
+FROM $IMAGE_REPO/comps-base:$BASE_TAG

 COPY ./avatarchatbot.py $HOME/avatarchatbot.py

--- a/AvatarChatbot/docker_compose/amd/gpu/rocm/README.md
+++ b/AvatarChatbot/docker_compose/amd/gpu/rocm/README.md
@@ -14,7 +14,7 @@ cd GenAIComps
 ### 2. Build ASR Image

 ```bash
-docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/integrations/dependency/whisper/Dockerfile .
+docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/third_parties/whisper/src/Dockerfile .


 docker build -t opea/asr:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/Dockerfile .
@@ -29,7 +29,7 @@ docker build --no-cache -t opea/llm-textgen:latest --build-arg https_proxy=$http
 ### 4. Build TTS Image

 ```bash
-docker build -t opea/speecht5:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/src/integrations/dependency/speecht5/Dockerfile .
+docker build -t opea/speecht5:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/third_parties/speecht5/src/Dockerfile .

 docker build -t opea/tts:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/src/Dockerfile .
 ```
@@ -68,7 +68,7 @@ Then run the command `docker images`, you will have following images ready:
 Before starting the services with `docker compose`, you have to recheck the following environment variables.

 ```bash
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 export host_ip=$(hostname -I | awk '{print $1}')

 export TGI_SERVICE_PORT=3006
--- a/AvatarChatbot/docker_compose/amd/gpu/rocm/compose.yaml
+++ b/AvatarChatbot/docker_compose/amd/gpu/rocm/compose.yaml
@@ -42,18 +42,18 @@ services:
    environment:
      TTS_ENDPOINT: ${TTS_ENDPOINT}
  tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+    image: ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
    container_name: tgi-service
    ports:
      - "${TGI_SERVICE_PORT:-3006}:80"
    volumes:
-      - "./data:/data"
+      - "${MODEL_CACHE:-./data}:/data"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
-      HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
    shm_size: 1g
    devices:
      - /dev/kfd:/dev/kfd
@@ -66,24 +66,6 @@ services:
      - seccomp:unconfined
    ipc: host
    command: --model-id ${LLM_MODEL_ID} --max-input-length 4096 --max-total-tokens 8192
-  llm:
-    image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest}
-    container_name: llm-tgi-server
-    depends_on:
-      - tgi-service
-    ports:
-      - "3007:9000"
-    ipc: host
-    environment:
-      no_proxy: ${no_proxy}
-      http_proxy: ${http_proxy}
-      https_proxy: ${https_proxy}
-      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
-      LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-      OPENAI_API_KEY: ${OPENAI_API_KEY}
-    restart: unless-stopped
  wav2lip-service:
    image: ${REGISTRY:-opea}/wav2lip:${TAG:-latest}
    container_name: wav2lip-service
@@ -125,7 +107,7 @@ services:
    container_name: avatarchatbot-backend-server
    depends_on:
      - asr
-      - llm
+      - tgi-service
      - tts
      - animation
    ports:
--- a/AvatarChatbot/docker_compose/amd/gpu/rocm/set_env.sh
+++ b/AvatarChatbot/docker_compose/amd/gpu/rocm/set_env.sh
@@ -3,7 +3,7 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0

-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 export OPENAI_API_KEY=${OPENAI_API_KEY}
 export host_ip=$(hostname -I | awk '{print $1}')

@@ -30,7 +30,7 @@ export ANIMATION_SERVICE_HOST_IP=${host_ip}
 export MEGA_SERVICE_PORT=8888
 export ASR_SERVICE_PORT=3001
 export TTS_SERVICE_PORT=3002
-export LLM_SERVICE_PORT=3007
+export LLM_SERVICE_PORT=3006
 export ANIMATION_SERVICE_PORT=3008

 export DEVICE="cpu"
@@ -41,7 +41,7 @@ export FACE="/home/user/comps/animation/src/assets/img/avatar5.png"
 # export AUDIO='assets/audio/eg3_ref.wav' # audio file path is optional, will use base64str in the post request as input if is 'None'
 export AUDIO='None'
 export FACESIZE=96
-export OUTFILE="/outputs/result.mp4"
+export OUTFILE="./outputs/result.mp4"
 export GFPGAN_MODEL_VERSION=1.4 # latest version, can roll back to v1.3 if needed
 export UPSCALE_FACTOR=1
-export FPS=10
+export FPS=5
--- a/AvatarChatbot/docker_compose/intel/cpu/xeon/README.md
+++ b/AvatarChatbot/docker_compose/intel/cpu/xeon/README.md
@@ -14,7 +14,7 @@ cd GenAIComps
 ### 2. Build ASR Image

 ```bash
-docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/integrations/dependency/whisper/Dockerfile .
+docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/third_parties/whisper/src/Dockerfile .
 ```

 ### 3. Build LLM Image
@@ -24,7 +24,7 @@ Intel Xeon optimized image hosted in huggingface repo will be used for TGI servi
 ### 4. Build TTS Image

 ```bash
-docker build -t opea/speecht5:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/src/integrations/dependency/speecht5/Dockerfile .
+docker build -t opea/speecht5:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/third_parties/speecht5/src/Dockerfile .
 ```

 ### 5. Build Animation Image
--- a/AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml
@@ -31,13 +31,13 @@ services:
    ports:
      - "3006:80"
    volumes:
-      - "./data:/data"
+      - "${MODEL_CACHE:-./data}:/data"
    shm_size: 1g
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
-      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://${host_ip}:3006/health || exit 1"]
      interval: 10s
--- a/AvatarChatbot/docker_compose/intel/cpu/xeon/set_env.sh
+++ b/AvatarChatbot/docker_compose/intel/cpu/xeon/set_env.sh
@@ -5,3 +5,32 @@
 pushd "../../../../../" > /dev/null
 source .set_env.sh
 popd > /dev/null
+
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
+export host_ip=$(hostname -I | awk '{print $1}')
+export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
+export WAV2LIP_ENDPOINT=http://$host_ip:7860
+export MEGA_SERVICE_HOST_IP=${host_ip}
+export WHISPER_SERVER_HOST_IP=${host_ip}
+export WHISPER_SERVER_PORT=7066
+export SPEECHT5_SERVER_HOST_IP=${host_ip}
+export SPEECHT5_SERVER_PORT=7055
+export LLM_SERVER_HOST_IP=${host_ip}
+export LLM_SERVER_PORT=3006
+export ANIMATION_SERVICE_HOST_IP=${host_ip}
+export ANIMATION_SERVICE_PORT=3008
+
+export MEGA_SERVICE_PORT=8888
+
+export DEVICE="cpu"
+export WAV2LIP_PORT=7860
+export INFERENCE_MODE='wav2lip+gfpgan'
+export CHECKPOINT_PATH='/usr/local/lib/python3.11/site-packages/Wav2Lip/checkpoints/wav2lip_gan.pth'
+export FACE="/home/user/comps/animation/src/assets/img/avatar5.png"
+# export AUDIO='assets/audio/eg3_ref.wav' # audio file path is optional; if AUDIO is 'None', the base64str in the POST request is used as input
+export AUDIO='None'
+export FACESIZE=96
+export OUTFILE="/outputs/result.mp4"
+export GFPGAN_MODEL_VERSION=1.4 # latest version, can roll back to v1.3 if needed
+export UPSCALE_FACTOR=1
+export FPS=10
--- a/AvatarChatbot/docker_compose/intel/hpu/gaudi/README.md
+++ b/AvatarChatbot/docker_compose/intel/hpu/gaudi/README.md
@@ -14,7 +14,7 @@ cd GenAIComps
 ### 2. Build ASR Image

 ```bash
-docker build -t opea/whisper-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/integrations/dependency/whisper/Dockerfile.intel_hpu .
+docker build -t opea/whisper-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/third_parties/whisper/src/Dockerfile.intel_hpu .
 ```

 ### 3. Build LLM Image
@@ -24,7 +24,7 @@ Intel Gaudi optimized image hosted in huggingface repo will be used for TGI serv
 ### 4. Build TTS Image

 ```bash
-docker build -t opea/speecht5-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/src/integrations/dependency/speecht5/Dockerfile.intel_hpu .
+docker build -t opea/speecht5-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/third_parties/speecht5/src/Dockerfile.intel_hpu .
 ```

 ### 5. Build Animation Image
--- a/AvatarChatbot/docker_compose/intel/hpu/gaudi/compose.yaml
+++ b/AvatarChatbot/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -43,12 +43,12 @@ services:
    ports:
      - "3006:80"
    volumes:
-      - "./data:/data"
+      - "${MODEL_CACHE:-./data}:/data"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
-      HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      HABANA_VISIBLE_DEVICES: all
--- a/AvatarChatbot/docker_compose/intel/hpu/gaudi/set_env.sh
+++ b/AvatarChatbot/docker_compose/intel/hpu/gaudi/set_env.sh
@@ -5,3 +5,35 @@
 pushd "../../../../../" > /dev/null
 source .set_env.sh
 popd > /dev/null
+
+# Hugging Face token: HF_TOKEN wins; if it is unset or empty, keep a pre-set HUGGINGFACEHUB_API_TOKEN instead of blanking it.
+export HF_TOKEN=${HF_TOKEN:-${HUGGINGFACEHUB_API_TOKEN:-}}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
+export host_ip=$(hostname -I | awk '{print $1}') # first address printed by `hostname -I`
+
+export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
+
+export MEGA_SERVICE_HOST_IP=${host_ip}
+export MEGA_SERVICE_PORT=8888
+export WHISPER_SERVER_HOST_IP=${host_ip}
+export WHISPER_SERVER_PORT=7066
+export SPEECHT5_SERVER_HOST_IP=${host_ip}
+export SPEECHT5_SERVER_PORT=7055
+export LLM_SERVER_HOST_IP=${host_ip}
+export LLM_SERVER_PORT=3006
+export ANIMATION_SERVICE_HOST_IP=${host_ip}
+export ANIMATION_SERVICE_PORT=3008
+
+export DEVICE="hpu"
+export WAV2LIP_PORT=7860
+export WAV2LIP_ENDPOINT=http://$host_ip:$WAV2LIP_PORT # built from WAV2LIP_PORT so the two cannot drift apart
+export INFERENCE_MODE='wav2lip+gfpgan'
+export CHECKPOINT_PATH='/usr/local/lib/python3.10/dist-packages/Wav2Lip/checkpoints/wav2lip_gan.pth'
+export FACE="/home/user/comps/animation/src/assets/img/avatar1.jpg"
+# export AUDIO='assets/audio/eg3_ref.wav' # audio file path is optional, will use base64str in the post request as input if is 'None'
+export AUDIO='None'
+export FACESIZE=96
+export OUTFILE="/outputs/result.mp4"
+export GFPGAN_MODEL_VERSION=1.4 # latest version, can roll back to v1.3 if needed
+export UPSCALE_FACTOR=1
+export FPS=10
--- a/AvatarChatbot/docker_image_build/build.yaml
+++ b/AvatarChatbot/docker_image_build/build.yaml
@@ -5,6 +5,8 @@ services:
  avatarchatbot:
    build:
      args:
+        IMAGE_REPO: ${REGISTRY:-opea}
+        BASE_TAG: ${TAG:-latest}
        http_proxy: ${http_proxy}
        https_proxy: ${https_proxy}
        no_proxy: ${no_proxy}
@@ -14,13 +16,13 @@ services:
  whisper-gaudi:
    build:
      context: GenAIComps
-      dockerfile: comps/asr/src/integrations/dependency/whisper/Dockerfile.intel_hpu
+      dockerfile: comps/third_parties/whisper/src/Dockerfile.intel_hpu  # resolved relative to the GenAIComps build context
    extends: avatarchatbot
    image: ${REGISTRY:-opea}/whisper-gaudi:${TAG:-latest}
  whisper:
    build:
      context: GenAIComps
-      dockerfile: comps/asr/src/integrations/dependency/whisper/Dockerfile
+      dockerfile: comps/third_parties/whisper/src/Dockerfile  # resolved relative to the GenAIComps build context
    extends: avatarchatbot
    image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
  asr:
@@ -38,13 +40,13 @@ services:
  speecht5-gaudi:
    build:
      context: GenAIComps
-      dockerfile: comps/tts/src/integrations/dependency/speecht5/Dockerfile.intel_hpu
+      dockerfile: comps/third_parties/speecht5/src/Dockerfile.intel_hpu  # resolved relative to the GenAIComps build context
    extends: avatarchatbot
    image: ${REGISTRY:-opea}/speecht5-gaudi:${TAG:-latest}
  speecht5:
    build:
      context: GenAIComps
-      dockerfile: comps/tts/src/integrations/dependency/speecht5/Dockerfile
+      dockerfile: comps/third_parties/speecht5/src/Dockerfile  # resolved relative to the GenAIComps build context
    extends: avatarchatbot
    image: ${REGISTRY:-opea}/speecht5:${TAG:-latest}
  tts:
--- a/AvatarChatbot/tests/README.md
+++ b/AvatarChatbot/tests/README.md
@@ -0,0 +1,27 @@
+# AvatarChatbot E2E test scripts
+
+## Set the required environment variables
+
+```bash
+export HF_TOKEN="Your_Huggingface_API_Token" HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
+```
+
+## Run test
+
+On Intel Xeon with TGI:
+
+```bash
+bash test_compose_on_xeon.sh
+```
+
+On Intel Gaudi with TGI:
+
+```bash
+bash test_compose_on_gaudi.sh
+```
+
+On AMD ROCm with TGI:
+
+```bash
+bash test_compose_on_rocm.sh
+```
--- a/Show More
+++ b/Show More