Compare commits


232 Commits

Author SHA1 Message Date
ZePan110
7025f4984f Only test
Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-05-07 13:48:07 +08:00
Sun, Xuehao
f6013b8679 Add exempt-issue-labels to stale check workflow (#1861)
Signed-off-by: Sun, Xuehao <xuehao.sun@intel.com>
2025-05-07 11:35:37 +08:00
chen, suyue
505ec6d4b6 update PR reviewers (#1913)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2025-05-07 11:28:04 +08:00
lkk
ff66600ab4 Fix ui dockerfile. (#1909)
Signed-off-by: lkk <33276950+lkk12014402@users.noreply.github.com>
2025-05-06 16:34:16 +08:00
ZePan110
5375332fb3 Fix security issues for helm test workflow (#1908)
Signed-off-by: ZePan110 <ze.pan@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2025-05-06 15:54:43 +08:00
Omar Khleif
df33800945 CodeGen Gradio UI Enhancements (#1904)
Signed-off-by: okhleif-IL <omar.khleif@intel.com>
2025-05-06 13:41:21 +08:00
Ying Hu
40e44dfcd6 Update README.md of ChatQnA for broken URL (#1907)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Neo Zhang Jianyu <jianyu.zhang@intel.com>
2025-05-06 13:21:31 +08:00
ZePan110
9259ba41a5 Remove invalid codeowner. (#1896)
Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-04-30 13:24:42 +08:00
ZePan110
5c7f5718ed Restore context in EdgeCraftRAG build.yaml. (#1895)
Restore context in EdgeCraftRAG build.yaml to avoid the issue of Dockerfiles not being found.

Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-04-30 11:09:21 +08:00
lkk
d334f5c8fd build cpu agent ui docker image. (#1894)
2025-04-29 23:58:52 +08:00
ZePan110
670d9f3d18 Fix security issue. (#1892)
Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-04-29 19:44:48 +08:00
Zhu Yongbo
555c4100b3 Install cpu version for components (#1888)
Signed-off-by: Yongbozzz <yongbo.zhu@intel.com>
2025-04-29 10:08:23 +08:00
ZePan110
04d527d3b0 Integrate set_env to ut scripts for CodeTrans. (#1868)
Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-04-28 13:53:50 +08:00
ZePan110
13c4749ca3 Fix security issue (#1884)
Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-04-28 13:52:50 +08:00
ZePan110
99b62ae49e Integrate DocSum set_env to ut scripts. (#1860)
Integrate DocSum set_env to ut scripts.
Add README.md for DocSum and InstructionTuning UT scripts.

Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-04-28 13:35:05 +08:00
chen, suyue
c546d96e98 downgrade tei version from 1.6 to 1.5, fix the chatqna perf regression (#1886)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2025-04-25 23:00:36 +08:00
chen, suyue
be5933ad85 Update benchmark scripts (#1883)
Signed-off-by: chensuyue <suyue.chen@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2025-04-25 17:05:48 +08:00
rbrugaro
18b4f39f27 README fixes Finance Example (#1882)
Signed-off-by: Rita Brugarolas <rita.brugarolas.brufau@intel.com>
Co-authored-by: Ying Hu <ying.hu@intel.com>
2025-04-24 23:58:08 -07:00
chyundunovDatamonsters
ef9290f245 DocSum - refactoring README.md for deploy application on ROCm (#1881)
Signed-off-by: Chingis Yundunov <YundunovCN@sibedge.com>
2025-04-25 13:36:40 +08:00
chyundunovDatamonsters
3b0bcb80a8 DocSum - Adding files to deploy an application in the K8S environment using Helm (#1758)
Signed-off-by: Chingis Yundunov <YundunovCN@sibedge.com>
Signed-off-by: Chingis Yundunov <c.yundunov@datamonsters.com>
Co-authored-by: Chingis Yundunov <YundunovCN@sibedge.com>
Co-authored-by: Artem Astafev <a.astafev@datamonsters.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: chen, suyue <suyue.chen@intel.com>
2025-04-25 13:33:08 +08:00
Artem Astafev
ccc145ea1a Refine README.MD for SearchQnA on AMD ROCm platform (#1876)
Signed-off-by: Artem Astafev <a.astafev@datamonsters.com>
2025-04-25 10:16:03 +08:00
chyundunovDatamonsters
bb7a675665 ChatQnA - refactoring README.md for deploy application on ROCm (#1857)
Signed-off-by: Chingis Yundunov <YundunovCN@sibedge.com>
Signed-off-by: Chingis Yundunov <c.yundunov@datamonsters.com>
Co-authored-by: Chingis Yundunov <YundunovCN@sibedge.com>
Co-authored-by: Artem Astafev <a.astafev@datamonsters.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2025-04-25 08:52:24 +08:00
chen, suyue
f90a6d2a8e [CICD enhance] EdgeCraftRAG run CI with latest base image, group logs in GHA outputs. (#1877)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2025-04-24 16:18:44 +08:00
chyundunovDatamonsters
1fdab591d9 CodeTrans - refactoring README.md for deploy application on ROCm with Docker Compose (#1875)
Signed-off-by: Chingis Yundunov <YundunovCN@sibedge.com>
2025-04-24 15:28:57 +08:00
chen, suyue
13ea13862a Remove proxy in CodeTrans test (#1874)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2025-04-24 13:47:56 +08:00
ZePan110
1787d1ee98 Update image links. (#1866)
Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-04-24 13:34:41 +08:00
Artem Astafev
db4bf1a4c3 Refine README.MD for AMD ROCm docker compose deployment (#1856)
Signed-off-by: Artem Astafev <a.astafev@datamonsters.com>
2025-04-24 11:00:51 +08:00
chen, suyue
f7002fcb70 Set opea_branch for CD test (#1870)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2025-04-24 09:49:20 +08:00
Artem Astafev
c39c875211 Fix compose file and functional tests for Avatarchatbot on AMD ROCm platform (#1872)
Signed-off-by: Artem Astafev <a.astafev@datamonsters.com>
2025-04-23 22:58:25 +08:00
Artem Astafev
c2e9a259fe Refine AudioQnA README.MD for AMD ROCm docker compose deployment (#1862)
Signed-off-by: Artem Astafev <a.astafev@datamonsters.com>
2025-04-23 13:55:01 +08:00
Omar Khleif
48eaf9c1c9 Added CodeGen Gradio README link to Docker Images List (#1864)
Signed-off-by: okhleif-IL <omar.khleif@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Abolfazl Shahbazi <12436063+ashahba@users.noreply.github.com>
2025-04-22 15:28:49 -07:00
Ervin Castelino
a39824f142 Update README.md of DBQnA (#1855)
Co-authored-by: Ying Hu <ying.hu@intel.com>
2025-04-22 15:56:37 -04:00
Dina Suehiro Jones
e10e6dd002 Fixes for MultimodalQnA with the Milvus vector db (#1859)
Signed-off-by: Dina Suehiro Jones <dina.s.jones@intel.com>
2025-04-21 16:05:11 -07:00
chen, suyue
ea17b38ac5 [CICD enhance] AudioQnA run CI with latest base image, group logs in GHA outputs. (#1854)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2025-04-21 21:58:02 +08:00
sgurunat
ea9d444bbf New Productivity Suite react UI and Bug Fixes (#1834)
Signed-off-by: Gurunath S <gurunath.s@intel.com>
2025-04-21 18:33:25 +08:00
Yao Qing
262ad7d6ec Refine readme of CodeGen (#1797)
Signed-off-by: Yao, Qing <qing.yao@intel.com>
2025-04-21 17:49:15 +08:00
Spycsh
608dc963c9 Refine readme of AudioQnA (#1804)
2025-04-21 17:30:14 +08:00
chen, suyue
ef2156fbf4 Enable more flexible support for test HWs (#1816)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2025-04-21 17:25:01 +08:00
WenjiaoYue
52c4db2fc6 [ SearchQnA ] Refine documents (#1803)
Signed-off-by: WenjiaoYue <wenjiao.yue@intel.com>
2025-04-21 17:16:41 +08:00
Letong Han
697f78ea71 Refine the READMEs of CodeTrans (#1796)
Signed-off-by: letonghan <letong.han@intel.com>
2025-04-21 17:14:46 +08:00
chen, suyue
e96f5a1ac5 AgentQnA group log lines in test outputs for better readable logs. (#1817)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2025-04-21 15:27:28 +08:00
Zhu Yongbo
82ef639ee3 hot fix for permission issue (#1849)
Signed-off-by: Yongbozzz <yongbo.zhu@intel.com>
2025-04-21 10:32:46 +08:00
vrantala
29d449b3ca Added Initial version of DocSum support for benchmarking scripts for OPEA (#1840)
Signed-off-by: Valtteri Rantala <valtteri.rantala@intel.com>
Co-authored-by: Liang Lv <liang1.lv@intel.com>
Co-authored-by: ZePan110 <ze.pan@intel.com>
2025-04-21 10:32:28 +08:00
Shifani Rajabose
338f81430d [Bug: 900] Create a version of MultimodalQnA example with Zilliz/Milvus as Vector DB (#1639)
Signed-off-by: Shifani Rajabose <srajabose@habana.ai>
Signed-off-by: Pallavi Jaini <pallavi.jaini@intel.com>
2025-04-21 10:11:39 +08:00
dolpher
87e3c0f59f Update chatqna values file changes (#1844)
Signed-off-by: Dolpher Du <dolpher.du@intel.com>
2025-04-21 09:38:07 +08:00
Spycsh
27813b3bf9 add AudioQnA key parameters to comply with the image size reduction (#1833)
2025-04-20 16:34:19 +08:00
XinyaoWa
c7f06d5e54 Refine documents for DocSum (#1802)
Signed-off-by: Xinyao <xinyao.wang@intel.com>
2025-04-20 16:20:20 +08:00
ZePan110
0967fcac86 [ Translation ] Refine documents (#1795)
Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-04-20 16:18:38 +08:00
Omar Khleif
a3eba01879 CodeGen Gradio UI Updates for new delete endpoint features (#1851)
Signed-off-by: okhleif-IL <omar.khleif@intel.com>
2025-04-20 16:17:32 +08:00
XinyuYe-Intel
bc168f1732 Refine readme of InstructionTuning (#1794)
Signed-off-by: Ye, Xinyu <xinyu.ye@intel.com>
2025-04-20 16:17:13 +08:00
Liang Lv
1eb2e36a18 Refine ChatQnA READMEs (#1850)
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
2025-04-20 10:34:24 +08:00
Louie Tsai
1a9a2dd53c Redirect users to new github.io sections for AgentQnA opentelemetry materials (#1846)
Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
2025-04-17 23:40:15 -07:00
sri-intel
c63e2cd067 Remote inference support for examples in Productivity suite (#1818)
Signed-off-by: Srinarayan Srikanthan <srinarayan.srikanthan@intel.com>
2025-04-18 14:36:57 +08:00
Louie Tsai
c793dd0b51 Redirect Users to github.io for ChatQnA telemetry materials (#1845)
Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
2025-04-17 23:35:30 -07:00
Ying Hu
1b3f1f632a Update README.md of ChatQnA for layout (#1842)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2025-04-18 11:41:35 +08:00
Zhu Yongbo
4c05e7fd1c fix missing package (#1841)
Signed-off-by: Yongbozzz <yongbo.zhu@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2025-04-17 22:57:53 +08:00
sri-intel
90cfe89e21 new chatqna readme template (#1755)
Signed-off-by: Srinarayan Srikanthan <srinarayan.srikanthan@intel.com>
2025-04-17 16:38:40 +08:00
ZePan110
62f7f5bd34 Update docker images list. (#1835)
Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-04-17 16:26:50 +08:00
Letong Han
7c6189cf43 Enable dataprep health check for examples (#1800)
Signed-off-by: letonghan <letong.han@intel.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2025-04-17 15:52:06 +08:00
Letong Han
ae31e4fb75 Enable health check for dataprep in ChatQnA (#1799)
Signed-off-by: letonghan <letong.han@intel.com>
2025-04-17 15:01:57 +08:00
xiguiw
4fc19c7d73 Update TEI docker images to CPU-1.6 (#1791)
Signed-off-by: Wang, Xigui <xigui.wang@intel.com>
2025-04-17 15:00:06 +08:00
Letong Han
b80449b8ab Fix Multimodal & ProductivitySuite Issue (#1820)
1. add data-prep health check
2. add create conda env

Signed-off-by: letonghan <letong.han@intel.com>
2025-04-17 09:30:15 +08:00
minmin-intel
8aa96c6278 Update FinanceAgent v1.3 (#1819)
Signed-off-by: minmin-intel <minmin.hou@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2025-04-16 15:44:46 -07:00
Abolfazl Shahbazi
a7ef8333ee Adding the two missing packages for ingest script (#1822)
Signed-off-by: Abolfazl Shahbazi <12436063+ashahba@users.noreply.github.com>
2025-04-16 09:46:45 -07:00
Liang Lv
71fe886ce9 Replaced TGI with vLLM for guardrail serving (#1815)
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
2025-04-16 17:06:11 +08:00
XinyuYe-Intel
1a6f821c95 Remove template_llava.jinja in command (#1831)
Signed-off-by: Ye, Xinyu <xinyu.ye@intel.com>
2025-04-16 17:05:55 +08:00
chen, suyue
1095d88c5f Group log lines in GHA outputs for better readable logs. (#1821)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2025-04-16 13:17:53 +08:00
mahathis
c73b09a758 Update AgentQnA and DocSum for Gaudi Compatibility (#1777)
Signed-off-by: Mahathi Vatsal <mahathi.vatsal.salopanthula@intel.com>
2025-04-15 22:01:27 -07:00
Liang Lv
13dd27e6d5 Update vLLM parameter max-seq-len-to-capture (#1809)
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
2025-04-15 14:27:12 +08:00
chen, suyue
a222d1cfbb Optimize the nightly/weekly example test (#1806)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2025-04-14 17:46:49 +08:00
minmin-intel
1852e6bcc3 Add Finance Agent Example (#1752)
Signed-off-by: minmin-intel <minmin.hou@intel.com>
Signed-off-by: Rita Brugarolas <rita.brugarolas.brufau@intel.com>
Signed-off-by: rbrugaro <rita.brugarolas.brufau@intel.com>
Co-authored-by: rbrugaro <rita.brugarolas.brufau@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: lkk <33276950+lkk12014402@users.noreply.github.com>
Co-authored-by: lkk12014402 <kaokao.lv@intel.com>
2025-04-14 14:27:07 +08:00
Neo Zhang Jianyu
72ce335663 add 'N/A' to option (#1801)
Co-authored-by: ZhangJianyu <zhang.jianyu@outlook.com>
2025-04-14 11:05:56 +08:00
chen, suyue
15d76c0889 support rocm helm charts test (#1787)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2025-04-13 22:36:16 +08:00
Chaunte W. Lacewell
c4763434b8 Fix VideoQnA (#1696)
This PR fixes the VideoQnA example.

Fixes Issues #1476 #1478 #1477

Signed-off-by: zhanmyz <yazhan.ma@intel.com>
Signed-off-by: Lacewell, Chaunte W <chaunte.w.lacewell@intel.com>
2025-04-12 18:15:02 +08:00
minmin-intel
58b47c15c6 update AgentQnA (#1790)
Signed-off-by: minmin-intel <minmin.hou@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2025-04-11 13:33:19 -07:00
ZePan110
8d421b7912 [Translation] Integrate set_env.sh into test scripts. (#1785)
Signed-off-by: ZePan110 <ze.pan@intel.com>
Co-authored-by: chen, suyue <suyue.chen@intel.com>
2025-04-11 09:31:40 +08:00
ZePan110
e9cafb3343 Redefine docker images list. (#1743)
Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-04-10 16:38:46 +08:00
ZePan110
1737d4b2b4 Update model cache for MultimodalQnA (#1618)
Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-04-10 16:25:41 +08:00
chen, suyue
177da5e6fc Add new secrets for docker compose test (#1786)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2025-04-10 16:12:51 +08:00
XinyaoWa
063547fb66 Align DocSum env to vllm (#1784)
Signed-off-by: sys-lpot-val <sys_lpot_val@intel.com>
Co-authored-by: sys-lpot-val <sys_lpot_val@intel.com>
2025-04-10 11:38:24 +08:00
ZePan110
c3bb59a354 Unified build.yaml file writing style (#1781)
Signed-off-by: ZePan110 <ze.pan@intel.com>
Co-authored-by: Abolfazl Shahbazi <12436063+ashahba@users.noreply.github.com>
2025-04-10 10:58:45 +08:00
ZePan110
8c763cbe11 Enable AvatarChatbot model cache for docker compose test. (#1604)
Signed-off-by: ZePan110 <ze.pan@intel.com>
Co-authored-by: chen, suyue <suyue.chen@intel.com>
2025-04-10 09:54:30 +08:00
minmin-intel
411bb28f41 fix bugs in DocIndexRetriever (#1770)
Signed-off-by: minmin-intel <minmin.hou@intel.com>
2025-04-10 09:45:46 +08:00
ZePan110
00d7a65dd8 Enable model cache for Rocm docker compose test. (#1614)
Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-04-10 09:40:37 +08:00
Artem Astafev
795c29fe87 Adding files to deploy MultimodalQnA application on ROCm vLLM (#1737)
Signed-off-by: Artem Astafev <a.astafev@datamonsters.com>
2025-04-10 09:34:58 +08:00
pre-commit-ci[bot]
094ca7aefe [pre-commit.ci] pre-commit autoupdate (#1771)
Signed-off-by: Sun, Xuehao <xuehao.sun@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Sun, Xuehao <xuehao.sun@intel.com>
Co-authored-by: Abolfazl Shahbazi <12436063+ashahba@users.noreply.github.com>
2025-04-09 11:51:57 -07:00
Liang Lv
398441a10c Fix typo in CodeGen README (#1783)
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
2025-04-09 16:43:53 +08:00
Mustafa
892624f539 CodeGen Examples using-RAG-and-Agents (#1757)
Signed-off-by: Mustafa <mustafa.cetin@intel.com>
2025-04-09 16:12:20 +08:00
Eero Tamminen
8b7cb3539e Use GenAIComp base image to simplify Dockerfiles & reduce image sizes (#1369)
Signed-off-by: Eero Tamminen <eero.t.tamminen@intel.com>
Co-authored-by: chen, suyue <suyue.chen@intel.com>
2025-04-09 14:51:10 +08:00
ZePan110
5f4b3a6d12 Adaptation to vllm v0.8.3 build paths (#1761)
Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-04-09 13:20:02 +08:00
Yazhan Ma
0392610776 Iteratively add image docker hub description (#1768)
Signed-off-by: zhanmyz <yazhan.ma@intel.com>
2025-04-09 12:00:45 +08:00
Lucas Melo
2d8a7e25f6 Update ChatQna & CodeGen README.md with new Automated Terraform Deployment Options (#1731)
Signed-off-by: lucasmelogithub <lucas.melo@intel.com>
2025-04-09 10:54:01 +08:00
Chun Tao
4d652719c2 Fix GenAIExamples #1607 (#1776)
Fix issue #1607

Signed-off-by: Chun Tao <chun.tao@intel.com>
2025-04-09 10:10:07 +08:00
Liang Lv
7b7728c6c3 Fix vLLM CPU initialize engine issue for DeepSeek models (#1762)
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
2025-04-09 09:47:08 +08:00
XinyaoWa
6917d5bdb1 Fix ChatQnA port to internal vllm port (#1763)
Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>
2025-04-09 09:37:11 +08:00
dolpher
46ebb78aa3 Sync values yaml file for 1.3 release (#1748)
Signed-off-by: Dolpher Du <dolpher.du@intel.com>
2025-04-08 22:39:40 +08:00
chen, suyue
b14db6dbd3 fix docker image clean up issue (#1773)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2025-04-08 22:26:37 +08:00
lkk
ff8008b6d0 compatible open-webui for opea agent. (#1765)
2025-04-08 21:54:01 +08:00
Spycsh
d4952d1e7c Refine third parties links (#1764)
Signed-off-by: Spycsh <sihan.chen@intel.com>
2025-04-08 18:39:13 +08:00
chen, suyue
12932477ee Add dockerhub login step to avoid 429 Too Many Requests (#1772)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2025-04-08 14:29:36 +08:00
ZePan110
42735d0d7d Fix vllm and vllm-fork tags (#1766)
Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-04-07 22:58:50 +08:00
Artem Astafev
073e5443ec Adding files to deploy VisualQnA application on ROCm vLLM (#1751)
Signed-off-by: Artem Astafev <a.astafev@datamonsters.com>
2025-04-07 09:27:19 +08:00
Louie Tsai
e8cdf7d668 [ChatQnA] update to the latest Grafana Dashboard (#1728)
Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
2025-04-03 12:14:55 -07:00
chen, suyue
c48cd651e4 [CICD enhance] ChatQnA run CI with latest base image, group logs in GHA outputs. (#1736)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2025-04-03 22:03:20 +08:00
Spycsh
d627209ee3 Add AudioQnA multilang tts test (#1746)
2025-04-03 21:29:40 +08:00
chyundunovDatamonsters
c50dfb2510 Adding files to deploy ChatQnA application on ROCm vLLM (#1560)
Signed-off-by: Chingis Yundunov <YundunovCN@sibedge.com>
2025-04-03 17:19:26 +08:00
ZePan110
4ce847cdb7 Fix relative path validity issue (#1750)
Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-04-03 17:08:36 +08:00
chyundunovDatamonsters
319dbdaa6b Adding files to deploy DocSum application on ROCm vLLM (#1572)
Signed-off-by: Chingis Yundunov <YundunovCN@sibedge.com>
2025-04-03 14:20:23 +08:00
Zhu Yongbo
1a0c5f03c6 Code Enhancement for vllm inference (#1729)
Signed-off-by: Yongbozzz <yongbo.zhu@intel.com>
2025-04-03 13:37:49 +08:00
Melanie Hart Buehler
bbd53443ab MultimodalQnA audio features completion (#1698)
Signed-off-by: okhleif-IL <omar.khleif@intel.com>
Signed-off-by: Harsha Ramayanam <harsha.ramayanam@intel.com>
Signed-off-by: Melanie Buehler <melanie.h.buehler@intel.com>
Signed-off-by: dmsuehir <dina.s.jones@intel.com>
Signed-off-by: Dina Suehiro Jones <dina.s.jones@intel.com>
Co-authored-by: Omar Khleif <omar.khleif@intel.com>
Co-authored-by: Harsha Ramayanam <harsha.ramayanam@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Dina Suehiro Jones <dina.s.jones@intel.com>
Co-authored-by: Liang Lv <liang1.lv@intel.com>
Co-authored-by: Abolfazl Shahbazi <12436063+ashahba@users.noreply.github.com>
2025-04-02 21:45:01 -07:00
chyundunovDatamonsters
2764a6dcd8 Fix README for deploy AgentQnA application on ROCm vLLM (#1742)
Signed-off-by: Chingis Yundunov <YundunovCN@sibedge.com>
2025-04-03 11:09:11 +08:00
Louie Tsai
11fa7d5e99 Add Telemetry support for AgentQnA using Grafana, Prometheus and Jaeger (#1732)
Signed-off-by: louie tsai <louie.tsai@intel.com>
Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
2025-04-02 17:37:13 -07:00
ZePan110
76c088dc0b Add model environment variable (#1660)
Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-04-02 22:21:11 +08:00
dolpher
cee24a083c Fix model cache path and use Random to avoid ns conflict (#1734)
Signed-off-by: Dolpher Du <dolpher.du@intel.com>
2025-04-02 13:40:25 +08:00
chyundunovDatamonsters
5cc047ce34 Adding files to deploy AgentQnA application on ROCm vLLM (#1613)
Signed-off-by: Chingis Yundunov <YundunovCN@sibedge.com>
2025-04-02 11:17:07 +08:00
Yazhan Ma
46a29cc253 Add short descriptions to the images OPEA publishes on Docker Hub (#1740)
Signed-off-by: zhanmyz <yazhan.ma@intel.com>
2025-04-02 10:32:20 +08:00
Louie Tsai
8fe2d5d0be Update README.md to have Table for contents (#1721)
Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
2025-04-01 10:31:05 -07:00
Yazhan Ma
68747a9688 Add short descriptions to the images OPEA publishes on Docker Hub (#1637)
Signed-off-by: zhanmyz <yazhan.ma@intel.com>
2025-04-01 15:48:49 +08:00
Xiaotian Chen
1bd56af994 Update TGI image versions (#1625)
Signed-off-by: xiaotia3 <xiaotian.chen@intel.com>
2025-04-01 11:27:51 +08:00
Dina Suehiro Jones
583428c6a7 Update MMQnA tgi-gaudi verison to match compose.yaml (#1663)
Signed-off-by: Dina Suehiro Jones <dina.s.jones@intel.com>
2025-03-31 11:13:19 -07:00
chyundunovDatamonsters
853f1302af Adding files to deploy SearchQnA application on ROCm vLLM (#1649)
Signed-off-by: Chingis Yundunov <YundunovCN@sibedge.com>
Signed-off-by: Artem Astafev <a.astafev@datamonsters.com>
2025-03-31 17:51:51 +08:00
chyundunovDatamonsters
340fa075bd Adding files to deploy Translation application on ROCm vLLM (#1648)
Signed-off-by: Chingis Yundunov <YundunovCN@sibedge.com>
Signed-off-by: Artem Astafev <a.astafev@datamonsters.com>
2025-03-31 13:49:33 +08:00
chen, suyue
b7f24762a3 Expand example running timeout for the new test cluster with k8s runner set (#1723)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2025-03-31 11:48:47 +08:00
Letong Han
d4dcbd18ef Enable vllm for DocSum (#1716)
Set vllm as default llm serving, and add related docker compose files, readmes, and test scripts.

Fix issue #1436

Signed-off-by: letonghan <letong.han@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2025-03-28 17:15:01 +08:00
xiguiw
87baeb833d Update TEI docker image to 1.6 (#1650)
Signed-off-by: Wang, Xigui <xigui.wang@intel.com>
2025-03-27 09:40:22 +08:00
Shifani Rajabose
03179296b4 [Bug: 899] Create a version of DocIndexRetriever example with Zilliz/Milvus as Vector DB (#1616)
Signed-off-by: Shifani Rajabose <srajabose@habana.ai>
Co-authored-by: pallavi jaini <pallavi.jaini@intel.com>
2025-03-26 15:19:38 +08:00
Louie Tsai
139f2aeeeb typo for docker image (#1717)
Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
2025-03-25 08:34:59 -07:00
Pranav Singh
61a8befe05 [docs] Multimodal Endpoints Issue (#1700)
Signed-off-by: Pranav Singh <pranav.singh@intel.com>
Co-authored-by: Ying Hu <ying.hu@intel.com>
2025-03-25 14:35:12 +08:00
XinyaoWa
4582e53b8a Remove FaqGen from ProductivitySuite (#1709)
Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>
2025-03-24 17:42:02 +08:00
lkk
566ffb2edc remove 3 useless environments. (#1708)
2025-03-24 15:34:45 +08:00
chyundunovDatamonsters
a04463d5e3 Adding files to deploy CodeTrans application on ROCm vLLM (#1545)
Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
Signed-off-by: Chingis Yundunov <YundunovCN@sibedge.com>
2025-03-24 15:33:35 +08:00
chyundunovDatamonsters
31b1d69e40 Adding files to deploy CodeGen application on ROCm vLLM (#1544)
Signed-off-by: Chingis Yundunov <YundunovCN@sibedge.com>
2025-03-24 14:45:17 +08:00
ZePan110
fe2a6674e0 Fix CD cancel issue (#1706)
Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-03-24 13:58:07 +08:00
chyundunovDatamonsters
60591d8d56 Adding files to deploy AudioQnA application on ROCm vLLM (#1655)
Signed-off-by: Chingis Yundunov <YundunovCN@sibedge.com>
Signed-off-by: Artem Astafev <a.astafev@datamonsters.com>
Co-authored-by: Chingis Yundunov <YundunovCN@sibedge.com>
Co-authored-by: Artem Astafev <a.astafev@datamonsters.com>
2025-03-24 10:03:37 +08:00
chen, suyue
7636de02e4 Enhance port release before CI test (#1704)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2025-03-24 09:24:43 +08:00
Eero Tamminen
d397e3f631 Use GenAIComp base image to simplify Dockerfiles - part 3/4 (#1671)
Signed-off-by: Eero Tamminen <eero.t.tamminen@intel.com>
2025-03-24 09:17:12 +08:00
Louie Tsai
0736912c69 change gaudi node exporter from default one to 41612 (#1702)
Signed-off-by: Louie Tsai <louie.tsai@intel.com>
Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
2025-03-20 21:38:24 -07:00
Louie Tsai
e8f2313e07 Integrate docker images into compose yaml file to simplify the run instructions. fix ui ip issue and add web search tool support (#1656)
Integrate docker images into compose yaml file to simplify the run instructions. fix ui ip issue and add web search tool support

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
Co-authored-by: alexsin368 <alex.sin@intel.com>
2025-03-21 09:42:20 +08:00
XinyaoWa
6d24c1c77a Merge FaqGen into ChatQnA (#1654)
1. Delete FaqGen
2. Refactor FaqGen into ChatQnA, serving as an LLM selection.
3. Combine all ChatQnA-related Dockerfiles into one

Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>
2025-03-20 17:40:00 +08:00
Zhu Yongbo
5a50ae0471 Add new UI/new features for EC-RAG (#1665)
Signed-off-by: Zhu, Yongbo <yongbo.zhu@intel.com>
2025-03-20 10:46:01 +08:00
minmin-intel
fecc22719a fix errors for running AgentQnA on xeon with openai and update readme (#1664)
Signed-off-by: minmin-intel <minmin.hou@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2025-03-20 09:57:18 +08:00
chen, suyue
2204fe8e36 Enable base image build in CI/CD (#1669)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2025-03-19 09:21:51 +08:00
ZePan110
b50dd8f47a Fix workflow issues. (#1691)
Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-03-19 09:21:27 +08:00
Spycsh
bf8d03425c Set vLLM as default model for VisualQnA (#1644)
2025-03-18 15:29:49 +08:00
chen, suyue
1b6342aa5b Fix input issue for manual-image-build.yml (#1666)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2025-03-17 13:11:53 +08:00
James Edwards
527b146a80 Add final README.md and set_env.sh script for quickstart review. Previous pull request was 1595. (#1662)
Signed-off-by: Edwards, James A <jaedwards@habana.ai>
Co-authored-by: Edwards, James A <jaedwards@habana.ai>
2025-03-14 16:05:01 -07:00
Sun, Xuehao
7159ce3731 Update stale issue and PR settings to 30 days for inactivity (#1661)
Signed-off-by: Sun, Xuehao <xuehao.sun@intel.com>
2025-03-14 17:55:49 +08:00
Louie Tsai
671dff7f51 [ChatQnA] Enable Prometheus and Grafana with telemetry docker compose file. (#1623)
Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
2025-03-13 23:18:29 -07:00
Wang, Kai Lawrence
8fe19291c8 [AudioQnA] Enable vLLM and set it as default LLM serving (#1657)
Signed-off-by: Wang, Kai Lawrence <kai.lawrence.wang@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2025-03-14 09:56:33 +08:00
CharleneHu-42
35c5cf5de8 Refine README with highlighted examples and updated support info (#1006)
Signed-off-by: CharleneHu-42 <yabai.hu@intel.com>
Co-authored-by: Yi Yao <yi.a.yao@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Ying Hu <ying.hu@intel.com>
2025-03-13 13:50:28 +08:00
ZePan110
63b789ae91 Enable Gaudi3, Rocm and Arc on manually release test. (#1615)
1. Enable Gaudi3, Rocm and Arc on manually release test.
2. Fix the issue that manual workflow can't be canceled.

Signed-off-by: ZePan110 <ze.pan@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2025-03-13 13:38:53 +08:00
ZePan110
d670dbf0aa Enable GraphRAG and ProductivitySuite model cache for docker compose test. (#1608)
Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-03-13 11:23:03 +08:00
Li Gang
0701b8cfff [ChatQnA][docker]Check healthy of redis to avoid dataprep failure (#1591)
Signed-off-by: Li Gang <gang.g.li@intel.com>
2025-03-13 10:52:33 +08:00
xiguiw
effa2a28cf Enable CodeGen vLLM (#1636)
Signed-off-by: Wang, Xigui <xigui.wang@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2025-03-13 10:38:47 +08:00
ZePan110
adcd113f53 Enable inject_commit to docker image feature. (#1653)
Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-03-13 09:39:42 +08:00
Eero Tamminen
4269669f73 Use GenAIComp base image to simplify Dockerfiles & reduce image sizes - part 2 (#1638)
Signed-off-by: Eero Tamminen <eero.t.tamminen@intel.com>
2025-03-13 08:23:07 +08:00
Sun, Xuehao
12657ac945 Add GitHub Action to check and close stale issues and PRs (#1646)
Signed-off-by: Sun, Xuehao <xuehao.sun@intel.com>
2025-03-12 10:56:07 +08:00
chen, suyue
43d0a18270 Enhance ChatQnA test scripts (#1643)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2025-03-10 17:36:26 +08:00
Wang, Kai Lawrence
5362321d3a Fix vllm model cache directory (#1642)
Signed-off-by: Wang, Kai Lawrence <kai.lawrence.wang@intel.com>
2025-03-10 13:40:42 +08:00
XinyaoWa
eb245fd085 Set vLLM as default model for FaqGen (#1580)
Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>
2025-03-10 09:39:35 +08:00
chen, suyue
4cab86260f Use the latest HabanaAI/vllm-fork release tag to build vllm-gaudi image (#1635)
Signed-off-by: chensuyue <suyue.chen@intel.com>
Co-authored-by: Liang Lv <liang1.lv@intel.com>
2025-03-07 20:40:32 +08:00
wangleflex
694207f76b [ChatQnA] Show spinner after query to improve user experience (#1003) (#1628)
Signed-off-by: Wang,Le3 <le3.wang@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2025-03-07 17:08:53 +08:00
chen, suyue
555e2405b9 Fix corner CI issue when the example path deleted (#1634)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2025-03-07 15:05:08 +08:00
Shifani Rajabose
7a92435269 [Bug: 112] Fix introduction in GenAIExamples main README (#1631)
2025-03-07 14:31:34 +08:00
Eero Tamminen
c9085c3c68 Use GenAIComp base image to simplify Dockerfiles (#1612)
Signed-off-by: Eero Tamminen <eero.t.tamminen@intel.com>
2025-03-07 13:13:29 +08:00
ZePan110
36aaed748b Update model cache for AgentQnA (#1627)
Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-03-07 11:00:48 +08:00
Letong Han
9180f1066d Enable vllm for CodeTrans (#1626)
Set vllm as default llm serving, and add related docker compose files, readmes, and test scripts.

Issue: https://github.com/opea-project/GenAIExamples/issues/1436

Signed-off-by: letonghan <letong.han@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2025-03-07 10:56:21 +08:00
ZePan110
5aecea8e47 Update compose.yaml (#1619)
Update compose.yaml for CodeGen, CodeTrans and DocSum

Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-03-07 09:20:28 +08:00
ZePan110
6723395e31 Update compose.yaml (#1620)
Update compose.yaml for AudioQnA, DBQnA, DocIndexRetriever, FaqGen, Translation and VisualQnA.

Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-03-07 09:20:08 +08:00
ZePan110
785ffb9a1e Update compose.yaml for ChatQnA (#1621)
Update compose.yaml for ChatQnA

Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-03-07 09:19:39 +08:00
ZePan110
428ba481b2 Update compose.yaml for SearchQnA (#1622)
Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-03-07 08:38:59 +08:00
Wang, Kai Lawrence
2dfcfa0436 [AudioQnA] Fix the LLM model field for inputs alignment (#1611)
Signed-off-by: Wang, Kai Lawrence <kai.lawrence.wang@intel.com>
2025-03-05 22:15:07 +08:00
Zhu Yongbo
8a5ad1fc72 Fix docker image opea/edgecraftrag security issue #1577 (#1617)
Signed-off-by: Zhu, Yongbo <yongbo.zhu@intel.com>
2025-03-05 22:13:53 +08:00
ZePan110
24cacaaa48 Enable SearchQnA model cache for docker compose test. (#1606)
Enable SearchQnA model cache for docker compose test.

Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-03-05 17:13:24 +08:00
ZePan110
6ead1b12db Enable ChatQnA model cache for docker compose test. (#1605)
Enable ChatQnA model cache for docker compose test.

Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-03-05 11:30:04 +08:00
rbrugaro
8dac9d1035 bugfix GraphRAG updated docker compose and env settings to fix issues post refactor (#1567)
Signed-off-by: rbrugaro <rita.brugarolas.brufau@intel.com>
Signed-off-by: Rita Brugarolas Brufau <rita.brugarolas.brufau@intel.com>
Co-authored-by: chen, suyue <suyue.chen@intel.com>
Co-authored-by: WenjiaoYue <wenjiao.yue@intel.com>
2025-03-04 09:44:13 -08:00
ZePan110
c1b5ba281f Enable CodeGen,CodeTrans and DocSum model cache for docker compose test. (#1599)
1. Add cache path check
2. Enable CodeGen, CodeTrans and DocSum model cache for docker compose test.

Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-03-04 16:10:20 +08:00
chen, suyue
8f8d3af7c3 open chatqna frontend test (#1594)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2025-03-04 10:41:22 +08:00
ZePan110
e4de76da78 Use model cache for docker compose test (#1582)
Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-03-04 09:48:27 +08:00
Spycsh
ce38a84372 Revert chatqna async and enhance tests (#1598)
align with opea-project/GenAIComps#1354
2025-03-03 23:03:44 +08:00
Ying Hu
e8b07c28ec Update DBQnA tgi docker image to latest tgi 2.4.0 (#1593)
2025-03-03 16:17:19 +08:00
chen, suyue
7b3a125bdf Fix cd workflow condition (#1588)
Fix cd workflow condition

Signed-off-by: chensuyue <suyue.chen@intel.com>
Co-authored-by: ZePan110 <ze.pan@intel.com>
2025-03-03 08:45:10 +08:00
Eze Lanza (Eze)
fba0de45d2 ChatQnA Docker compose file for Milvus as vdb (#1548)
Signed-off-by: Ezequiel Lanza <ezequiel.lanza@gmail.com>
Signed-off-by: Kendall González León <kendall.gonzalez.leon@intel.com>
Signed-off-by: chensuyue <suyue.chen@intel.com>
Signed-off-by: Spycsh <sihan.chen@intel.com>
Signed-off-by: Wang, Xigui <xigui.wang@intel.com>
Signed-off-by: ZePan110 <ze.pan@intel.com>
Signed-off-by: dependabot[bot] <support@github.com>
Signed-off-by: minmin-intel <minmin.hou@intel.com>
Signed-off-by: Artem Astafev <a.astafev@datamonsters.com>
Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>
Signed-off-by: Cathy Zhang <cathy.zhang@intel.com>
Signed-off-by: letonghan <letong.han@intel.com>
Signed-off-by: alexsin368 <alex.sin@intel.com>
Signed-off-by: WenjiaoYue <wenjiao.yue@intel.com>
Co-authored-by: Ezequiel Lanza <emlanza@CDQ242RKJDmac.local>
Co-authored-by: Kendall González León <kendallgonzalez@hotmail.es>
Co-authored-by: chen, suyue <suyue.chen@intel.com>
Co-authored-by: Spycsh <39623753+Spycsh@users.noreply.github.com>
Co-authored-by: xiguiw <111278656+xiguiw@users.noreply.github.com>
Co-authored-by: jotpalch <49465120+jotpalch@users.noreply.github.com>
Co-authored-by: ZePan110 <ze.pan@intel.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: minmin-intel <minmin.hou@intel.com>
Co-authored-by: Ying Hu <ying.hu@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Eero Tamminen <eero.t.tamminen@intel.com>
Co-authored-by: Liang Lv <liang1.lv@intel.com>
Co-authored-by: Artem Astafev <a.astafev@datamonsters.com>
Co-authored-by: XinyaoWa <xinyao.wang@intel.com>
Co-authored-by: alexsin368 <109180236+alexsin368@users.noreply.github.com>
Co-authored-by: WenjiaoYue <wenjiao.yue@intel.com>
2025-02-28 22:40:31 +08:00
WenjiaoYue
f2a5644d9c fix click example button issue (#1586)
Signed-off-by: WenjiaoYue <wenjiao.yue@intel.com>
2025-02-28 16:10:58 +08:00
alexsin368
6cd7827365 Top level README: add link to github.io documentation (#1584)
Signed-off-by: alexsin368 <alex.sin@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2025-02-28 13:43:43 +08:00
chen, suyue
3d8009aa91 Fix benchmark scripts (#1517)
- Align benchmark default config:
  1. Update default helm charts version.
  2. Add `# mandatory` comment.
  3. Update default model ID for LLM.
- Fix deploy issues:
  1. Support different `replicaCount` for with/without rerank tests.
  2. Add `max_num_seqs` for vllm.
  3. Add resource settings for tune mode.
- Fix benchmark issues:
  1. Update `user_queries` and `concurrency` settings.
  2. Remove invalid parameters.
  3. Fix `dataset` and `prompt` settings, and ingest the dataset into the db.
  4. Fix the benchmark hang issue with large user queries; updating `"processes": 16` fixes it.
  5. Update the eval_path setting logic.
- Optimize the benchmark readme.
- Optimize the log path to make the logs more readable.

Signed-off-by: chensuyue <suyue.chen@intel.com>
Signed-off-by: Cathy Zhang <cathy.zhang@intel.com>
Signed-off-by: letonghan <letong.han@intel.com>
2025-02-28 10:30:54 +08:00
XinyaoWa
78f8ae524d Fix async in chatqna bug (#1589)
Align async with comps; related PR: opea-project/GenAIComps#1300

Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>
2025-02-27 23:32:29 +08:00
Artem Astafev
6abf7652e8 Fix ChatQnA ROCm compose Readme file and absolute path for ROCM CI test (#1159)
Signed-off-by: Artem Astafev <a.astafev@datamonsters.com>
2025-02-27 15:26:45 +08:00
Spycsh
25c1aefc27 Align mongo related image names with comps (#1543)
- chathistory-mongo-server -> chathistory-mongo (except container names)
- feedbackmanagement -> feedbackmanagement-mongo
- promptregistry-server/promptregistry-mongo-server -> promptregistry-mongo (except container names)

Signed-off-by: Spycsh <sihan.chen@intel.com>
2025-02-27 09:25:49 +08:00
dependabot[bot]
d46df4331d Bump gradio from 5.5.0 to 5.11.0 in /DocSum/ui/gradio (#1576)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Liang Lv <liang1.lv@intel.com>
2025-02-25 14:32:03 +08:00
Eero Tamminen
23a77df302 Fix "OpenAI" & "response" spelling (#1561)
2025-02-25 12:45:21 +08:00
Ying Hu
852bc7027c Update README.md of AIPC quick start (#1578)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2025-02-23 17:38:27 +08:00
minmin-intel
a7eced4161 Update AgentQnA and DocIndexRetriever (#1564)
Signed-off-by: minmin-intel <minmin.hou@intel.com>
2025-02-22 09:51:26 +08:00
ZePan110
caec354324 Fix trivy issue (#1569)
Fix docker image security issue

Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-02-20 14:41:52 +08:00
xiguiw
d482554a6b Fix mismatched environment variable (#1575)
Signed-off-by: Wang, Xigui <xigui.wang@intel.com>
2025-02-19 19:24:10 +08:00
xiguiw
2ae6871fc5 Simplify ChatQnA AIPC user setting (#1573)
Signed-off-by: Wang, Xigui <xigui.wang@intel.com>
2025-02-19 16:30:02 +08:00
dependabot[bot]
2ac5be9921 Bump gradio from 5.5.0 to 5.11.0 in /MultimodalQnA/ui/gradio (#1391)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: chen, suyue <suyue.chen@intel.com>
2025-02-19 15:58:46 +08:00
ZePan110
799881a3fa Remove perf test code from test scripts. (#1510)
Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-02-18 16:23:49 +08:00
jotpalch
e5c6418c81 Fix minor typo in README (#1559)
Change "Docker Compost Deployment on ROCm" to "Docker Compose Deployment on ROCm"
2025-02-17 12:07:31 +08:00
xiguiw
0c0edffc5b update vLLM CPU to the latest stable version (#1546)
Signed-off-by: Wang, Xigui <xigui.wang@intel.com>
Co-authored-by: chen, suyue <suyue.chen@intel.com>
2025-02-17 08:26:25 +08:00
Spycsh
9f36e84c1c Refactor AudioQnA README (#1508)
Signed-off-by: Spycsh <sihan.chen@intel.com>
2025-02-15 11:30:16 +08:00
chen, suyue
8c547c2ba5 Expand CI test scope for common test scripts (#1554)
Expand the CI test scope: trigger all hardware tests when the common test scripts change.

Signed-off-by: chensuyue <suyue.chen@intel.com>
2025-02-14 18:17:03 +08:00
Kendall González León
80dd86f122 Make a fix in the main README.md of the ChatQnA. (#1551)
Signed-off-by: Kendall González León <kendall.gonzalez.leon@intel.com>
2025-02-14 17:00:44 +08:00
ZePan110
6d781f7b2b Fix CICD workflow strategy running condition (#1533)
Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-02-13 16:10:00 +08:00
WenjiaoYue
abafd5de20 Update UI of the three demos: faqGen, VisualQnA, and DocSum. (#1528)
Signed-off-by: WenjiaoYue <wenjiao.yue@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2025-02-12 15:57:51 +08:00
Louie Tsai
970b869838 Add a new section to change LLM model such as deepseek based on validated model table in LLM microservice (#1501)
Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
Co-authored-by: Wang, Kai Lawrence <109344418+wangkl2@users.noreply.github.com>
Co-authored-by: xiguiw <111278656+xiguiw@users.noreply.github.com>
2025-02-12 09:34:56 +08:00
XinyaoWa
87ff149f61 Remove vllm hpu triton version fix (#1515)
vllm-fork has fixed the triton version issue, so remove the duplicated code: https://github.com/HabanaAI/vllm-fork/blob/habana_main/requirements-hpu.txt

Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>
Co-authored-by: chen, suyue <suyue.chen@intel.com>
2025-02-12 09:24:38 +08:00
chen, suyue
c39a569ab2 Update workflow condition and env (#1522)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2025-02-12 09:08:22 +08:00
chen, suyue
81b02bb947 Revert "HUGGINGFACEHUB_API_TOKEN environment is change to HF_TOKEN (#… (#1521)
Revert this PR since its test was not triggered properly due to the mistaken merge of a WIP CI PR, 44a689b0bf, which blocked the CI test.

This change will be submitted in another PR.
2025-02-11 18:36:12 +08:00
Louie Tsai
47069ac70c fix a test script issue due to name change for telemetry yaml files (#1516)
Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
2025-02-11 17:58:42 +08:00
chen, suyue
6ce7730863 Update CI/CD workflow (#1520)
1. Update auto commit account.
2. Fix test condition.

Signed-off-by: chensuyue <suyue.chen@intel.com>
Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-02-11 17:48:37 +08:00
Louie Tsai
ad5523bac7 Enable OpenTelemetry Tracing for ChatQnA on Xeon and Gaudi by docker compose merge feature (#1488)
Signed-off-by: Louie, Tsai <louie.tsai@intel.com>
Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
2025-02-10 22:58:50 -08:00
Louie Tsai
88a8235f21 Update README.md for Agent UI (#1495)
Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
2025-02-10 22:22:55 -08:00
ZePan110
63ad850052 Update docker image list (#1513)
Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-02-11 13:18:22 +08:00
ZePan110
9a0c547112 Fix publish issue (#1514)
Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-02-11 11:43:00 +08:00
ZePan110
26a6da4123 Fix nightly triggered exceptions (#1505)
Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-02-10 16:51:34 +08:00
xiguiw
45d5da2ddd HUGGINGFACEHUB_API_TOKEN environment is change to HF_TOKEN (#1503)
Signed-off-by: Wang, Xigui <xigui.wang@intel.com>
2025-02-09 20:33:06 +08:00
xiguiw
1b3291a1c8 Fix docker compose.yaml error (#1496)
Signed-off-by: Wang, Xigui <xigui.wang@intel.com>
2025-02-07 09:53:20 +08:00
ZePan110
7ac8cf517a Restore test code. (#1502)
Remove nightly test code.

Signed-off-by: ZePan110 <ze.pan@intel.com>
2025-02-07 09:50:21 +08:00
ZePan110
44a689b0bf Fix null value_file judgment (#1470)
Signed-off-by: ZePan110 <ze.pan@intel.com>
Co-authored-by: Malini Bhandaru <malini.bhandaru@intel.com>
2025-02-06 17:09:01 +08:00
xiguiw
388d3eb5c5 [Doc] Clean empty document (#1497)
Signed-off-by: Wang, Xigui <xigui.wang@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2025-02-06 10:53:25 +08:00
chyundunovDatamonsters
ef9ad61440 DBQnA - Adding files to deploy DBQnA application on AMD GPU (#1273)
Signed-off-by: Chingis Yundunov <YundunovCN@sibedge.com>
Co-authored-by: Chingis Yundunov <YundunovCN@sibedge.com>
Co-authored-by: Malini Bhandaru <malini.bhandaru@intel.com>
2025-02-06 09:41:59 +08:00
Louie Tsai
4c41a5db83 Update README.md for OPEA OTLP tracing (#1406)
Signed-off-by: louie-tsai <louie.tsai@intel.com>
Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
Co-authored-by: Eero Tamminen <eero.t.tamminen@intel.com>
2025-02-05 13:03:15 -08:00
Liang Lv
9adf7a6af0 Add support for latest deepseek models on Gaudi (#1491)
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
2025-02-05 08:30:04 +08:00
chen, suyue
a4d028e8ea update image release workflow (#1303)
Signed-off-by: chensuyue <suyue.chen@intel.com>
Co-authored-by: Malini Bhandaru <malini.bhandaru@intel.com>
2025-02-03 17:07:07 -08:00
Omar Khleif
32d4f714fd Fix for NLTK related import failure (#1487)
Signed-off-by: okhleif-IL <omar.khleif@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2025-02-01 10:04:37 +08:00
chyundunovDatamonsters
fdbc27a9b5 AvatarChatbot - Adding files to deploy AvatarChatbot application on AMD GPU (#1288)
Signed-off-by: Chingis Yundunov <YundunovCN@sibedge.com>
2025-01-27 11:30:52 +08:00
XinyuYe-Intel
5f4b1828a5 Added UT for rerank finetuning on Gaudi (#1472)
Signed-off-by: Ye, Xinyu <xinyu.ye@intel.com>
2025-01-27 11:24:05 +08:00
chyundunovDatamonsters
39abef8be8 SearchQnA App - Adding files to deploy SearchQnA application on AMD GPU (#1193)
Signed-off-by: Chingis Yundunov <YundunovCN@sibedge.com>
2025-01-27 10:58:55 +08:00
bjzhjing
ed163087ba Provide unified scalable deployment and benchmarking support for exam… (#1315)
Signed-off-by: Cathy Zhang <cathy.zhang@intel.com>
Signed-off-by: letonghan <letong.han@intel.com>
Co-authored-by: letonghan <letong.han@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2025-01-24 22:27:49 +08:00
chen, suyue
259099d19f Remove kubernetes manifest related code and tests (#1466)
Remove deprecated kubernetes manifest related code and tests.
The k8s implementation for those examples, based on helm charts, will target the next release.

Signed-off-by: chensuyue <suyue.chen@intel.com>
2025-01-24 15:23:12 +08:00
chen, suyue
9a1118730b Freeze the triton version in vllm-gaudi image to 3.1.0 (#1463)
The new triton version 3.2.0 can't work with vllm-gaudi. Freeze the triton version in vllm-gaudi image to 3.1.0.

Issue created for vllm-fork: HabanaAI/vllm-fork#732
Signed-off-by: chensuyue <suyue.chen@intel.com>
2025-01-24 09:50:59 +08:00
1033 changed files with 78332 additions and 25865 deletions

.github/CODEOWNERS (21 changes)

@@ -1,16 +1,18 @@
-* liang1.lv@intel.com feng.tian@intel.com suyue.chen@intel.com
+# Code owners will review PRs within their respective folders.
+* liang1.lv@intel.com feng.tian@intel.com suyue.chen@intel.com kaokao.lv@intel.com minmin.hou@intel.com rita.brugarolas.brufau@intel.com
 /.github/ suyue.chen@intel.com ze.pan@intel.com
-/AgentQnA/ kaokao.lv@intel.com minmin.hou@intel.com
+/AgentQnA/ abolfazl.shahbazi@intel.com kaokao.lv@intel.com minmin.hou@intel.com
 /AudioQnA/ sihan.chen@intel.com wenjiao.yue@intel.com
 /AvatarChatbot/ chun.tao@intel.com kaokao.lv@intel.com
 /ChatQnA/ liang1.lv@intel.com letong.han@intel.com
-/CodeGen/ liang1.lv@intel.com xinyao.wang@intel.com
-/CodeTrans/ sihan.chen@intel.com xinyao.wang@intel.com
+/CodeGen/ liang1.lv@intel.com
+/CodeTrans/ sihan.chen@intel.com
 /DBQnA/ supriya.krishnamurthi@intel.com liang1.lv@intel.com
-/DocIndexRetriever/ kaokao.lv@intel.com chendi.xue@intel.com
-/DocSum/ letong.han@intel.com xinyao.wang@intel.com
+/DocIndexRetriever/ abolfazl.shahbazi@intel.com kaokao.lv@intel.com chendi.xue@intel.com
+/DocSum/ letong.han@intel.com
 /EdgeCraftRAG/ yongbo.zhu@intel.com mingyuan.qi@intel.com
-/FaqGen/ yogesh.pandey@intel.com xinyao.wang@intel.com
+/FinanceAgent/ abolfazl.shahbazi@intel.com kaokao.lv@intel.com minmin.hou@intel.com rita.brugarolas.brufau@intel.com
 /GraphRAG/ rita.brugarolas.brufau@intel.com abolfazl.shahbazi@intel.com
 /InstructionTuning/ xinyu.ye@intel.com kaokao.lv@intel.com
 /MultimodalQnA/ melanie.h.buehler@intel.com tiep.le@intel.com
@@ -19,5 +21,6 @@
 /SearchQnA/ sihan.chen@intel.com letong.han@intel.com
 /Text2Image/ wenjiao.yue@intel.com xinyu.ye@intel.com
 /Translation/ liang1.lv@intel.com sihan.chen@intel.com
-/VideoQnA/ huiling.bao@intel.com xinyao.wang@intel.com
-/VisualQnA/ liang1.lv@intel.com sihan.chen@intel.com
+/VideoQnA/ huiling.bao@intel.com
+/VisualQnA/ liang1.lv@intel.com sihan.chen@intel.com
 /WorkflowExecAgent/ joshua.jian.ern.liew@intel.com kaokao.lv@intel.com
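Note: each CODEOWNERS line maps a path pattern to the owners whose review is requested for changes under that path, with the leading "*" entry acting as the repository-wide fallback. A minimal sketch of the format (the path and addresses below are illustrative, not taken from this diff):

/SomeExample/ first.owner@example.com second.owner@example.com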


@@ -32,6 +32,7 @@ body:
         - Mac
         - BSD
         - Other (Please let us know in description)
+        - N/A
     validations:
       required: true
@@ -56,6 +57,7 @@ body:
         - GPU-Nvidia
         - GPU-AMD
         - GPU-other (Please let us know in description)
+        - N/A
     validations:
       required: true
@@ -67,6 +69,7 @@ body:
       - label: Pull docker images from hub.docker.com
       - label: Build docker images from source
       - label: Other
+      - label: N/A
     validations:
       required: true
@@ -80,6 +83,7 @@ body:
       - label: Kubernetes Helm Charts
       - label: Kubernetes GMC
      - label: Other
+      - label: N/A
     validations:
       required: true
@@ -91,6 +95,7 @@ body:
         - Single Node
         - Multiple Nodes
         - Other
+        - N/A
       default: 0
     validations:
       required: true


@@ -32,6 +32,7 @@ body:
         - Mac
         - BSD
         - Other (Please let us know in description)
+        - N/A
     validations:
       required: true
@@ -56,6 +57,7 @@ body:
         - GPU-Nvidia
         - GPU-AMD
         - GPU-other (Please let us know in description)
+        - N/A
     validations:
       required: true
@@ -67,6 +69,7 @@ body:
         - Single Node
         - Multiple Nodes
         - Other
+        - N/A
       default: 0
     validations:
       required: true


@@ -1,2 +1,3 @@
 ModelIn
 modelin
+pressEnter


@@ -0,0 +1,65 @@
# Copyright (C) 2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

name: Build Comps Base Image
permissions: read-all

on:
  workflow_call:
    inputs:
      node:
        required: true
        type: string
      build:
        default: true
        required: false
        type: boolean
      tag:
        default: "latest"
        required: false
        type: string
      opea_branch:
        default: "main"
        required: false
        type: string
      inject_commit:
        default: false
        required: false
        type: boolean

jobs:
  pre-build-image-check:
    runs-on: ubuntu-latest
    outputs:
      should_skip: ${{ steps.check-skip.outputs.should_skip }}
    steps:
      - name: Check if job should be skipped
        id: check-skip
        run: |
          should_skip=true
          if [[ "${{ inputs.node }}" == "gaudi" || "${{ inputs.node }}" == "xeon" ]]; then
            should_skip=false
          fi
          echo "should_skip=$should_skip"
          echo "should_skip=$should_skip" >> $GITHUB_OUTPUT

  build-images:
    needs: [ pre-build-image-check ]
    if: ${{ needs.pre-build-image-check.outputs.should_skip == 'false' && fromJSON(inputs.build) }}
    runs-on: "docker-build-${{ inputs.node }}"
    steps:
      - name: Clean Up Working Directory
        run: sudo rm -rf ${{github.workspace}}/*
      - name: Clone Required Repo
        run: |
          git clone --depth 1 --branch ${{ inputs.opea_branch }} https://github.com/opea-project/GenAIComps.git
          cd GenAIComps && git rev-parse HEAD && cd ../ && ls -l
      - name: Build Image
        uses: opea-project/validation/actions/image-build@main
        with:
          work_dir: ${{ github.workspace }}/GenAIComps
          docker_compose_path: ${{ github.workspace }}/GenAIComps/.github/workflows/docker/compose/base-compose.yaml
          registry: ${OPEA_IMAGE_REPO}opea
          inject_commit: ${{ inputs.inject_commit }}
          tag: ${{ inputs.tag }}

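The file above is a reusable workflow: callers invoke it via workflow_call, supplying the inputs declared at its top. A minimal caller sketch, assuming a hypothetical filename for this base-image workflow (the compare does not show its path) and illustrative input values:

# Hypothetical caller job (illustrative only; the workflow path is an assumption)
jobs:
  build-comps-base:
    uses: ./.github/workflows/_build_comps_base.yml  # assumed filename
    with:
      node: xeon           # the pre-build check skips any node other than "gaudi" or "xeon"
      tag: latest
      opea_branch: main
      inject_commit: false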
.github/workflows/_build_image.yml (new file, 101 lines)

@@ -0,0 +1,101 @@
# Copyright (C) 2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

name: Build Images
permissions: read-all

on:
  workflow_call:
    inputs:
      node:
        required: true
        type: string
      build:
        default: true
        required: false
        type: boolean
      example:
        required: true
        type: string
      services:
        default: ""
        required: false
        type: string
      tag:
        default: "latest"
        required: false
        type: string
      opea_branch:
        default: "main"
        required: false
        type: string
      inject_commit:
        default: false
        required: false
        type: boolean

jobs:
  pre-build-image-check:
    runs-on: ubuntu-latest
    outputs:
      should_skip: ${{ steps.check-skip.outputs.should_skip }}
    steps:
      - name: Check if job should be skipped
        id: check-skip
        run: |
          should_skip=true
          if [[ "${{ inputs.node }}" == "gaudi" || "${{ inputs.node }}" == "xeon" ]]; then
            should_skip=false
          fi
          echo "should_skip=$should_skip"
          echo "should_skip=$should_skip" >> $GITHUB_OUTPUT

  build-images:
    needs: [ pre-build-image-check ]
    if: ${{ needs.pre-build-image-check.outputs.should_skip == 'false' && fromJSON(inputs.build) }}
    runs-on: "docker-build-${{ inputs.node }}"
    steps:
      - name: Clean Up Working Directory
        run: sudo rm -rf ${{github.workspace}}/*
      - name: Get Checkout Ref
        run: |
          if [ "${{ github.event_name }}" == "pull_request" ] || [ "${{ github.event_name }}" == "pull_request_target" ]; then
            echo "CHECKOUT_REF=refs/pull/${{ github.event.number }}/merge" >> $GITHUB_ENV
          else
            echo "CHECKOUT_REF=${{ github.ref }}" >> $GITHUB_ENV
          fi
      - name: Checkout out GenAIExamples
        uses: actions/checkout@v4
        with:
          ref: ${{ env.CHECKOUT_REF }}
          fetch-depth: 0
      - name: Clone Required Repo
        run: |
          cd ${{ github.workspace }}/${{ inputs.example }}/docker_image_build
          docker_compose_path=${{ github.workspace }}/${{ inputs.example }}/docker_image_build/build.yaml
          if [[ $(grep -c "vllm:" ${docker_compose_path}) != 0 ]]; then
            git clone https://github.com/vllm-project/vllm.git && cd vllm
            VLLM_VER=v0.8.3
            echo "Check out vLLM tag ${VLLM_VER}"
            git checkout ${VLLM_VER} &> /dev/null && cd ../
          fi
          if [[ $(grep -c "vllm-gaudi:" ${docker_compose_path}) != 0 ]]; then
            git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork
            VLLM_VER=v0.6.6.post1+Gaudi-1.20.0
            echo "Check out vLLM tag ${VLLM_VER}"
            git checkout ${VLLM_VER} &> /dev/null && cd ../
          fi
          git clone --depth 1 --branch ${{ inputs.opea_branch }} https://github.com/opea-project/GenAIComps.git
          cd GenAIComps && git rev-parse HEAD && cd ../
      - name: Build Image
        uses: opea-project/validation/actions/image-build@main
        with:
          work_dir: ${{ github.workspace }}/${{ inputs.example }}/docker_image_build
          docker_compose_path: ${{ github.workspace }}/${{ inputs.example }}/docker_image_build/build.yaml
          service_list: ${{ inputs.services }}
          registry: ${OPEA_IMAGE_REPO}opea
          inject_commit: ${{ inputs.inject_commit }}
          tag: ${{ inputs.tag }}


@@ -43,68 +43,40 @@ on:
       inject_commit:
         default: false
         required: false
-        type: string
+        type: boolean
+      use_model_cache:
+        default: false
+        required: false
+        type: boolean
 jobs:
   ####################################################################################################
   # Image Build
   ####################################################################################################
   build-images:
-    runs-on: "docker-build-${{ inputs.node }}"
-    steps:
-      - name: Clean Up Working Directory
-        run: sudo rm -rf ${{github.workspace}}/*
-      - name: Get Checkout Ref
-        run: |
-          if [ "${{ github.event_name }}" == "pull_request" ] || [ "${{ github.event_name }}" == "pull_request_target" ]; then
-            echo "CHECKOUT_REF=refs/pull/${{ github.event.number }}/merge" >> $GITHUB_ENV
-          else
-            echo "CHECKOUT_REF=${{ github.ref }}" >> $GITHUB_ENV
-          fi
-      - name: Checkout out GenAIExamples
-        uses: actions/checkout@v4
-        with:
-          ref: ${{ env.CHECKOUT_REF }}
-          fetch-depth: 0
-      - name: Clone Required Repo
-        run: |
-          cd ${{ github.workspace }}/${{ inputs.example }}/docker_image_build
-          docker_compose_path=${{ github.workspace }}/${{ inputs.example }}/docker_image_build/build.yaml
-          if [[ $(grep -c "vllm:" ${docker_compose_path}) != 0 ]]; then
-            git clone --depth 1 https://github.com/vllm-project/vllm.git
-            cd vllm && git rev-parse HEAD && cd ../
-          fi
-          if [[ $(grep -c "vllm-gaudi:" ${docker_compose_path}) != 0 ]]; then
-            git clone --depth 1 --branch v0.6.4.post2+Gaudi-1.19.0 https://github.com/HabanaAI/vllm-fork.git
-          fi
-          git clone --depth 1 --branch ${{ inputs.opea_branch }} https://github.com/opea-project/GenAIComps.git
-          cd GenAIComps && git rev-parse HEAD && cd ../
-      - name: Build Image
-        if: ${{ fromJSON(inputs.build) }}
-        uses: opea-project/validation/actions/image-build@main
-        with:
-          work_dir: ${{ github.workspace }}/${{ inputs.example }}/docker_image_build
-          docker_compose_path: ${{ github.workspace }}/${{ inputs.example }}/docker_image_build/build.yaml
-          service_list: ${{ inputs.services }}
-          registry: ${OPEA_IMAGE_REPO}opea
-          inject_commit: ${{ inputs.inject_commit }}
-          tag: ${{ inputs.tag }}
+    uses: ./.github/workflows/_build_image.yml
+    with:
+      node: ${{ inputs.node }}
+      build: ${{ fromJSON(inputs.build) }}
+      example: ${{ inputs.example }}
+      services: ${{ inputs.services }}
+      tag: ${{ inputs.tag }}
+      opea_branch: ${{ inputs.opea_branch }}
+      inject_commit: ${{ inputs.inject_commit }}
   ####################################################################################################
   # Docker Compose Test
   ####################################################################################################
   test-example-compose:
     needs: [build-images]
-    if: ${{ fromJSON(inputs.test_compose) }}
+    if: ${{ inputs.test_compose }}
     uses: ./.github/workflows/_run-docker-compose.yml
     with:
       tag: ${{ inputs.tag }}
       example: ${{ inputs.example }}
       hardware: ${{ inputs.node }}
+      use_model_cache: ${{ inputs.use_model_cache }}
+      opea_branch: ${{ inputs.opea_branch }}
     secrets: inherit
@@ -126,7 +98,7 @@
   ####################################################################################################
   test-gmc-pipeline:
     needs: [build-images]
-    if: ${{ fromJSON(inputs.test_gmc) }}
+    if: false # ${{ fromJSON(inputs.test_gmc) }}
     uses: ./.github/workflows/_gmc-e2e.yml
     with:
       example: ${{ inputs.example }}

View File

@@ -2,7 +2,9 @@
# SPDX-License-Identifier: Apache-2.0
name: Helm Chart E2e Test For Call
permissions: read-all
permissions:
contents: read
on:
workflow_call:
inputs:
@@ -81,6 +83,10 @@ jobs:
if [[ "${{ inputs.hardware }}" == "gaudi" ]]; then
value_files="${value_files}\"${filename}\","
fi
elif [[ "$filename" == *"rocm"* ]]; then
if [[ "${{ inputs.hardware }}" == "rocm" ]]; then
value_files="${value_files}\"${filename}\","
fi
elif [[ "$filename" == *"nv"* ]]; then
continue
else
@@ -97,6 +103,7 @@ jobs:
helm-test:
needs: [get-test-case]
if: ${{ needs.get-test-case.outputs.value_files != '[]' }}
strategy:
matrix:
value_file: ${{ fromJSON(needs.get-test-case.outputs.value_files) }}
@@ -130,16 +137,28 @@ jobs:
env:
example: ${{ inputs.example }}
run: |
CHART_NAME="${example,,}" # CodeGen
echo "CHART_NAME=$CHART_NAME" >> $GITHUB_ENV
echo "RELEASE_NAME=${CHART_NAME}$(date +%Y%m%d%H%M%S)" >> $GITHUB_ENV
echo "NAMESPACE=${CHART_NAME}-$(date +%Y%m%d%H%M%S)" >> $GITHUB_ENV
echo "ROLLOUT_TIMEOUT_SECONDS=600s" >> $GITHUB_ENV
echo "TEST_TIMEOUT_SECONDS=600s" >> $GITHUB_ENV
echo "KUBECTL_TIMEOUT_SECONDS=60s" >> $GITHUB_ENV
echo "should_cleanup=false" >> $GITHUB_ENV
echo "skip_validate=false" >> $GITHUB_ENV
echo "CHART_FOLDER=${example}/kubernetes/helm" >> $GITHUB_ENV
if [[ ! "$example" =~ ^[a-zA-Z]{1,20}$ ]] || [[ "$example" =~ \.\. ]] || [[ "$example" == -* || "$example" == *- ]]; then
echo "Error: Invalid input - only lowercase alphanumeric and internal hyphens allowed"
exit 1
fi
# SAFE_PREFIX="kb-"
CHART_NAME="${SAFE_PREFIX}$(echo "$example" | tr '[:upper:]' '[:lower:]')"
RAND_SUFFIX=$(openssl rand -hex 2 | tr -dc 'a-f0-9')
cat <<EOF >> $GITHUB_ENV
CHART_NAME=${CHART_NAME}
RELEASE_NAME=${CHART_NAME}-$(date +%s)
NAMESPACE=ns-${CHART_NAME}-${RAND_SUFFIX}
ROLLOUT_TIMEOUT_SECONDS=600s
TEST_TIMEOUT_SECONDS=600s
KUBECTL_TIMEOUT_SECONDS=60s
should_cleanup=false
skip_validate=false
CHART_FOLDER=${example}/kubernetes/helm
EOF
echo "Generated safe variables:" >> $GITHUB_STEP_SUMMARY
echo "- CHART_NAME: ${CHART_NAME}" >> $GITHUB_STEP_SUMMARY
- name: Helm install
id: install
@@ -172,7 +191,7 @@ jobs:
$RELEASE_NAME \
oci://ghcr.io/opea-project/charts/${CHART_NAME} \
--set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} \
--set global.modelUseHostPath=/home/sdp/.cache/huggingface/hub \
--set global.modelUseHostPath=/data2/hf_model \
--set GOOGLE_API_KEY=${{ env.GOOGLE_API_KEY}} \
--set GOOGLE_CSE_ID=${{ env.GOOGLE_CSE_ID}} \
--set web-retriever.GOOGLE_API_KEY=${{ env.GOOGLE_API_KEY}} \

View File

@@ -28,6 +28,14 @@ on:
required: false
type: string
default: ""
use_model_cache:
required: false
type: boolean
default: false
opea_branch:
default: "main"
required: false
type: string
jobs:
get-test-case:
runs-on: ubuntu-latest
@@ -60,9 +68,16 @@ jobs:
cd ${{ github.workspace }}/${{ inputs.example }}/tests
run_test_cases=""
default_test_case=$(find . -type f -name "test_compose_on_${{ inputs.hardware }}.sh" | cut -d/ -f2)
if [[ "${{ inputs.hardware }}" == "gaudi"* ]]; then
hardware="gaudi"
elif [[ "${{ inputs.hardware }}" == "xeon"* ]]; then
hardware="xeon"
else
hardware="${{ inputs.hardware }}"
fi
default_test_case=$(find . -type f -name "test_compose_on_$hardware.sh" | cut -d/ -f2)
if [ "$default_test_case" ]; then run_test_cases="$default_test_case"; fi
other_test_cases=$(find . -type f -name "test_compose_*_on_${{ inputs.hardware }}.sh" | cut -d/ -f2)
other_test_cases=$(find . -type f -name "test_compose_*_on_$hardware.sh" | cut -d/ -f2)
echo "default_test_case=$default_test_case"
echo "other_test_cases=$other_test_cases"
@@ -85,12 +100,17 @@ jobs:
fi
done
if [ -z "$run_test_cases" ] && [[ $(printf '%s\n' "${changed_files[@]}" | grep ${{ inputs.example }} | grep /tests/) ]]; then
run_test_cases=$other_test_cases
fi
test_cases=$(echo $run_test_cases | tr ' ' '\n' | sort -u | jq -R '.' | jq -sc '.')
echo "test_cases=$test_cases"
echo "test_cases=$test_cases" >> $GITHUB_OUTPUT
compose-test:
needs: [get-test-case]
if: ${{ needs.get-test-case.outputs.test_cases != '[""]' }}
strategy:
matrix:
test_case: ${{ fromJSON(needs.get-test-case.outputs.test_cases) }}
@@ -101,9 +121,18 @@ jobs:
- name: Clean up Working Directory
run: |
sudo rm -rf ${{github.workspace}}/* || true
echo "Cleaning up containers using ports..."
cid=$(docker ps --format '{{.Names}} : {{.Ports}}' | grep -v ' : $' | grep -v 0.0.0.0:5000 | awk -F' : ' '{print $1}')
if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
docker system prune -f
docker rmi $(docker images --filter reference="*/*/*:latest" -q) || true
docker rmi $(docker images --filter reference="*/*:ci" -q) || true
echo "Cleaning up images ..."
docker images --filter reference="*/*/*:latest" -q | xargs -r docker rmi && sleep 1s
docker images --filter reference="*/*:ci" -q | xargs -r docker rmi && sleep 1s
docker images --filter reference="*:5000/*/*" -q | xargs -r docker rmi && sleep 1s
docker images --filter reference="opea/comps-base" -q | xargs -r docker rmi && sleep 1s
docker images
- name: Checkout out Repo
uses: actions/checkout@v4
@@ -122,10 +151,17 @@ jobs:
bash ${{ github.workspace }}/.github/workflows/scripts/docker_compose_clean_up.sh "ports"
docker ps
- name: Log in DockerHub
uses: docker/login-action@v3.2.0
with:
username: ${{ secrets.DOCKERHUB_USER }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Run test
shell: bash
env:
HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
HF_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
PINECONE_KEY: ${{ secrets.PINECONE_KEY }}
@@ -133,30 +169,55 @@ jobs:
SDK_BASE_URL: ${{ secrets.SDK_BASE_URL }}
SERVING_TOKEN: ${{ secrets.SERVING_TOKEN }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
FINNHUB_API_KEY: ${{ secrets.FINNHUB_API_KEY }}
FINANCIAL_DATASETS_API_KEY: ${{ secrets.FINANCIAL_DATASETS_API_KEY }}
IMAGE_REPO: ${{ inputs.registry }}
IMAGE_TAG: ${{ inputs.tag }}
opea_branch: ${{ inputs.opea_branch }}
example: ${{ inputs.example }}
hardware: ${{ inputs.hardware }}
test_case: ${{ matrix.test_case }}
use_model_cache: ${{ inputs.use_model_cache }}
run: |
cd ${{ github.workspace }}/$example/tests
if [[ "$IMAGE_REPO" == "" ]]; then export IMAGE_REPO="${OPEA_IMAGE_REPO}opea"; fi
if [ -f ${test_case} ]; then timeout 30m bash ${test_case}; else echo "Test script {${test_case}} not found, skip test!"; fi
if [[ "$use_model_cache" == "true" ]]; then
if [ -d "/data2/hf_model" ]; then
export model_cache="/data2/hf_model"
else
echo "Model cache directory /data2/hf_model does not exist"
export model_cache="$HOME/.cache/huggingface/hub"
fi
if [[ "$test_case" == *"rocm"* ]]; then
export model_cache="/var/lib/GenAI/data"
fi
fi
if [ -f "${test_case}" ]; then timeout 60m bash "${test_case}"; else echo "Test script {${test_case}} not found, skip test!"; fi
- name: Clean up container after test
shell: bash
if: cancelled() || failure()
if: always()
run: |
cd ${{ github.workspace }}/${{ inputs.example }}
export test_case=${{ matrix.test_case }}
export hardware=${{ inputs.hardware }}
bash ${{ github.workspace }}/.github/workflows/scripts/docker_compose_clean_up.sh "containers"
docker system prune -f
docker rmi $(docker images --filter reference="*:5000/*/*" -q) || true
set -x
echo "Cleaning up containers using ports..."
cid=$(docker ps --format '{{.Names}} : {{.Ports}}' | grep -v ' : $' | grep -v 0.0.0.0:5000 | awk -F' : ' '{print $1}')
if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
echo "Cleaning up images ..."
if [[ "${{ inputs.hardware }}" == "xeon"* ]]; then
docker system prune -a -f
else
docker images --filter reference="*/*/*:latest" -q | xargs -r docker rmi && sleep 1s
docker images --filter reference="*/*:ci" -q | xargs -r docker rmi && sleep 1s
docker images --filter reference="*:5000/*/*" -q | xargs -r docker rmi && sleep 1s
docker images --filter reference="opea/comps-base" -q | xargs -r docker rmi && sleep 1s
docker system prune -f
fi
docker images
- name: Publish pipeline artifact
if: ${{ !cancelled() }}
uses: actions/upload-artifact@v4
with:
name: ${{ inputs.example }}_${{ matrix.test_case }}
name: ${{ inputs.hardware }}_${{ inputs.example }}_${{ matrix.test_case }}
path: ${{ github.workspace }}/${{ inputs.example }}/tests/*.log

View File

@@ -0,0 +1,29 @@
# Copyright (C) 2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: Check stale issue and pr
on:
schedule:
- cron: "30 22 * * *"
jobs:
close-issues:
runs-on: ubuntu-latest
permissions:
issues: write
pull-requests: write
steps:
- uses: actions/stale@v9
with:
days-before-issue-stale: 30
days-before-pr-stale: 30
days-before-issue-close: 7
days-before-pr-close: 7
stale-issue-message: "This issue is stale because it has been open 30 days with no activity. Remove stale label or comment or this will be closed in 7 days."
stale-pr-message: "This PR is stale because it has been open 30 days with no activity. Remove stale label or comment or this will be closed in 7 days."
close-issue-message: "This issue was closed because it has been stalled for 7 days with no activity."
close-pr-message: "This PR was closed because it has been stalled for 7 days with no activity."
repo-token: ${{ secrets.ACTION_TOKEN }}
start-date: "2025-03-01T00:00:00Z"
exempt-issue-labels: "Backlog"

View File

@@ -0,0 +1,117 @@
# Copyright (C) 2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: Update Docker Hub Description
on:
schedule:
- cron: "0 0 * * 0"
workflow_dispatch:
jobs:
get-images-matrix:
runs-on: ubuntu-latest
outputs:
examples_json: ${{ steps.extract.outputs.examples_json }}
steps:
- name: Checkout repository
uses: actions/checkout@v3
- name: Extract images info and generate JSON matrix
id: extract
run: |
#!/bin/bash
set -e
images=$(awk -F'|' '/^\| *\[opea\// {
gsub(/^ +| +$/, "", $2);
gsub(/^ +| +$/, "", $4);
gsub(/^ +| +$/, "", $5);
# Extract the path portion of the dockerHub link from the Example Images column
match($2, /\(https:\/\/hub\.docker\.com\/r\/[^)]*\)/);
repository = substr($2, RSTART, RLENGTH);
# Remove the prefix and the trailing right bracket
sub(/^\(https:\/\/hub\.docker\.com\/r\//, "", repository);
sub(/\)$/, "", repository);
# Description Direct assignment
description = $4;
# Extract the content of the github link from the Readme column
match($5, /\(https:\/\/github\.com\/[^)]*\)/);
readme_url = substr($5, RSTART, RLENGTH);
# Remove the prefix and the trailing right bracket
sub(/^\(https:\/\/github\.com\//, "", readme_url);
sub(/\)$/, "", readme_url);
# Remove blob information, such as "blob/main/" or "blob/habana_main/"
gsub(/blob\/[^/]+\//, "", readme_url);
# Remove the organization name and keep only the file path, such as changing "opea-project/GenAIExamples/AudioQnA/README.md" to "GenAIExamples/AudioQnA/README.md"
sub(/^[^\/]+\//, "", readme_url);
# Generate JSON object string
printf "{\"repository\":\"%s\",\"short-description\":\"%s\",\"readme-filepath\":\"%s\"}\n", repository, description, readme_url;
}' docker_images_list.md)
# Concatenate all JSON objects into a JSON array, using paste to separate them with commas
json="[$(echo "$images" | paste -sd, -)]"
echo "$json"
# Set as output variable for subsequent jobs to use
echo "::set-output name=examples_json::$json"
check-images-matrix:
runs-on: ubuntu-latest
needs: get-images-matrix
if: ${{ needs.get-images-matrix.outputs.examples_json != '' }}
strategy:
matrix:
image: ${{ fromJSON(needs.get-images-matrix.outputs.examples_json) }}
fail-fast: false
steps:
- name: Check dockerhub description
run: |
echo "dockerhub description for ${{ matrix.image.repository }}"
echo "short-description: ${{ matrix.image.short-description }}"
echo "readme-filepath: ${{ matrix.image.readme-filepath }}"
dockerHubDescription:
runs-on: ubuntu-latest
needs: get-images-matrix
if: ${{ needs.get-images-matrix.outputs.examples_json != '' }}
strategy:
matrix:
image: ${{ fromJSON(needs.get-images-matrix.outputs.examples_json) }}
fail-fast: false
steps:
- name: Checkout GenAIExamples
uses: actions/checkout@v4
with:
repository: opea-project/GenAIExamples
path: GenAIExamples
- name: Checkout GenAIComps
uses: actions/checkout@v4
with:
repository: opea-project/GenAIComps
path: GenAIComps
- name: Checkout vllm-openvino
uses: actions/checkout@v4
with:
repository: vllm-project/vllm
path: vllm
- name: Checkout vllm-gaudi
uses: actions/checkout@v4
with:
repository: HabanaAI/vllm-fork
ref: habana_main
path: vllm-fork
- name: add dockerhub description
uses: peter-evans/dockerhub-description@v4
with:
username: ${{ secrets.DOCKERHUB_USER }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
repository: ${{ matrix.image.repository }}
short-description: ${{ matrix.image.short-description }}
readme-filepath: ${{ matrix.image.readme-filepath }}
enable-url-completion: false

View File

@@ -41,9 +41,11 @@ jobs:
publish:
needs: [get-image-list]
if: ${{ needs.get-image-list.outputs.matrix != '' }}
strategy:
matrix:
image: ${{ fromJSON(needs.get-image-list.outputs.matrix) }}
fail-fast: false
runs-on: "docker-build-${{ inputs.node }}"
steps:
- uses: docker/login-action@v3.2.0

View File

@@ -12,7 +12,7 @@ on:
type: string
examples:
default: ""
description: 'List of examples to publish "AgentQnA,AudioQnA,ChatQnA,CodeGen,CodeTrans,DocIndexRetriever,DocSum,FaqGen,InstructionTuning,MultimodalQnA,ProductivitySuite,RerankFinetuning,SearchQnA,Translation,VideoQnA,VisualQnA"'
description: 'List of examples to publish "AgentQnA,AudioQnA,ChatQnA,CodeGen,CodeTrans,DocIndexRetriever,DocSum,InstructionTuning,MultimodalQnA,ProductivitySuite,RerankFinetuning,SearchQnA,Translation,VideoQnA,VisualQnA"'
required: false
type: string
images:
@@ -47,6 +47,7 @@ jobs:
scan-docker:
needs: get-image-list
runs-on: "docker-build-${{ inputs.node }}"
if: ${{ needs.get-image-list.outputs.matrix != '' }}
strategy:
matrix:
image: ${{ fromJson(needs.get-image-list.outputs.matrix) }}

View File

@@ -7,7 +7,7 @@ on:
inputs:
nodes:
default: "gaudi,xeon"
description: "Hardware to run test"
description: "Hardware to run test gaudi,xeon,rocm,arc,gaudi3,xeon-gnr"
required: true
type: string
examples:
@@ -20,11 +20,6 @@ on:
description: "Tag to apply to images"
required: true
type: string
deploy_gmc:
default: false
description: 'Whether to deploy gmc'
required: true
type: boolean
build:
default: true
description: 'Build test required images for Examples'
@@ -40,11 +35,6 @@ on:
description: 'Test examples with helm charts'
required: false
type: boolean
test_gmc:
default: false
description: 'Test examples with gmc'
required: false
type: boolean
opea_branch:
default: "main"
description: 'OPEA branch for image build'
@@ -52,9 +42,14 @@ on:
type: string
inject_commit:
default: false
description: "inject commit to docker images true or false"
description: "inject commit to docker images"
required: false
type: string
type: boolean
use_model_cache:
default: false
description: "use model cache"
required: false
type: boolean
permissions: read-all
jobs:
@@ -74,23 +69,20 @@ jobs:
nodes_json=$(printf '%s\n' "${nodes[@]}" | sort -u | jq -R '.' | jq -sc '.')
echo "nodes=$nodes_json" >> $GITHUB_OUTPUT
build-deploy-gmc:
build-comps-base:
needs: [get-test-matrix]
if: ${{ fromJSON(inputs.deploy_gmc) }}
strategy:
matrix:
node: ${{ fromJson(needs.get-test-matrix.outputs.nodes) }}
fail-fast: false
uses: ./.github/workflows/_gmc-workflow.yml
uses: ./.github/workflows/_build_comps_base_image.yml
with:
node: ${{ matrix.node }}
build: ${{ fromJSON(inputs.build) }}
tag: ${{ inputs.tag }}
opea_branch: ${{ inputs.opea_branch }}
secrets: inherit
run-examples:
needs: [get-test-matrix, build-deploy-gmc]
if: always()
needs: [get-test-matrix, build-comps-base]
strategy:
matrix:
example: ${{ fromJson(needs.get-test-matrix.outputs.examples) }}
@@ -104,7 +96,7 @@ jobs:
build: ${{ fromJSON(inputs.build) }}
test_compose: ${{ fromJSON(inputs.test_compose) }}
test_helmchart: ${{ fromJSON(inputs.test_helmchart) }}
test_gmc: ${{ fromJSON(inputs.test_gmc) }}
opea_branch: ${{ inputs.opea_branch }}
inject_commit: ${{ inputs.inject_commit }}
use_model_cache: ${{ inputs.use_model_cache }}
secrets: inherit

View File

@@ -25,9 +25,9 @@ jobs:
- name: Set up Git
run: |
git config --global user.name "NeuralChatBot"
git config --global user.email "grp_neural_chat_bot@intel.com"
git remote set-url origin https://NeuralChatBot:"${{ secrets.ACTION_TOKEN }}"@github.com/opea-project/GenAIExamples.git
git config --global user.name "CICD-at-OPEA"
git config --global user.email "CICD@opea.dev"
git remote set-url origin https://CICD-at-OPEA:"${{ secrets.ACTION_TOKEN }}"@github.com/opea-project/GenAIExamples.git
- name: Run script
run: |

View File

@@ -32,9 +32,9 @@ on:
type: string
inject_commit:
default: false
description: "inject commit to docker images true or false"
description: "inject commit to docker images"
required: false
type: string
type: boolean
jobs:
get-test-matrix:
@@ -51,6 +51,7 @@ jobs:
image-build:
needs: get-test-matrix
if: ${{ needs.get-test-matrix.outputs.nodes != '' }}
strategy:
matrix:
node: ${{ fromJson(needs.get-test-matrix.outputs.nodes) }}

View File

@@ -33,6 +33,7 @@ jobs:
clean-up:
needs: get-build-matrix
if: ${{ needs.get-image-list.outputs.matrix != '' }}
strategy:
matrix:
node: ${{ fromJson(needs.get-build-matrix.outputs.nodes) }}
@@ -47,6 +48,7 @@ jobs:
build:
needs: [get-build-matrix, clean-up]
if: ${{ needs.get-image-list.outputs.matrix != '' }}
strategy:
matrix:
example: ${{ fromJson(needs.get-build-matrix.outputs.examples) }}

View File

@@ -5,11 +5,11 @@ name: Nightly build/publish latest docker images
on:
schedule:
- cron: "30 14 * * *" # UTC time
- cron: "30 14 * * 1-5" # UTC time
workflow_dispatch:
env:
EXAMPLES: ${{ vars.NIGHTLY_RELEASE_EXAMPLES }}
EXAMPLES: CodeGen,CodeTrans #${{ vars.NIGHTLY_RELEASE_EXAMPLES }}
TAG: "latest"
PUBLISH_TAGS: "latest"
@@ -32,30 +32,54 @@ jobs:
echo "TAG=$TAG" >> $GITHUB_OUTPUT
echo "PUBLISH_TAGS=$PUBLISH_TAGS" >> $GITHUB_OUTPUT
build-and-test:
needs: get-build-matrix
build-comps-base:
needs: [get-build-matrix]
uses: ./.github/workflows/_build_comps_base_image.yml
with:
node: gaudi
build-images:
needs: [get-build-matrix, build-comps-base]
strategy:
matrix:
example: ${{ fromJSON(needs.get-build-matrix.outputs.examples_json) }}
fail-fast: false
uses: ./.github/workflows/_build_image.yml
with:
node: gaudi
example: ${{ matrix.example }}
inject_commit: true
secrets: inherit
test-example:
needs: [get-build-matrix]
if: ${{ needs.get-build-matrix.outputs.examples_json != '' }}
strategy:
matrix:
example: ${{ fromJSON(needs.get-build-matrix.outputs.examples_json) }}
fail-fast: false
uses: ./.github/workflows/_example-workflow.yml
with:
node: gaudi
node: xeon
build: false
example: ${{ matrix.example }}
test_compose: true
inject_commit: true
secrets: inherit
get-image-list:
needs: get-build-matrix
needs: [get-build-matrix]
uses: ./.github/workflows/_get-image-list.yml
with:
examples: ${{ needs.get-build-matrix.outputs.EXAMPLES }}
publish:
needs: [get-build-matrix, get-image-list, build-and-test]
needs: [get-build-matrix, get-image-list, build-images]
if: ${{ success() }}
strategy:
matrix:
image: ${{ fromJSON(needs.get-image-list.outputs.matrix) }}
fail-fast: false
runs-on: "docker-build-gaudi"
steps:
- uses: docker/login-action@v3.2.0

View File

@@ -19,6 +19,9 @@ concurrency:
jobs:
job1:
name: Get-Test-Matrix
permissions:
contents: read
pull-requests: read
runs-on: ubuntu-latest
outputs:
run_matrix: ${{ steps.get-test-matrix.outputs.run_matrix }}
@@ -46,6 +49,8 @@ jobs:
example=$(echo "$values_file" | cut -d'/' -f1) # CodeGen
if [[ "$valuefile" == *"gaudi"* ]]; then
hardware="gaudi"
elif [[ "$valuefile" == *"rocm"* ]]; then
hardware="rocm"
elif [[ "$valuefile" == *"nv"* ]]; then
continue
else
@@ -65,7 +70,7 @@ jobs:
helm-chart-test:
needs: [job1]
if: always() && ${{ needs.job1.outputs.run_matrix.example.length > 0 }}
if: always() && ${{ fromJSON(needs.job1.outputs.run_matrix).length != 0 }}
uses: ./.github/workflows/_helm-e2e.yml
strategy:
matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }}

View File

@@ -32,15 +32,16 @@ jobs:
example-test:
needs: [get-test-matrix]
if: ${{ needs.get-test-matrix.outputs.run_matrix != '' }}
strategy:
matrix: ${{ fromJSON(needs.get-test-matrix.outputs.run_matrix) }}
fail-fast: false
if: ${{ !github.event.pull_request.draft }}
uses: ./.github/workflows/_run-docker-compose.yml
with:
registry: "opea"
tag: "ci"
example: ${{ matrix.example }}
hardware: ${{ matrix.hardware }}
use_model_cache: true
diff_excluded_files: '\.github|\.md|\.txt|kubernetes|gmc|assets|benchmark'
secrets: inherit

View File

@@ -76,13 +76,7 @@ jobs:
cd ${{github.workspace}}
fail="FALSE"
repo_name=${{ github.event.pull_request.head.repo.full_name }}
if [ "$(echo "$repo_name"|cut -d'/' -f1)" != "opea-project" ]; then
owner=$(echo "${{ github.event.pull_request.head.repo.full_name }}" |cut -d'/' -f1)
branch="https://github.com/$owner/GenAIExamples/tree/${{ github.event.pull_request.head.ref }}"
else
branch="https://github.com/opea-project/GenAIExamples/blob/${{ github.event.pull_request.head.ref }}"
fi
link_head="https://github.com/opea-project/GenAIExamples/blob/main"
branch="https://github.com/$repo_name/blob/${{ github.event.pull_request.head.ref }}"
merged_commit=$(git log -1 --format='%H')
changed_files="$(git diff --name-status --diff-filter=ARM ${{ github.event.pull_request.base.sha }} ${merged_commit} | awk '/\.md$/ {print $NF}')"

View File

@@ -24,6 +24,7 @@ jobs:
image-build:
needs: job1
if: ${{ needs.job1.outputs.run_matrix != '{"include":[]}' }}
strategy:
matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }}
fail-fast: false

View File

@@ -54,6 +54,6 @@ jobs:
${{ env.changed_files }}
Please verify if the helm charts and manifests need to be changed accordingly.
Please verify if the helm charts need to be changed accordingly.
> This issue was created automatically by CI.

View File

@@ -7,7 +7,7 @@ source /GenAIExamples/.github/workflows/scripts/change_color
log_dir=/GenAIExamples/.github/workflows/scripts/codeScan
ERROR_WARN=false
find . -type f \( -name "Dockerfile*" \) -print -exec hadolint --ignore DL3006 --ignore DL3007 --ignore DL3008 --ignore DL3013 {} \; > ${log_dir}/hadolint.log
find . -type f \( -name "Dockerfile*" \) -print -exec hadolint --ignore DL3006 --ignore DL3007 --ignore DL3008 --ignore DL3013 --ignore DL3018 --ignore DL3016 {} \; > ${log_dir}/hadolint.log
if [[ $(grep -c "error" ${log_dir}/hadolint.log) != 0 ]]; then
$BOLD_RED && echo "Error!! Please Click on the artifact button to download and check error details." && $RESET

View File

@@ -30,13 +30,20 @@ case "$1" in
echo "$ports"
for port in $ports; do
if [[ $port =~ [a-zA-Z_-] ]]; then
port=$(grep -E "export $port=" tests/$test_case | cut -d'=' -f2)
echo "Search port value $port from the test case..."
port_fix=$(grep -E "export $port=" tests/$test_case | cut -d'=' -f2)
if [[ "$port_fix" == "" ]]; then
echo "Can't find the port value from the test case, use the default value in yaml..."
port_fix=$(yq '.services[].ports[]' $yaml_file | grep $port | cut -d':' -f2 | grep -o '[0-9a-zA-Z]\+')
fi
port=$port_fix
fi
if [[ $port =~ [0-9] ]]; then
if [[ $port == 5000 ]]; then
echo "Error: Port 5000 is used by local docker registry, please DO NOT use it in docker compose deployment!!!"
exit 1
fi
echo "Check port $port..."
cid=$(docker ps --filter "publish=${port}" --format "{{.ID}}")
if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && echo "release $port"; fi
fi

View File

@@ -12,6 +12,7 @@ run_matrix="{\"include\":["
examples=$(printf '%s\n' "${changed_files[@]}" | grep '/' | cut -d'/' -f1 | sort -u)
for example in ${examples}; do
if [[ ! -d $WORKSPACE/$example ]]; then continue; fi
cd $WORKSPACE/$example
if [[ ! $(find . -type f | grep ${test_mode}) ]]; then continue; fi
cd tests
@@ -26,7 +27,10 @@ for example in ${examples}; do
run_hardware=""
if [[ $(printf '%s\n' "${changed_files[@]}" | grep ${example} | cut -d'/' -f2 | grep -E '\.py|Dockerfile*|ui|docker_image_build' ) ]]; then
# run test on all hardware if megaservice or ui code change
echo "run test on all hardware if megaservice or ui code change..."
run_hardware=$hardware_list
elif [[ $(printf '%s\n' "${changed_files[@]}" | grep ${example} | grep 'tests'| cut -d'/' -f3 | grep -vE '^test_|^_test' ) ]]; then
echo "run test on all hardware if common test scripts change..."
run_hardware=$hardware_list
else
for hardware in ${hardware_list}; do

View File

@@ -0,0 +1,55 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: Weekly test all examples on multiple HWs
on:
schedule:
- cron: "30 2 * * 6" # UTC time
workflow_dispatch:
env:
EXAMPLES: ${{ vars.NIGHTLY_RELEASE_EXAMPLES }}
NODES: "gaudi,xeon,rocm,arc"
jobs:
get-test-matrix:
runs-on: ubuntu-latest
outputs:
examples: ${{ steps.get-matrix.outputs.examples }}
nodes: ${{ steps.get-matrix.outputs.nodes }}
steps:
- name: Create Matrix
id: get-matrix
run: |
examples=($(echo ${EXAMPLES} | tr ',' ' '))
examples_json=$(printf '%s\n' "${examples[@]}" | sort -u | jq -R '.' | jq -sc '.')
echo "examples=$examples_json" >> $GITHUB_OUTPUT
nodes=($(echo ${NODES} | tr ',' ' '))
nodes_json=$(printf '%s\n' "${nodes[@]}" | sort -u | jq -R '.' | jq -sc '.')
echo "nodes=$nodes_json" >> $GITHUB_OUTPUT
build-comps-base:
needs: [get-test-matrix]
strategy:
matrix:
node: ${{ fromJson(needs.get-test-matrix.outputs.nodes) }}
uses: ./.github/workflows/_build_comps_base_image.yml
with:
node: ${{ matrix.node }}
run-examples:
needs: [get-test-matrix, build-comps-base]
strategy:
matrix:
example: ${{ fromJson(needs.get-test-matrix.outputs.examples) }}
node: ${{ fromJson(needs.get-test-matrix.outputs.nodes) }}
fail-fast: false
uses: ./.github/workflows/_example-workflow.yml
with:
node: ${{ matrix.node }}
example: ${{ matrix.example }}
build: true
test_compose: true
test_helmchart: true
secrets: inherit

View File

@@ -1,11 +1,9 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: Weekly update base images and 3rd party images
name: Weekly update 3rd party images
on:
schedule:
- cron: "0 0 * * 0"
workflow_dispatch:
permissions:
@@ -16,8 +14,8 @@ jobs:
freeze-images:
runs-on: ubuntu-latest
env:
USER_NAME: "NeuralChatBot"
USER_EMAIL: "grp_neural_chat_bot@intel.com"
USER_NAME: "CICD-at-OPEA"
USER_EMAIL: "CICD@opea.dev"
BRANCH_NAME: "update_images_tag"
steps:
- name: Checkout repository

View File

@@ -74,7 +74,7 @@ repos:
name: Unused noqa
- repo: https://github.com/pycqa/isort
rev: 5.13.2
rev: 6.0.1
hooks:
- id: isort
@@ -100,7 +100,7 @@ repos:
- prettier@3.2.5
- repo: https://github.com/psf/black.git
rev: 24.10.0
rev: 25.1.0
hooks:
- id: black
files: (.*\.py)$
@@ -114,7 +114,7 @@ repos:
- black==24.10.0
- repo: https://github.com/codespell-project/codespell
rev: v2.3.0
rev: v2.4.1
hooks:
- id: codespell
args: [-w]
@@ -122,7 +122,7 @@ repos:
- tomli
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.8.6
rev: v0.11.4
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix, --no-cache]

View File

@@ -1,8 +1,17 @@
# Agents for Question Answering
## Table of contents
1. [Overview](#overview)
2. [Deploy with Docker](#deploy-with-docker)
3. [How to interact with the agent system with UI](#how-to-interact-with-the-agent-system-with-ui)
4. [Validate Services](#validate-services)
5. [Register Tools](#how-to-register-other-tools-with-the-ai-agent)
6. [Monitoring and Tracing](#monitor-and-tracing)
## Overview
This example showcases a hierarchical multi-agent system for question-answering applications. The architecture diagram is shown below. The supervisor agent interfaces with the user and dispatch tasks to two worker agents to gather information and come up with answers. The worker RAG agent uses the retrieval tool to retrieve relevant documents from the knowledge base (a vector database). The worker SQL agent retrieve relevant data from the SQL database. Although not included in this example, but other tools such as a web search tool or a knowledge graph query tool can be used by the supervisor agent to gather information from additional sources.
This example showcases a hierarchical multi-agent system for question-answering applications. The architecture diagram below shows a supervisor agent that interfaces with the user and dispatches tasks to two worker agents to gather information and come up with answers. The worker RAG agent uses the retrieval tool to retrieve relevant documents from a knowledge base - a vector database. The worker SQL agent retrieves relevant data from a SQL database. Although not included in this example by default, other tools such as a web search tool or a knowledge graph query tool can be used by the supervisor agent to gather information from additional sources.
![Architecture Overview](assets/img/agent_qna_arch.png)
The AgentQnA example is implemented using the component-level microservices defined in [GenAIComps](https://github.com/opea-project/GenAIComps). The flow chart below shows the information flow between different microservices for this example.
@@ -75,201 +84,178 @@ flowchart LR
```
### Why Agent for question answering?
### Why should AI Agents be used for question-answering?
1. Improve relevancy of retrieved context.
RAG agent can rephrase user queries, decompose user queries, and iterate to get the most relevant context for answering user's questions. Compared to conventional RAG, RAG agent can significantly improve the correctness and relevancy of the answer.
2. Expand scope of the agent.
The supervisor agent can interact with multiple worker agents that specialize in different domains with different skills (e.g., retrieve documents, write SQL queries, etc.), and thus can answer questions in multiple domains.
3. Hierarchical multi-agents can improve performance.
Expert worker agents, such as RAG agent and SQL agent, can provide high-quality output for different aspects of a complex query, and the supervisor agent can aggregate the information together to provide a comprehensive answer. If we only use one agent and provide all the tools to this single agent, it may get overwhelmed and not able to provide accurate answers.
1. **Improve relevancy of retrieved context.**
RAG agents can rephrase user queries, decompose user queries, and iterate to get the most relevant context for answering a user's question. Compared to conventional RAG, RAG agents significantly improve the correctness and relevancy of the answer because of the iterations it goes through.
2. **Expand scope of skills.**
The supervisor agent interacts with multiple worker agents that specialize in different skills (e.g., retrieve documents, write SQL queries, etc.). Thus, it can answer questions with different methods.
3. **Hierarchical multi-agents improve performance.**
Expert worker agents, such as RAG agents and SQL agents, can provide high-quality output for different aspects of a complex query, and the supervisor agent can aggregate the information to provide a comprehensive answer. If only one agent is used and all tools are provided to this single agent, it can lead to large overhead or not use the best tool to provide accurate answers.
## Deployment with docker
## Deploy with docker
1. Build agent docker image [Optional]
### 1. Set up environment </br>
> [!NOTE]
> This step is optional. The docker images will be pulled automatically when running the docker compose commands. It is only needed if pulling images fails.
First, clone the opea GenAIComps repo.
#### First, clone the `GenAIExamples` repo.
```
export WORKDIR=<your-work-directory>
cd $WORKDIR
git clone https://github.com/opea-project/GenAIComps.git
git clone https://github.com/opea-project/GenAIExamples.git
```
Then build the agent docker image. Both the supervisor agent and the worker agent will use the same docker image, but when we launch the two agents we will specify different strategies and register different tools.
#### Second, set up environment variables.
##### For proxy environments only
```
cd GenAIComps
docker build -t opea/agent:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/agent/src/Dockerfile .
export http_proxy="Your_HTTP_Proxy"
export https_proxy="Your_HTTPs_Proxy"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy"
```
2. Set up environment for this example </br>
##### For using open-source llms
First, clone this repo.
```
export HUGGINGFACEHUB_API_TOKEN=<your-HF-token>
export HF_CACHE_DIR=<directory-where-llms-are-downloaded> #so that no need to redownload every time
```
```
export WORKDIR=<your-work-directory>
cd $WORKDIR
git clone https://github.com/opea-project/GenAIExamples.git
```
##### [Optional] OPENAI_API_KEY to use OpenAI models
Second, set up env vars.
```
export OPENAI_API_KEY=<your-openai-key>
```
```
# Example: host_ip="192.168.1.1" or export host_ip="External_Public_IP"
export host_ip=$(hostname -I | awk '{print $1}')
# if you are in a proxy environment, also set the proxy-related environment variables
export http_proxy="Your_HTTP_Proxy"
export https_proxy="Your_HTTPs_Proxy"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy"
#### Third, set up environment variables for the selected hardware using the corresponding `set_env.sh`
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
# for using open-source llms
export HUGGINGFACEHUB_API_TOKEN=<your-HF-token>
export HF_CACHE_DIR=<directory-where-llms-are-downloaded> #so that no need to redownload every time
##### Gaudi
# optional: OPENAI_API_KEY if you want to use OpenAI models
export OPENAI_API_KEY=<your-openai-key>
```
```
source $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi/set_env.sh
```
3. Deploy the retrieval tool (i.e., DocIndexRetriever mega-service)
##### Xeon
First, launch the mega-service.
```
source $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/cpu/xeon/set_env.sh
```
```
cd $WORKDIR/GenAIExamples/AgentQnA/retrieval_tool
bash launch_retrieval_tool.sh
```
### 2. Launch the multi-agent system. </br>
Then, ingest data into the vector database. Here we provide an example. You can ingest your own data.
We make it convenient to launch the whole system with docker compose, which includes microservices for the LLM, agents, UI, retrieval tool, vector database, dataprep, and telemetry. There are 3 docker compose files, which make it easy for users to pick and choose: a retrieval tool other than the `DocIndexRetriever` example provided in our GenAIExamples repo can be used, and the telemetry containers can be left out.
```
bash run_ingest_data.sh
```
#### Launch on Gaudi
4. Prepare SQL database
In this example, we will use the Chinook SQLite database. Run the commands below.
On Gaudi, `meta-llama/Meta-Llama-3.3-70B-Instruct` will be served using vllm. The command below will launch the multi-agent system with the `DocIndexRetriever` as the retrieval tool for the Worker RAG agent.
```
# Download data
cd $WORKDIR
git clone https://github.com/lerocha/chinook-database.git
cp chinook-database/ChinookDatabase/DataSources/Chinook_Sqlite.sqlite $WORKDIR/GenAIExamples/AgentQnA/tests/
```
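To confirm the database was copied correctly, a quick sanity check can be run with the `sqlite3` CLI (assuming it is installed); the Chinook schema includes an `Employee` table:

```bash
# Sanity-check the copied Chinook database (assumes the sqlite3 CLI is installed).
sqlite3 $WORKDIR/GenAIExamples/AgentQnA/tests/Chinook_Sqlite.sqlite \
  "SELECT COUNT(*) FROM Employee;"
# Expected output: 8
```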
```bash
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi/
docker compose -f $WORKDIR/GenAIExamples/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml -f compose.yaml up -d
```
5. Launch other tools. </br>
In this example, we will use some of the mock APIs provided in the Meta CRAG KDD Challenge to demonstrate the benefits of gaining additional context from mock knowledge graphs.
> **Note**: To enable the web search tool, skip this step and proceed to the "[Optional] Web Search Tool Support" section.
```
docker run -d -p=8080:8000 docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0
```
To enable OpenTelemetry tracing, the compose.telemetry.yaml file needs to be merged along with the default compose.yaml file.
Gaudi example with the OpenTelemetry feature:
6. Launch multi-agent system. </br>
We provide two options for `llm_engine` of the agents: 1. open-source LLMs on Intel Gaudi2, 2. OpenAI models via API calls.
```bash
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi/
docker compose -f $WORKDIR/GenAIExamples/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml -f compose.yaml -f compose.telemetry.yaml up -d
```
::::{tab-set}
:::{tab-item} Gaudi
:sync: Gaudi
##### [Optional] Web Search Tool Support
On Gaudi2 we will serve `meta-llama/Meta-Llama-3.1-70B-Instruct` using vllm.
<details>
<summary> Instructions </summary>
A web search tool is supported in this example and can be enabled by running docker compose with the `compose.webtool.yaml` file.
The Google Search API is used. Follow the [instructions](https://python.langchain.com/docs/integrations/tools/google_search) to create an API key and enable the Custom Search API on a Google account. The environment variables `GOOGLE_CSE_ID` and `GOOGLE_API_KEY` need to be set.
First build vllm-gaudi docker image.
```bash
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi/
export GOOGLE_CSE_ID="YOUR_ID"
export GOOGLE_API_KEY="YOUR_API_KEY"
docker compose -f $WORKDIR/GenAIExamples/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml -f compose.yaml -f compose.webtool.yaml up -d
```
```bash
cd $WORKDIR
git clone https://github.com/vllm-project/vllm.git
cd ./vllm
git checkout v0.6.6
docker build --no-cache -f Dockerfile.hpu -t opea/vllm-gaudi:latest --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy
```
</details>
Then launch vllm on Gaudi2 with the command below.
#### Launch on Xeon
```bash
vllm_port=8086
model="meta-llama/Meta-Llama-3.1-70B-Instruct"
docker run -d --runtime=habana --rm --name "vllm-gaudi-server" -e HABANA_VISIBLE_DEVICES=0,1,2,3 -p $vllm_port:8000 -v $vllm_volume:/data -e HF_TOKEN=$HF_TOKEN -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e HF_HOME=/data -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e VLLM_SKIP_WARMUP=true --cap-add=sys_nice --ipc=host opea/vllm-gaudi:latest --model ${model} --max-seq-len-to-capture 16384 --tensor-parallel-size 4
```
On Xeon, only OpenAI models are supported. The command below will launch the multi-agent system with the `DocIndexRetriever` as the retrieval tool for the Worker RAG agent.
Then launch Agent microservices.
```bash
export OPENAI_API_KEY=<your-openai-key>
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/cpu/xeon
docker compose -f $WORKDIR/GenAIExamples/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml -f compose_openai.yaml up -d
```
```bash
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi/
bash launch_agent_service_gaudi.sh
```
### 3. Ingest Data into the vector database
:::
:::{tab-item} Xeon
:sync: Xeon
The `run_ingest_data.sh` script will use an example jsonl file to ingest example documents into a vector database. Other ways to ingest data, and the other document types supported, can be found in the OPEA dataprep microservice located in the opea-project/GenAIComps repo.
To use OpenAI models, run commands below.
```bash
cd $WORKDIR/GenAIExamples/AgentQnA/retrieval_tool/
bash run_ingest_data.sh
```
```
export OPENAI_API_KEY=<your-openai-key>
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/cpu/xeon
bash launch_agent_service_openai.sh
```
> **Note**: This is a one-time operation.
:::
::::
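To ingest your own documents instead of the provided example, the dataprep microservice started by the retrieval tool can be called directly. A minimal sketch, assuming the dataprep service listens on port 6007 and exposes a `/v1/dataprep/ingest` route; verify both against your `DocIndexRetriever` compose file:

```bash
# Hypothetical sketch: upload a local PDF to the dataprep microservice.
# The port (6007) and route (/v1/dataprep/ingest) are assumptions; check the
# DocIndexRetriever compose file for the actual values in your deployment.
curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \
  -H "Content-Type: multipart/form-data" \
  -F "files=@./your_document.pdf"
```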
## How to interact with the agent system with UI
## Deploy using Helm Chart
The UI microservice is launched in the previous step with the other microservices.
To see the UI, open a web browser to `http://${ip_address}:5173`. Note that `ip_address` here is the host IP of the UI microservice.
1. `Create Admin Account` with a random value
2. Add the OPEA agent endpoint `http://$ip_address:9090/v1`, which is an OpenAI-compatible API (see the example request after this list)
![opea-agent-setting](assets/img/opea-agent-setting.png)
3. Test the OPEA agent with the UI
![opea-agent-test](assets/img/opea-agent-test.png)
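The same endpoint can also be exercised outside the UI, since it is OpenAI-compatible. A minimal sketch; the `model` value below is a placeholder and may be ignored or overridden by the deployment:

```bash
# Query the supervisor agent through its OpenAI-compatible chat completions API.
# The model name is a placeholder; substitute whatever your deployment serves.
curl http://${ip_address}:9090/v1/chat/completions \
  -X POST \
  -H "Content-Type: application/json" \
  -d '{
    "model": "opea-agent",
    "messages": [{"role": "user", "content": "How many albums does Iron Maiden have?"}]
  }'
```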
## [Optional] Deploy using Helm Charts
Refer to the [AgentQnA helm chart](./kubernetes/helm/README.md) for instructions on deploying AgentQnA on Kubernetes.
## Validate services
## Validate Services
First look at logs of the agent docker containers:
1. First look at logs for each of the agent docker containers:
```
```bash
# worker RAG agent
docker logs rag-agent-endpoint
# worker SQL agent
docker logs sql-agent-endpoint
```
```
# supervisor agent
docker logs react-agent-endpoint
```
You should see something like "HTTP server setup successful" if the docker containers are started successfully.
Look for the message "HTTP server setup successful" to confirm the agent docker container has started successfully.
Second, validate worker RAG agent:
2. Use Python to validate that each agent is working properly:
```
curl http://${host_ip}:9095/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
"messages": "Michael Jackson song Thriller"
}'
```bash
# RAG worker agent
python $WORKDIR/GenAIExamples/AgentQnA/tests/test.py --prompt "Tell me about Michael Jackson song Thriller" --agent_role "worker" --ext_port 9095
# SQL agent
python $WORKDIR/GenAIExamples/AgentQnA/tests/test.py --prompt "How many employees in company" --agent_role "worker" --ext_port 9096
# supervisor agent: this will test a two-turn conversation
python $WORKDIR/GenAIExamples/AgentQnA/tests/test.py --agent_role "supervisor" --ext_port 9090
```
Third, validate worker SQL agent:
## How to register other tools with the AI agent
```
curl http://${host_ip}:9096/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
"messages": "How many employees are in the company"
}'
```
The [tools](./tools) folder contains YAML and Python files for additional tools for the supervisor and worker agents. Refer to the "Provide your own tools" section in the instructions [here](https://github.com/opea-project/GenAIComps/tree/main/comps/agent/src/README.md) to add tools and customize the AI agents.
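As a rough illustration of the expected shape, the sketch below appends a hypothetical tool entry to a custom YAML file. The field names mirror the general pattern of the files in [tools](./tools), but they are assumptions here; take the exact schema from those files and the GenAIComps agent README.

```bash
# Hypothetical sketch only: the field names below follow the general shape of the
# tool YAML files in ./tools, but should be checked against the actual files.
cat <<'EOF' >> $TOOLSET_PATH/custom_tools.yaml
get_stock_price:
  description: Look up the latest stock price for a ticker symbol.
  callable_api: custom_tools.py:get_stock_price
  args_schema:
    ticker:
      type: str
      description: Stock ticker symbol, e.g. INTC
  return_output: price
EOF
```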
Finally, validate supervisor agent:
## Monitor and Tracing
```
curl http://${host_ip}:9090/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
"messages": "How many albums does Iron Maiden have?"
}'
```
## Deploy AgentQnA UI
The AgentQnA UI can be deployed locally or using Docker.
For detailed instructions on deploying AgentQnA UI, refer to the [AgentQnA UI Guide](./ui/svelte/README.md).
## How to register your own tools with agent
You can take a look at the tools yaml and python files in this example. For more details, please refer to the "Provide your own tools" section in the instructions [here](https://github.com/opea-project/GenAIComps/tree/main/comps/agent/src/README.md).
Follow [OpenTelemetry OPEA Guide](https://opea-project.github.io/latest/tutorial/OpenTelemetry/OpenTelemetry_OPEA_Guide.html) to understand how to use OpenTelemetry tracing and metrics in OPEA.
For AgentQnA-specific tracing and metrics monitoring, follow the [OpenTelemetry on AgentQnA](https://opea-project.github.io/latest/tutorial/OpenTelemetry/deploy/AgentQnA.html) section.

Binary files not shown (two images added: 71 KiB and 99 KiB).

View File

@@ -1,101 +1,342 @@
# Single node on-prem deployment with Docker Compose on AMD GPU
# Build Mega Service of AgentQnA on AMD ROCm GPU
This example showcases a hierarchical multi-agent system for question-answering applications. We deploy the example on Xeon. For LLMs, we use OpenAI models via API calls. For instructions on using open-source LLMs, please refer to the deployment guide [here](../../../../README.md).
## Build Docker Images
## Deployment with docker
### 1. Build Docker Image
1. First, clone this repo.
```
export WORKDIR=<your-work-directory>
cd $WORKDIR
git clone https://github.com/opea-project/GenAIExamples.git
```
2. Set up environment for this example </br>
- #### Create application install directory and go to it:
```
# Example: host_ip="192.168.1.1" or export host_ip="External_Public_IP"
export host_ip=$(hostname -I | awk '{print $1}')
# if you are in a proxy environment, also set the proxy-related environment variables
export http_proxy="Your_HTTP_Proxy"
export https_proxy="Your_HTTPs_Proxy"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy"
```bash
mkdir -p ~/agentqna-install && cd ~/agentqna-install
```
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
# OPENAI_API_KEY if you want to use OpenAI models
export OPENAI_API_KEY=<your-openai-key>
# Set AMD GPU settings
export AGENTQNA_CARD_ID="card1"
export AGENTQNA_RENDER_ID="renderD136"
```
- #### Clone the repository GenAIExamples (the default repository branch "main" is used here):
3. Deploy the retrieval tool (i.e., DocIndexRetriever mega-service)
```bash
git clone https://github.com/opea-project/GenAIExamples.git
```
First, launch the mega-service.
If you need to use a specific branch/tag of the GenAIExamples repository, check it out as follows (replace v1.3 with the desired value):
```
cd $WORKDIR/GenAIExamples/AgentQnA/retrieval_tool
bash launch_retrieval_tool.sh
```
```bash
git clone https://github.com/opea-project/GenAIExamples.git && cd GenAIExamples && git checkout v1.3
```
Then, ingest data into the vector database. Here we provide an example. You can ingest your own data.
We remind you that when using a specific version of the code, you need to follow the README from that version.
```
bash run_ingest_data.sh
```
- #### Go to build directory:
4. Launch Tool service
In this example, we will use some of the mock APIs provided in the Meta CRAG KDD Challenge to demonstrate the benefits of gaining additional context from mock knowledge graphs.
```
docker run -d -p=8080:8000 docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0
```
5. Launch `Agent` service
```bash
cd ~/agentqna-install/GenAIExamples/AgentQnA/docker_image_build
```
```
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/amd/gpu/rocm
bash launch_agent_service_tgi_rocm.sh
```
- Clean up the GenAIComps repository if it was previously cloned in this directory.
This is necessary if a build was performed earlier and the GenAIComps folder exists and is not empty:
6. [Optional] Build `Agent` docker image if pulling images failed.
```bash
echo Y | rm -R GenAIComps
```
```
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
docker build -t opea/agent:latest -f comps/agent/src/Dockerfile .
```
- #### Clone the repository GenAIComps (the default repository branch "main" is used here):
## Validate services
First look at logs of the agent docker containers:
```
# worker agent
docker logs rag-agent-endpoint
```bash
git clone https://github.com/opea-project/GenAIComps.git
```
```
# supervisor agent
docker logs react-agent-endpoint
We remind you that when using a specific version of the code, you need to follow the README from that version.
- #### Setting the list of images for the build (from the build.yaml file)
If you want to deploy a vLLM-based or TGI-based application, set the list of services as follows:
#### vLLM-based application
```bash
service_list="vllm-rocm agent agent-ui"
```
#### TGI-based application
```bash
service_list="agent agent-ui"
```
- #### Optional. Pull TGI Docker Image (Do this if you want to use TGI)
```bash
docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
```
- #### Build Docker Images
```bash
docker compose -f build.yaml build ${service_list} --no-cache
```
- #### Build DocIndexRetriever Docker Images
```bash
cd ~/agentqna-install/GenAIExamples/DocIndexRetriever/docker_image_build/
git clone https://github.com/opea-project/GenAIComps.git
service_list="doc-index-retriever dataprep embedding retriever reranking"
docker compose -f build.yaml build ${service_list} --no-cache
```
- #### Pull DocIndexRetriever Docker Images
```bash
docker pull redis/redis-stack:7.2.0-v9
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
```
After the build, we check the list of images with the command:
```bash
docker image ls
```
The list of images should include:
##### vLLM-based application:
- opea/vllm-rocm:latest
- opea/agent:latest
- redis/redis-stack:7.2.0-v9
- ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
- opea/embedding:latest
- opea/retriever:latest
- opea/reranking:latest
- opea/doc-index-retriever:latest
##### TGI-based application:
- ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
- opea/agent:latest
- redis/redis-stack:7.2.0-v9
- ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
- opea/embedding:latest
- opea/retriever:latest
- opea/reranking:latest
- opea/doc-index-retriever:latest
---
## Deploy the AgentQnA Application
### Docker Compose Configuration for AMD GPUs
To enable GPU support for AMD GPUs, the following configuration is added to the Docker Compose file:
- compose_vllm.yaml - for vLLM-based application
- compose.yaml - for TGI-based
```yaml
shm_size: 1g
devices:
- /dev/kfd:/dev/kfd
- /dev/dri:/dev/dri
cap_add:
- SYS_PTRACE
group_add:
- video
security_opt:
- seccomp:unconfined
```
You should see something like "HTTP server setup successful" if the docker containers are started successfully.
This configuration forwards all available GPUs to the container. To use a specific GPU, specify its `cardN` and `renderN` device IDs. For example:
Second, validate worker agent:
```
curl http://${host_ip}:9095/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
"query": "Most recent album by Taylor Swift"
}'
```yaml
shm_size: 1g
devices:
- /dev/kfd:/dev/kfd
- /dev/dri/card0:/dev/dri/card0
- /dev/dri/render128:/dev/dri/render128
cap_add:
- SYS_PTRACE
group_add:
- video
security_opt:
- seccomp:unconfined
```
Third, validate supervisor agent:
**How to Identify GPU Device IDs:**
Use AMD GPU driver utilities to determine the correct `cardN` and `renderN` IDs for your GPU.
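A minimal sketch for finding the IDs, assuming the ROCm utilities are installed on the host:

```bash
# List DRI device nodes; each GPU exposes a cardN / renderDN pair.
ls -l /dev/dri
# Show the GPUs known to the ROCm stack (assumes rocm-smi is installed).
rocm-smi
```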
```
curl http://${host_ip}:9090/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
"query": "Most recent album by Taylor Swift"
}'
### Set deploy environment variables
#### Setting variables in the operating system environment:
```bash
### Replace the string 'server_address' with your local server IP address
export host_ip='server_address'
### Replace the string 'your_huggingfacehub_token' with your HuggingFacehub repository access token.
export HUGGINGFACEHUB_API_TOKEN='your_huggingfacehub_token'
### Replace the string 'your_langchain_api_key' with your LANGCHAIN API KEY.
export LANGCHAIN_API_KEY='your_langchain_api_key'
export LANGCHAIN_TRACING_V2=""
```
## How to register your own tools with agent
### Start the services:
You can take a look at the tools yaml and python files in this example. For more details, please refer to the "Provide your own tools" section in the instructions [here](https://github.com/opea-project/GenAIComps/tree/main/comps/agent/src/README.md).
#### If you use vLLM
```bash
cd ~/agentqna-install/GenAIExamples/AgentQnA/docker_compose/amd/gpu/rocm
bash launch_agent_service_vllm_rocm.sh
```
#### If you use TGI
```bash
cd ~/agentqna-install/GenAIExamples/AgentQnA/docker_compose/amd/gpu/rocm
bash launch_agent_service_tgi_rocm.sh
```
All containers should be running and should not restart:
##### If you use vLLM:
- dataprep-redis-server
- doc-index-retriever-server
- embedding-server
- rag-agent-endpoint
- react-agent-endpoint
- redis-vector-db
- reranking-tei-xeon-server
- retriever-redis-server
- sql-agent-endpoint
- tei-embedding-server
- tei-reranking-server
- vllm-service
##### If you use TGI:
- dataprep-redis-server
- doc-index-retriever-server
- embedding-server
- rag-agent-endpoint
- react-agent-endpoint
- redis-vector-db
- reranking-tei-xeon-server
- retriever-redis-server
- sql-agent-endpoint
- tei-embedding-server
- tei-reranking-server
- tgi-service
---
## Validate the Services
### 1. Validate the vLLM/TGI Service
#### If you use vLLM:
```bash
DATA='{"model": "Intel/neural-chat-7b-v3-3t", '\
'"messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 256}'
curl http://${HOST_IP}:${VLLM_SERVICE_PORT}/v1/chat/completions \
-X POST \
-d "$DATA" \
-H 'Content-Type: application/json'
```
Check the response from the service. It should be JSON similar to:
```json
{
"id": "chatcmpl-142f34ef35b64a8db3deedd170fed951",
"object": "chat.completion",
"created": 1742270316,
"model": "Intel/neural-chat-7b-v3-3",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "",
"tool_calls": []
},
"logprobs": null,
"finish_reason": "length",
"stop_reason": null
}
],
"usage": { "prompt_tokens": 66, "total_tokens": 322, "completion_tokens": 256, "prompt_tokens_details": null },
"prompt_logprobs": null
}
```
If the service response contains a meaningful answer in the "choices[0].message.content" field,
the vLLM service launched successfully.
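To check this field programmatically, the same request can be piped through `jq` (a sketch, assuming `jq` is installed):

```bash
# Extract the generated text from the vLLM response (assumes jq is installed).
curl -s http://${HOST_IP}:${VLLM_SERVICE_PORT}/v1/chat/completions \
  -X POST -d "$DATA" -H 'Content-Type: application/json' \
  | jq -r '.choices[0].message.content'
```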
#### If you use TGI:
```bash
DATA='{"inputs":"What is Deep Learning?",'\
'"parameters":{"max_new_tokens":256,"do_sample": true}}'
curl http://${HOST_IP}:${TGI_SERVICE_PORT}/generate \
-X POST \
-d "$DATA" \
-H 'Content-Type: application/json'
```
Check the response from the service. It should be JSON similar to:
```json
{
"generated_text": " "
}
```
If the service response contains a meaningful answer in the "generated_text" field,
the TGI service launched successfully.
### 2. Validate Agent Services
#### Validate Rag Agent Service
```bash
export agent_port=${WORKER_RAG_AGENT_PORT}
prompt="Tell me about Michael Jackson song Thriller"
python3 ~/agentqna-install/GenAIExamples/AgentQnA/tests/test.py --prompt "$prompt" --agent_role "worker" --ext_port $agent_port
```
The response should contain meaningful text answering the request in the "prompt" variable.
#### Validate Sql Agent Service
```bash
export agent_port=${WORKER_SQL_AGENT_PORT}
prompt="How many employees are there in the company?"
python3 ~/agentqna-install/GenAIExamples/AgentQnA/tests/test.py --prompt "$prompt" --agent_role "worker" --ext_port $agent_port
```
The answer should make sense: "8 employees in the company".
#### Validate React (Supervisor) Agent Service
```bash
export agent_port=${SUPERVISOR_REACT_AGENT_PORT}
python3 ~/agentqna-install/GenAIExamples/AgentQnA/tests/test.py --agent_role "supervisor" --ext_port $agent_port --stream
```
The response should contain "Iron Maiden"
### 3. Stop application
#### If you use vLLM
```bash
cd ~/agentqna-install/GenAIExamples/AgentQnA/docker_compose/amd/gpu/rocm
bash stop_agent_service_vllm_rocm.sh
```
#### If you use TGI
```bash
cd ~/agentqna-install/GenAIExamples/AgentQnA/docker_compose/amd/gpu/rocm
bash stop_agent_service_tgi_rocm.sh
```

View File

@@ -1,26 +1,24 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# Copyright (C) 2025 Advanced Micro Devices, Inc.
services:
agent-tgi-server:
image: ${AGENTQNA_TGI_IMAGE}
container_name: agent-tgi-server
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:3.0.0-rocm
container_name: tgi-service
ports:
- "${AGENTQNA_TGI_SERVICE_PORT-8085}:80"
- "${TGI_SERVICE_PORT-8085}:80"
volumes:
- /var/opea/agent-service/:/data
- "${MODEL_CACHE:-./data}:/data"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: "http://${HOST_IP}:${AGENTQNA_TGI_SERVICE_PORT}"
TGI_LLM_ENDPOINT: "http://${ip_address}:${TGI_SERVICE_PORT}"
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
shm_size: 1g
shm_size: 32g
devices:
- /dev/kfd:/dev/kfd
- /dev/dri/${AGENTQNA_CARD_ID}:/dev/dri/${AGENTQNA_CARD_ID}
- /dev/dri/${AGENTQNA_RENDER_ID}:/dev/dri/${AGENTQNA_RENDER_ID}
- /dev/dri:/dev/dri
cap_add:
- SYS_PTRACE
group_add:
@@ -31,17 +29,17 @@ services:
command: --model-id ${LLM_MODEL_ID} --max-input-length 4096 --max-total-tokens 8192
worker-rag-agent:
image: opea/agent:latest
image: ${REGISTRY:-opea}/agent:${TAG:-latest}
container_name: rag-agent-endpoint
volumes:
# - ${WORKDIR}/GenAIExamples/AgentQnA/docker_image_build/GenAIComps/comps/agent/langchain/:/home/user/comps/agent/langchain/
- ${TOOLSET_PATH}:/home/user/tools/
- "${TOOLSET_PATH}:/home/user/tools/"
ports:
- "9095:9095"
- "${WORKER_RAG_AGENT_PORT:-9095}:9095"
ipc: host
environment:
ip_address: ${ip_address}
strategy: rag_agent_llama
with_memory: false
recursion_limit: ${recursion_limit_worker}
llm_engine: tgi
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
@@ -61,21 +59,49 @@ services:
LANGCHAIN_PROJECT: "opea-worker-agent-service"
port: 9095
supervisor-react-agent:
image: opea/agent:latest
container_name: react-agent-endpoint
depends_on:
- agent-tgi-server
- worker-rag-agent
worker-sql-agent:
image: ${REGISTRY:-opea}/agent:${TAG:-latest}
container_name: sql-agent-endpoint
volumes:
# - ${WORKDIR}/GenAIExamples/AgentQnA/docker_image_build/GenAIComps/comps/agent/langchain/:/home/user/comps/agent/langchain/
- ${TOOLSET_PATH}:/home/user/tools/
- "${WORKDIR}/tests/Chinook_Sqlite.sqlite:/home/user/chinook-db/Chinook_Sqlite.sqlite:rw"
ports:
- "${AGENTQNA_FRONTEND_PORT}:9090"
- "${WORKER_SQL_AGENT_PORT:-9096}:9096"
ipc: host
environment:
ip_address: ${ip_address}
strategy: react_langgraph
strategy: sql_agent_llama
with_memory: false
db_name: ${db_name}
db_path: ${db_path}
use_hints: false
recursion_limit: ${recursion_limit_worker}
llm_engine: vllm
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
llm_endpoint_url: ${LLM_ENDPOINT_URL}
model: ${LLM_MODEL_ID}
temperature: ${temperature}
max_new_tokens: ${max_new_tokens}
stream: false
require_human_feedback: false
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
port: 9096
supervisor-react-agent:
image: ${REGISTRY:-opea}/agent:${TAG:-latest}
container_name: react-agent-endpoint
depends_on:
- worker-rag-agent
volumes:
- "${TOOLSET_PATH}:/home/user/tools/"
ports:
- "${SUPERVISOR_REACT_AGENT_PORT:-9090}:9090"
ipc: host
environment:
ip_address: ${ip_address}
strategy: react_llama
with_memory: true
recursion_limit: ${recursion_limit_supervisor}
llm_engine: tgi
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
@@ -83,7 +109,7 @@ services:
model: ${LLM_MODEL_ID}
temperature: ${temperature}
max_new_tokens: ${max_new_tokens}
stream: false
stream: true
tools: /home/user/tools/supervisor_agent_tools.yaml
require_human_feedback: false
no_proxy: ${no_proxy}
@@ -92,6 +118,7 @@ services:
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-supervisor-agent-service"
CRAG_SERVER: $CRAG_SERVER
WORKER_AGENT_URL: $WORKER_AGENT_URL
CRAG_SERVER: ${CRAG_SERVER}
WORKER_AGENT_URL: ${WORKER_AGENT_URL}
SQL_AGENT_URL: ${SQL_AGENT_URL}
port: 9090


@@ -0,0 +1,128 @@
# Copyright (C) 2025 Advanced Micro Devices, Inc.
services:
vllm-service:
image: ${REGISTRY:-opea}/vllm-rocm:${TAG:-latest}
container_name: vllm-service
ports:
- "${VLLM_SERVICE_PORT:-8081}:8011"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
WILM_USE_TRITON_FLASH_ATTENTION: 0
PYTORCH_JIT: 0
volumes:
- "${MODEL_CACHE:-./data}:/data"
shm_size: 20G
devices:
- /dev/kfd:/dev/kfd
- /dev/dri/:/dev/dri/
cap_add:
- SYS_PTRACE
group_add:
- video
security_opt:
- seccomp:unconfined
- apparmor=unconfined
command: "--model ${VLLM_LLM_MODEL_ID} --swap-space 16 --disable-log-requests --dtype float16 --tensor-parallel-size 4 --host 0.0.0.0 --port 8011 --num-scheduler-steps 1 --distributed-executor-backend \"mp\""
ipc: host
worker-rag-agent:
image: ${REGISTRY:-opea}/agent:${TAG:-latest}
container_name: rag-agent-endpoint
volumes:
- ${TOOLSET_PATH}:/home/user/tools/
ports:
- "${WORKER_RAG_AGENT_PORT:-9095}:9095"
ipc: host
environment:
ip_address: ${ip_address}
strategy: rag_agent_llama
with_memory: false
recursion_limit: ${recursion_limit_worker}
llm_engine: vllm
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
llm_endpoint_url: ${LLM_ENDPOINT_URL}
model: ${LLM_MODEL_ID}
temperature: ${temperature}
max_new_tokens: ${max_new_tokens}
stream: false
tools: /home/user/tools/worker_agent_tools.yaml
require_human_feedback: false
RETRIEVAL_TOOL_URL: ${RETRIEVAL_TOOL_URL}
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-worker-agent-service"
port: 9095
worker-sql-agent:
image: ${REGISTRY:-opea}/agent:${TAG:-latest}
container_name: sql-agent-endpoint
volumes:
- "${WORKDIR}/tests/Chinook_Sqlite.sqlite:/home/user/chinook-db/Chinook_Sqlite.sqlite:rw"
ports:
- "${WORKER_SQL_AGENT_PORT:-9096}:9096"
ipc: host
environment:
ip_address: ${ip_address}
strategy: sql_agent_llama
with_memory: false
db_name: ${db_name}
db_path: ${db_path}
use_hints: false
recursion_limit: ${recursion_limit_worker}
llm_engine: vllm
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
llm_endpoint_url: ${LLM_ENDPOINT_URL}
model: ${LLM_MODEL_ID}
temperature: ${temperature}
max_new_tokens: ${max_new_tokens}
stream: false
require_human_feedback: false
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
port: 9096
supervisor-react-agent:
image: ${REGISTRY:-opea}/agent:${TAG:-latest}
container_name: react-agent-endpoint
depends_on:
- worker-rag-agent
volumes:
- ${TOOLSET_PATH}:/home/user/tools/
ports:
- "${SUPERVISOR_REACT_AGENT_PORT:-9090}:9090"
ipc: host
environment:
ip_address: ${ip_address}
strategy: react_llama
with_memory: true
recursion_limit: ${recursion_limit_supervisor}
llm_engine: vllm
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
llm_endpoint_url: ${LLM_ENDPOINT_URL}
model: ${LLM_MODEL_ID}
temperature: ${temperature}
max_new_tokens: ${max_new_tokens}
stream: true
tools: /home/user/tools/supervisor_agent_tools.yaml
require_human_feedback: false
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-supervisor-agent-service"
CRAG_SERVER: ${CRAG_SERVER}
WORKER_AGENT_URL: ${WORKER_AGENT_URL}
SQL_AGENT_URL: ${SQL_AGENT_URL}
port: 9090


@@ -1,47 +1,87 @@
# Copyright (C) 2024 Advanced Micro Devices, Inc.
# SPDX-License-Identifier: Apache-2.0
WORKPATH=$(dirname "$PWD")/..
export ip_address=${host_ip}
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
export AGENTQNA_TGI_IMAGE=ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
export AGENTQNA_TGI_SERVICE_PORT="8085"
# Before start script:
# export host_ip="your_host_ip_or_host_name"
# export HUGGINGFACEHUB_API_TOKEN="your_huggingface_api_token"
# export LANGCHAIN_API_KEY="your_langchain_api_key"
# export LANGCHAIN_TRACING_V2=""
# LLM related environment variables
export AGENTQNA_CARD_ID="card1"
export AGENTQNA_RENDER_ID="renderD136"
export HF_CACHE_DIR=${HF_CACHE_DIR}
ls $HF_CACHE_DIR
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
#export NUM_SHARDS=4
export LLM_ENDPOINT_URL="http://${ip_address}:${AGENTQNA_TGI_SERVICE_PORT}"
# Set server hostname or IP address
export ip_address=${host_ip}
# Set services IP ports
export TGI_SERVICE_PORT="18110"
export WORKER_RAG_AGENT_PORT="18111"
export WORKER_SQL_AGENT_PORT="18112"
export SUPERVISOR_REACT_AGENT_PORT="18113"
export CRAG_SERVER_PORT="18114"
export WORKPATH=$(dirname "$PWD")
export WORKDIR=${WORKPATH}/../../../
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export HF_CACHE_DIR="./data"
export MODEL_CACHE="./data"
export TOOLSET_PATH=${WORKPATH}/../../../tools/
export recursion_limit_worker=12
export LLM_ENDPOINT_URL=http://${ip_address}:${TGI_SERVICE_PORT}
export temperature=0.01
export max_new_tokens=512
# agent related environment variables
export AGENTQNA_WORKER_AGENT_SERVICE_PORT="9095"
export TOOLSET_PATH=/home/huggingface/datamonsters/amd-opea/GenAIExamples/AgentQnA/tools/
echo "TOOLSET_PATH=${TOOLSET_PATH}"
export RETRIEVAL_TOOL_URL="http://${ip_address}:8889/v1/retrievaltool"
export LANGCHAIN_API_KEY=${LANGCHAIN_API_KEY}
export LANGCHAIN_TRACING_V2=${LANGCHAIN_TRACING_V2}
export db_name=Chinook
export db_path="sqlite:////home/user/chinook-db/Chinook_Sqlite.sqlite"
export recursion_limit_worker=12
export recursion_limit_supervisor=10
export WORKER_AGENT_URL="http://${ip_address}:${AGENTQNA_WORKER_AGENT_SERVICE_PORT}/v1/chat/completions"
export RETRIEVAL_TOOL_URL="http://${ip_address}:8889/v1/retrievaltool"
export CRAG_SERVER=http://${ip_address}:18881
export AGENTQNA_FRONTEND_PORT="9090"
#retrieval_tool
export CRAG_SERVER=http://${ip_address}:${CRAG_SERVER_PORT}
export WORKER_AGENT_URL="http://${ip_address}:${WORKER_RAG_AGENT_PORT}/v1/chat/completions"
export SQL_AGENT_URL="http://${ip_address}:${WORKER_SQL_AGENT_PORT}/v1/chat/completions"
export HF_CACHE_DIR=${HF_CACHE_DIR}
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export no_proxy=${no_proxy}
export http_proxy=${http_proxy}
export https_proxy=${https_proxy}
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006"
export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
export REDIS_URL="redis://${host_ip}:26379"
export REDIS_URL="redis://${host_ip}:6379"
export INDEX_NAME="rag-redis"
export RERANK_TYPE="tei"
export MEGA_SERVICE_HOST_IP=${host_ip}
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
export RERANK_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8889/v1/retrievaltool"
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest"
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get"
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete"
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6008/v1/dataprep/get"
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6009/v1/dataprep/delete"
echo ${WORKER_RAG_AGENT_PORT} > ${WORKPATH}/WORKER_RAG_AGENT_PORT_tmp
echo ${WORKER_SQL_AGENT_PORT} > ${WORKPATH}/WORKER_SQL_AGENT_PORT_tmp
echo ${SUPERVISOR_REACT_AGENT_PORT} > ${WORKPATH}/SUPERVISOR_REACT_AGENT_PORT_tmp
echo ${CRAG_SERVER_PORT} > ${WORKPATH}/CRAG_SERVER_PORT_tmp
echo "Downloading chinook data..."
echo Y | rm -R chinook-database
git clone https://github.com/lerocha/chinook-database.git
echo Y | rm -R ../../../../../AgentQnA/tests/Chinook_Sqlite.sqlite
cp chinook-database/ChinookDatabase/DataSources/Chinook_Sqlite.sqlite ../../../../../AgentQnA/tests
docker compose -f ../../../../../DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml up -d
docker compose -f compose.yaml up -d
n=0
until [[ "$n" -ge 100 ]]; do
docker logs tgi-service > ${WORKPATH}/tgi_service_start.log
if grep -q Connected ${WORKPATH}/tgi_service_start.log; then
break
fi
sleep 10s
n=$((n+1))
done
echo "Starting CRAG server"
docker run -d --runtime=runc --name=kdd-cup-24-crag-service -p=${CRAG_SERVER_PORT}:8000 docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0


@@ -0,0 +1,88 @@
# Copyright (C) 2024 Advanced Micro Devices, Inc.
# SPDX-License-Identifier: Apache-2.0
# Before start script:
# export host_ip="your_host_ip_or_host_name"
# export HUGGINGFACEHUB_API_TOKEN="your_huggingface_api_token"
# export LANGCHAIN_API_KEY="your_langchain_api_key"
# export LANGCHAIN_TRACING_V2=""
# Set server hostname or IP address
export ip_address=${host_ip}
# Set services IP ports
export VLLM_SERVICE_PORT="18110"
export WORKER_RAG_AGENT_PORT="18111"
export WORKER_SQL_AGENT_PORT="18112"
export SUPERVISOR_REACT_AGENT_PORT="18113"
export CRAG_SERVER_PORT="18114"
export WORKPATH=$(dirname "$PWD")
export WORKDIR=${WORKPATH}/../../../
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export VLLM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export HF_CACHE_DIR="./data"
export MODEL_CACHE="./data"
export TOOLSET_PATH=${WORKPATH}/../../../tools/
export recursion_limit_worker=12
export LLM_ENDPOINT_URL=http://${ip_address}:${VLLM_SERVICE_PORT}
export LLM_MODEL_ID=${VLLM_LLM_MODEL_ID}
export temperature=0.01
export max_new_tokens=512
export RETRIEVAL_TOOL_URL="http://${ip_address}:8889/v1/retrievaltool"
export LANGCHAIN_API_KEY=${LANGCHAIN_API_KEY}
export LANGCHAIN_TRACING_V2=${LANGCHAIN_TRACING_V2}
export db_name=Chinook
export db_path="sqlite:////home/user/chinook-db/Chinook_Sqlite.sqlite"
export recursion_limit_worker=12
export recursion_limit_supervisor=10
export CRAG_SERVER=http://${ip_address}:${CRAG_SERVER_PORT}
export WORKER_AGENT_URL="http://${ip_address}:${WORKER_RAG_AGENT_PORT}/v1/chat/completions"
export SQL_AGENT_URL="http://${ip_address}:${WORKER_SQL_AGENT_PORT}/v1/chat/completions"
export HF_CACHE_DIR=${HF_CACHE_DIR}
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export no_proxy=${no_proxy}
export http_proxy=${http_proxy}
export https_proxy=${https_proxy}
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006"
export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
export REDIS_URL="redis://${host_ip}:6379"
export INDEX_NAME="rag-redis"
export RERANK_TYPE="tei"
export MEGA_SERVICE_HOST_IP=${host_ip}
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
export RERANK_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8889/v1/retrievaltool"
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest"
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6008/v1/dataprep/get"
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6009/v1/dataprep/delete"
echo ${WORKER_RAG_AGENT_PORT} > ${WORKPATH}/WORKER_RAG_AGENT_PORT_tmp
echo ${WORKER_SQL_AGENT_PORT} > ${WORKPATH}/WORKER_SQL_AGENT_PORT_tmp
echo ${SUPERVISOR_REACT_AGENT_PORT} > ${WORKPATH}/SUPERVISOR_REACT_AGENT_PORT_tmp
echo ${CRAG_SERVER_PORT} > ${WORKPATH}/CRAG_SERVER_PORT_tmp
echo "Downloading chinook data..."
echo Y | rm -R chinook-database
git clone https://github.com/lerocha/chinook-database.git
echo Y | rm -R ../../../../../AgentQnA/tests/Chinook_Sqlite.sqlite
cp chinook-database/ChinookDatabase/DataSources/Chinook_Sqlite.sqlite ../../../../../AgentQnA/tests
docker compose -f ../../../../../DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml up -d
docker compose -f compose_vllm.yaml up -d
n=0
until [[ "$n" -ge 500 ]]; do
docker logs vllm-service >& "${WORKPATH}"/vllm-service_start.log
if grep -q "Application startup complete" "${WORKPATH}"/vllm-service_start.log; then
break
fi
sleep 20s
n=$((n+1))
done
echo "Starting CRAG server"
docker run -d --runtime=runc --name=kdd-cup-24-crag-service -p=${CRAG_SERVER_PORT}:8000 docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0


@@ -6,7 +6,7 @@
WORKPATH=$(dirname "$PWD")/..
export ip_address=${host_ip}
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
export AGENTQNA_TGI_IMAGE=ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
export AGENTQNA_TGI_IMAGE=ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
export AGENTQNA_TGI_SERVICE_PORT="19001"
# LLM related environment variables
@@ -14,7 +14,7 @@ export AGENTQNA_CARD_ID="card1"
export AGENTQNA_RENDER_ID="renderD136"
export HF_CACHE_DIR=${HF_CACHE_DIR}
ls $HF_CACHE_DIR
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export NUM_SHARDS=4
export LLM_ENDPOINT_URL="http://${ip_address}:${AGENTQNA_TGI_SERVICE_PORT}"
export temperature=0.01
@@ -44,3 +44,19 @@ export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8889/v1/retrievaltool"
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest"
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get"
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete"
echo "Removing chinook data..."
echo Y | rm -R chinook-database
if [ -d "chinook-database" ]; then
rm -rf chinook-database
fi
echo "Chinook data removed!"
echo "Stopping CRAG server"
docker rm kdd-cup-24-crag-service --force
echo "Stopping Agent services"
docker compose -f compose.yaml down
echo "Stopping Retrieval services"
docker compose -f ../../../../../DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml down


@@ -0,0 +1,84 @@
# Copyright (C) 2024 Advanced Micro Devices, Inc.
# SPDX-License-Identifier: Apache-2.0
# Before start script:
# export host_ip="your_host_ip_or_host_name"
# export HUGGINGFACEHUB_API_TOKEN="your_huggingface_api_token"
# export LANGCHAIN_API_KEY="your_langchain_api_key"
# export LANGCHAIN_TRACING_V2=""
# Set server hostname or IP address
export ip_address=${host_ip}
# Set services IP ports
export VLLM_SERVICE_PORT="18110"
export WORKER_RAG_AGENT_PORT="18111"
export WORKER_SQL_AGENT_PORT="18112"
export SUPERVISOR_REACT_AGENT_PORT="18113"
export CRAG_SERVER_PORT="18114"
export WORKPATH=$(dirname "$PWD")
export WORKDIR=${WORKPATH}/../../../
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export VLLM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export HF_CACHE_DIR="./data"
export MODEL_CACHE="./data"
export TOOLSET_PATH=${WORKPATH}/../../../tools/
export recursion_limit_worker=12
export LLM_ENDPOINT_URL=http://${ip_address}:${VLLM_SERVICE_PORT}
export LLM_MODEL_ID=${VLLM_LLM_MODEL_ID}
export temperature=0.01
export max_new_tokens=512
export RETRIEVAL_TOOL_URL="http://${ip_address}:8889/v1/retrievaltool"
export LANGCHAIN_API_KEY=${LANGCHAIN_API_KEY}
export LANGCHAIN_TRACING_V2=${LANGCHAIN_TRACING_V2}
export db_name=Chinook
export db_path="sqlite:////home/user/chinook-db/Chinook_Sqlite.sqlite"
export recursion_limit_worker=12
export recursion_limit_supervisor=10
export CRAG_SERVER=http://${ip_address}:${CRAG_SERVER_PORT}
export WORKER_AGENT_URL="http://${ip_address}:${WORKER_RAG_AGENT_PORT}/v1/chat/completions"
export SQL_AGENT_URL="http://${ip_address}:${WORKER_SQL_AGENT_PORT}/v1/chat/completions"
export HF_CACHE_DIR=${HF_CACHE_DIR}
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export no_proxy=${no_proxy}
export http_proxy=${http_proxy}
export https_proxy=${https_proxy}
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006"
export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
export REDIS_URL="redis://${host_ip}:6379"
export INDEX_NAME="rag-redis"
export RERANK_TYPE="tei"
export MEGA_SERVICE_HOST_IP=${host_ip}
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
export RERANK_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8889/v1/retrievaltool"
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest"
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6008/v1/dataprep/get"
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6009/v1/dataprep/delete"
echo ${WORKER_RAG_AGENT_PORT} > ${WORKPATH}/WORKER_RAG_AGENT_PORT_tmp
echo ${WORKER_SQL_AGENT_PORT} > ${WORKPATH}/WORKER_SQL_AGENT_PORT_tmp
echo ${SUPERVISOR_REACT_AGENT_PORT} > ${WORKPATH}/SUPERVISOR_REACT_AGENT_PORT_tmp
echo ${CRAG_SERVER_PORT} > ${WORKPATH}/CRAG_SERVER_PORT_tmp
echo "Removing chinook data..."
echo Y | rm -R chinook-database
if [ -d "chinook-database" ]; then
rm -rf chinook-database
fi
echo "Chinook data removed!"
echo "Stopping CRAG server"
docker rm kdd-cup-24-crag-service --force
echo "Stopping Agent services"
docker compose -f compose_vllm.yaml down
echo "Stopping Retrieval services"
docker compose -f ../../../../../DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml down


@@ -1,123 +1,3 @@
# Single node on-prem deployment with Docker Compose on Xeon Scalable processors
This example showcases a hierarchical multi-agent system for question-answering applications. We deploy the example on Xeon. For LLMs, we use OpenAI models via API calls. For instructions on using open-source LLMs, please refer to the deployment guide [here](../../../../README.md).
## Deployment with docker
1. First, clone this repo.
```
export WORKDIR=<your-work-directory>
cd $WORKDIR
git clone https://github.com/opea-project/GenAIExamples.git
```
2. Set up environment for this example </br>
```
# Example: host_ip="192.168.1.1" or export host_ip="External_Public_IP"
export host_ip=$(hostname -I | awk '{print $1}')
# if you are in a proxy environment, also set the proxy-related environment variables
export http_proxy="Your_HTTP_Proxy"
export https_proxy="Your_HTTPs_Proxy"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy"
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
# OPENAI_API_KEY if you want to use OpenAI models
export OPENAI_API_KEY=<your-openai-key>
```
3. Deploy the retrieval tool (i.e., DocIndexRetriever mega-service)
First, launch the mega-service.
```
cd $WORKDIR/GenAIExamples/AgentQnA/retrieval_tool
bash launch_retrieval_tool.sh
```
Then, ingest data into the vector database. Here we provide an example. You can ingest your own data.
```
bash run_ingest_data.sh
```
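As a quick sanity check after ingestion, the retrieval endpoint can be queried directly (a sketch; with the example music data ingested, the response should mention "Thriller"):
```bash
# Sketch: verify ingested data is retrievable
curl http://${host_ip}:8889/v1/retrievaltool -X POST -H "Content-Type: application/json" \
  -d '{"text": "Who sang Thriller"}'
```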
4. Prepare SQL database
In this example, we will use the SQLite database provided in the [TAG-Bench](https://github.com/TAG-Research/TAG-Bench/tree/main). Run the commands below.
```
# Download data
cd $WORKDIR
git clone https://github.com/TAG-Research/TAG-Bench.git
cd TAG-Bench/setup
chmod +x get_dbs.sh
./get_dbs.sh
```
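Optionally, verify the download before wiring it into the agent (a sketch; it assumes the sqlite3 CLI is installed and uses the `california_schools` database configured by this example's launch script):
```bash
# Sketch: list tables in one of the TAG-Bench SQLite databases
sqlite3 $WORKDIR/TAG-Bench/dev_folder/dev_databases/california_schools/california_schools.sqlite ".tables"
```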
5. Launch Tool service
In this example, we will use some of the mock APIs provided in the Meta CRAG KDD Challenge to demonstrate the benefits of gaining additional context from mock knowledge graphs.
```
docker run -d -p=8080:8000 docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0
```
6. Launch multi-agent system
The configurations of the supervisor agent and the worker agents are defined in the docker-compose yaml file. We currently use OpenAI GPT-4o-mini as the LLM.
```
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/cpu/xeon
bash launch_agent_service_openai.sh
```
7. [Optional] Build `Agent` docker image if pulling images failed.
```
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
docker build -t opea/agent:latest -f comps/agent/src/Dockerfile .
```
## Validate services
First look at logs of the agent docker containers:
```
# worker RAG agent
docker logs rag-agent-endpoint
# worker SQL agent
docker logs sql-agent-endpoint
```
```
# supervisor agent
docker logs react-agent-endpoint
```
You should see something like "HTTP server setup successful" if the docker containers started successfully.
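A small loop can run the same check across all three containers (a sketch):
```bash
# Sketch: check readiness of all three agent containers
for c in rag-agent-endpoint sql-agent-endpoint react-agent-endpoint; do
  docker logs "$c" 2>&1 | grep -q "HTTP server setup successful" \
    && echo "$c is ready" || echo "$c is not ready yet"
done
```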
Second, validate worker RAG agent:
```
curl http://${host_ip}:9095/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
"messages": "Michael Jackson song Thriller"
}'
```
Third, validate worker SQL agent:
```
curl http://${host_ip}:9096/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
"messages": "How many employees are in the company?"
}'
```
Finally, validate supervisor agent:
```
curl http://${host_ip}:9090/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
"messages": "How many albums does Iron Maiden have?"
}'
```
## How to register your own tools with agent
You can take a look at the tools yaml and python files in this example. For more details, please refer to the "Provide your own tools" section in the instructions [here](https://github.com/opea-project/GenAIComps/tree/main/comps/agent/src/README.md).
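Before writing your own, it may help to inspect the tool definitions bundled with this example (paths assumed from the `TOOLSET_PATH` used above):
```bash
# Sketch: inspect the bundled tool definitions and their Python implementations
ls $WORKDIR/GenAIExamples/AgentQnA/tools/
cat $WORKDIR/GenAIExamples/AgentQnA/tools/supervisor_agent_tools.yaml
```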
This example showcases a hierarchical multi-agent system for question-answering applications. The Xeon deployment uses OpenAI models via API calls. For instructions, refer to the deployment guide [here](../../../../README.md).


@@ -13,6 +13,7 @@ services:
environment:
ip_address: ${ip_address}
strategy: rag_agent
with_memory: false
recursion_limit: ${recursion_limit_worker}
llm_engine: openai
OPENAI_API_KEY: ${OPENAI_API_KEY}
@@ -35,17 +36,17 @@ services:
image: opea/agent:latest
container_name: sql-agent-endpoint
volumes:
- ${WORKDIR}/TAG-Bench/:/home/user/TAG-Bench # SQL database
- ${WORKDIR}/GenAIExamples/AgentQnA/tests:/home/user/chinook-db # SQL database
ports:
- "9096:9096"
ipc: host
environment:
ip_address: ${ip_address}
strategy: sql_agent
with_memory: false
db_name: ${db_name}
db_path: ${db_path}
use_hints: false
hints_file: /home/user/TAG-Bench/${db_name}_hints.csv
recursion_limit: ${recursion_limit_worker}
llm_engine: openai
OPENAI_API_KEY: ${OPENAI_API_KEY}
@@ -64,6 +65,7 @@ services:
container_name: react-agent-endpoint
depends_on:
- worker-rag-agent
- worker-sql-agent
volumes:
- ${TOOLSET_PATH}:/home/user/tools/
ports:
@@ -71,14 +73,15 @@ services:
ipc: host
environment:
ip_address: ${ip_address}
strategy: react_langgraph
strategy: react_llama
with_memory: true
recursion_limit: ${recursion_limit_supervisor}
llm_engine: openai
OPENAI_API_KEY: ${OPENAI_API_KEY}
model: ${model}
temperature: ${temperature}
max_new_tokens: ${max_new_tokens}
stream: false
stream: true
tools: /home/user/tools/supervisor_agent_tools.yaml
require_human_feedback: false
no_proxy: ${no_proxy}
@@ -89,4 +92,21 @@ services:
LANGCHAIN_PROJECT: "opea-supervisor-agent-service"
CRAG_SERVER: $CRAG_SERVER
WORKER_AGENT_URL: $WORKER_AGENT_URL
SQL_AGENT_URL: $SQL_AGENT_URL
port: 9090
mock-api:
image: docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0
container_name: mock-api
ports:
- "8080:8000"
ipc: host
agent-ui:
image: opea/agent-ui
container_name: agent-ui
ports:
- "5173:8080"
ipc: host
networks:
default:
driver: bridge


@@ -1,22 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
pushd "../../../../../" > /dev/null
source .set_env.sh
popd > /dev/null
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
export ip_address=$(hostname -I | awk '{print $1}')
export recursion_limit_worker=12
export recursion_limit_supervisor=10
export model="gpt-4o-mini-2024-07-18"
export temperature=0
export max_new_tokens=4096
export OPENAI_API_KEY=${OPENAI_API_KEY}
export WORKER_AGENT_URL="http://${ip_address}:9095/v1/chat/completions"
export SQL_AGENT_URL="http://${ip_address}:9096/v1/chat/completions"
export RETRIEVAL_TOOL_URL="http://${ip_address}:8889/v1/retrievaltool"
export CRAG_SERVER=http://${ip_address}:8080
export db_name=california_schools
export db_path="sqlite:////home/user/TAG-Bench/dev_folder/dev_databases/${db_name}/${db_name}.sqlite"
docker compose -f compose_openai.yaml up -d


@@ -0,0 +1,57 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
pushd "../../../../../" > /dev/null
source .set_env.sh
popd > /dev/null
if [[ -z "${WORKDIR}" ]]; then
echo "Please set WORKDIR environment variable"
exit 0
fi
echo "WORKDIR=${WORKDIR}"
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
export ip_address=$(hostname -I | awk '{print $1}')
export recursion_limit_worker=12
export recursion_limit_supervisor=10
export model="gpt-4o-mini-2024-07-18"
export temperature=0
export max_new_tokens=4096
export OPENAI_API_KEY=${OPENAI_API_KEY}
export WORKER_AGENT_URL="http://${ip_address}:9095/v1/chat/completions"
export SQL_AGENT_URL="http://${ip_address}:9096/v1/chat/completions"
export RETRIEVAL_TOOL_URL="http://${ip_address}:8889/v1/retrievaltool"
export CRAG_SERVER=http://${ip_address}:8080
export db_name=Chinook
export db_path="sqlite:////home/user/chinook-db/Chinook_Sqlite.sqlite"
if [ ! -f $WORKDIR/GenAIExamples/AgentQnA/tests/Chinook_Sqlite.sqlite ]; then
echo "Download Chinook_Sqlite!"
wget -O $WORKDIR/GenAIExamples/AgentQnA/tests/Chinook_Sqlite.sqlite https://github.com/lerocha/chinook-database/releases/download/v1.4.5/Chinook_Sqlite.sqlite
fi
# retriever
export host_ip=$(hostname -I | awk '{print $1}')
export HF_CACHE_DIR=${HF_CACHE_DIR}
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export no_proxy=${no_proxy}
export http_proxy=${http_proxy}
export https_proxy=${https_proxy}
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006"
export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
export REDIS_URL="redis://${host_ip}:6379"
export INDEX_NAME="rag-redis"
export RERANK_TYPE="tei"
export MEGA_SERVICE_HOST_IP=${host_ip}
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
export RERANK_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8889/v1/retrievaltool"
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest"
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6008/v1/dataprep/get"
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6009/v1/dataprep/delete"
export no_proxy="$no_proxy,rag-agent-endpoint,sql-agent-endpoint,react-agent-endpoint,agent-ui"


@@ -1,147 +1,3 @@
# Single node on-prem deployment AgentQnA on Gaudi
This example showcases a hierarchical multi-agent system for question-answering applications. We deploy the example on Gaudi using open-source LLMs.
For more details, please refer to the deployment guide [here](../../../../README.md).
## Deployment with docker
1. First, clone this repo.
```
export WORKDIR=<your-work-directory>
cd $WORKDIR
git clone https://github.com/opea-project/GenAIExamples.git
```
2. Set up environment for this example </br>
```
# Example: host_ip="192.168.1.1" or export host_ip="External_Public_IP"
export host_ip=$(hostname -I | awk '{print $1}')
# if you are in a proxy environment, also set the proxy-related environment variables
export http_proxy="Your_HTTP_Proxy"
export https_proxy="Your_HTTPs_Proxy"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy"
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
# for using open-source llms
export HUGGINGFACEHUB_API_TOKEN=<your-HF-token>
# Example export HF_CACHE_DIR=$WORKDIR so that no need to redownload every time
export HF_CACHE_DIR=<directory-where-llms-are-downloaded>
```
3. Deploy the retrieval tool (i.e., DocIndexRetriever mega-service)
First, launch the mega-service.
```
cd $WORKDIR/GenAIExamples/AgentQnA/retrieval_tool
bash launch_retrieval_tool.sh
```
Then, ingest data into the vector database. Here we provide an example. You can ingest your own data.
```
bash run_ingest_data.sh
```
4. Prepare SQL database
In this example, we will use the Chinook SQLite database. Run the commands below.
```
# Download data
cd $WORKDIR
git clone https://github.com/lerocha/chinook-database.git
cp chinook-database/ChinookDatabase/DataSources/Chinook_Sqlite.sqlite $WORKDIR/GenAIExamples/AgentQnA/tests/
```
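Optionally, confirm the database copied correctly (a sketch; it assumes the sqlite3 CLI is installed). Chinook's Employee table has 8 rows, which is also the expected answer when validating the SQL agent later:
```bash
# Sketch: verify the Chinook database is readable
sqlite3 $WORKDIR/GenAIExamples/AgentQnA/tests/Chinook_Sqlite.sqlite "SELECT COUNT(*) FROM Employee;"
```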
5. Launch Tool service
In this example, we will use some of the mock APIs provided in the Meta CRAG KDD Challenge to demonstrate the benefits of gaining additional context from mock knowledge graphs.
```
docker run -d -p=8080:8000 docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0
```
6. Launch multi-agent system
On Gaudi2 we will serve `meta-llama/Meta-Llama-3.1-70B-Instruct` using vllm.
First build vllm-gaudi docker image.
```bash
cd $WORKDIR
git clone https://github.com/vllm-project/vllm.git
cd ./vllm
git checkout v0.6.6
docker build --no-cache -f Dockerfile.hpu -t opea/vllm-gaudi:latest --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy
```
Then launch vllm on Gaudi2 with the command below.
```bash
vllm_port=8086
model="meta-llama/Meta-Llama-3.1-70B-Instruct"
docker run -d --runtime=habana --rm --name "vllm-gaudi-server" -e HABANA_VISIBLE_DEVICES=0,1,2,3 -p $vllm_port:8000 -v $vllm_volume:/data -e HF_TOKEN=$HF_TOKEN -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e HF_HOME=/data -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e VLLM_SKIP_WARMUP=true --cap-add=sys_nice --ipc=host opea/vllm-gaudi:latest --model ${model} --max-seq-len-to-capture 16384 --tensor-parallel-size 4
```
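The model download and warmup can take a while; here is a sketch that blocks until the server reports healthy (the `/health` route matches the healthcheck used in the compose file):
```bash
# Sketch: wait for the vLLM server to become healthy before launching the agents
until curl -sf http://localhost:${vllm_port}/health > /dev/null; do
  echo "waiting for vllm-gaudi-server..."
  sleep 10
done
```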
Then launch Agent microservices.
```bash
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi/
bash launch_agent_service_gaudi.sh
```
7. [Optional] Build `Agent` docker image if pulling images failed.
If pulling the docker images failed in Step 6 above, build the agent docker image with the commands below. After the image is built, retry Step 6.
```
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
docker build -t opea/agent:latest -f comps/agent/src/Dockerfile .
```
## Validate services
First look at logs of the agent docker containers:
```
# worker RAG agent
docker logs rag-agent-endpoint
# worker SQL agent
docker logs sql-agent-endpoint
```
```
# supervisor agent
docker logs react-agent-endpoint
```
You should see something like "HTTP server setup successful" if the docker containers started successfully.
Second, validate worker RAG agent:
```
curl http://${host_ip}:9095/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
"messages": "Michael Jackson song Thriller"
}'
```
Third, validate worker SQL agent:
```
curl http://${host_ip}:9096/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
"messages": "How many employees are in the company?"
}'
```
Finally, validate supervisor agent:
```
curl http://${host_ip}:9090/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
"messages": "How many albums does Iron Maiden have?"
}'
```
## How to register your own tools with agent
You can take a look at the tools yaml and python files in this example. For more details, please refer to the "Provide your own tools" section in the instructions [here](https://github.com/opea-project/GenAIComps/tree/main/comps/agent/src/README.md).
This example showcases a hierarchical multi-agent system for question-answering applications. To deploy the example on Gaudi using open-source LLMs, refer to the deployment guide [here](../../../../README.md).


@@ -0,0 +1,93 @@
# Copyright (C) 2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
services:
tei-embedding-service:
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
tei-reranking-service:
command: --model-id ${RERANK_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
jaeger:
image: jaegertracing/all-in-one:1.67.0
container_name: jaeger
ports:
- "16686:16686"
- "4317:4317"
- "4318:4318"
- "9411:9411"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
COLLECTOR_ZIPKIN_HOST_PORT: 9411
restart: unless-stopped
prometheus:
image: prom/prometheus:v2.52.0
container_name: prometheus
user: root
volumes:
- ./prometheus.yaml:/etc/prometheus/prometheus.yaml
- ./prometheus_data:/prometheus
command:
- '--config.file=/etc/prometheus/prometheus.yaml'
ports:
- '9091:9090'
ipc: host
restart: unless-stopped
grafana:
image: grafana/grafana:11.0.0
container_name: grafana
volumes:
- ./grafana_data:/var/lib/grafana
- ./grafana/dashboards:/var/lib/grafana/dashboards
- ./grafana/provisioning:/etc/grafana/provisioning
user: root
environment:
GF_SECURITY_ADMIN_PASSWORD: admin
GF_RENDERING_CALLBACK_URL: http://grafana:3000/
GF_LOG_FILTERS: rendering:debug
depends_on:
- prometheus
ports:
- '3000:3000'
ipc: host
restart: unless-stopped
node-exporter:
image: prom/node-exporter
container_name: node-exporter
volumes:
- /proc:/host/proc:ro
- /sys:/host/sys:ro
- /:/rootfs:ro
command:
- '--path.procfs=/host/proc'
- '--path.sysfs=/host/sys'
- --collector.filesystem.ignored-mount-points
- "^/(sys|proc|dev|host|etc|rootfs/var/lib/docker/containers|rootfs/var/lib/docker/overlay2|rootfs/run/docker/netns|rootfs/var/lib/docker/aufs)($$|/)"
ports:
- 9100:9100
restart: always
deploy:
mode: global
gaudi-exporter:
image: vault.habana.ai/gaudi-metric-exporter/metric-exporter:1.19.2-32
container_name: gaudi-exporter
volumes:
- /proc:/host/proc:ro
- /sys:/host/sys:ro
- /:/rootfs:ro
- /dev:/dev
ports:
- 41612:41611
restart: always
deploy:
mode: global
worker-rag-agent:
environment:
- TELEMETRY_ENDPOINT=${TELEMETRY_ENDPOINT}
worker-sql-agent:
environment:
- TELEMETRY_ENDPOINT=${TELEMETRY_ENDPOINT}
supervisor-react-agent:
environment:
- TELEMETRY_ENDPOINT=${TELEMETRY_ENDPOINT}


@@ -0,0 +1,9 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
services:
supervisor-react-agent:
environment:
- tools=/home/user/tools/supervisor_agent_webtools.yaml
- GOOGLE_CSE_ID=${GOOGLE_CSE_ID}
- GOOGLE_API_KEY=${GOOGLE_API_KEY}


@@ -3,7 +3,7 @@
services:
worker-rag-agent:
image: opea/agent:latest
image: ${REGISTRY:-opea}/agent:${TAG:-latest}
container_name: rag-agent-endpoint
volumes:
- ${TOOLSET_PATH}:/home/user/tools/
@@ -13,6 +13,7 @@ services:
environment:
ip_address: ${ip_address}
strategy: rag_agent_llama
with_memory: false
recursion_limit: ${recursion_limit_worker}
llm_engine: vllm
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
@@ -33,7 +34,7 @@ services:
port: 9095
worker-sql-agent:
image: opea/agent:latest
image: ${REGISTRY:-opea}/agent:${TAG:-latest}
container_name: sql-agent-endpoint
volumes:
- ${WORKDIR}/GenAIExamples/AgentQnA/tests:/home/user/chinook-db # test db
@@ -43,6 +44,7 @@ services:
environment:
ip_address: ${ip_address}
strategy: sql_agent_llama
with_memory: false
db_name: ${db_name}
db_path: ${db_path}
use_hints: false
@@ -61,7 +63,7 @@ services:
port: 9096
supervisor-react-agent:
image: opea/agent:latest
image: ${REGISTRY:-opea}/agent:${TAG:-latest}
container_name: react-agent-endpoint
depends_on:
- worker-rag-agent
@@ -74,6 +76,7 @@ services:
environment:
ip_address: ${ip_address}
strategy: react_llama
with_memory: true
recursion_limit: ${recursion_limit_supervisor}
llm_engine: vllm
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
@@ -81,7 +84,7 @@ services:
model: ${LLM_MODEL_ID}
temperature: ${temperature}
max_new_tokens: ${max_new_tokens}
stream: false
stream: true
tools: /home/user/tools/supervisor_agent_tools.yaml
require_human_feedback: false
no_proxy: ${no_proxy}
@@ -94,3 +97,45 @@ services:
WORKER_AGENT_URL: $WORKER_AGENT_URL
SQL_AGENT_URL: $SQL_AGENT_URL
port: 9090
mock-api:
image: docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0
container_name: mock-api
ports:
- "8080:8000"
ipc: host
agent-ui:
image: ${REGISTRY:-opea}/agent-ui:${TAG:-latest}
container_name: agent-ui
environment:
host_ip: ${host_ip}
ports:
- "5173:8080"
ipc: host
vllm-service:
image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest}
container_name: vllm-gaudi-server
ports:
- "8086:8000"
volumes:
- "${MODEL_CACHE:-./data}:/data"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
LLM_MODEL_ID: ${LLM_MODEL_ID}
VLLM_TORCH_PROFILER_DIR: "/mnt"
VLLM_SKIP_WARMUP: true
PT_HPU_ENABLE_LAZY_COLLECTIVES: true
healthcheck:
test: ["CMD-SHELL", "curl -f http://$host_ip:8086/health || exit 1"]
interval: 10s
timeout: 10s
retries: 100
runtime: habana
cap_add:
- SYS_NICE
ipc: host
command: --model $LLM_MODEL_ID --tensor-parallel-size 4 --host 0.0.0.0 --port 8000 --block-size 128 --max-num-seqs 256 --max-seq-len-to-capture 16384


@@ -0,0 +1,10 @@
# Copyright (C) 2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
rm *.json
wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/chatqna_megaservice_grafana.json
mv chatqna_megaservice_grafana.json agentqna_microservices_grafana.json
wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/vllm_grafana.json
wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/tgi_grafana.json
wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/node_grafana.json
wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/gaudi_grafana.json


@@ -0,0 +1,14 @@
# Copyright (C) 2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: 1
providers:
- name: 'default'
orgId: 1
folder: ''
type: file
disableDeletion: false
updateIntervalSeconds: 10 #how often Grafana will scan for changed dashboards
options:
path: /var/lib/grafana/dashboards


@@ -0,0 +1,54 @@
# Copyright (C) 2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# config file version
apiVersion: 1
# list of datasources that should be deleted from the database
deleteDatasources:
- name: Prometheus
orgId: 1
# list of datasources to insert/update depending
# what's available in the database
datasources:
# <string, required> name of the datasource. Required
- name: Prometheus
# <string, required> datasource type. Required
type: prometheus
# <string, required> access mode. direct or proxy. Required
access: proxy
# <int> org id. will default to orgId 1 if not specified
orgId: 1
# <string> url
url: http://prometheus:9090
# <string> database password, if used
password:
# <string> database user, if used
user:
# <string> database name, if used
database:
# <bool> enable/disable basic auth
basicAuth: false
# <string> basic auth username, if used
basicAuthUser:
# <string> basic auth password, if used
basicAuthPassword:
# <bool> enable/disable with credentials headers
withCredentials:
# <bool> mark as default datasource. Max one per org
isDefault: true
# <map> fields that will be converted to json and stored in json_data
jsonData:
httpMethod: GET
graphiteVersion: "1.1"
tlsAuth: false
tlsAuthWithCACert: false
# <string> json object of data that will be encrypted.
secureJsonData:
tlsCACert: "..."
tlsClientCert: "..."
tlsClientKey: "..."
version: 1
# <bool> allow users to edit datasources from the UI.
editable: true


@@ -1,36 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
pushd "../../../../../" > /dev/null
source .set_env.sh
popd > /dev/null
WORKPATH=$(dirname "$PWD")/..
# export WORKDIR=$WORKPATH/../../
echo "WORKDIR=${WORKDIR}"
export ip_address=$(hostname -I | awk '{print $1}')
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
# LLM related environment variables
export HF_CACHE_DIR=${HF_CACHE_DIR}
ls $HF_CACHE_DIR
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-70B-Instruct"
export NUM_SHARDS=4
export LLM_ENDPOINT_URL="http://${ip_address}:8086"
export temperature=0
export max_new_tokens=4096
# agent related environment variables
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
echo "TOOLSET_PATH=${TOOLSET_PATH}"
export recursion_limit_worker=12
export recursion_limit_supervisor=10
export WORKER_AGENT_URL="http://${ip_address}:9095/v1/chat/completions"
export SQL_AGENT_URL="http://${ip_address}:9096/v1/chat/completions"
export RETRIEVAL_TOOL_URL="http://${ip_address}:8889/v1/retrievaltool"
export CRAG_SERVER=http://${ip_address}:8080
export db_name=Chinook
export db_path="sqlite:////home/user/chinook-db/Chinook_Sqlite.sqlite"
docker compose -f compose.yaml up -d


@@ -1,25 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# LLM related environment variables
export HF_CACHE_DIR=${HF_CACHE_DIR}
ls $HF_CACHE_DIR
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-70B-Instruct"
export NUM_SHARDS=4
docker compose -f tgi_gaudi.yaml up -d
sleep 5s
echo "Waiting tgi gaudi ready"
n=0
until [[ "$n" -ge 100 ]] || [[ $ready == true ]]; do
docker logs tgi-server &> tgi-gaudi-service.log
n=$((n+1))
if grep -q Connected tgi-gaudi-service.log; then
break
fi
sleep 5s
done
sleep 5s
echo "Service started successfully"


@@ -0,0 +1,55 @@
# Copyright (C) 2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
global:
scrape_interval: 5s
external_labels:
monitor: "my-monitor"
scrape_configs:
- job_name: "prometheus"
static_configs:
- targets: ["prometheus:9090"]
- job_name: "vllm"
metrics_path: /metrics
static_configs:
- targets: ["vllm-gaudi-server:8000"]
- job_name: "tgi"
metrics_path: /metrics
static_configs:
- targets: ["tgi-gaudi-server:80"]
- job_name: "tei-embedding"
metrics_path: /metrics
static_configs:
- targets: ["tei-embedding-server:80"]
- job_name: "tei-reranking"
metrics_path: /metrics
static_configs:
- targets: ["tei-reranking-server:80"]
- job_name: "retriever"
metrics_path: /metrics
static_configs:
- targets: ["retriever:7000"]
- job_name: "dataprep-redis-service"
metrics_path: /metrics
static_configs:
- targets: ["dataprep-redis-service:5000"]
- job_name: "prometheus-node-exporter"
metrics_path: /metrics
static_configs:
- targets: ["node-exporter:9100"]
- job_name: "prometheus-gaudi-exporter"
metrics_path: /metrics
static_configs:
- targets: ["gaudi-exporter:41611"]
- job_name: "supervisor-react-agent"
metrics_path: /metrics
static_configs:
- targets: ["react-agent-endpoint:9090"]
- job_name: "worker-rag-agent"
metrics_path: /metrics
static_configs:
- targets: ["rag-agent-endpoint:9095"]
- job_name: "worker-sql-agent"
metrics_path: /metrics
static_configs:
- targets: ["sql-agent-endpoint:9096"]


@@ -0,0 +1,72 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
pushd "../../../../../" > /dev/null
source .set_env.sh
popd > /dev/null
WORKPATH=$(dirname "$PWD")/..
# export WORKDIR=$WORKPATH/../../
if [[ -z "${WORKDIR}" ]]; then
echo "Please set WORKDIR environment variable"
exit 0
fi
echo "WORKDIR=${WORKDIR}"
export ip_address=$(hostname -I | awk '{print $1}')
# LLM related environment variables
export HF_CACHE_DIR=${HF_CACHE_DIR}
ls $HF_CACHE_DIR
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export LLM_MODEL_ID="meta-llama/Llama-3.3-70B-Instruct"
export NUM_SHARDS=4
export LLM_ENDPOINT_URL="http://${ip_address}:8086"
export temperature=0
export max_new_tokens=4096
# agent related environment variables
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
echo "TOOLSET_PATH=${TOOLSET_PATH}"
export recursion_limit_worker=12
export recursion_limit_supervisor=10
export WORKER_AGENT_URL="http://${ip_address}:9095/v1/chat/completions"
export SQL_AGENT_URL="http://${ip_address}:9096/v1/chat/completions"
export RETRIEVAL_TOOL_URL="http://${ip_address}:8889/v1/retrievaltool"
export CRAG_SERVER=http://${ip_address}:8080
export db_name=Chinook
export db_path="sqlite:////home/user/chinook-db/Chinook_Sqlite.sqlite"
if [ ! -f $WORKDIR/GenAIExamples/AgentQnA/tests/Chinook_Sqlite.sqlite ]; then
echo "Download Chinook_Sqlite!"
wget -O $WORKDIR/GenAIExamples/AgentQnA/tests/Chinook_Sqlite.sqlite https://github.com/lerocha/chinook-database/releases/download/v1.4.5/Chinook_Sqlite.sqlite
fi
# configure agent ui
# echo "AGENT_URL = 'http://$ip_address:9090/v1/chat/completions'" | tee ${WORKDIR}/GenAIExamples/AgentQnA/ui/svelte/.env
# retriever
export host_ip=$(hostname -I | awk '{print $1}')
export no_proxy=${no_proxy}
export http_proxy=${http_proxy}
export https_proxy=${https_proxy}
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006"
export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
export REDIS_URL="redis://${host_ip}:6379"
export INDEX_NAME="rag-redis"
export RERANK_TYPE="tei"
export MEGA_SERVICE_HOST_IP=${host_ip}
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
export RERANK_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8889/v1/retrievaltool"
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest"
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6008/v1/dataprep/get"
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6009/v1/dataprep/delete"
# Set OpenTelemetry Tracing Endpoint
export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+')
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://$JAEGER_IP:4317
export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces
export no_proxy="$no_proxy,rag-agent-endpoint,sql-agent-endpoint,react-agent-endpoint,agent-ui,vllm-gaudi-server,jaeger,grafana,prometheus,node-exporter,gaudi-exporter,127.0.0.1,localhost,0.0.0.0,$host_ip,$JAEGER_IP"


@@ -3,7 +3,7 @@
services:
tgi-server:
image: ghcr.io/huggingface/tgi-gaudi:2.0.6
image: ghcr.io/huggingface/tgi-gaudi:2.3.1
container_name: tgi-server
ports:
- "8085:80"


@@ -17,3 +17,15 @@ services:
dockerfile: ./docker/Dockerfile
extends: agent
image: ${REGISTRY:-opea}/agent-ui:${TAG:-latest}
vllm-gaudi:
build:
context: vllm-fork
dockerfile: Dockerfile.hpu
extends: agent
image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest}
vllm-rocm:
build:
context: GenAIComps
dockerfile: comps/third_parties/vllm/src/Dockerfile.amd_gpu
extends: agent
image: ${REGISTRY:-opea}/vllm-rocm:${TAG:-latest}


@@ -0,0 +1,22 @@
# Copyright (C) 2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
tgi:
enabled: false
vllm:
enabled: true
LLM_MODEL_ID: "meta-llama/Meta-Llama-3-8B-Instruct"
extraCmdArgs: ["--max-seq-len-to-capture", "16384", "--enable-auto-tool-choice", "--tool-call-parser", "llama3_json"]
supervisor:
llm_endpoint_url: http://{{ .Release.Name }}-vllm
llm_engine: vllm
model: "meta-llama/Meta-Llama-3-8B-Instruct"
ragagent:
llm_endpoint_url: http://{{ .Release.Name }}-vllm
llm_engine: vllm
model: "meta-llama/Meta-Llama-3-8B-Instruct"
sqlagent:
llm_endpoint_url: http://{{ .Release.Name }}-vllm
llm_engine: vllm
model: "meta-llama/Meta-Llama-3-8B-Instruct"


@@ -4,13 +4,32 @@
# Accelerate inferencing in heaviest components to improve performance
# by overriding their subchart values
tgi:
enabled: false
vllm:
enabled: true
accelDevice: "gaudi"
image:
repository: opea/vllm-gaudi
resources:
limits:
habana.ai/gaudi: 4
LLM_MODEL_ID: "meta-llama/Llama-3.3-70B-Instruct"
OMPI_MCA_btl_vader_single_copy_mechanism: none
PT_HPU_ENABLE_LAZY_COLLECTIVES: true
VLLM_SKIP_WARMUP: true
shmSize: 16Gi
extraCmdArgs: ["--tensor-parallel-size", "4", "--max-seq-len-to-capture", "16384", "--enable-auto-tool-choice", "--tool-call-parser", "llama3_json"]
supervisor:
llm_endpoint_url: http://{{ .Release.Name }}-vllm
llm_engine: vllm
model: "meta-llama/Llama-3.3-70B-Instruct"
ragagent:
llm_endpoint_url: http://{{ .Release.Name }}-vllm
llm_engine: vllm
model: "meta-llama/Llama-3.3-70B-Instruct"
sqlagent:
llm_endpoint_url: http://{{ .Release.Name }}-vllm
llm_engine: vllm
model: "meta-llama/Llama-3.3-70B-Instruct"


@@ -1,7 +1,22 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
host_ip=$(hostname -I | awk '{print $1}')
port=6007
FILEDIR=${WORKDIR}/GenAIExamples/AgentQnA/example_data/
FILENAME=test_docs_music.jsonl
python3 index_data.py --filedir ${FILEDIR} --filename ${FILENAME} --host_ip $host_ip
# AgentQnA ingestion script requires following packages
packages=("requests" "tqdm")
# Check if packages are installed
for package in "${packages[@]}"; do
if pip freeze | grep -q "$package="; then
echo "$package is installed"
else
echo "$package is not installed"
pip install --no-cache-dir "$package"
fi
done
python3 index_data.py --filedir ${FILEDIR} --filename ${FILENAME} --host_ip $host_ip --port $port


@@ -20,23 +20,30 @@ function stop_agent_and_api_server() {
function stop_retrieval_tool() {
echo "Stopping Retrieval tool"
docker compose -f $WORKDIR/GenAIExamples/AgentQnA/retrieval_tool/docker/docker-compose-retrieval-tool.yaml down
local RETRIEVAL_TOOL_PATH=$WORKPATH/../DocIndexRetriever
cd $RETRIEVAL_TOOL_PATH/docker_compose/intel/cpu/xeon/
container_list=$(cat compose.yaml | grep container_name | cut -d':' -f2)
for container_name in $container_list; do
cid=$(docker ps -aq --filter "name=$container_name")
echo "Stopping container $container_name"
if [[ ! -z "$cid" ]]; then docker rm $cid -f && sleep 1s; fi
done
}
echo "=================== #1 Building docker images===================="
bash 1_build_images.sh
bash step1_build_images.sh xeon
echo "=================== #1 Building docker images completed===================="
echo "=================== #2 Start retrieval tool===================="
bash 2_start_retrieval_tool.sh
bash step2_start_retrieval_tool.sh
echo "=================== #2 Retrieval tool started===================="
echo "=================== #3 Ingest data and validate retrieval===================="
bash 3_ingest_data_and_validate_retrieval.sh
bash step3_ingest_data_and_validate_retrieval.sh
echo "=================== #3 Data ingestion and validation completed===================="
echo "=================== #4 Start agent and API server===================="
bash 4_launch_and_validate_agent_openai.sh
bash step4_launch_and_validate_agent_openai.sh
echo "=================== #4 Agent test passed ===================="
echo "=================== #5 Stop agent and API server===================="


@@ -15,41 +15,52 @@ function get_genai_comps() {
fi
}
function build_docker_images_for_retrieval_tool(){
cd $WORKDIR/GenAIExamples/DocIndexRetriever/docker_image_build/
get_genai_comps
echo "Build all the images with --no-cache..."
service_list="doc-index-retriever dataprep embedding retriever reranking"
docker compose -f build.yaml build ${service_list} --no-cache
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
docker compose -f build.yaml build --no-cache
docker images && sleep 1s
}
function build_agent_docker_image() {
function build_agent_docker_image_xeon() {
cd $WORKDIR/GenAIExamples/AgentQnA/docker_image_build/
get_genai_comps
echo "Build agent image with --no-cache..."
docker compose -f build.yaml build --no-cache
service_list="agent agent-ui"
docker compose -f build.yaml build ${service_list} --no-cache
}
function build_vllm_docker_image() {
echo "Building the vllm docker image"
cd $WORKPATH
echo $WORKPATH
if [ ! -d "./vllm-fork" ]; then
git clone https://github.com/HabanaAI/vllm-fork.git
fi
cd ./vllm-fork
git checkout v0.6.4.post2+Gaudi-1.19.0
docker build --no-cache -f Dockerfile.hpu -t opea/vllm-gaudi:ci --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy
if [ $? -ne 0 ]; then
echo "opea/vllm-gaudi:ci failed"
exit 1
else
echo "opea/vllm-gaudi:ci successful"
fi
function build_agent_docker_image_gaudi_vllm() {
cd $WORKDIR/GenAIExamples/AgentQnA/docker_image_build/
get_genai_comps
git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork
VLLM_VER=v0.6.6.post1+Gaudi-1.20.0
git checkout ${VLLM_VER} &> /dev/null && cd ../
echo "Build agent image with --no-cache..."
service_list="agent agent-ui vllm-gaudi"
docker compose -f build.yaml build ${service_list} --no-cache
}
function build_agent_docker_image_rocm() {
cd $WORKDIR/GenAIExamples/AgentQnA/docker_image_build/
get_genai_comps
echo "Build agent image with --no-cache..."
service_list="agent agent-ui"
docker compose -f build.yaml build ${service_list} --no-cache
}
function build_agent_docker_image_rocm_vllm() {
cd $WORKDIR/GenAIExamples/AgentQnA/docker_image_build/
get_genai_comps
echo "Build agent image with --no-cache..."
service_list="agent agent-ui vllm-rocm"
docker compose -f build.yaml build ${service_list} --no-cache
}
@@ -58,15 +69,32 @@ function main() {
build_docker_images_for_retrieval_tool
echo "==================== Build docker images for retrieval tool completed ===================="
echo "==================== Build agent docker image ===================="
build_agent_docker_image
echo "==================== Build agent docker image completed ===================="
sleep 3s
echo "==================== Build vllm docker image ===================="
build_vllm_docker_image
echo "==================== Build vllm docker image completed ===================="
case $1 in
"rocm")
echo "==================== Build agent docker image for ROCm ===================="
build_agent_docker_image_rocm
;;
"rocm_vllm")
echo "==================== Build agent docker image for ROCm VLLM ===================="
build_agent_docker_image_rocm_vllm
;;
"gaudi_vllm")
echo "==================== Build agent docker image for Gaudi ===================="
build_agent_docker_image_gaudi_vllm
;;
"xeon")
echo "==================== Build agent docker image for Xeon ===================="
build_agent_docker_image_xeon
;;
*)
echo "Invalid argument"
exit 1
;;
esac
docker image ls | grep vllm
}
main
main $1


@@ -9,7 +9,7 @@ echo "WORKDIR=${WORKDIR}"
export ip_address=$(hostname -I | awk '{print $1}')
export host_ip=${ip_address}
export HF_CACHE_DIR=$WORKDIR/hf_cache
export HF_CACHE_DIR=${model_cache:-"$WORKDIR/hf_cache"}
if [ ! -d "$HF_CACHE_DIR" ]; then
echo "Creating HF_CACHE directory"
mkdir -p "$HF_CACHE_DIR"


@@ -0,0 +1,49 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -e
WORKPATH=$(dirname "$PWD")
export WORKDIR=$WORKPATH/../
echo "WORKDIR=${WORKDIR}"
export ip_address=$(hostname -I | awk '{print $1}')
export host_ip=${ip_address}
export HF_CACHE_DIR=$WORKPATH/hf_cache
if [ ! -d "$HF_CACHE_DIR" ]; then
echo "Creating HF_CACHE directory"
mkdir -p "$HF_CACHE_DIR"
fi
function start_retrieval_tool() {
echo "Starting Retrieval tool"
cd $WORKPATH/../DocIndexRetriever/docker_compose/intel/cpu/xeon
host_ip=$(hostname -I | awk '{print $1}')
export HF_CACHE_DIR=${HF_CACHE_DIR}
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export no_proxy=${no_proxy}
export http_proxy=${http_proxy}
export https_proxy=${https_proxy}
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006"
export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
export REDIS_URL="redis://${host_ip}:6379"
export INDEX_NAME="rag-redis"
export RERANK_TYPE="tei"
export MEGA_SERVICE_HOST_IP=${host_ip}
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
export RERANK_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8889/v1/retrievaltool"
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest"
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6008/v1/dataprep/get"
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6009/v1/dataprep/delete"
docker compose -f compose.yaml up -d
}
echo "==================== Start retrieval tool ===================="
start_retrieval_tool
sleep 20 # needed for downloading the models
echo "==================== Retrieval tool started ===================="

View File

@@ -0,0 +1,68 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -e
WORKPATH=$(dirname "$PWD")
export WORKDIR=$WORKPATH/../../
echo "WORKDIR=${WORKDIR}"
export ip_address=$(hostname -I | awk '{print $1}')
export host_ip=$ip_address
echo "ip_address=${ip_address}"
function validate() {
local CONTENT="$1"
local EXPECTED_RESULT="$2"
local SERVICE_NAME="$3"
if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then
echo "[ $SERVICE_NAME ] Content is as expected: $CONTENT"
echo 0
else
echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT"
echo 1
fi
}
function ingest_data_and_validate() {
echo "Ingesting data"
cd $WORKPATH/retrieval_tool/
echo $PWD
local CONTENT=$(bash run_ingest_data.sh)
local EXIT_CODE=$(validate "$CONTENT" "Data preparation succeeded" "dataprep-redis-server")
echo "$EXIT_CODE"
local EXIT_CODE="${EXIT_CODE:0-1}"
echo "return value is $EXIT_CODE"
if [ "$EXIT_CODE" == "1" ]; then
docker logs dataprep-redis-server
return 1
fi
}
function validate_retrieval_tool() {
echo "----------------Test retrieval tool ----------------"
local CONTENT=$(http_proxy="" curl http://${ip_address}:8889/v1/retrievaltool -X POST -H "Content-Type: application/json" -d '{
"text": "Who sang Thriller"
}')
local EXIT_CODE=$(validate "$CONTENT" "Thriller" "retrieval-tool")
local EXIT_CODE="${EXIT_CODE:0-1}"
if [ "$EXIT_CODE" == "1" ]; then
docker logs retrievaltool-xeon-backend-server
exit 1
fi
}
function main(){
echo "==================== Ingest data ===================="
ingest_data_and_validate
echo "==================== Data ingestion completed ===================="
echo "==================== Validate retrieval tool ===================="
validate_retrieval_tool
echo "==================== Retrieval tool validated ===================="
}
main
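A note on the validate() idiom used throughout these tests: the function echoes a log line followed by a bare 0 or 1, so command substitution captures both, and callers keep only the last character to recover the status. The pattern in miniature:

EXIT_CODE=$(validate "$CONTENT" "Thriller" "retrieval-tool")
EXIT_CODE="${EXIT_CODE:0-1}"   # last character only: 0 = match, 1 = no match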

View File

@@ -8,12 +8,14 @@ WORKPATH=$(dirname "$PWD")
export WORKDIR=$WORKPATH/../../
echo "WORKDIR=${WORKDIR}"
export ip_address=$(hostname -I | awk '{print $1}')
export host_ip=$ip_address
echo "ip_address=${ip_address}"
export TOOLSET_PATH=$WORKPATH/tools/
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
model="meta-llama/Meta-Llama-3.1-70B-Instruct"
model="meta-llama/Llama-3.3-70B-Instruct" #"meta-llama/Meta-Llama-3.1-70B-Instruct"
export HF_CACHE_DIR=/data2/huggingface
export HF_CACHE_DIR=${model_cache:-"/data2/huggingface"}
if [ ! -d "$HF_CACHE_DIR" ]; then
HF_CACHE_DIR=$WORKDIR/hf_cache
mkdir -p "$HF_CACHE_DIR"
@@ -24,21 +26,23 @@ ls $HF_CACHE_DIR
vllm_port=8086
vllm_volume=${HF_CACHE_DIR}
function start_tgi(){
echo "Starting tgi-gaudi server"
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi
bash launch_tgi_gaudi.sh
function start_agent_service() {
echo "Starting agent service"
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi
source set_env.sh
docker compose -f compose.yaml up -d
}
function start_vllm_service_70B() {
function start_all_services() {
echo "token is ${HF_TOKEN}"
echo "start vllm gaudi service"
echo "**************model is $model**************"
vllm_image=opea/vllm-gaudi:ci
docker run -d --runtime=habana --rm --name "vllm-gaudi-server" -e HABANA_VISIBLE_DEVICES=0,1,2,3 -p $vllm_port:8000 -v $vllm_volume:/data -e HF_TOKEN=$HF_TOKEN -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e HF_HOME=/data -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e VLLM_SKIP_WARMUP=true --cap-add=sys_nice --ipc=host $vllm_image --model ${model} --max-seq-len-to-capture 16384 --tensor-parallel-size 4
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi
source set_env.sh
docker compose -f $WORKDIR/GenAIExamples/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml -f compose.yaml -f compose.telemetry.yaml up -d
sleep 5s
echo "Waiting vllm gaudi ready"
n=0
@@ -60,23 +64,6 @@ function start_vllm_service_70B() {
echo "Service started successfully"
}
function prepare_data() {
cd $WORKDIR
echo "Downloading data..."
git clone https://github.com/TAG-Research/TAG-Bench.git
cd TAG-Bench/setup
chmod +x get_dbs.sh
./get_dbs.sh
echo "Split data..."
cd $WORKPATH/tests/sql_agent_test
bash run_data_split.sh
echo "Data preparation done!"
}
function download_chinook_data(){
echo "Downloading chinook data..."
cd $WORKDIR
@@ -84,16 +71,6 @@ function download_chinook_data(){
cp chinook-database/ChinookDatabase/DataSources/Chinook_Sqlite.sqlite $WORKDIR/GenAIExamples/AgentQnA/tests/
}
function start_agent_and_api_server() {
echo "Starting CRAG server"
docker run -d --runtime=runc --name=kdd-cup-24-crag-service -p=8080:8000 docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0
echo "Starting Agent services"
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi
bash launch_agent_service_gaudi.sh
sleep 2m
}
function validate() {
local CONTENT="$1"
local EXPECTED_RESULT="$2"
@@ -112,8 +89,9 @@ function validate_agent_service() {
# # test worker rag agent
echo "======================Testing worker rag agent======================"
export agent_port="9095"
export agent_ip="127.0.0.1"
prompt="Tell me about Michael Jackson song Thriller"
local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py --prompt "$prompt")
local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py --prompt "$prompt" --agent_role "worker" --ip_addr $agent_ip --ext_port $agent_port)
# echo $CONTENT
local EXIT_CODE=$(validate "$CONTENT" "Thriller" "rag-agent-endpoint")
echo $EXIT_CODE
@@ -127,7 +105,7 @@ function validate_agent_service() {
echo "======================Testing worker sql agent======================"
export agent_port="9096"
prompt="How many employees are there in the company?"
local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py --prompt "$prompt")
local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py --prompt "$prompt" --agent_role "worker" --ip_addr $agent_ip --ext_port $agent_port)
local EXIT_CODE=$(validate "$CONTENT" "8" "sql-agent-endpoint")
echo $CONTENT
# echo $EXIT_CODE
@@ -140,9 +118,8 @@ function validate_agent_service() {
# test supervisor react agent
echo "======================Testing supervisor react agent======================"
export agent_port="9090"
prompt="How many albums does Iron Maiden have?"
local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py --prompt "$prompt")
local EXIT_CODE=$(validate "$CONTENT" "21" "react-agent-endpoint")
local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py --agent_role "supervisor" --ip_addr $agent_ip --ext_port $agent_port --stream)
local EXIT_CODE=$(validate "$CONTENT" "Iron" "react-agent-endpoint")
# echo $CONTENT
echo $EXIT_CODE
local EXIT_CODE="${EXIT_CODE:0-1}"
@@ -153,15 +130,6 @@ function validate_agent_service() {
}
function remove_data() {
echo "Removing data..."
cd $WORKDIR
if [ -d "TAG-Bench" ]; then
rm -rf TAG-Bench
fi
echo "Data removed!"
}
function remove_chinook_data(){
echo "Removing chinook data..."
cd $WORKDIR
@@ -171,26 +139,59 @@ function remove_chinook_data(){
echo "Chinook data removed!"
}
function ingest_data_and_validate() {
echo "Ingesting data"
cd $WORKDIR/GenAIExamples/AgentQnA/retrieval_tool/
echo $PWD
local CONTENT=$(bash run_ingest_data.sh)
local EXIT_CODE=$(validate "$CONTENT" "Data preparation succeeded" "dataprep-redis-server")
echo "$EXIT_CODE"
local EXIT_CODE="${EXIT_CODE:0-1}"
echo "return value is $EXIT_CODE"
if [ "$EXIT_CODE" == "1" ]; then
docker logs dataprep-redis-server
return 1
fi
}
function validate_retrieval_tool() {
echo "----------------Test retrieval tool ----------------"
local CONTENT=$(http_proxy="" curl http://${ip_address}:8889/v1/retrievaltool -X POST -H "Content-Type: application/json" -d '{
"text": "Who sang Thriller"
}')
local EXIT_CODE=$(validate "$CONTENT" "Thriller" "retrieval-tool")
local EXIT_CODE="${EXIT_CODE:0-1}"
if [ "$EXIT_CODE" == "1" ]; then
docker logs retrievaltool-xeon-backend-server
exit 1
fi
}
function main() {
echo "==================== Prepare data ===================="
download_chinook_data
echo "==================== Data prepare done ===================="
echo "==================== Start VLLM service ===================="
start_vllm_service_70B
echo "==================== VLLM service started ===================="
echo "==================== Start all services ===================="
start_all_services
echo "==================== all services started ===================="
echo "==================== Start agent ===================="
start_agent_and_api_server
echo "==================== Agent started ===================="
echo "==================== Ingest data ===================="
ingest_data_and_validate
echo "==================== Data ingestion completed ===================="
echo "==================== Validate retrieval tool ===================="
validate_retrieval_tool
echo "==================== Retrieval tool validated ===================="
echo "==================== Validate agent service ===================="
validate_agent_service
echo "==================== Agent service validated ===================="
}
remove_data
remove_chinook_data
main
remove_data
remove_chinook_data
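The agent validations above shell out to test.py; a standalone invocation against the worker RAG agent, using the flags introduced in this changeset (values illustrative):

python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py \
  --prompt "Tell me about Michael Jackson song Thriller" \
  --agent_role worker --ip_addr 127.0.0.1 --ext_port 9095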

View File

@@ -11,13 +11,22 @@ echo "WORKDIR=${WORKDIR}"
export ip_address=$(hostname -I | awk '{print $1}')
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
function download_chinook_data(){
echo "Downloading chinook data..."
cd $WORKDIR
git clone https://github.com/lerocha/chinook-database.git
cp chinook-database/ChinookDatabase/DataSources/Chinook_Sqlite.sqlite $WORKDIR/GenAIExamples/AgentQnA/tests/
}
function start_agent_and_api_server() {
echo "Starting CRAG server"
docker run -d --runtime=runc --name=kdd-cup-24-crag-service -p=8080:8000 docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0
echo "Starting Agent services"
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/cpu/xeon
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/cpu/xeon/
bash launch_agent_service_openai.sh
sleep 2m
}
function validate() {
@@ -35,19 +44,64 @@ function validate() {
}
function validate_agent_service() {
echo "----------------Test agent ----------------"
local CONTENT=$(http_proxy="" curl http://${ip_address}:9090/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
"query": "Tell me about Michael Jackson song thriller"
}')
local EXIT_CODE=$(validate "$CONTENT" "Thriller" "react-agent-endpoint")
docker logs react-agent-endpoint
# # test worker rag agent
echo "======================Testing worker rag agent======================"
export agent_port="9095"
prompt="Tell me about Michael Jackson song Thriller"
local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py --prompt "$prompt" --agent_role "worker" --ext_port $agent_port)
# echo $CONTENT
local EXIT_CODE=$(validate "$CONTENT" "Thriller" "rag-agent-endpoint")
echo $EXIT_CODE
local EXIT_CODE="${EXIT_CODE:0-1}"
if [ "$EXIT_CODE" == "1" ]; then
docker logs rag-agent-endpoint
exit 1
fi
# # test worker sql agent
echo "======================Testing worker sql agent======================"
export agent_port="9096"
prompt="How many employees are there in the company?"
local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py --prompt "$prompt" --agent_role "worker" --ext_port $agent_port)
local EXIT_CODE=$(validate "$CONTENT" "8" "sql-agent-endpoint")
echo $CONTENT
# echo $EXIT_CODE
local EXIT_CODE="${EXIT_CODE:0-1}"
if [ "$EXIT_CODE" == "1" ]; then
docker logs sql-agent-endpoint
exit 1
fi
# test supervisor react agent
echo "======================Testing supervisor react agent======================"
export agent_port="9090"
local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py --agent_role "supervisor" --ext_port $agent_port --stream)
local EXIT_CODE=$(validate "$CONTENT" "Iron" "react-agent-endpoint")
# echo $CONTENT
echo $EXIT_CODE
local EXIT_CODE="${EXIT_CODE:0-1}"
if [ "$EXIT_CODE" == "1" ]; then
docker logs react-agent-endpoint
exit 1
fi
}
function remove_chinook_data(){
echo "Removing chinook data..."
cd $WORKDIR
if [ -d "chinook-database" ]; then
rm -rf chinook-database
fi
echo "Chinook data removed!"
}
function main() {
echo "==================== Prepare data ===================="
download_chinook_data
echo "==================== Data prepare done ===================="
echo "==================== Start agent ===================="
start_agent_and_api_server
echo "==================== Agent started ===================="
@@ -57,4 +111,9 @@ function main() {
echo "==================== Agent service validated ===================="
}
remove_chinook_data
main
remove_chinook_data

View File

@@ -0,0 +1,120 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -e
WORKPATH=$(dirname "$PWD")
export LOG_PATH=${WORKPATH}
export WORKDIR=$WORKPATH/../../
echo "WORKDIR=${WORKDIR}"
export ip_address=$(hostname -I | awk '{print $1}')
export host_ip=${ip_address}
export TOOLSET_PATH=$WORKPATH/tools/
export HF_CACHE_DIR=$WORKPATH/data2/huggingface
if [ ! -d "$HF_CACHE_DIR" ]; then
HF_CACHE_DIR=$WORKDIR/hf_cache
mkdir -p "$HF_CACHE_DIR"
fi
function download_chinook_data(){
echo "Downloading chinook data..."
cd $WORKDIR
git clone https://github.com/lerocha/chinook-database.git
cp chinook-database/ChinookDatabase/DataSources/Chinook_Sqlite.sqlite ${WORKPATH}/tests/
}
function start_agent_and_api_server() {
echo "Starting Agent services"
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/amd/gpu/rocm
bash launch_agent_service_vllm_rocm.sh
}
function validate() {
local CONTENT="$1"
local EXPECTED_RESULT="$2"
local SERVICE_NAME="$3"
if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then
echo "[ $SERVICE_NAME ] Content is as expected: $CONTENT"
echo 0
else
echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT"
echo 1
fi
}
function validate_agent_service() {
# # test worker rag agent
echo "======================Testing worker rag agent======================"
export agent_port=$(cat ${WORKPATH}/docker_compose/amd/gpu/WORKER_RAG_AGENT_PORT_tmp)
prompt="Tell me about Michael Jackson song Thriller"
local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py --prompt "$prompt" --agent_role "worker" --ext_port $agent_port)
# echo $CONTENT
local EXIT_CODE=$(validate "$CONTENT" "Thriller" "rag-agent-endpoint")
echo $EXIT_CODE
local EXIT_CODE="${EXIT_CODE:0-1}"
if [ "$EXIT_CODE" == "1" ]; then
docker logs rag-agent-endpoint
exit 1
fi
# test worker sql agent
echo "======================Testing worker sql agent======================"
export agent_port=$(cat ${WORKPATH}/docker_compose/amd/gpu/WORKER_SQL_AGENT_PORT_tmp)
prompt="How many employees are there in the company?"
local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py --prompt "$prompt" --agent_role "worker" --ext_port $agent_port)
local EXIT_CODE=$(validate "$CONTENT" "8" "sql-agent-endpoint")
echo $CONTENT
# echo $EXIT_CODE
local EXIT_CODE="${EXIT_CODE:0-1}"
if [ "$EXIT_CODE" == "1" ]; then
docker logs sql-agent-endpoint
exit 1
fi
# test supervisor react agent
echo "======================Testing supervisor react agent======================"
export agent_port=$(cat ${WORKPATH}/docker_compose/amd/gpu/SUPERVISOR_REACT_AGENT_PORT_tmp)
local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py --agent_role "supervisor" --ext_port $agent_port --stream)
local EXIT_CODE=$(validate "$CONTENT" "Iron" "react-agent-endpoint")
# echo $CONTENT
echo $EXIT_CODE
local EXIT_CODE="${EXIT_CODE:0-1}"
if [ "$EXIT_CODE" == "1" ]; then
docker logs react-agent-endpoint
exit 1
fi
}
function remove_chinook_data(){
echo "Removing chinook data..."
cd $WORKDIR
if [ -d "chinook-database" ]; then
rm -rf chinook-database
fi
echo "Chinook data removed!"
}
function main() {
echo "==================== Prepare data ===================="
download_chinook_data
echo "==================== Data prepare done ===================="
echo "==================== Start agent ===================="
start_agent_and_api_server
echo "==================== Agent started ===================="
echo "==================== Validate agent service ===================="
validate_agent_service
echo "==================== Agent service validated ===================="
}
remove_chinook_data
main
remove_chinook_data
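These ROCm tests read the published agent ports from *_PORT_tmp files, which the launch script is assumed to write out; they can be inspected directly (the value shown is illustrative):

cat ${WORKPATH}/docker_compose/amd/gpu/WORKER_RAG_AGENT_PORT_tmp   # e.g. 9095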

View File

@@ -2,26 +2,30 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -ex
set -e
WORKPATH=$(dirname "$PWD")
export LOG_PATH=${WORKPATH}
export WORKDIR=$WORKPATH/../../
echo "WORKDIR=${WORKDIR}"
export ip_address=$(hostname -I | awk '{print $1}')
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export host_ip=${ip_address}
export TOOLSET_PATH=$WORKPATH/tools/
export HF_CACHE_DIR=$WORKDIR/hf_cache
export HF_CACHE_DIR=$WORKPATH/data2/huggingface
if [ ! -d "$HF_CACHE_DIR" ]; then
HF_CACHE_DIR=$WORKDIR/hf_cache
mkdir -p "$HF_CACHE_DIR"
fi
ls $HF_CACHE_DIR
function download_chinook_data(){
echo "Downloading chinook data..."
cd $WORKDIR
git clone https://github.com/lerocha/chinook-database.git
cp chinook-database/ChinookDatabase/DataSources/Chinook_Sqlite.sqlite ${WORKPATH}/tests/
}
function start_agent_and_api_server() {
echo "Starting CRAG server"
docker run -d --runtime=runc --name=kdd-cup-24-crag-service -p=8080:8000 docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0
echo "Starting Agent services"
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/amd/gpu/rocm
bash launch_agent_service_tgi_rocm.sh
@@ -42,28 +46,63 @@ function validate() {
}
function validate_agent_service() {
echo "----------------Test agent ----------------"
local CONTENT=$(http_proxy="" curl http://${ip_address}:9095/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
"query": "Tell me about Michael Jackson song thriller"
}')
local EXIT_CODE=$(validate "$CONTENT" "Thriller" "react-agent-endpoint")
docker logs rag-agent-endpoint
# # test worker rag agent
echo "======================Testing worker rag agent======================"
export agent_port=$(cat ${WORKPATH}/docker_compose/amd/gpu/WORKER_RAG_AGENT_PORT_tmp)
prompt="Tell me about Michael Jackson song Thriller"
local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py --prompt "$prompt" --agent_role "worker" --ext_port $agent_port)
# echo $CONTENT
local EXIT_CODE=$(validate "$CONTENT" "Thriller" "rag-agent-endpoint")
echo $EXIT_CODE
local EXIT_CODE="${EXIT_CODE:0-1}"
if [ "$EXIT_CODE" == "1" ]; then
docker logs rag-agent-endpoint
exit 1
fi
local CONTENT=$(http_proxy="" curl http://${ip_address}:9090/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
"query": "Tell me about Michael Jackson song thriller"
}')
local EXIT_CODE=$(validate "$CONTENT" "Thriller" "react-agent-endpoint")
docker logs react-agent-endpoint
# test worker sql agent
echo "======================Testing worker sql agent======================"
export agent_port=$(cat ${WORKPATH}/docker_compose/amd/gpu/WORKER_SQL_AGENT_PORT_tmp)
prompt="How many employees are there in the company?"
local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py --prompt "$prompt" --agent_role "worker" --ext_port $agent_port)
local EXIT_CODE=$(validate "$CONTENT" "8" "sql-agent-endpoint")
echo $CONTENT
# echo $EXIT_CODE
local EXIT_CODE="${EXIT_CODE:0-1}"
if [ "$EXIT_CODE" == "1" ]; then
docker logs sql-agent-endpoint
exit 1
fi
# test supervisor react agent
echo "======================Testing supervisor react agent======================"
export agent_port=$(cat ${WORKPATH}/docker_compose/amd/gpu/SUPERVISOR_REACT_AGENT_PORT_tmp)
local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py --agent_role "supervisor" --ext_port $agent_port --stream)
local EXIT_CODE=$(validate "$CONTENT" "Iron" "react-agent-endpoint")
# echo $CONTENT
echo $EXIT_CODE
local EXIT_CODE="${EXIT_CODE:0-1}"
if [ "$EXIT_CODE" == "1" ]; then
docker logs react-agent-endpoint
exit 1
fi
}
function remove_chinook_data(){
echo "Removing chinook data..."
cd $WORKDIR
if [ -d "chinook-database" ]; then
rm -rf chinook-database
fi
echo "Chinook data removed!"
}
function main() {
echo "==================== Prepare data ===================="
download_chinook_data
echo "==================== Data prepare done ===================="
echo "==================== Start agent ===================="
start_agent_and_api_server
echo "==================== Agent started ===================="
@@ -73,4 +112,9 @@ function main() {
echo "==================== Agent service validated ===================="
}
remove_chinook_data
main
remove_chinook_data

View File

@@ -1,34 +1,20 @@
# Copyright (C) 2024 Intel Corporation
# Copyright (C) 2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import argparse
import os
import json
import uuid
import requests
def generate_answer_agent_api(url, prompt):
proxies = {"http": ""}
payload = {
"messages": prompt,
}
response = requests.post(url, json=payload, proxies=proxies)
answer = response.json()["text"]
return answer
def process_request(url, query, is_stream=False):
proxies = {"http": ""}
payload = {
"messages": query,
}
content = json.dumps(query) if query is not None else None
try:
resp = requests.post(url=url, json=payload, proxies=proxies, stream=is_stream)
resp = requests.post(url=url, data=content, proxies=proxies, stream=is_stream)
if not is_stream:
ret = resp.json()["text"]
print(ret)
else:
for line in resp.iter_lines(decode_unicode=True):
print(line)
@@ -38,19 +24,54 @@ def process_request(url, query, is_stream=False):
return ret
except requests.exceptions.RequestException as e:
ret = f"An error occurred:{e}"
print(ret)
return False
return None
def test_worker_agent(args):
url = f"http://{args.ip_addr}:{args.ext_port}/v1/chat/completions"
query = {"role": "user", "messages": args.prompt, "stream": "false"}
ret = process_request(url, query)
print("Response: ", ret)
def add_message_and_run(url, user_message, thread_id, stream=False):
print("User message: ", user_message)
query = {"role": "user", "messages": user_message, "thread_id": thread_id, "stream": stream}
ret = process_request(url, query, is_stream=stream)
print("Response: ", ret)
def test_chat_completion_multi_turn(args):
url = f"http://{args.ip_addr}:{args.ext_port}/v1/chat/completions"
thread_id = f"{uuid.uuid4()}"
# first turn
print("===============First turn==================")
user_message = "Which artist has the most albums in the database?"
add_message_and_run(url, user_message, thread_id, stream=args.stream)
print("===============End of first turn==================")
# second turn
print("===============Second turn==================")
user_message = "Give me a few examples of the artist's albums?"
add_message_and_run(url, user_message, thread_id, stream=args.stream)
print("===============End of second turn==================")
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--prompt", type=str)
parser.add_argument("--stream", action="store_true")
args = parser.parse_args()
parser.add_argument("--ip_addr", type=str, default="127.0.0.1", help="endpoint ip address")
parser.add_argument("--ext_port", type=str, default="9090", help="endpoint port")
parser.add_argument("--stream", action="store_true", help="streaming mode")
parser.add_argument("--prompt", type=str, help="prompt message")
parser.add_argument("--agent_role", type=str, default="supervisor", help="supervisor or worker")
args, _ = parser.parse_known_args()
ip_address = os.getenv("ip_address", "localhost")
agent_port = os.getenv("agent_port", "9090")
url = f"http://{ip_address}:{agent_port}/v1/chat/completions"
prompt = args.prompt
print(args)
process_request(url, prompt, args.stream)
if args.agent_role == "supervisor":
test_chat_completion_multi_turn(args)
elif args.agent_role == "worker":
test_worker_agent(args)
else:
raise ValueError("Invalid agent role")
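For reference, a hedged curl equivalent of the payload that add_message_and_run() posts to the supervisor endpoint (host, port, and thread_id are illustrative):

curl http://127.0.0.1:9090/v1/chat/completions -X POST \
  -H "Content-Type: application/json" \
  -d '{"role": "user", "messages": "Which artist has the most albums in the database?", "thread_id": "demo-thread-1", "stream": false}'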

View File

@@ -1,7 +1,6 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -xe
WORKPATH=$(dirname "$PWD")
@@ -10,6 +9,15 @@ echo "WORKDIR=${WORKDIR}"
export ip_address=$(hostname -I | awk '{print $1}')
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
export no_proxy="$no_proxy,rag-agent-endpoint,sql-agent-endpoint,react-agent-endpoint,agent-ui,vllm-gaudi-server,jaeger,grafana,prometheus,127.0.0.1,localhost,0.0.0.0,$ip_address"
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
export MODEL_CACHE=${model_cache:-"./data"}
function stop_crag() {
cid=$(docker ps -aq --filter "name=kdd-cup-24-crag-service")
@@ -17,7 +25,7 @@ function stop_crag() {
if [[ ! -z "$cid" ]]; then docker rm $cid -f && sleep 1s; fi
}
function stop_agent_docker() {
function stop_agent_containers() {
cd $WORKPATH/docker_compose/intel/hpu/gaudi/
container_list=$(cat compose.yaml | grep container_name | cut -d':' -f2)
for container_name in $container_list; do
@@ -27,6 +35,18 @@ function stop_agent_docker() {
done
}
function stop_telemetry_containers(){
cd $WORKPATH/docker_compose/intel/hpu/gaudi/
container_list=$(cat compose.telemetry.yaml | grep container_name | cut -d':' -f2)
for container_name in $container_list; do
cid=$(docker ps -aq --filter "name=$container_name")
echo "Stopping container $container_name"
if [[ ! -z "$cid" ]]; then docker rm $cid -f && sleep 1s; fi
done
}
function stop_llm(){
cd $WORKPATH/docker_compose/intel/hpu/gaudi/
container_list=$(cat tgi_gaudi.yaml | grep container_name | cut -d':' -f2)
@@ -58,37 +78,31 @@ function stop_retrieval_tool() {
done
}
echo "workpath: $WORKPATH"
echo "=================== Stop containers ===================="
stop_crag
echo "::group::=================== Stop containers ===================="
stop_llm
stop_agent_docker
stop_crag
stop_agent_containers
stop_retrieval_tool
stop_telemetry_containers
echo "::endgroup::"
cd $WORKPATH/tests
echo "=================== #1 Building docker images===================="
bash step1_build_images.sh
echo "=================== #1 Building docker images completed===================="
echo "::group::=================== Building docker images===================="
bash step1_build_images.sh gaudi_vllm > docker_image_build.log
echo "::endgroup::"
echo "=================== #2 Start retrieval tool===================="
bash step2_start_retrieval_tool.sh
echo "=================== #2 Retrieval tool started===================="
echo "::group::=================== Start agent, API server, retrieval, and ingest data===================="
bash step4_launch_and_validate_agent_gaudi.sh
echo "::endgroup::"
echo "=================== #3 Ingest data and validate retrieval===================="
bash step3_ingest_data_and_validate_retrieval.sh
echo "=================== #3 Data ingestion and validation completed===================="
echo "=================== #4 Start agent and API server===================="
bash step4_launch_and_validate_agent_tgi.sh
echo "=================== #4 Agent test passed ===================="
echo "=================== #5 Stop agent and API server===================="
stop_crag
stop_agent_docker
stop_retrieval_tool
echo "::group::=================== Stop agent and API server===================="
stop_llm
echo "=================== #5 Agent and API server stopped===================="
stop_crag
stop_agent_containers
stop_retrieval_tool
stop_telemetry_containers
echo y | docker system prune
echo "::endgroup::"
echo "ALL DONE!!"

View File

@@ -5,11 +5,19 @@
set -xe
WORKPATH=$(dirname "$PWD")
ls $WORKPATH
export WORKDIR=$WORKPATH/../../
echo "WORKDIR=${WORKDIR}"
export ip_address=$(hostname -I | awk '{print $1}')
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
export TOOLSET_PATH=$WORKPATH/tools/
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
export MODEL_CACHE=${model_cache:-"./data"}
function stop_crag() {
cid=$(docker ps -aq --filter "name=kdd-cup-24-crag-service")
@@ -19,13 +27,7 @@ function stop_crag() {
function stop_agent_docker() {
cd $WORKPATH/docker_compose/amd/gpu/rocm
# docker compose -f compose.yaml down
container_list=$(cat compose.yaml | grep container_name | cut -d':' -f2)
for container_name in $container_list; do
cid=$(docker ps -aq --filter "name=$container_name")
echo "Stopping container $container_name"
if [[ ! -z "$cid" ]]; then docker rm $cid -f && sleep 1s; fi
done
bash stop_agent_service_tgi_rocm.sh
}
function stop_retrieval_tool() {
@@ -41,34 +43,35 @@ function stop_retrieval_tool() {
done
}
echo "workpath: $WORKPATH"
echo "=================== Stop containers ===================="
echo "::group::=================== Stop containers ===================="
stop_crag
stop_agent_docker
stop_retrieval_tool
echo "::endgroup::=================== Stop containers completed ===================="
cd $WORKPATH/tests
echo "=================== #1 Building docker images===================="
bash step1_build_images.sh
echo "=================== #1 Building docker images completed===================="
echo "::group::=================== #1 Building docker images===================="
bash step1_build_images.sh rocm > docker_image_build.log
echo "::endgroup::=================== #1 Building docker images completed===================="
echo "=================== #2 Start retrieval tool===================="
echo "::group::=================== #2 Start retrieval tool===================="
bash step2_start_retrieval_tool.sh
echo "=================== #2 Retrieval tool started===================="
echo "::endgroup::=================== #2 Retrieval tool started===================="
echo "=================== #3 Ingest data and validate retrieval===================="
echo "::group::=================== #3 Ingest data and validate retrieval===================="
bash step3_ingest_data_and_validate_retrieval.sh
echo "=================== #3 Data ingestion and validation completed===================="
echo "::endgroup::=================== #3 Data ingestion and validation completed===================="
echo "=================== #4 Start agent and API server===================="
echo "::group::=================== #4 Start agent and API server===================="
bash step4a_launch_and_validate_agent_tgi_on_rocm.sh
echo "=================== #4 Agent test passed ===================="
echo "::endgroup::=================== #4 Agent test passed ===================="
echo "=================== #5 Stop agent and API server===================="
echo "::group::=================== #5 Stop agent and API server===================="
stop_crag
stop_agent_docker
stop_retrieval_tool
echo "=================== #5 Agent and API server stopped===================="
echo "::endgroup::=================== #5 Agent and API server stopped===================="
echo y | docker system prune

View File

@@ -0,0 +1,72 @@
#!/bin/bash
# Copyright (C) 2024 Advanced Micro Devices, Inc.
# SPDX-License-Identifier: Apache-2.0
set -e
WORKPATH=$(dirname "$PWD")
export WORKDIR=${WORKPATH}/../../
echo "WORKDIR=${WORKDIR}"
export ip_address=$(hostname -I | awk '{print $1}')
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export TOOLSET_PATH=$WORKPATH/tools/
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
export MODEL_CACHE=${model_cache:-"./data"}
function stop_crag() {
cid=$(docker ps -aq --filter "name=kdd-cup-24-crag-service")
echo "Stopping container kdd-cup-24-crag-service with cid $cid"
if [[ ! -z "$cid" ]]; then docker rm $cid -f && sleep 1s; fi
}
function stop_agent_docker() {
cd $WORKPATH/docker_compose/amd/gpu/rocm
bash stop_agent_service_vllm_rocm.sh
}
function stop_retrieval_tool() {
echo "Stopping Retrieval tool"
local RETRIEVAL_TOOL_PATH=$WORKDIR/GenAIExamples/DocIndexRetriever
cd $RETRIEVAL_TOOL_PATH/docker_compose/intel/cpu/xeon/
docker compose -f compose.yaml down
}
echo "workpath: $WORKPATH"
echo "::group::=================== Stop containers ===================="
stop_crag
stop_agent_docker
stop_retrieval_tool
echo "::endgroup::"
cd $WORKPATH/tests
echo "::group::=================== #1 Building docker images===================="
bash step1_build_images.sh rocm_vllm > docker_image_build.log
echo "::endgroup::=================== #1 Building docker images completed===================="
echo "::group::=================== #2 Start retrieval tool===================="
bash step2_start_retrieval_tool_rocm_vllm.sh
echo "::endgroup::=================== #2 Retrieval tool started===================="
echo "::group::=================== #3 Ingest data and validate retrieval===================="
bash step3_ingest_data_and_validate_retrieval_rocm_vllm.sh
echo "::endgroup::=================== #3 Data ingestion and validation completed===================="
echo "::group::=================== #4 Start agent and API server===================="
bash step4_launch_and_validate_agent_rocm_vllm.sh
echo "::endgroup::=================== #4 Agent test passed ===================="
echo "::group::=================== #5 Stop agent and API server===================="
stop_crag
stop_agent_docker
stop_retrieval_tool
echo "::endgroup::=================== #5 Agent and API server stopped===================="
echo y | docker system prune
echo "ALL DONE!!"

View File

@@ -0,0 +1,77 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
search_web_base:
description: Search a web base for a given query. Returns text related to the query.
callable_api: tools.py:search_web_base
args_schema:
query:
type: str
description: query
return_output: retrieved_data
search_knowledge_base:
description: Search a knowledge base for a given query. Returns text related to the query.
callable_api: tools.py:search_knowledge_base
args_schema:
query:
type: str
description: query
return_output: retrieved_data
search_artist_database:
description: Search a SQL database on artists and their music with a natural language query. Returns text related to the query.
callable_api: tools.py:search_sql_database
args_schema:
query:
type: str
description: natural language query
return_output: retrieved_data
get_artist_birth_place:
description: Get the birth place of an artist.
callable_api: tools.py:get_artist_birth_place
args_schema:
artist_name:
type: str
description: artist name
return_output: birth_place
get_billboard_rank_date:
description: Get Billboard ranking for a specific rank and date.
callable_api: tools.py:get_billboard_rank_date
args_schema:
rank:
type: int
description: the rank of interest, for example 1 for top 1
date:
type: str
description: date
return_output: billboard_info
get_song_release_date:
description: Get the release date of a song.
callable_api: tools.py:get_song_release_date
args_schema:
song_name:
type: str
description: song name
return_output: release_date
get_members:
description: Get the member list of a band.
callable_api: tools.py:get_members
args_schema:
band_name:
type: str
description: band name
return_output: members
get_grammy_best_artist_by_year:
description: Get the Grammy Best New Artist for a specific year.
callable_api: tools.py:get_grammy_best_artist_by_year
args_schema:
year:
type: int
description: year
return_output: grammy_best_new_artist
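Each entry above maps a tool name to a callable_api in tools.py plus a typed args_schema; a quick sanity check that the registry parses and lists the registered tool names (the file name is an assumption; requires PyYAML):

python3 -c "import yaml, sys; print(list(yaml.safe_load(open(sys.argv[1]))))" tools/supervisor_agent_tools.yaml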

View File

@@ -4,9 +4,30 @@
import os
import requests
from comps.cores.telemetry.opea_telemetry import opea_telemetry, tracer
from tools.pycragapi import CRAG
@opea_telemetry
def search_web_base(query: str) -> str:
import os
from langchain_core.tools import Tool
from langchain_google_community import GoogleSearchAPIWrapper
search = GoogleSearchAPIWrapper()
tool = Tool(
name="google_search",
description="Search Google for recent results.",
func=search.run,
)
response = tool.run(query)
return response
@opea_telemetry
def search_knowledge_base(query: str) -> str:
"""Search a knowledge base about music and singers for a given query.
@@ -22,6 +43,7 @@ def search_knowledge_base(query: str) -> str:
return response.json()["text"]
@opea_telemetry
def search_sql_database(query: str) -> str:
"""Search a SQL database on artists and their music with a natural language query.
@@ -37,6 +59,7 @@ def search_sql_database(query: str) -> str:
return response.json()["text"]
@opea_telemetry
def get_grammy_best_artist_by_year(year: int) -> dict:
"""Get the Grammy Best New Artist for a specific year."""
api = CRAG()
@@ -44,18 +67,21 @@ def get_grammy_best_artist_by_year(year: int) -> dict:
return api.music_grammy_get_best_artist_by_year(year)
@opea_telemetry
def get_members(band_name: str) -> dict:
"""Get the member list of a band."""
api = CRAG()
return api.music_get_members(band_name)
@opea_telemetry
def get_artist_birth_place(artist_name: str) -> dict:
"""Get the birthplace of an artist."""
api = CRAG()
return api.music_get_artist_birth_place(artist_name)
@opea_telemetry
def get_billboard_rank_date(rank: int, date: str = None) -> dict:
"""Get Billboard ranking for a specific rank and date."""
api = CRAG()
@@ -63,6 +89,7 @@ def get_billboard_rank_date(rank: int, date: str = None) -> dict:
return api.music_get_billboard_rank_date(rank, date)
@opea_telemetry
def get_song_release_date(song_name: str) -> dict:
"""Get the release date of a song."""
api = CRAG()

View File

@@ -12,7 +12,7 @@ def search_knowledge_base(query: str) -> str:
print(url)
proxies = {"http": ""}
payload = {
"text": query,
"messages": query,
}
response = requests.post(url, json=payload, proxies=proxies)
print(response)
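This switches the tool's payload key from text to messages, matching the agent chat-completions schema used elsewhere in the changeset; a hedged equivalent of the request it now issues (host and port are assumptions based on the worker RAG agent exports in the test scripts):

curl http://${host_ip}:9095/v1/chat/completions -X POST \
  -H "Content-Type: application/json" \
  -d '{"messages": "Who sang Thriller"}'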

View File

@@ -1,26 +1,203 @@
# Copyright (C) 2024 Intel Corporation
# Copyright (C) 2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# Use node 20.11.1 as the base image
FROM node:20.11.1
# syntax=docker/dockerfile:1
# Initialize device type args
# use build args in the docker build command with --build-arg="BUILDARG=true"
ARG USE_CUDA=false
ARG USE_OLLAMA=false
# Tested with cu117 for CUDA 11 and cu121 for CUDA 12 (default)
ARG USE_CUDA_VER=cu121
# any sentence transformer model; models to use can be found at https://huggingface.co/models?library=sentence-transformers
# Leaderboard: https://huggingface.co/spaces/mteb/leaderboard
# for better performance and multilanguage support use "intfloat/multilingual-e5-large" (~2.5GB) or "intfloat/multilingual-e5-base" (~1.5GB)
# IMPORTANT: if you change the embedding model (sentence-transformers/all-MiniLM-L6-v2), RAG Chat cannot be used with documents previously embedded in the WebUI; they need to be re-embedded.
ARG USE_EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
ARG USE_RERANKING_MODEL=""
# Update package manager and install Git
RUN apt-get update -y && apt-get install -y git
# Tiktoken encoding name; models to use can be found at https://huggingface.co/models?library=tiktoken
ARG USE_TIKTOKEN_ENCODING_NAME="cl100k_base"
# Copy the front-end code repository
COPY svelte /home/user/svelte
ARG BUILD_HASH=dev-build
# Override at your own risk - non-root configurations are untested
ARG UID=0
ARG GID=0
# Set the working directory
WORKDIR /home/user/svelte
######## WebUI frontend ########
FROM --platform=$BUILDPLATFORM node:22-alpine3.20 AS build
ARG BUILD_HASH
# Install front-end dependencies
RUN npm install
WORKDIR /app
# Build the front-end application
COPY open_webui_patches /app/patches
ARG WEBUI_VERSION=v0.5.20
RUN apk add --no-cache git
# Clone code and use patch
RUN git config --global user.name "opea" && \
git config --global user.email "" && \
git clone https://github.com/open-webui/open-webui.git
WORKDIR /app/open-webui
RUN git checkout ${WEBUI_VERSION} && git am /app/patches/*.patch
WORKDIR /app
RUN mv open-webui/* . && rm -fr open-webui && ls -lrth /app/backend/
RUN npm install onnxruntime-node --onnxruntime-node-install-cuda=skip
RUN apk update && \
apk add --no-cache wget && \
wget https://github.com/microsoft/onnxruntime/releases/download/v1.20.1/onnxruntime-linux-x64-gpu-1.20.1.tgz
ENV APP_BUILD_HASH=${BUILD_HASH}
RUN npm run build
# Expose the port of the front-end application
EXPOSE 5173
######## WebUI backend ########
FROM python:3.11-slim-bookworm AS base
# Run the front-end application in preview mode
CMD ["npm", "run", "preview", "--", "--port", "5173", "--host", "0.0.0.0"]
# Use args
ARG USE_CUDA
ARG USE_OLLAMA
ARG USE_CUDA_VER
ARG USE_EMBEDDING_MODEL
ARG USE_RERANKING_MODEL
ARG UID
ARG GID
## Basis ##
ENV ENV=prod \
PORT=8080 \
# pass build args to the build
USE_OLLAMA_DOCKER=${USE_OLLAMA} \
USE_CUDA_DOCKER=${USE_CUDA} \
USE_CUDA_DOCKER_VER=${USE_CUDA_VER} \
USE_EMBEDDING_MODEL_DOCKER=${USE_EMBEDDING_MODEL} \
USE_RERANKING_MODEL_DOCKER=${USE_RERANKING_MODEL}
## Basis URL Config ##
ENV OLLAMA_BASE_URL="/ollama" \
OPENAI_API_BASE_URL=""
## API Key and Security Config ##
ENV OPENAI_API_KEY="" \
WEBUI_SECRET_KEY="" \
SCARF_NO_ANALYTICS=true \
DO_NOT_TRACK=true \
ANONYMIZED_TELEMETRY=false
#### Other models #########################################################
## whisper TTS model settings ##
ENV WHISPER_MODEL="base" \
WHISPER_MODEL_DIR="/app/backend/data/cache/whisper/models"
## RAG Embedding model settings ##
ENV RAG_EMBEDDING_MODEL="$USE_EMBEDDING_MODEL_DOCKER" \
RAG_RERANKING_MODEL="$USE_RERANKING_MODEL_DOCKER" \
SENTENCE_TRANSFORMERS_HOME="/app/backend/data/cache/embedding/models"
## Tiktoken model settings ##
ENV TIKTOKEN_ENCODING_NAME="cl100k_base" \
TIKTOKEN_CACHE_DIR="/app/backend/data/cache/tiktoken"
## Hugging Face download cache ##
ENV HF_HOME="/app/backend/data/cache/embedding/models"
## Torch Extensions ##
# ENV TORCH_EXTENSIONS_DIR="/.cache/torch_extensions"
#### Other models ##########################################################
COPY --from=build /app/backend /app/backend
WORKDIR /app/backend
ENV HOME=/root
# Create user and group if not root
RUN if [ $UID -ne 0 ]; then \
if [ $GID -ne 0 ]; then \
addgroup --gid $GID app; \
fi; \
adduser --uid $UID --gid $GID --home $HOME --disabled-password --no-create-home app; \
fi
RUN mkdir -p $HOME/.cache/chroma
RUN printf 00000000-0000-0000-0000-000000000000 > $HOME/.cache/chroma/telemetry_user_id
# Make sure the user has access to the app and root directory
RUN chown -R $UID:$GID /app $HOME
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
RUN if [ "$USE_OLLAMA" = "true" ]; then \
apt-get update && \
# Install pandoc and netcat
apt-get install -y --no-install-recommends git build-essential pandoc netcat-openbsd curl && \
apt-get install -y --no-install-recommends gcc python3-dev && \
# for RAG OCR
apt-get install -y --no-install-recommends ffmpeg libsm6 libxext6 && \
# install helper tools
apt-get install -y --no-install-recommends curl jq && \
# install ollama
curl -fsSL https://ollama.com/install.sh | sh && \
# cleanup
rm -rf /var/lib/apt/lists/*; \
else \
apt-get update && \
# Install pandoc, netcat and gcc
apt-get install -y --no-install-recommends git build-essential pandoc gcc netcat-openbsd curl jq && \
apt-get install -y --no-install-recommends gcc python3-dev && \
# for RAG OCR
apt-get install -y --no-install-recommends ffmpeg libsm6 libxext6 && \
# cleanup
rm -rf /var/lib/apt/lists/*; \
fi
# install python dependencies
# COPY --chown=$UID:$GID ./backend/requirements.txt ./requirements.txt
# RUN cp /app/backend/requirements.txt ./requirements.txt
RUN pip3 install --no-cache-dir uv && \
if [ "$USE_CUDA" = "true" ]; then \
# If you use CUDA the whisper and embedding model will be downloaded on first use
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/$USE_CUDA_DOCKER_VER --no-cache-dir && \
uv pip install --system -r requirements.txt --no-cache-dir && \
python -c "import os; from sentence_transformers import SentenceTransformer; SentenceTransformer(os.environ['RAG_EMBEDDING_MODEL'], device='cpu')" && \
python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])"; \
python -c "import os; import tiktoken; tiktoken.get_encoding(os.environ['TIKTOKEN_ENCODING_NAME'])"; \
else \
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir && \
uv pip install --system -r requirements.txt --no-cache-dir && \
python -c "import os; from sentence_transformers import SentenceTransformer; SentenceTransformer(os.environ['RAG_EMBEDDING_MODEL'], device='cpu')" && \
python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])"; \
python -c "import os; import tiktoken; tiktoken.get_encoding(os.environ['TIKTOKEN_ENCODING_NAME'])"; \
fi; \
chown -R $UID:$GID /app/backend/data/
# copy embedding weight from build
# RUN mkdir -p /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2
# COPY --from=build /app/onnx /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx
# copy built frontend files
COPY --chown=$UID:$GID --from=build /app/build /app/build
COPY --chown=$UID:$GID --from=build /app/CHANGELOG.md /app/CHANGELOG.md
COPY --chown=$UID:$GID --from=build /app/package.json /app/package.json
# copy backend files
# COPY --chown=$UID:$GID ./backend .
EXPOSE 8080
HEALTHCHECK CMD curl --silent --fail http://localhost:${PORT:-8080}/health | jq -ne 'input.status == true' || exit 1
USER $UID:$GID
ARG BUILD_HASH
ENV WEBUI_BUILD_VERSION=${BUILD_HASH}
ENV DOCKER=true
CMD [ "bash", "start.sh"]

View File

@@ -0,0 +1,86 @@
From d90ba418f866bc11848d7d6507aabc6b5e8cc3e2 Mon Sep 17 00:00:00 2001
From: lkk12014402 <kaokao.lv@intel.com>
Date: Mon, 7 Apr 2025 07:22:53 +0000
Subject: [PATCH] compatible opea agent tool content
---
backend/open_webui/utils/middleware.py | 56 ++++++++++++++++++++++++++
1 file changed, 56 insertions(+)
diff --git a/backend/open_webui/utils/middleware.py b/backend/open_webui/utils/middleware.py
index 289d887df..fddbe8ee1 100644
--- a/backend/open_webui/utils/middleware.py
+++ b/backend/open_webui/utils/middleware.py
@@ -1465,6 +1465,8 @@ async def process_chat_response(
async def stream_body_handler(response):
nonlocal content
nonlocal content_blocks
+ nonlocal events
+ sources = []
response_tool_calls = []
@@ -1486,6 +1488,60 @@ async def process_chat_response(
try:
data = json.loads(data)
+ tool_content_block = []
+ if data.get("tool_name"):
+ sources.append(
+ {
+ "source": {
+ "name": f"TOOL:{data.get('tool_name')}"},
+ "document": [data.get("tool_content")],
+ "metadata": [{
+ "source": f"TOOL:{data.get('tool_name')}"}],
+ }
+ )
+ events.append({"sources": sources})
+
+ await event_emitter(
+ {
+ "type": "chat:completion",
+ "data": {"sources": sources},
+ }
+ )
+ tool_content_block = [
+ {
+ "type": "tool_calls",
+ "content": [
+ {"id": data.get('tool_name'), "function": {"name": data.get('tool_name')}}
+ ]
+ }
+ ]
+
+ await event_emitter(
+ {
+ "type": "chat:completion",
+ "data": {
+ "content": serialize_content_blocks(tool_content_block),
+ },
+ }
+ )
+
+ tool_content_block = [
+ {
+ "type": "tool_calls",
+ "content": [
+ {"id": data.get('tool_name'), "function": {"name": data.get('tool_name')}}
+ ],
+ "results": [
+ {"tool_call_id": data.get('tool_name'), "content": data.get("tool_content")}
+ ]
+ },
+ {
+ "type": "text",
+ "content": "",
+ }
+ ]
+ content_blocks.extend(tool_content_block)
+
data, _ = await process_filter_functions(
request=request,
filter_functions=filter_functions,
--
2.34.1
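The patch keys off tool_name and tool_content fields in each streamed chunk; a sketch of the kind of server-sent line the handler is assumed to receive (values illustrative):

# data: {"tool_name": "search_knowledge_base", "tool_content": "Thriller is a 1982 song by Michael Jackson ..."}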

View File

@@ -0,0 +1,531 @@
From 8ad31e50644eab3c9e698d7828b1857919887841 Mon Sep 17 00:00:00 2001
From: lkk12014402 <kaokao.lv@intel.com>
Date: Tue, 8 Apr 2025 03:38:09 +0000
Subject: [PATCH 2/2] update agent icloud upload feature
---
src/lib/apis/knowledge/index.ts | 60 +++++++
.../admin/Settings/Connections.svelte | 50 +++++-
.../components/icons/UploadCloudIcon.svelte | 18 ++
src/lib/components/workspace/Knowledge.svelte | 57 +++++-
.../KnowledgeBase/AddIcloudContentMenu.svelte | 164 ++++++++++++++++++
.../KnowledgeBase/IcloudFiles.svelte | 37 ++++
src/lib/i18n/locales/zh-CN/translation.json | 15 +-
7 files changed, 396 insertions(+), 5 deletions(-)
create mode 100644 src/lib/components/icons/UploadCloudIcon.svelte
create mode 100644 src/lib/components/workspace/Knowledge/KnowledgeBase/AddIcloudContentMenu.svelte
create mode 100644 src/lib/components/workspace/Knowledge/KnowledgeBase/IcloudFiles.svelte
diff --git a/src/lib/apis/knowledge/index.ts b/src/lib/apis/knowledge/index.ts
index c5fad1323..32be528a7 100644
--- a/src/lib/apis/knowledge/index.ts
+++ b/src/lib/apis/knowledge/index.ts
@@ -345,3 +345,63 @@ export const deleteKnowledgeById = async (token: string, id: string) => {
return res;
};
+
+export const getIcloudFiles = async (ICLOUD_BASE_URLS: string) => {
+ let error = null;
+
+ const res = await fetch(`${ICLOUD_BASE_URLS}/dataprep/get`, {
+ method: 'POST',
+ headers: {
+ Accept: 'application/json',
+ 'Content-Type': 'application/json',
+ }
+ })
+ .then(async (res) => {
+ if (!res.ok) throw await res.json();
+ return res.json();
+ })
+ .then((json) => {
+ return json;
+ })
+ .catch((err) => {
+ error = err.detail;
+
+ console.log(err);
+ return null;
+ });
+
+ if (error) {
+ throw error;
+ }
+
+ return res;
+};
+
+export const updateIcloudFiles = async (ICLOUD_BASE_URLS: string, formData: any) => {
+ let error = null;
+
+ const res = await fetch(`${ICLOUD_BASE_URLS}/dataprep/ingest`, {
+ method: 'POST',
+ body: formData
+ })
+ .then(async (res) => {
+ if (!res.ok) throw await res.json();
+ return res.json();
+ })
+ .then((json) => {
+ return json;
+ })
+ .catch((err) => {
+ error = err.detail;
+
+ console.log(err);
+ return null;
+ });
+
+ if (error) {
+ throw error;
+ }
+
+ return res;
+};
+
diff --git a/src/lib/components/admin/Settings/Connections.svelte b/src/lib/components/admin/Settings/Connections.svelte
index 2fcfadaec..3237744d5 100644
--- a/src/lib/components/admin/Settings/Connections.svelte
+++ b/src/lib/components/admin/Settings/Connections.svelte
@@ -47,6 +47,9 @@
let showAddOpenAIConnectionModal = false;
let showAddOllamaConnectionModal = false;
+ let ENABLE_ICLOUD_API: null | boolean = (localStorage.getItem('ENABLE_ICLOUD_API') === "enable");
+ let ICLOUD_BASE_URL = localStorage.getItem('ICLOUD_BASE_URL') || '';
+
const updateOpenAIHandler = async () => {
if (ENABLE_OPENAI_API !== null) {
// Remove trailing slashes
@@ -193,10 +196,22 @@
}
});
+ const updateIcloudHandler = async () => {
+ if (ENABLE_ICLOUD_API) {
+ localStorage.setItem('ICLOUD_BASE_URL', ICLOUD_BASE_URL);
+ localStorage.setItem('ENABLE_ICLOUD_API', "enable");
+ } else {
+ localStorage.setItem('ICLOUD_BASE_URL', '');
+ localStorage.setItem('ENABLE_ICLOUD_API', "");
+ }
+ toast.success($i18n.t('Icloud API settings updated'));
+ };
+
const submitHandler = async () => {
updateOpenAIHandler();
updateOllamaHandler();
updateDirectConnectionsHandler();
+ updateIcloudHandler();
dispatch('save');
};
@@ -301,7 +316,7 @@
</div>
{#if ENABLE_OLLAMA_API}
- <hr class=" border-gray-100 dark:border-gray-850 my-2" />
+ <hr class=" border-gray-100 dark:border-gray-850" />
<div class="">
<div class="flex justify-between items-center">
@@ -358,6 +373,39 @@
{/if}
</div>
+ <hr class=" border-gray-50 dark:border-gray-850" />
+
+ <div class="pr-1.5 my-2">
+ <div class="flex justify-between items-center text-sm">
+ <div class="font-medium">{$i18n.t('Icloud File API')}</div>
+
+ <div class="mt-1">
+ <Switch
+ bind:state={ENABLE_ICLOUD_API}
+ on:change={async () => {
+ updateIcloudHandler();
+ }}
+ />
+ </div>
+ </div>
+
+ {#if ENABLE_ICLOUD_API}
+ <hr class=" border-gray-50 dark:border-gray-850 my-2" />
+
+ <div class="">
+ <div class="flex w-full gap-1.5">
+ <div class="flex-1 flex flex-col gap-1.5">
+ <input
+ class="w-full text-sm bg-transparent outline-none"
+ placeholder={$i18n.t('Enter Icloud URL(e.g.') + 'http://localhost:6007/v1)'}
+ bind:value={ICLOUD_BASE_URL}
+ />
+ </div>
+ </div>
+ </div>
+ {/if}
+ </div>
+
<hr class=" border-gray-100 dark:border-gray-850" />
<div class="pr-1.5 my-2">
diff --git a/src/lib/components/icons/UploadCloudIcon.svelte b/src/lib/components/icons/UploadCloudIcon.svelte
new file mode 100644
index 000000000..eed3bd582
--- /dev/null
+++ b/src/lib/components/icons/UploadCloudIcon.svelte
@@ -0,0 +1,18 @@
+<script lang="ts">
+ export let className = 'w-4 h-4';
+</script>
+
+<svg
+ t="1744007283647"
+ viewBox="0 0 1491 1024"
+ version="1.1"
+ xmlns="http://www.w3.org/2000/svg"
+ p-id="1630"
+ class = {className}
+ ><path
+ d="M546.047379 263.651842s-90.221363-91.423424-212.63125-16.762074c-109.521121 71.300031-90.154581 201.768179-90.154582 201.76818S0 498.498962 0 759.902727c5.431535 261.003078 264.186314 263.674325 264.186314 263.674326l388.443814 0.422947V744.565318H466.355181l279.434681-279.412421 279.390161 279.412421h-186.297208V1024l377.157796-0.422947s240.812904 0.222604 274.648698-248.092052c16.094262-271.576764-232.754643-325.113003-232.754643-325.113003S1286.205362 48.327085 936.761752 2.470681C637.181417-29.740104 546.047379 263.651842 546.047379 263.651842z"
+ fill="#507BFC"
+ p-id="1631"
+ ></path></svg
+>
+
diff --git a/src/lib/components/workspace/Knowledge.svelte b/src/lib/components/workspace/Knowledge.svelte
index 57d45312d..43a1f305e 100644
--- a/src/lib/components/workspace/Knowledge.svelte
+++ b/src/lib/components/workspace/Knowledge.svelte
@@ -13,7 +13,8 @@
import {
getKnowledgeBases,
deleteKnowledgeById,
- getKnowledgeBaseList
+ getKnowledgeBaseList,
+ getIcloudFiles
} from '$lib/apis/knowledge';
import { goto } from '$app/navigation';
@@ -26,6 +27,11 @@
import Spinner from '../common/Spinner.svelte';
import { capitalizeFirstLetter } from '$lib/utils';
import Tooltip from '../common/Tooltip.svelte';
+ import AddIcloudConnectionModal from '$lib/components/workspace/Knowledge/KnowledgeBase/AddIcloudContentMenu.svelte';
+ import IcloudFiles from '$lib/components/workspace/Knowledge/KnowledgeBase/IcloudFiles.svelte';
+
+ let showAddTextContentModal = false;
+ let IcloudFile = [];
let loaded = false;
@@ -65,9 +71,26 @@
};
onMount(async () => {
+ await updateIcloudFiles();
+
knowledgeBases = await getKnowledgeBaseList(localStorage.token);
loaded = true;
});
+
+ async function updateIcloudFiles() {
+ let ICLOUD_BASE_URL = localStorage.getItem('ICLOUD_BASE_URL') || '';
+ console.log('ICLOUD_BASE_URL', ICLOUD_BASE_URL);
+
+ if (ICLOUD_BASE_URL !== '') {
+ const res = await getIcloudFiles(ICLOUD_BASE_URL).catch((e) => {
+ toast.error(`${e}`);
+ });
+
+ if (res) {
+ IcloudFile = res;
+ }
+ }
+ }
</script>
<svelte:head>
@@ -187,11 +210,39 @@
{/each}
</div>
- <div class=" text-gray-500 text-xs mt-1 mb-2">
- ⓘ {$i18n.t("Use '#' in the prompt input to load and include your knowledge.")}
+ <div class="flex justify-between items-center">
+ <div class="flex md:self-center text-xl font-medium px-0.5 items-center">
+ {$i18n.t('Icloud Knowledge')}
+ <div class="flex self-center w-[1px] h-6 mx-2.5 bg-gray-50 dark:bg-gray-850" />
+ <span class="text-lg font-medium text-gray-500 dark:text-gray-300">{IcloudFile.length}</span>
+ </div>
+ <div>
+ <button
+ class=" px-2 py-2 rounded-xl hover:bg-gray-700/10 dark:hover:bg-gray-100/10 dark:text-gray-300 dark:hover:text-white transition font-medium text-sm flex items-center space-x-1"
+ aria-label={$i18n.t('Upload to Icloud')}
+ on:click={() => {
+ showAddTextContentModal = !showAddTextContentModal;
+ }}
+ >
+ <Plus className="size-3.5" />
+ </button>
+ </div>
+ </div>
+ <hr class="border-gray-100 dark:border-gray-850 my-2" />
+ <div class=" flex overflow-y-auto w-full h-[15rem] scrollbar-hidden text-xs">
+ <IcloudFiles files={IcloudFile} />
</div>
{:else}
<div class="w-full h-full flex justify-center items-center">
<Spinner />
</div>
{/if}
+
+<AddIcloudConnectionModal
+ bind:show={showAddTextContentModal}
+ on:updateIcloudFile={async (e) => {
+ if (e.detail.status) {
+ await updateIcloudFiles();
+ }
+ }}
+/>
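Knowledge.svelte imports `getIcloudFiles` (and the modal below imports `updateIcloudFiles`) from `$lib/apis/knowledge`, but the helper implementations are not part of this diff. A sketch of the shape the call sites imply, assuming a hypothetical `/files` endpoint under the configured base URL:

```ts
// Hypothetical helpers matching the call sites in this diff; the real
// implementations in $lib/apis/knowledge and the '/files' path are assumptions.
export const getIcloudFiles = async (baseUrl: string) => {
	const res = await fetch(`${baseUrl}/files`);
	if (!res.ok) {
		throw await res.text();
	}
	return res.json(); // expected: [{ name: string, ... }]
};

export const updateIcloudFiles = async (baseUrl: string, formData: FormData) => {
	// Multipart POST carrying 'files' and/or a JSON-encoded 'link_list',
	// matching the FormData built in AddIcloudContentMenu.svelte.
	const res = await fetch(`${baseUrl}/files`, { method: 'POST', body: formData });
	if (!res.ok) {
		throw await res.text();
	}
	return res.json();
};
```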
diff --git a/src/lib/components/workspace/Knowledge/KnowledgeBase/AddIcloudContentMenu.svelte b/src/lib/components/workspace/Knowledge/KnowledgeBase/AddIcloudContentMenu.svelte
new file mode 100644
index 000000000..fb906a0d3
--- /dev/null
+++ b/src/lib/components/workspace/Knowledge/KnowledgeBase/AddIcloudContentMenu.svelte
@@ -0,0 +1,166 @@
+<script lang="ts">
+ import { toast } from 'svelte-sonner';
+ import { getContext, onMount, createEventDispatcher } from 'svelte';
+ import Modal from '$lib/components/common/Modal.svelte';
+ import UploadCloudIcon from '$lib/components/icons/UploadCloudIcon.svelte';
+ import Spinner from '$lib/components/common/Spinner.svelte';
+ import { updateIcloudFiles } from '$lib/apis/knowledge';
+
+ const i18n = getContext('i18n');
+ const dispatch = createEventDispatcher();
+
+ export let show = false;
+
+ let url = '';
+
+ let loading = false;
+
+ let selectedFile = null;
+
+ function handleFileSelect(event) {
+ selectedFile = event.target.files[0];
+ }
+
+ function parseAndValidateUrls(normalizedInput: string): string[] {
+ return normalizedInput
+ .split(',')
+ .map((candidate) => {
+ const processed = candidate.replace(/^["']+|["']+$/g, '').trim();
+
+ try {
+ new URL(processed);
+ return processed;
+ } catch {
+ return null;
+ }
+ })
+ .filter((url): url is string => url !== null);
+ }
+
+ async function submitHandler() {
+ loading = true;
+
+ if (!url && !selectedFile) {
+ loading = false;
+ show = false;
+
+ toast.error($i18n.t('URL or File is required'));
+ return;
+ }
+ if (url && selectedFile) {
+ loading = false;
+ show = false;
+
+ toast.error($i18n.t('Upload a file or enter a URL, not both'));
+ url = '';
+ selectedFile = null;
+ return;
+ }
+
+ const formData = new FormData();
+ if (url) {
+ formData.append('link_list', JSON.stringify(parseAndValidateUrls(url)));
+ }
+ if (selectedFile) {
+ formData.append('files', selectedFile, selectedFile.name);
+ }
+ let ICLOUD_BASE_URL = localStorage.getItem('ICLOUD_BASE_URL') || '';
+ console.log('ICLOUD_BASE_URL', ICLOUD_BASE_URL);
+
+ if (ICLOUD_BASE_URL !== '') {
+ const res = await updateIcloudFiles(ICLOUD_BASE_URL, formData).catch((e) => {
+ toast.error(`${e}`);
+
+ return;
+ });
+
+ if (res) {
+ toast.success($i18n.t('Upload Succeeded'));
+ dispatch('updateIcloudFile', { status: true });
+ }
+ }
+
+ // Reset the form even when no Icloud base URL is configured,
+ // so the modal cannot get stuck in the loading state.
+ url = '';
+ selectedFile = null;
+ loading = false;
+ show = false;
+ }
+</script>
+
+<Modal size="sm" bind:show>
+ <div class="flex flex-col justify-end">
+ <div class=" flex justify-between dark:text-gray-100 px-5 pt-4 pb-2">
+ <div class="flex-col text-lg font-medium self-center font-primary">
+ {$i18n.t('Upload Icloud file')}
+ <span class="text-sm text-gray-500">- {$i18n.t('choose URL or local file')}</span>
+ </div>
+
+ <button
+ class="self-center"
+ on:click={() => {
+ show = false;
+ }}
+ >
+ <svg
+ xmlns="http://www.w3.org/2000/svg"
+ viewBox="0 0 20 20"
+ fill="currentColor"
+ class="w-5 h-5"
+ >
+ <path
+ d="M6.28 5.22a.75.75 0 00-1.06 1.06L8.94 10l-3.72 3.72a.75.75 0 101.06 1.06L10 11.06l3.72 3.72a.75.75 0 101.06-1.06L11.06 10l3.72-3.72a.75.75 0 00-1.06-1.06L10 8.94 6.28 5.22z"
+ />
+ </svg>
+ </button>
+ </div>
+
+ <div class="flex flex-col md:flex-row w-full px-4 pb-4 md:space-x-4 dark:text-gray-200">
+ <div class=" flex flex-col w-full sm:flex-row sm:justify-center sm:space-x-6">
+ <div class="flex items-center w-full">
+ <div class="flex-1 min-w-0 mr-2">
+ <div class="flex flex-col w-full my-8 mx-2">
+ <input
+ class="w-full text-sm bg-transparent placeholder:text-gray-300 outline-none border-b-solid border-b-2 border-blue-500 rounded p-2"
+ type="text"
+ bind:value={url}
+ placeholder={$i18n.t('Upload from URL')}
+ />
+ </div>
+ </div>
+
+ <div class="flex-none w-[1px] h-[60%] mx-2.5 bg-gray-300"></div>
+
+ <div class="flex-1 min-w-0">
+ <input type="file" id="fileInput" hidden on:change={handleFileSelect} />
+
+ <label
+ for="fileInput"
+ class="cursor-pointer flex flex-col items-center hover:bg-gray-100 rounded-lg p-2 transition-colors"
+ >
+ <UploadCloudIcon className="w-12 h-12 text-gray-500" />
+ <div class="text-xs text-gray-500 pt-2">
+ {selectedFile ? selectedFile.name : $i18n.t('Click to upload file')}
+ </div>
+ </label>
+ </div>
+ </div>
+ </div>
+ </div>
+ {#if loading}
+ <Spinner className="my-4 size-4" />
+ {:else}
+ <button
+ class="bg-blue-500 hover:bg-blue-700 text-white font-bold py-3 px-4 rounded text-sm"
+ on:click={(e) => {
+ e.preventDefault();
+ submitHandler();
+ }}
+ >
+ {$i18n.t('Confirm Upload')}
+ </button>
+ {/if}
+ </div>
+</Modal>
+
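The `parseAndValidateUrls` helper above splits the input on commas, strips surrounding quotes, trims, and keeps only candidates the `URL` constructor accepts. A quick illustration of the intended behavior:

```ts
// Illustration only, using parseAndValidateUrls as defined above.
parseAndValidateUrls('"https://a.example/doc.pdf",http://b.example, not-a-url');
// -> ['https://a.example/doc.pdf', 'http://b.example']
// ' not-a-url' fails the new URL(...) check and is filtered out.
```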
diff --git a/src/lib/components/workspace/Knowledge/KnowledgeBase/IcloudFiles.svelte b/src/lib/components/workspace/Knowledge/KnowledgeBase/IcloudFiles.svelte
new file mode 100644
index 000000000..d6490dce2
--- /dev/null
+++ b/src/lib/components/workspace/Knowledge/KnowledgeBase/IcloudFiles.svelte
@@ -0,0 +1,37 @@
+<script lang="ts">
+ export let selectedFileId = null;
+ export let files = [];
+
+ export let small = false;
+</script>
+
+<div class="max-h-full flex flex-col w-full">
+ {#each files as file}
+ <div class="mt-1 px-2 flex hover:bg-gray-50 transition">
+ <div class="p-3 bg-black/20 dark:bg-white/10 text-white rounded-xl my-2">
+ <svg
+ xmlns="http://www.w3.org/2000/svg"
+ viewBox="0 0 24 24"
+ fill="currentColor"
+ class=" size-3"
+ >
+ <path
+ fill-rule="evenodd"
+ d="M5.625 1.5c-1.036 0-1.875.84-1.875 1.875v17.25c0 1.035.84 1.875 1.875 1.875h12.75c1.035 0 1.875-.84 1.875-1.875V12.75A3.75 3.75 0 0 0 16.5 9h-1.875a1.875 1.875 0 0 1-1.875-1.875V5.25A3.75 3.75 0 0 0 9 1.5H5.625ZM7.5 15a.75.75 0 0 1 .75-.75h7.5a.75.75 0 0 1 0 1.5h-7.5A.75.75 0 0 1 7.5 15Zm.75 2.25a.75.75 0 0 0 0 1.5H12a.75.75 0 0 0 0-1.5H8.25Z"
+ clip-rule="evenodd"
+ />
+ <path
+ d="M12.971 1.816A5.23 5.23 0 0 1 14.25 5.25v1.875c0 .207.168.375.375.375H16.5a5.23 5.23 0 0 1 3.434 1.279 9.768 9.768 0 0 0-6.963-6.963Z"
+ />
+ </svg>
+ </div>
+
+ <div class="flex flex-col justify-center -space-y-0.5 px-2.5 w-full">
+ <div class=" dark:text-gray-100 text-sm font-medium line-clamp-1 mb-1">
+ {file.name}
+ </div>
+ </div>
+ </div>
+ {/each}
+</div>
+
diff --git a/src/lib/i18n/locales/zh-CN/translation.json b/src/lib/i18n/locales/zh-CN/translation.json
index ebb53a1b5..d6b72e04d 100644
--- a/src/lib/i18n/locales/zh-CN/translation.json
+++ b/src/lib/i18n/locales/zh-CN/translation.json
@@ -1174,5 +1174,19 @@
"Your entire contribution will go directly to the plugin developer; Open WebUI does not take any percentage. However, the chosen funding platform might have its own fees.": "您的全部捐款将直接给到插件开发者Open WebUI 不会收取任何比例。但众筹平台可能会有服务费、抽成。",
"Youtube": "YouTube",
"Youtube Language": "Youtube 语言",
- "Youtube Proxy URL": "Youtube 代理 URL"
+ "Youtube Proxy URL": "Youtube 代理 URL",
+ "Upload Icloud file": "上传到云端",
+ "choose URL or local file": "选择URL或本地文件",
+ "Upload from URL": "从URL上传",
+ "Upload Confirm": "确认上传",
+ "URL or File are required": "未上传文件",
+ "Upload file or enter URL": "文件与URL不能同时提交",
+ "Icloud File": "云端文件",
+ "Icloud File API": "云端存储API",
+ "Enter Icloud URL(e.g.": "输入云端存储URL例如.",
+ "Upload to Icloud": "上传到云端",
+ "Icloud Knowledge": "云端数据库",
+ "Upload Succeed": "上传文件成功",
+ "Icloud API settings updated": "云端存储API设置已更新"
}
--
2.34.1


@@ -0,0 +1,56 @@
From ebf3218eef81897b536521e2140bdd9176f3ace3 Mon Sep 17 00:00:00 2001
From: lkk12014402 <kaokao.lv@intel.com>
Date: Tue, 8 Apr 2025 07:13:20 +0000
Subject: [PATCH 3/3] update build script
---
hatch_build.py | 23 ++++++++++++++++++-----
1 file changed, 18 insertions(+), 5 deletions(-)
diff --git a/hatch_build.py b/hatch_build.py
index 8ddaf0749..e15d6e99d 100644
--- a/hatch_build.py
+++ b/hatch_build.py
@@ -3,21 +3,34 @@ import os
import shutil
import subprocess
from sys import stderr
-
+
from hatchling.builders.hooks.plugin.interface import BuildHookInterface
-
-
+
+
class CustomBuildHook(BuildHookInterface):
def initialize(self, version, build_data):
super().initialize(version, build_data)
- stderr.write(">>> Building Open Webui frontend\n")
+ stderr.write(">>> Building DCAI小智 frontend\n")
npm = shutil.which("npm")
if npm is None:
raise RuntimeError(
- "NodeJS `npm` is required for building Open Webui but it was not found"
+ "NodeJS `npm` is required for building DCAI小智 but it was not found"
)
+ stderr.write("### Installing onnxruntime-node\n")
+ subprocess.run([npm, "install", "onnxruntime-node", "--onnxruntime-node-install-cuda=skip"], check=True) # noqa: S603
+
+ stderr.write("### Installing huggingface/transformers.js\n")
+ subprocess.run([npm, "i", "@huggingface/transformers"], check=True) # noqa: S603
+
+ ort_version = "1.20.1"
+ ort_url = f"https://github.com/microsoft/onnxruntime/releases/download/v{ort_version}/onnxruntime-linux-x64-gpu-{ort_version}.tgz"
+
+ stderr.write(f"### Downloading onnxruntime binaries from {ort_url}\n")
+ subprocess.run(["curl", "-L", ort_url, "-o", f"onnxruntime-linux-x64-gpu-{ort_version}.tgz"], check=True) # noqa: S603
+
stderr.write("### npm install\n")
subprocess.run([npm, "install"], check=True) # noqa: S603
+
stderr.write("\n### npm run build\n")
os.environ["APP_BUILD_HASH"] = version
subprocess.run([npm, "run", "build"], check=True) # noqa: S603
--
2.34.1
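Note that this hunk downloads the onnxruntime archive with `curl` but no extraction or install step for the tarball appears in this excerpt; presumably that is handled elsewhere, or the `onnxruntime-node` npm install above is what the build actually relies on.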


@@ -0,0 +1,31 @@
From 36d61dab9306cb8f12c4497a32781d84f8cfb2e7 Mon Sep 17 00:00:00 2001
From: lkk12014402 <kaokao.lv@intel.com>
Date: Tue, 8 Apr 2025 07:22:36 +0000
Subject: [PATCH 4/4] enhance tool formatting
---
backend/open_webui/utils/middleware.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/backend/open_webui/utils/middleware.py b/backend/open_webui/utils/middleware.py
index fddbe8ee1..9e44ed91a 100644
--- a/backend/open_webui/utils/middleware.py
+++ b/backend/open_webui/utils/middleware.py
@@ -1142,12 +1142,12 @@ async def process_chat_response(
result_display_content = f"{result_display_content}\n> {tool_name}: {result.get('content', '')}"
if not raw:
- content = f'{content}\n<details type="tool_calls" done="true" content="{html.escape(json.dumps(block_content))}" results="{html.escape(json.dumps(results))}">\n<summary>Tool Executed</summary>\n{result_display_content}\n</details>\n'
+ content = f'{content}\n<details type="tool_calls" done="true" content="{html.escape(json.dumps(block_content))}" results="{html.escape(json.dumps(results))}">\n<summary> Tool: {tool_call.get('function', {}).get('name', '')} Executed</summary>\n{result_display_content}\n</details>\n'
else:
tool_calls_display_content = ""
for tool_call in block_content:
- tool_calls_display_content = f"{tool_calls_display_content}\n> Executing {tool_call.get('function', {}).get('name', '')}"
+ tool_calls_display_content = f"{tool_calls_display_content}\n> Executing Tool: {tool_call.get('function', {}).get('name', '')}"
if not raw:
content = f'{content}\n<details type="tool_calls" done="false" content="{html.escape(json.dumps(block_content))}">\n<summary>Tool Executing...</summary>\n{tool_calls_display_content}\n</details>\n'
--
2.34.1


@@ -0,0 +1,25 @@
From 4723fb2df86df3e1c300f12fc0649823ea1a753b Mon Sep 17 00:00:00 2001
From: lkk12014402 <kaokao.lv@intel.com>
Date: Tue, 8 Apr 2025 08:09:36 +0000
Subject: [PATCH 5/5] fix tool call typo.
---
backend/open_webui/utils/middleware.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/backend/open_webui/utils/middleware.py b/backend/open_webui/utils/middleware.py
index 9e44ed91a..82aed5346 100644
--- a/backend/open_webui/utils/middleware.py
+++ b/backend/open_webui/utils/middleware.py
@@ -1142,7 +1142,7 @@ async def process_chat_response(
result_display_content = f"{result_display_content}\n> {tool_name}: {result.get('content', '')}"
if not raw:
- content = f'{content}\n<details type="tool_calls" done="true" content="{html.escape(json.dumps(block_content))}" results="{html.escape(json.dumps(results))}">\n<summary> Tool: {tool_call.get('function', {}).get('name', '')} Executed</summary>\n{result_display_content}\n</details>\n'
+ content = f'{content}\n<details type="tool_calls" done="true" content="{html.escape(json.dumps(block_content))}" results="{html.escape(json.dumps(results))}">\n<summary> Tool: {tool_call.get("function", {}).get("name", "")} Executed</summary>\n{result_display_content}\n</details>\n'
else:
tool_calls_display_content = ""
--
2.34.1
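This follow-up fix is a Python syntax issue rather than a cosmetic one: before Python 3.12 (PEP 701), an f-string cannot reuse its own quote character inside a replacement field, so the single-quoted `tool_call.get('function', {})` nested in the single-quoted f-string from the previous patch raises a SyntaxError at import time. Switching the inner quotes to double quotes resolves it.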


@@ -1,10 +0,0 @@
[*]
indent_style = tab
[package.json]
indent_style = space
indent_size = 2
[*.md]
indent_style = space
indent_size = 2


@@ -1 +0,0 @@
AGENT_URL = '/v1/chat/completions'


@@ -1,13 +0,0 @@
.DS_Store
node_modules
/build
/.svelte-kit
/package
.env
.env.*
!.env.example
# Ignore files for PNPM, NPM and YARN
pnpm-lock.yaml
package-lock.json
yarn.lock


@@ -1,20 +0,0 @@
module.exports = {
root: true,
parser: "@typescript-eslint/parser",
extends: ["eslint:recommended", "plugin:@typescript-eslint/recommended", "prettier"],
plugins: ["svelte3", "@typescript-eslint", "neverthrow"],
ignorePatterns: ["*.cjs"],
overrides: [{ files: ["*.svelte"], processor: "svelte3/svelte3" }],
settings: {
"svelte3/typescript": () => require("typescript"),
},
parserOptions: {
sourceType: "module",
ecmaVersion: 2020,
},
env: {
browser: true,
es2017: true,
node: true,
},
};


@@ -1,13 +0,0 @@
.DS_Store
node_modules
/build
/.svelte-kit
/package
.env
.env.*
!.env.example
# Ignore files for PNPM, NPM and YARN
pnpm-lock.yaml
package-lock.json
yarn.lock


@@ -1,13 +0,0 @@
{
"pluginSearchDirs": [
"."
],
"overrides": [
{
"files": "*.svelte",
"options": {
"parser": "svelte"
}
}
]
}


@@ -1,60 +0,0 @@
# AgentQnA
## 📸 Project Screenshots
![project-screenshot](../../assets/img/agent_ui.png)
![project-screenshot](../../assets/img/agent_ui_result.png)
## 🧐 Features
Here are some of the project's features:
- Create Agent: Provide more precise answers based on user queries, showcase the high-quality output process of complex queries across different dimensions, and consolidate information to present comprehensive answers.
## 🛠️ Get it Running
1. Clone the repo.
2. Change into the UI folder.
```
cd AgentQnA/ui
```
3. Modify the required .env variables.
```
AGENT_URL = ''
```
4. **For Local Development:**
- Install the dependencies:
```
npm install
```
- Start the development server:
```
npm run dev
```
- The application will be available at `http://localhost:3000`.
5. **For Docker Setup:**
- Build the Docker image:
```
docker build -t opea:agent-ui .
```
- Run the Docker container:
```
docker run -d -p 3000:3000 --name agent-ui opea:agent-ui
```
- The application will be available at `http://localhost:3000`.


@@ -1,60 +0,0 @@
{
"name": "agent-example",
"version": "0.0.1",
"private": true,
"scripts": {
"dev": "vite dev --host 0.0.0.0",
"build": "vite build",
"preview": "vite preview",
"check": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json",
"check:watch": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json --watch",
"lint": "prettier --check . && eslint .",
"format": "prettier --write ."
},
"devDependencies": {
"@fortawesome/free-solid-svg-icons": "6.2.0",
"@sveltejs/adapter-auto": "1.0.0-next.75",
"@sveltejs/kit": "^1.20.1",
"@tailwindcss/typography": "0.5.7",
"@types/debug": "4.1.7",
"@typescript-eslint/eslint-plugin": "^5.27.0",
"@typescript-eslint/parser": "^5.27.0",
"autoprefixer": "^10.4.7",
"daisyui": "^2.52.0",
"debug": "4.3.4",
"eslint": "^8.16.0",
"eslint-config-prettier": "^8.3.0",
"eslint-plugin-neverthrow": "1.1.4",
"eslint-plugin-svelte3": "^4.0.0",
"neverthrow": "5.0.0",
"pocketbase": "0.7.0",
"postcss": "^8.4.23",
"postcss-load-config": "^4.0.1",
"postcss-preset-env": "^8.3.2",
"prettier": "^2.8.8",
"prettier-plugin-svelte": "^2.7.0",
"prettier-plugin-tailwindcss": "^0.3.0",
"svelte": "^3.59.1",
"svelte-check": "^2.7.1",
"svelte-fa": "3.0.3",
"svelte-preprocess": "^4.10.7",
"tailwindcss": "^3.1.5",
"ts-pattern": "4.0.5",
"tslib": "^2.3.1",
"typescript": "^4.7.4",
"vite": "^4.3.9"
},
"type": "module",
"dependencies": {
"@heroicons/vue": "^2.1.5",
"echarts": "^5.4.2",
"flowbite-svelte": "^0.38.5",
"flowbite-svelte-icons": "^0.3.6",
"fuse.js": "^6.6.2",
"marked": "^15.0.0",
"ramda": "^0.29.0",
"sjcl": "^1.0.8",
"sse.js": "^0.6.1",
"svelte-notifications": "^0.9.98"
}
}


@@ -1,13 +0,0 @@
const tailwindcss = require("tailwindcss");
const autoprefixer = require("autoprefixer");
const config = {
plugins: [
//Some plugins, like tailwindcss/nesting, need to run before Tailwind,
tailwindcss(),
//But others, like autoprefixer, need to run after,
autoprefixer,
],
};
module.exports = config;


@@ -1,50 +0,0 @@
// Copyright (C) 2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
// See: https://kit.svelte.dev/docs/types#app
// import { Result} from "neverthrow";
declare namespace App {
interface Locals {
user?: User;
}
// interface PageData { }
// interface PageError {}
// interface Platform {}
}
interface User {
id?: string;
email: string;
password?: string;
token?: string;
[key: string]: any;
}
type AuthResponse = Result<User>;
interface AuthAdapter {
login(props: { email: string; password: string }): Promise<AuthResponse>;
signup(props: { email: string; password: string; password_confirm: string }): Promise<AuthResponse>;
validate_session(props: { token: string }): Promise<AuthResponse>;
logout(props: { token: string; email: string }): Promise<Result<void>>;
forgotPassword(props: { email: string; password: string }): Promise<Result<void>>;
}
interface ChatAdapter {
modelList(props: {}): Promise<Result<void>>;
txt2img(props: {}): Promise<Result<void>>;
}
interface ChatMessage {
role: string;
content: string;
}
interface ChatMessageType {
model: string;
knowledge: string;
temperature: string;
max_new_tokens: string;
topk: string;
}


@@ -1,17 +0,0 @@
<!--
Copyright (C) 2025 Intel Corporation
SPDX-License-Identifier: Apache-2.0
-->
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8" />
<link rel="icon" href="%sveltekit.assets%/favicon.png" />
<meta name="viewport" content="width=device-width" />
%sveltekit.head%
</head>
<body>
<div>%sveltekit.body%</div>
</body>
</html>


@@ -1,82 +0,0 @@
/* Write your global styles here, in PostCSS syntax */
@tailwind base;
@tailwind components;
@tailwind utilities;
.btn {
@apply flex-nowrap;
}
a.btn {
@apply no-underline;
}
.input {
@apply text-base;
}
.bg-dark-blue {
background-color: #004a86;
}
.bg-light-blue {
background-color: #0068b5;
}
.bg-turquoise {
background-color: #00a3f6;
}
.bg-header {
background-color: #ffffff;
}
.bg-button {
background-color: #0068b5;
}
.bg-title {
background-color: #f7f7f7;
}
.text-header {
color: #0068b5;
}
.text-button {
color: #0071c5;
}
.text-title-color {
color: rgb(38,38,38);
}
.font-intel {
font-family: "intel-clear","tahoma",Helvetica,"helvetica",Arial,sans-serif;
}
.font-title-intel {
font-family: "intel-one","intel-clear",Helvetica,Arial,sans-serif;
}
.bg-footer {
background-color: #e7e7e7;
}
.bg-light-green {
background-color: #d7f3a1;
}
.bg-purple {
background-color: #653171;
}
.bg-dark-blue {
background-color: #224678;
}
.border-input-color {
border-color: #605e5c;
}
.w-12\/12 {
width: 100%
}


@@ -1,25 +0,0 @@
<!--
Copyright (C) 2025 Intel Corporation
SPDX-License-Identifier: Apache-2.0
-->
<svg
t="1731984271860"
class="w-8 h-8"
viewBox="0 0 1024 1024"
version="1.1"
xmlns="http://www.w3.org/2000/svg"
p-id="11418"
width="200"
height="200"
><path
d="M0 0m170.666667 0l682.666666 0q170.666667 0 170.666667 170.666667l0 682.666666q0 170.666667-170.666667 170.666667l-682.666666 0q-170.666667 0-170.666667-170.666667l0-682.666666q0-170.666667 170.666667-170.666667Z"
fill="#1890FF"
fill-opacity=".1"
p-id="11419"
/><path
d="M404.352 552.661333a63.018667 63.018667 0 1 0 0-125.994666 63.018667 63.018667 0 0 0 0 125.994666z m0 213.333334a63.018667 63.018667 0 1 0 0-125.994667 63.018667 63.018667 0 0 0 0 125.994667z m-213.333333-426.666667a63.018667 63.018667 0 1 0 0-125.994667 63.018667 63.018667 0 0 0 0 125.994667z m669.653333-10.88H376.362667a35.669333 35.669333 0 0 1-35.114667-36.096c0-19.882667 15.786667-36.096 35.114667-36.096h484.394666c19.370667 0 35.157333 16.213333 35.157334 36.096a35.669333 35.669333 0 0 1-35.242667 36.096z m16.384 213.034667h-260.821333c-10.410667 0-18.901333-16.213333-18.901334-36.096 0-19.925333 8.490667-36.138667 18.901334-36.138667h260.864c10.410667 0 18.901333 16.213333 18.901333 36.138667-0.042667 19.882667-8.490667 36.096-18.944 36.096z m0 212.992h-260.821333c-10.410667 0-18.901333-16.213333-18.901334-36.096 0-19.925333 8.490667-36.096 18.901334-36.096h260.864c10.410667 0 18.901333 16.213333 18.901333 36.096-0.042667 19.882667-8.490667 36.096-18.944 36.096z"
fill="#1890FF"
p-id="11420"
/></svg
>


@@ -1,9 +0,0 @@
<!--
Copyright (C) 2025 Intel Corporation
SPDX-License-Identifier: Apache-2.0
-->
<svg class="w-3.5 h-3.5 me-2.5" aria-hidden="true" xmlns="http://www.w3.org/2000/svg" fill="currentColor" viewBox="0 0 20 20">
<path d="M14.707 7.793a1 1 0 0 0-1.414 0L11 10.086V1.5a1 1 0 0 0-2 0v8.586L6.707 7.793a1 1 0 1 0-1.414 1.414l4 4a1 1 0 0 0 1.416 0l4-4a1 1 0 0 0-.002-1.414Z"/>
<path d="M18 12h-2.55l-2.975 2.975a3.5 3.5 0 0 1-4.95 0L4.55 12H2a2 2 0 0 0-2 2v4a2 2 0 0 0 2 2h16a2 2 0 0 0 2-2v-4a2 2 0 0 0-2-2Zm-3 5a1 1 0 1 1 0-2 1 1 0 0 1 0 2Z"/>
</svg>


@@ -1,16 +0,0 @@
<!--
Copyright (C) 2025 Intel Corporation
SPDX-License-Identifier: Apache-2.0
-->
<svg
class="me-2 h-3 w-3"
aria-hidden="true"
xmlns="http://www.w3.org/2000/svg"
fill="currentColor"
viewBox="0 0 20 14"
>
<path
d="M10 0C4.612 0 0 5.336 0 7c0 1.742 3.546 7 10 7 6.454 0 10-5.258 10-7 0-1.664-4.612-7-10-7Zm0 10a3 3 0 1 1 0-6 3 3 0 0 1 0 6Z"
/>
</svg>


@@ -1,97 +0,0 @@
<!--
Copyright (C) 2025 Intel Corporation
SPDX-License-Identifier: Apache-2.0
-->
<!-- <svg class="h-11 w-11 flex-none overflow-visible" fill="none"
><defs
><filter
id="step-icon-2"
x="-3"
y="-1"
width="50"
height="50"
filterUnits="userSpaceOnUse"
color-interpolation-filters="sRGB"
><feFlood flood-opacity="0" result="BackgroundImageFix" /><feColorMatrix
in="SourceAlpha"
values="0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 127 0"
result="hardAlpha"
/><feOffset dy="2" /><feGaussianBlur stdDeviation="2.5" /><feComposite
in2="hardAlpha"
operator="out"
/><feColorMatrix
values="0 0 0 0 0.054902 0 0 0 0 0.647059 0 0 0 0 0.913725 0 0 0 0.12 0"
/><feBlend
in2="BackgroundImageFix"
result="effect1_dropShadow_804_95228"
/><feBlend
in="SourceGraphic"
in2="effect1_dropShadow_804_95228"
result="shape"
/></filter
></defs
><g filter="url(#step-icon-2)"
><path
d="M2.75 10A7.25 7.25 0 0 1 10 2.75h24A7.25 7.25 0 0 1 41.25 10v24A7.25 7.25 0 0 1 34 41.25H10A7.25 7.25 0 0 1 2.75 34V10Z"
fill="#EEF2FF"
/><path
d="M2.75 10A7.25 7.25 0 0 1 10 2.75h24A7.25 7.25 0 0 1 41.25 10v24A7.25 7.25 0 0 1 34 41.25H10A7.25 7.25 0 0 1 2.75 34V10Z"
stroke="#6366F1"
stroke-width="1.5"
stroke-linecap="round"
stroke-linejoin="round"
/></g
><path
fill-rule="evenodd"
clip-rule="evenodd"
d="M23 35.25c.69 0 1.25-.56 1.25-1.25A3.75 3.75 0 0 1 28 30.25a1.25 1.25 0 1 0 0-2.5A3.75 3.75 0 0 1 24.25 24a1.25 1.25 0 1 0-2.5 0A3.75 3.75 0 0 1 18 27.75a1.25 1.25 0 0 0 0 2.5A3.75 3.75 0 0 1 21.75 34c0 .69.56 1.25 1.25 1.25Z"
fill="#fff"
/><path
d="M28 27a.75.75 0 0 0 0 1.5V27Zm-4.5 7a.5.5 0 0 1-.5.5V36a2 2 0 0 0 2-2h-1.5Zm5-5a.5.5 0 0 1-.5.5V31a2 2 0 0 0 2-2h-1.5Zm-.5-.5a.5.5 0 0 1 .5.5H30a2 2 0 0 0-2-2v1.5Zm-5-5a.5.5 0 0 1 .5.5H25a2 2 0 0 0-2-2v1.5Zm-.5.5a.5.5 0 0 1 .5-.5V22a2 2 0 0 0-2 2h1.5Zm-5 5a.5.5 0 0 1 .5-.5V27a2 2 0 0 0-2 2h1.5Zm.5.5a.5.5 0 0 1-.5-.5H16a2 2 0 0 0 2 2v-1.5Zm5 5a.5.5 0 0 1-.5-.5H21a2 2 0 0 0 2 2v-1.5ZM18 31a3 3 0 0 1 3 3h1.5a4.5 4.5 0 0 0-4.5-4.5V31Zm3-7a3 3 0 0 1-3 3v1.5a4.5 4.5 0 0 0 4.5-4.5H21Zm7 3a3 3 0 0 1-3-3h-1.5a4.5 4.5 0 0 0 4.5 4.5V27Zm-3 7a3 3 0 0 1 3-3v-1.5a4.5 4.5 0 0 0-4.5 4.5H25Z"
fill="#6366F1"
/><path
fill-rule="evenodd"
clip-rule="evenodd"
d="M13 27.25c.69 0 1.25-.56 1.25-1.25 0-.966.784-1.75 1.75-1.75a1.25 1.25 0 1 0 0-2.5A1.75 1.75 0 0 1 14.25 20a1.25 1.25 0 1 0-2.5 0A1.75 1.75 0 0 1 10 21.75a1.25 1.25 0 0 0 0 2.5c.966 0 1.75.784 1.75 1.75 0 .69.56 1.25 1.25 1.25Z"
fill="#fff"
/><path
d="M16 21a.75.75 0 0 0 0 1.5V21Zm-2.5 5a.5.5 0 0 1-.5.5V28a2 2 0 0 0 2-2h-1.5Zm3-3a.5.5 0 0 1-.5.5V25a2 2 0 0 0 2-2h-1.5Zm-.5-.5a.5.5 0 0 1 .5.5H18a2 2 0 0 0-2-2v1.5Zm-3-3a.5.5 0 0 1 .5.5H15a2 2 0 0 0-2-2v1.5Zm-.5.5a.5.5 0 0 1 .5-.5V18a2 2 0 0 0-2 2h1.5Zm-3 3a.5.5 0 0 1 .5-.5V21a2 2 0 0 0-2 2h1.5Zm.5.5a.5.5 0 0 1-.5-.5H8a2 2 0 0 0 2 2v-1.5Zm3 3a.5.5 0 0 1-.5-.5H11a2 2 0 0 0 2 2v-1.5ZM10 25a1 1 0 0 1 1 1h1.5a2.5 2.5 0 0 0-2.5-2.5V25Zm1-5a1 1 0 0 1-1 1v1.5a2.5 2.5 0 0 0 2.5-2.5H11Zm5 1a1 1 0 0 1-1-1h-1.5a2.5 2.5 0 0 0 2.5 2.5V21Zm-1 5a1 1 0 0 1 1-1v-1.5a2.5 2.5 0 0 0-2.5 2.5H15Z"
fill="#6366F1"
/><path
opacity=".4"
d="M29.75 35.25h2.5a3 3 0 0 0 3-3v-20.5a3 3 0 0 0-3-3h-20.5a3 3 0 0 0-3 3v5.5M12.75 14.25h18.5"
stroke="#6366F1"
stroke-width="1.5"
stroke-linecap="round"
stroke-linejoin="round"
/></svg
> -->
<svg
t="1731984480564"
class="h-10 w-10"
viewBox="0 0 1114 1024"
version="1.1"
xmlns="http://www.w3.org/2000/svg"
p-id="29550"
width="200"
height="200"
><path
d="M1081.916235 788.781176H909.312v172.634353a24.696471 24.696471 0 0 1-49.332706 0V788.781176H687.314824a24.696471 24.696471 0 0 1 0-49.362823H859.949176V566.814118a24.696471 24.696471 0 0 1 49.332706 0v172.634353h172.664471a24.696471 24.696471 0 0 1 0 49.362823z"
fill="#0972E7"
p-id="29551"
/><path
d="M174.772706 143.028706h509.831529c43.550118 0 78.516706 35.689412 78.516706 80.173176v280.576c0 44.453647-34.966588 80.173176-78.516706 80.173177H174.772706c-43.550118 0-78.516706-35.719529-78.516706-80.173177V223.171765c0-43.851294 34.966588-80.173176 78.516706-80.173177z"
fill="#CAE4FF"
p-id="29552"
/><path
d="M335.600941 910.637176H104.899765c-24.545882 0-43.550118-20.028235-43.550118-45.086117V107.098353c0-25.057882 19.636706-45.086118 44.182588-45.086118h742.912c23.913412 0 44.182588 20.028235 44.182589 44.453647V282.503529c0 16.896 13.492706 31.322353 30.659764 31.322353a30.72 30.72 0 0 0 30.689883-31.322353V106.465882C953.976471 47.585882 906.721882 0 849.046588 0H104.899765C47.224471 0 0 48.218353 0 107.098353v758.452706c0 58.88 46.622118 107.098353 104.297412 107.098353h230.671059c16.564706 0 30.659765-13.793882 30.659764-31.322353a30.027294 30.027294 0 0 0-30.057411-30.689883z"
fill="#0972E7"
p-id="29553"
/><path
d="M709.180235 219.196235c0-16.896-13.492706-31.322353-30.659764-31.322353H171.760941c-16.564706 0-30.659765 13.793882-30.659765 31.322353 0 16.926118 13.492706 31.322353 30.659765 31.322353h506.75953a30.72 30.72 0 0 0 30.659764-31.322353zM171.760941 436.525176c-16.564706 0-30.659765 13.793882-30.659765 31.322353 0 16.896 13.492706 31.322353 30.659765 31.322353h344.786824c16.564706 0 30.689882-13.793882 30.689882-31.322353 0-16.926118-13.522824-31.322353-30.689882-31.322353H171.760941z"
fill="#0972E7"
p-id="29554"
/></svg
>


@@ -1,8 +0,0 @@
<!--
Copyright (C) 2025 Intel Corporation
SPDX-License-Identifier: Apache-2.0
-->
<svg class="h-5 w-5 flex-shrink-0 text-[#1d4dd5]" viewBox="0 0 20 20" fill="currentColor" aria-hidden="true" data-slot="icon">
<path fill-rule="evenodd" d="M15.621 4.379a3 3 0 0 0-4.242 0l-7 7a3 3 0 0 0 4.241 4.243h.001l.497-.5a.75.75 0 0 1 1.064 1.057l-.498.501-.002.002a4.5 4.5 0 0 1-6.364-6.364l7-7a4.5 4.5 0 0 1 6.368 6.36l-3.455 3.553A2.625 2.625 0 1 1 9.52 9.52l3.45-3.451a.75.75 0 1 1 1.061 1.06l-3.45 3.451a1.125 1.125 0 0 0 1.587 1.595l3.454-3.553a3 3 0 0 0 0-4.242Z" clip-rule="evenodd"></path>
</svg>


@@ -1,13 +0,0 @@
<!--
Copyright (C) 2025 Intel Corporation
SPDX-License-Identifier: Apache-2.0
-->
<svg
class="pointer-events-none absolute left-0 ml-4 hidden h-4 w-4 fill-current text-gray-500 group-hover:text-gray-400 sm:block"
xmlns="http://www.w3.org/2000/svg"
viewBox="0 0 20 20"
><path
d="M12.9 14.32a8 8 0 1 1 1.41-1.41l5.35 5.33-1.42 1.42-5.33-5.34zM8 14A6 6 0 1 0 8 2a6 6 0 0 0 0 12z"
/></svg
>


@@ -1,17 +0,0 @@
<!--
Copyright (C) 2025 Intel Corporation
SPDX-License-Identifier: Apache-2.0
-->
<svg
fill="none"
class="relative h-5 w-5"
stroke-linecap="round"
stroke-linejoin="round"
stroke-width="2"
stroke="currentColor"
viewBox="0 0 24 24"
><path
d="M10 14l2-2m0 0l2-2m-2 2l-2-2m2 2l2 2m7-2a9 9 0 11-18 0 9 9 0 0118 0z"
/></svg
>


Some files were not shown because too many files have changed in this diff.