Compare commits

..

2 Commits

Author SHA1 Message Date
pre-commit-ci[bot]
e372b2210b [pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
2024-09-19 00:24:44 +00:00
root
79a2d55807 add gateway to GenAIExamples. 2024-09-19 00:23:09 +00:00
656 changed files with 15190 additions and 49680 deletions

12
.github/CODEOWNERS vendored Executable file → Normal file
View File

@@ -1,17 +1,13 @@
/AgentQnA/ kaokao.lv@intel.com
/AgentQnA/ xuhui.ren@intel.com
/AudioQnA/ sihan.chen@intel.com
/ChatQnA/ liang1.lv@intel.com
/CodeGen/ liang1.lv@intel.com
/CodeTrans/ sihan.chen@intel.com
/DocSum/ letong.han@intel.com
/DocIndexRetriever/ kaokao.lv@intel.com chendi.xue@intel.com
/InstructionTuning xinyu.ye@intel.com
/RerankFinetuning xinyu.ye@intel.com
/MultimodalQnA tiep.le@intel.com
/DocSum/ sihan.chen@intel.com
/DocIndexRetriever/ xuhui.ren@intel.com chendi.xue@intel.com
/FaqGen/ xinyao.wang@intel.com
/SearchQnA/ sihan.chen@intel.com
/SearchQnA/ letong.han@intel.com
/Translation/ liang1.lv@intel.com
/VisualQnA/ liang1.lv@intel.com
/ProductivitySuite/ hoong.tee.yeoh@intel.com
/VideoQnA huiling.bao@intel.com
/*/ liang1.lv@intel.com

View File

@@ -1,2 +0,0 @@
ModelIn
modelin

View File

@@ -12,10 +12,6 @@ on:
example:
required: true
type: string
services:
default: ""
required: false
type: string
tag:
default: "latest"
required: false
@@ -40,11 +36,6 @@ on:
default: "main"
required: false
type: string
inject_commit:
default: false
required: false
type: string
jobs:
####################################################################################################
# Image Build
@@ -55,34 +46,33 @@ jobs:
- name: Clean Up Working Directory
run: sudo rm -rf ${{github.workspace}}/*
- name: Get Checkout Ref
- name: Get checkout ref
run: |
if [ "${{ github.event_name }}" == "pull_request" ] || [ "${{ github.event_name }}" == "pull_request_target" ]; then
echo "CHECKOUT_REF=refs/pull/${{ github.event.number }}/merge" >> $GITHUB_ENV
else
echo "CHECKOUT_REF=${{ github.ref }}" >> $GITHUB_ENV
fi
echo "checkout ref ${{ env.CHECKOUT_REF }}"
- name: Checkout out GenAIExamples
- name: Checkout out Repo
uses: actions/checkout@v4
with:
ref: ${{ env.CHECKOUT_REF }}
fetch-depth: 0
- name: Clone Required Repo
- name: Clone required Repo
run: |
cd ${{ github.workspace }}/${{ inputs.example }}/docker_image_build
docker_compose_path=${{ github.workspace }}/${{ inputs.example }}/docker_image_build/build.yaml
if [[ $(grep -c "tei-gaudi:" ${docker_compose_path}) != 0 ]]; then
git clone https://github.com/huggingface/tei-gaudi.git
fi
if [[ $(grep -c "vllm:" ${docker_compose_path}) != 0 ]]; then
git clone https://github.com/vllm-project/vllm.git
cd vllm && git rev-parse HEAD && cd ../
fi
if [[ $(grep -c "vllm-hpu:" ${docker_compose_path}) != 0 ]]; then
git clone https://github.com/HabanaAI/vllm-fork.git
cd vllm-fork && git rev-parse HEAD && cd ../
fi
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps && git checkout ${{ inputs.opea_branch }} && git rev-parse HEAD && cd ../
cd GenAIComps && git checkout ${{ inputs.opea_branch }} && cd ../
- name: Build Image
if: ${{ fromJSON(inputs.build) }}
@@ -90,9 +80,7 @@ jobs:
with:
work_dir: ${{ github.workspace }}/${{ inputs.example }}/docker_image_build
docker_compose_path: ${{ github.workspace }}/${{ inputs.example }}/docker_image_build/build.yaml
service_list: ${{ inputs.services }}
registry: ${OPEA_IMAGE_REPO}opea
inject_commit: ${{ inputs.inject_commit }}
tag: ${{ inputs.tag }}
####################################################################################################
@@ -120,6 +108,7 @@ jobs:
example: ${{ inputs.example }}
hardware: ${{ inputs.node }}
tag: ${{ inputs.tag }}
context: "CD"
secrets: inherit
####################################################################################################

View File

@@ -20,6 +20,11 @@ on:
description: "Tag to apply to images, default is latest"
required: false
type: string
context:
default: "CI"
description: "CI or CD"
required: false
type: string
jobs:
manifest-test:
@@ -46,7 +51,7 @@ jobs:
- name: Set variables
run: |
echo "IMAGE_REPO=${OPEA_IMAGE_REPO}opea" >> $GITHUB_ENV
echo "IMAGE_REPO=$OPEA_IMAGE_REPO" >> $GITHUB_ENV
echo "IMAGE_TAG=${{ inputs.tag }}" >> $GITHUB_ENV
lower_example=$(echo "${{ inputs.example }}" | tr '[:upper:]' '[:lower:]')
echo "NAMESPACE=$lower_example-$(tr -dc a-z0-9 </dev/urandom | head -c 16)" >> $GITHUB_ENV
@@ -55,6 +60,7 @@ jobs:
echo "continue_test=true" >> $GITHUB_ENV
echo "should_cleanup=false" >> $GITHUB_ENV
echo "skip_validate=true" >> $GITHUB_ENV
echo "CONTEXT=${{ inputs.context }}" >> $GITHUB_ENV
echo "NAMESPACE=$NAMESPACE"
- name: Kubectl install
@@ -90,16 +96,10 @@ jobs:
echo "Validate ${{ inputs.example }} successful!"
else
echo "Validate ${{ inputs.example }} failure!!!"
echo "Check the logs in 'Dump logs when e2e test failed' step!!!"
exit 1
.github/workflows/scripts/k8s-utils.sh dump_all_pod_logs $NAMESPACE
fi
fi
- name: Dump logs when e2e test failed
if: failure()
run: |
.github/workflows/scripts/k8s-utils.sh dump_all_pod_logs $NAMESPACE
- name: Kubectl uninstall
if: always()
run: |

View File

@@ -118,9 +118,6 @@ jobs:
GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
PINECONE_KEY: ${{ secrets.PINECONE_KEY }}
PINECONE_KEY_LANGCHAIN_TEST: ${{ secrets.PINECONE_KEY_LANGCHAIN_TEST }}
SDK_BASE_URL: ${{ secrets.SDK_BASE_URL }}
SERVING_TOKEN: ${{ secrets.SERVING_TOKEN }}
IMAGE_REPO: ${{ inputs.registry }}
IMAGE_TAG: ${{ inputs.tag }}
example: ${{ inputs.example }}
@@ -141,11 +138,7 @@ jobs:
flag=${flag#test_}
yaml_file=$(find . -type f -wholename "*${{ inputs.hardware }}/${flag}.yaml")
echo $yaml_file
container_list=$(cat $yaml_file | grep container_name | cut -d':' -f2)
for container_name in $container_list; do
cid=$(docker ps -aq --filter "name=$container_name")
if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
done
docker compose -f $yaml_file stop && docker compose -f $yaml_file rm -f || true
docker system prune -f
docker rmi $(docker images --filter reference="*:5000/*/*" -q) || true

View File

@@ -1,35 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: Check Online Document Building
permissions: {}
on:
pull_request:
branches: [main]
paths:
- "**.md"
- "**.rst"
jobs:
build:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
with:
path: GenAIExamples
- name: Checkout docs
uses: actions/checkout@v4
with:
repository: opea-project/docs
path: docs
- name: Build Online Document
shell: bash
run: |
echo "build online doc"
cd docs
bash scripts/build.sh

View File

@@ -11,23 +11,23 @@ on:
required: true
type: string
examples:
default: ""
description: 'List of examples to publish [AgentQnA,AudioQnA,ChatQnA,CodeGen,CodeTrans,DocIndexRetriever,DocSum,FaqGen,InstructionTuning,MultimodalQnA,ProductivitySuite,RerankFinetuning,SearchQnA,Translation,VideoQnA,VisualQnA]'
default: "Translation"
description: 'List of examples to publish [AudioQnA,ChatQnA,CodeGen,CodeTrans,DocSum,FaqGen,SearchQnA,Translation]'
required: false
type: string
images:
default: ""
description: 'List of images to publish [gmcmanager,gmcrouter]'
default: "gmcmanager,gmcrouter"
description: 'List of images to publish [gmcmanager,gmcrouter, ...]'
required: false
type: string
tag:
default: "rc"
description: "Tag to publish, like [1.0rc]"
default: "v0.9"
description: "Tag to publish"
required: true
type: string
publish_tags:
default: "latest,1.x"
description: "Tag list apply to publish images, like [latest,1.0]"
default: "latest,v0.9"
description: 'Tag list apply to publish images'
required: false
type: string

View File

@@ -11,13 +11,13 @@ on:
required: true
type: string
examples:
default: ""
description: 'List of examples to publish "AgentQnA,AudioQnA,ChatQnA,CodeGen,CodeTrans,DocIndexRetriever,DocSum,FaqGen,InstructionTuning,MultimodalQnA,ProductivitySuite,RerankFinetuning,SearchQnA,Translation,VideoQnA,VisualQnA"'
default: "ChatQnA"
description: 'List of examples to scan [AudioQnA,ChatQnA,CodeGen,CodeTrans,DocSum,FaqGen,SearchQnA,Translation]'
required: false
type: string
images:
default: ""
description: 'List of images to publish "gmcmanager,gmcrouter"'
default: "gmcmanager,gmcrouter"
description: 'List of images to scan [gmcmanager,gmcrouter, ...]'
required: false
type: string
tag:

View File

@@ -50,11 +50,6 @@ on:
description: 'OPEA branch for image build'
required: false
type: string
inject_commit:
default: true
description: "inject commit to docker images true or false"
required: false
type: string
permissions: read-all
jobs:
@@ -106,5 +101,4 @@ jobs:
test_k8s: ${{ fromJSON(inputs.test_k8s) }}
test_gmc: ${{ fromJSON(inputs.test_gmc) }}
opea_branch: ${{ inputs.opea_branch }}
inject_commit: ${{ inputs.inject_commit }}
secrets: inherit

View File

@@ -1,66 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: Build specific images on manual event
on:
workflow_dispatch:
inputs:
nodes:
default: "gaudi,xeon"
description: "Hardware to run test"
required: true
type: string
example:
default: "ChatQnA"
description: 'Build images belong to which example?'
required: true
type: string
services:
default: "chatqna,chatqna-without-rerank"
description: 'Service list to build'
required: true
type: string
tag:
default: "latest"
description: "Tag to apply to images"
required: true
type: string
opea_branch:
default: "main"
description: 'OPEA branch for image build'
required: false
type: string
inject_commit:
default: true
description: "inject commit to docker images true or false"
required: false
type: string
jobs:
get-test-matrix:
runs-on: ubuntu-latest
outputs:
nodes: ${{ steps.get-matrix.outputs.nodes }}
steps:
- name: Create Matrix
id: get-matrix
run: |
nodes=($(echo ${{ inputs.nodes }} | tr ',' ' '))
nodes_json=$(printf '%s\n' "${nodes[@]}" | sort -u | jq -R '.' | jq -sc '.')
echo "nodes=$nodes_json" >> $GITHUB_OUTPUT
image-build:
needs: get-test-matrix
strategy:
matrix:
node: ${{ fromJson(needs.get-test-matrix.outputs.nodes) }}
fail-fast: false
uses: ./.github/workflows/_example-workflow.yml
with:
node: ${{ matrix.node }}
example: ${{ inputs.example }}
services: ${{ inputs.services }}
tag: ${{ inputs.tag }}
opea_branch: ${{ inputs.opea_branch }}
inject_commit: ${{ inputs.inject_commit }}
secrets: inherit

View File

@@ -1,70 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: Nightly build/publish latest docker images
on:
schedule:
- cron: "30 13 * * *" # UTC time
workflow_dispatch:
env:
EXAMPLES: "AgentQnA,AudioQnA,ChatQnA,CodeGen,CodeTrans,DocIndexRetriever,DocSum,FaqGen,InstructionTuning,MultimodalQnA,ProductivitySuite,RerankFinetuning,SearchQnA,Translation,VideoQnA,VisualQnA"
TAG: "latest"
PUBLISH_TAGS: "latest"
jobs:
get-build-matrix:
runs-on: ubuntu-latest
outputs:
examples_json: ${{ steps.get-matrix.outputs.examples_json }}
EXAMPLES: ${{ steps.get-matrix.outputs.EXAMPLES }}
TAG: ${{ steps.get-matrix.outputs.TAG }}
PUBLISH_TAGS: ${{ steps.get-matrix.outputs.PUBLISH_TAGS }}
steps:
- name: Create Matrix
id: get-matrix
run: |
examples=($(echo ${EXAMPLES} | tr ',' ' '))
examples_json=$(printf '%s\n' "${examples[@]}" | sort -u | jq -R '.' | jq -sc '.')
echo "examples_json=$examples_json" >> $GITHUB_OUTPUT
echo "EXAMPLES=$EXAMPLES" >> $GITHUB_OUTPUT
echo "TAG=$TAG" >> $GITHUB_OUTPUT
echo "PUBLISH_TAGS=$PUBLISH_TAGS" >> $GITHUB_OUTPUT
build:
needs: get-build-matrix
strategy:
matrix:
example: ${{ fromJSON(needs.get-build-matrix.outputs.examples_json) }}
fail-fast: false
uses: ./.github/workflows/_example-workflow.yml
with:
node: gaudi
example: ${{ matrix.example }}
secrets: inherit
get-image-list:
needs: get-build-matrix
uses: ./.github/workflows/_get-image-list.yml
with:
examples: ${{ needs.get-build-matrix.outputs.EXAMPLES }}
publish:
needs: [get-build-matrix, get-image-list, build]
strategy:
matrix:
image: ${{ fromJSON(needs.get-image-list.outputs.matrix) }}
runs-on: "docker-build-gaudi"
steps:
- uses: docker/login-action@v3.2.0
with:
username: ${{ secrets.DOCKERHUB_USER }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Image Publish
uses: opea-project/validation/actions/image-publish@main
with:
local_image_ref: ${OPEA_IMAGE_REPO}opea/${{ matrix.image }}:${{ needs.get-build-matrix.outputs.TAG }}
image_name: opea/${{ matrix.image }}
publish_tags: ${{ needs.get-build-matrix.outputs.PUBLISH_TAGS }}

50
.github/workflows/pr-bum_list_check.yml vendored Normal file
View File

@@ -0,0 +1,50 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: Check Requirements
on: [pull_request]
jobs:
check-requirements:
runs-on: ubuntu-latest
steps:
- name: Checkout PR branch
uses: actions/checkout@v4
- name: Save PR requirements
run: |
find . -name "requirements.txt" -exec cat {} \; | \
grep -v '^\s*#' | \
grep -v '^\s*$' | \
grep -v '^\s*-' | \
sed 's/^\s*//' | \
awk -F'[>=<]' '{print $1}' | \
sort -u > pr-requirements.txt
cat pr-requirements.txt
- name: Checkout main branch
uses: actions/checkout@v4
with:
ref: main
path: main-branch
- name: Save main branch requirements
run: |
find ./main-branch -name "requirements.txt" -exec cat {} \; | \
grep -v '^\s*#' | \
grep -v '^\s*$' | \
grep -v '^\s*-' | \
sed 's/^\s*//' | \
awk -F'[>=<]' '{print $1}' | \
sort -u > main-requirements.txt
cat main-requirements.txt
- name: Compare requirements
run: |
comm -23 pr-requirements.txt main-requirements.txt > added-packages.txt
if [ -s added-packages.txt ]; then
echo "New packages found in PR:" && cat added-packages.txt
else
echo "No new packages found😊."
fi

View File

@@ -12,7 +12,7 @@ on:
- "**/tests/test_gmc**"
- "!**.md"
- "!**.txt"
- "!**/kubernetes/**/manifest/**"
- "!**/kubernetes/**/manifests/**"
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}

View File

@@ -8,9 +8,7 @@ on:
branches: ["main", "*rc"]
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
paths:
- "**/Dockerfile**"
- "**.py"
- "**/kubernetes/**/manifest/**"
- "**/kubernetes/**/manifests/**"
- "**/tests/test_manifest**"
- "!**.md"
- "!**.txt"

View File

@@ -0,0 +1,54 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: Manifests Validate
on:
pull_request:
branches: [main]
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
paths:
- "**/kubernetes/manifests/**"
- .github/workflows/manifest-validate.yml
workflow_dispatch:
# If there is a new commit, the previous jobs will be canceled
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
env:
MANIFEST_DIR: "manifests"
jobs:
manifests-validate:
runs-on: ubuntu-latest
steps:
- name: Checkout out Repo
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: changed files
id: changed_files
run: |
set -xe
changed_folder=$(git diff --name-only ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }} | \
grep "kubernetes/manifests" | grep -vE '.github|README.md|*.txt|*.sh' | cut -d'/' -f1 | sort -u )
echo "changed_folder: $changed_folder"
if [ -z "$changed_folder" ]; then
echo "No changes in manifests folder"
echo "SKIP=true" >> $GITHUB_OUTPUT
exit 0
fi
echo "SKIP=false" >> $GITHUB_OUTPUT
for folder in $changed_folder; do
folder_str="$folder_str $folder/kubernetes/manifests/"
done
echo "folder_str=$folder_str"
echo "folder_str=$folder_str" >> $GITHUB_ENV
- uses: docker://ghcr.io/yannh/kubeconform:latest
if: steps.changed_files.outputs.SKIP == 'false'
with:
args: "-summary -output json ${{env.folder_str}}"

View File

@@ -50,40 +50,28 @@ jobs:
- name: Checkout Repo GenAIExamples
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Check the Validity of Hyperlinks
run: |
cd ${{github.workspace}}
fail="FALSE"
merged_commit=$(git log -1 --format='%H')
changed_files="$(git diff --name-status --diff-filter=ARM ${{ github.event.pull_request.base.sha }} ${merged_commit} | awk '/\.md$/ {print $NF}')"
if [ -n "$changed_files" ]; then
for changed_file in $changed_files; do
# echo $changed_file
url_lines=$(grep -H -Eo '\]\(http[s]?://[^)]+\)' "$changed_file" | grep -Ev 'GenAIExamples/blob/main') || true
if [ -n "$url_lines" ]; then
for url_line in $url_lines; do
# echo $url_line
url=$(echo "$url_line"|cut -d '(' -f2 | cut -d ')' -f1|sed 's/\.git$//')
path=$(echo "$url_line"|cut -d':' -f1 | cut -d'/' -f2-)
response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url")|| true
if [ "$response" -ne 200 ]; then
echo "**********Validation failed, try again**********"
response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url")
if [ "$response_retry" -eq 200 ]; then
echo "*****Retry successfully*****"
else
echo "Invalid link from ${{github.workspace}}/$path: $url"
fail="TRUE"
fi
fi
done
url_lines=$(grep -Eo '\]\(http[s]?://[^)]+\)' --include='*.md' -r .)
if [ -n "$url_lines" ]; then
for url_line in $url_lines; do
url=$(echo "$url_line"|cut -d '(' -f2 | cut -d ')' -f1|sed 's/\.git$//')
path=$(echo "$url_line"|cut -d':' -f1 | cut -d'/' -f2-)
response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url")
if [ "$response" -ne 200 ]; then
echo "**********Validation failed, try again**********"
response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url")
if [ "$response_retry" -eq 200 ]; then
echo "*****Retry successfully*****"
else
echo "Invalid link from ${{github.workspace}}/$path: $url"
fail="TRUE"
fi
fi
done
else
echo "No changed .md file."
fi
if [[ "$fail" == "TRUE" ]]; then
@@ -101,8 +89,6 @@ jobs:
- name: Checkout Repo GenAIExamples
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Checking Relative Path Validity
run: |
@@ -116,34 +102,33 @@ jobs:
branch="https://github.com/opea-project/GenAIExamples/blob/${{ github.event.pull_request.head.ref }}"
fi
link_head="https://github.com/opea-project/GenAIExamples/blob/main"
merged_commit=$(git log -1 --format='%H')
changed_files="$(git diff --name-status --diff-filter=ARM ${{ github.event.pull_request.base.sha }} ${merged_commit} | awk '/\.md$/ {print $NF}')"
png_lines=$(grep -Eo '\]\([^)]+\)' --include='*.md' -r .|grep -Ev 'http')
if [ -n "$png_lines" ]; then
for png_line in $png_lines; do
refer_path=$(echo "$png_line"|cut -d':' -f1 | cut -d'/' -f2-)
png_path=$(echo "$png_line"|cut -d '(' -f2 | cut -d ')' -f1)
if [[ "${png_path:0:1}" == "/" ]]; then
check_path=$png_path
elif [[ "$png_path" == *#* ]]; then
relative_path=$(echo "$png_path" | cut -d '#' -f1)
if [ -n "$relative_path" ]; then
check_path=$(dirname "$refer_path")/$relative_path
png_path=$(echo "$png_path" | awk -F'#' '{print "#" $2}')
else
check_path=$refer_path
fi
check_path=${{github.workspace}}$png_path
elif [[ "${png_path:0:1}" == "#" ]]; then
check_path=${{github.workspace}}/$refer_path$png_path
else
check_path=$(dirname "$refer_path")/$png_path
check_path=${{github.workspace}}/$(dirname "$refer_path")/$png_path
fi
if [ -e "$check_path" ]; then
real_path=$(realpath $check_path)
if [[ "$png_line" == *#* ]]; then
if [ -n "changed_files" ] && echo "$changed_files" | grep -q "^${refer_path}$"; then
url_dev=$branch$(echo "$real_path" | sed 's|.*/GenAIExamples||')$png_path
real_path=$(realpath $check_path)
if [ $? -ne 0 ]; then
echo "Path $png_path in file ${{github.workspace}}/$refer_path does not exist"
fail="TRUE"
else
url=$link_head$(echo "$real_path" | sed 's|.*/GenAIExamples||')
response=$(curl -I -L -s -o /dev/null -w "%{http_code}" "$url")
if [ "$response" -ne 200 ]; then
echo "**********Validation failed, try again**********"
response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url")
if [ "$response_retry" -eq 200 ]; then
echo "*****Retry successfully*****"
else
echo "Retry failed. Check branch ${{ github.event.pull_request.head.ref }}"
url_dev=$branch$(echo "$real_path" | sed 's|.*/GenAIExamples||')
response=$(curl -I -L -s -o /dev/null -w "%{http_code}" "$url_dev")
if [ "$response" -ne 200 ]; then
echo "**********Validation failed, try again**********"
@@ -151,17 +136,14 @@ jobs:
if [ "$response_retry" -eq 200 ]; then
echo "*****Retry successfully*****"
else
echo "Invalid path from ${{github.workspace}}/$refer_path: $png_path"
echo "Invalid link from $real_path: $url_dev"
fail="TRUE"
fi
else
echo "Validation succeed $png_line"
echo "Check branch ${{ github.event.pull_request.head.ref }} successfully."
fi
fi
fi
else
echo "${{github.workspace}}/$refer_path:$png_path does not exist"
fail="TRUE"
fi
done
fi

View File

@@ -9,6 +9,7 @@ on:
paths:
- "**.py"
- "**Dockerfile"
workflow_dispatch:
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}-on-push
@@ -23,10 +24,12 @@ jobs:
image-build:
needs: job1
strategy:
matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }}
matrix:
example: ${{ fromJSON(needs.job1.outputs.run_matrix).include.*.example }}
node: ["gaudi","xeon"]
fail-fast: false
uses: ./.github/workflows/_example-workflow.yml
with:
node: ${{ matrix.hardware }}
node: ${{ matrix.node }}
example: ${{ matrix.example }}
secrets: inherit

View File

@@ -9,15 +9,12 @@ set -e
changed_files=$changed_files
test_mode=$test_mode
run_matrix="{\"include\":["
hardware_list="xeon gaudi" # current support hardware list
examples=$(printf '%s\n' "${changed_files[@]}" | grep '/' | cut -d'/' -f1 | sort -u)
for example in ${examples}; do
cd $WORKSPACE/$example
if [[ ! $(find . -type f | grep ${test_mode}) ]]; then continue; fi
cd tests
ls -l
hardware_list=$(find . -type f -name "test_compose*_on_*.sh" | cut -d/ -f2 | cut -d. -f1 | awk -F'_on_' '{print $2}'| sort -u)
echo "Test supported hardware list = ${hardware_list}"
run_hardware=""
if [[ $(printf '%s\n' "${changed_files[@]}" | grep ${example} | cut -d'/' -f2 | grep -E '*.py|Dockerfile*|ui|docker_image_build' ) ]]; then

View File

@@ -18,6 +18,8 @@ repos:
SearchQnA/ui/svelte/tsconfig.json|
DocSum/ui/svelte/tsconfig.json
)$
- id: check-yaml
args: [--allow-multiple-documents]
- id: debug-statements
- id: requirements-txt-fixer
- id: trailing-whitespace
@@ -79,7 +81,7 @@ repos:
- id: isort
- repo: https://github.com/PyCQA/docformatter
rev: 06907d0
rev: v1.7.5
hooks:
- id: docformatter
args: [

View File

@@ -5,73 +5,6 @@
This example showcases a hierarchical multi-agent system for question-answering applications. The architecture diagram is shown below. The supervisor agent interfaces with the user and dispatch tasks to the worker agent and other tools to gather information and come up with answers. The worker agent uses the retrieval tool to generate answers to the queries posted by the supervisor agent. Other tools used by the supervisor agent may include APIs to interface knowledge graphs, SQL databases, external knowledge bases, etc.
![Architecture Overview](assets/agent_qna_arch.png)
The AgentQnA example is implemented using the component-level microservices defined in [GenAIComps](https://github.com/opea-project/GenAIComps). The flow chart below shows the information flow between different microservices for this example.
```mermaid
---
config:
flowchart:
nodeSpacing: 400
rankSpacing: 100
curve: linear
themeVariables:
fontSize: 50px
---
flowchart LR
%% Colors %%
classDef blue fill:#ADD8E6,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
classDef orange fill:#FBAA60,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
classDef orchid fill:#C26DBC,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
classDef invisible fill:transparent,stroke:transparent;
%% Subgraphs %%
subgraph DocIndexRetriever-MegaService["DocIndexRetriever MegaService "]
direction LR
EM([Embedding MicroService]):::blue
RET([Retrieval MicroService]):::blue
RER([Rerank MicroService]):::blue
end
subgraph UserInput[" User Input "]
direction LR
a([User Input Query]):::orchid
Ingest([Ingest data]):::orchid
end
AG_REACT([Agent MicroService - react]):::blue
AG_RAG([Agent MicroService - rag]):::blue
LLM_gen{{LLM Service <br>}}
DP([Data Preparation MicroService]):::blue
TEI_RER{{Reranking service<br>}}
TEI_EM{{Embedding service <br>}}
VDB{{Vector DB<br><br>}}
R_RET{{Retriever service <br>}}
%% Questions interaction
direction LR
a[User Input Query] --> AG_REACT
AG_REACT --> AG_RAG
AG_RAG --> DocIndexRetriever-MegaService
EM ==> RET
RET ==> RER
Ingest[Ingest data] --> DP
%% Embedding service flow
direction LR
AG_RAG <-.-> LLM_gen
AG_REACT <-.-> LLM_gen
EM <-.-> TEI_EM
RET <-.-> R_RET
RER <-.-> TEI_RER
direction TB
%% Vector DB interaction
R_RET <-.-> VDB
DP <-.-> VDB
```
### Why Agent for question answering?
1. Improve relevancy of retrieved context.
@@ -81,13 +14,17 @@ flowchart LR
3. Hierarchical agent can further improve performance.
Expert worker agents, such as retrieval agent, knowledge graph agent, SQL agent, etc., can provide high-quality output for different aspects of a complex query, and the supervisor agent can aggregate the information together to provide a comprehensive answer.
## Deployment with docker
### Roadmap
1. Build agent docker image
- v0.9: Worker agent uses open-source websearch tool (duckduckgo), agents use OpenAI GPT-4o-mini as llm backend.
- v1.0: Worker agent uses OPEA retrieval megaservice as tool.
- v1.0 or later: agents use open-source llm backend.
- v1.1 or later: add safeguards
Note: this is optional. The docker images will be automatically pulled when running the docker compose commands. This step is only needed if pulling images failed.
## Getting started
First, clone the opea GenAIComps repo.
1. Build agent docker image </br>
First, clone the opea GenAIComps repo
```
export WORKDIR=<your-work-directory>
@@ -102,63 +39,35 @@ flowchart LR
docker build -t opea/agent-langchain:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/agent/langchain/Dockerfile .
```
2. Set up environment for this example </br>
First, clone this repo.
```
cd $WORKDIR
git clone https://github.com/opea-project/GenAIExamples.git
```
Second, set up env vars.
```
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
# for using open-source llms
export HUGGINGFACEHUB_API_TOKEN=<your-HF-token>
export HF_CACHE_DIR=<directory-where-llms-are-downloaded> #so that no need to redownload every time
# optional: OPANAI_API_KEY if you want to use OpenAI models
export OPENAI_API_KEY=<your-openai-key>
```
3. Deploy the retrieval tool (i.e., DocIndexRetriever mega-service)
First, launch the mega-service.
```
cd $WORKDIR/GenAIExamples/AgentQnA/retrieval_tool
bash launch_retrieval_tool.sh
```
Then, ingest data into the vector database. Here we provide an example. You can ingest your own data.
```
bash run_ingest_data.sh
```
4. Launch other tools. </br>
2. Launch tool services </br>
In this example, we will use some of the mock APIs provided in the Meta CRAG KDD Challenge to demonstrate the benefits of gaining additional context from mock knowledge graphs.
```
docker run -d -p=8080:8000 docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0
```
5. Launch agent services</br>
We provide two options for `llm_engine` of the agents: 1. open-source LLMs, 2. OpenAI models via API calls.
To use open-source LLMs on Gaudi2, run commands below.
3. Set up environment for this example </br>
First, clone this repo
```
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi
bash launch_tgi_gaudi.sh
bash launch_agent_service_tgi_gaudi.sh
cd $WORKDIR
git clone https://github.com/opea-project/GenAIExamples.git
```
To use OpenAI models, run commands below.
Second, set up env vars
```
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/cpu/xeon
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
# optional: OPANAI_API_KEY
export OPENAI_API_KEY=<your-openai-key>
```
4. Launch agent services</br>
The configurations of the supervisor agent and the worker agent are defined in the docker-compose yaml file. We currently use openAI GPT-4o-mini as LLM, and we plan to add support for llama3.1-70B-instruct (served by TGI-Gaudi) in a subsequent release.
To use openai llm, run command below.
```
cd docker_compose/intel/cpu/xeon
bash launch_agent_service_openai.sh
```
@@ -167,12 +76,10 @@ flowchart LR
First look at logs of the agent docker containers:
```
# worker agent
docker logs rag-agent-endpoint
docker logs docgrader-agent-endpoint
```
```
# supervisor agent
docker logs react-agent-endpoint
```
@@ -196,4 +103,4 @@ curl http://${ip_address}:9090/v1/chat/completions -X POST -H "Content-Type: app
## How to register your own tools with agent
You can take a look at the tools yaml and python files in this example. For more details, please refer to the "Provide your own tools" section in the instructions [here](https://github.com/opea-project/GenAIComps/tree/main/comps/agent/langchain/README.md).
You can take a look at the tools yaml and python files in this example. For more details, please refer to the "Provide your own tools" section in the instructions [here](https://github.com/opea-project/GenAIComps/tree/main/comps/agent/langchain#5-customize-agent-strategy).

View File

@@ -1,3 +0,0 @@
# Deployment on Xeon
We deploy the retrieval tool on Xeon. For LLMs, we support OpenAI models via API calls. For instructions on using open-source LLMs, please refer to the deployment guide [here](../../../../README.md).

View File

@@ -2,10 +2,11 @@
# SPDX-License-Identifier: Apache-2.0
services:
worker-rag-agent:
worker-docgrader-agent:
image: opea/agent-langchain:latest
container_name: rag-agent-endpoint
container_name: docgrader-agent-endpoint
volumes:
- ${WORKDIR}/GenAIComps/comps/agent/langchain/:/home/user/comps/agent/langchain/
- ${TOOLSET_PATH}:/home/user/tools/
ports:
- "9095:9095"
@@ -35,9 +36,8 @@ services:
supervisor-react-agent:
image: opea/agent-langchain:latest
container_name: react-agent-endpoint
depends_on:
- worker-rag-agent
volumes:
- ${WORKDIR}/GenAIComps/comps/agent/langchain/:/home/user/comps/agent/langchain/
- ${TOOLSET_PATH}:/home/user/tools/
ports:
- "9090:9090"

View File

@@ -7,7 +7,7 @@ export recursion_limit_worker=12
export recursion_limit_supervisor=10
export model="gpt-4o-mini-2024-07-18"
export temperature=0
export max_new_tokens=4096
export max_new_tokens=512
export OPENAI_API_KEY=${OPENAI_API_KEY}
export WORKER_AGENT_URL="http://${ip_address}:9095/v1/chat/completions"
export RETRIEVAL_TOOL_URL="http://${ip_address}:8889/v1/retrievaltool"

View File

@@ -2,9 +2,33 @@
# SPDX-License-Identifier: Apache-2.0
services:
worker-rag-agent:
tgi-server:
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
container_name: tgi-server
ports:
- "8085:80"
volumes:
- ${HF_CACHE_DIR}:/data
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
PT_HPU_ENABLE_LAZY_COLLECTIVES: true
runtime: habana
cap_add:
- SYS_NICE
ipc: host
command: --model-id ${LLM_MODEL_ID} --max-input-length 4096 --max-total-tokens 8192 --sharded true --num-shard ${NUM_SHARDS}
worker-docgrader-agent:
image: opea/agent-langchain:latest
container_name: rag-agent-endpoint
container_name: docgrader-agent-endpoint
depends_on:
- tgi-server
volumes:
# - ${WORKDIR}/GenAIExamples/AgentQnA/docker_image_build/GenAIComps/comps/agent/langchain/:/home/user/comps/agent/langchain/
- ${TOOLSET_PATH}:/home/user/tools/
@@ -13,7 +37,7 @@ services:
ipc: host
environment:
ip_address: ${ip_address}
strategy: rag_agent_llama
strategy: rag_agent
recursion_limit: ${recursion_limit_worker}
llm_engine: tgi
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
@@ -38,7 +62,8 @@ services:
image: opea/agent-langchain:latest
container_name: react-agent-endpoint
depends_on:
- worker-rag-agent
- tgi-server
- worker-docgrader-agent
volumes:
# - ${WORKDIR}/GenAIExamples/AgentQnA/docker_image_build/GenAIComps/comps/agent/langchain/:/home/user/comps/agent/langchain/
- ${TOOLSET_PATH}:/home/user/tools/
@@ -47,7 +72,7 @@ services:
ipc: host
environment:
ip_address: ${ip_address}
strategy: react_llama
strategy: react_langgraph
recursion_limit: ${recursion_limit_supervisor}
llm_engine: tgi
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}

View File

@@ -15,7 +15,7 @@ export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-70B-Instruct"
export NUM_SHARDS=4
export LLM_ENDPOINT_URL="http://${ip_address}:8085"
export temperature=0.01
export max_new_tokens=4096
export max_new_tokens=512
# agent related environment variables
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
@@ -27,3 +27,17 @@ export RETRIEVAL_TOOL_URL="http://${ip_address}:8889/v1/retrievaltool"
export CRAG_SERVER=http://${ip_address}:8080
docker compose -f compose.yaml up -d
sleep 5s
echo "Waiting tgi gaudi ready"
n=0
until [[ "$n" -ge 100 ]] || [[ $ready == true ]]; do
docker logs tgi-server &> tgi-gaudi-service.log
n=$((n+1))
if grep -q Connected tgi-gaudi-service.log; then
break
fi
sleep 5s
done
sleep 5s
echo "Service started successfully"

View File

@@ -1,25 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# LLM related environment variables
export HF_CACHE_DIR=${HF_CACHE_DIR}
ls $HF_CACHE_DIR
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-70B-Instruct"
export NUM_SHARDS=4
docker compose -f tgi_gaudi.yaml up -d
sleep 5s
echo "Waiting tgi gaudi ready"
n=0
until [[ "$n" -ge 100 ]] || [[ $ready == true ]]; do
docker logs tgi-server &> tgi-gaudi-service.log
n=$((n+1))
if grep -q Connected tgi-gaudi-service.log; then
break
fi
sleep 5s
done
sleep 5s
echo "Service started successfully"

View File

@@ -1,30 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
services:
tgi-server:
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
container_name: tgi-server
ports:
- "8085:80"
volumes:
- ${HF_CACHE_DIR}:/data
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
PT_HPU_ENABLE_LAZY_COLLECTIVES: true
ENABLE_HPU_GRAPH: true
LIMIT_HPU_GRAPH: true
USE_FLASH_ATTENTION: true
FLASH_ATTENTION_RECOMPUTE: true
runtime: habana
cap_add:
- SYS_NICE
ipc: host
command: --model-id ${LLM_MODEL_ID} --max-input-length 4096 --max-total-tokens 8192 --sharded true --num-shard ${NUM_SHARDS}

View File

@@ -17,12 +17,6 @@ if [ ! -d "$HF_CACHE_DIR" ]; then
fi
ls $HF_CACHE_DIR
function start_tgi(){
echo "Starting tgi-gaudi server"
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi
bash launch_tgi_gaudi.sh
}
function start_agent_and_api_server() {
echo "Starting CRAG server"
@@ -31,7 +25,6 @@ function start_agent_and_api_server() {
echo "Starting Agent services"
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi
bash launch_agent_service_tgi_gaudi.sh
sleep 10
}
function validate() {
@@ -50,22 +43,18 @@ function validate() {
function validate_agent_service() {
echo "----------------Test agent ----------------"
# local CONTENT=$(http_proxy="" curl http://${ip_address}:9095/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
# "query": "Tell me about Michael Jackson song thriller"
# }')
export agent_port="9095"
local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py)
local EXIT_CODE=$(validate "$CONTENT" "Thriller" "rag-agent-endpoint")
docker logs rag-agent-endpoint
local CONTENT=$(http_proxy="" curl http://${ip_address}:9095/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
"query": "Tell me about Michael Jackson song thriller"
}')
local EXIT_CODE=$(validate "$CONTENT" "Thriller" "react-agent-endpoint")
docker logs docgrader-agent-endpoint
if [ "$EXIT_CODE" == "1" ]; then
exit 1
fi
# local CONTENT=$(http_proxy="" curl http://${ip_address}:9090/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
# "query": "Tell me about Michael Jackson song thriller"
# }')
export agent_port="9090"
local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py)
local CONTENT=$(http_proxy="" curl http://${ip_address}:9090/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
"query": "Tell me about Michael Jackson song thriller"
}')
local EXIT_CODE=$(validate "$CONTENT" "Thriller" "react-agent-endpoint")
docker logs react-agent-endpoint
if [ "$EXIT_CODE" == "1" ]; then
@@ -75,10 +64,6 @@ function validate_agent_service() {
}
function main() {
echo "==================== Start TGI ===================="
start_tgi
echo "==================== TGI started ===================="
echo "==================== Start agent ===================="
start_agent_and_api_server
echo "==================== Agent started ===================="

View File

@@ -1,25 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import os
import requests
def generate_answer_agent_api(url, prompt):
    """Send *prompt* to the agent's chat-completions endpoint and return the answer text."""
    no_proxy = {"http": ""}  # bypass any http proxy for the local endpoint
    body = {"query": prompt}
    resp = requests.post(url, json=body, proxies=no_proxy)
    return resp.json()["text"]
if __name__ == "__main__":
    # Endpoint defaults match the local agent deployment; both pieces are
    # overridable through the ip_address / agent_port environment variables
    # (the test harness exports agent_port=9095 or 9090 per agent under test).
    ip_address = os.getenv("ip_address", "localhost")
    agent_port = os.getenv("agent_port", "9095")
    url = f"http://{ip_address}:{agent_port}/v1/chat/completions"
    prompt = "Tell me about Michael Jackson song thriller"
    answer = generate_answer_agent_api(url, prompt)
    print(answer)

View File

@@ -19,6 +19,7 @@ function stop_crag() {
function stop_agent_docker() {
cd $WORKPATH/docker_compose/intel/hpu/gaudi/
# docker compose -f compose.yaml down
container_list=$(cat compose.yaml | grep container_name | cut -d':' -f2)
for container_name in $container_list; do
cid=$(docker ps -aq --filter "name=$container_name")
@@ -27,21 +28,11 @@ function stop_agent_docker() {
done
}
function stop_tgi(){
cd $WORKPATH/docker_compose/intel/hpu/gaudi/
container_list=$(cat tgi_gaudi.yaml | grep container_name | cut -d':' -f2)
for container_name in $container_list; do
cid=$(docker ps -aq --filter "name=$container_name")
echo "Stopping container $container_name"
if [[ ! -z "$cid" ]]; then docker rm $cid -f && sleep 1s; fi
done
}
function stop_retrieval_tool() {
echo "Stopping Retrieval tool"
local RETRIEVAL_TOOL_PATH=$WORKPATH/../DocIndexRetriever
cd $RETRIEVAL_TOOL_PATH/docker_compose/intel/cpu/xeon/
# docker compose -f compose.yaml down
container_list=$(cat compose.yaml | grep container_name | cut -d':' -f2)
for container_name in $container_list; do
cid=$(docker ps -aq --filter "name=$container_name")
@@ -52,26 +43,25 @@ function stop_retrieval_tool() {
echo "workpath: $WORKPATH"
echo "=================== Stop containers ===================="
stop_crag
stop_tgi
stop_agent_docker
stop_retrieval_tool
cd $WORKPATH/tests
echo "=================== #1 Building docker images===================="
bash step1_build_images.sh
bash 1_build_images.sh
echo "=================== #1 Building docker images completed===================="
echo "=================== #2 Start retrieval tool===================="
bash step2_start_retrieval_tool.sh
bash 2_start_retrieval_tool.sh
echo "=================== #2 Retrieval tool started===================="
echo "=================== #3 Ingest data and validate retrieval===================="
bash step3_ingest_data_and_validate_retrieval.sh
bash 3_ingest_data_and_validate_retrieval.sh
echo "=================== #3 Data ingestion and validation completed===================="
echo "=================== #4 Start agent and API server===================="
bash step4_launch_and_validate_agent_tgi.sh
bash 4_launch_and_validate_agent_tgi.sh
echo "=================== #4 Agent test passed ===================="
echo "=================== #5 Stop agent and API server===================="
@@ -80,6 +70,4 @@ stop_agent_docker
stop_retrieval_tool
echo "=================== #5 Agent and API server stopped===================="
echo y | docker system prune
echo "ALL DONE!"

View File

@@ -25,7 +25,7 @@ get_billboard_rank_date:
args_schema:
rank:
type: int
description: the rank of interest, for example 1 for top 1
description: song name
date:
type: str
description: date

View File

@@ -12,31 +12,16 @@ def search_knowledge_base(query: str) -> str:
print(url)
proxies = {"http": ""}
payload = {
"messages": query,
"text": query,
}
response = requests.post(url, json=payload, proxies=proxies)
print(response)
if "documents" in response.json():
docs = response.json()["documents"]
context = ""
for i, doc in enumerate(docs):
if i == 0:
context = doc
else:
context += "\n" + doc
# print(context)
return context
elif "text" in response.json():
return response.json()["text"]
elif "reranked_docs" in response.json():
docs = response.json()["reranked_docs"]
context = ""
for i, doc in enumerate(docs):
if i == 0:
context = doc["text"]
else:
context += "\n" + doc["text"]
# print(context)
return context
else:
return "Error parsing response from the knowledge base."
docs = response.json()["documents"]
context = ""
for i, doc in enumerate(docs):
if i == 0:
context = doc
else:
context += "\n" + doc
print(context)
return context

View File

@@ -1,32 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
FROM python:3.11-slim
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
libgl1-mesa-glx \
libjemalloc-dev \
git
RUN useradd -m -s /bin/bash user && \
mkdir -p /home/user && \
chown -R user /home/user/
WORKDIR /home/user/
RUN git clone https://github.com/opea-project/GenAIComps.git
WORKDIR /home/user/GenAIComps
RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt
COPY ./audioqna_multilang.py /home/user/audioqna_multilang.py
ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps
USER user
WORKDIR /home/user
ENTRYPOINT ["python", "audioqna_multilang.py"]

View File

@@ -2,63 +2,6 @@
AudioQnA is an example that demonstrates the integration of Generative AI (GenAI) models for performing question-answering (QnA) on audio files, with the added functionality of Text-to-Speech (TTS) for generating spoken responses. The example showcases how to convert audio input to text using Automatic Speech Recognition (ASR), generate answers to user queries using a language model, and then convert those answers back to speech using Text-to-Speech (TTS).
The AudioQnA example is implemented using the component-level microservices defined in [GenAIComps](https://github.com/opea-project/GenAIComps). The flow chart below shows the information flow between different microservices for this example.
```mermaid
---
config:
flowchart:
nodeSpacing: 400
rankSpacing: 100
curve: linear
themeVariables:
fontSize: 50px
---
flowchart LR
%% Colors %%
classDef blue fill:#ADD8E6,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
classDef orange fill:#FBAA60,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
classDef orchid fill:#C26DBC,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
classDef invisible fill:transparent,stroke:transparent;
style AudioQnA-MegaService stroke:#000000
%% Subgraphs %%
subgraph AudioQnA-MegaService["AudioQnA MegaService "]
direction LR
ASR([ASR MicroService]):::blue
LLM([LLM MicroService]):::blue
TTS([TTS MicroService]):::blue
end
subgraph UserInterface[" User Interface "]
direction LR
a([User Input Query]):::orchid
UI([UI server<br>]):::orchid
end
WSP_SRV{{whisper service<br>}}
SPC_SRV{{speecht5 service <br>}}
LLM_gen{{LLM Service <br>}}
GW([AudioQnA GateWay<br>]):::orange
%% Questions interaction
direction LR
a[User Audio Query] --> UI
UI --> GW
GW <==> AudioQnA-MegaService
ASR ==> LLM
LLM ==> TTS
%% Embedding service flow
direction LR
ASR <-.-> WSP_SRV
LLM <-.-> LLM_gen
TTS <-.-> SPC_SRV
```
## Deploy AudioQnA Service
The AudioQnA service can be deployed on either Intel Gaudi2 or Intel Xeon Scalable Processor.

View File

@@ -1,98 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import asyncio
import base64
import os
from comps import AudioQnAGateway, MicroService, ServiceOrchestrator, ServiceType
MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "0.0.0.0")
MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))
WHISPER_SERVER_HOST_IP = os.getenv("WHISPER_SERVER_HOST_IP", "0.0.0.0")
WHISPER_SERVER_PORT = int(os.getenv("WHISPER_SERVER_PORT", 7066))
GPT_SOVITS_SERVER_HOST_IP = os.getenv("GPT_SOVITS_SERVER_HOST_IP", "0.0.0.0")
GPT_SOVITS_SERVER_PORT = int(os.getenv("GPT_SOVITS_SERVER_PORT", 9088))
LLM_SERVER_HOST_IP = os.getenv("LLM_SERVER_HOST_IP", "0.0.0.0")
LLM_SERVER_PORT = int(os.getenv("LLM_SERVER_PORT", 8888))
def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
    """Reshape the payload coming out of the previous node into the request
    format expected by the next microservice in the ASR -> LLM -> TTS chain.

    Monkey-patched onto ServiceOrchestrator (see AudioQnAService.__init__),
    so ``self`` is the orchestrator and ``self.services[cur_node]`` is the
    service about to be invoked.
    """
    print(inputs)  # NOTE(review): debug print left in; consider proper logging
    if self.services[cur_node].service_type == ServiceType.ASR:
        # Gateway payload looks like:
        # {'byte_str': 'UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA'}
        # The ASR service expects the base64 audio under the "audio" key.
        inputs["audio"] = inputs["byte_str"]
        del inputs["byte_str"]
    elif self.services[cur_node].service_type == ServiceType.LLM:
        # convert TGI/vLLM to unified OpenAI /v1/chat/completions format
        next_inputs = {}
        next_inputs["model"] = "tgi"  # specifically clarify the fake model to make the format unified
        next_inputs["messages"] = [{"role": "user", "content": inputs["asr_result"]}]
        next_inputs["max_tokens"] = llm_parameters_dict["max_tokens"]
        next_inputs["top_p"] = llm_parameters_dict["top_p"]
        next_inputs["stream"] = inputs["streaming"]  # False as default
        next_inputs["frequency_penalty"] = inputs["frequency_penalty"]
        # next_inputs["presence_penalty"] = inputs["presence_penalty"]
        # next_inputs["repetition_penalty"] = inputs["repetition_penalty"]
        next_inputs["temperature"] = inputs["temperature"]
        inputs = next_inputs
    elif self.services[cur_node].service_type == ServiceType.TTS:
        # TTS takes the LLM answer text plus a language code; defaults to
        # Chinese ("zh") unless the caller supplies tts_text_language.
        next_inputs = {}
        next_inputs["text"] = inputs["choices"][0]["message"]["content"]
        next_inputs["text_language"] = kwargs["tts_text_language"] if "tts_text_language" in kwargs else "zh"
        inputs = next_inputs
    return inputs
def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_dict, **kwargs):
    """Wrap raw TTS audio bytes as a base64 payload; pass all other node outputs through."""
    if self.services[cur_node].service_type != ServiceType.TTS:
        return data
    encoded = base64.b64encode(data).decode("utf-8")
    return {"byte_str": encoded}
class AudioQnAService:
    """Megaservice that chains remote ASR -> LLM -> TTS microservices and
    exposes the pipeline behind a single AudioQnA gateway endpoint."""

    def __init__(self, host="0.0.0.0", port=8000):
        # host/port are where the gateway will listen.
        self.host = host
        self.port = port
        # Patch the orchestrator's payload-adaptation hooks so each hop in the
        # pipeline receives/emits the format its microservice expects.
        ServiceOrchestrator.align_inputs = align_inputs
        ServiceOrchestrator.align_outputs = align_outputs
        self.megaservice = ServiceOrchestrator()

    def add_remote_service(self):
        """Register the three remote services, chain them ASR -> LLM -> TTS,
        and start the gateway that fronts the whole pipeline."""
        asr = MicroService(
            name="asr",
            host=WHISPER_SERVER_HOST_IP,
            port=WHISPER_SERVER_PORT,
            # endpoint="/v1/audio/transcriptions",
            endpoint="/v1/asr",
            use_remote_service=True,
            service_type=ServiceType.ASR,
        )
        llm = MicroService(
            name="llm",
            host=LLM_SERVER_HOST_IP,
            port=LLM_SERVER_PORT,
            endpoint="/v1/chat/completions",
            use_remote_service=True,
            service_type=ServiceType.LLM,
        )
        tts = MicroService(
            name="tts",
            host=GPT_SOVITS_SERVER_HOST_IP,
            port=GPT_SOVITS_SERVER_PORT,
            # endpoint="/v1/audio/speech",
            endpoint="/",
            use_remote_service=True,
            service_type=ServiceType.TTS,
        )
        self.megaservice.add(asr).add(llm).add(tts)
        self.megaservice.flow_to(asr, llm)
        self.megaservice.flow_to(llm, tts)
        self.gateway = AudioQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)
if __name__ == "__main__":
    # Build the megaservice and bring up the gateway on MEGA_SERVICE_PORT.
    audioqna = AudioQnAService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT)
    audioqna.add_remote_service()

View File

@@ -1,51 +0,0 @@
# AudioQnA Accuracy
AudioQnA is an example that demonstrates the integration of Generative AI (GenAI) models for performing question-answering (QnA) on audio scenes, which involves Automatic Speech Recognition (ASR) and Text-to-Speech (TTS). The following is the pipeline for evaluating the ASR accuracy.
## Dataset
We evaluate the ASR accuracy on the test set of librispeech [dataset](https://huggingface.co/datasets/andreagasparini/librispeech_test_only), which contains 2620 records of audio and texts.
## Metrics
We evaluate the WER (Word Error Rate) metric of the ASR microservice.
## Evaluation
### Launch ASR microservice
Launch the ASR microservice with the following commands. For more details, please refer to the [doc](https://github.com/opea-project/GenAIComps/tree/main/comps/asr/whisper/README.md).
```bash
git clone https://github.com/opea-project/GenAIComps
cd GenAIComps
docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/Dockerfile .
# change the name of model by editing model_name_or_path you want to evaluate
docker run -p 7066:7066 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/whisper:latest --model_name_or_path "openai/whisper-tiny"
```
### Evaluate
Install dependencies:
```
pip install -r requirements.txt
```
Evaluate the performance with the LLM:
```py
# validate the offline model
# python offline_eval.py
# validate the online asr microservice accuracy
python online_eval.py
```
### Performance Result
Here is the tested result for your reference
|| WER |
| --- | ---- |
|whisper-large-v2| 2.87|
|whisper-large| 2.7 |
|whisper-medium| 3.45 |

View File

@@ -1,35 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import torch
from datasets import load_dataset
from evaluate import load
from transformers import WhisperForConditionalGeneration, WhisperProcessor
device = "cuda" if torch.cuda.is_available() else "cpu"
MODEL_NAME = "openai/whisper-large-v2"
librispeech_test_clean = load_dataset(
"andreagasparini/librispeech_test_only", "clean", split="test", trust_remote_code=True
)
processor = WhisperProcessor.from_pretrained(MODEL_NAME)
model = WhisperForConditionalGeneration.from_pretrained(MODEL_NAME).to(device)
def map_to_pred(batch):
    """Dataset.map callback: transcribe one librispeech sample offline and
    attach normalized reference/prediction texts for WER scoring.

    Relies on the module-level ``processor``, ``model``, and ``device`` globals.
    """
    audio = batch["audio"]
    # Convert the raw waveform to Whisper input features.
    input_features = processor(audio["array"], sampling_rate=audio["sampling_rate"], return_tensors="pt").input_features
    # Normalize the ground-truth transcript the same way as the prediction so
    # that WER is computed on comparable text.
    batch["reference"] = processor.tokenizer._normalize(batch["text"])
    with torch.no_grad():
        predicted_ids = model.generate(input_features.to(device))[0]
    transcription = processor.decode(predicted_ids)
    batch["prediction"] = processor.tokenizer._normalize(transcription)
    return batch
result = librispeech_test_clean.map(map_to_pred)
wer = load("wer")
print(100 * wer.compute(references=result["reference"], predictions=result["prediction"]))

View File

@@ -1,56 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import base64
import json
import requests
import torch
from datasets import load_dataset
from evaluate import load
from pydub import AudioSegment
from transformers import WhisperForConditionalGeneration, WhisperProcessor
MODEL_NAME = "openai/whisper-large-v2"
processor = WhisperProcessor.from_pretrained(MODEL_NAME)
librispeech_test_clean = load_dataset(
"andreagasparini/librispeech_test_only", "clean", split="test", trust_remote_code=True
)
def map_to_pred(batch):
    """Dataset.map callback: send one librispeech sample to the running ASR
    microservice and attach normalized reference/prediction texts for WER.

    Relies on the module-level ``processor`` global for text normalization.
    """
    batch["reference"] = processor.tokenizer._normalize(batch["text"])
    file_path = batch["file"]
    # process the file_path: the dataset's "file" field does not match the
    # local on-disk layout, so rebuild it as
    # <prefix>/LibriSpeech/test-clean/<speaker>/<chapter>/<utterance><ext>.
    pidx = file_path.rfind("/")
    sidx = file_path.rfind(".")
    file_path_prefix = file_path[: pidx + 1]
    file_path_suffix = file_path[sidx:]
    file_path_mid = file_path[pidx + 1 : sidx]
    splits = file_path_mid.split("-")
    file_path_mid = f"LibriSpeech/test-clean/{splits[0]}/{splits[1]}/{file_path_mid}"
    file_path = file_path_prefix + file_path_mid + file_path_suffix
    # Re-encode to WAV and base64 it, matching the /v1/asr request contract.
    # NOTE(review): "tmp.wav" is reused across samples; fine for the sequential
    # .map() here, but not safe if the mapping is ever parallelized.
    audio = AudioSegment.from_file(file_path)
    audio.export("tmp.wav")
    with open("tmp.wav", "rb") as f:
        test_audio_base64_str = base64.b64encode(f.read()).decode("utf-8")
    inputs = {"audio": test_audio_base64_str}
    endpoint = "http://localhost:7066/v1/asr"
    response = requests.post(url=endpoint, data=json.dumps(inputs), proxies={"http": None})
    result_str = response.json()["asr_result"]
    batch["prediction"] = processor.tokenizer._normalize(result_str)
    return batch
result = librispeech_test_clean.map(map_to_pred)
wer = load("wer")
print(100 * wer.compute(references=result["reference"], predictions=result["prediction"]))

View File

@@ -1,8 +0,0 @@
datasets
evaluate
jiwer
librosa
pydub
soundfile
torch
transformers

View File

@@ -1,5 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
python online_eval.py

View File

@@ -108,7 +108,7 @@ curl http://${host_ip}:3006/generate \
# llm microservice
curl http://${host_ip}:3007/v1/chat/completions\
-X POST \
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \
-d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \
-H 'Content-Type: application/json'
# speecht5 service
@@ -127,13 +127,9 @@ curl http://${host_ip}:3002/v1/audio/speech \
## 🚀 Test MegaService
Test the AudioQnA megaservice by recording a .wav file, encoding the file into the base64 format, and then sending the
base64 string to the megaservice endpoint. The megaservice will return a spoken response as a base64 string. To listen
to the response, decode the base64 string and save it as a .wav file.
```bash
curl http://${host_ip}:3008/v1/audioqna \
-X POST \
-d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64}' \
-H 'Content-Type: application/json' | sed 's/^"//;s/"$//' | base64 -d > output.wav
-H 'Content-Type: application/json'
```

View File

@@ -41,7 +41,7 @@ services:
environment:
TTS_ENDPOINT: ${TTS_ENDPOINT}
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
container_name: tgi-service
ports:
- "3006:80"

View File

@@ -1,64 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
services:
whisper-service:
image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
container_name: whisper-service
ports:
- "7066:7066"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
restart: unless-stopped
command: --language "zh"
gpt-sovits-service:
image: ${REGISTRY:-opea}/gpt-sovits:${TAG:-latest}
container_name: gpt-sovits-service
ports:
- "9880:9880"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
restart: unless-stopped
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
container_name: tgi-service
ports:
- "3006:80"
volumes:
- "./data:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
audioqna-xeon-backend-server:
image: ${REGISTRY:-opea}/audioqna-multilang:${TAG:-latest}
container_name: audioqna-xeon-backend-server
ports:
- "3008:8888"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
- LLM_SERVER_HOST_IP=${LLM_SERVER_HOST_IP}
- LLM_SERVER_PORT=${LLM_SERVER_PORT}
- LLM_MODEL_ID=${LLM_MODEL_ID}
- WHISPER_SERVER_HOST_IP=${WHISPER_SERVER_HOST_IP}
- WHISPER_SERVER_PORT=${WHISPER_SERVER_PORT}
- GPT_SOVITS_SERVER_HOST_IP=${GPT_SOVITS_SERVER_HOST_IP}
- GPT_SOVITS_SERVER_PORT=${GPT_SOVITS_SERVER_PORT}
ipc: host
restart: always
networks:
default:
driver: bridge

View File

@@ -79,8 +79,6 @@ export LLM_SERVICE_PORT=3007
## 🚀 Start the MegaService
> **_NOTE:_** Users will need at least three Gaudi cards for AudioQnA.
```bash
cd GenAIExamples/AudioQnA/docker_compose/intel/hpu/gaudi/
docker compose up -d
@@ -110,7 +108,7 @@ curl http://${host_ip}:3006/generate \
# llm microservice
curl http://${host_ip}:3007/v1/chat/completions\
-X POST \
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \
-d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \
-H 'Content-Type: application/json'
# speecht5 service
@@ -129,13 +127,9 @@ curl http://${host_ip}:3002/v1/audio/speech \
## 🚀 Test MegaService
Test the AudioQnA megaservice by recording a .wav file, encoding the file into the base64 format, and then sending the
base64 string to the megaservice endpoint. The megaservice will return a spoken response as a base64 string. To listen
to the response, decode the base64 string and save it as a .wav file.
```bash
curl http://${host_ip}:3008/v1/audioqna \
-X POST \
-d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64}' \
-H 'Content-Type: application/json' | sed 's/^"//;s/"$//' | base64 -d > output.wav
-H 'Content-Type: application/json'
```

View File

@@ -51,7 +51,7 @@ services:
environment:
TTS_ENDPOINT: ${TTS_ENDPOINT}
tgi-service:
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
container_name: tgi-gaudi-server
ports:
- "3006:80"
@@ -61,15 +61,11 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
ENABLE_HPU_GRAPH: true
LIMIT_HPU_GRAPH: true
USE_FLASH_ATTENTION: true
FLASH_ATTENTION_RECOMPUTE: true
runtime: habana
cap_add:
- SYS_NICE

View File

@@ -53,9 +53,3 @@ services:
dockerfile: comps/tts/speecht5/Dockerfile
extends: audioqna
image: ${REGISTRY:-opea}/tts:${TAG:-latest}
gpt-sovits:
build:
context: GenAIComps
dockerfile: comps/tts/gpt-sovits/Dockerfile
extends: audioqna
image: ${REGISTRY:-opea}/gpt-sovits:${TAG:-latest}

View File

@@ -7,14 +7,14 @@
## Deploy On Xeon
```
cd GenAIExamples/AudioQnA/kubernetes/intel/cpu/xeon/manifest
cd GenAIExamples/AudioQnA/kubernetes/intel/cpu/xeon/manifests
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" audioqna.yaml
kubectl apply -f audioqna.yaml
```
## Deploy On Gaudi
```
cd GenAIExamples/AudioQnA/kubernetes/intel/hpu/gaudi/manifest
cd GenAIExamples/AudioQnA/kubernetes/intel/hpu/gaudi/manifests
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" audioqna.yaml
kubectl apply -f audioqna.yaml

View File

@@ -4,7 +4,7 @@ This document outlines the deployment process for a AudioQnA application utilizi
The AudioQnA Service leverages a Kubernetes operator called genai-microservices-connector(GMC). GMC supports connecting microservices to create pipelines based on the specification in the pipeline yaml file in addition to allowing the user to dynamically control which model is used in a service such as an LLM or embedder. The underlying pipeline language also supports using external services that may be running in public or private cloud elsewhere.
Install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in Section "Getting Started" at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector/README.md). Soon we will publish the images to Docker Hub, at which point no builds will be required, simplifying the install.
Install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in Section "Getting Started" at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector). Soon as we publish images to Docker Hub, at which point no builds will be required, simplifying install.
The AudioQnA application is defined as a Custom Resource (CR) file that the above GMC operator acts upon. It first checks if the microservices listed in the CR yaml file are running, if not starts them and then proceeds to connect them. When the AudioQnA pipeline is ready, the service endpoint details are returned, letting you use the application. Should you use "kubectl get pods" commands you will see all the component microservices, in particular `asr`, `tts`, and `llm`.
@@ -25,7 +25,7 @@ The AudioQnA uses the below prebuilt images if you choose a Xeon deployment
Should you desire to use the Gaudi accelerator, two alternate images are used for the embedding and llm services.
For Gaudi:
- tgi-service: ghcr.io/huggingface/tgi-gaudi:2.0.5
- tgi-service: ghcr.io/huggingface/tgi-gaudi:1.2.1
- whisper-gaudi: opea/whisper-gaudi:latest
- speecht5-gaudi: opea/speecht5-gaudi:latest

View File

@@ -247,7 +247,7 @@ spec:
- envFrom:
- configMapRef:
name: audio-qna-config
image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
image: ghcr.io/huggingface/text-generation-inference:2.2.0
name: llm-dependency-deploy-demo
securityContext:
capabilities:

View File

@@ -271,7 +271,7 @@ spec:
- envFrom:
- configMapRef:
name: audio-qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
name: llm-dependency-deploy-demo
securityContext:
capabilities:
@@ -303,14 +303,6 @@ spec:
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: ENABLE_HPU_GRAPH
value: 'true'
- name: LIMIT_HPU_GRAPH
value: 'true'
- name: USE_FLASH_ATTENTION
value: 'true'
- name: FLASH_ATTENTION_RECOMPUTE
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
@@ -323,7 +315,7 @@ spec:
volumes:
- name: model-volume
hostPath:
path: /mnt/models
path: /home/sdp/cesg
type: Directory
- name: shm
emptyDir:

View File

@@ -22,7 +22,7 @@ function build_docker_images() {
service_list="audioqna whisper-gaudi asr llm-tgi speecht5-gaudi tts"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
docker images && sleep 1s
}

View File

@@ -22,7 +22,7 @@ function build_docker_images() {
service_list="audioqna whisper asr llm-tgi speecht5 tts"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
docker images && sleep 1s
}

View File

@@ -34,7 +34,7 @@ function validate_audioqa() {
export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name})
echo "$CLIENT_POD"
accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='audioqa')].status.accessUrl}")
byte_str=$(kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -s -X POST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "parameters":{"max_tokens":64, "do_sample": true, "streaming":false}}' -H 'Content-Type: application/json' | jq .byte_str)
byte_str=$(kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -s -X POST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "parameters":{"max_new_tokens":64, "do_sample": true, "streaming":false}}' -H 'Content-Type: application/json' | jq .byte_str)
echo "$byte_str" > $LOG_PATH/curl_audioqa.log
if [ -z "$byte_str" ]; then
echo "audioqa failed, please check the logs in ${LOG_PATH}!"

View File

@@ -34,7 +34,7 @@ function validate_audioqa() {
export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name})
echo "$CLIENT_POD"
accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='audioqa')].status.accessUrl}")
byte_str=$(kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -s -X POST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "parameters":{"max_tokens":64, "do_sample": true, "streaming":false}}' -H 'Content-Type: application/json' | jq .byte_str)
byte_str=$(kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -s -X POST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "parameters":{"max_new_tokens":64, "do_sample": true, "streaming":false}}' -H 'Content-Type: application/json' | jq .byte_str)
echo "$byte_str" > $LOG_PATH/curl_audioqa.log
if [ -z "$byte_str" ]; then
echo "audioqa failed, please check the logs in ${LOG_PATH}!"

View File

@@ -1,8 +0,0 @@
*.safetensors
*.bin
*.model
*.log
docker_compose/intel/cpu/xeon/data
docker_compose/intel/hpu/gaudi/data
inputs/
outputs/

View File

@@ -1,105 +0,0 @@
# AvatarChatbot Application
The AvatarChatbot service can be effortlessly deployed on either Intel Gaudi2 or Intel Xeon Scalable Processors.
## AI Avatar Workflow
The AI Avatar example is implemented using both megaservices and the component-level microservices defined in [GenAIComps](https://github.com/opea-project/GenAIComps). The flow chart below shows the information flow between different megaservices and microservices for this example.
```mermaid
---
config:
flowchart:
nodeSpacing: 100
rankSpacing: 100
curve: linear
themeVariables:
fontSize: 42px
---
flowchart LR
classDef blue fill:#ADD8E6,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
classDef thistle fill:#D8BFD8,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
classDef orange fill:#FBAA60,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
classDef orchid fill:#C26DBC,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
classDef invisible fill:transparent,stroke:transparent;
style AvatarChatbot-Megaservice stroke:#000000
subgraph AvatarChatbot-Megaservice["AvatarChatbot Megaservice"]
direction LR
ASR([ASR Microservice]):::blue
LLM([LLM Microservice]):::blue
TTS([TTS Microservice]):::blue
animation([Animation Microservice]):::blue
end
subgraph UserInterface["User Interface"]
direction LR
invis1[ ]:::invisible
USER1([User Audio Query]):::orchid
USER2([User Image/Video Query]):::orchid
UI([UI server<br>]):::orchid
end
GW([AvatarChatbot GateWay<br>]):::orange
subgraph .
direction LR
X([OPEA Microservice]):::blue
Y{{Open Source Service}}:::thistle
Z([OPEA Gateway]):::orange
Z1([UI]):::orchid
end
WHISPER{{Whisper service}}:::thistle
TGI{{LLM service}}:::thistle
T5{{Speecht5 service}}:::thistle
WAV2LIP{{Wav2Lip service}}:::thistle
%% Connections %%
direction LR
USER1 -->|1| UI
UI -->|2| GW
GW <==>|3| AvatarChatbot-Megaservice
ASR ==>|4| LLM ==>|5| TTS ==>|6| animation
direction TB
ASR <-.->|3'| WHISPER
LLM <-.->|4'| TGI
TTS <-.->|5'| T5
animation <-.->|6'| WAV2LIP
USER2 -->|1| UI
UI <-.->|6'| WAV2LIP
```
## Deploy AvatarChatbot Service
The AvatarChatbot service can be deployed on either Intel Gaudi2 AI Accelerator or Intel Xeon Scalable Processor.
### Deploy AvatarChatbot on Gaudi
Refer to the [Gaudi Guide](./docker_compose/intel/hpu/gaudi/README.md) for instructions on deploying AvatarChatbot on Gaudi, and on setting up a UI for the application.
### Deploy AvatarChatbot on Xeon
Refer to the [Xeon Guide](./docker_compose/intel/cpu/xeon/README.md) for instructions on deploying AvatarChatbot on Xeon.
## Supported Models
### ASR
The default model is [openai/whisper-small](https://huggingface.co/openai/whisper-small). It also supports all models in the Whisper family, such as `openai/whisper-large-v3`, `openai/whisper-medium`, `openai/whisper-base`, `openai/whisper-tiny`, etc.
To replace the model, please edit the `compose.yaml` and add the `command` line to pass the name of the model you want to use:
```yaml
services:
whisper-service:
...
command: --model_name_or_path openai/whisper-tiny
```
### TTS
The default model is [microsoft/SpeechT5](https://huggingface.co/microsoft/speecht5_tts). We currently do not support replacing the model. More models under the commercial license will be added in the future.
### Animation
The default model is [Rudrabha/Wav2Lip](https://github.com/Rudrabha/Wav2Lip) and [TencentARC/GFPGAN](https://github.com/TencentARC/GFPGAN). We currently do not support replacing the model. More models under the commercial license such as [OpenTalker/SadTalker](https://github.com/OpenTalker/SadTalker) will be added in the future.

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

Binary file not shown.

Before

Width:  |  Height:  |  Size: 595 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 148 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 158 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.5 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 992 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.7 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.6 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 169 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 121 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 47 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 25 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 22 KiB

View File

@@ -1,93 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import asyncio
import os
import sys
from comps import AvatarChatbotGateway, MicroService, ServiceOrchestrator, ServiceType
# Network locations for the megaservice gateway and its four backing
# microservices.  Every value can be overridden through the environment;
# the defaults assume all services listen on the local host.
MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "0.0.0.0")
MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))  # gateway listen port
ASR_SERVICE_HOST_IP = os.getenv("ASR_SERVICE_HOST_IP", "0.0.0.0")
ASR_SERVICE_PORT = int(os.getenv("ASR_SERVICE_PORT", 9099))
LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0")
LLM_SERVICE_PORT = int(os.getenv("LLM_SERVICE_PORT", 9000))
TTS_SERVICE_HOST_IP = os.getenv("TTS_SERVICE_HOST_IP", "0.0.0.0")
TTS_SERVICE_PORT = int(os.getenv("TTS_SERVICE_PORT", 9088))
ANIMATION_SERVICE_HOST_IP = os.getenv("ANIMATION_SERVICE_HOST_IP", "0.0.0.0")
ANIMATION_SERVICE_PORT = int(os.getenv("ANIMATION_SERVICE_PORT", 9066))
def check_env_vars(env_var_list):
    """Verify that every environment variable in *env_var_list* is set.

    Unlike a first-failure check, all missing variables are reported before
    exiting, so a misconfigured deployment can be fixed in a single pass.

    Args:
        env_var_list: iterable of environment-variable names to check.

    Raises:
        SystemExit: with status code 1 if any variable is unset.
    """
    missing = [var for var in env_var_list if os.getenv(var) is None]
    if missing:
        for var in missing:
            print(f"Error: The environment variable '{var}' is not set.")
        sys.exit(1)  # non-zero status signals misconfiguration to the caller
    print("All environment variables are set.")
class AvatarChatbotService:
    """Orchestrates the ASR -> LLM -> TTS -> Animation megaservice pipeline."""

    def __init__(self, host="0.0.0.0", port=8000):
        self.host = host
        self.port = port
        self.megaservice = ServiceOrchestrator()

    def add_remote_service(self):
        """Register the four remote microservices and wire them into a chain."""
        # (name, host, port, endpoint, service type) for each pipeline stage,
        # listed in execution order.
        stage_specs = [
            ("asr", ASR_SERVICE_HOST_IP, ASR_SERVICE_PORT, "/v1/audio/transcriptions", ServiceType.ASR),
            ("llm", LLM_SERVICE_HOST_IP, LLM_SERVICE_PORT, "/v1/chat/completions", ServiceType.LLM),
            ("tts", TTS_SERVICE_HOST_IP, TTS_SERVICE_PORT, "/v1/audio/speech", ServiceType.TTS),
            ("animation", ANIMATION_SERVICE_HOST_IP, ANIMATION_SERVICE_PORT, "/v1/animation", ServiceType.ANIMATION),
        ]
        stages = []
        for name, svc_host, svc_port, endpoint, svc_type in stage_specs:
            stage = MicroService(
                name=name,
                host=svc_host,
                port=svc_port,
                endpoint=endpoint,
                use_remote_service=True,
                service_type=svc_type,
            )
            self.megaservice.add(stage)
            stages.append(stage)
        # Chain consecutive stages: asr -> llm -> tts -> animation.
        for upstream, downstream in zip(stages, stages[1:]):
            self.megaservice.flow_to(upstream, downstream)
        self.gateway = AvatarChatbotGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)
if __name__ == "__main__":
    # Fail fast if any required service endpoint is unconfigured.  The list
    # is generated in the same order as before: for each service, its
    # HOST_IP entry followed by its PORT entry.
    required_env = [
        f"{service}_{suffix}"
        for service in ("MEGA_SERVICE", "ASR_SERVICE", "LLM_SERVICE", "TTS_SERVICE", "ANIMATION_SERVICE")
        for suffix in ("HOST_IP", "PORT")
    ]
    check_env_vars(required_env)

    avatarchatbot = AvatarChatbotService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT)
    avatarchatbot.add_remote_service()

View File

@@ -1,210 +0,0 @@
# Build Mega Service of AvatarChatbot on Xeon
This document outlines the deployment process for an AvatarChatbot application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on an Intel Xeon server.
## 🚀 Build Docker images
### 1. Source Code install GenAIComps
```bash
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
```
### 2. Build ASR Image
```bash
docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/dependency/Dockerfile .
docker build -t opea/asr:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/Dockerfile .
```
### 3. Build LLM Image
```bash
docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile .
```
### 4. Build TTS Image
```bash
docker build -t opea/speecht5:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/speecht5/dependency/Dockerfile .
docker build -t opea/tts:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/speecht5/Dockerfile .
```
### 5. Build Animation Image
```bash
docker build -t opea/wav2lip:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/animation/wav2lip/dependency/Dockerfile .
docker build -t opea/animation:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/animation/wav2lip/Dockerfile .
```
### 6. Build MegaService Docker Image
To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `avatarchatbot.py` Python script. Build the MegaService Docker image using the command below:
```bash
git clone https://github.com/opea-project/GenAIExamples.git
cd GenAIExamples/AvatarChatbot/
docker build --no-cache -t opea/avatarchatbot:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
```
Then run the command `docker images`; you will have the following images ready:
1. `opea/whisper:latest`
2. `opea/asr:latest`
3. `opea/llm-tgi:latest`
4. `opea/speecht5:latest`
5. `opea/tts:latest`
6. `opea/wav2lip:latest`
7. `opea/animation:latest`
8. `opea/avatarchatbot:latest`
## 🚀 Set the environment variables
Before starting the services with `docker compose`, you have to recheck the following environment variables.
```bash
export HUGGINGFACEHUB_API_TOKEN=<your_hf_token>
export host_ip=$(hostname -I | awk '{print $1}')
export TGI_LLM_ENDPOINT=http://$host_ip:3006
export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
export ASR_ENDPOINT=http://$host_ip:7066
export TTS_ENDPOINT=http://$host_ip:7055
export WAV2LIP_ENDPOINT=http://$host_ip:7860
export MEGA_SERVICE_HOST_IP=${host_ip}
export ASR_SERVICE_HOST_IP=${host_ip}
export TTS_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export ANIMATION_SERVICE_HOST_IP=${host_ip}
export MEGA_SERVICE_PORT=8888
export ASR_SERVICE_PORT=3001
export TTS_SERVICE_PORT=3002
export LLM_SERVICE_PORT=3007
export ANIMATION_SERVICE_PORT=3008
```
- Xeon CPU
```bash
export DEVICE="cpu"
export WAV2LIP_PORT=7860
export INFERENCE_MODE='wav2lip_only'
export CHECKPOINT_PATH='/usr/local/lib/python3.11/site-packages/Wav2Lip/checkpoints/wav2lip_gan.pth'
export FACE="assets/img/avatar1.jpg"
# export AUDIO='assets/audio/eg3_ref.wav' # audio file path is optional, will use base64str in the post request as input if is 'None'
export AUDIO='None'
export FACESIZE=96
export OUTFILE="/outputs/result.mp4"
export GFPGAN_MODEL_VERSION=1.4 # latest version, can roll back to v1.3 if needed
export UPSCALE_FACTOR=1
export FPS=10
```
## 🚀 Start the MegaService
```bash
cd GenAIExamples/AvatarChatbot/docker_compose/intel/cpu/xeon/
docker compose -f compose.yaml up -d
```
## 🚀 Test MicroServices
```bash
# whisper service
curl http://${host_ip}:7066/v1/asr \
-X POST \
-d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
-H 'Content-Type: application/json'
# asr microservice
curl http://${host_ip}:3001/v1/audio/transcriptions \
-X POST \
-d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
-H 'Content-Type: application/json'
# tgi service
curl http://${host_ip}:3006/generate \
-X POST \
-d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
-H 'Content-Type: application/json'
# llm microservice
curl http://${host_ip}:3007/v1/chat/completions\
-X POST \
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \
-H 'Content-Type: application/json'
# speecht5 service
curl http://${host_ip}:7055/v1/tts \
-X POST \
-d '{"text": "Who are you?"}' \
-H 'Content-Type: application/json'
# tts microservice
curl http://${host_ip}:3002/v1/audio/speech \
-X POST \
-d '{"text": "Who are you?"}' \
-H 'Content-Type: application/json'
# wav2lip service
cd ../../../..
curl http://${host_ip}:7860/v1/wav2lip \
-X POST \
-d @assets/audio/sample_minecraft.json \
-H 'Content-Type: application/json'
# animation microservice
curl http://${host_ip}:3008/v1/animation \
-X POST \
-d @assets/audio/sample_question.json \
-H "Content-Type: application/json"
```
## 🚀 Test MegaService
```bash
curl http://${host_ip}:3009/v1/avatarchatbot \
-X POST \
-d @assets/audio/sample_whoareyou.json \
-H 'Content-Type: application/json'
```
If the megaservice is running properly, you should see the following output:
```bash
"/outputs/result.mp4"
```
The output file will be saved in the current working directory, as `${PWD}` is mapped to `/outputs` inside the wav2lip-service Docker container.
## Gradio UI
```bash
cd $WORKPATH/GenAIExamples/AvatarChatbot
python3 ui/gradio/app_gradio_demo_avatarchatbot.py
```
The UI can be viewed at http://${host_ip}:7861
<img src="../../../../assets/img/UI.png" alt="UI Example" width="60%">
In the current version v1.0, you need to set the avatar figure image/video and the DL model choice in the environment variables before starting AvatarChatbot backend service and running the UI. Please just customize the audio question in the UI.
\*\* We will enable change of avatar figure between runs in v2.0
## Troubleshooting
```bash
cd GenAIExamples/AvatarChatbot/tests
export IMAGE_REPO="opea"
export IMAGE_TAG="latest"
export HUGGINGFACEHUB_API_TOKEN=<your_hf_token>
test_avatarchatbot_on_xeon.sh
```

View File

@@ -1,138 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
services:
whisper-service:
image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
container_name: whisper-service
ports:
- "7066:7066"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
restart: unless-stopped
asr:
image: ${REGISTRY:-opea}/asr:${TAG:-latest}
container_name: asr-service
ports:
- "3001:9099"
ipc: host
environment:
ASR_ENDPOINT: ${ASR_ENDPOINT}
speecht5-service:
image: ${REGISTRY:-opea}/speecht5:${TAG:-latest}
container_name: speecht5-service
ports:
- "7055:7055"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
restart: unless-stopped
tts:
image: ${REGISTRY:-opea}/tts:${TAG:-latest}
container_name: tts-service
ports:
- "3002:9088"
ipc: host
environment:
TTS_ENDPOINT: ${TTS_ENDPOINT}
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
container_name: tgi-service
ports:
- "3006:80"
volumes:
- "./data:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
llm:
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
container_name: llm-tgi-server
depends_on:
- tgi-service
ports:
- "3007:9000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
wav2lip-service:
image: ${REGISTRY:-opea}/wav2lip:${TAG:-latest}
container_name: wav2lip-service
ports:
- "7860:7860"
ipc: host
volumes:
- ${PWD}:/outputs
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
DEVICE: ${DEVICE}
INFERENCE_MODE: ${INFERENCE_MODE}
CHECKPOINT_PATH: ${CHECKPOINT_PATH}
FACE: ${FACE}
AUDIO: ${AUDIO}
FACESIZE: ${FACESIZE}
OUTFILE: ${OUTFILE}
GFPGAN_MODEL_VERSION: ${GFPGAN_MODEL_VERSION}
UPSCALE_FACTOR: ${UPSCALE_FACTOR}
FPS: ${FPS}
WAV2LIP_PORT: ${WAV2LIP_PORT}
restart: unless-stopped
animation:
image: ${REGISTRY:-opea}/animation:${TAG:-latest}
container_name: animation-server
ports:
- "3008:9066"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
WAV2LIP_ENDPOINT: ${WAV2LIP_ENDPOINT}
restart: unless-stopped
avatarchatbot-xeon-backend-server:
image: ${REGISTRY:-opea}/avatarchatbot:${TAG:-latest}
container_name: avatarchatbot-xeon-backend-server
depends_on:
- asr
- llm
- tts
- animation
ports:
- "3009:8888"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
- MEGA_SERVICE_PORT=${MEGA_SERVICE_PORT}
- ASR_SERVICE_HOST_IP=${ASR_SERVICE_HOST_IP}
- ASR_SERVICE_PORT=${ASR_SERVICE_PORT}
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
- LLM_SERVICE_PORT=${LLM_SERVICE_PORT}
- TTS_SERVICE_HOST_IP=${TTS_SERVICE_HOST_IP}
- TTS_SERVICE_PORT=${TTS_SERVICE_PORT}
- ANIMATION_SERVICE_HOST_IP=${ANIMATION_SERVICE_HOST_IP}
- ANIMATION_SERVICE_PORT=${ANIMATION_SERVICE_PORT}
ipc: host
restart: always
networks:
default:
driver: bridge

View File

@@ -1,220 +0,0 @@
# Build Mega Service of AvatarChatbot on Gaudi
This document outlines the deployment process for an AvatarChatbot application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on an Intel Gaudi server.
## 🚀 Build Docker images
### 1. Source Code install GenAIComps
```bash
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
```
### 2. Build ASR Image
```bash
docker build -t opea/whisper-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/dependency/Dockerfile.intel_hpu .
docker build -t opea/asr:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/Dockerfile .
```
### 3. Build LLM Image
```bash
docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile .
```
### 4. Build TTS Image
```bash
docker build -t opea/speecht5-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/speecht5/dependency/Dockerfile.intel_hpu .
docker build -t opea/tts:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/speecht5/Dockerfile .
```
### 5. Build Animation Image
```bash
docker build -t opea/wav2lip-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/animation/wav2lip/dependency/Dockerfile.intel_hpu .
docker build -t opea/animation:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/animation/wav2lip/Dockerfile .
```
### 6. Build MegaService Docker Image
To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `avatarchatbot.py` Python script. Build the MegaService Docker image using the command below:
```bash
git clone https://github.com/opea-project/GenAIExamples.git
cd GenAIExamples/AvatarChatbot/
docker build --no-cache -t opea/avatarchatbot:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
```
Then run the command `docker images`; you will have the following images ready:
1. `opea/whisper-gaudi:latest`
2. `opea/asr:latest`
3. `opea/llm-tgi:latest`
4. `opea/speecht5-gaudi:latest`
5. `opea/tts:latest`
6. `opea/wav2lip-gaudi:latest`
7. `opea/animation:latest`
8. `opea/avatarchatbot:latest`
## 🚀 Set the environment variables
Before starting the services with `docker compose`, you have to recheck the following environment variables.
```bash
export HUGGINGFACEHUB_API_TOKEN=<your_hf_token>
export host_ip=$(hostname -I | awk '{print $1}')
export TGI_LLM_ENDPOINT=http://$host_ip:3006
export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
export ASR_ENDPOINT=http://$host_ip:7066
export TTS_ENDPOINT=http://$host_ip:7055
export WAV2LIP_ENDPOINT=http://$host_ip:7860
export MEGA_SERVICE_HOST_IP=${host_ip}
export ASR_SERVICE_HOST_IP=${host_ip}
export TTS_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export ANIMATION_SERVICE_HOST_IP=${host_ip}
export MEGA_SERVICE_PORT=8888
export ASR_SERVICE_PORT=3001
export TTS_SERVICE_PORT=3002
export LLM_SERVICE_PORT=3007
export ANIMATION_SERVICE_PORT=3008
```
- Gaudi2 HPU
```bash
export DEVICE="hpu"
export WAV2LIP_PORT=7860
export INFERENCE_MODE='wav2lip_only'
export CHECKPOINT_PATH='/usr/local/lib/python3.10/dist-packages/Wav2Lip/checkpoints/wav2lip_gan.pth'
export FACE="assets/img/avatar1.jpg"
# export AUDIO='assets/audio/eg3_ref.wav' # audio file path is optional, will use base64str in the post request as input if is 'None'
export AUDIO='None'
export FACESIZE=96
export OUTFILE="/outputs/result.mp4"
export GFPGAN_MODEL_VERSION=1.4 # latest version, can roll back to v1.3 if needed
export UPSCALE_FACTOR=1
export FPS=10
```
## 🚀 Start the MegaService
```bash
cd GenAIExamples/AvatarChatbot/docker_compose/intel/hpu/gaudi/
docker compose -f compose.yaml up -d
```
## 🚀 Test MicroServices
```bash
# whisper service
curl http://${host_ip}:7066/v1/asr \
-X POST \
-d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
-H 'Content-Type: application/json'
# asr microservice
curl http://${host_ip}:3001/v1/audio/transcriptions \
-X POST \
-d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
-H 'Content-Type: application/json'
# tgi service
curl http://${host_ip}:3006/generate \
-X POST \
-d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
-H 'Content-Type: application/json'
# llm microservice
curl http://${host_ip}:3007/v1/chat/completions\
-X POST \
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \
-H 'Content-Type: application/json'
# speecht5 service
curl http://${host_ip}:7055/v1/tts \
-X POST \
-d '{"text": "Who are you?"}' \
-H 'Content-Type: application/json'
# tts microservice
curl http://${host_ip}:3002/v1/audio/speech \
-X POST \
-d '{"text": "Who are you?"}' \
-H 'Content-Type: application/json'
# wav2lip service
cd ../../../..
curl http://${host_ip}:7860/v1/wav2lip \
-X POST \
-d @assets/audio/sample_minecraft.json \
-H 'Content-Type: application/json'
# animation microservice
curl http://${host_ip}:3008/v1/animation \
-X POST \
-d @assets/audio/sample_question.json \
-H "Content-Type: application/json"
```
## 🚀 Test MegaService
```bash
curl http://${host_ip}:3009/v1/avatarchatbot \
-X POST \
-d @assets/audio/sample_whoareyou.json \
-H 'Content-Type: application/json'
```
If the megaservice is running properly, you should see the following output:
```bash
"/outputs/result.mp4"
```
The output file will be saved in the current working directory, as `${PWD}` is mapped to `/outputs` inside the wav2lip-service Docker container.
## Gradio UI
```bash
sudo apt update
sudo apt install -y yasm pkg-config libx264-dev nasm
cd $WORKPATH
git clone https://github.com/FFmpeg/FFmpeg.git
cd FFmpeg
sudo ./configure --enable-gpl --enable-libx264 && sudo make -j$(($(nproc) - 1)) && sudo make install && hash -r
pip install gradio==4.38.1 soundfile
```
```bash
cd $WORKPATH/GenAIExamples/AvatarChatbot
python3 ui/gradio/app_gradio_demo_avatarchatbot.py
```
The UI can be viewed at http://${host_ip}:7861
<img src="../../../../assets/img/UI.png" alt="UI Example" width="60%">
In the current version v1.0, you need to set the avatar figure image/video and the DL model choice in the environment variables before starting AvatarChatbot backend service and running the UI. Please just customize the audio question in the UI.
\*\* We will enable change of avatar figure between runs in v2.0
## Troubleshooting
```bash
cd GenAIExamples/AvatarChatbot/tests
export IMAGE_REPO="opea"
export IMAGE_TAG="latest"
export HUGGINGFACEHUB_API_TOKEN=<your_hf_token>
test_avatarchatbot_on_gaudi.sh
```

View File

@@ -1,171 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
version: "3.8"
services:
whisper-service:
image: ${REGISTRY:-opea}/whisper-gaudi:${TAG:-latest}
container_name: whisper-service
ports:
- "7066:7066"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HABANA_VISIBLE_MODULES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
runtime: habana
cap_add:
- SYS_NICE
restart: unless-stopped
asr:
image: ${REGISTRY:-opea}/asr:${TAG:-latest}
container_name: asr-service
ports:
- "3001:9099"
ipc: host
environment:
ASR_ENDPOINT: ${ASR_ENDPOINT}
speecht5-service:
image: ${REGISTRY:-opea}/speecht5-gaudi:${TAG:-latest}
container_name: speecht5-service
ports:
- "7055:7055"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HABANA_VISIBLE_MODULES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
runtime: habana
cap_add:
- SYS_NICE
restart: unless-stopped
tts:
image: ${REGISTRY:-opea}/tts:${TAG:-latest}
container_name: tts-service
ports:
- "3002:9088"
ipc: host
environment:
TTS_ENDPOINT: ${TTS_ENDPOINT}
tgi-service:
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
container_name: tgi-gaudi-server
ports:
- "3006:80"
volumes:
- "./data:/data"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_MODULES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
ENABLE_HPU_GRAPH: true
LIMIT_HPU_GRAPH: true
USE_FLASH_ATTENTION: true
FLASH_ATTENTION_RECOMPUTE: true
runtime: habana
cap_add:
- SYS_NICE
ipc: host
command: --model-id ${LLM_MODEL_ID} --max-input-length 128 --max-total-tokens 256
llm:
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
container_name: llm-tgi-gaudi-server
depends_on:
- tgi-service
ports:
- "3007:9000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
wav2lip-service:
image: ${REGISTRY:-opea}/wav2lip-gaudi:${TAG:-latest}
container_name: wav2lip-service
ports:
- "7860:7860"
ipc: host
volumes:
- ${PWD}:/outputs
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HABANA_VISIBLE_MODULES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
DEVICE: ${DEVICE}
INFERENCE_MODE: ${INFERENCE_MODE}
CHECKPOINT_PATH: ${CHECKPOINT_PATH}
FACE: ${FACE}
AUDIO: ${AUDIO}
FACESIZE: ${FACESIZE}
OUTFILE: ${OUTFILE}
GFPGAN_MODEL_VERSION: ${GFPGAN_MODEL_VERSION}
UPSCALE_FACTOR: ${UPSCALE_FACTOR}
FPS: ${FPS}
WAV2LIP_PORT: ${WAV2LIP_PORT}
runtime: habana
cap_add:
- SYS_NICE
restart: unless-stopped
animation:
image: ${REGISTRY:-opea}/animation:${TAG:-latest}
container_name: animation-gaudi-server
ports:
- "3008:9066"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HABANA_VISIBLE_MODULES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
WAV2LIP_ENDPOINT: ${WAV2LIP_ENDPOINT}
runtime: habana
cap_add:
- SYS_NICE
restart: unless-stopped
avatarchatbot-gaudi-backend-server:
image: ${REGISTRY:-opea}/avatarchatbot:${TAG:-latest}
container_name: avatarchatbot-gaudi-backend-server
depends_on:
- asr
- llm
- tts
- animation
ports:
- "3009:8888"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
- MEGA_SERVICE_PORT=${MEGA_SERVICE_PORT}
- ASR_SERVICE_HOST_IP=${ASR_SERVICE_HOST_IP}
- ASR_SERVICE_PORT=${ASR_SERVICE_PORT}
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
- LLM_SERVICE_PORT=${LLM_SERVICE_PORT}
- TTS_SERVICE_HOST_IP=${TTS_SERVICE_HOST_IP}
- TTS_SERVICE_PORT=${TTS_SERVICE_PORT}
- ANIMATION_SERVICE_HOST_IP=${ANIMATION_SERVICE_HOST_IP}
- ANIMATION_SERVICE_PORT=${ANIMATION_SERVICE_PORT}
ipc: host
restart: always
networks:
default:
driver: bridge

View File

@@ -1,73 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
services:
avatarchatbot:
build:
args:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
no_proxy: ${no_proxy}
context: ../
dockerfile: ./Dockerfile
image: ${REGISTRY:-opea}/avatarchatbot:${TAG:-latest}
whisper-gaudi:
build:
context: GenAIComps
dockerfile: comps/asr/whisper/dependency/Dockerfile.intel_hpu
extends: avatarchatbot
image: ${REGISTRY:-opea}/whisper-gaudi:${TAG:-latest}
whisper:
build:
context: GenAIComps
dockerfile: comps/asr/whisper/dependency/Dockerfile
extends: avatarchatbot
image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
asr:
build:
context: GenAIComps
dockerfile: comps/asr/whisper/Dockerfile
extends: avatarchatbot
image: ${REGISTRY:-opea}/asr:${TAG:-latest}
llm-tgi:
build:
context: GenAIComps
dockerfile: comps/llms/text-generation/tgi/Dockerfile
extends: avatarchatbot
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
speecht5-gaudi:
build:
context: GenAIComps
dockerfile: comps/tts/speecht5/dependency/Dockerfile.intel_hpu
extends: avatarchatbot
image: ${REGISTRY:-opea}/speecht5-gaudi:${TAG:-latest}
speecht5:
build:
context: GenAIComps
dockerfile: comps/tts/speecht5/dependency/Dockerfile
extends: avatarchatbot
image: ${REGISTRY:-opea}/speecht5:${TAG:-latest}
tts:
build:
context: GenAIComps
dockerfile: comps/tts/speecht5/Dockerfile
extends: avatarchatbot
image: ${REGISTRY:-opea}/tts:${TAG:-latest}
wav2lip-gaudi:
build:
context: GenAIComps
dockerfile: comps/animation/wav2lip/dependency/Dockerfile.intel_hpu
extends: avatarchatbot
image: ${REGISTRY:-opea}/wav2lip-gaudi:${TAG:-latest}
wav2lip:
build:
context: GenAIComps
dockerfile: comps/animation/wav2lip/dependency/Dockerfile
extends: avatarchatbot
image: ${REGISTRY:-opea}/wav2lip:${TAG:-latest}
animation:
build:
context: GenAIComps
dockerfile: comps/animation/wav2lip/Dockerfile
extends: avatarchatbot
image: ${REGISTRY:-opea}/animation:${TAG:-latest}

View File

@@ -1,147 +0,0 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -e
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
if ls $LOG_PATH/*.log 1> /dev/null 2>&1; then
rm $LOG_PATH/*.log
echo "Log files removed."
else
echo "No log files to remove."
fi
ip_address=$(hostname -I | awk '{print $1}')
# Clone GenAIComps at the branch under test and build every AvatarChatbot
# image listed in service_list from build.yaml (logs go to docker_image_build.log).
function build_docker_images() {
    cd $WORKPATH/docker_image_build
    # opea_branch defaults to main; CI may point it at a PR branch.
    git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
    echo "Build all the images with --no-cache, check docker_image_build.log for details..."
    # NOTE(review): confirm build.yaml defines a "whisper-gaudi" service; the
    # visible compose fragment only defines "whisper", and an unknown service
    # name would make `docker compose build` fail.
    service_list="avatarchatbot whisper-gaudi asr llm-tgi speecht5-gaudi tts wav2lip-gaudi animation"
    docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
    # TGI for Gaudi is pulled prebuilt rather than built locally.
    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
    docker images && sleep 1s
}
# Export the environment expected by the Gaudi compose file and start all
# containers, then wait for the TGI server to report readiness in its logs.
function start_services() {
    cd $WORKPATH/docker_compose/intel/hpu/gaudi

    export HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN
    export host_ip=$(hostname -I | awk '{print $1}')

    # Endpoints of the dependency services (TGI/ASR/TTS/wav2lip).
    export TGI_LLM_ENDPOINT=http://$host_ip:3006
    export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3

    export ASR_ENDPOINT=http://$host_ip:7066
    export TTS_ENDPOINT=http://$host_ip:7055
    export WAV2LIP_ENDPOINT=http://$host_ip:7860

    # Host/port wiring for the megaservice and each microservice.
    export MEGA_SERVICE_HOST_IP=${host_ip}
    export ASR_SERVICE_HOST_IP=${host_ip}
    export TTS_SERVICE_HOST_IP=${host_ip}
    export LLM_SERVICE_HOST_IP=${host_ip}
    export ANIMATION_SERVICE_HOST_IP=${host_ip}

    export MEGA_SERVICE_PORT=8888
    export ASR_SERVICE_PORT=3001
    export TTS_SERVICE_PORT=3002
    export LLM_SERVICE_PORT=3007
    export ANIMATION_SERVICE_PORT=3008

    # Animation (wav2lip + GFPGAN) configuration; DEVICE selects the HPU path.
    export DEVICE="hpu"
    export WAV2LIP_PORT=7860
    export INFERENCE_MODE='wav2lip+gfpgan'
    export CHECKPOINT_PATH='/usr/local/lib/python3.10/dist-packages/Wav2Lip/checkpoints/wav2lip_gan.pth'
    export FACE="assets/img/avatar1.jpg"
    # export AUDIO='assets/audio/eg3_ref.wav' # audio file path is optional, will use base64str in the post request as input if is 'None'
    export AUDIO='None'
    export FACESIZE=96
    export OUTFILE="/outputs/result.mp4"
    export GFPGAN_MODEL_VERSION=1.4 # latest version, can roll back to v1.3 if needed
    export UPSCALE_FACTOR=1
    export FPS=10

    # Start Docker Containers
    docker compose up -d

    # Poll TGI logs (up to 100 x 5s) until the router logs "Connected".
    # NOTE(review): if TGI never reports Connected, the loop falls through
    # without failing — confirm that is intended for CI.
    n=0
    until [[ "$n" -ge 100 ]]; do
        docker logs tgi-gaudi-server > $LOG_PATH/tgi_service_start.log
        if grep -q Connected $LOG_PATH/tgi_service_start.log; then
            break
        fi
        sleep 5s
        n=$((n+1))
    done

    # sleep 5m
    echo "All services are up and running"
    sleep 5s
}
# POST a sample audio request to the megaservice and check that the reply
# references an mp4 video; on failure dump every service's logs and exit 1.
function validate_megaservice() {
    cd $WORKPATH
    # Bypass any proxy for the local endpoint.
    result=$(http_proxy="" curl http://${ip_address}:3009/v1/avatarchatbot -X POST -d @assets/audio/sample_whoareyou.json -H 'Content-Type: application/json')
    echo "result is === $result"
    # Success criterion: the response contains an mp4 path/filename.
    if [[ $result == *"mp4"* ]]; then
        echo "Result correct."
    else
        # Collect per-container logs to diagnose which stage failed.
        docker logs whisper-service > $LOG_PATH/whisper-service.log
        docker logs asr-service > $LOG_PATH/asr-service.log
        docker logs speecht5-service > $LOG_PATH/speecht5-service.log
        docker logs tts-service > $LOG_PATH/tts-service.log
        docker logs tgi-gaudi-server > $LOG_PATH/tgi-gaudi-server.log
        docker logs llm-tgi-gaudi-server > $LOG_PATH/llm-tgi-gaudi-server.log
        docker logs wav2lip-service > $LOG_PATH/wav2lip-service.log
        docker logs animation-gaudi-server > $LOG_PATH/animation-gaudi-server.log
        echo "Result wrong."
        exit 1
    fi
}
# Frontend validation is not implemented for this example yet.
#function validate_frontend() {
#}

# Tear down all compose-managed containers for the Gaudi deployment.
function stop_docker() {
    cd $WORKPATH/docker_compose/intel/hpu/gaudi
    docker compose down
}
# Orchestrate the test: clean slate, (optionally) build images, start the
# stack, validate the end-to-end flow, then tear down and reclaim disk.
function main() {
    stop_docker
    # Reclaim builder cache/dangling images before building to avoid disk pressure.
    echo y | docker builder prune --all
    echo y | docker image prune

    # Only build locally when testing the default "opea" registry images.
    if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
    start_services

    # validate_microservices
    validate_megaservice
    # validate_frontend

    stop_docker
    echo y | docker builder prune --all
    echo y | docker image prune
}

main

View File

@@ -1,142 +0,0 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# CI end-to-end test for AvatarChatbot on Xeon (CPU).
# Exit immediately on the first failing command so CI reports the real error.
set -e

# Image registry/tag under test; overridable by the CI runner.
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
# Compose files interpolate REGISTRY/TAG when resolving image names.
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}

WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
# Clear logs from previous runs so this run's logs are unambiguous.
if ls $LOG_PATH/*.log 1> /dev/null 2>&1; then
    rm $LOG_PATH/*.log
    echo "Log files removed."
else
    echo "No log files to remove."
fi

# First IP of this host; used to reach the megaservice endpoint from curl.
ip_address=$(hostname -I | awk '{print $1}')
# Clone GenAIComps at the branch under test and build every AvatarChatbot
# image listed in service_list from build.yaml (CPU variants).
function build_docker_images() {
    cd $WORKPATH/docker_image_build
    # opea_branch defaults to main; CI may point it at a PR branch.
    git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
    echo "Build all the images with --no-cache, check docker_image_build.log for details..."
    service_list="avatarchatbot whisper asr llm-tgi speecht5 tts wav2lip animation"
    docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
    # NOTE(review): pulling the Gaudi TGI image on a Xeon host looks copied
    # from the Gaudi script — confirm the Xeon compose.yaml really uses
    # ghcr.io/huggingface/tgi-gaudi rather than the CPU TGI image.
    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
    docker images && sleep 1s
}
# Export the environment expected by the Xeon compose file and start all
# containers, then wait for the TGI server to report readiness in its logs.
function start_services() {
    cd $WORKPATH/docker_compose/intel/cpu/xeon

    export HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN
    export host_ip=$(hostname -I | awk '{print $1}')

    # Endpoints of the dependency services (TGI/ASR/TTS/wav2lip).
    export TGI_LLM_ENDPOINT=http://$host_ip:3006
    export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3

    export ASR_ENDPOINT=http://$host_ip:7066
    export TTS_ENDPOINT=http://$host_ip:7055
    export WAV2LIP_ENDPOINT=http://$host_ip:7860

    # Host/port wiring for the megaservice and each microservice.
    export MEGA_SERVICE_HOST_IP=${host_ip}
    export ASR_SERVICE_HOST_IP=${host_ip}
    export TTS_SERVICE_HOST_IP=${host_ip}
    export LLM_SERVICE_HOST_IP=${host_ip}
    export ANIMATION_SERVICE_HOST_IP=${host_ip}

    export MEGA_SERVICE_PORT=8888
    export ASR_SERVICE_PORT=3001
    export TTS_SERVICE_PORT=3002
    export LLM_SERVICE_PORT=3007
    export ANIMATION_SERVICE_PORT=3008

    # Animation (wav2lip + GFPGAN) configuration; DEVICE selects the CPU path.
    export DEVICE="cpu"
    export WAV2LIP_PORT=7860
    export INFERENCE_MODE='wav2lip+gfpgan'
    export CHECKPOINT_PATH='/usr/local/lib/python3.11/site-packages/Wav2Lip/checkpoints/wav2lip_gan.pth'
    export FACE="assets/img/avatar5.png"
    # export AUDIO='assets/audio/eg3_ref.wav' # audio file path is optional, will use base64str in the post request as input if is 'None'
    export AUDIO='None'
    export FACESIZE=96
    export OUTFILE="/outputs/result.mp4"
    export GFPGAN_MODEL_VERSION=1.4 # latest version, can roll back to v1.3 if needed
    export UPSCALE_FACTOR=1
    export FPS=10

    # Start Docker Containers
    docker compose up -d

    # Poll TGI logs (up to 100 x 5s) until the router logs "Connected".
    # NOTE(review): if TGI never reports Connected, the loop falls through
    # without failing — confirm that is intended for CI.
    n=0
    until [[ "$n" -ge 100 ]]; do
        docker logs tgi-service > $LOG_PATH/tgi_service_start.log
        if grep -q Connected $LOG_PATH/tgi_service_start.log; then
            break
        fi
        sleep 5s
        n=$((n+1))
    done

    echo "All services are up and running"
    sleep 5s
}
# POST a sample audio request to the megaservice and check that the reply
# references an mp4 video; on failure dump every service's logs and exit 1.
function validate_megaservice() {
    cd $WORKPATH
    # Bypass any proxy for the local endpoint.
    result=$(http_proxy="" curl http://${ip_address}:3009/v1/avatarchatbot -X POST -d @assets/audio/sample_whoareyou.json -H 'Content-Type: application/json')
    echo "result is === $result"
    # Success criterion: the response contains an mp4 path/filename.
    if [[ $result == *"mp4"* ]]; then
        echo "Result correct."
    else
        # Collect per-container logs to diagnose which stage failed.
        docker logs whisper-service > $LOG_PATH/whisper-service.log
        docker logs asr-service > $LOG_PATH/asr-service.log
        docker logs speecht5-service > $LOG_PATH/speecht5-service.log
        docker logs tts-service > $LOG_PATH/tts-service.log
        docker logs tgi-service > $LOG_PATH/tgi-service.log
        docker logs llm-tgi-server > $LOG_PATH/llm-tgi-server.log
        docker logs wav2lip-service > $LOG_PATH/wav2lip-service.log
        docker logs animation-server > $LOG_PATH/animation-server.log
        echo "Result wrong."
        exit 1
    fi
}
# Frontend validation is not implemented for this example yet.
#function validate_frontend() {
#}

# Tear down all compose-managed containers for the Xeon deployment.
function stop_docker() {
    cd $WORKPATH/docker_compose/intel/cpu/xeon
    docker compose down
}
# Orchestrate the test: clean slate, (optionally) build images, start the
# stack, validate the end-to-end flow, then tear down and reclaim disk.
function main() {
    stop_docker

    # Only build locally when testing the default "opea" registry images.
    if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
    start_services

    # validate_microservices
    validate_megaservice
    # validate_frontend

    stop_docker
    # Reclaim builder cache and dangling images after the run.
    echo y | docker builder prune --all
    echo y | docker image prune
}

main

View File

@@ -1,349 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import asyncio
import base64
import io
import os
import shutil
import subprocess
import time
import aiohttp
import docker
import ffmpeg
import gradio as gr
import numpy as np
import soundfile as sf
from PIL import Image
# %% Docker Management
def update_env_var_in_container(container_name, env_var, new_value):
    """Placeholder for updating an environment variable of a running container.

    Not implemented yet; kept as a no-op so callers can already wire it up.
    Always returns ``None``.
    """
    return None
# %% AudioQnA functions
def preprocess_audio(audio):
    """Convert a Gradio microphone tuple into a base64-encoded WAV string.

    Args:
        audio: tuple ``(sr, y)`` where ``y`` is a 16-bit integer array with
            values in [-32768, 32767] and shape ``(samples,)``.

    Returns:
        str: base64 encoding of the peak-normalized float32 WAV bytes.
    """
    sr, y = audio
    # Convert to float32 and peak-normalize to [-1, 1].
    y = y.astype(np.float32)
    # Guard against silent or empty input: dividing by a zero peak would
    # turn every sample into NaN (0/0) and corrupt the encoded WAV.
    peak = np.max(np.abs(y)) if y.size else 0.0
    if peak > 0:
        y /= peak
    # Serialize to an in-memory WAV file.
    buf = io.BytesIO()
    sf.write(buf, y, sr, format="WAV")
    buf.seek(0)  # Reset the buffer position to the beginning
    # Encode the WAV bytes as a base64 string for the JSON payload.
    base64_bytes = base64.b64encode(buf.read())
    return base64_bytes.decode("utf-8")
def base64_to_int16(base64_string):
    """Decode a base64 WAV payload back into ``(sample_rate, int16 samples)``."""
    raw_wav = base64.b64decode(base64_string)
    stream = io.BytesIO(raw_wav)
    samples, rate = sf.read(stream, dtype="int16")
    return rate, samples
async def transcribe(audio_input, face_input, model_choice):
    """Send mic audio to the AvatarChatbot backend and return the video path.

    Args:
        audio_input: Gradio audio tuple ``(sr, int16 samples)``.
        face_input: chosen avatar file (currently unused — see TO-DO below).
        model_choice: chosen DL model name (currently unused).

    Returns:
        str: path of the rendered avatar video on success, or
        dict: ``{"error": ..., "status_code": ...}`` on HTTP failure.
    """
    global ai_chatbot_url, chat_history, count
    chat_history = ""
    # Encode the recorded audio as a base64 WAV string for the JSON body.
    base64bytestr = preprocess_audio(audio_input)
    initial_inputs = {"audio": base64bytestr, "max_tokens": 64}
    # TO-DO: update wav2lip-service with the chosen face_input / model_choice
    # update_env_var_in_container("wav2lip-service", "DEVICE", "new_device_value")
    async with aiohttp.ClientSession() as session:
        async with session.post(ai_chatbot_url, json=initial_inputs) as response:
            if response.status == 200:
                # The backend writes the rendered video to a fixed location;
                # the textual response body itself is not used.
                await response.text()
                return "docker_compose/intel/hpu/gaudi/result.mp4"
            # Bug fix: aiohttp exposes the HTTP code as `.status`;
            # `.status_code` (a requests-ism) raised AttributeError here.
            return {"error": "Failed to transcribe audio", "status_code": response.status}
def resize_image(image_pil, size=(720, 720)):
    """Return *image_pil* resampled to *size* with Lanczos filtering."""
    target_size = size
    resized = image_pil.resize(target_size, Image.LANCZOS)
    return resized
def resize_video(video_path, save_path, size=(720, 1280)):
    """Re-encode the video at *video_path* scaled to *size* into *save_path*."""
    scale_filter = f"scale={size[0]}:{size[1]}"
    pipeline = ffmpeg.input(video_path).output(save_path, vf=scale_filter)
    # overwrite_output: replace save_path if it already exists.
    pipeline.overwrite_output().run()
# %% AI Avatar demo function
async def aiavatar_demo(audio_input, face_input, model_choice):
    """Run one end-to-end demo turn.

    Input: mic/preloaded audio, avatar file path, model choice.
    Output: path of the AI-generated video, or ``None`` on backend failure.
    """
    # Wait for response from AvatarChatbot backend.
    output_video = await transcribe(audio_input, face_input, model_choice)
    if isinstance(output_video, dict):  # transcribe signals failure with a dict
        # Bug fix: this result feeds a single gr.Video output; the old
        # `return None, None` handed the component a 2-tuple it cannot
        # render. A single None makes Gradio show an empty video instead.
        return None
    return output_video
# %% Main
if __name__ == "__main__":
    # Resolve this host's first IP; used to reach the megaservice endpoint.
    # HOST_IP = os.getenv("host_ip")
    HOST_IP = subprocess.check_output("hostname -I | awk '{print $1}'", shell=True).decode("utf-8").strip()

    # Fetch the AudioQnA backend server
    ai_chatbot_url = f"http://{HOST_IP}:3009/v1/avatarchatbot"

    # Collect chat history to print in the interface
    chat_history = ""

    # Prepare 3 image paths and 3 video paths
    # image_pils = [
    #     Image.open(os.path.join("assets/img/woman1.png")),
    #     Image.open(os.path.join("assets/img/man1.png")),
    #     Image.open(os.path.join("assets/img/woman2.png")),
    # ]
    # video_paths = [
    #     os.path.join("assets/video/man1.mp4"),
    #     os.path.join("assets/video/woman2.mp4"),
    #     os.path.join("assets/video/man4.mp4"),
    # ]

    def image_to_base64(image_path):
        # Read a local image file and return its base64 text encoding,
        # suitable for embedding in an <img src="data:..."> tag.
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode("utf-8")

    # Convert your images to Base64
    xeon_base64 = image_to_base64("assets/img/xeon.jpg")
    gaudi_base64 = image_to_base64("assets/img/gaudi.png")

    # List of prerecorded WAV files containing audio questions
    # audio_filepaths = [
    #     "assets/audio/intel2.wav",
    #     "assets/audio/intel4.wav",
    # ]
    # audio_questions = [
    #     "1. What's the objective of the Open Platform for Enterprise AI? How is it helpful to enterprises building AI solutions?",
    #     "2. What kinds of Intel AI tools are available to accelerate AI workloads?",
    # ]

    # Demo frontend
    demo = gr.Blocks()
    with demo:
        # Define processing functions
        count = 0

        # Make necessary folders:
        if not os.path.exists("inputs"):
            os.makedirs("inputs")
        if not os.path.exists("outputs"):
            os.makedirs("outputs")

        def initial_process(audio_input, face_input, model_choice):
            # Bridge Gradio's sync callback to the async demo pipeline and
            # report the wall-clock latency of the whole request.
            # NOTE(review): a fresh event loop is created per click and never
            # closed — confirm this is acceptable for long-running sessions.
            global count
            start_time = time.time()
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            video_file = loop.run_until_complete(aiavatar_demo(audio_input, face_input, model_choice))
            count += 1
            end_time = time.time()
            return video_file, f"The entire application took {(end_time - start_time):.1f} seconds"

        # def update_selected_image_state(image_index):
        #     image_index = int(image_index)
        #     selected_image_state.value = image_index
        #     # change image_input here
        #     if image_index < len(image_pils):
        #         return f"inputs/face_{image_index}.png"
        #     else:
        #         return f"inputs/video_{image_index - len(image_pils)}.mp4"

        # def update_audio_input(audio_choice):
        #     if audio_choice:
        #         audio_index = int(audio_choice.split(".")[0]) - 1
        #         audio_filepath_gradio = f"inputs/audio_{audio_index:d}.wav"
        #         shutil.copyfile(audio_filepaths[audio_index], audio_filepath_gradio)
        #         return audio_filepath_gradio

        # UI Components
        # Title & Introduction
        gr.Markdown("<h1 style='font-size: 36px;'>A PyTorch and OPEA based AI Avatar Audio Chatbot</h1>")
        with gr.Row():
            with gr.Column(scale=8):
                gr.Markdown(
                    """
                    <p style='font-size: 24px;'>Welcome to our AI Avatar Audio Chatbot! This application leverages PyTorch and <strong>OPEA (Open Platform for Enterprise AI) v0.8</strong> to provide you with a human-like conversational experience. It's run on Intel® Gaudi® AI Accelerator and Intel® Xeon® Processor, with hardware and software optimizations.<br>
                    Please feel free to interact with the AI avatar by choosing your own avatar and talking into the mic.</p>
                    """
                )
            with gr.Column(scale=1):
                # with gr.Row():
                #     gr.Markdown(f"""
                #     <img src='data:image/png;base64,{opea_qr_base64}' alt='OPEA QR Code' style='width: 150px; height: auto;'>
                #     """, label="OPEA QR Code")
                #     gr.Markdown(f"""
                #     <img src='data:image/png;base64,{opea_gh_qr_base64}' alt='OPEA GitHub QR Code' style='width: 150px; height: auto;'>
                #     """, label="OPEA GitHub QR Code")
                # Hardware badges rendered from the base64 images loaded above.
                with gr.Row():
                    gr.Markdown(
                        f"""
                        <img src='data:image/png;base64,{gaudi_base64}' alt='Intel®Gaudi' style='width: 120px; height: auto;'>""",
                        label="Intel®Gaudi",
                    )
                    gr.Markdown(
                        f"""
                        <img src='data:image/png;base64,{xeon_base64}' alt='Intel®Xeon' style='width: 120px; height: auto;'>""",
                        label="Intel®Xeon",
                    )
        gr.Markdown("<hr>")  # Divider

        # Inputs
        # Image gallery (gallery widgets below are currently commented out).
        selected_image_state = gr.State(value=-1)
        image_clicks = []
        image_click_buttons = []
        video_clicks = []
        video_click_buttons = []
        with gr.Row():
            with gr.Column(scale=1):
                audio_input = gr.Audio(
                    sources=["upload", "microphone"], format="wav", label="🎤 or 📤 for your Input audio!"
                )
                # audio_choice = gr.Dropdown(
                #     choices=audio_questions,
                #     label="Choose an audio question",
                #     value=None,  # default value
                # )
                # Update audio_input when a selection is made from the dropdown
                # audio_choice.change(fn=update_audio_input, inputs=audio_choice, outputs=audio_input)
                face_input = gr.File(
                    file_count="single",
                    file_types=["image", "video"],
                    label="Choose an avatar or 📤 an image or video!",
                )
                model_choice = gr.Dropdown(
                    choices=["wav2lip", "wav2lip+GAN", "wav2lip+GFPGAN"],
                    label="Choose a DL model",
                )
            # with gr.Column(scale=2):
            #     # Display 3 images and buttons
            #     with gr.Row():
            #         for i, image_pil in enumerate(image_pils):
            #             image_pil = resize_image(image_pil)
            #             save_path = f"inputs/face_{int(i)}.png"
            #             image_pil.save(save_path, "PNG")
            #             image_clicks.append(gr.Image(type="filepath", value=save_path, label=f"Avatar {int(i)+1}"))
            #     with gr.Row():
            #         for i in range(len(image_pils)):
            #             image_click_buttons.append(gr.Button(f"Use Image {i+1}"))
            #     # Display 3 videos and buttons
            #     with gr.Row():
            #         for i, video_path in enumerate(video_paths):
            #             save_path = f"inputs/video_{int(i)}.mp4"
            #             resize_video(video_path, save_path)
            #             video_clicks.append(gr.Video(value=save_path, label=f"Video {int(i)+1}"))
            #     with gr.Row():
            #         for i in range(len(video_paths)):
            #             video_click_buttons.append(gr.Button(f"Use Video {int(i)+1}"))
        submit_button = gr.Button("Submit")

        # Outputs
        gr.Markdown("<hr>")  # Divider
        with gr.Row():
            with gr.Column():
                video_output = gr.Video(label="Your AI Avatar video: ", format="mp4", width=1280, height=720)
                video_time_text = gr.Textbox(label="Video processing time", value="0.0 seconds")

        # Technical details
        gr.Markdown("<hr>")  # Divider
        with gr.Row():
            gr.Markdown(
                """
                <p style='font-size: 24px;'>OPEA megaservice deployed: <br>
                <ul style='font-size: 24px;'>
                <li><strong>AvatarChatbot</strong></li>
                </ul></p>
                <p style='font-size: 24px;'>OPEA microservices deployed:
                <ul style='font-size: 24px;'>
                <li><strong>ASR</strong> (service: opea/whisper-gaudi, model: openai/whisper-small)</li>
                <li><strong>LLM 'text-generation'</strong> (service: opea/llm-tgi, model: Intel/neural-chat-7b-v3-3)</li>
                <li><strong>TTS</strong> (service: opea/speecht5-gaudi, model: microsoft/speecht5_tts)</li>
                <li><strong>Animation</strong> (service: opea/animation, model: wav2lip+gfpgan)</li>
                </ul></p>
                """
            )
        with gr.Row():
            gr.Image("assets/img/flowchart.png", label="Megaservice Flowchart")
        with gr.Row():
            gr.Markdown(
                """
                <p style='font-size: 24px;'>The AI Avatar Audio Chatbot is powered by the following Intel® AI software:<br>
                <ul style='font-size: 24px;'>
                <li><strong>Intel Gaudi Software v1.17.0</strong></li>
                <li><strong>PyTorch v2.3.1 (Eager mode + torch.compile) </strong></li>
                <li><strong>HPU Graph</strong></li>
                <li><strong>Intel Neural Compressor (INC)</strong></li>
                </ul></p>
                """
            )

        # Disclaimer
        gr.Markdown("<hr>")  # Divider
        gr.Markdown("<h2 style='font-size: 24px;'>Notices & Disclaimers</h1>")
        gr.Markdown(
            """
            <p style='font-size: 20px;'>Intel is committed to respecting human rights and avoiding complicity in human rights abuses. See Intel's Global Human Rights Principles. Intel's products and software are intended only to be used in applications that do not cause or contribute to a violation of an internationally recognized human right.<br></p>
            <p style='font-size: 20px;'>© Intel Corporation. Intel, the Intel logo, and other Intel marks are trademarks of Intel Corporation or its subsidiaries. Other names and brands may be claimed as the property of others.<br></p>
            <p style='font-size: 20px;'>You may not use or facilitate the use of this document in connection with any infringement or other legal analysis concerning Intel products described herein. You agree to grant Intel a non-exclusive, royalty-free license to any patent claim thereafter drafted which includes subject matter disclosed herein.<br></p>
            """
        )

        # State transitions
        # for i in range(len(image_pils)):
        #     image_click_buttons[i].click(
        #         update_selected_image_state, inputs=[gr.Number(value=i, visible=False)], outputs=[face_input]
        #     )
        # for i in range(len(video_paths)):
        #     video_click_buttons[i].click(
        #         update_selected_image_state,
        #         inputs=[gr.Number(value=i + len(image_pils), visible=False)],
        #         outputs=[face_input],
        #     )
        # Wire the Submit button to the processing pipeline.
        submit_button.click(
            initial_process,
            inputs=[audio_input, face_input, model_choice],
            outputs=[
                video_output,
                video_time_text,
            ],
        )

    # Serve the UI on all interfaces; queue() serializes concurrent requests.
    demo.queue().launch(server_name="0.0.0.0", server_port=7861)

View File

@@ -19,10 +19,10 @@ RUN git clone https://github.com/opea-project/GenAIComps.git
WORKDIR /home/user/GenAIComps
RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt && \
pip install --no-cache-dir langchain_core
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt
COPY ./chatqna.py /home/user/chatqna.py
COPY ./gateway.py /home/user/gateway.py
ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps

View File

@@ -19,10 +19,9 @@ RUN git clone https://github.com/opea-project/GenAIComps.git
WORKDIR /home/user/GenAIComps
RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt && \
pip install --no-cache-dir langchain_core
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt
COPY ./chatqna.py /home/user/chatqna.py
COPY ./chatqna_guardrails.py /home/user/chatqna_guardrails.py
ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps
@@ -32,4 +31,4 @@ WORKDIR /home/user
RUN echo 'ulimit -S -n 999999' >> ~/.bashrc
ENTRYPOINT ["python", "chatqna.py", "--with-guardrails"]
ENTRYPOINT ["python", "chatqna_guardrails.py"]

View File

@@ -1,12 +1,15 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
FROM python:3.11-slim
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
git \
libgl1-mesa-glx \
libjemalloc-dev
libjemalloc-dev \
vim \
git
RUN useradd -m -s /bin/bash user && \
mkdir -p /home/user && \
@@ -20,7 +23,7 @@ RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt && \
pip install --no-cache-dir langchain_core
COPY ./graphrag.py /home/user/graphrag.py
COPY ./chatqna_no_wrapper.py /home/user/chatqna_no_wrapper.py
ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps
@@ -28,6 +31,4 @@ USER user
WORKDIR /home/user
RUN echo 'ulimit -S -n 999999' >> ~/.bashrc
ENTRYPOINT ["python", "graphrag.py"]
ENTRYPOINT ["python", "chatqna_no_wrapper.py"]

View File

@@ -17,12 +17,13 @@ RUN useradd -m -s /bin/bash user && \
WORKDIR /home/user/
RUN git clone https://github.com/opea-project/GenAIComps.git
WORKDIR /home/user/GenAIComps
RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt && \
pip install --no-cache-dir langchain_core
COPY ./avatarchatbot.py /home/user/avatarchatbot.py
COPY ./chatqna_no_wrapper.py /home/user/chatqna_no_wrapper.py
ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps
@@ -30,4 +31,4 @@ USER user
WORKDIR /home/user
ENTRYPOINT ["python", "avatarchatbot.py"]
ENTRYPOINT ["python", "chatqna_no_wrapper.py", "--without-rerank"]

View File

@@ -6,9 +6,9 @@
FROM python:3.11-slim
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
git \
libgl1-mesa-glx \
libjemalloc-dev
libjemalloc-dev \
git
RUN useradd -m -s /bin/bash user && \
mkdir -p /home/user && \
@@ -19,10 +19,9 @@ RUN git clone https://github.com/opea-project/GenAIComps.git
WORKDIR /home/user/GenAIComps
RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt && \
pip install --no-cache-dir langchain_core
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt
COPY ./chatqna.py /home/user/chatqna.py
COPY ./chatqna_without_rerank.py /home/user/chatqna_without_rerank.py
ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps
@@ -32,4 +31,4 @@ WORKDIR /home/user
RUN echo 'ulimit -S -n 999999' >> ~/.bashrc
ENTRYPOINT ["python", "chatqna.py", "--without-rerank"]
ENTRYPOINT ["python", "chatqna_without_rerank.py"]

View File

@@ -16,8 +16,6 @@ Quick Start Deployment Steps:
2. Run Docker Compose.
3. Consume the ChatQnA Service.
Note: If you do not have Docker installed, you can run this script to install it: `bash docker_compose/install_docker.sh`
### Quick Start: 1.Setup Environment Variable
To set up environment variables for deploying ChatQnA services, follow these steps:
@@ -55,7 +53,6 @@ To set up environment variables for deploying ChatQnA services, follow these ste
### Quick Start: 2.Run Docker Compose
Select the compose.yaml file that matches your hardware.
CPU example:
```bash
@@ -72,13 +69,9 @@ docker pull opea/chatqna:latest
docker pull opea/chatqna-ui:latest
```
In following cases, you could build docker image from source by yourself.
If you want to build docker by yourself, please refer to `built from source`: [Guide](docker_compose/intel/cpu/xeon/README.md).
- Failed to download the docker image.
- If you want to use a specific version of Docker image.
Please refer to the 'Build Docker Images' in [Guide](docker_compose/intel/cpu/xeon/README.md).
> Note: The optional docker image **opea/chatqna-without-rerank:latest** has not been published yet, users need to build this docker image from source.
### QuickStart: 3.Consume the ChatQnA Service
@@ -206,6 +199,8 @@ cd GenAIExamples/ChatQnA/docker_compose/intel/hpu/gaudi/
docker compose up -d
```
> Notice: Currently only the **Habana Driver 1.16.x** is supported for Gaudi.
Refer to the [Gaudi Guide](./docker_compose/intel/hpu/gaudi/README.md) to build docker images from source.
### Deploy ChatQnA on Xeon
@@ -240,7 +235,7 @@ Refer to the [Kubernetes Guide](./kubernetes/intel/README.md) for instructions o
Install Helm (version >= 3.15) first. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information.
Refer to the [ChatQnA helm chart](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts/chatqna/README.md) for instructions on deploying ChatQnA into Kubernetes on Xeon & Gaudi.
Refer to the [ChatQnA helm chart](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts/chatqna) for instructions on deploying ChatQnA into Kubernetes on Xeon & Gaudi.
### Deploy ChatQnA on AI PC
@@ -250,9 +245,7 @@ Refer to the [AI PC Guide](./docker_compose/intel/cpu/aipc/README.md) for instru
Refer to the [Intel Technology enabling for Openshift readme](https://github.com/intel/intel-technology-enabling-for-openshift/blob/main/workloads/opea/chatqna/README.md) for instructions to deploy ChatQnA prototype on RHOCP with [Red Hat OpenShift AI (RHOAI)](https://www.redhat.com/en/technologies/cloud-computing/openshift/openshift-ai).
## Consume ChatQnA Service with RAG
### Check Service Status
## Consume ChatQnA Service
Before consuming ChatQnA Service, make sure the TGI/vLLM service is ready (which takes up to 2 minutes to start).
@@ -267,23 +260,6 @@ Consume ChatQnA service until you get the TGI response like below.
2024-09-03T02:47:53.402023Z INFO text_generation_router::server: router/src/server.rs:2311: Connected
```
### Upload RAG Files (Optional)
To chat with retrieved information, you need to upload a file using `Dataprep` service.
Here is an example of `Nike 2023` pdf.
```bash
# download pdf file
wget https://raw.githubusercontent.com/opea-project/GenAIComps/main/comps/retrievers/redis/data/nke-10k-2023.pdf
# upload pdf file with dataprep
curl -X POST "http://${host_ip}:6007/v1/dataprep" \
-H "Content-Type: multipart/form-data" \
-F "files=@./nke-10k-2023.pdf"
```
### Consume Chat Service
Two ways of consuming ChatQnA Service:
1. Use cURL command on terminal
@@ -306,7 +282,7 @@ Two ways of consuming ChatQnA Service:
## Troubleshooting
1. If you get errors like "Access Denied", [validate micro service](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/docker_compose/intel/cpu/xeon/README.md#validate-microservices) first. A simple example:
1. If you get errors like "Access Denied", [validate micro service](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/docker_compose/intel/cpu/xeon#validate-microservices) first. A simple example:
```bash
http_proxy="" curl ${host_ip}:6006/embed -X POST -d '{"inputs":"What is Deep Learning?"}' -H 'Content-Type: application/json'

View File

@@ -29,8 +29,6 @@ Results will be displayed in the terminal and saved as CSV file named `1_stats.c
## Getting Started
We recommend using Kubernetes to deploy the ChatQnA service, as it offers benefits such as load balancing and improved scalability. However, you can also deploy the service using Docker if that better suits your needs. Below is a description of Kubernetes deployment and benchmarking. For instructions on deploying and benchmarking with Docker, please refer to [this section](#benchmark-with-docker).
### Prerequisites
- Install Kubernetes by following [this guide](https://github.com/opea-project/docs/blob/main/guide/installation/k8s_install/k8s_install_kubespray.md).
@@ -69,7 +67,7 @@ We have created the [BKC manifest](https://github.com/opea-project/GenAIExamples
```bash
# on k8s-master node
git clone https://github.com/opea-project/GenAIExamples.git
cd GenAIExamples/ChatQnA/benchmark/performance
cd GenAIExamples/ChatQnA/benchmark
# replace the image tag from latest to v0.9 since we want to test with v0.9 release
IMAGE_TAG=v0.9
@@ -88,9 +86,22 @@ find . -name '*.yaml' -type f -exec sed -i "s#\$(EMBEDDING_MODEL_ID)#${EMBEDDING
find . -name '*.yaml' -type f -exec sed -i "s#\$(RERANK_MODEL_ID)#${RERANK_MODEL_ID}#g" {} \;
```
### Benchmark tool preparation
The test uses the [benchmark tool](https://github.com/opea-project/GenAIEval/tree/main/evals/benchmark) to do performance test. We need to set up benchmark tool at the master node of Kubernetes which is k8s-master.
```bash
# on k8s-master node
git clone https://github.com/opea-project/GenAIEval.git
cd GenAIEval
python3 -m venv stress_venv
source stress_venv/bin/activate
pip install -r requirements.txt
```
### Test Configurations
By default, the workload and benchmark configuration is as below:
Workload configuration:
| Key | Value |
| -------- | ------- |
@@ -133,11 +144,11 @@ kubectl label nodes k8s-worker1 node-type=chatqna-opea
##### 2. Install ChatQnA
Go to [BKC manifest](./tuned/with_rerank/single_gaudi) and apply to K8s.
Go to [BKC manifest](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/benchmark/tuned/with_rerank/single_gaudi) and apply to K8s.
```bash
# on k8s-master node
cd GenAIExamples/ChatQnA/benchmark/performance/tuned/with_rerank/single_gaudi
cd GenAIExamples/ChatQnA/benchmark/tuned/with_rerank/single_gaudi
kubectl apply -f .
```
@@ -176,21 +187,21 @@ curl -X POST "http://${cluster_ip}:6007/v1/dataprep" \
###### 3.2 Run Benchmark Test
Before the benchmark, we can configure the number of test queries and test output directory by:
We copy the configuration file [benchmark.yaml](./benchmark.yaml) to `GenAIEval/evals/benchmark/benchmark.yaml` and config `test_suite_config.user_queries` and `test_suite_config.test_output_dir`.
```bash
export USER_QUERIES="[640, 640, 640, 640]"
export USER_QUERIES="[4, 8, 16, 640]"
export TEST_OUTPUT_DIR="/home/sdp/benchmark_output/node_1"
envsubst < ./benchmark.yaml > GenAIEval/evals/benchmark/benchmark.yaml
```
And then run the benchmark by:
And then run the benchmark tool by:
```bash
bash benchmark.sh -n 1
cd GenAIEval/evals/benchmark
python benchmark.py
```
The argument `-n` refers to the number of test nodes. Note that necessary dependencies will be installed automatically when running the benchmark for the first time.
##### 4. Data collection
All the test results will come to this folder `/home/sdp/benchmark_output/node_1` configured by the environment variable `TEST_OUTPUT_DIR` in previous steps.
@@ -199,7 +210,7 @@ All the test results will come to this folder `/home/sdp/benchmark_output/node_1
```bash
# on k8s-master node
cd GenAIExamples/ChatQnA/benchmark/performance/tuned/with_rerank/single_gaudi
cd GenAIExamples/ChatQnA/benchmark/tuned/with_rerank/single_gaudi
kubectl delete -f .
kubectl label nodes k8s-worker1 node-type-
```
@@ -216,31 +227,31 @@ kubectl label nodes k8s-worker1 k8s-worker2 node-type=chatqna-opea
##### 2. Install ChatQnA
Go to [BKC manifest](./tuned/with_rerank/two_gaudi) and apply to K8s.
Go to [BKC manifest](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/benchmark/tuned/with_rerank/two_gaudi) and apply to K8s.
```bash
# on k8s-master node
cd GenAIExamples/ChatQnA/benchmark/performance/tuned/with_rerank/two_gaudi
cd GenAIExamples/ChatQnA/benchmark/tuned/with_rerank/two_gaudi
kubectl apply -f .
```
##### 3. Run tests
Before the benchmark, we can configure the number of test queries and test output directory by:
We copy the configuration file [benchmark.yaml](./benchmark.yaml) to `GenAIEval/evals/benchmark/benchmark.yaml` and config `test_suite_config.user_queries` and `test_suite_config.test_output_dir`.
```bash
export USER_QUERIES="[1280, 1280, 1280, 1280]"
export USER_QUERIES="[4, 8, 16, 1280]"
export TEST_OUTPUT_DIR="/home/sdp/benchmark_output/node_2"
envsubst < ./benchmark.yaml > GenAIEval/evals/benchmark/benchmark.yaml
```
And then run the benchmark by:
And then run the benchmark tool by:
```bash
bash benchmark.sh -n 2
cd GenAIEval/evals/benchmark
python benchmark.py
```
The argument `-n` refers to the number of test nodes. Note that necessary dependencies will be automatically installed when running benchmark for the first time.
##### 4. Data collection
All the test results will come to this folder `/home/sdp/benchmark_output/node_2` configured by the environment variable `TEST_OUTPUT_DIR` in previous steps.
@@ -265,31 +276,31 @@ kubectl label nodes k8s-master k8s-worker1 k8s-worker2 k8s-worker3 node-type=cha
##### 2. Install ChatQnA
Go to [BKC manifest](./tuned/with_rerank/four_gaudi) and apply to K8s.
Go to [BKC manifest](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/benchmark/tuned/with_rerank/four_gaudi) and apply to K8s.
```bash
# on k8s-master node
cd GenAIExamples/ChatQnA/benchmark/performance/tuned/with_rerank/four_gaudi
cd GenAIExamples/ChatQnA/benchmark/tuned/with_rerank/four_gaudi
kubectl apply -f .
```
##### 3. Run tests
Before the benchmark, we can configure the number of test queries and test output directory by:
We copy the configuration file [benchmark.yaml](./benchmark.yaml) to `GenAIEval/evals/benchmark/benchmark.yaml` and config `test_suite_config.user_queries` and `test_suite_config.test_output_dir`.
```bash
export USER_QUERIES="[2560, 2560, 2560, 2560]"
export USER_QUERIES="[4, 8, 16, 2560]"
export TEST_OUTPUT_DIR="/home/sdp/benchmark_output/node_4"
envsubst < ./benchmark.yaml > GenAIEval/evals/benchmark/benchmark.yaml
```
And then run the benchmark by:
And then run the benchmark tool by:
```bash
bash benchmark.sh -n 4
cd GenAIEval/evals/benchmark
python benchmark.py
```
The argument `-n` refers to the number of test nodes. Note that necessary dependencies will be automatically installed when running benchmark for the first time.
##### 4. Data collection
All the test results will come to this folder `/home/sdp/benchmark_output/node_4` configured by the environment variable `TEST_OUTPUT_DIR` in previous steps.
@@ -298,81 +309,11 @@ All the test results will come to this folder `/home/sdp/benchmark_output/node_4
```bash
# on k8s-master node
cd GenAIExamples/ChatQnA/benchmark/performance/tuned/with_rerank/single_gaudi
cd GenAIExamples/ChatQnA/benchmark/tuned/with_rerank/single_gaudi
kubectl delete -f .
kubectl label nodes k8s-master k8s-worker1 k8s-worker2 k8s-worker3 node-type-
```
## Benchmark with Docker
#### 6. Results
### Deploy ChatQnA service with Docker
In order to set up the environment correctly, you'll need to configure essential environment variables and, if applicable, proxy-related variables.
```bash
# Example: host_ip="192.168.1.1"
export host_ip="External_Public_IP"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy"
export http_proxy="Your_HTTP_Proxy"
export https_proxy="Your_HTTPs_Proxy"
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
```
#### Deploy ChatQnA on Gaudi
```bash
cd GenAIExamples/ChatQnA/docker_compose/intel/hpu/gaudi/
docker compose up -d
```
Refer to the [Gaudi Guide](../../docker_compose/intel/hpu/gaudi/README.md) to build docker images from source.
#### Deploy ChatQnA on Xeon
```bash
cd GenAIExamples/ChatQnA/docker_compose/intel/cpu/xeon/
docker compose up -d
```
Refer to the [Xeon Guide](../../docker_compose/intel/cpu/xeon/README.md) for more instructions on building docker images from source.
#### Deploy ChatQnA on NVIDIA GPU
```bash
cd GenAIExamples/ChatQnA/docker_compose/nvidia/gpu/
docker compose up -d
```
Refer to the [NVIDIA GPU Guide](../../docker_compose/nvidia/gpu/README.md) for more instructions on building docker images from source.
### Run tests
Before the benchmark, we can configure the number of test queries and test output directory by:
```bash
export USER_QUERIES="[640, 640, 640, 640]"
export TEST_OUTPUT_DIR="/home/sdp/benchmark_output/docker"
```
And then run the benchmark by:
```bash
bash benchmark.sh -d docker -i <service-ip> -p <service-port>
```
The argument `-i` and `-p` refer to the deployed ChatQnA service IP and port, respectively. Note that necessary dependencies will be automatically installed when running benchmark for the first time.
### Data collection
All the test results will come to this folder `/home/sdp/benchmark_output/docker` configured by the environment variable `TEST_OUTPUT_DIR` in previous steps.
### Clean up
Take gaudi as example, use the below command to clean up system.
```bash
cd GenAIExamples/docker_compose/intel/hpu/gaudi
docker compose stop && docker compose rm -f
echo y | docker system prune
```
Check OOB performance data [here](/opea_release_data.md#chatqna), tuned performance data will be released soon.

View File

@@ -1,170 +0,0 @@
# ChatQnA Accuracy
ChatQnA is a Retrieval-Augmented Generation (RAG) pipeline, which can enhance generative models through external information retrieval.
For evaluating the accuracy, we use 2 latest published datasets and 10+ metrics which are popular and comprehensive:
- Dataset
- [MultiHop](https://arxiv.org/pdf/2401.15391) (English dataset)
- [CRUD](https://arxiv.org/abs/2401.17043) (Chinese dataset)
- metrics (measure accuracy of both the context retrieval and response generation)
- evaluation for retrieval/reranking
- MRR@10
- MAP@10
- Hits@10
- Hits@4
- LLM-as-a-Judge
- evaluation for the generated response from the end-to-end pipeline
- BLEU
- ROUGE-L
- LLM-as-a-Judge
## Prerequisite
### Environment
```bash
git clone https://github.com/opea-project/GenAIEval
cd GenAIEval
pip install -r requirements.txt
pip install -e .
```
## MultiHop (English dataset)
[MultiHop-RAG](https://arxiv.org/pdf/2401.15391): a QA dataset to evaluate retrieval and reasoning across documents with metadata in the RAG pipelines. It contains 2556 queries, with evidence for each query distributed across 2 to 4 documents. The queries also involve document metadata, reflecting complex scenarios commonly found in real-world RAG applications.
### Launch Service of RAG System
Please refer to this [guide](https://github.com/opea-project/GenAIExamples/blob/main/ChatQnA/README.md) to launch the service of `ChatQnA`.
### Launch Service of LLM-as-a-Judge
To setup a LLM model, we can use [tgi-gaudi](https://github.com/huggingface/tgi-gaudi) to launch a service. For example, the follow command is to setup the [mistralai/Mixtral-8x7B-Instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1) model on 2 Gaudi2 cards:
```
# please set your llm_port and hf_token
docker run -p {your_llm_port}:80 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HF_TOKEN={your_hf_token} --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.1 --model-id mistralai/Mixtral-8x7B-Instruct-v0.1 --max-input-tokens 2048 --max-total-tokens 4096 --sharded true --num-shard 2
# for better performance, set `PREFILL_BATCH_BUCKET_SIZE`, `BATCH_BUCKET_SIZE`, `max-batch-total-tokens`, `max-batch-prefill-tokens`
docker run -p {your_llm_port}:80 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HF_TOKEN={your_hf_token} -e PREFILL_BATCH_BUCKET_SIZE=1 -e BATCH_BUCKET_SIZE=8 --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.5 --model-id mistralai/Mixtral-8x7B-Instruct-v0.1 --max-input-tokens 2048 --max-total-tokens 4096 --sharded true --num-shard 2 --max-batch-total-tokens 65536 --max-batch-prefill-tokens 2048
```
### Prepare Dataset
We use the evaluation dataset from [MultiHop-RAG](https://github.com/yixuantt/MultiHop-RAG) repo, use the below command to prepare the dataset.
```bash
git clone https://github.com/yixuantt/MultiHop-RAG.git
```
### Evaluation
Use the below command to run the evaluation. Please note that for the first run, the argument `--ingest_docs` should be added in the command to ingest the documents into the vector database, while for subsequent runs, this argument should be omitted. Set `--retrieval_metrics` to get retrieval related metrics (MRR@10/MAP@10/Hits@10/Hits@4). Set `--ragas_metrics` and `--llm_endpoint` to get end-to-end rag pipeline metrics (faithfulness/answer_relevancy/...), which are judged by LLMs. We set `--limits` to 100 by default, which means only 100 examples are evaluated by llm-as-judge, as it is very time consuming.
If you are using docker compose to deploy `ChatQnA` system, you can simply run the evaluation as following:
```bash
python eval_multihop.py --docs_path MultiHop-RAG/dataset/corpus.json --dataset_path MultiHop-RAG/dataset/MultiHopRAG.json --ingest_docs --retrieval_metrics --ragas_metrics --llm_endpoint http://{llm_as_judge_ip}:{llm_as_judge_port}/generate
```
If you are using Kubernetes manifest/helm to deploy `ChatQnA` system, you must specify more arguments as following:
```bash
python eval_multihop.py --docs_path MultiHop-RAG/dataset/corpus.json --dataset_path MultiHop-RAG/dataset/MultiHopRAG.json --ingest_docs --retrieval_metrics --ragas_metrics --llm_endpoint http://{llm_as_judge_ip}:{llm_as_judge_port}/generate --database_endpoint http://{your_dataprep_ip}:{your_dataprep_port}/v1/dataprep --embedding_endpoint http://{your_embedding_ip}:{your_embedding_port}/v1/embeddings --tei_embedding_endpoint http://{your_tei_embedding_ip}:{your_tei_embedding_port} --retrieval_endpoint http://{your_retrieval_ip}:{your_retrieval_port}/v1/retrieval --service_url http://{your_chatqna_ip}:{your_chatqna_port}/v1/chatqna
```
The default values for arguments are:
|Argument|Default value|
|--------|-------------|
|service_url|http://localhost:8888/v1/chatqna|
|database_endpoint|http://localhost:6007/v1/dataprep|
|embedding_endpoint|http://localhost:6000/v1/embeddings|
|tei_embedding_endpoint|http://localhost:8090|
|retrieval_endpoint|http://localhost:7000/v1/retrieval|
|reranking_endpoint|http://localhost:8000/v1/reranking|
|output_dir|./output|
|temperature|0.1|
|max_new_tokens|1280|
|chunk_size|256|
|chunk_overlap|100|
|search_type|similarity|
|retrival_k|10|
|fetch_k|20|
|lambda_mult|0.5|
|dataset_path|None|
|docs_path|None|
|limits|100|
You can check argument details using the command below:
```bash
python eval_multihop.py --help
```
## CRUD (Chinese dataset)
[CRUD-RAG](https://arxiv.org/abs/2401.17043) is a Chinese benchmark for RAG (Retrieval-Augmented Generation) systems. This example utilizes CRUD-RAG to evaluate the RAG system.
### Prepare Dataset
We use the evaluation dataset from [CRUD-RAG](https://github.com/IAAR-Shanghai/CRUD_RAG) repo, use the below command to prepare the dataset.
```bash
git clone https://github.com/IAAR-Shanghai/CRUD_RAG
mkdir data/
cp CRUD_RAG/data/crud_split/split_merged.json data/
cp -r CRUD_RAG/data/80000_docs/ data/
python process_crud_dataset.py
```
### Launch Service of RAG System
Please refer to this [guide](https://github.com/opea-project/GenAIExamples/blob/main/ChatQnA/README.md) to launch the service of the `ChatQnA` system. For a Chinese dataset, you should replace the English embedding and LLM models with Chinese ones, for example, `EMBEDDING_MODEL_ID="BAAI/bge-base-zh-v1.5"` and `LLM_MODEL_ID=Qwen/Qwen2-7B-Instruct`.
### Evaluation
Use below command to run the evaluation, please note that for the first run, argument `--ingest_docs` should be added in the command to ingest the documents into the vector database, while for the subsequent run, this argument should be omitted.
If you are using docker compose to deploy `ChatQnA` system, you can simply run the evaluation as following:
```bash
python eval_crud.py --dataset_path ./data/split_merged.json --docs_path ./data/80000_docs --ingest_docs
# if you want to get ragas metrics
python eval_crud.py --dataset_path ./data/split_merged.json --docs_path ./data/80000_docs --contain_original_data --llm_endpoint "http://{llm_as_judge_ip}:{llm_as_judge_port}" --ragas_metrics
```
If you are using Kubernetes manifest/helm to deploy `ChatQnA` system, you must specify more arguments as following:
```bash
python eval_crud.py --dataset_path ./data/split_merged.json --docs_path ./data/80000_docs --ingest_docs --database_endpoint http://{your_dataprep_ip}:{your_dataprep_port}/v1/dataprep --embedding_endpoint http://{your_embedding_ip}:{your_embedding_port}/v1/embeddings --retrieval_endpoint http://{your_retrieval_ip}:{your_retrieval_port}/v1/retrieval --service_url http://{your_chatqna_ip}:{your_chatqna_port}/v1/chatqna
```
The default values for arguments are:
|Argument|Default value|
|--------|-------------|
|service_url|http://localhost:8888/v1/chatqna|
|database_endpoint|http://localhost:6007/v1/dataprep|
|embedding_endpoint|http://localhost:6000/v1/embeddings|
|retrieval_endpoint|http://localhost:7000/v1/retrieval|
|reranking_endpoint|http://localhost:8000/v1/reranking|
|output_dir|./output|
|temperature|0.1|
|max_new_tokens|1280|
|chunk_size|256|
|chunk_overlap|100|
|dataset_path|./data/split_merged.json|
|docs_path|./data/80000_docs|
|tasks|["question_answering"]|
You can check argument details using the command below:
```bash
python eval_crud.py --help
```
## Acknowledgements
This example is mostly adapted from [MultiHop-RAG](https://github.com/yixuantt/MultiHop-RAG) and [CRUD-RAG](https://github.com/IAAR-Shanghai/CRUD_RAG) repo, we thank the authors for their great work!

View File

@@ -1,210 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import argparse
import json
import os
from evals.evaluation.rag_eval import Evaluator
from evals.evaluation.rag_eval.template import CRUDTemplate
from evals.metrics.ragas import RagasMetric
from tqdm import tqdm
class CRUD_Evaluator(Evaluator):
    """Evaluator for the CRUD-RAG Chinese RAG benchmark.

    Each supported task stores its ground truth, query and source document
    under different keys of the dataset sample; ``_TASK_KEYS`` centralizes
    that mapping instead of repeating a four-way if/elif in every accessor.
    Also provides CRUD-specific prompt templates, response post-processing
    and ragas metric computation.
    """

    # task -> dataset keys for (ground truth, query, document)
    _TASK_KEYS = {
        "summarization": ("summary", "text", "text"),
        "question_answering": ("answers", "questions", "news1"),
        "continuation": ("continuing", "beginning", "beginning"),
        "hallucinated_modified": ("hallucinatedMod", "newsBeginning", "newsBeginning"),
    }

    def _task_keys(self):
        """Return the (ground_truth, query, document) keys for ``self.task``.

        Raises:
            NotImplementedError: if ``self.task`` is not a supported task.
        """
        keys = self._TASK_KEYS.get(self.task)
        if keys is None:
            raise NotImplementedError(
                f"Unknown task {self.task}, only support "
                "summarization, question_answering, continuation and hallucinated_modified."
            )
        return keys

    def get_ground_truth_text(self, data: dict):
        """Return the reference text the generated output is scored against."""
        return data[self._task_keys()[0]]

    def get_query(self, data: dict):
        """Return the query text sent to the RAG pipeline for the current task."""
        return data[self._task_keys()[1]]

    def get_document(self, data: dict):
        """Return the source document associated with the current sample."""
        return data[self._task_keys()[2]]

    def get_template(self):
        """Return the CRUD prompt template for the current task.

        NOTE(review): unlike the field accessors above, only summarization,
        question_answering and continuation have templates here; the error
        message below (kept from the original) also mentions
        hallucinated_modified.
        """
        if self.task == "summarization":
            return CRUDTemplate.get_summarization_template()
        if self.task == "question_answering":
            return CRUDTemplate.get_question_answering_template()
        if self.task == "continuation":
            return CRUDTemplate.get_continuation_template()
        raise NotImplementedError(
            f"Unknown task {self.task}, only support "
            "summarization, question_answering, continuation and hallucinated_modified."
        )

    def post_process(self, result):
        """Extract the text between <response> tags from the raw LLM output."""
        return result.split("<response>")[-1].split("</response>")[0].strip()

    def get_ragas_metrics(self, results, arguments):
        """Compute ragas faithfulness/answer_relevancy over valid results.

        Args:
            results: dict with a "results" list as produced by the evaluation run.
            arguments: parsed CLI namespace providing the LLM and TEI endpoints.
        """
        from langchain_huggingface import HuggingFaceEndpointEmbeddings

        embeddings = HuggingFaceEndpointEmbeddings(model=arguments.tei_embedding_endpoint)
        metric = RagasMetric(
            threshold=0.5,
            model=arguments.llm_endpoint,
            embeddings=embeddings,
            metrics=["faithfulness", "answer_relevancy"],
        )
        ragas_inputs = {
            "question": [],
            "answer": [],
            "ground_truth": [],
            "contexts": [],
        }
        for data in tqdm(self.remove_invalid(results["results"])):
            data = data["original_data"]
            ragas_inputs["question"].append(self.get_query(data))
            ragas_inputs["answer"].append(data["generated_text"])
            ragas_inputs["ground_truth"].append(data["ground_truth_text"])
            # only the first three retrieved documents are passed to the judge
            ragas_inputs["contexts"].append(data["retrieved_documents"][:3])
        return metric.measure(ragas_inputs)
def args_parser():
    """Build and parse the command-line arguments for CRUD-RAG evaluation."""
    p = argparse.ArgumentParser()
    # Pipeline endpoint and generation settings.
    p.add_argument("--service_url", type=str, default="http://localhost:8888/v1/chatqna", help="Service URL address.")
    p.add_argument("--output_dir", type=str, default="./output", help="Directory to save evaluation results.")
    p.add_argument("--temperature", type=float, default=0.1, help="Controls the randomness of the model's text generation")
    p.add_argument("--max_new_tokens", type=int, default=1280, help="Maximum number of new tokens to be generated by the model")
    p.add_argument("--chunk_size", type=int, default=256, help="the maximum number of characters that a chunk can contain")
    p.add_argument(
        "--chunk_overlap",
        type=int,
        default=100,
        help="the number of characters that should overlap between two adjacent chunks",
    )
    p.add_argument("--dataset_path", default="../data/split_merged.json", help="Path to the dataset")
    p.add_argument("--docs_path", default="../data/80000_docs", help="Path to the retrieval documents")
    # Retriever related options
    p.add_argument("--tasks", default=["question_answering"], nargs="+", help="Task to perform")
    p.add_argument("--ingest_docs", action="store_true", help="Whether to ingest documents to vector database")
    p.add_argument("--database_endpoint", type=str, default="http://localhost:6007/v1/dataprep", help="Service URL address.")
    p.add_argument("--embedding_endpoint", type=str, default="http://localhost:6000/v1/embeddings", help="Service URL address.")
    p.add_argument("--retrieval_endpoint", type=str, default="http://localhost:7000/v1/retrieval", help="Service URL address.")
    p.add_argument(
        "--tei_embedding_endpoint",
        type=str,
        default="http://localhost:8090",
        help="Service URL address of tei embedding.",
    )
    p.add_argument("--ragas_metrics", action="store_true", help="Whether to compute ragas metrics.")
    p.add_argument("--llm_endpoint", type=str, default=None, help="Service URL address.")
    p.add_argument("--show_progress_bar", action="store", default=True, type=bool, help="Whether to show a progress bar")
    p.add_argument("--contain_original_data", action="store_true", help="Whether to contain original data")
    return p.parse_args()
def main():
    """Run CRUD-RAG evaluation for each requested task and save results."""
    args = args_parser()
    if os.path.isfile(args.dataset_path):
        with open(args.dataset_path) as f:
            all_datasets = json.load(f)
    else:
        raise FileNotFoundError(f"Evaluation dataset file {args.dataset_path} does not exist.")
    os.makedirs(args.output_dir, exist_ok=True)
    # Dataset split used for each task this script actually handles.
    task_splits = {
        "question_answering": "questanswer_1doc",
        "summarization": "event_summary",
    }
    for task in args.tasks:
        split = task_splits.get(task)
        if split is None:
            # The original message claimed continuation/hallucinated_modified
            # were supported, but no dataset split is mapped for them here.
            raise NotImplementedError(
                f"Unknown task {task}, only question_answering and summarization are supported by this script."
            )
        dataset = all_datasets[split]
        output_save_path = os.path.join(args.output_dir, f"{task}.json")
        evaluator = CRUD_Evaluator(dataset=dataset, output_path=output_save_path, task=task)
        if args.ingest_docs:
            # Ingest the corpus into the vector database before evaluating.
            CRUD_Evaluator.ingest_docs(args.docs_path, args.database_endpoint, args.chunk_size, args.chunk_overlap)
        results = evaluator.evaluate(
            args, show_progress_bar=args.show_progress_bar, contain_original_data=args.contain_original_data
        )
        print(results["overall"])
        if args.ragas_metrics:
            ragas_metrics = evaluator.get_ragas_metrics(results, args)
            print(ragas_metrics)
        print(f"Evaluation results of task {task} saved to {output_save_path}.")


if __name__ == "__main__":
    main()

View File

@@ -1,279 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import argparse
import json
import os
import requests
from evals.evaluation.rag_eval import Evaluator
from evals.metrics.ragas import RagasMetric
from evals.metrics.retrieval import RetrievalBaseMetric
from tqdm import tqdm
class MultiHop_Evaluator(Evaluator):
    """Evaluator for the MultiHop-RAG English benchmark.

    Computes retrieval metrics (Hits@10/Hits@4/MAP@10/MRR@10), end-to-end
    answer accuracy, and optional ragas (LLM-as-judge) metrics against a
    deployed ChatQnA pipeline.
    """

    def get_ground_truth_text(self, data: dict):
        """Return the gold answer of a MultiHop sample."""
        return data["answer"]

    def get_query(self, data: dict):
        """Return the query text of a MultiHop sample."""
        return data["query"]

    def get_template(self):
        """MultiHop uses the pipeline's default prompt; no custom template."""
        return None

    def get_reranked_documents(self, query, docs, arguments):
        """Rerank *docs* for *query* via the reranking microservice.

        Returns the reranked document texts, or [] on request failure.
        """
        payload = {
            "initial_query": query,
            "retrieved_docs": [{"text": doc} for doc in docs],
            "top_n": 10,
        }
        headers = {"Content-Type": "application/json"}
        response = requests.post(arguments.reranking_endpoint, data=json.dumps(payload), headers=headers)
        if response.ok:
            return response.json()["documents"]
        # fixed copy-paste: this is the reranking request, not retrieval
        print(f"Request for reranking failed due to {response.text}.")
        return []

    def get_retrieved_documents(self, query, arguments):
        """Embed *query* with the TEI service, then query the retriever.

        Returns a list of document texts, or [] if either request fails.
        """
        headers = {"Content-Type": "application/json"}
        response = requests.post(
            arguments.tei_embedding_endpoint + "/embed", data=json.dumps({"inputs": query}), headers=headers
        )
        if not response.ok:
            print(f"Request for embedding failed due to {response.text}.")
            return []
        embedding = response.json()[0]
        payload = {
            "text": query,
            "embedding": embedding,
            "search_type": arguments.search_type,
            "k": arguments.retrival_k,
            "fetch_k": arguments.fetch_k,
            "lambda_mult": arguments.lambda_mult,
        }
        response = requests.post(arguments.retrieval_endpoint, data=json.dumps(payload), headers=headers)
        if response.ok:
            return [doc["text"] for doc in response.json()["retrieved_docs"]]
        print(f"Request for retrieval failed due to {response.text}.")
        return []

    def get_retrieval_metrics(self, all_queries, arguments):
        """Average Hits@10/Hits@4/MAP@10/MRR@10 over all non-null queries."""
        print("start to retrieve...")
        metric = RetrievalBaseMetric()
        sums = {"Hits@10": 0, "Hits@4": 0, "MAP@10": 0, "MRR@10": 0}
        total = 0
        for data in tqdm(all_queries):
            if data["question_type"] == "null_query":
                continue
            query = data["query"]
            retrieved_documents = self.get_retrieved_documents(query, arguments)
            if arguments.rerank:
                retrieved_documents = self.get_reranked_documents(query, retrieved_documents, arguments)
            golden_context = [each["fact"] for each in data["evidence_list"]]
            test_case = {
                "input": query,
                "golden_context": golden_context,
                "retrieval_context": retrieved_documents,
            }
            results = metric.measure(test_case)
            for key in sums:
                sums[key] += results[key]
            total += 1
        # Guard against an input containing only null_query items (the
        # original divided by zero here).
        if total == 0:
            return dict.fromkeys(sums, 0)
        return {key: value / total for key, value in sums.items()}

    def evaluate(self, all_queries, arguments):
        """Run end-to-end generation over all non-null queries.

        Returns the overall scores plus exact-substring-match accuracy.
        """
        results = []
        accuracy = 0
        index = 0
        for data in tqdm(all_queries):
            if data["question_type"] == "null_query":
                continue
            generated_text = self.send_request(data, arguments)
            data["generated_text"] = generated_text
            # same method with paper: https://github.com/yixuantt/MultiHop-RAG/issues/8
            if data["answer"] in generated_text:
                accuracy += 1
            results.append({"id": index, **self.scoring(data)})
            index += 1
        valid_results = self.remove_invalid(results)
        try:
            overall = self.compute_overall(valid_results) if len(valid_results) > 0 else {}
        except Exception as e:
            print(repr(e))
            overall = dict()
        # Avoid ZeroDivisionError when every query was a null_query.
        overall.update({"accuracy": accuracy / len(results) if results else 0.0})
        return overall

    def get_ragas_metrics(self, all_queries, arguments):
        """Compute ragas metrics (LLM-as-judge) on up to ``arguments.limits`` samples."""
        from langchain_huggingface import HuggingFaceEndpointEmbeddings

        embeddings = HuggingFaceEndpointEmbeddings(model=arguments.tei_embedding_endpoint)
        metric = RagasMetric(threshold=0.5, model=arguments.llm_endpoint, embeddings=embeddings)
        ragas_inputs = {
            "question": [],
            "answer": [],
            "ground_truth": [],
            "contexts": [],
        }
        for data in tqdm(all_queries):
            if data["question_type"] == "null_query":
                continue
            retrieved_documents = self.get_retrieved_documents(data["query"], arguments)
            generated_text = self.send_request(data, arguments)
            data["generated_text"] = generated_text
            ragas_inputs["question"].append(data["query"])
            ragas_inputs["answer"].append(generated_text)
            ragas_inputs["ground_truth"].append(data["answer"])
            # only the first three retrieved documents are passed to the judge
            ragas_inputs["contexts"].append(retrieved_documents[:3])
            if len(ragas_inputs["question"]) >= arguments.limits:
                break
        return metric.measure(ragas_inputs)
def args_parser():
    """Build and parse the command-line arguments for MultiHop-RAG evaluation."""
    p = argparse.ArgumentParser()
    # Pipeline endpoint and generation settings.
    p.add_argument("--service_url", type=str, default="http://localhost:8888/v1/chatqna", help="Service URL address.")
    p.add_argument("--output_dir", type=str, default="./output", help="Directory to save evaluation results.")
    p.add_argument("--temperature", type=float, default=0.1, help="Controls the randomness of the model's text generation")
    p.add_argument("--max_new_tokens", type=int, default=1280, help="Maximum number of new tokens to be generated by the model")
    p.add_argument("--chunk_size", type=int, default=256, help="the maximum number of characters that a chunk can contain")
    p.add_argument(
        "--chunk_overlap",
        type=int,
        default=100,
        help="the number of characters that should overlap between two adjacent chunks",
    )
    # Retrieval/search behavior.
    p.add_argument("--search_type", type=str, default="similarity", help="similarity type")
    p.add_argument("--retrival_k", type=int, default=10, help="Number of Documents to return.")
    p.add_argument("--fetch_k", type=int, default=20, help="Number of Documents to fetch to pass to MMR algorithm.")
    p.add_argument(
        "--lambda_mult",
        type=float,
        default=0.5,
        help="Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.",
    )
    p.add_argument("--dataset_path", default=None, help="Path to the dataset")
    p.add_argument("--docs_path", default=None, help="Path to the retrieval documents")
    # Retriever related options
    p.add_argument("--ingest_docs", action="store_true", help="Whether to ingest documents to vector database")
    p.add_argument("--retrieval_metrics", action="store_true", help="Whether to compute retrieval metrics.")
    p.add_argument("--ragas_metrics", action="store_true", help="Whether to compute ragas metrics.")
    p.add_argument("--limits", type=int, default=100, help="Number of examples to be evaluated by llm-as-judge")
    p.add_argument("--database_endpoint", type=str, default="http://localhost:6007/v1/dataprep", help="Service URL address.")
    p.add_argument("--embedding_endpoint", type=str, default="http://localhost:6000/v1/embeddings", help="Service URL address.")
    p.add_argument(
        "--tei_embedding_endpoint",
        type=str,
        default="http://localhost:8090",
        help="Service URL address of tei embedding.",
    )
    p.add_argument("--retrieval_endpoint", type=str, default="http://localhost:7000/v1/retrieval", help="Service URL address.")
    p.add_argument("--rerank", action="store_true", help="Whether to use rerank microservice.")
    p.add_argument("--reranking_endpoint", type=str, default="http://localhost:8000/v1/reranking", help="Service URL address.")
    p.add_argument("--llm_endpoint", type=str, default=None, help="Service URL address.")
    p.add_argument("--show_progress_bar", action="store", default=True, type=bool, help="Whether to show a progress bar")
    p.add_argument("--contain_original_data", action="store_true", help="Whether to contain original data")
    return p.parse_args()
def main():
    """Evaluate a deployed ChatQnA pipeline on the MultiHop-RAG dataset."""
    args = args_parser()
    evaluator = MultiHop_Evaluator()

    if args.ingest_docs:
        # Flatten the corpus into a plain-text file (one document body per
        # line) and ingest it into the vector database. The original built
        # this file unconditionally — and opened args.docs_path even when it
        # was the default None — which crashed pure-evaluation runs. An
        # unused per-document `metadata` dict was also removed.
        with open(args.docs_path, "r") as file:
            doc_data = json.load(file)
        tmp_corpus_file = "tmp_corpus.txt"
        with open(tmp_corpus_file, "w") as f:
            for doc in doc_data:
                f.write(doc["body"] + "\n")
        evaluator.ingest_docs(tmp_corpus_file, args.database_endpoint, args.chunk_size, args.chunk_overlap)

    with open(args.dataset_path, "r") as file:
        all_queries = json.load(file)

    # get retrieval quality
    if args.retrieval_metrics:
        retrieval_metrics = evaluator.get_retrieval_metrics(all_queries, args)
        print(retrieval_metrics)
    # get rag quality
    if args.ragas_metrics:
        ragas_metrics = evaluator.get_ragas_metrics(all_queries, args)
        print(ragas_metrics)


if __name__ == "__main__":
    main()

Some files were not shown because too many files have changed in this diff Show More