Compare commits


80 Commits
r0.8.1 ... v0.9

Author SHA1 Message Date
xiguiw
4d5972112c [Doc] Update ChatQnA flow chart (#542)
* Update flow chart

Signed-off-by: Wang, Xigui <xigui.wang@intel.com>

* Updated Flowchart

Signed-off-by: srinarayan-srikanthan <srinarayan.srikanthan@intel.com>

---------

Signed-off-by: Wang, Xigui <xigui.wang@intel.com>
Signed-off-by: srinarayan-srikanthan <srinarayan.srikanthan@intel.com>
Co-authored-by: Louie Tsai <louie.tsai@intel.com>
(cherry picked from commit dad8eb4b82)
2024-08-27 11:07:03 +08:00
lvliang-intel
dab0177432 Add benchmark README for ChatQnA (#662)
* Add benchmark README for ChatQnA

Signed-off-by: lvliang-intel <liang1.lv@intel.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* add benchmark.yaml

Signed-off-by: lvliang-intel <liang1.lv@intel.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* update yaml path

Signed-off-by: lvliang-intel <liang1.lv@intel.com>

* fix preci issue

Signed-off-by: lvliang-intel <liang1.lv@intel.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* update title

Signed-off-by: lvliang-intel <liang1.lv@intel.com>

---------

Signed-off-by: lvliang-intel <liang1.lv@intel.com>
Signed-off-by: Yingchun Guo <yingchun.guo@intel.com>
2024-08-27 11:06:36 +08:00
NeuralChatBot
e7b000eca5 Freeze OPEA images tag
Signed-off-by: NeuralChatBot <grp_neural_chat_bot@intel.com>
2024-08-25 16:28:59 +00:00
chen, suyue
723fddec79 add env for chatqna vllm (#655)
Signed-off-by: chensuyue <suyue.chen@intel.com>
(cherry picked from commit f78aa9ee2f)
2024-08-23 22:11:32 +08:00
Dina Suehiro Jones
f629702004 Minor fixes for CodeGen Xeon and Gaudi Kubernetes codegen.yaml and doc updates (#613)
* Minor fixes for CodeGen Xeon and Gaudi Kubernetes codegen.yaml and doc updates

Signed-off-by: dmsuehir <dina.s.jones@intel.com>
(cherry picked from commit c25063f4bb)
2024-08-23 22:11:31 +08:00
chen, suyue
4f3be23efa fix translation gaudi env (#653)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-08-22 20:00:05 +08:00
Ying Hu
9657f7bc83 Update set_env.sh (#644) 2024-08-22 16:02:55 +08:00
Sihan Chen
ac324a9ec2 minor fix mismatched hf token (#651) 2024-08-22 15:11:31 +08:00
chen, suyue
dfaf47978d optimize CI log format (#648)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-08-22 15:08:59 +08:00
Sihan Chen
acbaaf8ff0 change searchqna prompt (#639)
* change prompt

* debug

* debug

---------

Co-authored-by: chen, suyue <suyue.chen@intel.com>
2024-08-22 11:53:42 +08:00
Kefei Zhang
06cb308611 change codegen tgi model (#646)
* change codegen tgi model

Signed-off-by: KfreeZ <kefei.zhang@intel.com>
2024-08-22 11:42:57 +08:00
Zhenzhong1
e6b4fff05c Update the number of microservice replicas for OPEA v0.9 (#645) 2024-08-22 09:48:47 +08:00
lvliang-intel
a54ffd2c1e Support ChatQnA pipeline without rerank microservice (#643)
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
2024-08-22 09:26:54 +08:00
Abolfazl Shahbazi
f3ffcd50b3 Adding Trivy and SBOM actions (#635)
Signed-off-by: Abolfazl Shahbazi <abolfazl.shahbazi@intel.com>
2024-08-21 23:17:35 +08:00
Hoong Tee, Yeoh
947cbe39b8 ProductivitySuite Combo Application with REACT UI and Keycloak Authentication (#592)
* Initial commit for Combo App

Signed-off-by: Yogesh Pandey <yogesh.pandey@intel.com>

* Added README Updates

Signed-off-by: Yogesh Pandey <yogesh.pandey@intel.com>

* Updated README and Test Script

Signed-off-by: Yogesh Pandey <yogesh.pandey@intel.com>

* added react UI

Signed-off-by: jaswanth8888 <karani.jaswanth@gmail.com>

* added echo statement

Signed-off-by: jaswanth8888 <karani.jaswanth@gmail.com>

* Updated README

Signed-off-by: Yogesh Pandey <yogesh.pandey@intel.com>

* fixed delete file issue in data prep

Signed-off-by: jaswanth8888 <karani.jaswanth@gmail.com>

* Updated Port

Signed-off-by: Yogesh Pandey <yogesh.pandey@intel.com>

* updated readme and test file

Signed-off-by: jaswanth8888 <karani.jaswanth@gmail.com>

* updated readme

Signed-off-by: jaswanth8888 <karani.jaswanth@gmail.com>

* Readme Update

Signed-off-by: Yogesh Pandey <yogesh.pandey@intel.com>

* README: Include Keycloak Configuration Setup Guide

Signed-off-by: Yeoh, Hoong Tee <hoong.tee.yeoh@intel.com>

* Update keycloak_setup_guide relative link

Signed-off-by: Yeoh, Hoong Tee <hoong.tee.yeoh@intel.com>

* Added README Updates

Signed-off-by: Yogesh Pandey <yogesh.pandey@intel.com>

* ProductivitySuite: Include kubernetes deployment yaml and README steps

Signed-off-by: Yeoh, Hoong Tee <hoong.tee.yeoh@intel.com>

* removed unwanted line

Signed-off-by: jaswanth8888 <karani.jaswanth@gmail.com>

* Updates tests

Signed-off-by: Yogesh Pandey <yogesh.pandey@intel.com>

* updated readme

Signed-off-by: jaswanth8888 <karani.jaswanth@gmail.com>

* updated readme

Signed-off-by: jaswanth8888 <karani.jaswanth@gmail.com>

* updated readme

Signed-off-by: jaswanth8888 <karani.jaswanth@gmail.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* removed commented code

Signed-off-by: jaswanth8888 <karani.jaswanth@gmail.com>

* Update test script to adopt opea format

Update format for ProductivitySuite test script.

Signed-off-by: Yeoh, Hoong Tee <hoong.tee.yeoh@intel.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Fix productivitySuite test

Signed-off-by: Yeoh, Hoong Tee <hoong.tee.yeoh@intel.com>

* Fix ProductivitySuite test

Signed-off-by: Yeoh, Hoong Tee <hoong.tee.yeoh@intel.com>

* Updated Test Scripts

Signed-off-by: Yogesh Pandey <yogesh.pandey@intel.com>

* Updated Scripts

Signed-off-by: Yogesh Pandey <yogesh.pandey@intel.com>

* Added new services to test file

Signed-off-by: Yogesh Pandey <yogesh.pandey@intel.com>

* Added check for CodeGen TGI container

Signed-off-by: Yogesh Pandey <yogesh.pandey@intel.com>

* Updated Model ID

Signed-off-by: Yogesh Pandey <yogesh.pandey@intel.com>

* ProductivitySuite: Update react keycloak realm and client

Signed-off-by: Yeoh, Hoong Tee <hoong.tee.yeoh@intel.com>

* ProductivitySuite: Update keycloak_setup_guide

Signed-off-by: Yeoh, Hoong Tee <hoong.tee.yeoh@intel.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Updated Ports

Signed-off-by: Yogesh Pandey <yogesh.pandey@intel.com>

---------

Signed-off-by: Yogesh Pandey <yogesh.pandey@intel.com>
Signed-off-by: jaswanth8888 <karani.jaswanth@gmail.com>
Signed-off-by: Yeoh, Hoong Tee <hoong.tee.yeoh@intel.com>
Co-authored-by: Yogesh Pandey <yogesh.pandey@intel.com>
Co-authored-by: jaswanth8888 <karani.jaswanth@gmail.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: chen, suyue <suyue.chen@intel.com>
2024-08-21 23:04:29 +08:00
dolpher
fbb81b67db Update README for k8s deployment (#640)
Signed-off-by: Dolpher Du <dolpher.du@intel.com>
2024-08-21 22:56:53 +08:00
chen, suyue
5d39506c5c Add env params for chatqna xeon test (#642)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-08-21 22:53:32 +08:00
Chendi.Xue
566cf93c34 Add new DocIndexRetriever example (#405)
* Add DocIndexRetriever example

Signed-off-by: Chendi.Xue <chendi.xue@intel.com>


---------

Signed-off-by: Chendi.Xue <chendi.xue@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: XuhuiRen <44249229+XuhuiRen@users.noreply.github.com>
2024-08-21 22:30:23 +08:00
Lianhao Lu
771975510a chatqna k8s manifest: Fixed retriever-redis v0.9 image issue (#638)
Signed-off-by: Lianhao Lu <lianhao.lu@intel.com>
2024-08-21 22:24:29 +08:00
Sihan Chen
6674832162 fix tgi xeon tag (#641) 2024-08-21 22:17:07 +08:00
minmin-intel
67df2804de AgentQnA example (#601)
* initial code and readme for hierarchical agent example

* agent test with openai llm passed

* update readme and add test

* update test

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* change example name and update docker yaml

Signed-off-by: minmin-intel <minmin.hou@intel.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* change diagram name and test script name

Signed-off-by: minmin-intel <minmin.hou@intel.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* update test

---------

Signed-off-by: minmin-intel <minmin.hou@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-08-21 22:10:22 +08:00
Ying Chun Guo
46af6f3bc4 change namespace surfix to random string (#637)
Signed-off-by: Yingchun Guo <yingchun.guo@intel.com>
2024-08-21 13:00:06 +08:00
Louie Tsai
343d614591 [Doc]Add a micro/mega service WorkFlow for DocSum (#537)
* Update README.md

added a micro/megaservice workflow for Doc Sum

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>

* Update README.md

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>

* fix format issue

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>

---------

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
2024-08-21 09:49:28 +08:00
lvliang-intel
87617e761c Clean deprecated VisualQnA code (#634)
* Clean deprecated VisualQnA code

Signed-off-by: lvliang-intel <liang1.lv@intel.com>

* update tgi repo

Signed-off-by: lvliang-intel <liang1.lv@intel.com>

---------

Signed-off-by: lvliang-intel <liang1.lv@intel.com>
2024-08-20 22:21:18 +08:00
chen, suyue
db2d2bd1a1 fix chatqna guardrails (#615)
Signed-off-by: chensuyue <suyue.chen@intel.com>
Signed-off-by: letonghan <letong.han@intel.com>
2024-08-20 22:15:23 +08:00
WenjiaoYue
4fa37e7842 update env for manifest (#624)
Signed-off-by: Yue, Wenjiao <wenjiao.yue@intel.com>
2024-08-20 22:13:11 +08:00
Ying Hu
c73e4e0f26 [doc] Update README.md (#633)
fix the sentence for more general hardware
2024-08-20 15:22:53 +08:00
Zhenzhong1
ba78b4c994 update manifests for v0.9 (#632)
* update model HF TOKEN variables & reranking name for v0.9
2024-08-20 14:35:14 +08:00
Lianhao Lu
01c1b7504f Update K8S manifest for ChatQnA/CodeGen/CodeTrans/DocSum
- Sync with docker-compose changes since v0.8 release

- Add K8S probes

Signed-off-by: Lianhao Lu <lianhao.lu@intel.com>
2024-08-20 10:45:15 +08:00
chen, suyue
c016d8264a update deploy_gmc logical in cd workflow (#627)
* update deploy_gmc logical

Signed-off-by: chensuyue <suyue.chen@intel.com>

* bug fix

Signed-off-by: chensuyue <suyue.chen@intel.com>

* add always() for run-examples

Signed-off-by: chensuyue <suyue.chen@intel.com>

* bug fix

Signed-off-by: chensuyue <suyue.chen@intel.com>

---------

Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-08-19 22:24:13 +08:00
Ying Chun Guo
4fd3517f23 update benchmark manifest to fix errors (#626)
Signed-off-by: Yingchun Guo <yingchun.guo@intel.com>
2024-08-19 21:59:26 +08:00
Sun, Xuehao
503a1a9844 fix ghcr.io/huggingface/text-generation-inference tag (#622)
Signed-off-by: Sun, Xuehao <xuehao.sun@intel.com>
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-08-19 20:27:29 +08:00
Zhenzhong1
08f57fa54a update manifests for v0.9 (#623)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-08-19 15:55:04 +08:00
Ying Hu
5a9c109e35 doc fix: Update README.md to remove specific dicscription of paragraph-1 (#621)
* Update README.md

According to #https://github.com/opea-project/GenAIExamples/issues/338: 

The motivation paragraph 2 is more general and perhaps should move up as paragraph-1. Original paragraph-1 gets too specific into legal documents prematurely.

so Remove the paragraph 1 as it is not related.

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-08-19 11:44:41 +08:00
Sun, Xuehao
c327972776 chore: Add skopeo to manual freeze images workflow (#620)
Signed-off-by: Sun, Xuehao <xuehao.sun@intel.com>
2024-08-19 11:00:39 +08:00
Ying Chun Guo
f45e4c6956 Add GMC e2e in CD workflow (#619)
Signed-off-by: Yingchun Guo <yingchun.guo@intel.com>
2024-08-19 10:27:10 +08:00
chen, suyue
5dcadf3d3f Fix CI test changed file detect issue (#618)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-08-18 19:31:13 +08:00
chen, suyue
3363a37197 update workflow name (#617)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-08-18 17:46:09 +08:00
lvliang-intel
b2771ad3f2 Using TGI official release docker image for intel cpu (#581)
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
2024-08-18 17:17:44 +08:00
David Kinder
e81e0e557c doc: remove use of HTML for table in README (#596)
Signed-off-by: David B. Kinder <david.b.kinder@intel.com>
2024-08-17 22:44:45 +08:00
Ying Chun Guo
71363a6b9d change microservice tags in CD workflow (#612)
Signed-off-by: Yingchun Guo <yingchun.guo@intel.com>
2024-08-16 21:57:28 +08:00
Ying Chun Guo
a39f23a16e Add gmc build, scan and deploy workflow (#611)
Signed-off-by: Yingchun Guo <yingchun.guo@intel.com>
2024-08-16 20:55:01 +08:00
Sun, Xuehao
c9f9acab61 Freeze base images (#607)
* Update Dockerfile to use LANGCHAIN_VERSION argument

Signed-off-by: Sun, Xuehao <xuehao.sun@intel.com>

* Revert "Update Dockerfile to use LANGCHAIN_VERSION argument"

This reverts commit 1bff239d38.

* chore: Add manual freeze images workflow

Signed-off-by: Sun, Xuehao <xuehao.sun@intel.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* split jobs

Signed-off-by: Sun, Xuehao <xuehao.sun@intel.com>

---------

Signed-off-by: Sun, Xuehao <xuehao.sun@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-08-16 20:18:44 +08:00
Letong Han
040d2b7fd9 update port for dataprep in set_env.sh (#606)
Signed-off-by: letonghan <letong.han@intel.com>
2024-08-16 18:15:33 +08:00
chen, suyue
6296e9f2fb remove continue-on-error: true to stop the test when image build failed (#608)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-08-16 17:29:59 +08:00
Steve Zhang
c86cf8536d Add AudioQnA example via GMC (#597)
* add AudioQnA example via GMC.
Signed-off-by: zhlsunshine <huailong.zhang@intel.com>

* add more information for e2e test scritpts.
Signed-off-by: zhlsunshine <huailong.zhang@intel.com>

* fix bug in e2e test scripts.
Signed-off-by: zhlsunshine <huailong.zhang@intel.com>
2024-08-16 14:25:50 +08:00
chen, suyue
039014fbbf Fix CD workflow typos (#605)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-08-16 09:26:54 +08:00
chen, suyue
1c07a38457 set action back to pull_request_target (#602)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-08-16 00:31:01 +08:00
chen, suyue
e93146b33e Add BoM collect workflow and image publish workflow (#600)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-08-16 00:22:40 +08:00
chen, suyue
a6385bc6fd Fix left issues in CI/CD structure refactor (#599)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-08-16 00:22:24 +08:00
chen, suyue
c26d0f62b8 Enhance CI/CD infrastructure (#593)
Signed-off-by: chensuyue <suyue.chen@intel.com>
Signed-off-by: Sun, Xuehao <xuehao.sun@intel.com>
2024-08-15 22:39:21 +08:00
Letong Han
e71aba0080 Fix ChatQnA Qdrant CI issues (#569)
Signed-off-by: letonghan <letong.han@intel.com>
2024-08-14 22:31:27 +08:00
XinyaoWa
cfcac3f0ec Fix vLLM and vLLM-on-Ray UT bug (#580)
Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>
Co-authored-by: chen, suyue <suyue.chen@intel.com>
2024-08-14 18:03:25 +08:00
Ying Chun Guo
d68be058f5 add composible manifest e2e for cd workflow (#588)
Signed-off-by: Yingchun Guo <yingchun.guo@intel.com>
2024-08-14 15:53:49 +08:00
Jaswanth Karani
45cf553d36 fixed ISSUE-528 (#590)
* fixed ISSUE-528

Signed-off-by: jaswanth8888 <karani.jaswanth@gmail.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Signed-off-by: jaswanth8888 <karani.jaswanth@gmail.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-08-14 14:47:20 +08:00
Steve Zhang
1c23d87aa2 Add dataprep microservice to chatQnA example and the e2e test (#589)
Signed-off-by: zhlsunshine <huailong.zhang@intel.com>
2024-08-14 14:39:46 +08:00
chen, suyue
64bfea9054 fix corner issue in CI test (#585)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-08-13 17:51:08 +08:00
Sihan Chen
0a6bad0ab9 add k8s support for audioqna (#583)
* add k8s support for audioqna

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-08-13 17:38:18 +08:00
lvliang-intel
4f7fc39d66 Add kubernetes support for VisualQnA (#578)
* Add kubernetes support for VisualQnA

Signed-off-by: lvliang-intel <liang1.lv@intel.com>

* update gmc file

Signed-off-by: lvliang-intel <liang1.lv@intel.com>

* update pic

Signed-off-by: lvliang-intel <liang1.lv@intel.com>

---------

Signed-off-by: lvliang-intel <liang1.lv@intel.com>
2024-08-13 17:14:03 +08:00
XinyaoWa
80e3e2a2d3 Update mainifest for FaqGen (#582)
* update tgi version

Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>

* add k8s for faq

Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>

* add benchmark for faq

Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>

* refine k8s for faq

Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>

* add tuning for faq

Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>

* add prompts with different length for faq

Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>

* add tgi docker for llama3.1

Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>

* remove useless code

Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>

* remove nodeselector

Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>

* remove hg token

Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>

* refine code structure

Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* fix readme

Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>

---------

Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-08-13 16:29:15 +08:00
chen, suyue
8c384e0314 Build up docker images CD workflow (#576)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-08-13 15:20:34 +08:00
chen, suyue
3c9e2aaffd add secrets for test (#579)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-08-13 09:43:46 +08:00
Neo Zhang Jianyu
acdd712929 fix script issues in MD file (#538) 2024-08-13 09:20:30 +08:00
xiguiw
c297155bea Pass OLLAMA_MODEL env to docker container (#571)
Signed-off-by: Wang, Xigui <xigui.wang@intel.com>
2024-08-12 16:42:50 +08:00
WenjiaoYue
923cf69e63 Add VisualQnA UI (#572)
* update VisualQnA front_end

Signed-off-by: Yue, Wenjiao <wenjiao.yue@intel.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* update ViualQnA into the ui folder

Signed-off-by: Yue, Wenjiao <wenjiao.yue@intel.com>

* Add dockerFile

Signed-off-by: Yue, Wenjiao <wenjiao.yue@intel.com>

---------

Signed-off-by: Yue, Wenjiao <wenjiao.yue@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-08-12 15:01:49 +08:00
jotpalch
7a67298f19 chore: add support for .md file in file upload (#555)
Co-authored-by: chen, suyue <suyue.chen@intel.com>
2024-08-12 14:24:16 +08:00
Dina Suehiro Jones
a5ed2233b5 Fix minor issue in ChatQnA Gaudi docker README (#567)
Signed-off-by: dmsuehir <dina.s.jones@intel.com>
2024-08-12 12:58:42 +08:00
chen, suyue
e12baca3b8 Fix typo in CI workflow (#570)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-08-12 11:26:32 +08:00
chen, suyue
939502dba1 support multiple test cases for ChatQnA (#553)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-08-12 00:26:01 +08:00
Ying Chun Guo
a072441c06 improve manifest chaqna test (#565)
Signed-off-by: Yingchun Guo <yingchun.guo@intel.com>
2024-08-11 00:26:32 +08:00
Veenadhari Bedida
ed483719a8 Update ChatQnA readme with OpenShift instructions (#527)
* Update ChatQnA readme with OpenShift instructions

Signed-off-by: vbedida79 <veenadhari.bedida@intel.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Signed-off-by: vbedida79 <veenadhari.bedida@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-08-09 15:13:12 +05:30
ZePan110
14621f8492 Convert HTML to markdown format. (#564)
* Convert HTML to markdown format.

Signed-off-by: zepan <ze.pan@intel.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Signed-off-by: zepan <ze.pan@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-08-09 16:18:28 +08:00
lvliang-intel
2390920b1d Add VisualQnA docker for both Gaudi and Xeon using TGI serving (#547)
* Add VisualQnA docker for both Gaudi and Xeon

Signed-off-by: lvliang-intel <liang1.lv@intel.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* update token length

Signed-off-by: lvliang-intel <liang1.lv@intel.com>

---------

Signed-off-by: lvliang-intel <liang1.lv@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-08-09 09:45:17 +08:00
lkk
02a15366bc update chatqna dataprep-redis port (#557)
Co-authored-by: sdp <sdp@srf797636.jf.intel.com>
2024-08-08 22:23:32 +08:00
WenjiaoYue
f08d4115db Modify the language variable to match language highlight. (#543)
* Modify the variable name to match the language

Signed-off-by: Yue, Wenjiao <wenjiao.yue@intel.com>

* Modify the variable name to match the language

Signed-off-by: Yue, Wenjiao <wenjiao.yue@intel.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Signed-off-by: Yue, Wenjiao <wenjiao.yue@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-08-08 14:27:57 +08:00
chen, suyue
5ac77f78da fix faqgen on xeon test scripts (#552)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-08-08 13:39:38 +08:00
Ying Chun Guo
ebc165a6aa Rename github workflows (#554)
Signed-off-by: Yingchun Guo <yingchun.guo@intel.com>
2024-08-08 10:29:04 +08:00
nithi-i
ad8ca8886e Fix typo {your_ip} to {host_ip} (#533)
Co-authored-by: Pratool Bharti <pratool.bharti@intel.com>
2024-08-08 10:11:48 +08:00
Letong Han
88eeb0d7e6 Remove LangSmith from Examples (#545)
Signed-off-by: letonghan <letong.han@intel.com>
2024-08-08 09:14:38 +08:00
Ying Chun Guo
e22d41362d Refactor build image workflows with common action.yml (#525)
Signed-off-by: Yingchun Guo <yingchun.guo@intel.com>
2024-08-07 15:57:29 +08:00
394 changed files with 19498 additions and 3511 deletions


@@ -1,50 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: VisualQnA-test
on:
pull_request_target:
branches: [main]
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
paths:
- VisualQnA/**
- "!**.md"
- "!**/ui/**"
- .github/workflows/VisualQnA.yml
workflow_dispatch:
# If there is a new commit, the previous jobs will be canceled
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
VisualQnA:
runs-on: aise-cluster
strategy:
matrix:
job_name: ["basic"]
fail-fast: false
steps:
- name: Clean Up Working Directory
run: sudo rm -rf ${{github.workspace}}/*
- name: Checkout out Repo
uses: actions/checkout@v4
with:
ref: "refs/pull/${{ github.event.number }}/merge"
- name: Run Test
env:
HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
run: |
cd ${{ github.workspace }}/VisualQnA/tests
bash test_${{ matrix.job_name }}_inference.sh
- name: Publish pipeline artifact
if: ${{ !cancelled() }}
uses: actions/upload-artifact@v4
with:
name: ${{ matrix.job_name }}
path: ${{ github.workspace }}/VisualQnA/tests/*.log

.github/workflows/_example-workflow.yml (new file, 166 lines)

@@ -0,0 +1,166 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: Example jobs
permissions: read-all
on:
workflow_call:
inputs:
node:
required: true
type: string
example:
required: true
type: string
tag:
default: "latest"
required: false
type: string
build:
default: true
required: false
type: boolean
scan:
default: true
required: false
type: boolean
test_compose:
default: false
required: false
type: boolean
test_k8s:
default: false
required: false
type: boolean
test_gmc:
default: false
required: false
type: boolean
opea_branch:
default: "main"
required: false
type: string
jobs:
####################################################################################################
# Image Build
####################################################################################################
build-images:
runs-on: "docker-build-${{ inputs.node }}"
steps:
- name: Clean Up Working Directory
run: sudo rm -rf ${{github.workspace}}/*
- name: Checkout out Repo
uses: actions/checkout@v4
- name: Clone required Repo
run: |
cd ${{ github.workspace }}/${{ inputs.example }}/docker
docker_compose_path=${{ github.workspace }}/${{ inputs.example }}/docker/docker_build_compose.yaml
if [[ $(grep -c "tei-gaudi:" ${docker_compose_path}) != 0 ]]; then
git clone https://github.com/huggingface/tei-gaudi.git
fi
if [[ $(grep -c "vllm:" ${docker_compose_path}) != 0 ]]; then
git clone https://github.com/vllm-project/vllm.git
fi
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps && git checkout ${{ inputs.opea_branch }} && cd ../
- name: Build Image
if: ${{ fromJSON(inputs.build) }}
uses: opea-project/validation/actions/image-build@main
with:
work_dir: ${{ github.workspace }}/${{ inputs.example }}/docker
docker_compose_path: ${{ github.workspace }}/${{ inputs.example }}/docker/docker_build_compose.yaml
registry: ${OPEA_IMAGE_REPO}opea
tag: ${{ inputs.tag }}
####################################################################################################
# Trivy Scan
####################################################################################################
get-image-list:
needs: [build-images]
if: ${{ fromJSON(inputs.scan) && inputs.node == 'gaudi' }}
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.scan-matrix.outputs.matrix }}
steps:
- name: Checkout out Repo
uses: actions/checkout@v4
- name: Set Matrix
id: scan-matrix
run: |
pip install yq
compose_path=${{ github.workspace }}/${{ inputs.example }}/docker/docker_build_compose.yaml
echo "matrix=$(cat ${compose_path} | yq -r '.[]' | jq 'keys' | jq -c '.')" >> $GITHUB_OUTPUT
scan-images:
needs: [get-image-list, build-images]
if: ${{ fromJSON(inputs.scan) && inputs.node == 'gaudi'}}
runs-on: "docker-build-${{ inputs.node }}"
strategy:
matrix:
image: ${{ fromJSON(needs.get-image-list.outputs.matrix) }}
fail-fast: false
steps:
- name: Pull Image
run: |
docker pull ${OPEA_IMAGE_REPO}opea/${{ matrix.image }}:${{ inputs.tag }}
echo "OPEA_IMAGE_REPO=${OPEA_IMAGE_REPO}" >> $GITHUB_ENV
- name: Scan Container
uses: opea-project/validation/actions/trivy-scan@main
with:
image-ref: ${{ env.OPEA_IMAGE_REPO }}opea/${{ matrix.image }}:${{ inputs.tag }}
output: ${{ matrix.image }}-scan.txt
- name: Cleanup
if: always()
run: docker rmi -f ${OPEA_IMAGE_REPO}opea/${{ matrix.image }}:${{ inputs.tag }}
- uses: actions/upload-artifact@v4.3.4
with:
name: ${{ matrix.image }}-scan
path: ${{ matrix.image }}-scan.txt
overwrite: true
####################################################################################################
# Docker Compose Test
####################################################################################################
test-example-compose:
needs: [build-images]
if: ${{ fromJSON(inputs.test_compose) }}
uses: ./.github/workflows/_run-docker-compose.yml
with:
tag: ${{ inputs.tag }}
example: ${{ inputs.example }}
hardware: ${{ inputs.node }}
secrets: inherit
####################################################################################################
# K8S Test
####################################################################################################
test-k8s-manifest:
needs: [build-images]
if: ${{ fromJSON(inputs.test_k8s) }}
uses: ./.github/workflows/_manifest-e2e.yml
with:
example: ${{ inputs.example }}
hardware: ${{ inputs.node }}
tag: ${{ inputs.tag }}
context: "CD"
secrets: inherit
####################################################################################################
# GMC Test
####################################################################################################
test-gmc-pipeline:
needs: [build-images]
if: ${{ fromJSON(inputs.test_gmc) }}
uses: ./.github/workflows/_gmc-e2e.yml
with:
example: ${{ inputs.example }}
hardware: ${{ inputs.node }}
secrets: inherit
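
Note: _example-workflow.yml is a reusable workflow, so the inputs declared above (node, example, tag, and the build/scan/test toggles) are supplied by a caller through workflow_call. A minimal caller sketch, with the job name and input values chosen purely for illustration, could look like:

jobs:
  chatqna-gaudi:
    uses: ./.github/workflows/_example-workflow.yml
    with:
      node: gaudi
      example: ChatQnA
      tag: latest
      test_compose: true
    secrets: inherit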


@@ -51,7 +51,10 @@ jobs:
run: |
set -xe
if [ "${{ github.event_name }}" == "pull_request" ] || [ "${{ github.event_name }}" == "pull_request_target" ]; then
base_commit=${{ github.event.pull_request.base.sha }}
LATEST_COMMIT_SHA=$(curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \
"https://api.github.com/repos/opea-project/GenAIExamples/commits?sha=main" | jq -r '.[0].sha')
echo "Latest commit SHA is $LATEST_COMMIT_SHA"
base_commit=$LATEST_COMMIT_SHA
else
base_commit=$(git rev-parse HEAD~1) # push event
fi
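
Note: this hunk changes the diff base for pull requests from the recorded base SHA to the current tip of main, fetched through the GitHub commits API. The API returns a JSON array ordered newest-first, so jq -r '.[0].sha' picks the most recent commit; the same lookup can be reproduced from a shell (no token is needed for this public repository):

curl -s "https://api.github.com/repos/opea-project/GenAIExamples/commits?sha=main" | jq -r '.[0].sha'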


@@ -1,57 +1,51 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: E2E test with GMC
# This workflow will only test GMC pipeline and will not install GMC any more
name: Single GMC E2e Test For CD Workflow Call
on:
pull_request_target:
branches: [main]
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
paths:
- "**/kubernetes/**"
- "**/tests/test_gmc**"
- "!**.md"
- "!**.txt"
- "!**/kubernetes/manifests/**"
workflow_dispatch:
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
workflow_call:
inputs:
example:
default: "ChatQnA"
description: "The example to test on K8s"
required: true
type: string
hardware:
default: "xeon"
description: "Nodes to run the test, xeon or gaudi"
required: true
type: string
jobs:
job1:
uses: ./.github/workflows/reuse-get-test-matrix.yml
with:
diff_excluded_files: '.github|deprecated|docker|assets|*.md|*.txt'
xeon_server_label: 'xeon'
gaudi_server_label: 'gaudi'
gmc-test:
needs: [job1]
strategy:
matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }}
runs-on: "k8s-${{ matrix.hardware }}"
runs-on: "k8s-${{ inputs.hardware }}"
continue-on-error: true
steps:
- name: E2e test gmc
run: |
echo "Matrix - gmc: ${{ matrix.example }}"
- name: Clean Up Working Directory
run: sudo rm -rf ${{github.workspace}}/*
- name: Get checkout ref
run: |
if [ "${{ github.event_name }}" == "pull_request" ] || [ "${{ github.event_name }}" == "pull_request_target" ]; then
echo "CHECKOUT_REF=refs/pull/${{ github.event.number }}/merge" >> $GITHUB_ENV
else
echo "CHECKOUT_REF=${{ github.ref }}" >> $GITHUB_ENV
fi
echo "checkout ref ${{ env.CHECKOUT_REF }}"
- name: Checkout out Repo
uses: actions/checkout@v4
with:
ref: "refs/pull/${{ github.event.number }}/merge"
ref: ${{ env.CHECKOUT_REF }}
fetch-depth: 0
- name: Set variables
run: |
if [ ${{ matrix.hardware }} == "gaudi" ]; then IMAGE_REPO=${{ vars.IMAGE_REPO_GAUDI }}; else IMAGE_REPO=${{ vars.IMAGE_REPO_XEON }}; fi
echo "IMAGE_REPO=$OPEA_IMAGE_REPO" >> $GITHUB_ENV
lower_example=$(echo "${{ matrix.example }}" | tr '[:upper:]' '[:lower:]')
echo "APP_NAMESPACE=$lower_example-$(date +%Y%m%d%H%M%S)" >> $GITHUB_ENV
lower_example=$(echo "${{ inputs.example }}" | tr '[:upper:]' '[:lower:]')
echo "APP_NAMESPACE=$lower_example-$(tr -dc a-z0-9 </dev/urandom | head -c 16)" >> $GITHUB_ENV
echo "ROLLOUT_TIMEOUT_SECONDS=1800s" >> $GITHUB_ENV
echo "KUBECTL_TIMEOUT_SECONDS=60s" >> $GITHUB_ENV
echo "continue_test=true" >> $GITHUB_ENV
@@ -65,16 +59,16 @@ jobs:
GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
run: |
if [[ ! -f ${{ github.workspace }}/${{ matrix.example }}/tests/test_gmc_on_${{ matrix.hardware }}.sh ]]; then
if [[ ! -f ${{ github.workspace }}/${{ inputs.example }}/tests/test_gmc_on_${{ inputs.hardware }}.sh ]]; then
echo "No test script found, exist test!"
exit 0
else
echo "should_cleanup=true" >> $GITHUB_ENV
${{ github.workspace }}/${{ matrix.example }}/tests/test_gmc_on_${{ matrix.hardware }}.sh install_${{ matrix.example }}
echo "Testing ${{ matrix.example }}, waiting for pod ready..."
${{ github.workspace }}/${{ inputs.example }}/tests/test_gmc_on_${{ inputs.hardware }}.sh install_${{ inputs.example }}
echo "Testing ${{ inputs.example }}, waiting for pod ready..."
if kubectl rollout status deployment --namespace "$APP_NAMESPACE" --timeout "$ROLLOUT_TIMEOUT_SECONDS"; then
echo "Testing gmc ${{ matrix.example }}, running validation test..."
${{ github.workspace }}/${{ matrix.example }}/tests/test_gmc_on_${{ matrix.hardware }}.sh validate_${{ matrix.example }}
echo "Testing gmc ${{ inputs.example }}, running validation test..."
${{ github.workspace }}/${{ inputs.example }}/tests/test_gmc_on_${{ inputs.hardware }}.sh validate_${{ inputs.example }}
else
echo "Timeout waiting for pods in namespace $APP_NAMESPACE to be ready!"
exit 1

.github/workflows/_gmc-workflow.yml (new file, 146 lines)

@@ -0,0 +1,146 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: Build and deploy GMC system on call and manual
on:
workflow_dispatch:
inputs:
tag:
default: "latest"
required: true
type: string
description: "Tag to apply to images"
node:
default: "xeon"
required: true
type: string
description: "Hardware to run test"
opea_branch:
default: "main"
required: false
type: string
description: 'OPEA branch for image build'
workflow_call:
inputs:
tag:
default: "latest"
required: true
type: string
description: "Tag to apply to images"
node:
default: "xeon"
required: true
type: string
description: "Hardware to run test"
opea_branch:
default: "main"
required: false
type: string
description: 'OPEA branch for image build'
jobs:
####################################################################################################
# Image Build and Scan
####################################################################################################
image-build:
runs-on: "docker-build-${{ inputs.node }}"
steps:
- name: Checkout GenAIInfra repository
uses: actions/checkout@v4
with:
repository: opea-project/GenAIInfra
ref: ${{ inputs.opea_branch }}
path: GenAIInfra
- name: Set variables
id: set_variables
run: |
echo "DOCKER_REGISTRY=${OPEA_IMAGE_REPO}opea" >> $GITHUB_ENV
echo "IMAGE_REPO=${OPEA_IMAGE_REPO}" >> $GITHUB_OUTPUT
echo "VERSION=${{ inputs.tag }}" >> $GITHUB_ENV
echo "VERSION=${{ inputs.tag }}" >> $GITHUB_OUTPUT
- name: Build image and push
run: |
cd ${{github.workspace}}/GenAIInfra/microservices-connector
make docker.build
make docker.push
- name: Scan gmcmanager
if: ${{ inputs.node == 'gaudi' }}
uses: opea-project/validation/actions/trivy-scan@main
with:
image-ref: ${{ env.DOCKER_REGISTRY }}/gmcmanager:${{ env.VERSION }}
output: gmcmanager-scan.txt
- name: Upload gmcmanager scan result
if: ${{ inputs.node == 'gaudi' }}
uses: actions/upload-artifact@v4.3.4
with:
name: gmcmanager-scan
path: gmcmanager-scan.txt
overwrite: true
- name: Scan gmcrouter
if: ${{ inputs.node == 'gaudi' }}
uses: opea-project/validation/actions/trivy-scan@main
with:
image-ref: ${{ env.DOCKER_REGISTRY }}/gmcrouter:${{ env.VERSION }}
output: gmcrouter-scan.txt
- name: Upload gmcrouter scan result
if: ${{ inputs.node == 'gaudi' }}
uses: actions/upload-artifact@v4.3.4
with:
name: gmcrouter-scan
path: gmcrouter-scan.txt
overwrite: true
- name: Clean up images
if: always()
run: |
docker rmi ${{ env.DOCKER_REGISTRY }}/gmcrouter:${{ env.VERSION }}
docker rmi ${{ env.DOCKER_REGISTRY }}/gmcmanager:${{ env.VERSION }}
- name: Clean up GenAIInfra source codes
if: always()
run: |
rm -rf ${{github.workspace}}/GenAIInfra
####################################################################################################
# GMC Install
####################################################################################################
gmc-install:
needs: image-build
runs-on: "k8s-${{ inputs.node }}"
steps:
- name: Checkout GenAIInfra repository
uses: actions/checkout@v4
with:
repository: opea-project/GenAIInfra
ref: ${{ inputs.opea_branch }}
path: GenAIInfra
- name: Set variables
run: |
echo "SYSTEM_NAMESPACE=opea-system" >> $GITHUB_ENV
echo "VERSION=${{ inputs.tag }}" >> $GITHUB_ENV
echo "SET_VERSION=true" >> $GITHUB_ENV # to change the tag of microservice images
- name: Cleanup existing GMC
run: |
cd GenAIInfra
.github/workflows/scripts/e2e/gmc_install.sh cleanup_gmc
cd ..
- name: Install GMC
run: |
cd GenAIInfra
.github/workflows/scripts/e2e/gmc_install.sh install_gmc
cd ..
- name: Clean up GenAIInfra source codes
if: always()
run: |
rm -rf ${{github.workspace}}/GenAIInfra


@@ -34,6 +34,10 @@ jobs:
image_repo: ${{ steps.build-megaservice-image.outputs.image_repo }}
image_tag: ${{ steps.build-megaservice-image.outputs.image_tag }}
steps:
- name: Clean up Working Directory
run: |
sudo rm -rf ${{github.workspace}}/* || true
- name: Get checkout ref
run: |
if [ "${{ github.event_name }}" == "pull_request" ] || [ "${{ github.event_name }}" == "pull_request_target" ]; then
@@ -62,3 +66,4 @@ jobs:
fi
echo "IMAGE_TAG=${IMAGE_TAG}"
echo "image_tag=$IMAGE_TAG" >> $GITHUB_OUTPUT
echo "image_repo=${IMAGE_REPO}" >> $GITHUB_OUTPUT

.github/workflows/_manifest-e2e.yml (new file, 105 lines)

@@ -0,0 +1,105 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: Single Kubernetes Manifest E2e Test For Call
on:
workflow_call:
inputs:
example:
default: "ChatQnA"
description: "The example to test on K8s"
required: true
type: string
hardware:
default: "xeon"
description: "Nodes to run the test, xeon or gaudi"
required: true
type: string
tag:
default: "latest"
description: "Tag to apply to images, default is latest"
required: false
type: string
context:
default: "CI"
description: "CI or CD"
required: false
type: string
jobs:
manifest-test:
runs-on: "k8s-${{ inputs.hardware }}"
continue-on-error: true
steps:
- name: Clean Up Working Directory
run: sudo rm -rf ${{github.workspace}}/*
- name: Get checkout ref
run: |
if [ "${{ github.event_name }}" == "pull_request" ] || [ "${{ github.event_name }}" == "pull_request_target" ]; then
echo "CHECKOUT_REF=refs/pull/${{ github.event.number }}/merge" >> $GITHUB_ENV
else
echo "CHECKOUT_REF=${{ github.ref }}" >> $GITHUB_ENV
fi
echo "checkout ref ${{ env.CHECKOUT_REF }}"
- name: Checkout out Repo
uses: actions/checkout@v4
with:
ref: ${{ env.CHECKOUT_REF }}
fetch-depth: 0
- name: Set variables
run: |
echo "IMAGE_REPO=$OPEA_IMAGE_REPO" >> $GITHUB_ENV
echo "IMAGE_TAG=${{ inputs.tag }}" >> $GITHUB_ENV
lower_example=$(echo "${{ inputs.example }}" | tr '[:upper:]' '[:lower:]')
echo "NAMESPACE=$lower_example-$(tr -dc a-z0-9 </dev/urandom | head -c 16)" >> $GITHUB_ENV
echo "ROLLOUT_TIMEOUT_SECONDS=1800s" >> $GITHUB_ENV
echo "KUBECTL_TIMEOUT_SECONDS=60s" >> $GITHUB_ENV
echo "continue_test=true" >> $GITHUB_ENV
echo "should_cleanup=false" >> $GITHUB_ENV
echo "skip_validate=true" >> $GITHUB_ENV
echo "CONTEXT=${{ inputs.context }}" >> $GITHUB_ENV
echo "NAMESPACE=$NAMESPACE"
- name: Kubectl install
id: install
run: |
if [[ ! -f ${{ github.workspace }}/${{ inputs.example }}/tests/test_manifest_on_${{ inputs.hardware }}.sh ]]; then
echo "No test script found, exist test!"
exit 0
else
${{ github.workspace }}/${{ inputs.example }}/tests/test_manifest_on_${{ inputs.hardware }}.sh init_${{ inputs.example }}
echo "should_cleanup=true" >> $GITHUB_ENV
kubectl create ns $NAMESPACE
${{ github.workspace }}/${{ inputs.example }}/tests/test_manifest_on_${{ inputs.hardware }}.sh install_${{ inputs.example }} $NAMESPACE
echo "Testing ${{ inputs.example }}, waiting for pod ready..."
if kubectl rollout status deployment --namespace "$NAMESPACE" --timeout "$ROLLOUT_TIMEOUT_SECONDS"; then
echo "Testing manifests ${{ inputs.example }}, waiting for pod ready done!"
echo "skip_validate=false" >> $GITHUB_ENV
else
echo "Timeout waiting for pods in namespace $NAMESPACE to be ready!"
exit 1
fi
sleep 60
fi
- name: Validate e2e test
if: always()
run: |
if $skip_validate; then
echo "Skip validate"
else
${{ github.workspace }}/${{ inputs.example }}/tests/test_manifest_on_${{ inputs.hardware }}.sh validate_${{ inputs.example }} $NAMESPACE
fi
- name: Kubectl uninstall
if: always()
run: |
if $should_cleanup; then
if ! kubectl delete ns $NAMESPACE --timeout=$KUBECTL_TIMEOUT_SECONDS; then
kubectl delete pods --namespace $NAMESPACE --force --grace-period=0 --all
kubectl delete ns $NAMESPACE --force --grace-period=0 --timeout=$KUBECTL_TIMEOUT_SECONDS
fi
fi


@@ -0,0 +1,117 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: Image Build
permissions: read-all
on:
workflow_call:
inputs:
registry:
description: Container Registry URL
required: false
default: ""
type: string
tag:
description: Container Tag
required: false
default: "latest"
type: string
example:
description: Example to test
required: true
type: string
hardware:
description: Hardware to run the test on
required: true
type: string
jobs:
get-test-case:
runs-on: ubuntu-latest
outputs:
test_cases: ${{ steps.test-case-matrix.outputs.test_cases }}
CHECKOUT_REF: ${{ steps.get-checkout-ref.outputs.CHECKOUT_REF }}
steps:
- name: Get checkout ref
id: get-checkout-ref
run: |
if [ "${{ github.event_name }}" == "pull_request" ] || [ "${{ github.event_name }}" == "pull_request_target" ]; then
CHECKOUT_REF=refs/pull/${{ github.event.number }}/merge
else
CHECKOUT_REF=${{ github.ref }}
fi
echo "CHECKOUT_REF=${CHECKOUT_REF}" >> $GITHUB_OUTPUT
echo "checkout ref ${CHECKOUT_REF}"
- name: Checkout out Repo
uses: actions/checkout@v4
with:
ref: ${{ steps.get-checkout-ref.outputs.CHECKOUT_REF }}
fetch-depth: 0
- name: Get test matrix
shell: bash
id: test-case-matrix
run: |
set -x
example_l=$(echo ${{ inputs.example }} | tr '[:upper:]' '[:lower:]')
cd ${{ github.workspace }}/${{ inputs.example }}/tests
test_cases=$(find . -type f -name "test_${example_l}*on_${{ inputs.hardware }}.sh" -print | cut -d/ -f2 | jq -R '.' | jq -sc '.')
echo "test_cases=$test_cases" >> $GITHUB_OUTPUT
run-test:
needs: [get-test-case]
strategy:
matrix:
test_case: ${{ fromJSON(needs.get-test-case.outputs.test_cases) }}
fail-fast: false
runs-on: ${{ inputs.hardware }}
continue-on-error: true
steps:
- name: Clean up Working Directory
run: |
sudo rm -rf ${{github.workspace}}/* || true
docker system prune -f
docker rmi $(docker images --filter reference="*/*/*:latest" -q) || true
docker rmi $(docker images --filter reference="*/*:ci" -q) || true
- name: Checkout out Repo
uses: actions/checkout@v4
with:
ref: ${{ needs.get-test-case.outputs.CHECKOUT_REF }}
fetch-depth: 0
- name: Run test
shell: bash
env:
HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
PINECONE_KEY: ${{ secrets.PINECONE_KEY }}
IMAGE_REPO: ${{ inputs.registry }}
IMAGE_TAG: ${{ inputs.tag }}
example: ${{ inputs.example }}
hardware: ${{ inputs.hardware }}
test_case: ${{ matrix.test_case }}
run: |
cd ${{ github.workspace }}/$example/tests
if [[ "$IMAGE_REPO" == "" ]]; then export IMAGE_REPO="${OPEA_IMAGE_REPO}opea"; fi
if [ -f ${test_case} ]; then timeout 30m bash ${test_case}; else echo "Test script {${test_case}} not found, skip test!"; fi
- name: Clean up container
shell: bash
if: cancelled() || failure()
run: |
cd ${{ github.workspace }}/${{ inputs.example }}/docker/${{ inputs.hardware }}
yaml_files=$(find . -type f -name "*compose*yaml")
for file in $yaml_files; do
docker compose -f ${file} stop && docker compose -f ${file} rm -f || true
done
docker system prune -f
docker rmi $(docker images --filter reference="*:5000/*/*" -q) || true
- name: Publish pipeline artifact
if: ${{ !cancelled() }}
uses: actions/upload-artifact@v4
with:
name: ${{ matrix.test_case }}
path: ${{ github.workspace }}/${{ inputs.example }}/tests/*.log
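
Note: the get-test-case job above discovers test cases by listing every tests/test_<example>*on_<hardware>.sh script and packing the file names into a JSON array with jq. As an illustration (file names assumed, ordering depends on find), a ChatQnA run on gaudi with two matching scripts would produce:

test_cases=["test_chatqna_on_gaudi.sh","test_chatqna_guardrails_on_gaudi.sh"]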


@@ -1,91 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: E2E test with docker compose
on:
pull_request_target:
branches: [main]
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
paths:
- "**/docker/**"
- "**/tests/**"
- "**/ui/**"
- "!**.md"
- "!**.txt"
- .github/workflows/docker-compose-e2e.yml
workflow_dispatch:
# If there is a new commit, the previous jobs will be canceled
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
job1:
uses: ./.github/workflows/reuse-get-test-matrix.yml
with:
diff_excluded_files: '.github|README.md|*.txt|deprecate|kubernetes|manifest|gmc|assets'
mega-image-build:
needs: job1
strategy:
matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }}
uses: ./.github/workflows/reuse-image-build.yml
with:
image_tag: ${{ github.event.pull_request.head.sha }}
mega_service: "${{ matrix.example }}"
runner_label: "docker-build-${{ matrix.hardware }}"
Example-test:
needs: [job1, mega-image-build]
strategy:
matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }}
runs-on: ${{ matrix.hardware }}
continue-on-error: true
steps:
- name: Test example
run: |
echo "Matrix - example ${{ matrix.example }}, hardware ${{ matrix.hardware }}"
- name: Clean Up Working Directory
run: sudo rm -rf ${{github.workspace}}/*
- name: Checkout out Repo
uses: actions/checkout@v4
with:
ref: "refs/pull/${{ github.event.number }}/merge"
- name: Run test
env:
HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
example: ${{ matrix.example }}
hardware: ${{ matrix.hardware }}
IMAGE_TAG: ${{ needs.mega-image-build.outputs.image_tag }}
IMAGE_REPO_GAUDI: ${{ vars.IMAGE_REPO_GAUDI }}
IMAGE_REPO_XEON: ${{ vars.IMAGE_REPO_XEON }}
run: |
cd ${{ github.workspace }}/$example/tests
if [ "$hardware" == "gaudi" ]; then IMAGE_REPO=$IMAGE_REPO_GAUDI; else IMAGE_REPO=$IMAGE_REPO_XEON; fi
export IMAGE_REPO=${IMAGE_REPO}
example_l=$(echo $example | tr '[:upper:]' '[:lower:]')
if [ -f test_${example_l}_on_${hardware}.sh ]; then timeout 30m bash test_${example_l}_on_${hardware}.sh; else echo "Test script not found, skip test!"; fi
- name: Clean up container
env:
example: ${{ matrix.example }}
hardware: ${{ matrix.hardware }}
if: cancelled() || failure()
run: |
cd ${{ github.workspace }}/$example/docker/$hardware
docker compose stop && docker compose rm -f
echo y | docker system prune
- name: Publish pipeline artifact
if: ${{ !cancelled() }}
uses: actions/upload-artifact@v4
with:
name: ${{ matrix.example }}-${{ matrix.hardware }}
path: ${{ github.workspace }}/${{ matrix.example }}/tests/*.log


@@ -0,0 +1,10 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# this file should be run in the root of the repo
services:
audioqna:
build:
context: docker
dockerfile: ./Dockerfile
image: ${REGISTRY:-opea}/audioqna:${TAG:-latest}
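
Note: these per-example build definitions use standard compose variable interpolation, so ${REGISTRY:-opea} and ${TAG:-latest} fall back to opea and latest when the variables are unset. A hedged usage sketch follows; the file name and location are assumed for illustration (the diff header does not show the path) and the registry value is made up:

docker compose -f docker_build_compose.yaml build
# -> builds opea/audioqna:latest
REGISTRY=ghcr.io/myorg TAG=v0.9 docker compose -f docker_build_compose.yaml build
# -> builds ghcr.io/myorg/audioqna:v0.9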


@@ -0,0 +1,20 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# this file should be run in the root of the repo
services:
chatqna:
build:
context: docker
dockerfile: ./Dockerfile
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
chatqna-ui:
build:
context: docker/ui
dockerfile: ./docker/Dockerfile
image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest}
chatqna-conversation-ui:
build:
context: docker/ui
dockerfile: ./docker/Dockerfile.react
image: ${REGISTRY:-opea}/chatqna-conversation-ui:${TAG:-latest}


@@ -0,0 +1,20 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# this file should be run in the root of the repo
services:
codegen:
build:
context: docker
dockerfile: ./Dockerfile
image: ${REGISTRY:-opea}/codegen:${TAG:-latest}
codegen-ui:
build:
context: docker/ui
dockerfile: ./docker/Dockerfile
image: ${REGISTRY:-opea}/codegen-ui:${TAG:-latest}
codegen-react-ui:
build:
context: docker/ui
dockerfile: ./docker/Dockerfile.react
image: ${REGISTRY:-opea}/codegen-conversation-ui:${TAG:-latest}


@@ -0,0 +1,15 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# this file should be run in the root of the repo
services:
codetrans:
build:
context: docker
dockerfile: ./Dockerfile
image: ${REGISTRY:-opea}/codetrans:${TAG:-latest}
codetrans-ui:
build:
context: docker/ui
dockerfile: ./docker/Dockerfile
image: ${REGISTRY:-opea}/codetrans-ui:${TAG:-latest}


@@ -0,0 +1,20 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# this file should be run in the root of the repo
services:
docsum:
build:
context: docker
dockerfile: ./Dockerfile
image: ${REGISTRY:-opea}/docsum:${TAG:-latest}
docsum-ui:
build:
context: docker/ui
dockerfile: ./docker/Dockerfile
image: ${REGISTRY:-opea}/docsum-ui:${TAG:-latest}
docsum-react-ui:
build:
context: docker/ui
dockerfile: ./docker/Dockerfile.react
image: ${REGISTRY:-opea}/docsum-react-ui:${TAG:-latest}


@@ -0,0 +1,20 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# this file should be run in the root of the repo
services:
faqgen:
build:
context: docker
dockerfile: ./Dockerfile
image: ${REGISTRY:-opea}/faqgen:${TAG:-latest}
faqgen-ui:
build:
context: docker/ui
dockerfile: ./docker/Dockerfile
image: ${REGISTRY:-opea}/faqgen-ui:${TAG:-latest}
faqgen-react-ui:
build:
context: docker/ui
dockerfile: ./docker/Dockerfile.react
image: ${REGISTRY:-opea}/faqgen-react-ui:${TAG:-latest}


@@ -0,0 +1,15 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# this file should be run in the root of the repo
services:
searchqna:
build:
context: docker
dockerfile: ./Dockerfile
image: ${REGISTRY:-opea}/searchqna:${TAG:-latest}
searchqna-ui:
build:
context: docker/ui
dockerfile: ./docker/Dockerfile
image: ${REGISTRY:-opea}/searchqna-ui:${TAG:-latest}


@@ -0,0 +1,15 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# this file should be run in the root of the repo
services:
translation:
build:
context: docker
dockerfile: ./Dockerfile
image: ${REGISTRY:-opea}/translation:${TAG:-latest}
translation-ui:
build:
context: docker/ui
dockerfile: ./docker/Dockerfile
image: ${REGISTRY:-opea}/translation-ui:${TAG:-latest}


@@ -1,33 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# Test
name: Build latest images on push event
on:
push:
branches: [ 'main' ]
paths:
- "**/docker/*.py"
- "**/docker/Dockerfile"
- "**/docker/ui/**"
workflow_dispatch:
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}-on-push
cancel-in-progress: true
jobs:
job1:
uses: ./.github/workflows/reuse-get-test-matrix.yml
mega-image-build:
needs: job1
strategy:
matrix:
workload: ${{ fromJSON(needs.job1.outputs.run_matrix).include.*.example }}
hardware: ["gaudi","xeon"]
uses: ./.github/workflows/reuse-image-build.yml
with:
image_tag: latest
mega_service: "${{ matrix.workload }}"
runner_label: docker-build-${{ matrix.hardware }}


@@ -1,111 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: E2E test with manifests
on:
pull_request:
branches: [main]
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
paths:
- "**/kubernetes/manifests/**"
- "**/tests/test_manifest**"
- "!**.md"
- "!**.txt"
workflow_dispatch:
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
job1:
uses: ./.github/workflows/reuse-get-test-matrix.yml
with:
diff_excluded_files: '.github|deprecated|docker|assets|*.md|*.txt'
xeon_server_label: 'xeon'
gaudi_server_label: 'gaudi'
mega-image-build:
needs: job1
strategy:
matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }}
uses: ./.github/workflows/reuse-image-build.yml
with:
image_tag: ${{ github.event.pull_request.head.sha }}
mega_service: "${{ matrix.example }}"
runner_label: "docker-build-${{ matrix.hardware }}"
manifest-test:
needs: [job1, mega-image-build]
strategy:
matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }}
runs-on: "k8s-${{ matrix.hardware }}"
continue-on-error: true
steps:
- name: E2e test manifest
run: |
echo "Matrix - manifest: ${{ matrix.example }}"
- name: Clean Up Working Directory
run: sudo rm -rf ${{github.workspace}}/*
- name: Checkout out Repo
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Set variables
run: |
if [ ${{ matrix.hardware }} == "gaudi" ]; then IMAGE_REPO=${{ vars.IMAGE_REPO_GAUDI }}; else IMAGE_REPO=${{ vars.IMAGE_REPO_XEON }}; fi
echo "IMAGE_REPO=$OPEA_IMAGE_REPO" >> $GITHUB_ENV
echo "IMAGE_TAG=${{needs.mega-image-build.outputs.image_tag}}" >> $GITHUB_ENV
lower_example=$(echo "${{ matrix.example }}" | tr '[:upper:]' '[:lower:]')
echo "NAMESPACE=$lower_example-$(date +%Y%m%d%H%M%S)" >> $GITHUB_ENV
echo "ROLLOUT_TIMEOUT_SECONDS=1800s" >> $GITHUB_ENV
echo "KUBECTL_TIMEOUT_SECONDS=60s" >> $GITHUB_ENV
echo "continue_test=true" >> $GITHUB_ENV
echo "should_cleanup=false" >> $GITHUB_ENV
echo "skip_validate=true" >> $GITHUB_ENV
echo "NAMESPACE=$NAMESPACE"
- name: Kubectl install
id: install
run: |
if [[ ! -f ${{ github.workspace }}/${{ matrix.example }}/tests/test_manifest_on_${{ matrix.hardware }}.sh ]]; then
echo "No test script found, exist test!"
exit 0
else
${{ github.workspace }}/${{ matrix.example }}/tests/test_manifest_on_${{ matrix.hardware }}.sh init_${{ matrix.example }}
echo "should_cleanup=true" >> $GITHUB_ENV
kubectl create ns $NAMESPACE
${{ github.workspace }}/${{ matrix.example }}/tests/test_manifest_on_${{ matrix.hardware }}.sh install_${{ matrix.example }} $NAMESPACE
echo "Testing ${{ matrix.example }}, waiting for pod ready..."
if kubectl rollout status deployment --namespace "$NAMESPACE" --timeout "$ROLLOUT_TIMEOUT_SECONDS"; then
echo "Testing manifests ${{ matrix.example }}, waiting for pod ready done!"
echo "skip_validate=false" >> $GITHUB_ENV
else
echo "Timeout waiting for pods in namespace $NAMESPACE to be ready!"
exit 1
fi
sleep 60
fi
- name: Validate e2e test
if: always()
run: |
if $skip_validate; then
echo "Skip validate"
else
${{ github.workspace }}/${{ matrix.example }}/tests/test_manifest_on_${{ matrix.hardware }}.sh validate_${{ matrix.example }} $NAMESPACE
fi
- name: Kubectl uninstall
if: always()
run: |
if $should_cleanup; then
if ! kubectl delete ns $NAMESPACE --timeout=$KUBECTL_TIMEOUT_SECONDS; then
kubectl delete pods --namespace $NAMESPACE --force --grace-period=0 --all
kubectl delete ns $NAMESPACE --force --grace-period=0 --timeout=$KUBECTL_TIMEOUT_SECONDS
fi
fi

.github/workflows/manual-bom-scan.yml (new file, 86 lines)

@@ -0,0 +1,86 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: Examples docker images BoM scan on manual event
on:
workflow_dispatch:
inputs:
node:
default: "gaudi"
description: "Hardware to run test"
required: true
type: string
examples:
default: "ChatQnA"
description: 'List of examples to test [AudioQnA,ChatQnA,CodeGen,CodeTrans,DocSum,FaqGen,SearchQnA,Translation]'
required: true
type: string
tag:
default: "latest"
description: "Tag to apply to images"
required: true
type: string
permissions: read-all
jobs:
get-image-list:
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.scan-matrix.outputs.matrix }}
steps:
- name: Checkout out Repo
uses: actions/checkout@v4
- name: Set Matrix
id: scan-matrix
run: |
pip install yq
examples=($(echo ${{ inputs.examples }} | tr ',' ' '))
image_list=[]
for example in ${examples[@]}
do
images=$(cat ${{ github.workspace }}/${example}/docker/docker_build_compose.yaml | yq -r '.[]' | jq 'keys' | jq -c '.')
image_list=$(echo ${image_list} | jq -s '.[0] + .[1] | unique' - <(echo ${images}))
done
echo "matrix=$(echo ${image_list} | jq -c '.')" >> $GITHUB_OUTPUT
scan-license:
needs: get-image-list
runs-on: "docker-build-${{ inputs.node }}"
strategy:
matrix:
image: ${{ fromJson(needs.get-image-list.outputs.matrix) }}
fail-fast: false
steps:
- name: Pull Image
run: |
docker pull ${OPEA_IMAGE_REPO}opea/${{ matrix.image }}:${{ inputs.tag }}
echo "OPEA_IMAGE_REPO=${OPEA_IMAGE_REPO}" >> $GITHUB_ENV
- name: SBOM Scan Container
uses: anchore/sbom-action@v0.17.1
with:
image: ${{ env.OPEA_IMAGE_REPO }}opea/${{ matrix.image }}:${{ inputs.tag }}
output-file: ${{ matrix.image }}-sbom-scan.txt
format: 'spdx-json'
- name: Security Scan Container
uses: aquasecurity/trivy-action@0.24.0
with:
image-ref: ${{ env.OPEA_IMAGE_REPO }}opea/${{ matrix.image }}:${{ inputs.tag }}
output: ${{ matrix.image }}-trivy-scan.txt
format: 'table'
exit-code: '1'
ignore-unfixed: true
vuln-type: 'os,library'
severity: 'CRITICAL,HIGH'
- name: Cleanup
if: always()
run: docker rmi -f ${OPEA_IMAGE_REPO}opea/${{ matrix.image }}:${{ inputs.tag }}
- uses: actions/upload-artifact@v4.3.4
with:
name: ${{ matrix.image }}-scan
path: ${{ matrix.image }}-*-scan.txt
overwrite: true


@@ -0,0 +1,68 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: Examples publish docker image on manual event
on:
workflow_dispatch:
inputs:
nodes:
default: "gaudi"
description: "Hardware to run test"
required: true
type: string
examples:
default: "ChatQnA"
description: 'List of examples to test [AudioQnA,ChatQnA,CodeGen,CodeTrans,DocSum,FaqGen,SearchQnA,Translation]'
required: true
type: string
tag:
default: "latest"
description: "Tag to apply to images"
required: true
type: string
publish:
default: false
description: 'Publish images to docker hub'
required: false
type: boolean
publish_tags:
default: "latest,v1.0"
description: 'Tag list apply to publish images'
required: false
type: string
permissions: read-all
jobs:
get-image-list:
runs-on: ${{ inputs.node }}
outputs:
matrix: ${{ steps.scan-matrix.outputs.matrix }}
steps:
- name: Checkout Repo
uses: actions/checkout@v4
- name: Set Matrix
id: scan-matrix
run: |
examples=($(echo ${{ inputs.examples }} | tr ',' ' '))
image_list=[]
for example in ${examples[@]}
do
images=$(cat ${{ github.workspace }}/${example}/docker/docker_build_compose.yaml | yq -r '.[]' | jq 'keys' | jq -c '.')
image_list=$(echo ${image_list} | jq -s '.[0] + .[1] | unique' - <(echo ${images}))
done
echo "matrix=$(echo ${image_list} | jq -c '.')" >> $GITHUB_OUTPUT
publish:
needs: [get-image-list]
strategy:
matrix:
image: ${{ fromJSON(needs.get-image-list.outputs.matrix) }}
runs-on: "docker-build-${{ inputs.node }}"
steps:
- name: Image Publish
uses: opea-project/validation/actions/image-publish@main
with:
local_image_ref: ${OPEA_IMAGE_REPO}opea/${{ matrix.image }}:${{ inputs.tag }}
image_name: opea/${{ matrix.image }}
publish_tags: ${{ inputs.publish_tags }}


@@ -0,0 +1,110 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: Examples CD workflow on manual event
on:
workflow_dispatch:
inputs:
nodes:
default: "gaudi,xeon"
description: "Hardware to run test"
required: true
type: string
examples:
default: "ChatQnA"
description: 'List of examples to test [AudioQnA,ChatQnA,CodeGen,CodeTrans,DocSum,FaqGen,SearchQnA,Translation]'
required: true
type: string
tag:
default: "latest"
description: "Tag to apply to images"
required: true
type: string
deploy_gmc:
default: false
description: 'Whether to deploy gmc'
required: true
type: boolean
build:
default: true
description: 'Build test required images for Examples'
required: false
type: boolean
scan:
default: true
description: 'Scan all images with Trivy'
required: false
type: boolean
test_compose:
default: true
description: 'Test examples with docker compose'
required: false
type: boolean
test_k8s:
default: false
description: 'Test examples with k8s'
required: false
type: boolean
test_gmc:
default: false
description: 'Test examples with gmc'
required: false
type: boolean
opea_branch:
default: "main"
description: 'OPEA branch for image build'
required: false
type: string
permissions: read-all
jobs:
get-test-matrix:
runs-on: ubuntu-latest
outputs:
examples: ${{ steps.get-matrix.outputs.examples }}
nodes: ${{ steps.get-matrix.outputs.nodes }}
steps:
- name: Create Matrix
id: get-matrix
run: |
examples=($(echo ${{ inputs.examples }} | tr ',' ' '))
examples_json=$(printf '%s\n' "${examples[@]}" | sort -u | jq -R '.' | jq -sc '.')
echo "examples=$examples_json" >> $GITHUB_OUTPUT
nodes=($(echo ${{ inputs.nodes }} | tr ',' ' '))
nodes_json=$(printf '%s\n' "${nodes[@]}" | sort -u | jq -R '.' | jq -sc '.')
echo "nodes=$nodes_json" >> $GITHUB_OUTPUT
build-deploy-gmc:
needs: [get-test-matrix]
if: ${{ fromJSON(inputs.deploy_gmc) }}
strategy:
matrix:
node: ${{ fromJson(needs.get-test-matrix.outputs.nodes) }}
fail-fast: false
uses: ./.github/workflows/_gmc-workflow.yml
with:
node: ${{ matrix.node }}
tag: ${{ inputs.tag }}
opea_branch: ${{ inputs.opea_branch }}
secrets: inherit
run-examples:
needs: [get-test-matrix, build-deploy-gmc]
if: always()
strategy:
matrix:
example: ${{ fromJson(needs.get-test-matrix.outputs.examples) }}
node: ${{ fromJson(needs.get-test-matrix.outputs.nodes) }}
fail-fast: false
uses: ./.github/workflows/_example-workflow.yml
with:
node: ${{ matrix.node }}
example: ${{ matrix.example }}
tag: ${{ inputs.tag }}
build: ${{ fromJSON(inputs.build) }}
scan: ${{ fromJSON(inputs.scan) }}
test_compose: ${{ fromJSON(inputs.test_compose) }}
test_k8s: ${{ fromJSON(inputs.test_k8s) }}
test_gmc: ${{ fromJSON(inputs.test_gmc) }}
opea_branch: ${{ inputs.opea_branch }}
secrets: inherit


@@ -0,0 +1,43 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: Freeze base images and 3rd party images on manual event
on:
workflow_dispatch:
jobs:
freeze-images:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
fetch-depth: 0
ref: ${{ github.ref }}
- uses: actions/setup-python@v5
with:
python-version: "3.10"
- name: install skopeo
run: |
sudo apt update
sudo apt -y install skopeo
- name: Set up Git
run: |
git config --global user.name "NeuralChatBot"
git config --global user.email "grp_neural_chat_bot@intel.com"
git remote set-url origin https://NeuralChatBot:"${{ secrets.ACTION_TOKEN }}"@github.com/opea-project/GenAIExamples.git
- name: Run script
run: |
bash .github/workflows/scripts/freeze_images.sh
- name: Commit changes
run: |
git add .
git commit -s -m "Freeze third party images tag"
git push

.github/workflows/manual-freeze-tag.yml vendored Normal file

@@ -0,0 +1,46 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: Freeze OPEA images release tag in readme on manual event
on:
workflow_dispatch:
inputs:
tag:
default: "latest"
description: "Tag to apply to images"
required: true
type: string
jobs:
freeze-tag:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
fetch-depth: 0
ref: ${{ github.ref }}
- uses: actions/setup-python@v5
with:
python-version: "3.10"
- name: Set up Git
run: |
git config --global user.name "NeuralChatBot"
git config --global user.email "grp_neural_chat_bot@intel.com"
git remote set-url origin https://NeuralChatBot:"${{ secrets.ACTION_TOKEN }}"@github.com/opea-project/GenAIExamples.git
- name: Run script
run: |
find . -name "*.md" | xargs sed -i "s|^docker\ compose|TAG=${{ github.event.inputs.tag }}\ docker\ compose|g"
find . -type f -name "*.yaml" \( -path "*/benchmark/*" -o -path "*/kubernetes/*" \) | xargs sed -i -E 's/(opea\/[A-Za-z0-9\-]*:)latest/\1${{ github.event.inputs.tag }}/g'
find . -type f -name "*.md" \( -path "*/benchmark/*" -o -path "*/kubernetes/*" \) | xargs sed -i -E 's/(opea\/[A-Za-z0-9\-]*:)latest/\1${{ github.event.inputs.tag }}/g'
- name: Commit changes
run: |
git add .
git commit -s -m "Freeze OPEA images tag"
git push


@@ -0,0 +1,78 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: Build latest images on manual event
on:
workflow_dispatch:
inputs:
registry:
default: ""
description: "Registry to store images,e.g., docker.io, default is empty"
required: false
type: string
services:
default: "AudioQnA,ChatQnA,CodeGen,CodeTrans,DocSum,FaqGen,SearchQnA,Translation"
description: "List of examples to build"
required: true
type: string
tag:
default: "latest"
description: "Tag to apply to images"
required: true
type: string
nodes:
default: "docker-build-xeon,docker-build-gaudi"
description: "List of node to run the build on"
required: true
type: string
jobs:
get-build-matrix:
runs-on: ubuntu-latest
outputs:
services: ${{ steps.get-services.outputs.services }}
nodes: ${{ steps.get-services.outputs.nodes }}
steps:
- name: Get test Services
id: get-services
run: |
set -x
service_list=($(echo ${{ github.event.inputs.services }} | tr ',' ' '))
services=$(printf '%s\n' "${service_list[@]}" | sort -u | jq -R '.' | jq -sc '.')
echo "services=$services" >> $GITHUB_OUTPUT
node_list=($(echo ${{ github.event.inputs.nodes }} | tr ',' ' '))
nodes=$(printf '%s\n' "${node_list[@]}" | sort -u | jq -R '.' | jq -sc '.')
echo "nodes=$nodes" >> $GITHUB_OUTPUT
image-build:
needs: get-build-matrix
strategy:
matrix:
service: ${{ fromJSON(needs.get-build-matrix.outputs.services) }}
node: ${{ fromJSON(needs.get-build-matrix.outputs.nodes) }}
runs-on: ${{ matrix.node }}
continue-on-error: true
steps:
- name: Clean Up Working Directory
run: |
sudo rm -rf ${{github.workspace}}/*
- name: Checkout Repo
uses: actions/checkout@v4
- name: Config image repo
run: |
if [[ -z "${{ github.event.inputs.registry }}" ]]; then
echo "image_repo=${OPEA_IMAGE_REPO}" >> $GITHUB_ENV
else
echo "image_repo=${{ github.event.inputs.registry }}/" >> $GITHUB_ENV
fi
- name: Build image
uses: opea-project/validation/actions/image-build@main
with:
work_dir: ${{ github.workspace }}/${{ matrix.service }}
docker_compose_path: ${{ github.workspace }}/.github/workflows/docker/compose/${{ matrix.service }}-compose.yaml
registry: ${{ env.image_repo }}opea
tag: ${{ github.event.inputs.tag }}


@@ -0,0 +1,40 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: E2E test with docker compose
on:
pull_request_target:
branches: [main]
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
paths:
- "**/docker/**"
- "**/tests/**"
- "**/ui/**"
- "!**.md"
- "!**.txt"
- .github/workflows/pr-docker-compose-e2e.yml
# If there is a new commit, the previous jobs will be canceled
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
get-test-matrix:
uses: ./.github/workflows/_get-test-matrix.yml
with:
diff_excluded_files: '.github|README.md|*.txt|deprecate|kubernetes|manifest|gmc|assets'
example-test:
needs: [get-test-matrix]
strategy:
matrix: ${{ fromJSON(needs.get-test-matrix.outputs.run_matrix) }}
fail-fast: false
uses: ./.github/workflows/_run-docker-compose.yml
with:
registry: "opea"
tag: "ci"
example: ${{ matrix.example }}
hardware: ${{ matrix.hardware }}
secrets: inherit

.github/workflows/pr-gmc-e2e.yaml vendored Normal file

@@ -0,0 +1,38 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: E2E test with GMC
on:
pull_request_target:
branches: [main]
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
paths:
- "**/kubernetes/**"
- "**/tests/test_gmc**"
- "!**.md"
- "!**.txt"
- "!**/kubernetes/manifests/**"
workflow_dispatch:
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
job1:
uses: ./.github/workflows/_get-test-matrix.yml
with:
diff_excluded_files: '.github|deprecated|docker|assets|*.md|*.txt'
xeon_server_label: 'xeon'
gaudi_server_label: 'gaudi'
gmc-test:
needs: [job1]
strategy:
matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }}
uses: ./.github/workflows/_gmc-e2e.yml
with:
example: ${{ matrix.example }}
hardware: ${{ matrix.hardware }}
secrets: inherit

.github/workflows/pr-manifest-e2e.yml vendored Normal file

@@ -0,0 +1,48 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: E2E test with manifests
on:
pull_request:
branches: [main]
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
paths:
- "**/kubernetes/manifests/**"
- "**/tests/test_manifest**"
- "!**.md"
- "!**.txt"
workflow_dispatch:
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
job1:
uses: ./.github/workflows/_get-test-matrix.yml
with:
diff_excluded_files: '.github|deprecated|docker|assets|*.md|*.txt'
xeon_server_label: 'xeon'
gaudi_server_label: 'gaudi'
mega-image-build:
needs: job1
strategy:
matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }}
uses: ./.github/workflows/_image-build.yml
with:
image_tag: ${{ github.event.pull_request.head.sha }}
mega_service: "${{ matrix.example }}"
runner_label: "docker-build-${{ matrix.hardware }}"
manifest-test:
needs: [job1, mega-image-build]
strategy:
matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }}
uses: ./.github/workflows/_manifest-e2e.yml
with:
example: ${{ matrix.example }}
hardware: ${{ matrix.hardware }}
tag: ${{ needs.mega-image-build.outputs.image_tag }}
secrets: inherit

.github/workflows/push-image-build.yml vendored Normal file

@@ -0,0 +1,57 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# Test
name: Build latest images on push event
on:
push:
branches: [ 'main' ]
paths:
- "**/docker/*.py"
- "**/docker/Dockerfile"
- "**/docker/ui/**"
workflow_dispatch:
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}-on-push
cancel-in-progress: true
jobs:
job1:
uses: ./.github/workflows/_get-test-matrix.yml
mega-image-build:
needs: job1
strategy:
matrix:
workload: ${{ fromJSON(needs.job1.outputs.run_matrix).include.*.example }}
hardware: ["gaudi","xeon"]
runs-on: docker-build-${{ matrix.hardware }}
steps:
- name: Clean up Working Directory
run: |
sudo rm -rf ${{github.workspace}}/*
- name: Checkout Repo
uses: actions/checkout@v4
- name: Check Docker Compose File Exists
env:
service: ${{ matrix.workload }}
run: |
docker_compose_path="${{ github.workspace }}/.github/workflows/docker/compose/${service}-compose.yaml"
if [ -e $docker_compose_path ]; then
echo "file_exists=true" >> $GITHUB_ENV
echo "docker_compose_path=${docker_compose_path}" >> $GITHUB_ENV
else
echo "file_exists=false" >> $GITHUB_ENV
echo "docker_compose_path=${docker_compose_path} for this service does not exist, so skipping image build for this service!!!"
fi
- name: Build Image
if: env.file_exists == 'true'
uses: opea-project/validation/actions/image-build@main
with:
work_dir: ${{ github.workspace }}/${{ matrix.workload }}
docker_compose_path: ${{ env.docker_compose_path }}
registry: ${OPEA_IMAGE_REPO}opea


@@ -0,0 +1,50 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
declare -A dict
dict["langchain/langchain"]="docker://docker.io/langchain/langchain"
dict["ghcr.io/huggingface/text-generation-inference"]="docker://ghcr.io/huggingface/text-generation-inference"
function get_latest_version() {
repo_image=$1
versions=$(skopeo list-tags ${dict[$repo_image]} | jq -r '.Tags[]')
printf "version list:\n$versions\n"
latest_version=$(printf "%s\n" "${versions[@]}" | grep -E '^[\.0-9\-]+$' | sort -V | tail -n 1)
echo "latest version: $latest_version"
replace_image_version $repo_image $latest_version
}
function replace_image_version() {
repo_image=$1
version=$2
if [[ -z "$version" ]]; then
echo "version is empty"
else
echo "replace $repo_image:latest with $repo_image:$version"
find . -name "Dockerfile" | xargs sed -i "s|$repo_image:latest.*|$repo_image:$version|g"
find . -name "*.yaml" | xargs sed -i "s|$repo_image:latest[A-Za-z0-9\-]*|$repo_image:$version|g"
find . -name "*.md" | xargs sed -i "s|$repo_image:latest[A-Za-z0-9\-]*|$repo_image:$version|g"
fi
}
function check_branch_name() {
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
echo "$GITHUB_REF_NAME is protected branch"
exit 0
else
echo "branch name is $GITHUB_REF_NAME"
fi
}
function main() {
check_branch_name
for repo_image in "${!dict[@]}"; do
echo "::group::check $repo_image"
get_latest_version $repo_image
echo "::endgroup::"
done
}
main

AgentQnA/README.md Normal file

@@ -0,0 +1,106 @@
# Agents for Question Answering
## Overview
This example showcases a hierarchical multi-agent system for question-answering applications. The architecture diagram is shown below. The supervisor agent interfaces with the user and dispatches tasks to the worker agent and other tools to gather information and come up with answers. The worker agent uses the retrieval tool to generate answers to the queries posted by the supervisor agent. Other tools used by the supervisor agent may include APIs to interface with knowledge graphs, SQL databases, external knowledge bases, etc.
![Architecture Overview](assets/agent_qna_arch.png)
### Why Agent for question answering?
1. Improve relevancy of retrieved context.
An agent can rephrase user queries, decompose them, and iterate to get the most relevant context for answering the user's questions. Compared to conventional RAG, a RAG agent can significantly improve the correctness and relevancy of the answer.
2. Use tools to get additional knowledge.
For example, knowledge graphs and SQL databases can be exposed as APIs for Agents to gather knowledge that may be missing in the retrieval vector database.
3. Hierarchical agents can further improve performance.
Expert worker agents, such as a retrieval agent, knowledge graph agent, or SQL agent, can provide high-quality output for different aspects of a complex query, and the supervisor agent can aggregate the information to provide a comprehensive answer.
### Roadmap
- v0.9: The worker agent uses an open-source web search tool (DuckDuckGo); agents use OpenAI GPT-4o-mini as the LLM backend.
- v1.0: The worker agent uses the OPEA retrieval megaservice as a tool.
- v1.0 or later: agents use an open-source LLM backend.
- v1.1 or later: add safeguards
## Getting started
1. Build the agent docker image </br>
First, clone the OPEA GenAIComps repo
```
export WORKDIR=<your-work-directory>
cd $WORKDIR
git clone https://github.com/opea-project/GenAIComps.git
```
Then build the agent docker image. Both the supervisor agent and the worker agent will use the same docker image, but when we launch the two agents we will specify different strategies and register different tools.
```
cd GenAIComps
docker build -t opea/comps-agent-langchain:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/agent/langchain/docker/Dockerfile .
```
2. Launch tool services </br>
In this example, we will use some of the mock APIs provided in the Meta CRAG KDD Challenge to demonstrate the benefits of gaining additional context from mock knowledge graphs.
```
docker run -d -p=8080:8000 docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0
```
3. Set up environment for this example </br>
First, clone this repo
```
cd $WORKDIR
git clone https://github.com/opea-project/GenAIExamples.git
```
Second, set up env vars
```
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
# optional: OPENAI_API_KEY
export OPENAI_API_KEY=<your-openai-key>
```
4. Launch agent services</br>
The configurations of the supervisor agent and the worker agent are defined in the docker-compose yaml file. We currently use OpenAI GPT-4o-mini as the LLM, and we plan to add support for llama3.1-70B-instruct (served by TGI-Gaudi) in a subsequent release.
To use the OpenAI LLM, run the commands below.
```
cd docker/openai/
bash launch_agent_service_openai.sh
```
## Validate services
First, look at the logs of the agent docker containers:
```
docker logs docgrader-agent-endpoint
```
```
docker logs react-agent-endpoint
```
You should see something like "HTTP server setup successful" if the docker containers started successfully.
Second, validate worker agent:
```
curl http://${ip_address}:9095/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
"query": "Most recent album by Taylor Swift"
}'
```
Third, validate supervisor agent:
```
curl http://${ip_address}:9090/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
"query": "Most recent album by Taylor Swift"
}'
```
## How to register your own tools with the agent
You can take a look at the tool YAML and Python files in this example. For more details, please refer to the "Provide your own tools" section in the instructions [here](https://github.com/minmin-intel/GenAIComps/tree/agent-comp-dev/comps/agent/langchain#-4-provide-your-own-tools). A hypothetical example is sketched below.
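For illustration only: a custom tool is a plain Python function plus an entry in the tools YAML that points to it. The function, file name, and schema below are hypothetical, modeled on `tools/tools.py` and `tools/worker_agent_tools.yaml` in this example, and are not part of the shipped code.
```python
# Hypothetical custom tool (e.g. saved as my_tools.py) -- illustrative only.
def get_weather(city: str) -> str:
    """Return a short weather summary for a city (stub for illustration)."""
    # A real tool would call an external weather API here.
    return f"Sunny in {city}"


# The matching (hypothetical) entry in your tools YAML would look roughly like:
#   get_weather:
#     description: Get the current weather for a city.
#     callable_api: my_tools.py:get_weather
#     args_schema:
#       city:
#         type: str
#         description: city name
#     return_output: weather_info
```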

Binary file not shown (new image added, 69 KiB).


@@ -0,0 +1,63 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
services:
worker-docgrader-agent:
image: opea/comps-agent-langchain:latest
container_name: docgrader-agent-endpoint
volumes:
- ${WORKDIR}/GenAIComps/comps/agent/langchain/:/home/user/comps/agent/langchain/
- ${TOOLSET_PATH}:/home/user/tools/
ports:
- "9095:9095"
ipc: host
environment:
ip_address: ${ip_address}
strategy: rag_agent
recursion_limit: ${recursion_limit}
llm_engine: openai
OPENAI_API_KEY: ${OPENAI_API_KEY}
model: ${model}
temperature: ${temperature}
max_new_tokens: ${max_new_tokens}
streaming: false
tools: /home/user/tools/worker_agent_tools.yaml
require_human_feedback: false
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-worker-agent-service"
port: 9095
supervisor-react-agent:
image: opea/comps-agent-langchain:latest
container_name: react-agent-endpoint
volumes:
- ${WORKDIR}/GenAIComps/comps/agent/langchain/:/home/user/comps/agent/langchain/
- ${TOOLSET_PATH}:/home/user/tools/
ports:
- "9090:9090"
ipc: host
environment:
ip_address: ${ip_address}
strategy: react_langgraph
recursion_limit: ${recursion_limit}
llm_engine: openai
OPENAI_API_KEY: ${OPENAI_API_KEY}
model: ${model}
temperature: ${temperature}
max_new_tokens: ${max_new_tokens}
streaming: ${streaming}
tools: /home/user/tools/supervisor_agent_tools.yaml
require_human_feedback: false
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-supervisor-agent-service"
CRAG_SERVER: $CRAG_SERVER
WORKER_AGENT_URL: $WORKER_AGENT_URL
port: 9090


@@ -0,0 +1,13 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
export ip_address=$(hostname -I | awk '{print $1}')
export recursion_limit=12
export model="gpt-4o-mini-2024-07-18"
export temperature=0
export max_new_tokens=512
export OPENAI_API_KEY=${OPENAI_API_KEY}
export WORKER_AGENT_URL="http://${ip_address}:9095/v1/chat/completions"
export CRAG_SERVER=http://${ip_address}:8080
docker compose -f docker-compose-agent-openai.yaml up -d


@@ -0,0 +1,75 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -e
echo "IMAGE_REPO=${IMAGE_REPO}"
echo "OPENAI_API_KEY=${OPENAI_API_KEY}"
WORKPATH=$(dirname "$PWD")
export WORKDIR=$WORKPATH/../../
echo "WORKDIR=${WORKDIR}"
export ip_address=$(hostname -I | awk '{print $1}')
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
function build_agent_docker_image() {
cd $WORKDIR
if [ ! -d "GenAIComps" ] ; then
git clone https://github.com/opea-project/GenAIComps.git
fi
cd GenAIComps
echo PWD: $(pwd)
docker build -t opea/comps-agent-langchain:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/agent/langchain/docker/Dockerfile .
}
function start_services() {
echo "Starting CRAG server"
docker run -d -p=8080:8000 docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0
echo "Starting Agent services"
cd $WORKDIR/GenAIExamples/AgentQnA/docker/openai
bash launch_agent_service_openai.sh
}
function validate() {
local CONTENT="$1"
local EXPECTED_RESULT="$2"
local SERVICE_NAME="$3"
if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then
echo "[ $SERVICE_NAME ] Content is as expected: $CONTENT"
echo 0
else
echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT"
echo 1
fi
}
function run_tests() {
echo "----------------Test supervisor agent ----------------"
local CONTENT=$(http_proxy="" curl http://${ip_address}:9090/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
"query": "Most recent album by Taylor Swift"
}')
local EXIT_CODE=$(validate "$CONTENT" "Taylor" "react-agent-endpoint")
docker logs react-agent-endpoint
if [ "$EXIT_CODE" == "1" ]; then
exit 1
fi
}
function stop_services() {
echo "Stopping CRAG server"
docker stop $(docker ps -q --filter ancestor=docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0)
echo "Stopping Agent services"
docker stop $(docker ps -q --filter ancestor=opea/comps-agent-langchain:latest)
}
function main() {
build_agent_docker_image
start_services
run_tests
stop_services
}
main

AgentQnA/tools/pycragapi.py Normal file

@@ -0,0 +1,330 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
import json
import os
from typing import List
import requests
class CRAG(object):
"""A client for interacting with the CRAG server, offering methods to query various domains such as Open, Movie, Finance, Music, and Sports. Each method corresponds to an API endpoint on the CRAG server.
Attributes:
server (str): The base URL of the CRAG server. Defaults to "http://127.0.0.1:8080".
Methods:
open_search_entity_by_name(query: str) -> dict: Search for entities by name in the Open domain.
open_get_entity(entity: str) -> dict: Retrieve detailed information about an entity in the Open domain.
movie_get_person_info(person_name: str) -> dict: Get information about a person related to movies.
movie_get_movie_info(movie_name: str) -> dict: Get information about a movie.
movie_get_year_info(year: str) -> dict: Get information about movies released in a specific year.
movie_get_movie_info_by_id(movie_id: int) -> dict: Get movie information by its unique ID.
movie_get_person_info_by_id(person_id: int) -> dict: Get person information by their unique ID.
finance_get_company_name(query: str) -> dict: Search for company names in the finance domain.
finance_get_ticker_by_name(query: str) -> dict: Retrieve the ticker symbol for a given company name.
finance_get_price_history(ticker_name: str) -> dict: Get the price history for a given ticker symbol.
finance_get_detailed_price_history(ticker_name: str) -> dict: Get detailed price history for a ticker symbol.
finance_get_dividends_history(ticker_name: str) -> dict: Get dividend history for a ticker symbol.
finance_get_market_capitalization(ticker_name: str) -> dict: Retrieve market capitalization for a ticker symbol.
finance_get_eps(ticker_name: str) -> dict: Get earnings per share (EPS) for a ticker symbol.
finance_get_pe_ratio(ticker_name: str) -> dict: Get the price-to-earnings (PE) ratio for a ticker symbol.
finance_get_info(ticker_name: str) -> dict: Get financial information for a ticker symbol.
music_search_artist_entity_by_name(artist_name: str) -> dict: Search for music artists by name.
music_search_song_entity_by_name(song_name: str) -> dict: Search for songs by name.
music_get_billboard_rank_date(rank: int, date: str = None) -> dict: Get Billboard ranking for a specific rank and date.
music_get_billboard_attributes(date: str, attribute: str, song_name: str) -> dict: Get attributes of a song from Billboard rankings.
music_grammy_get_best_artist_by_year(year: int) -> dict: Get the Grammy Best New Artist for a specific year.
music_grammy_get_award_count_by_artist(artist_name: str) -> dict: Get the total Grammy awards won by an artist.
music_grammy_get_award_count_by_song(song_name: str) -> dict: Get the total Grammy awards won by a song.
music_grammy_get_best_song_by_year(year: int) -> dict: Get the Grammy Song of the Year for a specific year.
music_grammy_get_award_date_by_artist(artist_name: str) -> dict: Get the years an artist won a Grammy award.
music_grammy_get_best_album_by_year(year: int) -> dict: Get the Grammy Album of the Year for a specific year.
music_grammy_get_all_awarded_artists() -> dict: Get all artists awarded the Grammy Best New Artist.
music_get_artist_birth_place(artist_name: str) -> dict: Get the birthplace of an artist.
music_get_artist_birth_date(artist_name: str) -> dict: Get the birth date of an artist.
music_get_members(band_name: str) -> dict: Get the member list of a band.
music_get_lifespan(artist_name: str) -> dict: Get the lifespan of an artist.
music_get_song_author(song_name: str) -> dict: Get the author of a song.
music_get_song_release_country(song_name: str) -> dict: Get the release country of a song.
music_get_song_release_date(song_name: str) -> dict: Get the release date of a song.
music_get_artist_all_works(artist_name: str) -> dict: Get all works by an artist.
sports_soccer_get_games_on_date(team_name: str, date: str) -> dict: Get soccer games on a specific date.
sports_nba_get_games_on_date(team_name: str, date: str) -> dict: Get NBA games on a specific date.
sports_nba_get_play_by_play_data_by_game_ids(game_ids: List[str]) -> dict: Get NBA play by play data for a set of game ids.
Note:
Each method performs a POST request to the corresponding API endpoint and returns the response as a JSON dictionary.
"""
def __init__(self):
self.server = os.environ.get("CRAG_SERVER", "http://127.0.0.1:8080")
def open_search_entity_by_name(self, query: str):
url = self.server + "/open/search_entity_by_name"
headers = {"accept": "application/json"}
data = {"query": query}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def open_get_entity(self, entity: str):
url = self.server + "/open/get_entity"
headers = {"accept": "application/json"}
data = {"query": entity}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def movie_get_person_info(self, person_name: str):
url = self.server + "/movie/get_person_info"
headers = {"accept": "application/json"}
data = {"query": person_name}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def movie_get_movie_info(self, movie_name: str):
url = self.server + "/movie/get_movie_info"
headers = {"accept": "application/json"}
data = {"query": movie_name}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def movie_get_year_info(self, year: str):
url = self.server + "/movie/get_year_info"
headers = {"accept": "application/json"}
data = {"query": year}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def movie_get_movie_info_by_id(self, movid_id: int):
url = self.server + "/movie/get_movie_info_by_id"
headers = {"accept": "application/json"}
data = {"query": movid_id}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def movie_get_person_info_by_id(self, person_id: int):
url = self.server + "/movie/get_person_info_by_id"
headers = {"accept": "application/json"}
data = {"query": person_id}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def finance_get_company_name(self, query: str):
url = self.server + "/finance/get_company_name"
headers = {"accept": "application/json"}
data = {"query": query}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def finance_get_ticker_by_name(self, query: str):
url = self.server + "/finance/get_ticker_by_name"
headers = {"accept": "application/json"}
data = {"query": query}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def finance_get_price_history(self, ticker_name: str):
url = self.server + "/finance/get_price_history"
headers = {"accept": "application/json"}
data = {"query": ticker_name}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def finance_get_detailed_price_history(self, ticker_name: str):
url = self.server + "/finance/get_detailed_price_history"
headers = {"accept": "application/json"}
data = {"query": ticker_name}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def finance_get_dividends_history(self, ticker_name: str):
url = self.server + "/finance/get_dividends_history"
headers = {"accept": "application/json"}
data = {"query": ticker_name}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def finance_get_market_capitalization(self, ticker_name: str):
url = self.server + "/finance/get_market_capitalization"
headers = {"accept": "application/json"}
data = {"query": ticker_name}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def finance_get_eps(self, ticker_name: str):
url = self.server + "/finance/get_eps"
headers = {"accept": "application/json"}
data = {"query": ticker_name}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def finance_get_pe_ratio(self, ticker_name: str):
url = self.server + "/finance/get_pe_ratio"
headers = {"accept": "application/json"}
data = {"query": ticker_name}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def finance_get_info(self, ticker_name: str):
url = self.server + "/finance/get_info"
headers = {"accept": "application/json"}
data = {"query": ticker_name}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def music_search_artist_entity_by_name(self, artist_name: str):
url = self.server + "/music/search_artist_entity_by_name"
headers = {"accept": "application/json"}
data = {"query": artist_name}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def music_search_song_entity_by_name(self, song_name: str):
url = self.server + "/music/search_song_entity_by_name"
headers = {"accept": "application/json"}
data = {"query": song_name}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def music_get_billboard_rank_date(self, rank: int, date: str = None):
url = self.server + "/music/get_billboard_rank_date"
headers = {"accept": "application/json"}
data = {"rank": rank, "date": date}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def music_get_billboard_attributes(self, date: str, attribute: str, song_name: str):
url = self.server + "/music/get_billboard_attributes"
headers = {"accept": "application/json"}
data = {"date": date, "attribute": attribute, "song_name": song_name}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def music_grammy_get_best_artist_by_year(self, year: int):
url = self.server + "/music/grammy_get_best_artist_by_year"
headers = {"accept": "application/json"}
data = {"query": year}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def music_grammy_get_award_count_by_artist(self, artist_name: str):
url = self.server + "/music/grammy_get_award_count_by_artist"
headers = {"accept": "application/json"}
data = {"query": artist_name}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def music_grammy_get_award_count_by_song(self, song_name: str):
url = self.server + "/music/grammy_get_award_count_by_song"
headers = {"accept": "application/json"}
data = {"query": song_name}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def music_grammy_get_best_song_by_year(self, year: int):
url = self.server + "/music/grammy_get_best_song_by_year"
headers = {"accept": "application/json"}
data = {"query": year}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def music_grammy_get_award_date_by_artist(self, artist_name: str):
url = self.server + "/music/grammy_get_award_date_by_artist"
headers = {"accept": "application/json"}
data = {"query": artist_name}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def music_grammy_get_best_album_by_year(self, year: int):
url = self.server + "/music/grammy_get_best_album_by_year"
headers = {"accept": "application/json"}
data = {"query": year}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def music_grammy_get_all_awarded_artists(self):
url = self.server + "/music/grammy_get_all_awarded_artists"
headers = {"accept": "application/json"}
result = requests.post(url, headers=headers)
return json.loads(result.text)
def music_get_artist_birth_place(self, artist_name: str):
url = self.server + "/music/get_artist_birth_place"
headers = {"accept": "application/json"}
data = {"query": artist_name}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def music_get_artist_birth_date(self, artist_name: str):
url = self.server + "/music/get_artist_birth_date"
headers = {"accept": "application/json"}
data = {"query": artist_name}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def music_get_members(self, band_name: str):
url = self.server + "/music/get_members"
headers = {"accept": "application/json"}
data = {"query": band_name}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def music_get_lifespan(self, artist_name: str):
url = self.server + "/music/get_lifespan"
headers = {"accept": "application/json"}
data = {"query": artist_name}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def music_get_song_author(self, song_name: str):
url = self.server + "/music/get_song_author"
headers = {"accept": "application/json"}
data = {"query": song_name}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def music_get_song_release_country(self, song_name: str):
url = self.server + "/music/get_song_release_country"
headers = {"accept": "application/json"}
data = {"query": song_name}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def music_get_song_release_date(self, song_name: str):
url = self.server + "/music/get_song_release_date"
headers = {"accept": "application/json"}
data = {"query": song_name}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def music_get_artist_all_works(self, song_name: str):
url = self.server + "/music/get_artist_all_works"
headers = {"accept": "application/json"}
data = {"query": song_name}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def sports_soccer_get_games_on_date(self, date: str, team_name: str = None):
url = self.server + "/sports/soccer/get_games_on_date"
headers = {"accept": "application/json"}
data = {"team_name": team_name, "date": date}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def sports_nba_get_games_on_date(self, date: str, team_name: str = None):
url = self.server + "/sports/nba/get_games_on_date"
headers = {"accept": "application/json"}
data = {"team_name": team_name, "date": date}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def sports_nba_get_play_by_play_data_by_game_ids(self, game_ids: List[str]):
url = self.server + "/sports/nba/get_play_by_play_data_by_game_ids"
headers = {"accept": "application/json"}
data = {"game_ids": game_ids}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
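A minimal usage sketch of this client, assuming the CRAG mock API container from the README is running and reachable (the server URL, band, and song below are illustrative):
```python
import os

from tools.pycragapi import CRAG

# Point the client at the mock server started earlier (illustrative URL).
os.environ["CRAG_SERVER"] = "http://localhost:8080"

api = CRAG()
print(api.music_get_members("The Beatles"))         # member list of a band
print(api.music_get_song_release_date("Hey Jude"))  # release date of a song
```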


@@ -0,0 +1,59 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
search_knowledge_base:
description: Search knowledge base for a given query. Returns text related to the query.
callable_api: tools.py:search_knowledge_base
args_schema:
query:
type: str
description: query
return_output: retrieved_data
get_artist_birth_place:
description: Get the birth place of an artist.
callable_api: tools.py:get_artist_birth_place
args_schema:
artist_name:
type: str
description: artist name
return_output: birth_place
get_billboard_rank_date:
description: Get Billboard ranking for a specific rank and date.
callable_api: tools.py:get_billboard_rank_date
args_schema:
rank:
type: int
description: song name
date:
type: str
description: date
return_output: billboard_info
get_song_release_date:
description: Get the release date of a song.
callable_api: tools.py:get_song_release_date
args_schema:
song_name:
type: str
description: song name
return_output: release_date
get_members:
description: Get the member list of a band.
callable_api: tools.py:get_members
args_schema:
band_name:
type: str
description: band name
return_output: members
get_grammy_best_artist_by_year:
description: Get the Grammy Best New Artist for a specific year.
callable_api: tools.py:get_grammy_best_artist_by_year
args_schema:
year:
type: int
description: year
return_output: grammy_best_new_artist

AgentQnA/tools/tools.py Normal file

@@ -0,0 +1,52 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import os
import requests
from tools.pycragapi import CRAG
def search_knowledge_base(query: str) -> str:
"""Search the knowledge base for a specific query."""
# use worker agent (DocGrader) to search the knowledge base
url = os.environ.get("WORKER_AGENT_URL")
print(url)
proxies = {"http": ""}
payload = {
"query": query,
}
response = requests.post(url, json=payload, proxies=proxies)
return response.json()["text"]
def get_grammy_best_artist_by_year(year: int) -> dict:
"""Get the Grammy Best New Artist for a specific year."""
api = CRAG()
year = int(year)
return api.music_grammy_get_best_artist_by_year(year)
def get_members(band_name: str) -> dict:
"""Get the member list of a band."""
api = CRAG()
return api.music_get_members(band_name)
def get_artist_birth_place(artist_name: str) -> dict:
"""Get the birthplace of an artist."""
api = CRAG()
return api.music_get_artist_birth_place(artist_name)
def get_billboard_rank_date(rank: int, date: str = None) -> dict:
"""Get Billboard ranking for a specific rank and date."""
api = CRAG()
rank = int(rank)
return api.music_get_billboard_rank_date(rank, date)
def get_song_release_date(song_name: str) -> dict:
"""Get the release date of a song."""
api = CRAG()
return api.music_get_song_release_date(song_name)


@@ -0,0 +1,5 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
duckduckgo_search:
callable_api: ddg-search


@@ -0,0 +1,54 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
services:
audioqna:
build:
args:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
no_proxy: ${no_proxy}
dockerfile: ./Dockerfile
image: ${REGISTRY:-opea}/audioqna:${TAG:-latest}
whisper-gaudi:
build:
context: GenAIComps
dockerfile: comps/asr/whisper/Dockerfile_hpu
extends: audioqna
image: ${REGISTRY:-opea}/whisper-gaudi:${TAG:-latest}
whisper:
build:
context: GenAIComps
dockerfile: comps/asr/whisper/Dockerfile
extends: audioqna
image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
asr:
build:
context: GenAIComps
dockerfile: comps/asr/Dockerfile
extends: audioqna
image: ${REGISTRY:-opea}/asr:${TAG:-latest}
llm-tgi:
build:
context: GenAIComps
dockerfile: comps/llms/text-generation/tgi/Dockerfile
extends: audioqna
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
speecht5-gaudi:
build:
context: GenAIComps
dockerfile: comps/tts/speecht5/Dockerfile_hpu
extends: audioqna
image: ${REGISTRY:-opea}/speecht5-gaudi:${TAG:-latest}
speecht5:
build:
context: GenAIComps
dockerfile: comps/tts/speecht5/Dockerfile
extends: audioqna
image: ${REGISTRY:-opea}/speecht5:${TAG:-latest}
tts:
build:
context: GenAIComps
dockerfile: comps/tts/Dockerfile
extends: audioqna
image: ${REGISTRY:-opea}/tts:${TAG:-latest}


@@ -81,7 +81,7 @@ export LLM_SERVICE_PORT=3007
```bash
cd GenAIExamples/AudioQnA/docker/gaudi/
docker compose up -d
TAG=v0.9 docker compose up -d
```
## 🚀 Test MicroServices


@@ -1,12 +1,9 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
version: "3.8"
services:
whisper-service:
image: opea/whisper-gaudi:latest
image: ${REGISTRY:-opea}/whisper-gaudi:${TAG:-latest}
container_name: whisper-service
ports:
- "7066:7066"
@@ -22,7 +19,7 @@ services:
- SYS_NICE
restart: unless-stopped
asr:
image: opea/asr:latest
image: ${REGISTRY:-opea}/asr:${TAG:-latest}
container_name: asr-service
ports:
- "3001:9099"
@@ -30,7 +27,7 @@ services:
environment:
ASR_ENDPOINT: ${ASR_ENDPOINT}
speecht5-service:
image: opea/speecht5-gaudi:latest
image: ${REGISTRY:-opea}/speecht5-gaudi:${TAG:-latest}
container_name: speecht5-service
ports:
- "7055:7055"
@@ -46,7 +43,7 @@ services:
- SYS_NICE
restart: unless-stopped
tts:
image: opea/tts:latest
image: ${REGISTRY:-opea}/tts:${TAG:-latest}
container_name: tts-service
ports:
- "3002:9088"
@@ -75,7 +72,7 @@ services:
ipc: host
command: --model-id ${LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048
llm:
image: opea/llm-tgi:latest
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
container_name: llm-tgi-gaudi-server
depends_on:
- tgi-service
@@ -90,7 +87,7 @@ services:
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
audioqna-gaudi-backend-server:
image: opea/audioqna:latest
image: ${REGISTRY:-opea}/audioqna:${TAG:-latest}
container_name: audioqna-gaudi-backend-server
depends_on:
- asr


@@ -81,7 +81,7 @@ export LLM_SERVICE_PORT=3007
```bash
cd GenAIExamples/AudioQnA/docker/xeon/
docker compose up -d
TAG=v0.9 docker compose up -d
```
## 🚀 Test MicroServices


@@ -1,12 +1,9 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
version: "3.8"
services:
whisper-service:
image: opea/whisper:latest
image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
container_name: whisper-service
ports:
- "7066:7066"
@@ -17,7 +14,7 @@ services:
https_proxy: ${https_proxy}
restart: unless-stopped
asr:
image: opea/asr:latest
image: ${REGISTRY:-opea}/asr:${TAG:-latest}
container_name: asr-service
ports:
- "3001:9099"
@@ -25,7 +22,7 @@ services:
environment:
ASR_ENDPOINT: ${ASR_ENDPOINT}
speecht5-service:
image: opea/speecht5:latest
image: ${REGISTRY:-opea}/speecht5:${TAG:-latest}
container_name: speecht5-service
ports:
- "7055:7055"
@@ -36,7 +33,7 @@ services:
https_proxy: ${https_proxy}
restart: unless-stopped
tts:
image: opea/tts:latest
image: ${REGISTRY:-opea}/tts:${TAG:-latest}
container_name: tts-service
ports:
- "3002:9088"
@@ -44,7 +41,7 @@ services:
environment:
TTS_ENDPOINT: ${TTS_ENDPOINT}
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:1.4
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
container_name: tgi-service
ports:
- "3006:80"
@@ -56,9 +53,9 @@ services:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
command: --model-id ${LLM_MODEL_ID}
command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
llm:
image: opea/llm-tgi:latest
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
container_name: llm-tgi-server
depends_on:
- tgi-service
@@ -73,7 +70,7 @@ services:
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
audioqna-xeon-backend-server:
image: opea/audioqna:latest
image: ${REGISTRY:-opea}/audioqna:${TAG:-latest}
container_name: audioqna-xeon-backend-server
depends_on:
- asr


@@ -0,0 +1,74 @@
# Deploy AudioQnA in Kubernetes Cluster on Xeon and Gaudi
This document outlines the deployment process for an AudioQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline components on Intel Xeon servers and Gaudi machines.
The AudioQnA service leverages a Kubernetes operator called genai-microservices-connector (GMC). GMC supports connecting microservices to create pipelines based on the specification in the pipeline yaml file, in addition to allowing the user to dynamically control which model is used in a service such as an LLM or embedder. The underlying pipeline language also supports using external services that may be running in a public or private cloud elsewhere.
Install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in Section "Getting Started" at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector). We will soon publish images to Docker Hub, at which point no builds will be required, simplifying installation.
The AudioQnA application is defined as a Custom Resource (CR) file that the above GMC operator acts upon. It first checks if the microservices listed in the CR yaml file are running; if not, it starts them and then proceeds to connect them. When the AudioQnA pipeline is ready, the service endpoint details are returned, letting you use the application. If you run "kubectl get pods", you will see all the component microservices, in particular `asr`, `tts`, and `llm`.
## Using prebuilt images
AudioQnA uses the following prebuilt images if you choose a Xeon deployment:
- tgi-service: ghcr.io/huggingface/text-generation-inference:1.4
- llm: opea/llm-tgi:v0.9
- asr: opea/asr:v0.9
- whisper: opea/whisper:v0.9
- tts: opea/tts:v0.9
- speecht5: opea/speecht5:v0.9
Should you desire to use the Gaudi accelerator, alternate images are used for the TGI, whisper, and speecht5 services.
For Gaudi:
- tgi-service: ghcr.io/huggingface/tgi-gaudi:1.2.1
- whisper-gaudi: opea/whisper-gaudi:v0.9
- speecht5-gaudi: opea/speecht5-gaudi:v0.9
> [NOTE]
> Please refer to [Xeon README](https://github.com/opea-project/GenAIExamples/blob/main/AudioQnA/docker/xeon/README.md) or [Gaudi README](https://github.com/opea-project/GenAIExamples/blob/main/AudioQnA/docker/gaudi/README.md) to build the OPEA images. These too will be available on Docker Hub soon to simplify use.
## Deploy AudioQnA pipeline
This involves deploying the AudioQnA custom resource. You can use audioQnA_xeon.yaml or, if you have a Gaudi cluster, audioQnA_gaudi.yaml.
1. Create namespace and deploy application
```sh
kubectl create ns audioqa
kubectl apply -f $(pwd)/audioQnA_xeon.yaml
```
2. GMC will reconcile the AudioQnA custom resource and get all related components/services ready. Check if the services are up.
```sh
kubectl get service -n audioqa
```
3. Retrieve the application access URL
```sh
kubectl get gmconnectors.gmc.opea.io -n audioqa
NAME URL READY AGE
audioqa http://router-service.audioqa.svc.cluster.local:8080 6/0/6 5m
```
4. Deploy a client pod to test the application
```sh
kubectl create deployment client-test -n audioqa --image=python:3.8.13 -- sleep infinity
```
5. Access the application using the above URL from the client pod
```sh
export CLIENT_POD=$(kubectl get pod -n audioqa -l app=client-test -o jsonpath={.items..metadata.name})
export accessUrl=$(kubectl get gmc -n audioqa -o jsonpath="{.items[?(@.metadata.name=='audioqa')].status.accessUrl}")
kubectl exec "$CLIENT_POD" -n audioqa -- curl $accessUrl -X POST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "parameters":{"max_new_tokens":64, "do_sample": true, "streaming":false}}' -H 'Content-Type: application/json'
```
> [NOTE]
You can remove your AudioQnA pipeline by executing standard Kubernetes kubectl commands to remove a custom resource. Verify it was removed by executing kubectl get pods in the audioqa namespace.


@@ -0,0 +1,58 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: gmc.opea.io/v1alpha3
kind: GMConnector
metadata:
labels:
app.kubernetes.io/name: gmconnector
app.kubernetes.io/managed-by: kustomize
gmc/platform: gaudi
name: audioqa
namespace: audioqa
spec:
routerConfig:
name: router
serviceName: router-service
nodes:
root:
routerType: Sequence
steps:
- name: Asr
internalService:
serviceName: asr-svc
config:
endpoint: /v1/audio/transcriptions
ASR_ENDPOINT: whisper-gaudi-svc
- name: WhisperGaudi
internalService:
serviceName: whisper-gaudi-svc
config:
endpoint: /v1/asr
isDownstreamService: true
- name: Llm
data: $response
internalService:
serviceName: llm-svc
config:
endpoint: /v1/chat/completions
TGI_LLM_ENDPOINT: tgi-gaudi-svc
- name: TgiGaudi
internalService:
serviceName: tgi-gaudi-svc
config:
endpoint: /generate
isDownstreamService: true
- name: Tts
data: $response
internalService:
serviceName: tts-svc
config:
endpoint: /v1/audio/speech
TTS_ENDPOINT: speecht5-gaudi-svc
- name: SpeechT5Gaudi
internalService:
serviceName: speecht5-gaudi-svc
config:
endpoint: /v1/tts
isDownstreamService: true


@@ -0,0 +1,58 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: gmc.opea.io/v1alpha3
kind: GMConnector
metadata:
labels:
app.kubernetes.io/name: gmconnector
app.kubernetes.io/managed-by: kustomize
gmc/platform: xeon
name: audioqa
namespace: audioqa
spec:
routerConfig:
name: router
serviceName: router-service
nodes:
root:
routerType: Sequence
steps:
- name: Asr
internalService:
serviceName: asr-svc
config:
endpoint: /v1/audio/transcriptions
ASR_ENDPOINT: whisper-svc
- name: Whisper
internalService:
serviceName: whisper-svc
config:
endpoint: /v1/asr
isDownstreamService: true
- name: Llm
data: $response
internalService:
serviceName: llm-svc
config:
endpoint: /v1/chat/completions
TGI_LLM_ENDPOINT: tgi-svc
- name: Tgi
internalService:
serviceName: tgi-svc
config:
endpoint: /generate
isDownstreamService: true
- name: Tts
data: $response
internalService:
serviceName: tts-svc
config:
endpoint: /v1/audio/speech
TTS_ENDPOINT: speecht5-svc
- name: SpeechT5
internalService:
serviceName: speecht5-svc
config:
endpoint: /v1/tts
isDownstreamService: true


@@ -0,0 +1,32 @@
# Deploy AudioQnA in a Kubernetes Cluster
> [NOTE]
> The following values must be set before you can deploy:
> HUGGINGFACEHUB_API_TOKEN
> You can also customize the "MODEL_ID" and "model-volume"
## Deploy On Xeon
```
cd GenAIExamples/AudioQnA/kubernetes/manifests/xeon
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" audioqna.yaml
kubectl apply -f audioqna.yaml
```
## Deploy On Gaudi
```
cd GenAIExamples/AudioQnA/kubernetes/manifests/gaudi
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" audioqna.yaml
kubectl apply -f audioqna.yaml
```
## Verify Services
Make sure all the pods are running, and restart the audioqna-xxxx pod if necessary.
```bash
kubectl get pods
curl http://${host_ip}:3008/v1/audioqna -X POST -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64}' -H 'Content-Type: application/json'
```


@@ -0,0 +1,439 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: audio-qna-config
namespace: default
data:
ASR_ENDPOINT: http://whisper-svc.default.svc.cluster.local:7066
TTS_ENDPOINT: http://speecht5-svc.default.svc.cluster.local:7055
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:3006
MEGA_SERVICE_HOST_IP: audioqna-backend-server-svc
ASR_SERVICE_HOST_IP: asr-svc
ASR_SERVICE_PORT: "3001"
LLM_SERVICE_HOST_IP: llm-svc
LLM_SERVICE_PORT: "3007"
TTS_SERVICE_HOST_IP: tts-svc
TTS_SERVICE_PORT: "3002"
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: asr-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: asr-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: asr-deploy
spec:
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: asr-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: audio-qna-config
image: opea/asr:v0.9
imagePullPolicy: IfNotPresent
name: asr-deploy
args: null
ports:
- containerPort: 9099
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: asr-svc
spec:
type: ClusterIP
selector:
app: asr-deploy
ports:
- name: service
port: 3001
targetPort: 9099
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: whisper-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: whisper-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: whisper-deploy
spec:
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: whisper-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: audio-qna-config
image: opea/whisper-gaudi:v0.9
imagePullPolicy: IfNotPresent
name: whisper-deploy
args: null
ports:
- containerPort: 7066
resources:
limits:
habana.ai/gaudi: 1
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: whisper-svc
spec:
type: ClusterIP
selector:
app: whisper-deploy
ports:
- name: service
port: 7066
targetPort: 7066
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: tts-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: tts-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: tts-deploy
spec:
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: tts-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: audio-qna-config
image: opea/tts:v0.9
imagePullPolicy: IfNotPresent
name: tts-deploy
args: null
ports:
- containerPort: 9088
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: tts-svc
spec:
type: ClusterIP
selector:
app: tts-deploy
ports:
- name: service
port: 3002
targetPort: 9088
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: speecht5-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: speecht5-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: speecht5-deploy
spec:
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: speecht5-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: audio-qna-config
image: opea/speecht5-gaudi:v0.9
imagePullPolicy: IfNotPresent
name: speecht5-deploy
args: null
ports:
- containerPort: 7055
resources:
limits:
habana.ai/gaudi: 1
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: speecht5-svc
spec:
type: ClusterIP
selector:
app: speecht5-deploy
ports:
- name: service
port: 7055
targetPort: 7055
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-dependency-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: llm-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-dependency-deploy
spec:
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: audio-qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
name: llm-dependency-deploy-demo
securityContext:
capabilities:
add:
- SYS_NICE
args:
- --model-id
- $(LLM_MODEL_ID)
- --max-input-length
- '2048'
- --max-total-tokens
- '4096'
- --max-batch-total-tokens
- '65536'
- --max-batch-prefill-tokens
- '4096'
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: PREFILL_BATCH_BUCKET_SIZE
value: "1"
- name: BATCH_BUCKET_SIZE
value: "8"
serviceAccountName: default
volumes:
- name: model-volume
hostPath:
path: /home/sdp/cesg
type: Directory
- name: shm
emptyDir:
medium: Memory
sizeLimit: 1Gi
---
kind: Service
apiVersion: v1
metadata:
name: llm-dependency-svc
spec:
type: ClusterIP
selector:
app: llm-dependency-deploy
ports:
- name: service
port: 3006
targetPort: 80
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: llm-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-deploy
spec:
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: llm-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: audio-qna-config
image: opea/llm-tgi:v0.9
imagePullPolicy: IfNotPresent
name: llm-deploy
args: null
ports:
- containerPort: 9000
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: llm-svc
spec:
type: ClusterIP
selector:
app: llm-deploy
ports:
- name: service
port: 3007
targetPort: 9000
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: audioqna-backend-server-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: audioqna-backend-server-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: audioqna-backend-server-deploy
spec:
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: audioqna-backend-server-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: audio-qna-config
image: opea/audioqna:v0.9
imagePullPolicy: IfNotPresent
name: audioqna-backend-server-deploy
args: null
ports:
- containerPort: 8888
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: audioqna-backend-server-svc
spec:
type: NodePort
selector:
app: audioqna-backend-server-deploy
ports:
- name: service
port: 3008
targetPort: 8888
nodePort: 30666

View File

@@ -0,0 +1,395 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: audio-qna-config
namespace: default
data:
ASR_ENDPOINT: http://whisper-svc.default.svc.cluster.local:7066
TTS_ENDPOINT: http://speecht5-svc.default.svc.cluster.local:7055
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:3006
MEGA_SERVICE_HOST_IP: audioqna-backend-server-svc
ASR_SERVICE_HOST_IP: asr-svc
ASR_SERVICE_PORT: "3001"
LLM_SERVICE_HOST_IP: llm-svc
LLM_SERVICE_PORT: "3007"
TTS_SERVICE_HOST_IP: tts-svc
TTS_SERVICE_PORT: "3002"
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: asr-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: asr-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: asr-deploy
spec:
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: asr-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: audio-qna-config
image: opea/asr:v0.9
imagePullPolicy: IfNotPresent
name: asr-deploy
args: null
ports:
- containerPort: 9099
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: asr-svc
spec:
type: ClusterIP
selector:
app: asr-deploy
ports:
- name: service
port: 3001
targetPort: 9099
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: whisper-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: whisper-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: whisper-deploy
spec:
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: whisper-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: audio-qna-config
image: opea/whisper:v0.9
imagePullPolicy: IfNotPresent
name: whisper-deploy
args: null
ports:
- containerPort: 7066
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: whisper-svc
spec:
type: ClusterIP
selector:
app: whisper-deploy
ports:
- name: service
port: 7066
targetPort: 7066
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: tts-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: tts-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: tts-deploy
spec:
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: tts-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: audio-qna-config
image: opea/tts:v0.9
imagePullPolicy: IfNotPresent
name: tts-deploy
args: null
ports:
- containerPort: 9088
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: tts-svc
spec:
type: ClusterIP
selector:
app: tts-deploy
ports:
- name: service
port: 3002
targetPort: 9088
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: speecht5-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: speecht5-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: speecht5-deploy
spec:
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: speecht5-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: audio-qna-config
image: opea/speecht5:v0.9
imagePullPolicy: IfNotPresent
name: speecht5-deploy
args: null
ports:
- containerPort: 7055
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: speecht5-svc
spec:
type: ClusterIP
selector:
app: speecht5-deploy
ports:
- name: service
port: 7055
targetPort: 7055
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-dependency-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: llm-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-dependency-deploy
spec:
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: audio-qna-config
image: ghcr.io/huggingface/text-generation-inference:2.2.0
name: llm-dependency-deploy-demo
securityContext:
capabilities:
add:
- SYS_NICE
args:
- --model-id
- $(LLM_MODEL_ID)
- --max-input-length
- '2048'
- --max-total-tokens
- '4096'
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
ports:
- containerPort: 80
serviceAccountName: default
volumes:
- name: model-volume
hostPath:
path: /home/sdp/cesg
type: Directory
- name: shm
emptyDir:
medium: Memory
sizeLimit: 1Gi
---
kind: Service
apiVersion: v1
metadata:
name: llm-dependency-svc
spec:
type: ClusterIP
selector:
app: llm-dependency-deploy
ports:
- name: service
port: 3006
targetPort: 80
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: llm-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-deploy
spec:
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: llm-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: audio-qna-config
image: opea/llm-tgi:v0.9
imagePullPolicy: IfNotPresent
name: llm-deploy
args: null
ports:
- containerPort: 9000
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: llm-svc
spec:
type: ClusterIP
selector:
app: llm-deploy
ports:
- name: service
port: 3007
targetPort: 9000
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: audioqna-backend-server-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: audioqna-backend-server-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: audioqna-backend-server-deploy
spec:
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: audioqna-backend-server-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: audio-qna-config
image: opea/audioqna:v0.9
imagePullPolicy: IfNotPresent
name: audioqna-backend-server-deploy
args: null
ports:
- containerPort: 8888
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: audioqna-backend-server-svc
spec:
type: NodePort
selector:
app: audioqna-backend-server-deploy
ports:
- name: service
port: 3008
targetPort: 8888
nodePort: 30666

View File

@@ -3,35 +3,27 @@
# SPDX-License-Identifier: Apache-2.0
set -e
echo "IMAGE_REPO=${IMAGE_REPO}"
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')
function build_docker_images() {
cd $WORKPATH
cd $WORKPATH/docker
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
docker build -t opea/whisper-gaudi:latest -f comps/asr/whisper/Dockerfile_hpu .
docker build -t opea/asr:latest -f comps/asr/Dockerfile .
docker build -t opea/llm-tgi:latest -f comps/llms/text-generation/tgi/Dockerfile .
docker build -t opea/speecht5-gaudi:latest -f comps/tts/speecht5/Dockerfile_hpu .
docker build -t opea/tts:latest -f comps/tts/Dockerfile .
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="audioqna whisper-gaudi asr llm-tgi speecht5-gaudi tts"
docker compose -f docker_build_compose.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
cd ..
cd $WORKPATH/docker
docker build --no-cache -t opea/audioqna:latest -f Dockerfile .
# cd $WORKPATH/docker/ui
# docker build --no-cache -t opea/audioqna-ui:latest -f docker/Dockerfile .
docker images
docker images && sleep 1s
}
function start_services() {
@@ -55,25 +47,25 @@ function start_services() {
# sed -i "s/backend_address/$ip_address/g" $WORKPATH/docker/ui/svelte/.env
if [[ "$IMAGE_REPO" != "" ]]; then
# Replace the container name with a test-specific name
echo "using image repository $IMAGE_REPO and image tag $IMAGE_TAG"
sed -i "s#image: opea/audioqna:latest#image: opea/audioqna:${IMAGE_TAG}#g" compose.yaml
sed -i "s#image: opea/audioqna-ui:latest#image: opea/audioqna-ui:${IMAGE_TAG}#g" compose.yaml
sed -i "s#image: opea/*#image: ${IMAGE_REPO}opea/#g" compose.yaml
echo "cat compose.yaml"
cat compose.yaml
fi
# Start Docker Containers
docker compose up -d
docker compose up -d > ${LOG_PATH}/start_services_with_compose.log
n=0
until [[ "$n" -ge 500 ]]; do
until [[ "$n" -ge 100 ]]; do
docker logs tgi-gaudi-server > $LOG_PATH/tgi_service_start.log
if grep -q Connected $LOG_PATH/tgi_service_start.log; then
break
fi
sleep 1s
sleep 5s
n=$((n+1))
done
n=0
until [[ "$n" -ge 100 ]]; do
docker logs whisper-service > $LOG_PATH/whisper_service_start.log
if grep -q "Uvicorn server setup on port" $LOG_PATH/whisper_service_start.log; then
break
fi
sleep 5s
n=$((n+1))
done
}
@@ -131,7 +123,7 @@ function stop_docker() {
function main() {
stop_docker
if [[ "$IMAGE_REPO" == "" ]]; then build_docker_images; fi
if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
start_services
# validate_microservices

View File

@@ -3,32 +3,27 @@
# SPDX-License-Identifier: Apache-2.0
set -e
echo "IMAGE_REPO=${IMAGE_REPO}"
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')
function build_docker_images() {
cd $WORKPATH
cd $WORKPATH/docker
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
docker build -t opea/whisper:latest -f comps/asr/whisper/Dockerfile .
docker build -t opea/asr:latest -f comps/asr/Dockerfile .
docker build -t opea/llm-tgi:latest -f comps/llms/text-generation/tgi/Dockerfile .
docker build -t opea/speecht5:latest -f comps/tts/speecht5/Dockerfile .
docker build -t opea/tts:latest -f comps/tts/Dockerfile .
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="audioqna whisper asr llm-tgi speecht5 tts"
docker compose -f docker_build_compose.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
cd $WORKPATH/docker
docker build --no-cache -t opea/audioqna:latest -f Dockerfile .
# cd $WORKPATH/docker/ui
# docker build --no-cache -t opea/audioqna-ui:latest -f docker/Dockerfile .
docker images
docker images && sleep 1s
}
function start_services() {
@@ -51,25 +46,15 @@ function start_services() {
# sed -i "s/backend_address/$ip_address/g" $WORKPATH/docker/ui/svelte/.env
if [[ "$IMAGE_REPO" != "" ]]; then
# Replace the container name with a test-specific name
echo "using image repository $IMAGE_REPO and image tag $IMAGE_TAG"
sed -i "s#image: opea/audioqna:latest#image: opea/audioqna:${IMAGE_TAG}#g" compose.yaml
sed -i "s#image: opea/audioqna-ui:latest#image: opea/audioqna-ui:${IMAGE_TAG}#g" compose.yaml
sed -i "s#image: opea/*#image: ${IMAGE_REPO}opea/#g" compose.yaml
echo "cat compose.yaml"
cat compose.yaml
fi
# Start Docker Containers
docker compose up -d
docker compose up -d > ${LOG_PATH}/start_services_with_compose.log
n=0
until [[ "$n" -ge 500 ]]; do
until [[ "$n" -ge 100 ]]; do
docker logs tgi-service > $LOG_PATH/tgi_service_start.log
if grep -q Connected $LOG_PATH/tgi_service_start.log; then
break
fi
sleep 1s
sleep 5s
n=$((n+1))
done
}
@@ -128,7 +113,7 @@ function stop_docker() {
function main() {
stop_docker
if [[ "$IMAGE_REPO" == "" ]]; then build_docker_images; fi
if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
start_services
validate_megaservice

View File

@@ -0,0 +1,111 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -xe
USER_ID=$(whoami)
LOG_PATH=/home/$(whoami)/logs
MOUNT_DIR=/home/$USER_ID/.cache/huggingface/hub
IMAGE_REPO=${IMAGE_REPO:-}
function install_audioqa() {
kubectl create ns $APP_NAMESPACE
sed -i "s|namespace: audioqa|namespace: $APP_NAMESPACE|g" ./audioQnA_gaudi.yaml
kubectl apply -f ./audioQnA_gaudi.yaml
# Wait until the router service is ready
echo "Waiting for the audioqa router service to be ready..."
wait_until_pod_ready "audioqa router" $APP_NAMESPACE "router-service"
output=$(kubectl get pods -n $APP_NAMESPACE)
echo $output
}
function validate_audioqa() {
# deploy client pod for testing
kubectl create deployment client-test -n $APP_NAMESPACE --image=python:3.8.13 -- sleep infinity
# wait for client pod ready
wait_until_pod_ready "client-test" $APP_NAMESPACE "client-test"
# giving time to populating data
sleep 60
kubectl get pods -n $APP_NAMESPACE
# send request to audioqa
export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name})
echo "$CLIENT_POD"
accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='audioqa')].status.accessUrl}")
byte_str=$(kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -s -X POST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "parameters":{"max_new_tokens":64, "do_sample": true, "streaming":false}}' -H 'Content-Type: application/json' | jq .byte_str)
echo "$byte_str" > $LOG_PATH/curl_audioqa.log
if [ -z "$byte_str" ]; then
echo "audioqa failed, please check the logs in ${LOG_PATH}!"
exit 1
fi
echo "Audioqa response check succeed!"
}
function wait_until_pod_ready() {
echo "Waiting for the $1 to be ready..."
max_retries=30
retry_count=0
while ! is_pod_ready $2 $3; do
if [ $retry_count -ge $max_retries ]; then
echo "$1 is not ready after waiting for a significant amount of time"
get_gmc_controller_logs
exit 1
fi
echo "$1 is not ready yet. Retrying in 10 seconds..."
sleep 10
output=$(kubectl get pods -n $2)
echo $output
retry_count=$((retry_count + 1))
done
}
function is_pod_ready() {
if [ "$2" == "gmc-controller" ]; then
pod_status=$(kubectl get pods -n $1 -o jsonpath='{.items[].status.conditions[?(@.type=="Ready")].status}')
else
pod_status=$(kubectl get pods -n $1 -l app=$2 -o jsonpath='{.items[].status.conditions[?(@.type=="Ready")].status}')
fi
if [ "$pod_status" == "True" ]; then
return 0
else
return 1
fi
}
function get_gmc_controller_logs() {
# Fetch the name of the pod with the app-name gmc-controller in the specified namespace
pod_name=$(kubectl get pods -n $SYSTEM_NAMESPACE -l control-plane=gmc-controller -o jsonpath='{.items[0].metadata.name}')
# Check if the pod name was found
if [ -z "$pod_name" ]; then
echo "No pod found with app-name gmc-controller in namespace $SYSTEM_NAMESPACE"
return 1
fi
# Get the logs of the found pod
echo "Fetching logs for pod $pod_name in namespace $SYSTEM_NAMESPACE..."
kubectl logs $pod_name -n $SYSTEM_NAMESPACE
}
if [ $# -eq 0 ]; then
echo "Usage: $0 <function_name>"
exit 1
fi
case "$1" in
install_AudioQnA)
pushd AudioQnA/kubernetes
install_audioqa
popd
;;
validate_AudioQnA)
pushd AudioQnA/kubernetes
validate_audioqa
popd
;;
*)
echo "Unknown function: $1"
;;
esac

View File

@@ -0,0 +1,111 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -xe
USER_ID=$(whoami)
LOG_PATH=/home/$(whoami)/logs
MOUNT_DIR=/home/$USER_ID/.cache/huggingface/hub
IMAGE_REPO=${IMAGE_REPO:-}
function install_audioqa() {
kubectl create ns $APP_NAMESPACE
sed -i "s|namespace: audioqa|namespace: $APP_NAMESPACE|g" ./audioQnA_xeon.yaml
kubectl apply -f ./audioQnA_xeon.yaml
# Wait until the router service is ready
echo "Waiting for the audioqa router service to be ready..."
wait_until_pod_ready "audioqa router" $APP_NAMESPACE "router-service"
output=$(kubectl get pods -n $APP_NAMESPACE)
echo $output
}
function validate_audioqa() {
# deploy client pod for testing
kubectl create deployment client-test -n $APP_NAMESPACE --image=python:3.8.13 -- sleep infinity
# wait for client pod ready
wait_until_pod_ready "client-test" $APP_NAMESPACE "client-test"
# giving time to populating data
sleep 60
kubectl get pods -n $APP_NAMESPACE
# send request to audioqa
export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name})
echo "$CLIENT_POD"
accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='audioqa')].status.accessUrl}")
byte_str=$(kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -s -X POST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "parameters":{"max_new_tokens":64, "do_sample": true, "streaming":false}}' -H 'Content-Type: application/json' | jq .byte_str)
echo "$byte_str" > $LOG_PATH/curl_audioqa.log
if [ -z "$byte_str" ]; then
echo "audioqa failed, please check the logs in ${LOG_PATH}!"
exit 1
fi
echo "Audioqa response check succeed!"
}
function wait_until_pod_ready() {
echo "Waiting for the $1 to be ready..."
max_retries=30
retry_count=0
while ! is_pod_ready $2 $3; do
if [ $retry_count -ge $max_retries ]; then
echo "$1 is not ready after waiting for a significant amount of time"
get_gmc_controller_logs
exit 1
fi
echo "$1 is not ready yet. Retrying in 10 seconds..."
sleep 10
output=$(kubectl get pods -n $2)
echo $output
retry_count=$((retry_count + 1))
done
}
function is_pod_ready() {
if [ "$2" == "gmc-controller" ]; then
pod_status=$(kubectl get pods -n $1 -o jsonpath='{.items[].status.conditions[?(@.type=="Ready")].status}')
else
pod_status=$(kubectl get pods -n $1 -l app=$2 -o jsonpath='{.items[].status.conditions[?(@.type=="Ready")].status}')
fi
if [ "$pod_status" == "True" ]; then
return 0
else
return 1
fi
}
function get_gmc_controller_logs() {
# Fetch the name of the pod with the app-name gmc-controller in the specified namespace
pod_name=$(kubectl get pods -n $SYSTEM_NAMESPACE -l control-plane=gmc-controller -o jsonpath='{.items[0].metadata.name}')
# Check if the pod name was found
if [ -z "$pod_name" ]; then
echo "No pod found with app-name gmc-controller in namespace $SYSTEM_NAMESPACE"
return 1
fi
# Get the logs of the found pod
echo "Fetching logs for pod $pod_name in namespace $SYSTEM_NAMESPACE..."
kubectl logs $pod_name -n $SYSTEM_NAMESPACE
}
if [ $# -eq 0 ]; then
echo "Usage: $0 <function_name>"
exit 1
fi
case "$1" in
install_AudioQnA)
pushd AudioQnA/kubernetes
install_audioqa
popd
;;
validate_AudioQnA)
pushd AudioQnA/kubernetes
validate_audioqa
popd
;;
*)
echo "Unknown function: $1"
;;
esac

View File

@@ -10,7 +10,90 @@ ChatQnA architecture shows below:
ChatQnA is implemented on top of [GenAIComps](https://github.com/opea-project/GenAIComps); the ChatQnA flow chart is shown below:
![Flow Chart](./assets/img/chatqna_flow_chart.png)
```mermaid
---
config:
flowchart:
nodeSpacing: 100
rankSpacing: 100
curve: linear
theme: base
themeVariables:
fontSize: 42px
---
flowchart LR
%% Colors %%
classDef blue fill:#ADD8E6,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
classDef orange fill:#FBAA60,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
classDef orchid fill:#C26DBC,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
classDef invisible fill:transparent,stroke:transparent;
style ChatQnA-MegaService stroke:#000000
%% Subgraphs %%
subgraph ChatQnA-MegaService["ChatQnA-MegaService"]
direction LR
EM([Embedding <br>]):::blue
RET([Retrieval <br>]):::blue
RER([Rerank <br>]):::blue
LLM([LLM <br>]):::blue
end
subgraph User Interface
direction TB
a([User Input Query]):::orchid
Ingest([Ingest data]):::orchid
UI([UI server<br>]):::orchid
end
subgraph ChatQnA GateWay
direction LR
invisible1[ ]:::invisible
GW([ChatQnA GateWay<br>]):::orange
end
subgraph .
X([OPEA Microservice]):::blue
Y{{Open Source Service}}
Z([OPEA Gateway]):::orange
Z1([UI]):::orchid
end
TEI_RER{{Reranking service<br>'TEI'<br>}}
TEI_EM{{Embedding service <br>'TEI LangChain'<br>}}
VDB{{Vector DB<br>'Redis'<br>}}
R_RET{{Retriever service <br>'LangChain Redis'<br>}}
DP([Data Preparation<br>'LangChain Redis'<br>]):::blue
LLM_gen{{LLM Service <br>'TGI'<br>}}
%% Data Preparation flow
%% Ingest data flow
direction LR
Ingest[Ingest data] -->|a| UI
UI -->|b| DP
DP <-.->|c| TEI_EM
%% Questions interaction
direction LR
a[User Input Query] -->|1| UI
UI -->|2| GW
GW <==>|3| ChatQnA-MegaService
EM ==>|4| RET
RET ==>|5| RER
RER ==>|6| LLM
%% Embedding service flow
direction TB
EM <-.->|3'| TEI_EM
RET <-.->|4'| R_RET
RER <-.->|5'| TEI_RER
LLM <-.->|6'| LLM_gen
direction TB
%% Vector DB interaction
R_RET <-.->|d|VDB
DP <-.->|d|VDB
```
This ChatQnA use case performs RAG using LangChain, Redis VectorDB and Text Generation Inference on Intel Gaudi2 or Intel Xeon Scalable Processors. The Intel Gaudi2 accelerator supports both training and inference for deep learning models, in particular LLMs. Visit [Habana AI products](https://habana.ai/products) for more details.
@@ -78,7 +161,7 @@ Find the corresponding [compose.yaml](./docker/gaudi/compose.yaml).
```bash
cd GenAIExamples/ChatQnA/docker/gaudi/
docker compose up -d
TAG=v0.9 docker compose up -d
```
> Notice: Currently only the **Habana Driver 1.16.x** is supported for Gaudi.
@@ -91,7 +174,7 @@ Find the corresponding [compose.yaml](./docker/xeon/compose.yaml).
```bash
cd GenAIExamples/ChatQnA/docker/xeon/
docker compose up -d
TAG=v0.9 docker compose up -d
```
Refer to the [Xeon Guide](./docker/xeon/README.md) for more instructions on building docker images from source.
@@ -100,7 +183,7 @@ Refer to the [Xeon Guide](./docker/xeon/README.md) for more instructions on buil
```bash
cd GenAIExamples/ChatQnA/docker/gpu/
docker compose up -d
TAG=v0.9 docker compose up -d
```
Refer to the [NVIDIA GPU Guide](./docker/gpu/README.md) for more instructions on building docker images from source.
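After any of the `docker compose up -d` commands above, a quick smoke test can confirm the megaservice is answering. A minimal sketch, assuming the backend is reachable on port 8888 and exposes the `/v1/chatqna` route (adjust host and port to your deployment):
```bash
host_ip=$(hostname -I | awk '{print $1}')
curl http://${host_ip}:8888/v1/chatqna \
  -H 'Content-Type: application/json' \
  -d '{"messages": "What is OPEA?"}'
```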
@@ -123,6 +206,10 @@ Refer to the [ChatQnA helm chart](https://github.com/opea-project/GenAIInfra/tre
Refer to the [AI PC Guide](./docker/aipc/README.md) for instructions on deploying ChatQnA on AI PC.
### Deploy ChatQnA on Red Hat OpenShift Container Platform (RHOCP)
Refer to the [Intel Technology enabling for Openshift readme](https://github.com/intel/intel-technology-enabling-for-openshift/blob/main/workloads/opea/chatqna/README.md) for instructions to deploy ChatQnA prototype on RHOCP with [Red Hat OpenShift AI (RHOAI)](https://www.redhat.com/en/technologies/cloud-computing/openshift/openshift-ai).
## Consume ChatQnA Service
Two ways of consuming ChatQnA Service:

546
ChatQnA/benchmark/README.md Normal file
View File

@@ -0,0 +1,546 @@
# ChatQnA Benchmarking
This folder contains a collection of Kubernetes manifest files for deploying the ChatQnA service across scalable nodes. It includes a comprehensive [benchmarking tool](https://github.com/opea-project/GenAIEval/blob/main/evals/benchmark/README.md) that enables throughput analysis to assess inference performance.
By following this guide, you can run benchmarks on your deployment and share the results with the OPEA community.
# Purpose
We aim to run these benchmarks and share them with the OPEA community for three primary reasons:
- To offer insights on inference throughput in real-world scenarios, helping you choose the best service or deployment for your needs.
- To establish a baseline for validating optimization solutions across different implementations, providing clear guidance on which methods are most effective for your use case.
- To inspire the community to build upon our benchmarks, allowing us to better quantify new solutions in conjunction with current leading LLMs, serving frameworks, etc.
# Metrics
The benchmark reports the following metrics:
- Number of Concurrent Requests
- End-to-End Latency: P50, P90, P99 (in milliseconds)
- End-to-End First Token Latency: P50, P90, P99 (in milliseconds)
- Average Next Token Latency (in milliseconds)
- Average Token Latency (in milliseconds)
- Requests Per Second (RPS)
- Output Tokens Per Second
- Input Tokens Per Second
Results will be displayed in the terminal and saved as a CSV file named `1_stats.csv` for easy export to spreadsheets.
# Getting Started
## Prerequisites
- Install Kubernetes by following [this guide](https://github.com/opea-project/docs/blob/main/guide/installation/k8s_install/k8s_install_kubespray.md).
- Ensure every node has direct internet access.
- Set up kubectl on the master node with access to the Kubernetes cluster.
- Install Python 3.8+ on the master node for running the stress tool.
- Ensure all nodes have a local /mnt/models folder, which will be mounted by the pods (a sketch for creating it follows this list).
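A minimal sketch for creating that folder on each node; the hostnames are hypothetical, so run it on every node or adapt it to your provisioning tooling:
```bash
for node in k8s-worker1 k8s-worker2 k8s-worker3; do   # hypothetical node names
  ssh "$node" 'sudo mkdir -p /mnt/models && sudo chmod 755 /mnt/models'
done
```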
## Kubernetes Cluster Example
```bash
$ kubectl get nodes
NAME STATUS ROLES AGE VERSION
k8s-master Ready control-plane 35d v1.29.6
k8s-work1 Ready <none> 35d v1.29.5
k8s-work2 Ready <none> 35d v1.29.6
k8s-work3 Ready <none> 35d v1.29.6
```
## Manifest preparation
We have created the [BKC manifest](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/benchmark) for single-node, two-node, and four-node K8s clusters. Before applying them, check out the repository and configure a few values.
```bash
# on k8s-master node
git clone https://github.com/opea-project/GenAIExamples.git
cd GenAIExamples/ChatQnA/benchmark
# replace the image tag from latest to v0.9 since we want to test with v0.9 release
IMAGE_TAG=v0.9
find . -name '*.yaml' -type f -exec sed -i "s#image: opea/\(.*\):latest#image: opea/\1:${IMAGE_TAG}#g" {} \;
# set the huggingface token
HUGGINGFACE_TOKEN=<your token>
find . -name '*.yaml' -type f -exec sed -i "s#\${HF_TOKEN}#${HUGGINGFACE_TOKEN}#g" {} \;
# set models
LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
EMBEDDING_MODEL_ID=BAAI/bge-base-en-v1.5
RERANK_MODEL_ID=BAAI/bge-reranker-base
find . -name '*.yaml' -type f -exec sed -i "s#\$(LLM_MODEL_ID)#${LLM_MODEL_ID}#g" {} \;
find . -name '*.yaml' -type f -exec sed -i "s#\$(EMBEDDING_MODEL_ID)#${EMBEDDING_MODEL_ID}#g" {} \;
find . -name '*.yaml' -type f -exec sed -i "s#\$(RERANK_MODEL_ID)#${RERANK_MODEL_ID}#g" {} \;
```
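A quick sanity check, as a sketch, that the substitutions above took effect before applying the manifests; any output from the greps means a value was missed:
```bash
# Expect no matches if all tags and tokens were replaced
grep -rn "image: opea/.*:latest" . || echo "no 'latest' image tags left"
grep -rnF '${HF_TOKEN}' . || echo "no unresolved HF_TOKEN placeholders"
```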
## Benchmark tool preparation
The test uses the [benchmark tool](https://github.com/opea-project/GenAIEval/tree/main/evals/benchmark) to run the performance test. Set up the benchmark tool on the Kubernetes master node, which is k8s-master.
```bash
# on k8s-master node
git clone https://github.com/opea-project/GenAIEval.git
cd GenAIEval
python3 -m venv stress_venv
source stress_venv/bin/activate
pip install -r requirements.txt
```
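As a quick sanity check that the virtual environment is usable (assuming the requirements pull in `locust`, which the example result below reports as the underlying tool):
```bash
source stress_venv/bin/activate
python -c "import locust; print('locust', locust.__version__)"
```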
## Test Configurations
Workload configuration:
| Key | Value |
| -------- | ------- |
| Workload | ChatQnA |
| Tag | v0.9 |
Model configuration:
| Key | Value |
| ---------- | ------------------ |
| Embedding | BAAI/bge-base-en-v1.5 |
| Reranking | BAAI/bge-reranker-base |
| Inference | Intel/neural-chat-7b-v3-3 |
Benchmark parameters:
| Key | Value |
| ---------- | ------------------ |
| LLM input tokens | 1024 |
| LLM output tokens | 128 |
Number of test requests for each scheduled node count:
| Node count | Concurrency | Query number |
| ----- | -------- | -------- |
| 1 | 128 | 640 |
| 2 | 256 | 1280 |
| 4 | 512 | 2560 |
More detailed configuration can be found in the configuration file [benchmark.yaml](./benchmark.yaml).
## Test Steps
### Single node test
#### 1. Preparation
We add a label to one Kubernetes node to make sure all pods are scheduled to that node:
```bash
kubectl label nodes k8s-worker1 node-type=chatqna-opea
```
#### 2. Install ChatQnA
Go to the [BKC manifest](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/benchmark/single_gaudi) directory and apply it to K8s.
```bash
# on k8s-master node
cd GenAIExamples/ChatQnA/benchmark/single_gaudi
kubectl apply -f .
```
#### 3. Run tests
We copy the configuration file [benchmark.yaml](./benchmark.yaml) to `GenAIEval/evals/benchmark/benchmark.yaml` and configure `test_suite_config.user_queries` and `test_suite_config.test_output_dir`.
```bash
export USER_QUERIES="[4, 8, 16, 640]"
export TEST_OUTPUT_DIR="/home/sdp/benchmark_output/node_1"
envsubst < ./benchmark.yaml > GenAIEval/evals/benchmark/benchmark.yaml
```
Then run the benchmark tool:
```bash
cd GenAIEval/evals/benchmark
python benchmark.py
```
#### 4. Data collection
All the test results are saved under `/home/sdp/benchmark_output/node_1`, the folder configured via the `TEST_OUTPUT_DIR` environment variable in the previous step.
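A quick look at what landed there; a sketch, assuming the summary CSV is the `1_stats.csv` file mentioned earlier:
```bash
ls -R /home/sdp/benchmark_output/node_1
# Print the summary CSV in aligned columns
find /home/sdp/benchmark_output/node_1 -name '1_stats.csv' -exec column -s, -t {} \; | head
```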
#### 5. Clean up
```bash
# on k8s-master node
cd GenAIExamples/ChatQnA/benchmark/single_gaudi
kubectl delete -f .
kubectl label nodes k8s-worker1 node-type-
```
### Two node test
#### 1. Preparation
We add a label to two Kubernetes nodes to make sure all pods are scheduled to those nodes:
```bash
kubectl label nodes k8s-worker1 k8s-worker2 node-type=chatqna-opea
```
#### 2. Install ChatQnA
Go to the [BKC manifest](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/benchmark/two_gaudi) directory and apply it to K8s.
```bash
# on k8s-master node
cd GenAIExamples/ChatQnA/benchmark/two_gaudi
kubectl apply -f .
```
#### 3. Run tests
We copy the configuration file [benchmark.yaml](./benchmark.yaml) to `GenAIEval/evals/benchmark/benchmark.yaml` and configure `test_suite_config.user_queries` and `test_suite_config.test_output_dir`.
```bash
export USER_QUERIES="[4, 8, 16, 1280]"
export TEST_OUTPUT_DIR="/home/sdp/benchmark_output/node_2"
envsubst < ./benchmark.yaml > GenAIEval/evals/benchmark/benchmark.yaml
```
Then run the benchmark tool:
```bash
cd GenAIEval/evals/benchmark
python benchmark.py
```
#### 4. Data collection
All the test results are saved under `/home/sdp/benchmark_output/node_2`, the folder configured via the `TEST_OUTPUT_DIR` environment variable in the previous step.
#### 5. Clean up
```bash
# on k8s-master node
kubectl delete -f .
kubectl label nodes k8s-worker1 k8s-worker2 node-type-
```
### Four node test
#### 1. Preparation
We add a label to four Kubernetes nodes to make sure all pods are scheduled to those nodes:
```bash
kubectl label nodes k8s-master k8s-worker1 k8s-worker2 k8s-worker3 node-type=chatqna-opea
```
#### 2. Install ChatQnA
Go to the [BKC manifest](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/benchmark/four_gaudi) directory and apply it to K8s.
```bash
# on k8s-master node
cd GenAIExamples/ChatQnA/benchmark/four_gaudi
kubectl apply -f .
```
#### 3. Run tests
We copy the configuration file [benchmark.yaml](./benchmark.yaml) to `GenAIEval/evals/benchmark/benchmark.yaml` and configure `test_suite_config.user_queries` and `test_suite_config.test_output_dir`.
```bash
export USER_QUERIES="[4, 8, 16, 2560]"
export TEST_OUTPUT_DIR="/home/sdp/benchmark_output/node_4"
envsubst < ./benchmark.yaml > GenAIEval/evals/benchmark/benchmark.yaml
```
Then run the benchmark tool:
```bash
cd GenAIEval/evals/benchmark
python benchmark.py
```
#### 4. Data collection
All the test results are saved under `/home/sdp/benchmark_output/node_4`, the folder configured via the `TEST_OUTPUT_DIR` environment variable in the previous step.
#### 5. Clean up
```bash
# on k8s-master node
cd GenAIExamples/ChatQnA/benchmark/four_gaudi
kubectl delete -f .
kubectl label nodes k8s-master k8s-worker1 k8s-worker2 k8s-worker3 node-type-
```
### Example Result
The following is a summary of the test result, with files saved at `TEST_OUTPUT_DIR`.
```statistics
Concurrency : 512
Max request count : 2560
Http timeout : 60000
Benchmark target : chatqnafixed
=================Total statistics=====================
Succeed Response: 2560 (Total 2560, 100.0% Success), Duration: 26.44s, Input Tokens: 61440, Output Tokens: 255985, RPS: 96.82, Input Tokens per Second: 2323.71, Output Tokens per Second: 9681.57
End to End latency(ms), P50: 3576.34, P90: 4242.19, P99: 5252.23, Avg: 3581.55
First token latency(ms), P50: 726.64, P90: 1128.27, P99: 1796.09, Avg: 769.58
Average Next token latency(ms): 28.41
Average token latency(ms) : 35.85
======================================================
```
```test spec
benchmarkresult:
Average_Next_token_latency: '28.41'
Average_token_latency: '35.85'
Duration: '26.44'
End_to_End_latency_Avg: '3581.55'
End_to_End_latency_P50: '3576.34'
End_to_End_latency_P90: '4242.19'
End_to_End_latency_P99: '5252.23'
First_token_latency_Avg: '769.58'
First_token_latency_P50: '726.64'
First_token_latency_P90: '1128.27'
First_token_latency_P99: '1796.09'
Input_Tokens: '61440'
Input_Tokens_per_Second: '2323.71'
Onput_Tokens: '255985'
Output_Tokens_per_Second: '9681.57'
RPS: '96.82'
Succeed_Response: '2560'
locust_P50: '160'
locust_P99: '810'
locust_num_failures: '0'
locust_num_requests: '2560'
benchmarkspec:
bench-target: chatqnafixed
endtest_time: '2024-08-25T14:19:25.955973'
host: http://10.110.105.197:8888
llm-model: Intel/neural-chat-7b-v3-3
locustfile: /home/sdp/lvl/GenAIEval/evals/benchmark/stresscli/locust/aistress.py
max_requests: 2560
namespace: default
processes: 2
run_name: benchmark
runtime: 60m
starttest_time: '2024-08-25T14:18:50.366514'
stop_timeout: 120
tool: locust
users: 512
hardwarespec:
aise-gaudi-00:
architecture: amd64
containerRuntimeVersion: containerd://1.7.18
cpu: '160'
habana.ai/gaudi: '8'
kernelVersion: 5.15.0-92-generic
kubeProxyVersion: v1.29.7
kubeletVersion: v1.29.7
memory: 1056375272Ki
operatingSystem: linux
osImage: Ubuntu 22.04.3 LTS
aise-gaudi-01:
architecture: amd64
containerRuntimeVersion: containerd://1.7.18
cpu: '160'
habana.ai/gaudi: '8'
kernelVersion: 5.15.0-92-generic
kubeProxyVersion: v1.29.7
kubeletVersion: v1.29.7
memory: 1056375256Ki
operatingSystem: linux
osImage: Ubuntu 22.04.3 LTS
aise-gaudi-02:
architecture: amd64
containerRuntimeVersion: containerd://1.7.18
cpu: '160'
habana.ai/gaudi: '8'
kernelVersion: 5.15.0-92-generic
kubeProxyVersion: v1.29.7
kubeletVersion: v1.29.7
memory: 1056375260Ki
operatingSystem: linux
osImage: Ubuntu 22.04.3 LTS
aise-gaudi-03:
architecture: amd64
containerRuntimeVersion: containerd://1.6.8
cpu: '160'
habana.ai/gaudi: '8'
kernelVersion: 5.15.0-112-generic
kubeProxyVersion: v1.29.7
kubeletVersion: v1.29.7
memory: 1056374404Ki
operatingSystem: linux
osImage: Ubuntu 22.04.4 LTS
workloadspec:
aise-gaudi-00:
chatqna-backend-server-deploy:
replica: 1
resources:
limits:
cpu: '8'
memory: 4000Mi
requests:
cpu: '8'
memory: 4000Mi
embedding-dependency-deploy:
replica: 1
resources:
limits:
cpu: '80'
memory: 20000Mi
requests:
cpu: '80'
memory: 20000Mi
embedding-deploy:
replica: 1
llm-dependency-deploy:
replica: 8
resources:
limits:
habana.ai/gaudi: '1'
requests:
habana.ai/gaudi: '1'
llm-deploy:
replica: 1
retriever-deploy:
replica: 1
resources:
limits:
cpu: '8'
memory: 2500Mi
requests:
cpu: '8'
memory: 2500Mi
aise-gaudi-01:
chatqna-backend-server-deploy:
replica: 1
resources:
limits:
cpu: '8'
memory: 4000Mi
requests:
cpu: '8'
memory: 4000Mi
embedding-dependency-deploy:
replica: 1
resources:
limits:
cpu: '80'
memory: 20000Mi
requests:
cpu: '80'
memory: 20000Mi
embedding-deploy:
replica: 1
llm-dependency-deploy:
replica: 8
resources:
limits:
habana.ai/gaudi: '1'
requests:
habana.ai/gaudi: '1'
llm-deploy:
replica: 1
prometheus-operator:
replica: 1
resources:
limits:
cpu: 200m
memory: 200Mi
requests:
cpu: 100m
memory: 100Mi
retriever-deploy:
replica: 1
resources:
limits:
cpu: '8'
memory: 2500Mi
requests:
cpu: '8'
memory: 2500Mi
aise-gaudi-02:
chatqna-backend-server-deploy:
replica: 1
resources:
limits:
cpu: '8'
memory: 4000Mi
requests:
cpu: '8'
memory: 4000Mi
embedding-dependency-deploy:
replica: 1
resources:
limits:
cpu: '80'
memory: 20000Mi
requests:
cpu: '80'
memory: 20000Mi
embedding-deploy:
replica: 1
llm-dependency-deploy:
replica: 8
resources:
limits:
habana.ai/gaudi: '1'
requests:
habana.ai/gaudi: '1'
llm-deploy:
replica: 1
retriever-deploy:
replica: 1
resources:
limits:
cpu: '8'
memory: 2500Mi
requests:
cpu: '8'
memory: 2500Mi
aise-gaudi-03:
chatqna-backend-server-deploy:
replica: 1
resources:
limits:
cpu: '8'
memory: 4000Mi
requests:
cpu: '8'
memory: 4000Mi
dataprep-deploy:
replica: 1
embedding-dependency-deploy:
replica: 1
resources:
limits:
cpu: '80'
memory: 20000Mi
requests:
cpu: '80'
memory: 20000Mi
embedding-deploy:
replica: 1
llm-dependency-deploy:
replica: 8
resources:
limits:
habana.ai/gaudi: '1'
requests:
habana.ai/gaudi: '1'
llm-deploy:
replica: 1
retriever-deploy:
replica: 1
resources:
limits:
cpu: '8'
memory: 2500Mi
requests:
cpu: '8'
memory: 2500Mi
vector-db:
replica: 1
```

View File

@@ -0,0 +1,55 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
test_suite_config: # Overall configuration settings for the test suite
examples: ["chatqna"] # The specific test cases being tested, e.g., chatqna, codegen, codetrans, faqgen, audioqna, visualqna
concurrent_level: 5 # The concurrency level, adjustable based on requirements
user_queries: ${USER_QUERIES} # Number of test requests at each concurrency level
random_prompt: false # Use random prompts if true, fixed prompts if false
run_time: 60m # The max total run time for the test suite
collect_service_metric: false # Collect service metrics if true, do not collect service metrics if false
data_visualization: false # Generate data visualization if true, do not generate data visualization if false
llm_model: "Intel/neural-chat-7b-v3-3" # The LLM model used for the test
test_output_dir: "${TEST_OUTPUT_DIR}" # The directory to store the test output
test_cases:
chatqna:
embedding:
run_test: false
service_name: "embedding-svc" # Replace with your service name
embedserve:
run_test: false
service_name: "embedding-dependency-svc" # Replace with your service name
retriever:
run_test: false
service_name: "retriever-svc" # Replace with your service name
parameters:
search_type: "similarity"
k: 4
fetch_k: 20
lambda_mult: 0.5
score_threshold: 0.2
reranking:
run_test: false
service_name: "reranking-svc" # Replace with your service name
parameters:
top_n: 1
rerankserve:
run_test: false
service_name: "reranking-dependency-svc" # Replace with your service name
llm:
run_test: false
service_name: "llm-svc" # Replace with your service name
parameters:
max_new_tokens: 128
temperature: 0.01
top_k: 10
top_p: 0.95
repetition_penalty: 1.03
streaming: true
llmserve:
run_test: false
service_name: "llm-dependency-svc" # Replace with your service name
e2e:
run_test: true
service_name: "chatqna-backend-server-svc" # Replace with your service name

View File

@@ -0,0 +1,23 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: qna-config
namespace: default
data:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
RERANK_MODEL_ID: BAAI/bge-reranker-base
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
INDEX_NAME: rag-redis
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
EMBEDDING_SERVICE_HOST_IP: embedding-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
RERANK_SERVICE_HOST_IP: reranking-svc
NODE_SELECTOR: chatqna-opea
LLM_SERVICE_HOST_IP: llm-svc

View File

@@ -0,0 +1,62 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-backend-server-deploy
namespace: default
spec:
replicas: 4
selector:
matchLabels:
app: chatqna-backend-server-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: chatqna-backend-server-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna:v0.9
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
args: null
ports:
- containerPort: 8888
resources:
limits:
cpu: 8
memory: 4000Mi
requests:
cpu: 8
memory: 4000Mi
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: chatqna-backend-server-svc
spec:
type: NodePort
selector:
app: chatqna-backend-server-deploy
ports:
- name: service
port: 8888
targetPort: 8888
nodePort: 30888

View File

@@ -0,0 +1,70 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: dataprep-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: dataprep-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: dataprep-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: dataprep-deploy
hostIPC: true
containers:
- env:
- name: REDIS_URL
valueFrom:
configMapKeyRef:
name: qna-config
key: REDIS_URL
- name: INDEX_NAME
valueFrom:
configMapKeyRef:
name: qna-config
key: INDEX_NAME
image: opea/dataprep-redis:v0.9
imagePullPolicy: IfNotPresent
name: dataprep-deploy
args: null
ports:
- containerPort: 6007
- containerPort: 6008
- containerPort: 6009
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: dataprep-svc
spec:
type: ClusterIP
selector:
app: dataprep-deploy
ports:
- name: port1
port: 6007
targetPort: 6007
- name: port2
port: 6008
targetPort: 6008
- name: port3
port: 6009
targetPort: 6009

View File

@@ -0,0 +1,69 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-dependency-deploy
namespace: default
spec:
replicas: 4
selector:
matchLabels:
app: embedding-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
containers:
- envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
name: embedding-dependency-deploy
args:
- --model-id
- $(EMBEDDING_MODEL_ID)
- --auto-truncate
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
ports:
- containerPort: 80
resources:
limits:
cpu: 80
memory: 20000Mi
requests:
cpu: 80
memory: 20000Mi
serviceAccountName: default
volumes:
- name: model-volume
hostPath:
path: /mnt/models
type: Directory
- name: shm
emptyDir:
medium: Memory
sizeLimit: 1Gi
---
kind: Service
apiVersion: v1
metadata:
name: embedding-dependency-svc
spec:
type: ClusterIP
selector:
app: embedding-dependency-deploy
ports:
- name: service
port: 6006
targetPort: 80

View File

@@ -0,0 +1,59 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-deploy
namespace: default
spec:
replicas: 4
selector:
matchLabels:
app: embedding-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: embedding-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/embedding-tei:v0.9
imagePullPolicy: IfNotPresent
name: embedding-deploy
args: null
ports:
- containerPort: 6000
resources:
limits:
cpu: 4
requests:
cpu: 4
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: embedding-svc
spec:
type: ClusterIP
selector:
app: embedding-deploy
ports:
- name: service
port: 6000
targetPort: 6000

View File

@@ -0,0 +1,88 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-dependency-deploy
namespace: default
spec:
replicas: 31
selector:
matchLabels:
app: llm-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
name: llm-dependency-deploy-demo
securityContext:
capabilities:
add:
- SYS_NICE
args:
- --model-id
- $(LLM_MODEL_ID)
- --max-input-length
- '2048'
- --max-total-tokens
- '4096'
- --max-batch-total-tokens
- '65536'
- --max-batch-prefill-tokens
- '4096'
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
serviceAccountName: default
volumes:
- name: model-volume
hostPath:
path: /mnt/models
type: Directory
- name: shm
emptyDir:
medium: Memory
sizeLimit: 1Gi
---
kind: Service
apiVersion: v1
metadata:
name: llm-dependency-svc
spec:
type: ClusterIP
selector:
app: llm-dependency-deploy
ports:
- name: service
port: 9009
targetPort: 80

View File

@@ -0,0 +1,59 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-deploy
namespace: default
spec:
replicas: 4
selector:
matchLabels:
app: llm-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: llm-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/llm-tgi:v0.9
imagePullPolicy: IfNotPresent
name: llm-deploy
args: null
ports:
- containerPort: 9000
resources:
limits:
cpu: 4
requests:
cpu: 4
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: llm-svc
spec:
type: ClusterIP
selector:
app: llm-deploy
ports:
- name: service
port: 9000
targetPort: 9000

View File

@@ -0,0 +1,85 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: reranking-dependency-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: reranking-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: reranking-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: reranking-dependency-deploy
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/tei-gaudi:v0.9
name: reranking-dependency-deploy
args:
- --model-id
- $(RERANK_MODEL_ID)
- --auto-truncate
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
- name: MAX_WARMUP_SEQUENCE_LENGTH
value: '512'
serviceAccountName: default
volumes:
- name: model-volume
hostPath:
path: /mnt/models
type: Directory
- name: shm
emptyDir:
medium: Memory
sizeLimit: 1Gi
---
kind: Service
apiVersion: v1
metadata:
name: reranking-dependency-svc
spec:
type: ClusterIP
selector:
app: reranking-dependency-deploy
ports:
- name: service
port: 8808
targetPort: 80

View File

@@ -0,0 +1,59 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: reranking-deploy
namespace: default
spec:
replicas: 4
selector:
matchLabels:
app: reranking-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: reranking-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: reranking-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/reranking-tei:v0.9
imagePullPolicy: IfNotPresent
name: reranking-deploy
args: null
ports:
- containerPort: 8000
resources:
limits:
cpu: 4
requests:
cpu: 4
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: reranking-svc
spec:
type: ClusterIP
selector:
app: reranking-deploy
ports:
- name: service
port: 8000
targetPort: 8000

View File

@@ -0,0 +1,69 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: retriever-deploy
namespace: default
spec:
replicas: 4
selector:
matchLabels:
app: retriever-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: retriever-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: retriever-deploy
hostIPC: true
containers:
- env:
- name: REDIS_URL
valueFrom:
configMapKeyRef:
name: qna-config
key: REDIS_URL
- name: INDEX_NAME
valueFrom:
configMapKeyRef:
name: qna-config
key: INDEX_NAME
image: opea/retriever-redis:v0.9
imagePullPolicy: IfNotPresent
name: retriever-deploy
args: null
ports:
- containerPort: 7000
resources:
limits:
cpu: 8
memory: 2500Mi
requests:
cpu: 8
memory: 2500Mi
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: retriever-svc
spec:
type: ClusterIP
selector:
app: retriever-deploy
ports:
- name: service
port: 7000
targetPort: 7000

View File

@@ -0,0 +1,48 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: vector-db
spec:
replicas: 1
selector:
matchLabels:
app: vector-db
template:
metadata:
labels:
app: vector-db
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: vector-db
containers:
- name: vector-db
image: redis/redis-stack:7.2.0-v9
ports:
- containerPort: 6379
- containerPort: 8001
---
apiVersion: v1
kind: Service
metadata:
name: vector-db
spec:
type: ClusterIP
selector:
app: vector-db
ports:
- name: vector-db-service
port: 6379
targetPort: 6379
- name: vector-db-insight
port: 8001
targetPort: 8001

View File

@@ -15,7 +15,9 @@ data:
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
INDEX_NAME: rag-redis
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
EMBEDDING_SERVICE_HOST_IP: embedding-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
RERANK_SERVICE_HOST_IP: reranking-svc
NODE_SELECTOR: chatqna-opea
LLM_SERVICE_HOST_IP: llm-svc

View File

@@ -18,6 +18,8 @@ spec:
labels:
app: chatqna-backend-server-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
@@ -30,7 +32,7 @@ spec:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna:latest
image: opea/chatqna:v0.9
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
args: null
@@ -48,7 +50,7 @@ spec:
kind: Service
apiVersion: v1
metadata:
name: chaqna-backend-server-svc
name: chatqna-backend-server-svc
spec:
type: NodePort
selector:

View File

@@ -18,6 +18,8 @@ spec:
labels:
app: dataprep-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
@@ -38,7 +40,7 @@ spec:
configMapKeyRef:
name: qna-config
key: INDEX_NAME
image: opea/dataprep-redis:latest
image: opea/dataprep-redis:v0.9
imagePullPolicy: IfNotPresent
name: dataprep-deploy
args: null

View File

@@ -7,7 +7,7 @@ metadata:
name: embedding-dependency-deploy
namespace: default
spec:
replicas: 4
replicas: 1
selector:
matchLabels:
app: embedding-dependency-deploy
@@ -18,11 +18,13 @@ spec:
labels:
app: embedding-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
containers:
- envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
name: embedding-dependency-deploy
args:
- --model-id
@@ -37,16 +39,16 @@ spec:
- containerPort: 80
resources:
limits:
cpu: 24
memory: 4000Mi
cpu: 80
memory: 20000Mi
requests:
cpu: 24
memory: 4000Mi
cpu: 80
memory: 20000Mi
serviceAccountName: default
volumes:
- name: model-volume
hostPath:
path: /home/sdp/cesg
path: /mnt/models
type: Directory
- name: shm
emptyDir:

View File

@@ -18,6 +18,8 @@ spec:
labels:
app: embedding-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
@@ -30,7 +32,7 @@ spec:
- envFrom:
- configMapRef:
name: qna-config
image: opea/embedding-tei:latest
image: opea/embedding-tei:v0.9
imagePullPolicy: IfNotPresent
name: embedding-deploy
args: null

View File

@@ -18,12 +18,14 @@ spec:
labels:
app: llm-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: tgi_gaudi:2.0.1
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
name: llm-dependency-deploy-demo
securityContext:
capabilities:
@@ -36,6 +38,10 @@ spec:
- '2048'
- --max-total-tokens
- '4096'
- --max-batch-total-tokens
- '65536'
- --max-batch-prefill-tokens
- '4096'
volumeMounts:
- mountPath: /data
name: model-volume
@@ -56,12 +62,12 @@ spec:
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: $(HF_TOKEN)
value: ${HF_TOKEN}
serviceAccountName: default
volumes:
- name: model-volume
hostPath:
path: /home/sdp/cesg
path: /mnt/models
type: Directory
- name: shm
emptyDir:

View File

@@ -18,6 +18,8 @@ spec:
labels:
app: llm-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
@@ -30,7 +32,7 @@ spec:
- envFrom:
- configMapRef:
name: qna-config
image: opea/llm-tgi:latest
image: opea/llm-tgi:v0.9
imagePullPolicy: IfNotPresent
name: llm-deploy
args: null

View File

@@ -18,6 +18,8 @@ spec:
labels:
app: reranking-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
@@ -29,7 +31,7 @@ spec:
- envFrom:
- configMapRef:
name: qna-config
image: tei_gaudi:rerank
image: opea/tei-gaudi:v0.9
name: reranking-dependency-deploy
args:
- --model-id
@@ -55,14 +57,14 @@ spec:
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: $(HF_TOKEN)
value: ${HF_TOKEN}
- name: MAX_WARMUP_SEQUENCE_LENGTH
value: '512'
serviceAccountName: default
volumes:
- name: model-volume
hostPath:
path: /home/sdp/cesg
path: /mnt/models
type: Directory
- name: shm
emptyDir:

View File

@@ -18,6 +18,8 @@ spec:
labels:
app: reranking-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
@@ -30,7 +32,7 @@ spec:
- envFrom:
- configMapRef:
name: qna-config
image: opea/reranking-tei:latest
image: opea/reranking-tei:v0.9
imagePullPolicy: IfNotPresent
name: reranking-deploy
args: null

View File

@@ -18,6 +18,8 @@ spec:
labels:
app: retriever-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
@@ -38,7 +40,7 @@ spec:
configMapKeyRef:
name: qna-config
key: INDEX_NAME
image: opea/retriever-redis:latest
image: opea/retriever-redis:v0.9
imagePullPolicy: IfNotPresent
name: retriever-deploy
args: null

View File

@@ -15,6 +15,8 @@ spec:
labels:
app: vector-db
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname

View File

@@ -15,8 +15,9 @@ data:
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
INDEX_NAME: rag-redis
HUGGINGFACEHUB_API_TOKEN: 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
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
EMBEDDING_SERVICE_HOST_IP: embedding-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
RERANK_SERVICE_HOST_IP: reranking-svc
NODE_SELECTOR: chatqna-opea
LLM_SERVICE_HOST_IP: llm-svc
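
With the hardcoded token replaced by the `${HF_TOKEN}` placeholder, the value has to be injected before the ConfigMap is applied. One possible approach (an assumption, not prescribed by this diff; the file name is illustrative) is shell substitution:

```bash
# Substitute the HF_TOKEN placeholder and apply the rendered ConfigMap.
export HF_TOKEN=<your-huggingface-token>
envsubst < qna_configmap.yaml | kubectl apply -f -
```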

View File

@@ -7,7 +7,7 @@ metadata:
name: chatqna-backend-server-deploy
namespace: default
spec:
replicas: 1
replicas: 2
selector:
matchLabels:
app: chatqna-backend-server-deploy
@@ -18,6 +18,8 @@ spec:
labels:
app: chatqna-backend-server-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
@@ -30,7 +32,7 @@ spec:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna:latest
image: opea/chatqna:v0.9
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
args: null
@@ -48,7 +50,7 @@ spec:
kind: Service
apiVersion: v1
metadata:
name: chaqna-backend-server-svc
name: chatqna-backend-server-svc
spec:
type: NodePort
selector:

View File

@@ -18,6 +18,8 @@ spec:
labels:
app: dataprep-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
@@ -38,7 +40,7 @@ spec:
configMapKeyRef:
name: qna-config
key: INDEX_NAME
image: opea/dataprep-redis:latest
image: opea/dataprep-redis:v0.9
imagePullPolicy: IfNotPresent
name: dataprep-deploy
args: null

View File

@@ -7,7 +7,7 @@ metadata:
name: embedding-dependency-deploy
namespace: default
spec:
replicas: 10
replicas: 2
selector:
matchLabels:
app: embedding-dependency-deploy
@@ -18,11 +18,13 @@ spec:
labels:
app: embedding-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
containers:
- envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
name: embedding-dependency-deploy
args:
- --model-id
@@ -37,16 +39,16 @@ spec:
- containerPort: 80
resources:
limits:
cpu: 24
memory: 4000Mi
cpu: 80
memory: 20000Mi
requests:
cpu: 24
memory: 4000Mi
cpu: 80
memory: 20000Mi
serviceAccountName: default
volumes:
- name: model-volume
hostPath:
path: /home/sdp/cesg
path: /mnt/models
type: Directory
- name: shm
emptyDir:

View File

@@ -7,7 +7,7 @@ metadata:
name: embedding-deploy
namespace: default
spec:
replicas: 1
replicas: 2
selector:
matchLabels:
app: embedding-deploy
@@ -18,6 +18,8 @@ spec:
labels:
app: embedding-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
@@ -30,7 +32,7 @@ spec:
- envFrom:
- configMapRef:
name: qna-config
image: opea/embedding-tei:latest
image: opea/embedding-tei:v0.9
imagePullPolicy: IfNotPresent
name: embedding-deploy
args: null

View File

@@ -18,12 +18,14 @@ spec:
labels:
app: llm-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: tgi_gaudi:2.0.1
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
name: llm-dependency-deploy-demo
securityContext:
capabilities:
@@ -36,6 +38,10 @@ spec:
- '2048'
- --max-total-tokens
- '4096'
- --max-batch-total-tokens
- '65536'
- --max-batch-prefill-tokens
- '4096'
volumeMounts:
- mountPath: /data
name: model-volume
@@ -56,12 +62,12 @@ spec:
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: $(HF_TOKEN)
value: ${HF_TOKEN}
serviceAccountName: default
volumes:
- name: model-volume
hostPath:
path: /home/sdp/cesg
path: /mnt/models
type: Directory
- name: shm
emptyDir:

View File

@@ -7,7 +7,7 @@ metadata:
name: llm-deploy
namespace: default
spec:
replicas: 1
replicas: 2
selector:
matchLabels:
app: llm-deploy
@@ -18,6 +18,8 @@ spec:
labels:
app: llm-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
@@ -30,7 +32,7 @@ spec:
- envFrom:
- configMapRef:
name: qna-config
image: opea/llm-tgi:latest
image: opea/llm-tgi:v0.9
imagePullPolicy: IfNotPresent
name: llm-deploy
args: null

View File

@@ -18,6 +18,8 @@ spec:
labels:
app: reranking-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
@@ -29,7 +31,7 @@ spec:
- envFrom:
- configMapRef:
name: qna-config
image: tei_gaudi:rerank
image: opea/tei-gaudi:v0.9
name: reranking-dependency-deploy
args:
- --model-id
@@ -55,14 +57,14 @@ spec:
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: $(HF_TOKEN)
value: ${HF_TOKEN}
- name: MAX_WARMUP_SEQUENCE_LENGTH
value: '512'
serviceAccountName: default
volumes:
- name: model-volume
hostPath:
path: /home/sdp/cesg
path: /mnt/models
type: Directory
- name: shm
emptyDir:

View File

@@ -7,7 +7,7 @@ metadata:
name: reranking-deploy
namespace: default
spec:
replicas: 1
replicas: 2
selector:
matchLabels:
app: reranking-deploy
@@ -18,6 +18,8 @@ spec:
labels:
app: reranking-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
@@ -30,7 +32,7 @@ spec:
- envFrom:
- configMapRef:
name: qna-config
image: opea/reranking-tei:latest
image: opea/reranking-tei:v0.9
imagePullPolicy: IfNotPresent
name: reranking-deploy
args: null

View File

@@ -7,7 +7,7 @@ metadata:
name: retriever-deploy
namespace: default
spec:
replicas: 1
replicas: 2
selector:
matchLabels:
app: retriever-deploy
@@ -18,6 +18,8 @@ spec:
labels:
app: retriever-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
@@ -38,7 +40,7 @@ spec:
configMapKeyRef:
name: qna-config
key: INDEX_NAME
image: opea/retriever-redis:latest
image: opea/retriever-redis:v0.9
imagePullPolicy: IfNotPresent
name: retriever-deploy
args: null

View File

@@ -15,6 +15,8 @@ spec:
labels:
app: vector-db
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname

View File

@@ -0,0 +1,33 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
FROM python:3.11-slim
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
    libgl1-mesa-glx \
    libjemalloc-dev \
    vim \
    git
RUN useradd -m -s /bin/bash user && \
    mkdir -p /home/user && \
    chown -R user /home/user/
WORKDIR /home/user/
RUN git clone https://github.com/opea-project/GenAIComps.git
WORKDIR /home/user/GenAIComps
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt
COPY ./chatqna_without_rerank.py /home/user/chatqna_without_rerank.py
ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps
USER user
WORKDIR /home/user
ENTRYPOINT ["python", "chatqna_without_rerank.py"]
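
A build-and-run sketch for the rerank-free ChatQnA image defined by this Dockerfile (the Dockerfile path and image tag below are illustrative, not from this diff; the build context must contain `chatqna_without_rerank.py`):

```bash
# Build the image, then start a container from it.
docker build -t opea/chatqna-without-rerank:v0.9 -f Dockerfile.without_rerank .
docker run -d --name chatqna-without-rerank opea/chatqna-without-rerank:v0.9
```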

View File

@@ -160,7 +160,7 @@ Note: Please replace with `host_ip` with you external IP address, do not use loc
```bash
cd GenAIExamples/ChatQnA/docker/aipc/
docker compose up -d
TAG=v0.9 docker compose up -d
# let ollama service runs
# e.g. ollama run llama3

View File

@@ -1,8 +1,6 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
version: "3.8"
services:
redis-vector-db:
image: redis/redis-stack:7.2.0-v9
@@ -11,7 +9,7 @@ services:
- "6379:6379"
- "8001:8001"
dataprep-redis-service:
image: opea/dataprep-redis:latest
image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest}
container_name: dataprep-redis-server
depends_on:
- redis-vector-db
@@ -23,6 +21,8 @@ services:
https_proxy: ${https_proxy}
REDIS_URL: ${REDIS_URL}
INDEX_NAME: ${INDEX_NAME}
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
tei-embedding-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: tei-embedding-server
@@ -37,7 +37,7 @@ services:
https_proxy: ${https_proxy}
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
embedding:
image: opea/embedding-tei:latest
image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
container_name: embedding-tei-server
depends_on:
- tei-embedding-service
@@ -49,12 +49,9 @@ services:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-embedding-service"
restart: unless-stopped
retriever:
image: opea/retriever-redis:latest
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
container_name: retriever-redis-server
depends_on:
- redis-vector-db
@@ -68,9 +65,6 @@ services:
REDIS_URL: ${REDIS_URL}
INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-retriever-service"
restart: unless-stopped
tei-reranking-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
@@ -89,7 +83,7 @@ services:
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
reranking:
image: opea/reranking-tei:latest
image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
container_name: reranking-tei-aipc-server
depends_on:
- tei-reranking-service
@@ -104,12 +98,9 @@ services:
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-reranking-service"
restart: unless-stopped
llm:
image: opea/llm-ollama
image: ${REGISTRY:-opea}/llm-ollama
container_name: llm-ollama
ports:
- "9000:9000"
@@ -122,12 +113,10 @@ services:
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-llm-service"
OLLAMA_ENDPOINT: ${OLLAMA_ENDPOINT}
OLLAMA_MODEL: ${OLLAMA_MODEL}
chaqna-aipc-backend-server:
image: opea/chatqna:latest
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
container_name: chatqna-aipc-backend-server
depends_on:
- redis-vector-db
@@ -151,7 +140,7 @@ services:
ipc: host
restart: always
chaqna-aipc-ui-server:
image: opea/chatqna-ui:latest
image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest}
container_name: chatqna-aipc-ui-server
depends_on:
- chaqna-aipc-backend-server
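
With the compose images now parameterized as `${REGISTRY:-opea}/<name>:${TAG:-latest}`, the v0.9 release images can be pinned at startup, for example:

```bash
# Bring up the AIPC stack with the OPEA v0.9 images.
cd GenAIExamples/ChatQnA/docker/aipc/
REGISTRY=opea TAG=v0.9 docker compose up -d
```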

Some files were not shown because too many files have changed in this diff.