# Compare commits

**113 commits** — comparing `helmcharts`...`v1.1rc`
Commit SHA1s:

```text
bbb4e231d0 da10068964 188b568467 9e9af9766f cc108b5a18 f70d9c3853 8808b51e42 17d4b0c97f
3a03d31f8f 179fd84362 9ba034b22d c3e6f43ece 1ac756a1c7 56f770cb28 0cdeb946e4 5648839411
eb91d1f054 2587179224 7e62175c2e 152adf8012 83172e9a99 fb514bb8ba b1bb6db52d 7949045176
cbe952ec5e 3b1a9fe9e1 e66d7fe381 6d3a017609 dbf4ba03fa 4f96d9e605 a8f4245384 096a37aacc
6f8fa6a689 39f68d5d6b 00d9bb6128 59b624c677 2b2c7ee2f5 6b9a27dd83 5720cd45c0 73879d3cec
7c9ed04132 9ff7df9202 b5f95f735e 393367e9f1 7adbba6add 0d52c2f003 1ff85f6a85 f7a7f8aa3f
e3187be819 abd9d12937 a7353bbaa4 aa314f6757 3744bb8c1b 82801d0121 f7026773b8 edc09ece5c
dfed2aead2 049517f977 ee83a6d5b4 e2bdd19fd4 c9088eb824 9c3023a12e bbc95bb708 dd9623d3d5
4c27a3d30c 40386d9bd6 fe97e88c7a 11d8b24c8a 4635a927fa 1da44d99a1 e9b164505e 6263b517b9
2de7c0ba89 944ae47948 2d9aeb3715 a0921f127f cf86aceb18 c2b7bd25d9 78331ee678 7f7ad0e256
0306c620b5 3372b9d480 5eb3d2869f ced68e1834 bf5c391e47 c65d7d40fb 9d124161e0 0f5a9c4a5e
a65640b4a5 7197286a14 960805a57b 002f0e2b11 fde5996192 bc47930ce1 2332d22950 a2afce1675
89f4c5fb41 98f66405ac 90c2d49050 95b58b51fa d3ce6f5357 a10b4a1f1d 085d859a70 15cc457cea
cfffb4c005 41955f65ad def39cfcdc 35a4fef70d a3f9811f7e 0eedbbfce0 9438d392b4 1929dfd3a0
c7e33647ad
```
**`.github/code_spell_ignore.txt`** (2 changes, vendored)

```diff
@@ -0,0 +1,2 @@
+ModelIn
+modelin
```
**`.github/workflows/_example-workflow.yml`** (10 changes, vendored)

```diff
@@ -40,6 +40,11 @@ on:
         default: "main"
         required: false
         type: string
+      inject_commit:
+        default: false
+        required: false
+        type: string
+
 jobs:
 ####################################################################################################
 # Image Build
@@ -72,6 +77,10 @@ jobs:
             git clone https://github.com/vllm-project/vllm.git
             cd vllm && git rev-parse HEAD && cd ../
           fi
+          if [[ $(grep -c "vllm-gaudi:" ${docker_compose_path}) != 0 ]]; then
+            git clone https://github.com/HabanaAI/vllm-fork.git
+            cd vllm-fork && git checkout 3c39626 && cd ../
+          fi
           git clone https://github.com/opea-project/GenAIComps.git
           cd GenAIComps && git checkout ${{ inputs.opea_branch }} && git rev-parse HEAD && cd ../
@@ -83,6 +92,7 @@ jobs:
       docker_compose_path: ${{ github.workspace }}/${{ inputs.example }}/docker_image_build/build.yaml
       service_list: ${{ inputs.services }}
       registry: ${OPEA_IMAGE_REPO}opea
+      inject_commit: ${{ inputs.inject_commit }}
       tag: ${{ inputs.tag }}
```
**`.github/workflows/_get-test-matrix.yml`** (2 changes, vendored)

```diff
@@ -14,7 +14,7 @@ on:
       test_mode:
         required: false
         type: string
-        default: 'docker_compose'
+        default: 'compose'
   outputs:
     run_matrix:
       description: "The matrix string"
```
**`.github/workflows/_manifest-e2e.yml`** (10 changes, vendored)

```diff
@@ -90,9 +90,15 @@ jobs:
             echo "Validate ${{ inputs.example }} successful!"
           else
             echo "Validate ${{ inputs.example }} failure!!!"
+            echo "Check the logs in 'Dump logs when e2e test failed' step!!!"
             exit 1
           fi
         fi
 
+    - name: Dump logs when e2e test failed
+      if: failure()
+      run: |
+        .github/workflows/scripts/k8s-utils.sh dump_all_pod_logs $NAMESPACE
+
     - name: Kubectl uninstall
       if: always()
```
**`.github/workflows/_run-docker-compose.yml`** (8 changes, vendored)

```diff
@@ -119,6 +119,8 @@ jobs:
       GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
       PINECONE_KEY: ${{ secrets.PINECONE_KEY }}
       PINECONE_KEY_LANGCHAIN_TEST: ${{ secrets.PINECONE_KEY_LANGCHAIN_TEST }}
+      SDK_BASE_URL: ${{ secrets.SDK_BASE_URL }}
+      SERVING_TOKEN: ${{ secrets.SERVING_TOKEN }}
       IMAGE_REPO: ${{ inputs.registry }}
       IMAGE_TAG: ${{ inputs.tag }}
       example: ${{ inputs.example }}
@@ -139,7 +141,11 @@ jobs:
           flag=${flag#test_}
           yaml_file=$(find . -type f -wholename "*${{ inputs.hardware }}/${flag}.yaml")
           echo $yaml_file
-          docker compose -f $yaml_file stop && docker compose -f $yaml_file rm -f || true
+          container_list=$(cat $yaml_file | grep container_name | cut -d':' -f2)
+          for container_name in $container_list; do
+            cid=$(docker ps -aq --filter "name=$container_name")
+            if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
+          done
           docker system prune -f
           docker rmi $(docker images --filter reference="*:5000/*/*" -q) || true
```
**`.github/workflows/check-online-doc-build.yml`** (new file, +35, vendored)

```diff
@@ -0,0 +1,35 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+name: Check Online Document Building
+permissions: {}
+
+on:
+  pull_request:
+    branches: [main]
+    paths:
+      - "**.md"
+      - "**.rst"
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          path: GenAIExamples
+
+      - name: Checkout docs
+        uses: actions/checkout@v4
+        with:
+          repository: opea-project/docs
+          path: docs
+
+      - name: Build Online Document
+        shell: bash
+        run: |
+          echo "build online doc"
+          cd docs
+          bash scripts/build.sh
```
```diff
@@ -50,6 +50,11 @@ on:
       description: 'OPEA branch for image build'
       required: false
       type: string
+    inject_commit:
+      default: true
+      description: "inject commit to docker images true or false"
+      required: false
+      type: string
 
 permissions: read-all
 jobs:
@@ -101,4 +106,5 @@ jobs:
       test_k8s: ${{ fromJSON(inputs.test_k8s) }}
       test_gmc: ${{ fromJSON(inputs.test_gmc) }}
       opea_branch: ${{ inputs.opea_branch }}
+      inject_commit: ${{ inputs.inject_commit }}
     secrets: inherit
```
**`.github/workflows/manual-freeze-tag.yml`** (15 changes, vendored)

```diff
@@ -1,13 +1,13 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
-name: Freeze OPEA images release tag in readme on manual event
+name: Freeze OPEA images release tag
 
 on:
   workflow_dispatch:
     inputs:
       tag:
-        default: "latest"
+        default: "1.1.0"
         description: "Tag to apply to images"
         required: true
         type: string
@@ -23,10 +23,6 @@ jobs:
           fetch-depth: 0
           ref: ${{ github.ref }}
 
-      - uses: actions/setup-python@v5
-        with:
-          python-version: "3.10"
-
       - name: Set up Git
         run: |
           git config --global user.name "NeuralChatBot"
@@ -35,9 +31,10 @@ jobs:
 
       - name: Run script
         run: |
-          find . -name "*.md" | xargs sed -i "s|^docker\ compose|TAG=${{ github.event.inputs.tag }}\ docker\ compose|g"
+          find . -type f -name "*.yaml" \( -path "*/benchmark/*" -o -path "*/kubernetes/*" \) | xargs sed -i -E 's/(opea\/[A-Za-z0-9\-]*:)latest/\1${{ github.event.inputs.tag }}/g'
+          find . -type f -name "*.md" \( -path "*/benchmark/*" -o -path "*/kubernetes/*" \) | xargs sed -i -E 's/(opea\/[A-Za-z0-9\-]*:)latest/\1${{ github.event.inputs.tag }}/g'
+          IFS='.' read -r major minor patch <<< "${{ github.event.inputs.tag }}"
+          echo "VERSION_MAJOR ${major}" > version.txt
+          echo "VERSION_MINOR ${minor}" >> version.txt
+          echo "VERSION_PATCH ${patch}" >> version.txt
 
       - name: Commit changes
         run: |
```
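A minimal sketch of the tag-freeze substitution the workflow runs, assuming an input tag of `1.1.0`; the file name and image name here are illustrative only:

```bash
# Reproduce the sed rewrite on a sample line (same regex as the workflow above).
echo 'image: opea/chatqna:latest' > sample.yaml
sed -i -E 's/(opea\/[A-Za-z0-9\-]*:)latest/\11.1.0/g' sample.yaml
cat sample.yaml   # -> image: opea/chatqna:1.1.0
```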
**`.github/workflows/manual-image-build.yml`** (7 changes, vendored)

```diff
@@ -30,6 +30,12 @@ on:
       description: 'OPEA branch for image build'
       required: false
       type: string
+    inject_commit:
+      default: true
+      description: "inject commit to docker images true or false"
+      required: false
+      type: string
+
 jobs:
   get-test-matrix:
     runs-on: ubuntu-latest
@@ -56,4 +62,5 @@ jobs:
       services: ${{ inputs.services }}
       tag: ${{ inputs.tag }}
       opea_branch: ${{ inputs.opea_branch }}
+      inject_commit: ${{ inputs.inject_commit }}
     secrets: inherit
```
**`.github/workflows/nightly-docker-build-publish.yml`** (new file, +70, vendored)

```diff
@@ -0,0 +1,70 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+name: Nightly build/publish latest docker images
+
+on:
+  schedule:
+    - cron: "30 13 * * *" # UTC time
+  workflow_dispatch:
+
+env:
+  EXAMPLES: "AgentQnA,AudioQnA,ChatQnA,CodeGen,CodeTrans,DocIndexRetriever,DocSum,FaqGen,InstructionTuning,MultimodalQnA,ProductivitySuite,RerankFinetuning,SearchQnA,Translation,VideoQnA,VisualQnA"
+  TAG: "latest"
+  PUBLISH_TAGS: "latest"
+
+jobs:
+  get-build-matrix:
+    runs-on: ubuntu-latest
+    outputs:
+      examples_json: ${{ steps.get-matrix.outputs.examples_json }}
+      EXAMPLES: ${{ steps.get-matrix.outputs.EXAMPLES }}
+      TAG: ${{ steps.get-matrix.outputs.TAG }}
+      PUBLISH_TAGS: ${{ steps.get-matrix.outputs.PUBLISH_TAGS }}
+    steps:
+      - name: Create Matrix
+        id: get-matrix
+        run: |
+          examples=($(echo ${EXAMPLES} | tr ',' ' '))
+          examples_json=$(printf '%s\n' "${examples[@]}" | sort -u | jq -R '.' | jq -sc '.')
+          echo "examples_json=$examples_json" >> $GITHUB_OUTPUT
+          echo "EXAMPLES=$EXAMPLES" >> $GITHUB_OUTPUT
+          echo "TAG=$TAG" >> $GITHUB_OUTPUT
+          echo "PUBLISH_TAGS=$PUBLISH_TAGS" >> $GITHUB_OUTPUT
+
+  build:
+    needs: get-build-matrix
+    strategy:
+      matrix:
+        example: ${{ fromJSON(needs.get-build-matrix.outputs.examples_json) }}
+      fail-fast: false
+    uses: ./.github/workflows/_example-workflow.yml
+    with:
+      node: gaudi
+      example: ${{ matrix.example }}
+    secrets: inherit
+
+  get-image-list:
+    needs: get-build-matrix
+    uses: ./.github/workflows/_get-image-list.yml
+    with:
+      examples: ${{ needs.get-build-matrix.outputs.EXAMPLES }}
+
+  publish:
+    needs: [get-build-matrix, get-image-list, build]
+    strategy:
+      matrix:
+        image: ${{ fromJSON(needs.get-image-list.outputs.matrix) }}
+    runs-on: "docker-build-gaudi"
+    steps:
+      - uses: docker/login-action@v3.2.0
+        with:
+          username: ${{ secrets.DOCKERHUB_USER }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+      - name: Image Publish
+        uses: opea-project/validation/actions/image-publish@main
+        with:
+          local_image_ref: ${OPEA_IMAGE_REPO}opea/${{ matrix.image }}:${{ needs.get-build-matrix.outputs.TAG }}
+          image_name: opea/${{ matrix.image }}
+          publish_tags: ${{ needs.get-build-matrix.outputs.PUBLISH_TAGS }}
```
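A minimal sketch of the matrix conversion used in `Create Matrix` above: the comma-separated `EXAMPLES` string becomes a sorted, de-duplicated JSON array suitable for `fromJSON()`. The sample value is illustrative:

```bash
# Same tr/sort/jq pipeline as the workflow step.
EXAMPLES="ChatQnA,CodeGen,ChatQnA"
examples=($(echo ${EXAMPLES} | tr ',' ' '))
printf '%s\n' "${examples[@]}" | sort -u | jq -R '.' | jq -sc '.'
# -> ["ChatQnA","CodeGen"]
```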
**`.github/workflows/pr-gmc-e2e.yaml`** (2 changes, vendored)

```diff
@@ -12,7 +12,7 @@ on:
       - "**/tests/test_gmc**"
       - "!**.md"
       - "!**.txt"
-      - "!**/kubernetes/**/manifests/**"
+      - "!**/kubernetes/**/manifest/**"
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
```
**`.github/workflows/pr-manifest-e2e.yml`** (2 changes, vendored)

```diff
@@ -10,7 +10,7 @@ on:
     paths:
       - "**/Dockerfile**"
       - "**.py"
-      - "**/kubernetes/**/manifests/**"
+      - "**/kubernetes/**/manifest/**"
       - "**/tests/test_manifest**"
       - "!**.md"
       - "!**.txt"
```
**`.github/workflows/pr-path-detection.yml`** (6 changes, vendored)

```diff
@@ -61,14 +61,14 @@ jobs:
           changed_files="$(git diff --name-status --diff-filter=ARM ${{ github.event.pull_request.base.sha }} ${merged_commit} | awk '/\.md$/ {print $NF}')"
           if [ -n "$changed_files" ]; then
             for changed_file in $changed_files; do
-              echo $changed_file
+              # echo $changed_file
               url_lines=$(grep -H -Eo '\]\(http[s]?://[^)]+\)' "$changed_file" | grep -Ev 'GenAIExamples/blob/main') || true
               if [ -n "$url_lines" ]; then
                 for url_line in $url_lines; do
-                  echo $url_line
+                  # echo $url_line
                   url=$(echo "$url_line"|cut -d '(' -f2 | cut -d ')' -f1|sed 's/\.git$//')
                   path=$(echo "$url_line"|cut -d':' -f1 | cut -d'/' -f2-)
-                  response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url")
+                  response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url")|| true
                   if [ "$response" -ne 200 ]; then
                     echo "**********Validation failed, try again**********"
                     response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url")
```
**`.github/workflows/push-image-build.yml`** (5 changes, vendored)

```diff
@@ -8,7 +8,8 @@ on:
     branches: [ 'main' ]
     paths:
       - "**.py"
-      - "**Dockerfile"
+      - "**Dockerfile*"
+      - "**docker_image_build/build.yaml"
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}-on-push
@@ -18,7 +19,7 @@ jobs:
   job1:
     uses: ./.github/workflows/_get-test-matrix.yml
     with:
-      test_mode: "docker_image_build/build.yaml"
+      test_mode: "docker_image_build"
 
   image-build:
     needs: job1
```
**`.github/workflows/scripts/get_test_matrix.sh`** (10 changes, vendored)

```diff
@@ -9,12 +9,20 @@ set -e
 changed_files=$changed_files
 test_mode=$test_mode
 run_matrix="{\"include\":["
-hardware_list="xeon gaudi" # current support hardware list
 
 examples=$(printf '%s\n' "${changed_files[@]}" | grep '/' | cut -d'/' -f1 | sort -u)
 for example in ${examples}; do
     cd $WORKSPACE/$example
+    if [[ ! $(find . -type f | grep ${test_mode}) ]]; then continue; fi
     cd tests
     ls -l
+    if [[ "$test_mode" == "docker_image_build" ]]; then
+        find_name="test_manifest_on_*.sh"
+    else
+        find_name="test_${test_mode}*_on_*.sh"
+    fi
+    hardware_list=$(find . -type f -name "${find_name}" | cut -d/ -f2 | cut -d. -f1 | awk -F'_on_' '{print $2}'| sort -u)
+    echo -e "Test supported hardware list: \n${hardware_list}"
 
+    run_hardware=""
     if [[ $(printf '%s\n' "${changed_files[@]}" | grep ${example} | cut -d'/' -f2 | grep -E '*.py|Dockerfile*|ui|docker_image_build' ) ]]; then
```
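A minimal sketch of how the hardware list above is derived from test file names: everything after `_on_` (with the extension stripped) is treated as a hardware target. The file names below are illustrative:

```bash
# Mirror the cut/awk pipeline from get_test_matrix.sh on sample names.
for f in test_compose_on_xeon.sh test_compose_on_gaudi.sh; do
  echo "$f" | cut -d. -f1 | awk -F'_on_' '{print $2}'
done | sort -u
# -> gaudi
# -> xeon
```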
**`.set_env.sh`** (new file, +16)

```diff
@@ -0,0 +1,16 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+#To anounce the version of the codes, please create a version.txt and have following format.
+#VERSION_MAJOR 1
+#VERSION_MINOR 0
+#VERSION_PATCH 0
+
+VERSION_FILE="version.txt"
+if [ -f $VERSION_FILE ]; then
+    VER_OPEA_MAJOR=$(grep "VERSION_MAJOR" $VERSION_FILE | cut -d " " -f 2)
+    VER_OPEA_MINOR=$(grep "VERSION_MINOR" $VERSION_FILE | cut -d " " -f 2)
+    VER_OPEA_PATCH=$(grep "VERSION_PATCH" $VERSION_FILE | cut -d " " -f 2)
+    export TAG=$VER_OPEA_MAJOR.$VER_OPEA_MINOR
+    echo OPEA Version:$TAG
+fi
```
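A minimal sketch of the `version.txt` format this script expects and the `TAG` it derives (note only major and minor feed into `TAG`):

```bash
# Create a version.txt in the documented format, then source the script.
cat > version.txt <<'EOF'
VERSION_MAJOR 1
VERSION_MINOR 1
VERSION_PATCH 0
EOF
source .set_env.sh   # exports TAG=1.1 and prints "OPEA Version:1.1"
```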
````diff
@@ -81,72 +81,122 @@ flowchart LR
 3. Hierarchical agent can further improve performance.
    Expert worker agents, such as retrieval agent, knowledge graph agent, SQL agent, etc., can provide high-quality output for different aspects of a complex query, and the supervisor agent can aggregate the information together to provide a comprehensive answer.
 
-### Roadmap
+## Deployment with docker
 
-- v0.9: Worker agent uses open-source websearch tool (duckduckgo), agents use OpenAI GPT-4o-mini as llm backend.
-- v1.0: Worker agent uses OPEA retrieval megaservice as tool.
-- v1.0 or later: agents use open-source llm backend.
-- v1.1 or later: add safeguards
+1. Build agent docker image [Optional]
 
-## Getting started
+   > [!NOTE]
+   > the step is optional. The docker images will be automatically pulled when running the docker compose commands. This step is only needed if pulling images failed.
 
-1. Build agent docker image </br>
-   First, clone the opea GenAIComps repo
+   First, clone the opea GenAIComps repo.
 
    ```
    export WORKDIR=<your-work-directory>
    cd $WORKDIR
    git clone https://github.com/opea-project/GenAIComps.git
    ```
 
    Then build the agent docker image. Both the supervisor agent and the worker agent will use the same docker image, but when we launch the two agents we will specify different strategies and register different tools.
 
    ```
    cd GenAIComps
    docker build -t opea/agent-langchain:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/agent/langchain/Dockerfile .
    ```
 
+2. Set up environment for this example </br>
+
-   First, clone this repo
+   First, clone this repo.
 
    ```
    export WORKDIR=<your-work-directory>
    cd $WORKDIR
+   git clone https://github.com/opea-project/GenAIComps.git
    git clone https://github.com/opea-project/GenAIExamples.git
    ```
 
-   Then build the agent docker image. Both the supervisor agent and the worker agent will use the same docker image, but when we launch the two agents we will specify different strategies and register different tools.
+   Second, set up env vars.
 
    ```
-   cd GenAIComps
-   docker build -t opea/agent-langchain:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/agent/langchain/Dockerfile .
+   # Example: host_ip="192.168.1.1" or export host_ip="External_Public_IP"
+   export host_ip=$(hostname -I | awk '{print $1}')
+   # if you are in a proxy environment, also set the proxy-related environment variables
+   export http_proxy="Your_HTTP_Proxy"
+   export https_proxy="Your_HTTPs_Proxy"
+   # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
+   export no_proxy="Your_No_Proxy"
+
+   export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
+   # for using open-source llms
+   export HUGGINGFACEHUB_API_TOKEN=<your-HF-token>
+   export HF_CACHE_DIR=<directory-where-llms-are-downloaded> #so that no need to redownload every time
+
+   # optional: OPANAI_API_KEY if you want to use OpenAI models
+   export OPENAI_API_KEY=<your-openai-key>
    ```
 
-2. Launch tool services </br>
+3. Deploy the retrieval tool (i.e., DocIndexRetriever mega-service)
+
+   First, launch the mega-service.
+
+   ```
+   cd $WORKDIR/GenAIExamples/AgentQnA/retrieval_tool
+   bash launch_retrieval_tool.sh
+   ```
+
+   Then, ingest data into the vector database. Here we provide an example. You can ingest your own data.
+
+   ```
+   bash run_ingest_data.sh
+   ```
+
+4. Launch other tools. </br>
    In this example, we will use some of the mock APIs provided in the Meta CRAG KDD Challenge to demonstrate the benefits of gaining additional context from mock knowledge graphs.
 
    ```
    docker run -d -p=8080:8000 docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0
    ```
 
-3. Set up environment for this example </br>
-   First, clone this repo
+5. Launch agent services</br>
+   We provide two options for `llm_engine` of the agents: 1. open-source LLMs, 2. OpenAI models via API calls.
+
+   Deploy it on Gaudi or Xeon respectively
+
+   ::::{tab-set}
+   :::{tab-item} Gaudi
+   :sync: Gaudi
+
+   To use open-source LLMs on Gaudi2, run commands below.
 
    ```
-   cd $WORKDIR
-   git clone https://github.com/opea-project/GenAIExamples.git
+   cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi
+   bash launch_tgi_gaudi.sh
+   bash launch_agent_service_tgi_gaudi.sh
    ```
 
-   Second, set up env vars
+   :::
+   :::{tab-item} Xeon
+   :sync: Xeon
+
+   To use OpenAI models, run commands below.
 
    ```
-   export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
-   # optional: OPANAI_API_KEY
-   export OPENAI_API_KEY=<your-openai-key>
-   ```
-
-4. Launch agent services</br>
-   The configurations of the supervisor agent and the worker agent are defined in the docker-compose yaml file. We currently use openAI GPT-4o-mini as LLM, and we plan to add support for llama3.1-70B-instruct (served by TGI-Gaudi) in a subsequent release.
    To use openai llm, run command below.
 
    ```
-   cd docker_compose/intel/cpu/xeon
+   cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/cpu/xeon
    bash launch_agent_service_openai.sh
    ```
 
+   :::
+   ::::
+
 ## Validate services
 
 First look at logs of the agent docker containers:
 
 ```
-docker logs docgrader-agent-endpoint
+# worker agent
+docker logs rag-agent-endpoint
 ```
 
 ```
 # supervisor agent
 docker logs react-agent-endpoint
 ```
 
@@ -155,7 +205,7 @@ You should see something like "HTTP server setup successful" if the docker conta
 Second, validate worker agent:
 
 ```
-curl http://${ip_address}:9095/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
+curl http://${host_ip}:9095/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
     "query": "Most recent album by Taylor Swift"
 }'
 ```
@@ -163,11 +213,11 @@ curl http://${ip_address}:9095/v1/chat/completions -X POST -H "Content-Type: app
 Third, validate supervisor agent:
 
 ```
-curl http://${ip_address}:9090/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
+curl http://${host_ip}:9090/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
    "query": "Most recent album by Taylor Swift"
 }'
 ```
 
 ## How to register your own tools with agent
 
-You can take a look at the tools yaml and python files in this example. For more details, please refer to the "Provide your own tools" section in the instructions [here](https://github.com/opea-project/GenAIComps/tree/main/comps/agent/langchain/README.md#5-customize-agent-strategy).
+You can take a look at the tools yaml and python files in this example. For more details, please refer to the "Provide your own tools" section in the instructions [here](https://github.com/opea-project/GenAIComps/tree/main/comps/agent/langchain/README.md).
````
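As a hypothetical sketch of registering your own tool, modeled on the `args_schema` style visible in this compare's tools yaml (see the `get_billboard_rank_date` hunk further below): the tool name, file paths, and schema keys here are illustrative, and the exact yaml-to-python binding keys are documented in the GenAIComps agent README linked above.

```bash
# Write an illustrative tool definition (yaml) and its python counterpart
# into the mounted toolset directory; names are hypothetical.
cat > ${TOOLSET_PATH}/my_tools.yaml <<'EOF'
get_weather:
  description: look up the current weather for a city
  args_schema:
    city:
      type: str
      description: city name
EOF

cat > ${TOOLSET_PATH}/my_tools.py <<'EOF'
def get_weather(city: str) -> str:
    # Stub implementation; replace with a real weather API call.
    return f"Sunny in {city}"
EOF
```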
**`AgentQnA/docker_compose/intel/cpu/xeon/README.md`** (new file, +100)

````diff
@@ -0,0 +1,100 @@
+# Single node on-prem deployment with Docker Compose on Xeon Scalable processors
+
+This example showcases a hierarchical multi-agent system for question-answering applications. We deploy the example on Xeon. For LLMs, we use OpenAI models via API calls. For instructions on using open-source LLMs, please refer to the deployment guide [here](../../../../README.md).
+
+## Deployment with docker
+
+1. First, clone this repo.
+   ```
+   export WORKDIR=<your-work-directory>
+   cd $WORKDIR
+   git clone https://github.com/opea-project/GenAIExamples.git
+   ```
+2. Set up environment for this example </br>
+
+   ```
+   # Example: host_ip="192.168.1.1" or export host_ip="External_Public_IP"
+   export host_ip=$(hostname -I | awk '{print $1}')
+   # if you are in a proxy environment, also set the proxy-related environment variables
+   export http_proxy="Your_HTTP_Proxy"
+   export https_proxy="Your_HTTPs_Proxy"
+   # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
+   export no_proxy="Your_No_Proxy"
+
+   export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
+   #OPANAI_API_KEY if you want to use OpenAI models
+   export OPENAI_API_KEY=<your-openai-key>
+   ```
+
+3. Deploy the retrieval tool (i.e., DocIndexRetriever mega-service)
+
+   First, launch the mega-service.
+
+   ```
+   cd $WORKDIR/GenAIExamples/AgentQnA/retrieval_tool
+   bash launch_retrieval_tool.sh
+   ```
+
+   Then, ingest data into the vector database. Here we provide an example. You can ingest your own data.
+
+   ```
+   bash run_ingest_data.sh
+   ```
+
+4. Launch Tool service
+   In this example, we will use some of the mock APIs provided in the Meta CRAG KDD Challenge to demonstrate the benefits of gaining additional context from mock knowledge graphs.
+   ```
+   docker run -d -p=8080:8000 docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0
+   ```
+5. Launch `Agent` service
+
+   The configurations of the supervisor agent and the worker agent are defined in the docker-compose yaml file. We currently use openAI GPT-4o-mini as LLM, and llama3.1-70B-instruct (served by TGI-Gaudi) in Gaudi example. To use openai llm, run command below.
+
+   ```
+   cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/cpu/xeon
+   bash launch_agent_service_openai.sh
+   ```
+
+6. [Optional] Build `Agent` docker image if pulling images failed.
+
+   ```
+   git clone https://github.com/opea-project/GenAIComps.git
+   cd GenAIComps
+   docker build -t opea/agent-langchain:latest -f comps/agent/langchain/Dockerfile .
+   ```
+
+## Validate services
+
+First look at logs of the agent docker containers:
+
+```
+# worker agent
+docker logs rag-agent-endpoint
+```
+
+```
+# supervisor agent
+docker logs react-agent-endpoint
+```
+
+You should see something like "HTTP server setup successful" if the docker containers are started successfully.</p>
+
+Second, validate worker agent:
+
+```
+curl http://${host_ip}:9095/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
+    "query": "Most recent album by Taylor Swift"
+}'
+```
+
+Third, validate supervisor agent:
+
+```
+curl http://${host_ip}:9090/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
+    "query": "Most recent album by Taylor Swift"
+}'
+```
+
+## How to register your own tools with agent
+
+You can take a look at the tools yaml and python files in this example. For more details, please refer to the "Provide your own tools" section in the instructions [here](https://github.com/opea-project/GenAIComps/tree/main/comps/agent/langchain/README.md).
````
```diff
@@ -2,11 +2,10 @@
 # SPDX-License-Identifier: Apache-2.0
 
 services:
-  worker-docgrader-agent:
+  worker-rag-agent:
     image: opea/agent-langchain:latest
-    container_name: docgrader-agent-endpoint
+    container_name: rag-agent-endpoint
     volumes:
       - ${WORKDIR}/GenAIComps/comps/agent/langchain/:/home/user/comps/agent/langchain/
       - ${TOOLSET_PATH}:/home/user/tools/
     ports:
       - "9095:9095"
@@ -36,8 +35,9 @@ services:
   supervisor-react-agent:
     image: opea/agent-langchain:latest
     container_name: react-agent-endpoint
+    depends_on:
+      - worker-rag-agent
     volumes:
       - ${WORKDIR}/GenAIComps/comps/agent/langchain/:/home/user/comps/agent/langchain/
       - ${TOOLSET_PATH}:/home/user/tools/
     ports:
       - "9090:9090"
```
```diff
@@ -1,13 +1,16 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
+pushd "../../../../../" > /dev/null
+source .set_env.sh
+popd > /dev/null
+
 export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
 export ip_address=$(hostname -I | awk '{print $1}')
 export recursion_limit_worker=12
 export recursion_limit_supervisor=10
 export model="gpt-4o-mini-2024-07-18"
 export temperature=0
-export max_new_tokens=512
+export max_new_tokens=4096
 export OPENAI_API_KEY=${OPENAI_API_KEY}
 export WORKER_AGENT_URL="http://${ip_address}:9095/v1/chat/completions"
 export RETRIEVAL_TOOL_URL="http://${ip_address}:8889/v1/retrievaltool"
```
**`AgentQnA/docker_compose/intel/hpu/gaudi/README.md`** (new file, +105)

````diff
@@ -0,0 +1,105 @@
+# Single node on-prem deployment AgentQnA on Gaudi
+
+This example showcases a hierarchical multi-agent system for question-answering applications. We deploy the example on Gaudi using open-source LLMs,
+For more details, please refer to the deployment guide [here](../../../../README.md).
+
+## Deployment with docker
+
+1. First, clone this repo.
+   ```
+   export WORKDIR=<your-work-directory>
+   cd $WORKDIR
+   git clone https://github.com/opea-project/GenAIExamples.git
+   ```
+2. Set up environment for this example </br>
+
+   ```
+   # Example: host_ip="192.168.1.1" or export host_ip="External_Public_IP"
+   export host_ip=$(hostname -I | awk '{print $1}')
+   # if you are in a proxy environment, also set the proxy-related environment variables
+   export http_proxy="Your_HTTP_Proxy"
+   export https_proxy="Your_HTTPs_Proxy"
+   # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
+   export no_proxy="Your_No_Proxy"
+
+   export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
+   # for using open-source llms
+   export HUGGINGFACEHUB_API_TOKEN=<your-HF-token>
+   # Example export HF_CACHE_DIR=$WORKDIR so that no need to redownload every time
+   export HF_CACHE_DIR=<directory-where-llms-are-downloaded>
+
+   ```
+
+3. Deploy the retrieval tool (i.e., DocIndexRetriever mega-service)
+
+   First, launch the mega-service.
+
+   ```
+   cd $WORKDIR/GenAIExamples/AgentQnA/retrieval_tool
+   bash launch_retrieval_tool.sh
+   ```
+
+   Then, ingest data into the vector database. Here we provide an example. You can ingest your own data.
+
+   ```
+   bash run_ingest_data.sh
+   ```
+
+4. Launch Tool service
+   In this example, we will use some of the mock APIs provided in the Meta CRAG KDD Challenge to demonstrate the benefits of gaining additional context from mock knowledge graphs.
+   ```
+   docker run -d -p=8080:8000 docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0
+   ```
+5. Launch `Agent` service
+
+   To use open-source LLMs on Gaudi2, run commands below.
+
+   ```
+   cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi
+   bash launch_tgi_gaudi.sh
+   bash launch_agent_service_tgi_gaudi.sh
+   ```
+
+6. [Optional] Build `Agent` docker image if pulling images failed.
+
+   ```
+   git clone https://github.com/opea-project/GenAIComps.git
+   cd GenAIComps
+   docker build -t opea/agent-langchain:latest -f comps/agent/langchain/Dockerfile .
+   ```
+
+## Validate services
+
+First look at logs of the agent docker containers:
+
+```
+# worker agent
+docker logs rag-agent-endpoint
+```
+
+```
+# supervisor agent
+docker logs react-agent-endpoint
+```
+
+You should see something like "HTTP server setup successful" if the docker containers are started successfully.</p>
+
+Second, validate worker agent:
+
+```
+curl http://${host_ip}:9095/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
+    "query": "Most recent album by Taylor Swift"
+}'
+```
+
+Third, validate supervisor agent:
+
+```
+curl http://${host_ip}:9090/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
+    "query": "Most recent album by Taylor Swift"
+}'
+```
+
+## How to register your own tools with agent
+
+You can take a look at the tools yaml and python files in this example. For more details, please refer to the "Provide your own tools" section in the instructions [here](https://github.com/opea-project/GenAIComps/tree/main/comps/agent/langchain/README.md).
````
```diff
@@ -2,37 +2,9 @@
 # SPDX-License-Identifier: Apache-2.0
 
 services:
-  tgi-server:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
-    container_name: tgi-server
-    ports:
-      - "8085:80"
-    volumes:
-      - ${HF_CACHE_DIR}:/data
-    environment:
-      no_proxy: ${no_proxy}
-      http_proxy: ${http_proxy}
-      https_proxy: ${https_proxy}
-      HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-      HF_HUB_DISABLE_PROGRESS_BARS: 1
-      HF_HUB_ENABLE_HF_TRANSFER: 0
-      HABANA_VISIBLE_DEVICES: all
-      OMPI_MCA_btl_vader_single_copy_mechanism: none
-      PT_HPU_ENABLE_LAZY_COLLECTIVES: true
-      ENABLE_HPU_GRAPH: true
-      LIMIT_HPU_GRAPH: true
-      USE_FLASH_ATTENTION: true
-      FLASH_ATTENTION_RECOMPUTE: true
-    runtime: habana
-    cap_add:
-      - SYS_NICE
-    ipc: host
-    command: --model-id ${LLM_MODEL_ID} --max-input-length 4096 --max-total-tokens 8192 --sharded true --num-shard ${NUM_SHARDS}
-  worker-docgrader-agent:
+  worker-rag-agent:
     image: opea/agent-langchain:latest
-    container_name: docgrader-agent-endpoint
-    depends_on:
-      - tgi-server
+    container_name: rag-agent-endpoint
     volumes:
       # - ${WORKDIR}/GenAIExamples/AgentQnA/docker_image_build/GenAIComps/comps/agent/langchain/:/home/user/comps/agent/langchain/
       - ${TOOLSET_PATH}:/home/user/tools/
@@ -41,7 +13,7 @@ services:
     ipc: host
     environment:
       ip_address: ${ip_address}
-      strategy: rag_agent
+      strategy: rag_agent_llama
       recursion_limit: ${recursion_limit_worker}
       llm_engine: tgi
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
@@ -66,8 +38,7 @@ services:
     image: opea/agent-langchain:latest
     container_name: react-agent-endpoint
     depends_on:
-      - tgi-server
-      - worker-docgrader-agent
+      - worker-rag-agent
     volumes:
       # - ${WORKDIR}/GenAIExamples/AgentQnA/docker_image_build/GenAIComps/comps/agent/langchain/:/home/user/comps/agent/langchain/
       - ${TOOLSET_PATH}:/home/user/tools/
@@ -76,7 +47,7 @@ services:
     ipc: host
     environment:
       ip_address: ${ip_address}
-      strategy: react_langgraph
+      strategy: react_llama
       recursion_limit: ${recursion_limit_supervisor}
       llm_engine: tgi
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
```
```diff
@@ -1,6 +1,9 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
+pushd "../../../../../" > /dev/null
+source .set_env.sh
+popd > /dev/null
 WORKPATH=$(dirname "$PWD")/..
 # export WORKDIR=$WORKPATH/../../
 echo "WORKDIR=${WORKDIR}"
@@ -15,7 +18,7 @@ export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-70B-Instruct"
 export NUM_SHARDS=4
 export LLM_ENDPOINT_URL="http://${ip_address}:8085"
 export temperature=0.01
-export max_new_tokens=512
+export max_new_tokens=4096
 
 # agent related environment variables
 export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
@@ -27,17 +30,3 @@ export RETRIEVAL_TOOL_URL="http://${ip_address}:8889/v1/retrievaltool"
 export CRAG_SERVER=http://${ip_address}:8080
 
 docker compose -f compose.yaml up -d
-
-sleep 5s
-echo "Waiting tgi gaudi ready"
-n=0
-until [[ "$n" -ge 100 ]] || [[ $ready == true ]]; do
-    docker logs tgi-server &> tgi-gaudi-service.log
-    n=$((n+1))
-    if grep -q Connected tgi-gaudi-service.log; then
-        break
-    fi
-    sleep 5s
-done
-sleep 5s
-echo "Service started successfully"
```
**`AgentQnA/docker_compose/intel/hpu/gaudi/launch_tgi_gaudi.sh`** (new file, +25)

```diff
@@ -0,0 +1,25 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# LLM related environment variables
+export HF_CACHE_DIR=${HF_CACHE_DIR}
+ls $HF_CACHE_DIR
+export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-70B-Instruct"
+export NUM_SHARDS=4
+
+docker compose -f tgi_gaudi.yaml up -d
+
+sleep 5s
+echo "Waiting tgi gaudi ready"
+n=0
+until [[ "$n" -ge 100 ]] || [[ $ready == true ]]; do
+    docker logs tgi-server &> tgi-gaudi-service.log
+    n=$((n+1))
+    if grep -q Connected tgi-gaudi-service.log; then
+        break
+    fi
+    sleep 5s
+done
+sleep 5s
+echo "Service started successfully"
```
**`AgentQnA/docker_compose/intel/hpu/gaudi/tgi_gaudi.yaml`** (new file, +30)

```diff
@@ -0,0 +1,30 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+services:
+  tgi-server:
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
+    container_name: tgi-server
+    ports:
+      - "8085:80"
+    volumes:
+      - ${HF_CACHE_DIR}:/data
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_HUB_DISABLE_PROGRESS_BARS: 1
+      HF_HUB_ENABLE_HF_TRANSFER: 0
+      HABANA_VISIBLE_DEVICES: all
+      OMPI_MCA_btl_vader_single_copy_mechanism: none
+      PT_HPU_ENABLE_LAZY_COLLECTIVES: true
+      ENABLE_HPU_GRAPH: true
+      LIMIT_HPU_GRAPH: true
+      USE_FLASH_ATTENTION: true
+      FLASH_ATTENTION_RECOMPUTE: true
+    runtime: habana
+    cap_add:
+      - SYS_NICE
+    ipc: host
+    command: --model-id ${LLM_MODEL_ID} --max-input-length 4096 --max-total-tokens 8192 --sharded true --num-shard ${NUM_SHARDS}
```
```diff
@@ -17,6 +17,12 @@ if [ ! -d "$HF_CACHE_DIR" ]; then
 fi
 ls $HF_CACHE_DIR
 
+function start_tgi(){
+    echo "Starting tgi-gaudi server"
+    cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi
+    bash launch_tgi_gaudi.sh
+
+}
+
 function start_agent_and_api_server() {
     echo "Starting CRAG server"
@@ -25,6 +31,7 @@ function start_agent_and_api_server() {
     echo "Starting Agent services"
     cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi
     bash launch_agent_service_tgi_gaudi.sh
+    sleep 10
 }
 
 function validate() {
@@ -43,18 +50,22 @@ function validate() {
 
 function validate_agent_service() {
     echo "----------------Test agent ----------------"
-    local CONTENT=$(http_proxy="" curl http://${ip_address}:9095/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
-     "query": "Tell me about Michael Jackson song thriller"
-    }')
-    local EXIT_CODE=$(validate "$CONTENT" "Thriller" "react-agent-endpoint")
-    docker logs docgrader-agent-endpoint
+    # local CONTENT=$(http_proxy="" curl http://${ip_address}:9095/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
+    #  "query": "Tell me about Michael Jackson song thriller"
+    # }')
+    export agent_port="9095"
+    local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py)
+    local EXIT_CODE=$(validate "$CONTENT" "Thriller" "rag-agent-endpoint")
+    docker logs rag-agent-endpoint
     if [ "$EXIT_CODE" == "1" ]; then
         exit 1
     fi
 
-    local CONTENT=$(http_proxy="" curl http://${ip_address}:9090/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
-     "query": "Tell me about Michael Jackson song thriller"
-    }')
+    # local CONTENT=$(http_proxy="" curl http://${ip_address}:9090/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
+    #  "query": "Tell me about Michael Jackson song thriller"
+    # }')
+    export agent_port="9090"
+    local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py)
     local EXIT_CODE=$(validate "$CONTENT" "Thriller" "react-agent-endpoint")
     docker logs react-agent-endpoint
     if [ "$EXIT_CODE" == "1" ]; then
@@ -64,6 +75,10 @@ function validate_agent_service() {
 }
 
 function main() {
+    echo "==================== Start TGI ===================="
+    start_tgi
+    echo "==================== TGI started ===================="
+
     echo "==================== Start agent ===================="
     start_agent_and_api_server
     echo "==================== Agent started ===================="
```
AgentQnA/tests/test.py
Normal file
@@ -0,0 +1,25 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import os
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
def generate_answer_agent_api(url, prompt):
|
||||
proxies = {"http": ""}
|
||||
payload = {
|
||||
"query": prompt,
|
||||
}
|
||||
response = requests.post(url, json=payload, proxies=proxies)
|
||||
answer = response.json()["text"]
|
||||
return answer
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
ip_address = os.getenv("ip_address", "localhost")
|
||||
agent_port = os.getenv("agent_port", "9095")
|
||||
url = f"http://{ip_address}:{agent_port}/v1/chat/completions"
|
||||
prompt = "Tell me about Michael Jackson song thriller"
|
||||
answer = generate_answer_agent_api(url, prompt)
|
||||
print(answer)
|
||||
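An example invocation of the test helper above, assuming the agents from this compare are running; `ip_address` and `agent_port` select which agent endpoint to query:

```bash
# 9095 targets the worker (rag) agent, 9090 the supervisor (react) agent.
export ip_address=$(hostname -I | awk '{print $1}')
export agent_port="9095"
python3 AgentQnA/tests/test.py
```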
```diff
@@ -19,7 +19,6 @@ function stop_crag() {
 
 function stop_agent_docker() {
     cd $WORKPATH/docker_compose/intel/hpu/gaudi/
-    # docker compose -f compose.yaml down
     container_list=$(cat compose.yaml | grep container_name | cut -d':' -f2)
     for container_name in $container_list; do
         cid=$(docker ps -aq --filter "name=$container_name")
@@ -28,11 +27,21 @@ function stop_agent_docker() {
     done
 }
 
+function stop_tgi(){
+    cd $WORKPATH/docker_compose/intel/hpu/gaudi/
+    container_list=$(cat tgi_gaudi.yaml | grep container_name | cut -d':' -f2)
+    for container_name in $container_list; do
+        cid=$(docker ps -aq --filter "name=$container_name")
+        echo "Stopping container $container_name"
+        if [[ ! -z "$cid" ]]; then docker rm $cid -f && sleep 1s; fi
+    done
+
+}
+
 function stop_retrieval_tool() {
     echo "Stopping Retrieval tool"
     local RETRIEVAL_TOOL_PATH=$WORKPATH/../DocIndexRetriever
     cd $RETRIEVAL_TOOL_PATH/docker_compose/intel/cpu/xeon/
-    # docker compose -f compose.yaml down
     container_list=$(cat compose.yaml | grep container_name | cut -d':' -f2)
     for container_name in $container_list; do
         cid=$(docker ps -aq --filter "name=$container_name")
@@ -43,25 +52,26 @@ function stop_retrieval_tool() {
 echo "workpath: $WORKPATH"
 echo "=================== Stop containers ===================="
 stop_crag
+stop_tgi
 stop_agent_docker
 stop_retrieval_tool
 
 cd $WORKPATH/tests
 
 echo "=================== #1 Building docker images===================="
-bash 1_build_images.sh
+bash step1_build_images.sh
 echo "=================== #1 Building docker images completed===================="
 
 echo "=================== #2 Start retrieval tool===================="
-bash 2_start_retrieval_tool.sh
+bash step2_start_retrieval_tool.sh
 echo "=================== #2 Retrieval tool started===================="
 
 echo "=================== #3 Ingest data and validate retrieval===================="
-bash 3_ingest_data_and_validate_retrieval.sh
+bash step3_ingest_data_and_validate_retrieval.sh
 echo "=================== #3 Data ingestion and validation completed===================="
 
 echo "=================== #4 Start agent and API server===================="
-bash 4_launch_and_validate_agent_tgi.sh
+bash step4_launch_and_validate_agent_tgi.sh
 echo "=================== #4 Agent test passed ===================="
 
 echo "=================== #5 Stop agent and API server===================="
@@ -70,4 +80,6 @@ stop_agent_docker
 stop_retrieval_tool
 echo "=================== #5 Agent and API server stopped===================="
 
+echo y | docker system prune
+
 echo "ALL DONE!"
```
```diff
@@ -25,7 +25,7 @@ get_billboard_rank_date:
   args_schema:
     rank:
       type: int
-      description: song name
+      description: the rank of interest, for example 1 for top 1
    date:
       type: str
       description: date
```
```diff
@@ -12,10 +12,11 @@ def search_knowledge_base(query: str) -> str:
     print(url)
     proxies = {"http": ""}
     payload = {
-        "text": query,
+        "messages": query,
     }
     response = requests.post(url, json=payload, proxies=proxies)
+    print(response)
     if "documents" in response.json():
         docs = response.json()["documents"]
         context = ""
         for i, doc in enumerate(docs):
@@ -23,5 +24,19 @@ def search_knowledge_base(query: str) -> str:
             context = doc
         else:
             context += "\n" + doc
-        print(context)
+        # print(context)
         return context
+    elif "text" in response.json():
+        return response.json()["text"]
+    elif "reranked_docs" in response.json():
+        docs = response.json()["reranked_docs"]
+        context = ""
+        for i, doc in enumerate(docs):
+            if i == 0:
+                context = doc["text"]
+            else:
+                context += "\n" + doc["text"]
+        # print(context)
+        return context
+    else:
+        return "Error parsing response from the knowledge base."
```
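A sketch of the request the updated tool now sends to the retrieval endpoint (the payload key changed from `text` to `messages` in the hunk above); the response may carry `documents`, `text`, or `reranked_docs`, all of which the new parser handles:

```bash
# Query the DocIndexRetriever mega-service directly, mirroring the tool's payload.
curl http://${ip_address}:8889/v1/retrievaltool -X POST \
  -H "Content-Type: application/json" \
  -d '{"messages": "Most recent album by Taylor Swift"}'
```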
```diff
@@ -18,7 +18,7 @@ WORKDIR /home/user/
 RUN git clone https://github.com/opea-project/GenAIComps.git
 
 WORKDIR /home/user/GenAIComps
-RUN pip install --no-cache-dir --upgrade pip && \
+RUN pip install --no-cache-dir --upgrade pip setuptools && \
     pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt
 
 COPY ./audioqna.py /home/user/audioqna.py
```
```diff
@@ -18,7 +18,7 @@ WORKDIR /home/user/
 RUN git clone https://github.com/opea-project/GenAIComps.git
 
 WORKDIR /home/user/GenAIComps
-RUN pip install --no-cache-dir --upgrade pip && \
+RUN pip install --no-cache-dir --upgrade pip setuptools && \
     pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt
 
 COPY ./audioqna_multilang.py /home/user/audioqna_multilang.py
```
````diff
@@ -36,9 +36,9 @@ Evaluate the performance with the LLM:
 
 ```py
 # validate the offline model
-# python offline_evaluate.py
+# python offline_eval.py
 # validate the online asr microservice accuracy
-python online_evaluate.py
+python online_eval.py
 ```
 
 ### Performance Result
````
```diff
@@ -2,4 +2,4 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
-python online_evaluate.py
+python online_eval.py
```
**`AudioQnA/benchmark/performance/README.md`** (new file, +77)

````diff
@@ -0,0 +1,77 @@
+# AudioQnA Benchmarking
+
+This folder contains a collection of scripts to enable inference benchmarking by leveraging a comprehensive benchmarking tool, [GenAIEval](https://github.com/opea-project/GenAIEval/blob/main/evals/benchmark/README.md), that enables throughput analysis to assess inference performance.
+
+By following this guide, you can run benchmarks on your deployment and share the results with the OPEA community.
+
+## Purpose
+
+We aim to run these benchmarks and share them with the OPEA community for three primary reasons:
+
+- To offer insights on inference throughput in real-world scenarios, helping you choose the best service or deployment for your needs.
+- To establish a baseline for validating optimization solutions across different implementations, providing clear guidance on which methods are most effective for your use case.
+- To inspire the community to build upon our benchmarks, allowing us to better quantify new solutions in conjunction with current leading llms, serving frameworks etc.
+
+## Metrics
+
+The benchmark will report the below metrics, including:
+
+- Number of Concurrent Requests
+- End-to-End Latency: P50, P90, P99 (in milliseconds)
+- End-to-End First Token Latency: P50, P90, P99 (in milliseconds)
+- Average Next Token Latency (in milliseconds)
+- Average Token Latency (in milliseconds)
+- Requests Per Second (RPS)
+- Output Tokens Per Second
+- Input Tokens Per Second
+
+Results will be displayed in the terminal and saved as CSV file named `1_stats.csv` for easy export to spreadsheets.
+
+## Getting Started
+
+We recommend using Kubernetes to deploy the AudioQnA service, as it offers benefits such as load balancing and improved scalability. However, you can also deploy the service using Docker if that better suits your needs.
+
+### Prerequisites
+
+- Install Kubernetes by following [this guide](https://github.com/opea-project/docs/blob/main/guide/installation/k8s_install/k8s_install_kubespray.md).
+
+- Every node has direct internet access
+- Set up kubectl on the master node with access to the Kubernetes cluster.
+- Install Python 3.8+ on the master node for running GenAIEval.
+- Ensure all nodes have a local /mnt/models folder, which will be mounted by the pods.
+- Ensure that the container's ulimit can meet the the number of requests.
+
+```bash
+# The way to modify the containered ulimit:
+sudo systemctl edit containerd
+# Add two lines:
+[Service]
+LimitNOFILE=65536:1048576
+
+sudo systemctl daemon-reload; sudo systemctl restart containerd
+```
+
+## Test Steps
+
+Please deploy AudioQnA service before benchmarking.
+
+### Run Benchmark Test
+
+Before the benchmark, we can configure the number of test queries and test output directory by:
+
+```bash
+export USER_QUERIES="[128, 128, 128, 128]"
+export TEST_OUTPUT_DIR="/tmp/benchmark_output"
+```
+
+And then run the benchmark by:
+
+```bash
+bash benchmark.sh -n <node_count>
+```
+
+The argument `-n` refers to the number of test nodes.
+
+### Data collection
+
+All the test results will come to this folder `/tmp/benchmark_output` configured by the environment variable `TEST_OUTPUT_DIR` in previous steps.
````
**`AudioQnA/benchmark/performance/benchmark.sh`** (new file, +99)

```diff
@@ -0,0 +1,99 @@
+#!/bin/bash
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+deployment_type="k8s"
+node_number=1
+service_port=8888
+query_per_node=128
+
+benchmark_tool_path="$(pwd)/GenAIEval"
+
+usage() {
+  echo "Usage: $0 [-d deployment_type] [-n node_number] [-i service_ip] [-p service_port]"
+  echo "  -d deployment_type    AudioQnA deployment type, select between k8s and docker (default: k8s)"
+  echo "  -n node_number        Test node number, required only for k8s deployment_type, (default: 1)"
+  echo "  -i service_ip         AudioQnA service ip, required only for docker deployment_type"
+  echo "  -p service_port       AudioQnA service port, required only for docker deployment_type, (default: 8888)"
+  exit 1
+}
+
+while getopts ":d:n:i:p:" opt; do
+  case ${opt} in
+    d )
+      deployment_type=$OPTARG
+      ;;
+    n )
+      node_number=$OPTARG
+      ;;
+    i )
+      service_ip=$OPTARG
+      ;;
+    p )
+      service_port=$OPTARG
+      ;;
+    \? )
+      echo "Invalid option: -$OPTARG" 1>&2
+      usage
+      ;;
+    : )
+      echo "Invalid option: -$OPTARG requires an argument" 1>&2
+      usage
+      ;;
+  esac
+done
+
+if [[ "$deployment_type" == "docker" && -z "$service_ip" ]]; then
+  echo "Error: service_ip is required for docker deployment_type" 1>&2
+  usage
+fi
+
+if [[ "$deployment_type" == "k8s" && ( -n "$service_ip" || -n "$service_port" ) ]]; then
+  echo "Warning: service_ip and service_port are ignored for k8s deployment_type" 1>&2
+fi
+
+function main() {
+  if [[ ! -d ${benchmark_tool_path} ]]; then
+    echo "Benchmark tool not found, setting up..."
+    setup_env
+  fi
+  run_benchmark
+}
+
+function setup_env() {
+  git clone https://github.com/opea-project/GenAIEval.git
+  pushd ${benchmark_tool_path}
+  python3 -m venv stress_venv
+  source stress_venv/bin/activate
+  pip install -r requirements.txt
+  popd
+}
+
+function run_benchmark() {
+  source ${benchmark_tool_path}/stress_venv/bin/activate
+  export DEPLOYMENT_TYPE=${deployment_type}
+  export SERVICE_IP=${service_ip:-"None"}
+  export SERVICE_PORT=${service_port:-"None"}
+  if [[ -z $USER_QUERIES ]]; then
+    user_query=$((query_per_node*node_number))
+    export USER_QUERIES="[${user_query}, ${user_query}, ${user_query}, ${user_query}]"
+    echo "USER_QUERIES not configured, setting to: ${USER_QUERIES}."
+  fi
+  export WARMUP=$(echo $USER_QUERIES | sed -e 's/[][]//g' -e 's/,.*//')
+  if [[ -z $WARMUP ]]; then export WARMUP=0; fi
+  if [[ -z $TEST_OUTPUT_DIR ]]; then
+    if [[ $DEPLOYMENT_TYPE == "k8s" ]]; then
+      export TEST_OUTPUT_DIR="${benchmark_tool_path}/evals/benchmark/benchmark_output/node_${node_number}"
+    else
+      export TEST_OUTPUT_DIR="${benchmark_tool_path}/evals/benchmark/benchmark_output/docker"
+    fi
+    echo "TEST_OUTPUT_DIR not configured, setting to: ${TEST_OUTPUT_DIR}."
+  fi
+
+  envsubst < ./benchmark.yaml > ${benchmark_tool_path}/evals/benchmark/benchmark.yaml
+  cd ${benchmark_tool_path}/evals/benchmark
+  python benchmark.py
+}
+
+main
```
52
AudioQnA/benchmark/performance/benchmark.yaml
Normal file
@@ -0,0 +1,52 @@
|
||||
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

test_suite_config: # Overall configuration settings for the test suite
  examples: ["audioqna"] # The specific test cases being tested, e.g., chatqna, codegen, codetrans, faqgen, audioqna, visualqna
  deployment_type: "k8s" # Default is "k8s", can also be "docker"
  service_ip: None # Leave as None for k8s, specify for Docker
  service_port: None # Leave as None for k8s, specify for Docker
  warm_ups: 0 # Number of test requests for warm-up
  run_time: 60m # The max total run time for the test suite
  seed: # The seed for all RNGs
  user_queries: [1, 2, 4, 8, 16, 32, 64, 128] # Number of test requests at each concurrency level
  query_timeout: 120 # Number of seconds to wait for a simulated user to complete any executing task before exiting. 120 sec by default.
  random_prompt: false # Use random prompts if true, fixed prompts if false
  collect_service_metric: false # Collect service metrics if true, do not collect service metrics if false
  data_visualization: false # Generate data visualization if true, do not generate data visualization if false
  llm_model: "Intel/neural-chat-7b-v3-3" # The LLM model used for the test
  test_output_dir: "/tmp/benchmark_output" # The directory to store the test output
  load_shape: # Tenant concurrency pattern
    name: constant # poisson or constant (locust default load shape)
    params: # Load-shape-specific parameters
      constant: # Constant load shape specific parameters, active only if load_shape is constant
        concurrent_level: 4 # If user_queries is specified, concurrent_level is the target number of requests per user. If not, it is the number of simulated users.
      poisson: # Poisson load shape specific parameters, active only if load_shape is poisson
        arrival-rate: 1.0 # Request arrival rate
  namespace: "" # Fill in a user-defined namespace; otherwise, the default namespace is used.

test_cases:
  audioqna:
    asr:
      run_test: true
      service_name: "asr-svc" # Replace with your service name
    llm:
      run_test: true
      service_name: "llm-svc" # Replace with your service name
      parameters:
        model_name: "Intel/neural-chat-7b-v3-3"
        max_new_tokens: 128
        temperature: 0.01
        top_k: 10
        top_p: 0.95
        repetition_penalty: 1.03
        streaming: true
    llmserve:
      run_test: true
      service_name: "llm-svc" # Replace with your service name
    tts:
      run_test: true
      service_name: "tts-svc" # Replace with your service name
    e2e:
      run_test: true
      service_name: "audioqna-backend-server-svc" # Replace with your service name
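As a concrete example of the load-shape switch described above, changing the suite to Poisson arrivals only requires editing the `load_shape` stanza (the rate here is illustrative):

```yaml
load_shape:
  name: poisson
  params:
    poisson:
      arrival-rate: 2.0 # two new requests per second, on average
```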
@@ -41,7 +41,7 @@ services:
     environment:
       TTS_ENDPOINT: ${TTS_ENDPOINT}
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-service
     ports:
       - "3006:80"

@@ -26,7 +26,7 @@ services:
       https_proxy: ${https_proxy}
     restart: unless-stopped
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-service
     ports:
       - "3006:80"
7
AudioQnA/docker_compose/intel/cpu/xeon/set_env.sh
Normal file
@@ -0,0 +1,7 @@
#!/usr/bin/env bash

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
pushd "../../../../../" > /dev/null
source .set_env.sh
popd > /dev/null
@@ -51,7 +51,7 @@ services:
     environment:
       TTS_ENDPOINT: ${TTS_ENDPOINT}
   tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
     container_name: tgi-gaudi-server
     ports:
       - "3006:80"
7
AudioQnA/docker_compose/intel/hpu/gaudi/set_env.sh
Normal file
@@ -0,0 +1,7 @@
#!/usr/bin/env bash

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
pushd "../../../../../" > /dev/null
source .set_env.sh
popd > /dev/null
@@ -7,14 +7,14 @@

 ## Deploy On Xeon
 ```
-cd GenAIExamples/AudioQnA/kubernetes/intel/cpu/xeon/manifests
+cd GenAIExamples/AudioQnA/kubernetes/intel/cpu/xeon/manifest
 export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
 sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" audioqna.yaml
 kubectl apply -f audioqna.yaml
 ```
 ## Deploy On Gaudi
 ```
-cd GenAIExamples/AudioQnA/kubernetes/intel/hpu/gaudi/manifests
+cd GenAIExamples/AudioQnA/kubernetes/intel/hpu/gaudi/manifest
 export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
 sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" audioqna.yaml
 kubectl apply -f audioqna.yaml
@@ -25,7 +25,7 @@ The AudioQnA uses the below prebuilt images if you choose a Xeon deployment
 Should you desire to use the Gaudi accelerator, two alternate images are used for the embedding and llm services.
 For Gaudi:

-- tgi-service: ghcr.io/huggingface/tgi-gaudi:2.0.5
+- tgi-service: ghcr.io/huggingface/tgi-gaudi:2.0.6
 - whisper-gaudi: opea/whisper-gaudi:latest
 - speecht5-gaudi: opea/speecht5-gaudi:latest
@@ -247,7 +247,7 @@ spec:
       - envFrom:
         - configMapRef:
             name: audio-qna-config
-        image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+        image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
         name: llm-dependency-deploy-demo
         securityContext:
           capabilities:
@@ -271,7 +271,7 @@ spec:
       - envFrom:
         - configMapRef:
             name: audio-qna-config
-        image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.6
         name: llm-dependency-deploy-demo
         securityContext:
           capabilities:
@@ -22,7 +22,7 @@ function build_docker_images() {
     service_list="audioqna whisper-gaudi asr llm-tgi speecht5-gaudi tts"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

-    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
+    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
     docker images && sleep 1s
 }
@@ -100,7 +100,7 @@ function validate_megaservice() {
 #
 #    sed -i "s/localhost/$ip_address/g" playwright.config.ts
 #
-##    conda install -c conda-forge nodejs -y
+##    conda install -c conda-forge nodejs=22.6.0 -y
 #    npm install && npm ci && npx playwright install --with-deps
 #    node -v && npm -v && pip list
 #
@@ -22,7 +22,7 @@ function build_docker_images() {
     service_list="audioqna whisper asr llm-tgi speecht5 tts"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

-    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
+    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
     docker images && sleep 1s
 }
@@ -90,7 +90,7 @@ function validate_megaservice() {
 #
 #    sed -i "s/localhost/$ip_address/g" playwright.config.ts
 #
-##    conda install -c conda-forge nodejs -y
+##    conda install -c conda-forge nodejs=22.6.0 -y
 #    npm install && npm ci && npx playwright install --with-deps
 #    node -v && npm -v && pip list
 #
8
AvatarChatbot/.gitignore
vendored
Normal file
@@ -0,0 +1,8 @@
*.safetensors
*.bin
*.model
*.log
docker_compose/intel/cpu/xeon/data
docker_compose/intel/hpu/gaudi/data
inputs/
outputs/
33
AvatarChatbot/Dockerfile
Normal file
@@ -0,0 +1,33 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

FROM python:3.11-slim

RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
    libgl1-mesa-glx \
    libjemalloc-dev \
    vim \
    git

RUN useradd -m -s /bin/bash user && \
    mkdir -p /home/user && \
    chown -R user /home/user/

WORKDIR /home/user/
RUN git clone https://github.com/opea-project/GenAIComps.git
WORKDIR /home/user/GenAIComps

RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt

COPY ./avatarchatbot.py /home/user/avatarchatbot.py

ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps

USER user

WORKDIR /home/user

ENTRYPOINT ["python", "avatarchatbot.py"]
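A typical build-and-run sequence for this image, sketched under the assumption that the component services are already reachable on the host (the ports follow the compose files later in this diff):

```bash
docker build -t opea/avatarchatbot:latest .

host_ip=$(hostname -I | awk '{print $1}')
docker run --rm -p 8888:8888 \
  -e MEGA_SERVICE_HOST_IP=0.0.0.0 -e MEGA_SERVICE_PORT=8888 \
  -e ASR_SERVICE_HOST_IP=$host_ip -e ASR_SERVICE_PORT=3001 \
  -e LLM_SERVICE_HOST_IP=$host_ip -e LLM_SERVICE_PORT=3007 \
  -e TTS_SERVICE_HOST_IP=$host_ip -e TTS_SERVICE_PORT=3002 \
  -e ANIMATION_SERVICE_HOST_IP=$host_ip -e ANIMATION_SERVICE_PORT=3008 \
  opea/avatarchatbot:latest
```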
105
AvatarChatbot/README.md
Normal file
@@ -0,0 +1,105 @@
# AvatarChatbot Application

The AvatarChatbot service can be effortlessly deployed on either Intel Gaudi2 or Intel Xeon Scalable Processors.

## AI Avatar Workflow

The AI Avatar example is implemented using both megaservices and the component-level microservices defined in [GenAIComps](https://github.com/opea-project/GenAIComps). The flow chart below shows the information flow between the different megaservices and microservices for this example.

```mermaid
---
config:
  flowchart:
    nodeSpacing: 100
    rankSpacing: 100
    curve: linear
  themeVariables:
    fontSize: 42px
---
flowchart LR
    classDef blue fill:#ADD8E6,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
    classDef thistle fill:#D8BFD8,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
    classDef orange fill:#FBAA60,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
    classDef orchid fill:#C26DBC,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
    classDef invisible fill:transparent,stroke:transparent;
    style AvatarChatbot-Megaservice stroke:#000000

    subgraph AvatarChatbot-Megaservice["AvatarChatbot Megaservice"]
        direction LR
        ASR([ASR Microservice]):::blue
        LLM([LLM Microservice]):::blue
        TTS([TTS Microservice]):::blue
        animation([Animation Microservice]):::blue
    end
    subgraph UserInterface["User Interface"]
        direction LR
        invis1[ ]:::invisible
        USER1([User Audio Query]):::orchid
        USER2([User Image/Video Query]):::orchid
        UI([UI server<br>]):::orchid
    end
    GW([AvatarChatbot GateWay<br>]):::orange
    subgraph .
        direction LR
        X([OPEA Microservice]):::blue
        Y{{Open Source Service}}:::thistle
        Z([OPEA Gateway]):::orange
        Z1([UI]):::orchid
    end

    WHISPER{{Whisper service}}:::thistle
    TGI{{LLM service}}:::thistle
    T5{{Speecht5 service}}:::thistle
    WAV2LIP{{Wav2Lip service}}:::thistle

    %% Connections %%
    direction LR
    USER1 -->|1| UI
    UI -->|2| GW
    GW <==>|3| AvatarChatbot-Megaservice
    ASR ==>|4| LLM ==>|5| TTS ==>|6| animation

    direction TB
    ASR <-.->|3'| WHISPER
    LLM <-.->|4'| TGI
    TTS <-.->|5'| T5
    animation <-.->|6'| WAV2LIP

    USER2 -->|1| UI
    UI <-.->|6'| WAV2LIP
```

## Deploy AvatarChatbot Service

The AvatarChatbot service can be deployed on either an Intel Gaudi2 AI Accelerator or an Intel Xeon Scalable Processor.

### Deploy AvatarChatbot on Gaudi

Refer to the [Gaudi Guide](./docker_compose/intel/hpu/gaudi/README.md) for instructions on deploying AvatarChatbot on Gaudi and on setting up a UI for the application.

### Deploy AvatarChatbot on Xeon

Refer to the [Xeon Guide](./docker_compose/intel/cpu/xeon/README.md) for instructions on deploying AvatarChatbot on Xeon.

## Supported Models

### ASR

The default model is [openai/whisper-small](https://huggingface.co/openai/whisper-small). All models in the Whisper family are also supported, such as `openai/whisper-large-v3`, `openai/whisper-medium`, `openai/whisper-base`, and `openai/whisper-tiny`.

To replace the model, edit `compose.yaml` and add a `command` line passing the name of the model you want to use:

```yaml
services:
  whisper-service:
    ...
    command: --model_name_or_path openai/whisper-tiny
```
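After swapping the model, the ASR path can be sanity-checked by hitting the Whisper service directly; the endpoint and sample payload below are the same ones used in the deployment guides later in this diff:

```bash
curl http://${host_ip}:7066/v1/asr \
  -X POST \
  -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
  -H 'Content-Type: application/json'
```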
### TTS

The default model is [microsoft/SpeechT5](https://huggingface.co/microsoft/speecht5_tts). Replacing the model is not currently supported; more models under commercial licenses will be added in the future.

### Animation

The default models are [Rudrabha/Wav2Lip](https://github.com/Rudrabha/Wav2Lip) and [TencentARC/GFPGAN](https://github.com/TencentARC/GFPGAN). Replacing them is not currently supported; more models under commercial licenses, such as [OpenTalker/SadTalker](https://github.com/OpenTalker/SadTalker), will be added in the future.
BIN
AvatarChatbot/assets/audio/eg3_ref.wav
Normal file
3
AvatarChatbot/assets/audio/sample_minecraft.json
Normal file
3
AvatarChatbot/assets/audio/sample_question.json
Normal file
4
AvatarChatbot/assets/audio/sample_whoareyou.json
Normal file
BIN
AvatarChatbot/assets/img/UI.png
Normal file
Size: 595 KiB
BIN
AvatarChatbot/assets/img/avatar1.jpg
Normal file
Size: 148 KiB
BIN
AvatarChatbot/assets/img/avatar2.jpg
Normal file
Size: 158 KiB
BIN
AvatarChatbot/assets/img/avatar3.png
Normal file
Size: 2.5 MiB
BIN
AvatarChatbot/assets/img/avatar4.png
Normal file
Size: 992 KiB
BIN
AvatarChatbot/assets/img/avatar5.png
Normal file
Size: 1.7 MiB
BIN
AvatarChatbot/assets/img/avatar6.png
Normal file
Size: 1.6 MiB
BIN
AvatarChatbot/assets/img/design.png
Normal file
Size: 169 KiB
BIN
AvatarChatbot/assets/img/flowchart.png
Normal file
Size: 121 KiB
BIN
AvatarChatbot/assets/img/gaudi.png
Normal file
Size: 47 KiB
BIN
AvatarChatbot/assets/img/opea_gh_qr.png
Normal file
Size: 20 KiB
BIN
AvatarChatbot/assets/img/opea_qr.png
Normal file
Size: 25 KiB
BIN
AvatarChatbot/assets/img/xeon.jpg
Normal file
Size: 22 KiB
BIN
AvatarChatbot/assets/outputs/result_max_tokens_1024.mp4
Normal file
BIN
AvatarChatbot/assets/outputs/result_max_tokens_64.mp4
Normal file
93
AvatarChatbot/avatarchatbot.py
Normal file
@@ -0,0 +1,93 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import os
import sys

from comps import AvatarChatbotGateway, MicroService, ServiceOrchestrator, ServiceType

MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "0.0.0.0")
MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))
ASR_SERVICE_HOST_IP = os.getenv("ASR_SERVICE_HOST_IP", "0.0.0.0")
ASR_SERVICE_PORT = int(os.getenv("ASR_SERVICE_PORT", 9099))
LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0")
LLM_SERVICE_PORT = int(os.getenv("LLM_SERVICE_PORT", 9000))
TTS_SERVICE_HOST_IP = os.getenv("TTS_SERVICE_HOST_IP", "0.0.0.0")
TTS_SERVICE_PORT = int(os.getenv("TTS_SERVICE_PORT", 9088))
ANIMATION_SERVICE_HOST_IP = os.getenv("ANIMATION_SERVICE_HOST_IP", "0.0.0.0")
ANIMATION_SERVICE_PORT = int(os.getenv("ANIMATION_SERVICE_PORT", 9066))


def check_env_vars(env_var_list):
    for var in env_var_list:
        if os.getenv(var) is None:
            print(f"Error: The environment variable '{var}' is not set.")
            sys.exit(1)  # Exit the program with a non-zero status code
    print("All environment variables are set.")


class AvatarChatbotService:
    def __init__(self, host="0.0.0.0", port=8000):
        self.host = host
        self.port = port
        self.megaservice = ServiceOrchestrator()

    def add_remote_service(self):
        asr = MicroService(
            name="asr",
            host=ASR_SERVICE_HOST_IP,
            port=ASR_SERVICE_PORT,
            endpoint="/v1/audio/transcriptions",
            use_remote_service=True,
            service_type=ServiceType.ASR,
        )
        llm = MicroService(
            name="llm",
            host=LLM_SERVICE_HOST_IP,
            port=LLM_SERVICE_PORT,
            endpoint="/v1/chat/completions",
            use_remote_service=True,
            service_type=ServiceType.LLM,
        )
        tts = MicroService(
            name="tts",
            host=TTS_SERVICE_HOST_IP,
            port=TTS_SERVICE_PORT,
            endpoint="/v1/audio/speech",
            use_remote_service=True,
            service_type=ServiceType.TTS,
        )
        animation = MicroService(
            name="animation",
            host=ANIMATION_SERVICE_HOST_IP,
            port=ANIMATION_SERVICE_PORT,
            endpoint="/v1/animation",
            use_remote_service=True,
            service_type=ServiceType.ANIMATION,
        )
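        # Wire the pipeline as a DAG: ASR -> LLM -> TTS -> Animation;
        # the gateway below exposes the composed flow as a single endpoint.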
        self.megaservice.add(asr).add(llm).add(tts).add(animation)
        self.megaservice.flow_to(asr, llm)
        self.megaservice.flow_to(llm, tts)
        self.megaservice.flow_to(tts, animation)
        self.gateway = AvatarChatbotGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)


if __name__ == "__main__":
    check_env_vars(
        [
            "MEGA_SERVICE_HOST_IP",
            "MEGA_SERVICE_PORT",
            "ASR_SERVICE_HOST_IP",
            "ASR_SERVICE_PORT",
            "LLM_SERVICE_HOST_IP",
            "LLM_SERVICE_PORT",
            "TTS_SERVICE_HOST_IP",
            "TTS_SERVICE_PORT",
            "ANIMATION_SERVICE_HOST_IP",
            "ANIMATION_SERVICE_PORT",
        ]
    )

    avatarchatbot = AvatarChatbotService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT)
    avatarchatbot.add_remote_service()
210
AvatarChatbot/docker_compose/intel/cpu/xeon/README.md
Normal file
@@ -0,0 +1,210 @@
# Build Mega Service of AvatarChatbot on Xeon

This document outlines the deployment process for an AvatarChatbot application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on an Intel Xeon server.

## 🚀 Build Docker images

### 1. Install GenAIComps from Source

```bash
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
```

### 2. Build ASR Image

```bash
docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/dependency/Dockerfile .

docker build -t opea/asr:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/Dockerfile .
```

### 3. Build LLM Image

```bash
docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile .
```

### 4. Build TTS Image

```bash
docker build -t opea/speecht5:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/speecht5/dependency/Dockerfile .

docker build -t opea/tts:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/speecht5/Dockerfile .
```

### 5. Build Animation Image

```bash
docker build -t opea/wav2lip:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/animation/wav2lip/dependency/Dockerfile .

docker build -t opea/animation:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/animation/wav2lip/Dockerfile .
```

### 6. Build MegaService Docker Image

To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `avatarchatbot.py` Python script. Build the MegaService Docker image using the command below:

```bash
git clone https://github.com/opea-project/GenAIExamples.git
cd GenAIExamples/AvatarChatbot/
docker build --no-cache -t opea/avatarchatbot:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
```

Then run `docker images`; you should see the following images ready:

1. `opea/whisper:latest`
2. `opea/asr:latest`
3. `opea/llm-tgi:latest`
4. `opea/speecht5:latest`
5. `opea/tts:latest`
6. `opea/wav2lip:latest`
7. `opea/animation:latest`
8. `opea/avatarchatbot:latest`

## 🚀 Set the environment variables

Before starting the services with `docker compose`, recheck the following environment variables.

```bash
export HUGGINGFACEHUB_API_TOKEN=<your_hf_token>
export host_ip=$(hostname -I | awk '{print $1}')

export TGI_LLM_ENDPOINT=http://$host_ip:3006
export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3

export ASR_ENDPOINT=http://$host_ip:7066
export TTS_ENDPOINT=http://$host_ip:7055
export WAV2LIP_ENDPOINT=http://$host_ip:7860

export MEGA_SERVICE_HOST_IP=${host_ip}
export ASR_SERVICE_HOST_IP=${host_ip}
export TTS_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export ANIMATION_SERVICE_HOST_IP=${host_ip}

export MEGA_SERVICE_PORT=8888
export ASR_SERVICE_PORT=3001
export TTS_SERVICE_PORT=3002
export LLM_SERVICE_PORT=3007
export ANIMATION_SERVICE_PORT=3008
```

- Xeon CPU

```bash
export DEVICE="cpu"
export WAV2LIP_PORT=7860
export INFERENCE_MODE='wav2lip_only'
export CHECKPOINT_PATH='/usr/local/lib/python3.11/site-packages/Wav2Lip/checkpoints/wav2lip_gan.pth'
export FACE="assets/img/avatar1.jpg"
# export AUDIO='assets/audio/eg3_ref.wav' # audio file path is optional; base64str in the POST request is used as input if AUDIO is 'None'
export AUDIO='None'
export FACESIZE=96
export OUTFILE="/outputs/result.mp4"
export GFPGAN_MODEL_VERSION=1.4 # latest version, can roll back to v1.3 if needed
export UPSCALE_FACTOR=1
export FPS=10
```

## 🚀 Start the MegaService

```bash
cd GenAIExamples/AvatarChatbot/docker_compose/intel/cpu/xeon/
docker compose -f compose.yaml up -d
```

## 🚀 Test MicroServices

```bash
# whisper service
curl http://${host_ip}:7066/v1/asr \
  -X POST \
  -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
  -H 'Content-Type: application/json'

# asr microservice
curl http://${host_ip}:3001/v1/audio/transcriptions \
  -X POST \
  -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
  -H 'Content-Type: application/json'

# tgi service
curl http://${host_ip}:3006/generate \
  -X POST \
  -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
  -H 'Content-Type: application/json'

# llm microservice
curl http://${host_ip}:3007/v1/chat/completions \
  -X POST \
  -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \
  -H 'Content-Type: application/json'

# speecht5 service
curl http://${host_ip}:7055/v1/tts \
  -X POST \
  -d '{"text": "Who are you?"}' \
  -H 'Content-Type: application/json'

# tts microservice
curl http://${host_ip}:3002/v1/audio/speech \
  -X POST \
  -d '{"text": "Who are you?"}' \
  -H 'Content-Type: application/json'

# wav2lip service
cd ../../../..
curl http://${host_ip}:7860/v1/wav2lip \
  -X POST \
  -d @assets/audio/sample_minecraft.json \
  -H 'Content-Type: application/json'

# animation microservice
curl http://${host_ip}:3008/v1/animation \
  -X POST \
  -d @assets/audio/sample_question.json \
  -H "Content-Type: application/json"
```

## 🚀 Test MegaService

```bash
curl http://${host_ip}:3009/v1/avatarchatbot \
  -X POST \
  -d @assets/audio/sample_whoareyou.json \
  -H 'Content-Type: application/json'
```

If the megaservice is running properly, you should see the following output:

```bash
"/outputs/result.mp4"
```

The output file is saved in the current working directory, as `${PWD}` is mapped to `/outputs` inside the wav2lip-service Docker container.
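To confirm the result is a playable video rather than an empty file, ffprobe gives a quick check (assuming FFmpeg is installed on the host):

```bash
ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1 result.mp4
```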
## Gradio UI

```bash
cd $WORKPATH/GenAIExamples/AvatarChatbot
python3 ui/gradio/app_gradio_demo_avatarchatbot.py
```

The UI can be viewed at http://${host_ip}:7861
<img src="../../../../assets/img/UI.png" alt="UI Example" width="60%">
In the current version (v1.0), the avatar figure image/video and the DL model choice must be set via environment variables before starting the AvatarChatbot backend service and running the UI; only the audio question is customizable in the UI.
\*\* Changing the avatar figure between runs will be enabled in v2.0.

## Troubleshooting

```bash
cd GenAIExamples/AvatarChatbot/tests
export IMAGE_REPO="opea"
export IMAGE_TAG="latest"
export HUGGINGFACEHUB_API_TOKEN=<your_hf_token>

test_avatarchatbot_on_xeon.sh
```
138
AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml
Normal file
@@ -0,0 +1,138 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

services:
  whisper-service:
    image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
    container_name: whisper-service
    ports:
      - "7066:7066"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
    restart: unless-stopped
  asr:
    image: ${REGISTRY:-opea}/asr:${TAG:-latest}
    container_name: asr-service
    ports:
      - "3001:9099"
    ipc: host
    environment:
      ASR_ENDPOINT: ${ASR_ENDPOINT}
  speecht5-service:
    image: ${REGISTRY:-opea}/speecht5:${TAG:-latest}
    container_name: speecht5-service
    ports:
      - "7055:7055"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
    restart: unless-stopped
  tts:
    image: ${REGISTRY:-opea}/tts:${TAG:-latest}
    container_name: tts-service
    ports:
      - "3002:9088"
    ipc: host
    environment:
      TTS_ENDPOINT: ${TTS_ENDPOINT}
  tgi-service:
    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
    container_name: tgi-service
    ports:
      - "3006:80"
    volumes:
      - "./data:/data"
    shm_size: 1g
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
    command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
  llm:
    image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
    container_name: llm-tgi-server
    depends_on:
      - tgi-service
    ports:
      - "3007:9000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
    restart: unless-stopped
  wav2lip-service:
    image: ${REGISTRY:-opea}/wav2lip:${TAG:-latest}
    container_name: wav2lip-service
    ports:
      - "7860:7860"
    ipc: host
    volumes:
      - ${PWD}:/outputs
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      DEVICE: ${DEVICE}
      INFERENCE_MODE: ${INFERENCE_MODE}
      CHECKPOINT_PATH: ${CHECKPOINT_PATH}
      FACE: ${FACE}
      AUDIO: ${AUDIO}
      FACESIZE: ${FACESIZE}
      OUTFILE: ${OUTFILE}
      GFPGAN_MODEL_VERSION: ${GFPGAN_MODEL_VERSION}
      UPSCALE_FACTOR: ${UPSCALE_FACTOR}
      FPS: ${FPS}
      WAV2LIP_PORT: ${WAV2LIP_PORT}
    restart: unless-stopped
  animation:
    image: ${REGISTRY:-opea}/animation:${TAG:-latest}
    container_name: animation-server
    ports:
      - "3008:9066"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      WAV2LIP_ENDPOINT: ${WAV2LIP_ENDPOINT}
    restart: unless-stopped
  avatarchatbot-xeon-backend-server:
    image: ${REGISTRY:-opea}/avatarchatbot:${TAG:-latest}
    container_name: avatarchatbot-xeon-backend-server
    depends_on:
      - asr
      - llm
      - tts
      - animation
    ports:
      - "3009:8888"
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
      - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
      - MEGA_SERVICE_PORT=${MEGA_SERVICE_PORT}
      - ASR_SERVICE_HOST_IP=${ASR_SERVICE_HOST_IP}
      - ASR_SERVICE_PORT=${ASR_SERVICE_PORT}
      - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
      - LLM_SERVICE_PORT=${LLM_SERVICE_PORT}
      - TTS_SERVICE_HOST_IP=${TTS_SERVICE_HOST_IP}
      - TTS_SERVICE_PORT=${TTS_SERVICE_PORT}
      - ANIMATION_SERVICE_HOST_IP=${ANIMATION_SERVICE_HOST_IP}
      - ANIMATION_SERVICE_PORT=${ANIMATION_SERVICE_PORT}
    ipc: host
    restart: always

networks:
  default:
    driver: bridge
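Before bringing the stack up, `docker compose config` can catch unset variables and YAML mistakes early (assuming Compose v2):

```bash
cd AvatarChatbot/docker_compose/intel/cpu/xeon
docker compose -f compose.yaml config --quiet && echo "compose file OK"
```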
7
AvatarChatbot/docker_compose/intel/cpu/xeon/set_env.sh
Normal file
@@ -0,0 +1,7 @@
#!/usr/bin/env bash

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
pushd "../../../../../" > /dev/null
source .set_env.sh
popd > /dev/null
220
AvatarChatbot/docker_compose/intel/hpu/gaudi/README.md
Normal file
@@ -0,0 +1,220 @@
# Build Mega Service of AvatarChatbot on Gaudi

This document outlines the deployment process for an AvatarChatbot application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on an Intel Gaudi server.

## 🚀 Build Docker images

### 1. Install GenAIComps from Source

```bash
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
```

### 2. Build ASR Image

```bash
docker build -t opea/whisper-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/dependency/Dockerfile.intel_hpu .

docker build -t opea/asr:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/Dockerfile .
```

### 3. Build LLM Image

```bash
docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile .
```

### 4. Build TTS Image

```bash
docker build -t opea/speecht5-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/speecht5/dependency/Dockerfile.intel_hpu .

docker build -t opea/tts:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/speecht5/Dockerfile .
```

### 5. Build Animation Image

```bash
docker build -t opea/wav2lip-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/animation/wav2lip/dependency/Dockerfile.intel_hpu .

docker build -t opea/animation:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/animation/wav2lip/Dockerfile .
```

### 6. Build MegaService Docker Image

To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `avatarchatbot.py` Python script. Build the MegaService Docker image using the command below:

```bash
git clone https://github.com/opea-project/GenAIExamples.git
cd GenAIExamples/AvatarChatbot/
docker build --no-cache -t opea/avatarchatbot:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
```

Then run `docker images`; you should see the following images ready:

1. `opea/whisper-gaudi:latest`
2. `opea/asr:latest`
3. `opea/llm-tgi:latest`
4. `opea/speecht5-gaudi:latest`
5. `opea/tts:latest`
6. `opea/wav2lip-gaudi:latest`
7. `opea/animation:latest`
8. `opea/avatarchatbot:latest`

## 🚀 Set the environment variables

Before starting the services with `docker compose`, recheck the following environment variables.

```bash
export HUGGINGFACEHUB_API_TOKEN=<your_hf_token>
export host_ip=$(hostname -I | awk '{print $1}')

export TGI_LLM_ENDPOINT=http://$host_ip:3006
export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3

export ASR_ENDPOINT=http://$host_ip:7066
export TTS_ENDPOINT=http://$host_ip:7055
export WAV2LIP_ENDPOINT=http://$host_ip:7860

export MEGA_SERVICE_HOST_IP=${host_ip}
export ASR_SERVICE_HOST_IP=${host_ip}
export TTS_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export ANIMATION_SERVICE_HOST_IP=${host_ip}

export MEGA_SERVICE_PORT=8888
export ASR_SERVICE_PORT=3001
export TTS_SERVICE_PORT=3002
export LLM_SERVICE_PORT=3007
export ANIMATION_SERVICE_PORT=3008
```

- Gaudi2 HPU

```bash
export DEVICE="hpu"
export WAV2LIP_PORT=7860
export INFERENCE_MODE='wav2lip_only'
export CHECKPOINT_PATH='/usr/local/lib/python3.10/dist-packages/Wav2Lip/checkpoints/wav2lip_gan.pth'
export FACE="assets/img/avatar1.jpg"
# export AUDIO='assets/audio/eg3_ref.wav' # audio file path is optional; base64str in the POST request is used as input if AUDIO is 'None'
export AUDIO='None'
export FACESIZE=96
export OUTFILE="/outputs/result.mp4"
export GFPGAN_MODEL_VERSION=1.4 # latest version, can roll back to v1.3 if needed
export UPSCALE_FACTOR=1
export FPS=10
```

## 🚀 Start the MegaService

```bash
cd GenAIExamples/AvatarChatbot/docker_compose/intel/hpu/gaudi/
docker compose -f compose.yaml up -d
```

## 🚀 Test MicroServices

```bash
# whisper service
curl http://${host_ip}:7066/v1/asr \
  -X POST \
  -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
  -H 'Content-Type: application/json'

# asr microservice
curl http://${host_ip}:3001/v1/audio/transcriptions \
  -X POST \
  -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
  -H 'Content-Type: application/json'

# tgi service
curl http://${host_ip}:3006/generate \
  -X POST \
  -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
  -H 'Content-Type: application/json'

# llm microservice
curl http://${host_ip}:3007/v1/chat/completions \
  -X POST \
  -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \
  -H 'Content-Type: application/json'

# speecht5 service
curl http://${host_ip}:7055/v1/tts \
  -X POST \
  -d '{"text": "Who are you?"}' \
  -H 'Content-Type: application/json'

# tts microservice
curl http://${host_ip}:3002/v1/audio/speech \
  -X POST \
  -d '{"text": "Who are you?"}' \
  -H 'Content-Type: application/json'

# wav2lip service
cd ../../../..
curl http://${host_ip}:7860/v1/wav2lip \
  -X POST \
  -d @assets/audio/sample_minecraft.json \
  -H 'Content-Type: application/json'

# animation microservice
curl http://${host_ip}:3008/v1/animation \
  -X POST \
  -d @assets/audio/sample_question.json \
  -H "Content-Type: application/json"
```

## 🚀 Test MegaService

```bash
curl http://${host_ip}:3009/v1/avatarchatbot \
  -X POST \
  -d @assets/audio/sample_whoareyou.json \
  -H 'Content-Type: application/json'
```

If the megaservice is running properly, you should see the following output:

```bash
"/outputs/result.mp4"
```

The output file is saved in the current working directory, as `${PWD}` is mapped to `/outputs` inside the wav2lip-service Docker container.

## Gradio UI

```bash
sudo apt update
sudo apt install -y yasm pkg-config libx264-dev nasm
cd $WORKPATH
git clone https://github.com/FFmpeg/FFmpeg.git
cd FFmpeg
sudo ./configure --enable-gpl --enable-libx264 && sudo make -j$(($(nproc)-1)) && sudo make install && hash -r
pip install gradio==4.38.1 soundfile
```
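If the build succeeded, the freshly installed binary should report x264 support (a quick check, assuming `make install` put `ffmpeg` on the PATH):

```bash
ffmpeg -version | grep -o enable-libx264
```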
```bash
cd $WORKPATH/GenAIExamples/AvatarChatbot
python3 ui/gradio/app_gradio_demo_avatarchatbot.py
```

The UI can be viewed at http://${host_ip}:7861
<img src="../../../../assets/img/UI.png" alt="UI Example" width="60%">
In the current version (v1.0), the avatar figure image/video and the DL model choice must be set via environment variables before starting the AvatarChatbot backend service and running the UI; only the audio question is customizable in the UI.
\*\* Changing the avatar figure between runs will be enabled in v2.0.

## Troubleshooting

```bash
cd GenAIExamples/AvatarChatbot/tests
export IMAGE_REPO="opea"
export IMAGE_TAG="latest"
export HUGGINGFACEHUB_API_TOKEN=<your_hf_token>

test_avatarchatbot_on_gaudi.sh
```
171
AvatarChatbot/docker_compose/intel/hpu/gaudi/compose.yaml
Normal file
@@ -0,0 +1,171 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

version: "3.8"

services:
  whisper-service:
    image: ${REGISTRY:-opea}/whisper-gaudi:${TAG:-latest}
    container_name: whisper-service
    ports:
      - "7066:7066"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
    runtime: habana
    cap_add:
      - SYS_NICE
    restart: unless-stopped
  asr:
    image: ${REGISTRY:-opea}/asr:${TAG:-latest}
    container_name: asr-service
    ports:
      - "3001:9099"
    ipc: host
    environment:
      ASR_ENDPOINT: ${ASR_ENDPOINT}
  speecht5-service:
    image: ${REGISTRY:-opea}/speecht5-gaudi:${TAG:-latest}
    container_name: speecht5-service
    ports:
      - "7055:7055"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
    runtime: habana
    cap_add:
      - SYS_NICE
    restart: unless-stopped
  tts:
    image: ${REGISTRY:-opea}/tts:${TAG:-latest}
    container_name: tts-service
    ports:
      - "3002:9088"
    ipc: host
    environment:
      TTS_ENDPOINT: ${TTS_ENDPOINT}
  tgi-service:
    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
    container_name: tgi-gaudi-server
    ports:
      - "3006:80"
    volumes:
      - "./data:/data"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
      ENABLE_HPU_GRAPH: true
      LIMIT_HPU_GRAPH: true
      USE_FLASH_ATTENTION: true
      FLASH_ATTENTION_RECOMPUTE: true
    runtime: habana
    cap_add:
      - SYS_NICE
    ipc: host
    command: --model-id ${LLM_MODEL_ID} --max-input-length 128 --max-total-tokens 256
  llm:
    image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
    container_name: llm-tgi-gaudi-server
    depends_on:
      - tgi-service
    ports:
      - "3007:9000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
    restart: unless-stopped
  wav2lip-service:
    image: ${REGISTRY:-opea}/wav2lip-gaudi:${TAG:-latest}
    container_name: wav2lip-service
    ports:
      - "7860:7860"
    ipc: host
    volumes:
      - ${PWD}:/outputs
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
      DEVICE: ${DEVICE}
      INFERENCE_MODE: ${INFERENCE_MODE}
      CHECKPOINT_PATH: ${CHECKPOINT_PATH}
      FACE: ${FACE}
      AUDIO: ${AUDIO}
      FACESIZE: ${FACESIZE}
      OUTFILE: ${OUTFILE}
      GFPGAN_MODEL_VERSION: ${GFPGAN_MODEL_VERSION}
      UPSCALE_FACTOR: ${UPSCALE_FACTOR}
      FPS: ${FPS}
      WAV2LIP_PORT: ${WAV2LIP_PORT}
    runtime: habana
    cap_add:
      - SYS_NICE
    restart: unless-stopped
  animation:
    image: ${REGISTRY:-opea}/animation:${TAG:-latest}
    container_name: animation-gaudi-server
    ports:
      - "3008:9066"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
      WAV2LIP_ENDPOINT: ${WAV2LIP_ENDPOINT}
    runtime: habana
    cap_add:
      - SYS_NICE
    restart: unless-stopped
  avatarchatbot-gaudi-backend-server:
    image: ${REGISTRY:-opea}/avatarchatbot:${TAG:-latest}
    container_name: avatarchatbot-gaudi-backend-server
    depends_on:
      - asr
      - llm
      - tts
      - animation
    ports:
      - "3009:8888"
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
      - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
      - MEGA_SERVICE_PORT=${MEGA_SERVICE_PORT}
      - ASR_SERVICE_HOST_IP=${ASR_SERVICE_HOST_IP}
      - ASR_SERVICE_PORT=${ASR_SERVICE_PORT}
      - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
      - LLM_SERVICE_PORT=${LLM_SERVICE_PORT}
      - TTS_SERVICE_HOST_IP=${TTS_SERVICE_HOST_IP}
      - TTS_SERVICE_PORT=${TTS_SERVICE_PORT}
      - ANIMATION_SERVICE_HOST_IP=${ANIMATION_SERVICE_HOST_IP}
      - ANIMATION_SERVICE_PORT=${ANIMATION_SERVICE_PORT}
    ipc: host
    restart: always

networks:
  default:
    driver: bridge
7
AvatarChatbot/docker_compose/intel/hpu/gaudi/set_env.sh
Normal file
@@ -0,0 +1,7 @@
#!/usr/bin/env bash

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
pushd "../../../../../" > /dev/null
source .set_env.sh
popd > /dev/null
73
AvatarChatbot/docker_image_build/build.yaml
Normal file
@@ -0,0 +1,73 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

services:
  avatarchatbot:
    build:
      args:
        http_proxy: ${http_proxy}
        https_proxy: ${https_proxy}
        no_proxy: ${no_proxy}
      context: ../
      dockerfile: ./Dockerfile
    image: ${REGISTRY:-opea}/avatarchatbot:${TAG:-latest}
  whisper-gaudi:
    build:
      context: GenAIComps
      dockerfile: comps/asr/whisper/dependency/Dockerfile.intel_hpu
    extends: avatarchatbot
    image: ${REGISTRY:-opea}/whisper-gaudi:${TAG:-latest}
  whisper:
    build:
      context: GenAIComps
      dockerfile: comps/asr/whisper/dependency/Dockerfile
    extends: avatarchatbot
    image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
  asr:
    build:
      context: GenAIComps
      dockerfile: comps/asr/whisper/Dockerfile
    extends: avatarchatbot
    image: ${REGISTRY:-opea}/asr:${TAG:-latest}
  llm-tgi:
    build:
      context: GenAIComps
      dockerfile: comps/llms/text-generation/tgi/Dockerfile
    extends: avatarchatbot
    image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
  speecht5-gaudi:
    build:
      context: GenAIComps
      dockerfile: comps/tts/speecht5/dependency/Dockerfile.intel_hpu
    extends: avatarchatbot
    image: ${REGISTRY:-opea}/speecht5-gaudi:${TAG:-latest}
  speecht5:
    build:
      context: GenAIComps
      dockerfile: comps/tts/speecht5/dependency/Dockerfile
    extends: avatarchatbot
    image: ${REGISTRY:-opea}/speecht5:${TAG:-latest}
  tts:
    build:
      context: GenAIComps
      dockerfile: comps/tts/speecht5/Dockerfile
    extends: avatarchatbot
    image: ${REGISTRY:-opea}/tts:${TAG:-latest}
  wav2lip-gaudi:
    build:
      context: GenAIComps
      dockerfile: comps/animation/wav2lip/dependency/Dockerfile.intel_hpu
    extends: avatarchatbot
    image: ${REGISTRY:-opea}/wav2lip-gaudi:${TAG:-latest}
  wav2lip:
    build:
      context: GenAIComps
      dockerfile: comps/animation/wav2lip/dependency/Dockerfile
    extends: avatarchatbot
    image: ${REGISTRY:-opea}/wav2lip:${TAG:-latest}
  animation:
    build:
      context: GenAIComps
      dockerfile: comps/animation/wav2lip/Dockerfile
    extends: avatarchatbot
    image: ${REGISTRY:-opea}/animation:${TAG:-latest}
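The test scripts below drive this file by passing an explicit service list to `docker compose build`; the same pattern works manually when only a subset of images needs rebuilding:

```bash
docker compose -f build.yaml build avatarchatbot asr tts --no-cache
```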
141
AvatarChatbot/tests/test_compose_on_gaudi.sh
Executable file
@@ -0,0 +1,141 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

set -e
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}

WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
if ls $LOG_PATH/*.log 1> /dev/null 2>&1; then
    rm $LOG_PATH/*.log
    echo "Log files removed."
else
    echo "No log files to remove."
fi
ip_address=$(hostname -I | awk '{print $1}')


function build_docker_images() {
    cd $WORKPATH/docker_image_build
    git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../

    echo "Build all the images with --no-cache, check docker_image_build.log for details..."
    service_list="avatarchatbot whisper-gaudi asr llm-tgi speecht5-gaudi tts wav2lip-gaudi animation"
    docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6

    docker images && sleep 1s
}


function start_services() {
    cd $WORKPATH/docker_compose/intel/hpu/gaudi

    export HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN
    export host_ip=$(hostname -I | awk '{print $1}')

    export TGI_LLM_ENDPOINT=http://$host_ip:3006
    export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3

    export ASR_ENDPOINT=http://$host_ip:7066
    export TTS_ENDPOINT=http://$host_ip:7055
    export WAV2LIP_ENDPOINT=http://$host_ip:7860

    export MEGA_SERVICE_HOST_IP=${host_ip}
    export ASR_SERVICE_HOST_IP=${host_ip}
    export TTS_SERVICE_HOST_IP=${host_ip}
    export LLM_SERVICE_HOST_IP=${host_ip}
    export ANIMATION_SERVICE_HOST_IP=${host_ip}

    export MEGA_SERVICE_PORT=8888
    export ASR_SERVICE_PORT=3001
    export TTS_SERVICE_PORT=3002
    export LLM_SERVICE_PORT=3007
    export ANIMATION_SERVICE_PORT=3008

    export DEVICE="hpu"
    export WAV2LIP_PORT=7860
    export INFERENCE_MODE='wav2lip+gfpgan'
    export CHECKPOINT_PATH='/usr/local/lib/python3.10/dist-packages/Wav2Lip/checkpoints/wav2lip_gan.pth'
    export FACE="assets/img/avatar1.jpg"
    # export AUDIO='assets/audio/eg3_ref.wav' # audio file path is optional; base64str in the POST request is used as input if AUDIO is 'None'
    export AUDIO='None'
    export FACESIZE=96
    export OUTFILE="/outputs/result.mp4"
    export GFPGAN_MODEL_VERSION=1.4 # latest version, can roll back to v1.3 if needed
    export UPSCALE_FACTOR=1
    export FPS=10

    # Start Docker Containers
    docker compose up -d > ${LOG_PATH}/start_services_with_compose.log

    n=0
    until [[ "$n" -ge 100 ]]; do
        docker logs tgi-gaudi-server > $LOG_PATH/tgi_service_start.log
        if grep -q Connected $LOG_PATH/tgi_service_start.log; then
            break
        fi
        sleep 5s
        n=$((n+1))
    done
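    # The loop above polls the TGI logs for "Connected", waiting up to
    # ~8 minutes (100 iterations x 5 s); execution continues either way.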
    echo "All services are up and running"
    sleep 5s
}


function validate_megaservice() {
    cd $WORKPATH
    result=$(http_proxy="" curl http://${ip_address}:3009/v1/avatarchatbot -X POST -d @assets/audio/sample_whoareyou.json -H 'Content-Type: application/json')
    echo "result is === $result"
    if [[ $result == *"mp4"* ]]; then
        echo "Result correct."
    else
        echo "Result wrong, print docker logs."
        docker logs whisper-service > $LOG_PATH/whisper-service.log
        docker logs asr-service > $LOG_PATH/asr-service.log
        docker logs speecht5-service > $LOG_PATH/speecht5-service.log
        docker logs tts-service > $LOG_PATH/tts-service.log
        docker logs tgi-gaudi-server > $LOG_PATH/tgi-gaudi-server.log
        docker logs llm-tgi-gaudi-server > $LOG_PATH/llm-tgi-gaudi-server.log
        docker logs wav2lip-service > $LOG_PATH/wav2lip-service.log
        docker logs animation-gaudi-server > $LOG_PATH/animation-gaudi-server.log
        echo "Exit test."
        exit 1
    fi
}


function stop_docker() {
    cd $WORKPATH/docker_compose/intel/hpu/gaudi
    docker compose down
}


function main() {
    stop_docker
    echo y | docker builder prune --all
    echo y | docker image prune

    if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
    start_services
    # validate_microservices
    validate_megaservice
    # validate_frontend

    stop_docker
    echo y | docker builder prune --all
    echo y | docker image prune
}


main
142
AvatarChatbot/tests/test_compose_on_xeon.sh
Executable file
@@ -0,0 +1,142 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

set -e
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}

WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
if ls $LOG_PATH/*.log 1> /dev/null 2>&1; then
    rm $LOG_PATH/*.log
    echo "Log files removed."
else
    echo "No log files to remove."
fi
ip_address=$(hostname -I | awk '{print $1}')


function build_docker_images() {
    cd $WORKPATH/docker_image_build
    git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../

    echo "Build all the images with --no-cache, check docker_image_build.log for details..."
    service_list="avatarchatbot whisper asr llm-tgi speecht5 tts wav2lip animation"
    docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6

    docker images && sleep 1s
}


function start_services() {
    cd $WORKPATH/docker_compose/intel/cpu/xeon

    export HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN
    export host_ip=$(hostname -I | awk '{print $1}')

    export TGI_LLM_ENDPOINT=http://$host_ip:3006
    export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3

    export ASR_ENDPOINT=http://$host_ip:7066
    export TTS_ENDPOINT=http://$host_ip:7055
    export WAV2LIP_ENDPOINT=http://$host_ip:7860

    export MEGA_SERVICE_HOST_IP=${host_ip}
    export ASR_SERVICE_HOST_IP=${host_ip}
    export TTS_SERVICE_HOST_IP=${host_ip}
    export LLM_SERVICE_HOST_IP=${host_ip}
    export ANIMATION_SERVICE_HOST_IP=${host_ip}

    export MEGA_SERVICE_PORT=8888
    export ASR_SERVICE_PORT=3001
    export TTS_SERVICE_PORT=3002
    export LLM_SERVICE_PORT=3007
    export ANIMATION_SERVICE_PORT=3008

    export DEVICE="cpu"
    export WAV2LIP_PORT=7860
    export INFERENCE_MODE='wav2lip+gfpgan'
    export CHECKPOINT_PATH='/usr/local/lib/python3.11/site-packages/Wav2Lip/checkpoints/wav2lip_gan.pth'
    export FACE="assets/img/avatar5.png"
    # export AUDIO='assets/audio/eg3_ref.wav' # audio file path is optional; base64str in the POST request is used as input if AUDIO is 'None'
    export AUDIO='None'
    export FACESIZE=96
    export OUTFILE="/outputs/result.mp4"
    export GFPGAN_MODEL_VERSION=1.4 # latest version, can roll back to v1.3 if needed
    export UPSCALE_FACTOR=1
    export FPS=10

    # Start Docker Containers
    docker compose up -d
    n=0
    until [[ "$n" -ge 100 ]]; do
        docker logs tgi-service > $LOG_PATH/tgi_service_start.log
        if grep -q Connected $LOG_PATH/tgi_service_start.log; then
            break
        fi
        sleep 5s
        n=$((n+1))
    done
    echo "All services are up and running"
    sleep 5s
}


function validate_megaservice() {
    cd $WORKPATH
    result=$(http_proxy="" curl http://${ip_address}:3009/v1/avatarchatbot -X POST -d @assets/audio/sample_whoareyou.json -H 'Content-Type: application/json')
    echo "result is === $result"
    if [[ $result == *"mp4"* ]]; then
        echo "Result correct."
    else
        docker logs whisper-service > $LOG_PATH/whisper-service.log
        docker logs asr-service > $LOG_PATH/asr-service.log
        docker logs speecht5-service > $LOG_PATH/speecht5-service.log
        docker logs tts-service > $LOG_PATH/tts-service.log
        docker logs tgi-service > $LOG_PATH/tgi-service.log
        docker logs llm-tgi-server > $LOG_PATH/llm-tgi-server.log
        docker logs wav2lip-service > $LOG_PATH/wav2lip-service.log
        docker logs animation-server > $LOG_PATH/animation-server.log

        echo "Result wrong."
        exit 1
    fi
}


#function validate_frontend() {

#}


function stop_docker() {
    cd $WORKPATH/docker_compose/intel/cpu/xeon
    docker compose down
}


function main() {

    stop_docker
    if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
    start_services
    # validate_microservices
    validate_megaservice
    # validate_frontend
    stop_docker

    echo y | docker builder prune --all
    echo y | docker image prune
}


main
349
AvatarChatbot/ui/gradio/app_gradio_demo_avatarchatbot.py
Normal file
@@ -0,0 +1,349 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import asyncio
import base64
import io
import os
import shutil
import subprocess
import time

import aiohttp
import docker
import ffmpeg
import gradio as gr
import numpy as np
import soundfile as sf
from PIL import Image


# %% Docker Management
def update_env_var_in_container(container_name, env_var, new_value):
    # Placeholder: intended to update an environment variable in a running
    # container (see the commented call in transcribe()); currently a no-op.
    return


# %% AudioQnA functions
def preprocess_audio(audio):
    """The audio data is a 16-bit integer array with values ranging from -32768 to 32767 and the shape of the audio data array is (samples,)"""
    sr, y = audio

    # Convert to normalized float32 audio
    y = y.astype(np.float32)
    y /= np.max(np.abs(y))

    # Save to memory
    buf = io.BytesIO()
    sf.write(buf, y, sr, format="WAV")
    buf.seek(0)  # Reset the buffer position to the beginning

    # Encode the WAV file to base64 string
    base64_bytes = base64.b64encode(buf.read())
    base64_string = base64_bytes.decode("utf-8")
    return base64_string


def base64_to_int16(base64_string):
    wav_bytes = base64.b64decode(base64_string)
    buf = io.BytesIO(wav_bytes)
    y, sr = sf.read(buf, dtype="int16")
    return sr, y


async def transcribe(audio_input, face_input, model_choice):
    """Input: mic audio; Output: ai audio, text, text"""
    global ai_chatbot_url, chat_history, count
    chat_history = ""
    # Preprocess the audio
    base64bytestr = preprocess_audio(audio_input)

    # Send the audio to the AvatarChatbot backend server endpoint
    initial_inputs = {"audio": base64bytestr, "max_tokens": 64}

    # TO-DO: update wav2lip-service with the chosen face_input
    # update_env_var_in_container("wav2lip-service", "DEVICE", "new_device_value")

    async with aiohttp.ClientSession() as session:
        async with session.post(ai_chatbot_url, json=initial_inputs) as response:
            # Check the response status code
            if response.status == 200:
                # response_json = await response.json()
                # # Decode the base64 string
                # sampling_rate, audio_int16 = base64_to_int16(response_json["byte_str"])
                # chat_history += f"User: {response_json['query']}\n\n"
                # chat_ai = response_json["text"]
                # hitted_ends = [",", ".", "?", "!", "。", ";"]
                # last_punc_idx = max([chat_ai.rfind(punc) for punc in hitted_ends])
                # if last_punc_idx != -1:
                #     chat_ai = chat_ai[: last_punc_idx + 1]
                # chat_history += f"AI: {chat_ai}"
                # chat_history = chat_history.replace("OPEX", "OPEA")
                # return (sampling_rate, audio_int16)  # handle the response

                result = await response.text()
                return "docker_compose/intel/hpu/gaudi/result.mp4"
            else:
return {"error": "Failed to transcribe audio", "status_code": response.status_code}
|
||||


def resize_image(image_pil, size=(720, 720)):
    """Resize the image to the specified size."""
    return image_pil.resize(size, Image.LANCZOS)


def resize_video(video_path, save_path, size=(720, 1280)):
    """Resize the video to the specified size, and save to the save path."""
    ffmpeg.input(video_path).output(save_path, vf=f"scale={size[0]}:{size[1]}").overwrite_output().run()


# %% AI Avatar demo function
async def aiavatar_demo(audio_input, face_input, model_choice):
    """Input: mic/preloaded audio, avatar file path;
    Output: ai video"""
    # Wait for response from AvatarChatbot backend
    output_video = await transcribe(audio_input, face_input, model_choice)  # output video path

    if isinstance(output_video, dict):  # in case of an error
        return None  # single value: the caller expects one video path
    else:
        return output_video


# %% Main
if __name__ == "__main__":
    # HOST_IP = os.getenv("host_ip")
    HOST_IP = subprocess.check_output("hostname -I | awk '{print $1}'", shell=True).decode("utf-8").strip()

    # Fetch the AudioQnA backend server
    ai_chatbot_url = f"http://{HOST_IP}:3009/v1/avatarchatbot"

    # Collect chat history to print in the interface
    chat_history = ""

    # Prepare 3 image paths and 3 video paths
    # image_pils = [
    #     Image.open(os.path.join("assets/img/woman1.png")),
    #     Image.open(os.path.join("assets/img/man1.png")),
    #     Image.open(os.path.join("assets/img/woman2.png")),
    # ]

    # video_paths = [
    #     os.path.join("assets/video/man1.mp4"),
    #     os.path.join("assets/video/woman2.mp4"),
    #     os.path.join("assets/video/man4.mp4"),
    # ]

    def image_to_base64(image_path):
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode("utf-8")

    # Convert your images to Base64
    xeon_base64 = image_to_base64("assets/img/xeon.jpg")
    gaudi_base64 = image_to_base64("assets/img/gaudi.png")

    # List of prerecorded WAV files containing audio questions
    # audio_filepaths = [
    #     "assets/audio/intel2.wav",
    #     "assets/audio/intel4.wav",
    # ]
    # audio_questions = [
    #     "1. What's the objective of the Open Platform for Enterprise AI? How is it helpful to enterprises building AI solutions?",
    #     "2. What kinds of Intel AI tools are available to accelerate AI workloads?",
    # ]

    # Demo frontend
    demo = gr.Blocks()
    with demo:
        # Define processing functions
        count = 0

        # Make necessary folders:
        if not os.path.exists("inputs"):
            os.makedirs("inputs")
        if not os.path.exists("outputs"):
            os.makedirs("outputs")

        def initial_process(audio_input, face_input, model_choice):
            global count
            start_time = time.time()
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            video_file = loop.run_until_complete(aiavatar_demo(audio_input, face_input, model_choice))
            count += 1
            end_time = time.time()
            return video_file, f"The entire application took {(end_time - start_time):.1f} seconds"

        # def update_selected_image_state(image_index):
        #     image_index = int(image_index)
        #     selected_image_state.value = image_index
        #     # change image_input here
        #     if image_index < len(image_pils):
        #         return f"inputs/face_{image_index}.png"
        #     else:
        #         return f"inputs/video_{image_index - len(image_pils)}.mp4"

        # def update_audio_input(audio_choice):
        #     if audio_choice:
        #         audio_index = int(audio_choice.split(".")[0]) - 1
        #         audio_filepath_gradio = f"inputs/audio_{audio_index:d}.wav"
        #         shutil.copyfile(audio_filepaths[audio_index], audio_filepath_gradio)
        #         return audio_filepath_gradio

        # UI Components
        # Title & Introduction
        gr.Markdown("<h1 style='font-size: 36px;'>A PyTorch and OPEA based AI Avatar Audio Chatbot</h1>")
        with gr.Row():
            with gr.Column(scale=8):
                gr.Markdown(
                    """
                    <p style='font-size: 24px;'>Welcome to our AI Avatar Audio Chatbot! This application leverages PyTorch and <strong>OPEA (Open Platform for Enterprise AI) v0.8</strong> to provide you with a human-like conversational experience. It runs on the Intel® Gaudi® AI Accelerator and Intel® Xeon® Processor, with hardware and software optimizations.<br>
                    Please feel free to interact with the AI avatar by choosing your own avatar and talking into the mic.</p>
                    """
                )
            with gr.Column(scale=1):
                # with gr.Row():
                #     gr.Markdown(f"""
                #     <img src='data:image/png;base64,{opea_qr_base64}' alt='OPEA QR Code' style='width: 150px; height: auto;'>
                #     """, label="OPEA QR Code")
                #     gr.Markdown(f"""
                #     <img src='data:image/png;base64,{opea_gh_qr_base64}' alt='OPEA GitHub QR Code' style='width: 150px; height: auto;'>
                #     """, label="OPEA GitHub QR Code")
                with gr.Row():
                    gr.Markdown(
                        f"""
                        <img src='data:image/png;base64,{gaudi_base64}' alt='Intel®Gaudi' style='width: 120px; height: auto;'>""",
                        label="Intel®Gaudi",
                    )
                    gr.Markdown(
                        f"""
                        <img src='data:image/png;base64,{xeon_base64}' alt='Intel®Xeon' style='width: 120px; height: auto;'>""",
                        label="Intel®Xeon",
                    )
        gr.Markdown("<hr>")  # Divider

        # Inputs
        # Image gallery
        selected_image_state = gr.State(value=-1)
        image_clicks = []
        image_click_buttons = []
        video_clicks = []
        video_click_buttons = []
        with gr.Row():
            with gr.Column(scale=1):
                audio_input = gr.Audio(
                    sources=["upload", "microphone"], format="wav", label="🎤 or 📤 for your Input audio!"
                )
                # audio_choice = gr.Dropdown(
                #     choices=audio_questions,
                #     label="Choose an audio question",
                #     value=None,  # default value
                # )
                # Update audio_input when a selection is made from the dropdown
                # audio_choice.change(fn=update_audio_input, inputs=audio_choice, outputs=audio_input)

                face_input = gr.File(
                    file_count="single",
                    file_types=["image", "video"],
                    label="Choose an avatar or 📤 an image or video!",
                )
                model_choice = gr.Dropdown(
                    choices=["wav2lip", "wav2lip+GAN", "wav2lip+GFPGAN"],
                    label="Choose a DL model",
                )
            # with gr.Column(scale=2):
            #     # Display 3 images and buttons
            #     with gr.Row():
            #         for i, image_pil in enumerate(image_pils):
            #             image_pil = resize_image(image_pil)
            #             save_path = f"inputs/face_{int(i)}.png"
            #             image_pil.save(save_path, "PNG")
            #             image_clicks.append(gr.Image(type="filepath", value=save_path, label=f"Avatar {int(i)+1}"))
            #     with gr.Row():
            #         for i in range(len(image_pils)):
            #             image_click_buttons.append(gr.Button(f"Use Image {i+1}"))

            #     # Display 3 videos and buttons
            #     with gr.Row():
            #         for i, video_path in enumerate(video_paths):
            #             save_path = f"inputs/video_{int(i)}.mp4"
            #             resize_video(video_path, save_path)
            #             video_clicks.append(gr.Video(value=save_path, label=f"Video {int(i)+1}"))
            #     with gr.Row():
            #         for i in range(len(video_paths)):
            #             video_click_buttons.append(gr.Button(f"Use Video {int(i)+1}"))

        submit_button = gr.Button("Submit")

        # Outputs
        gr.Markdown("<hr>")  # Divider
        with gr.Row():
            with gr.Column():
                video_output = gr.Video(label="Your AI Avatar video: ", format="mp4", width=1280, height=720)
                video_time_text = gr.Textbox(label="Video processing time", value="0.0 seconds")

        # Technical details
        gr.Markdown("<hr>")  # Divider
        with gr.Row():
            gr.Markdown(
                """
                <p style='font-size: 24px;'>OPEA megaservice deployed: <br>
                <ul style='font-size: 24px;'>
                <li><strong>AvatarChatbot</strong></li>
                </ul></p>
                <p style='font-size: 24px;'>OPEA microservices deployed:
                <ul style='font-size: 24px;'>
                <li><strong>ASR</strong> (service: opea/whisper-gaudi, model: openai/whisper-small)</li>
                <li><strong>LLM 'text-generation'</strong> (service: opea/llm-tgi, model: Intel/neural-chat-7b-v3-3)</li>
                <li><strong>TTS</strong> (service: opea/speecht5-gaudi, model: microsoft/speecht5_tts)</li>
                <li><strong>Animation</strong> (service: opea/animation, model: wav2lip+gfpgan)</li>
                </ul></p>
                """
            )
        with gr.Row():
            gr.Image("assets/img/flowchart.png", label="Megaservice Flowchart")
        with gr.Row():
            gr.Markdown(
                """
                <p style='font-size: 24px;'>The AI Avatar Audio Chatbot is powered by the following Intel® AI software:<br>
                <ul style='font-size: 24px;'>
                <li><strong>Intel Gaudi Software v1.17.0</strong></li>
                <li><strong>PyTorch v2.3.1 (Eager mode + torch.compile)</strong></li>
                <li><strong>HPU Graph</strong></li>
                <li><strong>Intel Neural Compressor (INC)</strong></li>
                </ul></p>
                """
            )

        # Disclaimer
        gr.Markdown("<hr>")  # Divider
gr.Markdown("<h2 style='font-size: 24px;'>Notices & Disclaimers</h1>")
|
||||
        gr.Markdown(
            """
            <p style='font-size: 20px;'>Intel is committed to respecting human rights and avoiding complicity in human rights abuses. See Intel's Global Human Rights Principles. Intel's products and software are intended only to be used in applications that do not cause or contribute to a violation of an internationally recognized human right.<br></p>
            <p style='font-size: 20px;'>© Intel Corporation. Intel, the Intel logo, and other Intel marks are trademarks of Intel Corporation or its subsidiaries. Other names and brands may be claimed as the property of others.<br></p>
            <p style='font-size: 20px;'>You may not use or facilitate the use of this document in connection with any infringement or other legal analysis concerning Intel products described herein. You agree to grant Intel a non-exclusive, royalty-free license to any patent claim thereafter drafted which includes subject matter disclosed herein.<br></p>
            """
        )

        # State transitions
        # for i in range(len(image_pils)):
        #     image_click_buttons[i].click(
        #         update_selected_image_state, inputs=[gr.Number(value=i, visible=False)], outputs=[face_input]
        #     )
        # for i in range(len(video_paths)):
        #     video_click_buttons[i].click(
        #         update_selected_image_state,
        #         inputs=[gr.Number(value=i + len(image_pils), visible=False)],
        #         outputs=[face_input],
        #     )
        submit_button.click(
            initial_process,
            inputs=[audio_input, face_input, model_choice],
            outputs=[
                video_output,
                video_time_text,
            ],
        )

    demo.queue().launch(server_name="0.0.0.0", server_port=7861)
@@ -18,7 +18,7 @@ WORKDIR /home/user/
RUN git clone https://github.com/opea-project/GenAIComps.git

WORKDIR /home/user/GenAIComps
RUN pip install --no-cache-dir --upgrade pip && \
RUN pip install --no-cache-dir --upgrade pip setuptools && \
    pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt && \
    pip install --no-cache-dir langchain_core

@@ -18,7 +18,7 @@ WORKDIR /home/user/
RUN git clone https://github.com/opea-project/GenAIComps.git

WORKDIR /home/user/GenAIComps
RUN pip install --no-cache-dir --upgrade pip && \
RUN pip install --no-cache-dir --upgrade pip setuptools && \
    pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt && \
    pip install --no-cache-dir langchain_core

@@ -18,7 +18,7 @@ WORKDIR /home/user/
RUN git clone https://github.com/opea-project/GenAIComps.git

WORKDIR /home/user/GenAIComps
RUN pip install --no-cache-dir --upgrade pip && \
RUN pip install --no-cache-dir --upgrade pip setuptools && \
    pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt && \
    pip install --no-cache-dir langchain_core

32
ChatQnA/Dockerfile.wrapper
Normal file
@@ -0,0 +1,32 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

FROM python:3.11-slim

RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
    libgl1-mesa-glx \
    libjemalloc-dev \
    git

RUN useradd -m -s /bin/bash user && \
    mkdir -p /home/user && \
    chown -R user /home/user/

WORKDIR /home/user/
RUN git clone https://github.com/opea-project/GenAIComps.git

WORKDIR /home/user/GenAIComps
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt

COPY ./chatqna_wrapper.py /home/user/chatqna.py

ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps

USER user

WORKDIR /home/user

RUN echo 'ulimit -S -n 999999' >> ~/.bashrc

ENTRYPOINT ["python", "chatqna.py"]
@@ -4,7 +4,26 @@ Chatbots are the most widely adopted use case for leveraging the powerful chat a

RAG bridges the knowledge gap by dynamically fetching relevant information from external sources, ensuring that generated responses remain factual and current. At the core of this architecture are vector databases, which are instrumental in enabling efficient, semantic retrieval of information. These databases store data as vectors, allowing RAG to swiftly access the most pertinent documents or data points based on semantic similarity. A minimal sketch of this retrieval step follows.
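To make the retrieval step concrete, here is a minimal, self-contained sketch of similarity search. The documents, the 4-dimensional embeddings, and the `retrieve` helper are illustrative stand-ins for a real embedding model and vector store; they are not part of ChatQnA's code.

```
import numpy as np

# Toy stand-in for a vector database: each document is stored alongside a
# precomputed embedding. Real deployments use an embedding service and a
# vector store; the 4-d vectors here are made up purely for illustration.
documents = ["OPEA overview", "Xeon tuning guide", "RAG architecture notes"]
doc_embeddings = np.array(
    [
        [0.1, 0.9, 0.0, 0.2],
        [0.8, 0.1, 0.3, 0.0],
        [0.2, 0.7, 0.1, 0.1],
    ]
)


def retrieve(query_embedding, k=2):
    """Return the k documents most similar to the query by cosine similarity."""
    q = query_embedding / np.linalg.norm(query_embedding)
    d = doc_embeddings / np.linalg.norm(doc_embeddings, axis=1, keepdims=True)
    scores = d @ q  # cosine similarity of the query against every document
    top = np.argsort(scores)[::-1][:k]  # indices of the highest-scoring docs
    return [(documents[i], float(scores[i])) for i in top]


print(retrieve(np.array([0.15, 0.8, 0.05, 0.15])))
```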

## Deploy ChatQnA Service

## 🤖 Automated Terraform Deployment using Intel® Optimized Cloud Modules for **Terraform**

| Cloud Provider       | Intel Architecture                | Intel Optimized Cloud Module for Terraform                                                                                          | Comments                                                             |
| -------------------- | --------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------- |
| AWS                  | 4th Gen Intel Xeon with Intel AMX | [AWS Module](https://github.com/intel/terraform-intel-aws-vm/tree/main/examples/gen-ai-xeon-opea-chatqna)                            | Uses Intel/neural-chat-7b-v3-3 by default                             |
| AWS Falcon2-11B      | 4th Gen Intel Xeon with Intel AMX | [AWS Module with Falcon11B](https://github.com/intel/terraform-intel-aws-vm/tree/main/examples/gen-ai-xeon-opea-chatqna-falcon11B)   | Uses TII Falcon2-11B LLM Model                                        |
| GCP                  | 5th Gen Intel Xeon with Intel AMX | [GCP Module](https://github.com/intel/terraform-intel-gcp-vm/tree/main/examples/gen-ai-xeon-opea-chatqna)                            | Also supports Confidential AI by using Intel® TDX with 4th Gen Xeon   |
| Azure                | 5th Gen Intel Xeon with Intel AMX | Work-in-progress                                                                                                                      | Work-in-progress                                                      |
| Intel Tiber AI Cloud | 5th Gen Intel Xeon with Intel AMX | Work-in-progress                                                                                                                      | Work-in-progress                                                      |

## Automated Deployment to Ubuntu-based Systems (if not using Terraform) using Intel® Optimized Cloud Modules for **Ansible**

To deploy to an existing Xeon Ubuntu-based system, use our Intel Optimized Cloud Modules for Ansible. This is the same Ansible playbook used by Terraform.
Use this if you are not using Terraform and have provisioned your system with another tool or manually, including bare metal. A typical invocation is sketched after the table.

| Operating System | Intel Optimized Cloud Module for Ansible |
| ---------------- | ----------------------------------------- |
| Ubuntu 20.04     | [ChatQnA Ansible Module](https://github.com/intel/optimized-cloud-recipes/tree/main/recipes/ai-opea-chatqna-xeon) |
| Ubuntu 22.04     | Work-in-progress                          |

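As a rough sketch of how the Ansible module is typically run (the repository URL is real, but the inventory and playbook file names here are assumptions; follow the module's README for the exact steps):

```
# Assumed invocation; inventory.ini and playbook.yml are illustrative names.
git clone https://github.com/intel/optimized-cloud-recipes.git
cd optimized-cloud-recipes/recipes/ai-opea-chatqna-xeon
ansible-playbook -i inventory.ini playbook.yml
```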
## Manually Deploy ChatQnA Service

The ChatQnA service can be deployed on Intel Gaudi2, Intel Xeon Scalable processors, and NVIDIA GPUs; the overall flow is sketched below.

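Before the platform-specific sections, here is the general shape of a manual deployment. This is an illustrative sketch only; the required environment variables and the exact compose directory are listed in each platform's README.

```
# Illustrative sketch; see the platform-specific README for required env vars.
git clone https://github.com/opea-project/GenAIExamples.git
cd GenAIExamples/ChatQnA/docker_compose/intel/hpu/gaudi/   # or another platform directory
export host_ip=$(hostname -I | awk '{print $1}')           # plus model and endpoint variables
docker compose up -d
```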
@@ -206,8 +225,6 @@ cd GenAIExamples/ChatQnA/docker_compose/intel/hpu/gaudi/
docker compose up -d
```

> Notice: Currently only the **Habana Driver 1.16.x** is supported for Gaudi.

Refer to the [Gaudi Guide](./docker_compose/intel/hpu/gaudi/README.md) to build docker images from source.

### Deploy ChatQnA on Xeon

@@ -48,7 +48,7 @@ To setup a LLM model, we can use [tgi-gaudi](https://github.com/huggingface/tgi-
docker run -p {your_llm_port}:80 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HF_TOKEN={your_hf_token} --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.1 --model-id mistralai/Mixtral-8x7B-Instruct-v0.1 --max-input-tokens 2048 --max-total-tokens 4096 --sharded true --num-shard 2

# for better performance, set `PREFILL_BATCH_BUCKET_SIZE`, `BATCH_BUCKET_SIZE`, `max-batch-total-tokens`, `max-batch-prefill-tokens`
docker run -p {your_llm_port}:80 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HF_TOKEN={your_hf_token} -e PREFILL_BATCH_BUCKET_SIZE=1 -e BATCH_BUCKET_SIZE=8 --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.5 --model-id mistralai/Mixtral-8x7B-Instruct-v0.1 --max-input-tokens 2048 --max-total-tokens 4096 --sharded true --num-shard 2 --max-batch-total-tokens 65536 --max-batch-prefill-tokens 2048
docker run -p {your_llm_port}:80 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HF_TOKEN={your_hf_token} -e PREFILL_BATCH_BUCKET_SIZE=1 -e BATCH_BUCKET_SIZE=8 --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.6 --model-id mistralai/Mixtral-8x7B-Instruct-v0.1 --max-input-tokens 2048 --max-total-tokens 4096 --sharded true --num-shard 2 --max-batch-total-tokens 65536 --max-batch-prefill-tokens 2048
```

### Prepare Dataset

@@ -41,11 +41,11 @@ class MultiHop_Evaluator(Evaluator):
        return []

    def get_retrieved_documents(self, query, arguments):
        data = {"text": query}
        data = {"inputs": query}
        headers = {"Content-Type": "application/json"}
        response = requests.post(arguments.embedding_endpoint, data=json.dumps(data), headers=headers)
        response = requests.post(arguments.tei_embedding_endpoint + "/embed", data=json.dumps(data), headers=headers)
        if response.ok:
            embedding = response.json()["embedding"]
            embedding = response.json()[0]
        else:
            print(f"Request for embedding failed due to {response.text}.")
            return []
