Compare commits

68 Commits

| SHA1 |
|---|
| a2437e83e7 |
| 1b3398902b |
| 07921a98d0 |
| 42104fe117 |
| 1ac696bfa9 |
| 8032ce60f6 |
| b61c2abd61 |
| 965c13c556 |
| 8f9bcd4b3c |
| 6b5489a36e |
| 8d0c8fb949 |
| 535dcc36e5 |
| edf0d14c95 |
| 43bd8f2e4b |
| d0b028d199 |
| 8b60948c7b |
| 268d58d4a9 |
| 5984848bb0 |
| f4b4ac0d3a |
| c745641ba1 |
| 1b48e54a3d |
| 8c4a2534c1 |
| 21b7d11098 |
| e371b1e9d4 |
| 95c13d9558 |
| 62ae64f13c |
| 631d841119 |
| 665c46ffae |
| 6e797fae89 |
| b46ae8bdcc |
| f45f508847 |
| 284d855bf4 |
| 290a74fae9 |
| 8ad7f36fe2 |
| 2f9397e012 |
| c9548d7921 |
| 83146320aa |
| ecf3338835 |
| b7975e79d8 |
| bb42307af9 |
| 2e312f44ed |
| ee0dcb3d37 |
| f732674b1e |
| d9946180a2 |
| 11a56e09ef |
| 615f0d2547 |
| 9551594164 |
| 97da49f61e |
| 8d4209a015 |
| ba65415b78 |
| 3505bd25a4 |
| 49789595e5 |
| ff05573d98 |
| c37d9c82b0 |
| 89ddec9b2d |
| d7a5b751d9 |
| 15fc6f9711 |
| e878dc1311 |
| 377dd2fa9e |
| 335362ab11 |
| f9312b3713 |
| 5f52a10ffe |
| 450efcc139 |
| 034541404e |
| 26d4ff11ff |
| 3bb2fee922 |
| 7ebe781ccb |
| bbbaefacad |
1  .gitattributes  (vendored, new file)
@@ -0,0 +1 @@
* text=auto eol=lf
1  .github/pull_request_template.md  (vendored)
@@ -13,6 +13,7 @@ List the type of change like below. Please delete options that are not relevant.
- [ ] Bug fix (non-breaking change which fixes an issue)
- [ ] New feature (non-breaking change which adds new functionality)
- [ ] Breaking change (fix or feature that would break existing design and interface)
- [ ] Others (enhancement, documentation, validation, etc.)

## Dependencies
78  .github/workflows/chatqna_benchmark.yml  (vendored, new file)
@@ -0,0 +1,78 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

name: benchmark test with chatqna_benchmark

on:
  # pull_request:
  #   branches: [main]
  #   types: [opened, reopened, ready_for_review, synchronize]
  # # inputs:
  # # variables:
  # #   hardware:
  # #     description: 'Enter your param' #gaudi or xeon
  # #     required: true
  # #     default: xeon
  schedule:
    - cron: "35 0 * * 6"
  workflow_dispatch:
    inputs:
      hardware:
        description: 'Enter your hardware' #gaudi or xeon
        required: true
        default: gaudi

jobs:
  Example-test:
    runs-on: ${{ github.event.inputs.hardware || 'gaudi' }} #xeon #gaudi
    steps:
      - name: Clean Up Working Directory
        run: sudo rm -rf ${{github.workspace}}/*

      - name: Checkout out Repo
        uses: actions/checkout@v4

      - name: Clone repo GenAIEval
        run: |
          git clone https://github.com/opea-project/GenAIEval.git
          cd GenAIEval && git checkout v0.6

      - name: Run test
        env:
          HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
          GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
          GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
          hardware: ${{ github.event.inputs.hardware || 'gaudi' }} #xeon
          mode: perf
          IMAGE_TAG: latest
          IMAGE_REPO_GAUDI: ${{ vars.IMAGE_REPO_GAUDI }}
          IMAGE_REPO_XEON: ${{ vars.IMAGE_REPO_XEON }}
        run: |
          # cd ${{ github.workspace }}/$example/tests
          cd ${{ github.workspace }}/ChatQnA/tests
          cp ../../GenAIEval/evals/benchmark/chatqna_benchmark.py .
          cp ../../GenAIEval/evals/benchmark/data.json ${{ github.workspace }}/ChatQnA/docker/${hardware}/

          if [ "$hardware" == "gaudi" ]; then IMAGE_REPO=$IMAGE_REPO_GAUDI; else IMAGE_REPO=$IMAGE_REPO_XEON; fi
          export IMAGE_REPO=${IMAGE_REPO}
          # example_l=$(echo $example | tr '[:upper:]' '[:lower:]')
          if [ -f test_chatqna_on_${hardware}.sh ]; then timeout 30m bash test_chatqna_on_${hardware}.sh > ${hardware}_output.log; else echo "Test script not found, skip test!"; fi

      - name: Process log and save to JSON
        env:
          hardware: ${{ github.event.inputs.hardware || 'gaudi' }} #xeon
        run: |
          cd ${{ github.workspace }}/ChatQnA/tests
          echo '{}' > ${hardware}_output.json
          echo $(grep -a 'Total Requests:' ${hardware}_output.log | awk '{print "{\"total_requests\": \""$3 "\"}"}') > ${hardware}_output.json
          echo $(grep -a 'P50 latency is' ${hardware}_output.log | awk '{print "{\"p50_latency\": \""$4 "\"}"}') >> ${hardware}_output.json
          echo $(grep -a 'P99 latency is' ${hardware}_output.log | awk '{print "{\"p99_latency\": \""$4 "\"}"}') >> ${hardware}_output.json
          jq -s 'add' ${hardware}_output.json > ${hardware}_final_output.json && mv ${hardware}_final_output.json ${hardware}_output.json

      - name: Publish pipeline artifact
        if: ${{ !cancelled() }}
        uses: actions/upload-artifact@v4
        with:
          path: |
            ${{ github.workspace }}/ChatQnA/tests/*.log
            ${{ github.workspace }}/ChatQnA/tests/*.json
6  .github/workflows/docker-compose-e2e.yml  (vendored)
@@ -80,11 +80,7 @@ jobs:
        if: cancelled() || failure()
        run: |
          cd ${{ github.workspace }}/$example/docker/$hardware
          container_list=$(cat docker_compose.yaml | grep container_name | cut -d':' -f2)
          for container_name in $container_list; do
            cid=$(docker ps -aq --filter "name=$container_name")
            if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
          done
          docker compose stop && docker compose rm -f
          echo y | docker system prune

      - name: Publish pipeline artifact
33  .github/workflows/gmc-e2e.yaml  (vendored)
@@ -4,7 +4,7 @@
name: E2E test with GMC

on:
-  pull_request:
+  pull_request_target:
    branches: [main]
    types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
    paths:
@@ -12,6 +12,7 @@ on:
      - "**/tests/test_gmc**"
      - "!**.md"
      - "!**.txt"
+     - "!**/kubernetes/manifests/**"
  workflow_dispatch:

concurrency:
@@ -43,36 +44,37 @@ jobs:
      - name: Checkout out Repo
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          ref: "refs/pull/${{ github.event.number }}/merge"

      - name: Set variables
        run: |
          if [ ${{ matrix.hardware }} == "gaudi" ]; then IMAGE_REPO=${{ vars.IMAGE_REPO_GAUDI }}; else IMAGE_REPO=${{ vars.IMAGE_REPO_XEON }}; fi
          echo "IMAGE_REPO=$OPEA_IMAGE_REPO" >> $GITHUB_ENV
          lower_example=$(echo "${{ matrix.example }}" | tr '[:upper:]' '[:lower:]')
          echo "SYSTEM_NAMESPACE=opea-system-$(date +%Y%m%d%H%M%S)" >> $GITHUB_ENV
          echo "APP_NAMESPACE=$lower_example-$(date +%Y%m%d%H%M%S)" >> $GITHUB_ENV
          echo "ROLLOUT_TIMEOUT_SECONDS=1800s" >> $GITHUB_ENV
          echo "KUBECTL_TIMEOUT_SECONDS=60s" >> $GITHUB_ENV
          echo "continue_test=true" >> $GITHUB_ENV
          echo "should_cleanup=false" >> $GITHUB_ENV
          echo "skip_validate=true" >> $GITHUB_ENV
          echo "APP_NAMESPACE=$APP_NAMESPACE"

      - name: Kubectl install
        id: install
      - name: Run tests
        id: run-test
        env:
          HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
          GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
          GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
        run: |
          if [[ ! -f ${{ github.workspace }}/${{ matrix.example }}/tests/test_gmc_on_${{ matrix.hardware }}.sh ]]; then
            echo "No test script found, exist test!"
            exit 0
          else
            ${{ github.workspace }}/${{ matrix.example }}/tests/test_gmc_on_${{ matrix.hardware }}.sh init_${{ matrix.example }}
            echo "should_cleanup=true" >> $GITHUB_ENV
            ${{ github.workspace }}/${{ matrix.example }}/tests/test_gmc_on_${{ matrix.hardware }}.sh install_${{ matrix.example }}
            echo "Testing ${{ matrix.example }}, waiting for pod ready..."
            if kubectl rollout status deployment --namespace "$APP_NAMESPACE" --timeout "$ROLLOUT_TIMEOUT_SECONDS"; then
              echo "Testing gmc ${{ matrix.example }}, waiting for pod ready done!"
              echo "skip_validate=false" >> $GITHUB_ENV
              echo "Testing gmc ${{ matrix.example }}, running validation test..."
              ${{ github.workspace }}/${{ matrix.example }}/tests/test_gmc_on_${{ matrix.hardware }}.sh validate_${{ matrix.example }}
            else
              echo "Timeout waiting for pods in namespace $APP_NAMESPACE to be ready!"
              exit 1
@@ -80,23 +82,10 @@ jobs:
          sleep 60
          fi

      - name: Validate e2e test
        if: always()
        run: |
          if $skip_validate; then
            echo "Skip validate"
          else
            ${{ github.workspace }}/${{ matrix.example }}/tests/test_gmc_on_${{ matrix.hardware }}.sh validate_${{ matrix.example }}
          fi

      - name: Kubectl uninstall
        if: always()
        run: |
          if $should_cleanup; then
            if ! kubectl delete ns $SYSTEM_NAMESPACE --timeout=$KUBECTL_TIMEOUT_SECONDS; then
              kubectl delete pods --namespace $SYSTEM_NAMESPACE --force --grace-period=0 --all
              kubectl delete ns $SYSTEM_NAMESPACE --force --grace-period=0 --timeout=$KUBECTL_TIMEOUT_SECONDS
            fi
            if ! kubectl delete ns $APP_NAMESPACE --timeout=$KUBECTL_TIMEOUT_SECONDS; then
              kubectl delete pods --namespace $APP_NAMESPACE --force --grace-period=0 --all
              kubectl delete ns $APP_NAMESPACE --force --grace-period=0 --timeout=$KUBECTL_TIMEOUT_SECONDS
9  .github/workflows/image-build-on-push.yml  (vendored)
@@ -23,8 +23,11 @@ jobs:
  mega-image-build:
    needs: job1
    strategy:
-      matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }}
+      matrix:
+        workload: ${{ fromJSON(needs.job1.outputs.run_matrix).include.*.example }}
+        hardware: ["gaudi","xeon"]
    uses: ./.github/workflows/reuse-image-build.yml
    with:
-      image-tag: latest
-      mega-service: "${{ matrix.example }}"
+      image_tag: latest
+      mega_service: "${{ matrix.workload }}"
+      runner_label: docker-build-${{ matrix.hardware }}
2  .github/workflows/reuse-image-build.yml  (vendored)
@@ -21,7 +21,7 @@ on:
      default: 'docker-build-xeon'
  outputs:
    image_repo:
-      description: "The image reposity used for the image build"
+      description: "The image repository used for the image build"
      value: ${{ jobs.mega-image-build.outputs.image_repo }}
    image_tag:
      description: "The image tag used for the image build"
10  .github/workflows/scripts/build_push.sh  (vendored)
@@ -46,7 +46,7 @@ function docker_build() {
# $1 is like "apple orange pear"
for MEGA_SVC in $1; do
    case $MEGA_SVC in
-        "ChatQnA"|"CodeGen"|"CodeTrans"|"DocSum"|"Translation")
+        "ChatQnA"|"CodeGen"|"CodeTrans"|"DocSum"|"Translation"|"AudioQnA"|"SearchQnA"|"FaqGen")
            cd $MEGA_SVC/docker
            IMAGE_NAME="$(getImagenameFromMega $MEGA_SVC)"
            docker_build ${IMAGE_NAME}
@@ -55,8 +55,14 @@ for MEGA_SVC in $1; do
            if [ "$MEGA_SVC" == "ChatQnA" ];then
                docker_build ${IMAGE_NAME}-conversation-ui docker/Dockerfile.react
            fi
+           if [ "$MEGA_SVC" == "DocSum" ];then
+               docker_build ${IMAGE_NAME}-react-ui docker/Dockerfile.react
+           fi
+           if [ "$MEGA_SVC" == "CodeGen" ];then
+               docker_build ${IMAGE_NAME}-react-ui docker/Dockerfile.react
+           fi
            ;;
-        "AudioQnA"|"SearchQnA"|"VisualQnA")
+        "VisualQnA")
            echo "Not supported yet"
            ;;
        *)
@@ -7,7 +7,7 @@ ci:

repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.5.0
+    rev: v4.6.0
    hooks:
      - id: end-of-file-fixer
        files: (.*\.(py|md|rst|yaml|yml|json|ts|js|html|svelte|sh))$
@@ -24,6 +24,8 @@ repos:
      - id: requirements-txt-fixer
      - id: trailing-whitespace
        files: (.*\.(py|rst|cmake|yaml|yml|json|ts|js|html|svelte|sh))$
+     - id: mixed-line-ending
+       args: [--fix=lf]

  - repo: https://github.com/Lucas-C/pre-commit-hooks
    rev: v1.5.5
@@ -100,21 +102,21 @@ repos:
          - prettier@3.2.5

  - repo: https://github.com/psf/black.git
-    rev: 24.3.0
+    rev: 24.4.2
    hooks:
      - id: black
        files: (.*\.py)$

  - repo: https://github.com/asottile/blacken-docs
-    rev: 1.16.0
+    rev: 1.18.0
    hooks:
      - id: blacken-docs
        args: [--line-length=120, --skip-errors]
        additional_dependencies:
-          - black==24.3.0
+          - black==24.4.2

  - repo: https://github.com/codespell-project/codespell
-    rev: v2.2.6
+    rev: v2.3.0
    hooks:
      - id: codespell
        args: [-w]
@@ -122,7 +124,7 @@ repos:
          - tomli

  - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.3.5
+    rev: v0.5.0
    hooks:
      - id: ruff
        args: [--fix, --exit-non-zero-on-fix, --no-cache]
@@ -5,7 +5,7 @@ ffmpeg-python
numpy
pydub
python-multipart
-torch==2.1.0
+torch==2.2.0
transformers
uvicorn
zhconv
@@ -1,101 +1,101 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

#
#
# This script is adapted from
# https://github.com/RVC-Boss/GPT-SoVITS/blob/main/api.py
# which is under the MIT license
#
# Copyright (c) 2024 RVC-Boss
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import os
import sys

import torch

sovits_path = ""
gpt_path = ""
is_half_str = os.environ.get("is_half", "True")
is_half = True if is_half_str.lower() == "true" else False
is_share_str = os.environ.get("is_share", "False")
is_share = True if is_share_str.lower() == "true" else False

cnhubert_path = "GPT_SoVITS/pretrained_models/chinese-hubert-base"
bert_path = "GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large"
pretrained_sovits_path = "GPT_SoVITS/pretrained_models/s2G488k.pth"
pretrained_gpt_path = "GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt"

exp_root = "logs"
python_exec = sys.executable or "python"
if torch.cuda.is_available():
    infer_device = "cuda"
else:
    infer_device = "cpu"

webui_port_main = 9874
webui_port_uvr5 = 9873
webui_port_infer_tts = 9872
webui_port_subfix = 9871

api_port = 9880

if infer_device == "cuda":
    gpu_name = torch.cuda.get_device_name(0)
    if (
        ("16" in gpu_name and "V100" not in gpu_name.upper())
        or "P40" in gpu_name.upper()
        or "P10" in gpu_name.upper()
        or "1060" in gpu_name
        or "1070" in gpu_name
        or "1080" in gpu_name
    ):
        is_half = False

if infer_device == "cpu":
    is_half = False
    use_bf16 = False


class Config:
    def __init__(self):
        self.sovits_path = sovits_path
        self.gpt_path = gpt_path
        self.is_half = is_half
        self.use_bf16 = use_bf16

        self.cnhubert_path = cnhubert_path
        self.bert_path = bert_path
        self.pretrained_sovits_path = pretrained_sovits_path
        self.pretrained_gpt_path = pretrained_gpt_path

        self.exp_root = exp_root
        self.python_exec = python_exec
        self.infer_device = infer_device

        self.webui_port_main = webui_port_main
        self.webui_port_uvr5 = webui_port_uvr5
        self.webui_port_infer_tts = webui_port_infer_tts
        self.webui_port_subfix = webui_port_subfix

        self.api_port = api_port
@@ -14,7 +14,7 @@ cd GenAIComps
### 2. Build ASR Image

```bash
-docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/Dockerfile_hpu .
+docker build -t opea/whisper-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/Dockerfile_hpu .

docker build -t opea/asr:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/Dockerfile .
@@ -29,7 +29,7 @@ docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_pr
### 4. Build TTS Image

```bash
-docker build -t opea/speecht5:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/speecht5/Dockerfile_hpu .
+docker build -t opea/speecht5-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/speecht5/Dockerfile_hpu .

docker build -t opea/tts:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/Dockerfile .
```
@@ -46,10 +46,10 @@ docker build --no-cache -t opea/audioqna:latest --build-arg https_proxy=$https_p

Then run the command `docker images`, you will have following images ready:

-1. `opea/whisper:latest`
+1. `opea/whisper-gaudi:latest`
2. `opea/asr:latest`
3. `opea/llm-tgi:latest`
-4. `opea/speecht5:latest`
+4. `opea/speecht5-gaudi:latest`
5. `opea/tts:latest`
6. `opea/audioqna:latest`

@@ -81,7 +81,7 @@ export LLM_SERVICE_PORT=3007

```bash
cd GenAIExamples/AudioQnA/docker/gaudi/
-docker compose -f docker_compose.yaml up -d
+docker compose up -d
```

## 🚀 Test MicroServices
@@ -6,7 +6,7 @@ version: "3.8"

services:
  whisper-service:
-    image: opea/whisper:latest
+    image: opea/whisper-gaudi:latest
    container_name: whisper-service
    ports:
      - "7066:7066"
@@ -30,7 +30,7 @@ services:
    environment:
      ASR_ENDPOINT: ${ASR_ENDPOINT}
  speecht5-service:
-    image: opea/speecht5:latest
+    image: opea/speecht5-gaudi:latest
    container_name: speecht5-service
    ports:
      - "7055:7055"
@@ -54,7 +54,7 @@ services:
    environment:
      TTS_ENDPOINT: ${TTS_ENDPOINT}
  tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:1.2.1
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.1
    container_name: tgi-gaudi-server
    ports:
      - "3006:80"
@@ -73,7 +73,7 @@ services:
    cap_add:
      - SYS_NICE
    ipc: host
-    command: --model-id ${LLM_MODEL_ID}
+    command: --model-id ${LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048
  llm:
    image: opea/llm-tgi:latest
    container_name: llm-tgi-gaudi-server
@@ -1,19 +1,18 @@
-<h1 align="center" id="title">AudioQnA</h1>
+# AudioQnA

-### 📸 Project Screenshots
+## 📸 Project Screenshots

-![project-screenshot](../../assets/img/audio_ui.png)
+![project-screenshot](https://imgur.com/F9ANfpt.png)
-![project-screenshot](../../assets/img/audio_ui_record.png)
+![project-screenshot](https://imgur.com/yeHvKiW.png)

-<h2>🧐 Features</h2>
+## 🧐 Features

Here're some of the project's features:

- Start a Talking Chat: Initiate voice chat, able to input voice content, and customize the conversation sound based on the uploaded file.
- Upload File: Select local upload of voice file. Chat based on the uploaded sound.
- Scroll to Bottom: The chat automatically slides to the bottom.

-<h2>🛠️ Get it Running:</h2>
+## 🛠️ Get it Running

1. Clone the repo.
@@ -22,13 +21,7 @@ Here're some of the project's features:
3. Modify the required .env variables.

   ```
-  TTS_URL = ''
-
-  UPLOAD_URL = ''
-
-  CHAT_URL = ''
+  ASR_URL = ''
+  CHAT_URL = ''
   ```

4. Execute `npm install` to install the corresponding dependencies.
@@ -81,7 +81,7 @@ export LLM_SERVICE_PORT=3007

```bash
cd GenAIExamples/AudioQnA/docker/xeon/
-docker compose -f docker_compose.yaml up -d
+docker compose up -d
```

## 🚀 Test MicroServices
@@ -3,6 +3,7 @@
# SPDX-License-Identifier: Apache-2.0

set -e
echo "IMAGE_REPO=${IMAGE_REPO}"

WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
@@ -13,14 +14,14 @@ function build_docker_images() {
    git clone https://github.com/opea-project/GenAIComps.git
    cd GenAIComps

-   docker build -t opea/whisper:latest -f comps/asr/whisper/Dockerfile_hpu .
+   docker build -t opea/whisper-gaudi:latest -f comps/asr/whisper/Dockerfile_hpu .

    docker build -t opea/asr:latest -f comps/asr/Dockerfile .
    docker build -t opea/llm-tgi:latest -f comps/llms/text-generation/tgi/Dockerfile .
-   docker build -t opea/speecht5:latest -f comps/tts/speecht5/Dockerfile_hpu .
+   docker build -t opea/speecht5-gaudi:latest -f comps/tts/speecht5/Dockerfile_hpu .
    docker build -t opea/tts:latest -f comps/tts/Dockerfile .

-   docker pull ghcr.io/huggingface/tgi-gaudi:1.2.1
+   docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1

    cd ..
@@ -54,31 +55,43 @@ function start_services() {

    # sed -i "s/backend_address/$ip_address/g" $WORKPATH/docker/ui/svelte/.env

    # Replace the container name with a test-specific name
    # echo "using image repository $IMAGE_REPO and image tag $IMAGE_TAG"
    # sed -i "s#image: opea/chatqna:latest#image: opea/chatqna:${IMAGE_TAG}#g" docker_compose.yaml
    # sed -i "s#image: opea/chatqna-ui:latest#image: opea/chatqna-ui:${IMAGE_TAG}#g" docker_compose.yaml
    # sed -i "s#image: opea/*#image: ${IMAGE_REPO}opea/#g" docker_compose.yaml
    if [[ "$IMAGE_REPO" != "" ]]; then
        # Replace the container name with a test-specific name
        echo "using image repository $IMAGE_REPO and image tag $IMAGE_TAG"
        sed -i "s#image: opea/audioqna:latest#image: opea/audioqna:${IMAGE_TAG}#g" compose.yaml
        sed -i "s#image: opea/audioqna-ui:latest#image: opea/audioqna-ui:${IMAGE_TAG}#g" compose.yaml
        sed -i "s#image: opea/*#image: ${IMAGE_REPO}opea/#g" compose.yaml
        echo "cat compose.yaml"
        cat compose.yaml
    fi

    # Start Docker Containers
    docker compose -f docker_compose.yaml up -d
    # n=0
    # until [[ "$n" -ge 200 ]]; do
    #     docker logs tgi-gaudi-server > tgi_service_start.log
    #     if grep -q Connected tgi_service_start.log; then
    #         break
    #     fi
    #     sleep 1s
    #     n=$((n+1))
    # done
    sleep 8m
    docker compose up -d
    n=0
    until [[ "$n" -ge 500 ]]; do
        docker logs tgi-gaudi-server > $LOG_PATH/tgi_service_start.log
        if grep -q Connected $LOG_PATH/tgi_service_start.log; then
            break
        fi
        sleep 1s
        n=$((n+1))
    done
}


function validate_megaservice() {
    result=$(http_proxy="" curl http://${ip_address}:3008/v1/audioqna -XPOST -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64}' -H 'Content-Type: application/json')
    echo "result is === $result"
    if [[ $result == *"AAA"* ]]; then
        echo "Result correct."
    else
        docker logs whisper-service > $LOG_PATH/whisper-service.log
        docker logs asr-service > $LOG_PATH/asr-service.log
        docker logs speecht5-service > $LOG_PATH/tts-service.log
        docker logs tts-service > $LOG_PATH/tts-service.log
        docker logs tgi-gaudi-server > $LOG_PATH/tgi-gaudi-server.log
        docker logs llm-tgi-gaudi-server > $LOG_PATH/llm-tgi-gaudi-server.log

        echo "Result wrong."
        exit 1
    fi
@@ -112,24 +125,14 @@ function validate_megaservice() {

function stop_docker() {
    cd $WORKPATH/docker/gaudi
    container_list=$(cat docker_compose.yaml | grep container_name | cut -d':' -f2)
    for container_name in $container_list; do
        cid=$(docker ps -aq --filter "name=$container_name")
        if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
    done
    docker compose stop && docker compose rm -f
}

function main() {

    stop_docker
    # begin_time=$(date +%s)
    build_docker_images
    # start_time=$(date +%s)
    if [[ "$IMAGE_REPO" == "" ]]; then build_docker_images; fi
    start_services
    # end_time=$(date +%s)
    # minimal_duration=$((end_time-start_time))
    # maximal_duration=$((end_time-begin_time))
    # echo "Mega service start duration is "$maximal_duration"s"

    # validate_microservices
    validate_megaservice
@@ -3,6 +3,7 @@
# SPDX-License-Identifier: Apache-2.0

set -e
echo "IMAGE_REPO=${IMAGE_REPO}"

WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
@@ -19,7 +20,7 @@ function build_docker_images() {
    docker build -t opea/speecht5:latest -f comps/tts/speecht5/Dockerfile .
    docker build -t opea/tts:latest -f comps/tts/Dockerfile .

-   docker pull ghcr.io/huggingface/tgi-gaudi:1.2.1
+   docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1

    cd $WORKPATH/docker
    docker build --no-cache -t opea/audioqna:latest -f Dockerfile .
@@ -50,31 +51,44 @@ function start_services() {

    # sed -i "s/backend_address/$ip_address/g" $WORKPATH/docker/ui/svelte/.env

    # Replace the container name with a test-specific name
    # echo "using image repository $IMAGE_REPO and image tag $IMAGE_TAG"
    # sed -i "s#image: opea/chatqna:latest#image: opea/chatqna:${IMAGE_TAG}#g" docker_compose.yaml
    # sed -i "s#image: opea/chatqna-ui:latest#image: opea/chatqna-ui:${IMAGE_TAG}#g" docker_compose.yaml
    # sed -i "s#image: opea/*#image: ${IMAGE_REPO}opea/#g" docker_compose.yaml
    if [[ "$IMAGE_REPO" != "" ]]; then
        # Replace the container name with a test-specific name
        echo "using image repository $IMAGE_REPO and image tag $IMAGE_TAG"
        sed -i "s#image: opea/audioqna:latest#image: opea/audioqna:${IMAGE_TAG}#g" compose.yaml
        sed -i "s#image: opea/audioqna-ui:latest#image: opea/audioqna-ui:${IMAGE_TAG}#g" compose.yaml
        sed -i "s#image: opea/*#image: ${IMAGE_REPO}opea/#g" compose.yaml
        echo "cat compose.yaml"
        cat compose.yaml
    fi

    # Start Docker Containers
    docker compose -f docker_compose.yaml up -d
    sleep 3m
    # n=0
    # until [[ "$n" -ge 200 ]]; do
    #     docker logs tgi-service > tgi_service_start.log
    #     if grep -q Connected tgi_service_start.log; then
    #         break
    #     fi
    #     sleep 1s
    #     n=$((n+1))
    # done
    docker compose up -d
    n=0
    until [[ "$n" -ge 500 ]]; do
        docker logs tgi-service > $LOG_PATH/tgi_service_start.log
        if grep -q Connected $LOG_PATH/tgi_service_start.log; then
            break
        fi
        sleep 1s
        n=$((n+1))
    done
}


function validate_megaservice() {
    result=$(http_proxy="" curl http://${ip_address}:3008/v1/audioqna -XPOST -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64}' -H 'Content-Type: application/json')
    echo $result
    if [[ $result == *"AAA"* ]]; then
        echo "Result correct."
    else
        docker logs whisper-service > $LOG_PATH/whisper-service.log
        docker logs asr-service > $LOG_PATH/asr-service.log
        docker logs speecht5-service > $LOG_PATH/tts-service.log
        docker logs tts-service > $LOG_PATH/tts-service.log
        docker logs tgi-service > $LOG_PATH/tgi-service.log
        docker logs llm-tgi-server > $LOG_PATH/llm-tgi-server.log
        docker logs audioqna-xeon-backend-server > $LOG_PATH/audioqna-xeon-backend-server.log

        echo "Result wrong."
        exit 1
    fi
@@ -108,22 +122,14 @@ function validate_megaservice() {

function stop_docker() {
    cd $WORKPATH/docker/xeon
    container_list=$(cat docker_compose.yaml | grep container_name | cut -d':' -f2)
    for container_name in $container_list; do
        cid=$(docker ps -aq --filter "name=$container_name")
        if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
    done
    docker compose stop && docker compose rm -f
}

function main() {

    stop_docker
    build_docker_images
    # begin_time=$(date +%s)
    if [[ "$IMAGE_REPO" == "" ]]; then build_docker_images; fi
    start_services
    # end_time=$(date +%s)
    # maximal_duration=$((end_time-begin_time))
    # echo "Mega service start duration is "$maximal_duration"s" && sleep 1s

    validate_megaservice
    # validate_frontend
@@ -8,24 +8,158 @@ ChatQnA architecture shows below:

![architecture](./assets/img/chatqna_architecture.png)

ChatQnA is implemented on top of [GenAIComps](https://github.com/opea-project/GenAIComps), the ChatQnA Flow Chart shows below:

![Flow Chart](./assets/img/chatqna_flow_chart.png)

This ChatQnA use case performs RAG using LangChain, Redis VectorDB and Text Generation Inference on Intel Gaudi2 or Intel XEON Scalable Processors. The Intel Gaudi2 accelerator supports both training and inference for deep learning models in particular for LLMs. Please visit [Habana AI products](https://habana.ai/products) for more details.

# Deploy ChatQnA Service

The ChatQnA service can be effortlessly deployed on either Intel Gaudi2 or Intel XEON Scalable Processors.

Currently we support two ways of deploying ChatQnA services with docker compose:

1. Start services using the docker image on `docker hub`:

   ```bash
   docker pull opea/chatqna:latest
   ```

   Two types of UI are supported now; choose the one you like and pull the referred docker image.

   If you choose conversational UI, follow the [instruction](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/docker/gaudi#-launch-the-conversational-ui-optional) and modify the [compose.yaml](./docker/xeon/compose.yaml).

   ```bash
   docker pull opea/chatqna-ui:latest
   # or
   docker pull opea/chatqna-conversation-ui:latest
   ```

2. Start services using the docker images `built from source`: [Guide](./docker)

## Setup Environment Variable

To set up environment variables for deploying ChatQnA services, follow these steps:

1. Set the required environment variables:

   ```bash
   # Example: host_ip="192.168.1.1"
   export host_ip="External_Public_IP"
   # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
   export no_proxy="Your_No_Proxy"
   export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
   ```

2. If you are in a proxy environment, also set the proxy-related environment variables:

   ```bash
   export http_proxy="Your_HTTP_Proxy"
   export https_proxy="Your_HTTPs_Proxy"
   ```

3. Set up other environment variables:

   > Notice that you can only choose <b>one</b> command below to set up envs according to your hardware. Otherwise the port numbers may be set incorrectly.

   ```bash
   # on Gaudi
   source ./docker/gaudi/set_env.sh
   # on Xeon
   source ./docker/xeon/set_env.sh
   # on Nvidia GPU
   source ./docker/gpu/set_env.sh
   ```

## Deploy ChatQnA on Gaudi

Refer to the [Gaudi Guide](./docker/gaudi/README.md) for instructions on deploying ChatQnA on Gaudi.
Please find corresponding [compose.yaml](./docker/gaudi/compose.yaml).

```bash
cd GenAIExamples/ChatQnA/docker/gaudi/
docker compose up -d
```

> Notice: Currently only the <b>Habana Driver 1.16.x</b> is supported for Gaudi.

Please refer to the [Gaudi Guide](./docker/gaudi/README.md) to build docker images from source.

## Deploy ChatQnA on Xeon

Refer to the [Xeon Guide](./docker/xeon/README.md) for instructions on deploying ChatQnA on Xeon.
Please find corresponding [compose.yaml](./docker/xeon/compose.yaml).

```bash
cd GenAIExamples/ChatQnA/docker/xeon/
docker compose up -d
```

Refer to the [Xeon Guide](./docker/xeon/README.md) for more instructions on building docker images from source.

## Deploy ChatQnA on NVIDIA GPU

Refer to the [NVIDIA GPU Guide](./docker/gpu/README.md) for instructions on deploying ChatQnA on NVIDIA GPU.

```bash
cd GenAIExamples/ChatQnA/docker/gpu/
docker compose up -d
```

-## Deploy ChatQnA into Kubernetes on Xeon & Gaudi
+Refer to the [NVIDIA GPU Guide](./docker/gpu/README.md) for more instructions on building docker images from source.

-Refer to the [Kubernetes Guide](./kubernetes/manifests/README.md) for instructions on deploying ChatQnA into Kubernetes on Xeon & Gaudi.
+## Deploy ChatQnA into Kubernetes on Xeon & Gaudi with GMC

Refer to the [Kubernetes Guide](./kubernetes/README.md) for instructions on deploying ChatQnA into Kubernetes on Xeon & Gaudi with GMC.

## Deploy ChatQnA into Kubernetes on Xeon & Gaudi without GMC

Refer to the [Kubernetes Guide](./kubernetes/manifests/README.md) for instructions on deploying ChatQnA into Kubernetes on Xeon & Gaudi without GMC.

## Deploy ChatQnA into Kubernetes using Helm Chart

Install Helm (version >= 3.15) first. Please refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information.

Refer to the [ChatQnA helm chart](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts/chatqna) for instructions on deploying ChatQnA into Kubernetes on Xeon & Gaudi.

## Deploy ChatQnA on AI PC

Refer to the [AI PC Guide](./docker/aipc/README.md) for instructions on deploying ChatQnA on AI PC.

# Consume ChatQnA Service

Two ways of consuming ChatQnA Service:

1. Use cURL command on terminal

   ```bash
   curl http://${host_ip}:8888/v1/chatqna \
       -H "Content-Type: application/json" \
       -d '{
           "messages": "What is the revenue of Nike in 2023?"
       }'
   ```

2. Access via frontend

   To access the frontend, open the following URL in your browser: `http://{host_ip}:5173`

   By default, the UI runs on port 5173 internally.

   If you choose conversational UI, use this URL: `http://{host_ip}:5174`

# Troubleshooting

1. If you get errors like "Access Denied", please [validate micro service](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/docker/xeon#validate-microservices) first. A simple example:

   ```bash
   http_proxy="" curl ${host_ip}:6006/embed -X POST -d '{"inputs":"What is Deep Learning?"}' -H 'Content-Type: application/json'
   ```

2. (Docker only) If all microservices work well, please check the port ${host_ip}:8888, the port may be allocated by other users, you can modify the `compose.yaml`.

3. (Docker only) If you get errors like "The container name is in use", please change container name in `compose.yaml`.

# Monitoring OPEA Service with Prometheus and Grafana dashboard

OPEA microservice deployment can easily be monitored through Grafana dashboards in conjunction with Prometheus data collection. Please follow the [README](https://github.com/opea-project/GenAIEval/blob/main/evals/benchmark/grafana/README.md) to setup Prometheus and Grafana servers and import dashboards to monitor the OPEA service.

![chatqna dashboards](./assets/img/chatqna_dashboards.png)
![tgi dashboard](./assets/img/tgi_dashboard.png)
BIN  ChatQnA/assets/img/chatqna_dashboards.png  (new file, 100 KiB)
BIN  ChatQnA/assets/img/chatqna_flow_chart.png  (new file, 114 KiB)
BIN  ChatQnA/assets/img/tgi_dashboard.png  (new file, 414 KiB)
21  ChatQnA/benchmark/single_gaudi/chatqna_config_map.yaml  (new file)
@@ -0,0 +1,21 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: v1
kind: ConfigMap
metadata:
  name: qna-config
  namespace: default
data:
  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
  RERANK_MODEL_ID: BAAI/bge-reranker-base
  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
  TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
  TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
  INDEX_NAME: rag-redis
  EMBEDDING_SERVICE_HOST_IP: embedding-svc
  RETRIEVER_SERVICE_HOST_IP: retriever-svc
  RERANK_SERVICE_HOST_IP: reranking-svc
  LLM_SERVICE_HOST_IP: llm-svc
60  ChatQnA/benchmark/single_gaudi/chatqna_mega_service_run.yaml  (new file)
@@ -0,0 +1,60 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: apps/v1
kind: Deployment
metadata:
  name: chatqna-backend-server-deploy
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: chatqna-backend-server-deploy
  template:
    metadata:
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
      labels:
        app: chatqna-backend-server-deploy
    spec:
      topologySpreadConstraints:
        - maxSkew: 1
          topologyKey: kubernetes.io/hostname
          whenUnsatisfiable: ScheduleAnyway
          labelSelector:
            matchLabels:
              app: chatqna-backend-server-deploy
      hostIPC: true
      containers:
        - envFrom:
            - configMapRef:
                name: qna-config
          image: opea/chatqna:latest
          imagePullPolicy: IfNotPresent
          name: chatqna-backend-server-deploy
          args: null
          ports:
            - containerPort: 8888
          resources:
            limits:
              cpu: 8
              memory: 4000Mi
            requests:
              cpu: 8
              memory: 4000Mi
      serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
  name: chaqna-backend-server-svc
spec:
  type: NodePort
  selector:
    app: chatqna-backend-server-deploy
  ports:
    - name: service
      port: 8888
      targetPort: 8888
      nodePort: 30888
@@ -0,0 +1,68 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: apps/v1
kind: Deployment
metadata:
  name: dataprep-deploy
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: dataprep-deploy
  template:
    metadata:
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
      labels:
        app: dataprep-deploy
    spec:
      topologySpreadConstraints:
        - maxSkew: 1
          topologyKey: kubernetes.io/hostname
          whenUnsatisfiable: ScheduleAnyway
          labelSelector:
            matchLabels:
              app: dataprep-deploy
      hostIPC: true
      containers:
        - env:
            - name: REDIS_URL
              valueFrom:
                configMapKeyRef:
                  name: qna-config
                  key: REDIS_URL
            - name: INDEX_NAME
              valueFrom:
                configMapKeyRef:
                  name: qna-config
                  key: INDEX_NAME
          image: opea/dataprep-redis:latest
          imagePullPolicy: IfNotPresent
          name: dataprep-deploy
          args: null
          ports:
            - containerPort: 6007
            - containerPort: 6008
            - containerPort: 6009
      serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
  name: dataprep-svc
spec:
  type: ClusterIP
  selector:
    app: dataprep-deploy
  ports:
    - name: port1
      port: 6007
      targetPort: 6007
    - name: port2
      port: 6008
      targetPort: 6008
    - name: port3
      port: 6009
      targetPort: 6009
@@ -1,32 +1,33 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

---
apiVersion: apps/v1
kind: Deployment
metadata:
-  name: tei-embedding-service-deploy
+  name: embedding-dependency-deploy
  namespace: default
spec:
-  replicas: 1
+  replicas: 4
  selector:
    matchLabels:
-      app: tei-embedding-service-deploy
+      app: embedding-dependency-deploy
  template:
    metadata:
      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: "true"
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
      labels:
-        app: tei-embedding-service-deploy
+        app: embedding-dependency-deploy
    spec:
      containers:
        - envFrom:
            - configMapRef:
                name: qna-config
          image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
-          name: tei-embedding-service-deploy
+          name: embedding-dependency-deploy
          args:
            - --model-id
            - $(EMBEDDING_MODEL_ID)
+            - --auto-truncate
          volumeMounts:
            - mountPath: /data
              name: model-volume
@@ -34,20 +35,18 @@ spec:
              name: shm
          ports:
            - containerPort: 80
-          # resources:
-          #   limits:
-          #     cpu: 56000m
-          #     memory: 26Gi
-          #   requests:
-          #     cpu: 56000m
-          #     memory: 26Gi
+          resources:
+            limits:
+              cpu: 24
+              memory: 4000Mi
+            requests:
+              cpu: 24
+              memory: 4000Mi
      serviceAccountName: default
      volumes:
        - name: model-volume
          hostPath:
            # directory location on host
-            path: /mnt/models
            # this field is optional
+            path: /home/sdp/cesg
            type: Directory
        - name: shm
          emptyDir:
@@ -57,12 +56,12 @@ spec:
kind: Service
apiVersion: v1
metadata:
-  name: tei-embedding-svc
+  name: embedding-dependency-svc
spec:
  type: ClusterIP
  selector:
-    app: tei-embedding-service-deploy
+    app: embedding-dependency-deploy
  ports:
    - name: service
      port: 6006
      targetPort: 80
@@ -1,11 +1,11 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: embedding-deploy
  namespace: default
spec:
  replicas: 1
  selector:
@@ -14,10 +14,17 @@ spec:
  template:
    metadata:
      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: "true"
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
      labels:
        app: embedding-deploy
    spec:
+      topologySpreadConstraints:
+        - maxSkew: 1
+          topologyKey: kubernetes.io/hostname
+          whenUnsatisfiable: ScheduleAnyway
+          labelSelector:
+            matchLabels:
+              app: embedding-deploy
      hostIPC: true
      containers:
        - envFrom:
@@ -26,9 +33,14 @@ spec:
          image: opea/embedding-tei:latest
          imagePullPolicy: IfNotPresent
          name: embedding-deploy
-          args:
+          args: null
          ports:
            - containerPort: 6000
+          resources:
+            limits:
+              cpu: 4
+            requests:
+              cpu: 4
      serviceAccountName: default
---
kind: Service
@@ -40,6 +52,6 @@ spec:
  selector:
    app: embedding-deploy
  ports:
    - name: service
      port: 6000
      targetPort: 6000
@@ -1,33 +1,41 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

---
apiVersion: apps/v1
kind: Deployment
metadata:
-  name: tgi-gaudi-service-deploy
+  name: llm-dependency-deploy
  namespace: default
spec:
-  replicas: 1
+  replicas: 7
  selector:
    matchLabels:
-      app: tgi-gaudi-service-deploy
+      app: llm-dependency-deploy
  template:
    metadata:
      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: "true"
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
      labels:
-        app: tgi-gaudi-service-deploy
+        app: llm-dependency-deploy
    spec:
      hostIPC: true
      containers:
        - envFrom:
            - configMapRef:
                name: qna-config
-          image: ghcr.io/huggingface/tgi-gaudi:1.2.1
-          name: tgi-gaudi-service-deploy-demo
+          image: tgi_gaudi:2.0.1
+          name: llm-dependency-deploy-demo
+          securityContext:
+            capabilities:
+              add:
+                - SYS_NICE
          args:
            - --model-id
            - $(LLM_MODEL_ID)
+            - --max-input-length
+            - '2048'
+            - --max-total-tokens
+            - '4096'
          volumeMounts:
            - mountPath: /data
              name: model-volume
@@ -38,14 +46,22 @@ spec:
          resources:
            limits:
              habana.ai/gaudi: 1
+          env:
+            - name: OMPI_MCA_btl_vader_single_copy_mechanism
+              value: none
+            - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+              value: 'true'
+            - name: runtime
+              value: habana
+            - name: HABANA_VISIBLE_DEVICES
+              value: all
+            - name: HF_TOKEN
+              value: $(HF_TOKEN)
      serviceAccountName: default
-      nodeSelector:
      volumes:
        - name: model-volume
          hostPath:
            # directory location on host
-            path: /mnt/models
            # this field is optional
+            path: /home/sdp/cesg
            type: Directory
        - name: shm
          emptyDir:
@@ -55,12 +71,12 @@ spec:
kind: Service
apiVersion: v1
metadata:
-  name: tgi-gaudi-svc
+  name: llm-dependency-svc
spec:
  type: ClusterIP
  selector:
-    app: tgi-gaudi-service-deploy
+    app: llm-dependency-deploy
  ports:
    - name: service
      port: 9009
      targetPort: 80
@@ -1,11 +1,11 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: llm-deploy
  namespace: default
spec:
  replicas: 1
  selector:
@@ -14,10 +14,17 @@ spec:
  template:
    metadata:
      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: "true"
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
      labels:
        app: llm-deploy
    spec:
+      topologySpreadConstraints:
+        - maxSkew: 1
+          topologyKey: kubernetes.io/hostname
+          whenUnsatisfiable: ScheduleAnyway
+          labelSelector:
+            matchLabels:
+              app: llm-deploy
      hostIPC: true
      containers:
        - envFrom:
@@ -26,9 +33,14 @@ spec:
          image: opea/llm-tgi:latest
          imagePullPolicy: IfNotPresent
          name: llm-deploy
-          args:
+          args: null
          ports:
            - containerPort: 9000
+          resources:
+            limits:
+              cpu: 4
+            requests:
+              cpu: 4
      serviceAccountName: default
---
kind: Service
@@ -40,6 +52,6 @@ spec:
  selector:
    app: llm-deploy
  ports:
    - name: service
      port: 9000
      targetPort: 9000
83  ChatQnA/benchmark/single_gaudi/reranking-dependency_run.yaml  (new file)
@@ -0,0 +1,83 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: apps/v1
kind: Deployment
metadata:
  name: reranking-dependency-deploy
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: reranking-dependency-deploy
  template:
    metadata:
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
      labels:
        app: reranking-dependency-deploy
    spec:
      topologySpreadConstraints:
        - maxSkew: 1
          topologyKey: kubernetes.io/hostname
          whenUnsatisfiable: ScheduleAnyway
          labelSelector:
            matchLabels:
              app: reranking-dependency-deploy
      containers:
        - envFrom:
            - configMapRef:
                name: qna-config
          image: tei_gaudi:rerank
          name: reranking-dependency-deploy
          args:
            - --model-id
            - $(RERANK_MODEL_ID)
            - --auto-truncate
          volumeMounts:
            - mountPath: /data
              name: model-volume
            - mountPath: /dev/shm
              name: shm
          ports:
            - containerPort: 80
          resources:
            limits:
              habana.ai/gaudi: 1
          env:
            - name: OMPI_MCA_btl_vader_single_copy_mechanism
              value: none
            - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
              value: 'true'
            - name: runtime
              value: habana
            - name: HABANA_VISIBLE_DEVICES
              value: all
            - name: HF_TOKEN
              value: $(HF_TOKEN)
            - name: MAX_WARMUP_SEQUENCE_LENGTH
              value: '512'
      serviceAccountName: default
      volumes:
        - name: model-volume
          hostPath:
            path: /home/sdp/cesg
            type: Directory
        - name: shm
          emptyDir:
            medium: Memory
            sizeLimit: 1Gi
---
kind: Service
apiVersion: v1
metadata:
  name: reranking-dependency-svc
spec:
  type: ClusterIP
  selector:
    app: reranking-dependency-deploy
  ports:
    - name: service
      port: 8808
      targetPort: 80
@@ -1,11 +1,11 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: reranking-deploy
  namespace: default
spec:
  replicas: 1
  selector:
@@ -14,10 +14,17 @@ spec:
  template:
    metadata:
      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: "true"
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
      labels:
        app: reranking-deploy
    spec:
+      topologySpreadConstraints:
+        - maxSkew: 1
+          topologyKey: kubernetes.io/hostname
+          whenUnsatisfiable: ScheduleAnyway
+          labelSelector:
+            matchLabels:
+              app: reranking-deploy
      hostIPC: true
      containers:
        - envFrom:
@@ -26,9 +33,14 @@ spec:
          image: opea/reranking-tei:latest
          imagePullPolicy: IfNotPresent
          name: reranking-deploy
-          args:
+          args: null
          ports:
            - containerPort: 8000
+          resources:
+            limits:
+              cpu: 4
+            requests:
+              cpu: 4
      serviceAccountName: default
---
kind: Service
@@ -40,6 +52,6 @@ spec:
  selector:
    app: reranking-deploy
  ports:
    - name: service
      port: 8000
      targetPort: 8000
@@ -0,0 +1,67 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: apps/v1
kind: Deployment
metadata:
  name: retriever-deploy
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: retriever-deploy
  template:
    metadata:
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
      labels:
        app: retriever-deploy
    spec:
      topologySpreadConstraints:
        - maxSkew: 1
          topologyKey: kubernetes.io/hostname
          whenUnsatisfiable: ScheduleAnyway
          labelSelector:
            matchLabels:
              app: retriever-deploy
      hostIPC: true
      containers:
        - env:
            - name: REDIS_URL
              valueFrom:
                configMapKeyRef:
                  name: qna-config
                  key: REDIS_URL
            - name: INDEX_NAME
              valueFrom:
                configMapKeyRef:
                  name: qna-config
                  key: INDEX_NAME
          image: opea/retriever-redis:latest
          imagePullPolicy: IfNotPresent
          name: retriever-deploy
          args: null
          ports:
            - containerPort: 7000
          resources:
            limits:
              cpu: 8
              memory: 2500Mi
            requests:
              cpu: 8
              memory: 2500Mi
      serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
  name: retriever-svc
spec:
  type: ClusterIP
  selector:
    app: retriever-deploy
  ports:
    - name: service
      port: 7000
      targetPort: 7000
@@ -1,42 +1,46 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0

 # Redis Vector DB Deployment
 apiVersion: apps/v1
 kind: Deployment
 metadata:
-  name: redis-vector-db
+  name: vector-db
 spec:
   replicas: 1
   selector:
     matchLabels:
-      app: redis-vector-db
+      app: vector-db
   template:
     metadata:
       labels:
-        app: redis-vector-db
+        app: vector-db
     spec:
+      topologySpreadConstraints:
+        - maxSkew: 1
+          topologyKey: kubernetes.io/hostname
+          whenUnsatisfiable: ScheduleAnyway
+          labelSelector:
+            matchLabels:
+              app: vector-db
       containers:
-        - name: redis-vector-db
+        - name: vector-db
           image: redis/redis-stack:7.2.0-v9
           ports:
             - containerPort: 6379
             - containerPort: 8001

 ---
 # Redis Vector DB Service
 apiVersion: v1
 kind: Service
 metadata:
-  name: redis-vector-db
+  name: vector-db
 spec:
   type: ClusterIP
   selector:
-    app: redis-vector-db
+    app: vector-db
   ports:
-    - name: redis-service
+    - name: vector-db-service
       port: 6379
       targetPort: 6379
-    - name: redis-insight
+    - name: vector-db-insight
       port: 8001
       targetPort: 8001
22 ChatQnA/benchmark/two_gaudi/chatqna_config_map.yaml Normal file
@@ -0,0 +1,22 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: v1
kind: ConfigMap
metadata:
  name: qna-config
  namespace: default
data:
  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
  RERANK_MODEL_ID: BAAI/bge-reranker-base
  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
  TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
  TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
  INDEX_NAME: rag-redis
  HUGGINGFACEHUB_API_TOKEN: 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
  EMBEDDING_SERVICE_HOST_IP: embedding-svc
  RETRIEVER_SERVICE_HOST_IP: retriever-svc
  RERANK_SERVICE_HOST_IP: reranking-svc
  LLM_SERVICE_HOST_IP: llm-svc
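The deployments in this directory consume this ConfigMap through `envFrom` and `configMapKeyRef`, so it must exist before they are applied. A minimal sketch of creating and inspecting it (an illustration, assuming `kubectl` is pointed at the target cluster and run from the directory containing these files):

```bash
# create or update the shared ConfigMap that the ChatQnA microservices read
kubectl apply -f chatqna_config_map.yaml

# verify the keys are present
kubectl get configmap qna-config -n default -o yaml
```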
60 ChatQnA/benchmark/two_gaudi/chatqna_mega_service_run.yaml Normal file
@@ -0,0 +1,60 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: apps/v1
kind: Deployment
metadata:
  name: chatqna-backend-server-deploy
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: chatqna-backend-server-deploy
  template:
    metadata:
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
      labels:
        app: chatqna-backend-server-deploy
    spec:
      topologySpreadConstraints:
        - maxSkew: 1
          topologyKey: kubernetes.io/hostname
          whenUnsatisfiable: ScheduleAnyway
          labelSelector:
            matchLabels:
              app: chatqna-backend-server-deploy
      hostIPC: true
      containers:
        - envFrom:
            - configMapRef:
                name: qna-config
          image: opea/chatqna:latest
          imagePullPolicy: IfNotPresent
          name: chatqna-backend-server-deploy
          args: null
          ports:
            - containerPort: 8888
          resources:
            limits:
              cpu: 8
              memory: 4000Mi
            requests:
              cpu: 8
              memory: 4000Mi
      serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
  name: chaqna-backend-server-svc
spec:
  type: NodePort
  selector:
    app: chatqna-backend-server-deploy
  ports:
    - name: service
      port: 8888
      targetPort: 8888
      nodePort: 30888
68 ChatQnA/benchmark/two_gaudi/dataprep-microservice_run.yaml Normal file
@@ -0,0 +1,68 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: apps/v1
kind: Deployment
metadata:
  name: dataprep-deploy
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: dataprep-deploy
  template:
    metadata:
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
      labels:
        app: dataprep-deploy
    spec:
      topologySpreadConstraints:
        - maxSkew: 1
          topologyKey: kubernetes.io/hostname
          whenUnsatisfiable: ScheduleAnyway
          labelSelector:
            matchLabels:
              app: dataprep-deploy
      hostIPC: true
      containers:
        - env:
            - name: REDIS_URL
              valueFrom:
                configMapKeyRef:
                  name: qna-config
                  key: REDIS_URL
            - name: INDEX_NAME
              valueFrom:
                configMapKeyRef:
                  name: qna-config
                  key: INDEX_NAME
          image: opea/dataprep-redis:latest
          imagePullPolicy: IfNotPresent
          name: dataprep-deploy
          args: null
          ports:
            - containerPort: 6007
            - containerPort: 6008
            - containerPort: 6009
      serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
  name: dataprep-svc
spec:
  type: ClusterIP
  selector:
    app: dataprep-deploy
  ports:
    - name: port1
      port: 6007
      targetPort: 6007
    - name: port2
      port: 6008
      targetPort: 6008
    - name: port3
      port: 6009
      targetPort: 6009
@@ -1,32 +1,33 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0

 ---
 apiVersion: apps/v1
 kind: Deployment
 metadata:
-  name: tei-reranking-service-deploy
+  name: embedding-dependency-deploy
   namespace: default
 spec:
-  replicas: 1
+  replicas: 10
   selector:
     matchLabels:
-      app: tei-reranking-service-deploy
+      app: embedding-dependency-deploy
   template:
     metadata:
       annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: "true"
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
       labels:
-        app: tei-reranking-service-deploy
+        app: embedding-dependency-deploy
     spec:
       containers:
         - envFrom:
             - configMapRef:
                 name: qna-config
           image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
-          name: tei-reranking-service-deploy
+          name: embedding-dependency-deploy
           args:
             - --model-id
-            - $(RERANK_MODEL_ID)
+            - $(EMBEDDING_MODEL_ID)
            - --auto-truncate
           volumeMounts:
             - mountPath: /data
               name: model-volume
@@ -34,20 +35,18 @@ spec:
               name: shm
           ports:
             - containerPort: 80
-          # resources:
-          #   limits:
-          #     cpu: 56000m
-          #     memory: 26Gi
-          #   requests:
-          #     cpu: 56000m
-          #     memory: 26Gi
+          resources:
+            limits:
+              cpu: 24
+              memory: 4000Mi
+            requests:
+              cpu: 24
+              memory: 4000Mi
       serviceAccountName: default
       volumes:
         - name: model-volume
           hostPath:
-            # directory location on host
-            path: /mnt/models
-            # this field is optional
+            path: /home/sdp/cesg
             type: Directory
         - name: shm
           emptyDir:
@@ -57,12 +56,12 @@ spec:
 kind: Service
 apiVersion: v1
 metadata:
-  name: tei-reranking-svc
+  name: embedding-dependency-svc
 spec:
   type: ClusterIP
   selector:
-    app: tei-reranking-service-deploy
+    app: embedding-dependency-deploy
   ports:
-    - name: service
-      port: 8808
-      targetPort: 80
+    - name: service
+      port: 6006
+      targetPort: 80
57 ChatQnA/benchmark/two_gaudi/embedding-microservice_run.yaml Normal file
@@ -0,0 +1,57 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: apps/v1
kind: Deployment
metadata:
  name: embedding-deploy
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: embedding-deploy
  template:
    metadata:
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
      labels:
        app: embedding-deploy
    spec:
      topologySpreadConstraints:
        - maxSkew: 1
          topologyKey: kubernetes.io/hostname
          whenUnsatisfiable: ScheduleAnyway
          labelSelector:
            matchLabels:
              app: embedding-deploy
      hostIPC: true
      containers:
        - envFrom:
            - configMapRef:
                name: qna-config
          image: opea/embedding-tei:latest
          imagePullPolicy: IfNotPresent
          name: embedding-deploy
          args: null
          ports:
            - containerPort: 6000
          resources:
            limits:
              cpu: 4
            requests:
              cpu: 4
      serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
  name: embedding-svc
spec:
  type: ClusterIP
  selector:
    app: embedding-deploy
  ports:
    - name: service
      port: 6000
      targetPort: 6000
82 ChatQnA/benchmark/two_gaudi/llm-dependency_run.yaml Normal file
@@ -0,0 +1,82 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: apps/v1
kind: Deployment
metadata:
  name: llm-dependency-deploy
  namespace: default
spec:
  replicas: 15
  selector:
    matchLabels:
      app: llm-dependency-deploy
  template:
    metadata:
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
      labels:
        app: llm-dependency-deploy
    spec:
      hostIPC: true
      containers:
        - envFrom:
            - configMapRef:
                name: qna-config
          image: tgi_gaudi:2.0.1
          name: llm-dependency-deploy-demo
          securityContext:
            capabilities:
              add:
                - SYS_NICE
          args:
            - --model-id
            - $(LLM_MODEL_ID)
            - --max-input-length
            - '2048'
            - --max-total-tokens
            - '4096'
          volumeMounts:
            - mountPath: /data
              name: model-volume
            - mountPath: /dev/shm
              name: shm
          ports:
            - containerPort: 80
          resources:
            limits:
              habana.ai/gaudi: 1
          env:
            - name: OMPI_MCA_btl_vader_single_copy_mechanism
              value: none
            - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
              value: 'true'
            - name: runtime
              value: habana
            - name: HABANA_VISIBLE_DEVICES
              value: all
            - name: HF_TOKEN
              value: $(HF_TOKEN)
      serviceAccountName: default
      volumes:
        - name: model-volume
          hostPath:
            path: /home/sdp/cesg
            type: Directory
        - name: shm
          emptyDir:
            medium: Memory
            sizeLimit: 1Gi
---
kind: Service
apiVersion: v1
metadata:
  name: llm-dependency-svc
spec:
  type: ClusterIP
  selector:
    app: llm-dependency-deploy
  ports:
    - name: service
      port: 9009
      targetPort: 80
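Each replica of this deployment requests one `habana.ai/gaudi` device, so the 15 replicas only schedule if the cluster advertises at least 15 Gaudi devices. A quick way to check what the scheduler sees (a sketch using the standard `kubectl` CLI; it assumes the Habana device plugin is installed on the nodes):

```bash
# show the Gaudi capacity each node advertises to the scheduler
kubectl describe nodes | grep -i 'habana.ai/gaudi'
```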
57 ChatQnA/benchmark/two_gaudi/llm-microservice_run.yaml Normal file
@@ -0,0 +1,57 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: apps/v1
kind: Deployment
metadata:
  name: llm-deploy
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: llm-deploy
  template:
    metadata:
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
      labels:
        app: llm-deploy
    spec:
      topologySpreadConstraints:
        - maxSkew: 1
          topologyKey: kubernetes.io/hostname
          whenUnsatisfiable: ScheduleAnyway
          labelSelector:
            matchLabels:
              app: llm-deploy
      hostIPC: true
      containers:
        - envFrom:
            - configMapRef:
                name: qna-config
          image: opea/llm-tgi:latest
          imagePullPolicy: IfNotPresent
          name: llm-deploy
          args: null
          ports:
            - containerPort: 9000
          resources:
            limits:
              cpu: 4
            requests:
              cpu: 4
      serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
  name: llm-svc
spec:
  type: ClusterIP
  selector:
    app: llm-deploy
  ports:
    - name: service
      port: 9000
      targetPort: 9000
83 ChatQnA/benchmark/two_gaudi/reranking-dependency_run.yaml Normal file
@@ -0,0 +1,83 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: apps/v1
kind: Deployment
metadata:
  name: reranking-dependency-deploy
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: reranking-dependency-deploy
  template:
    metadata:
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
      labels:
        app: reranking-dependency-deploy
    spec:
      topologySpreadConstraints:
        - maxSkew: 1
          topologyKey: kubernetes.io/hostname
          whenUnsatisfiable: ScheduleAnyway
          labelSelector:
            matchLabels:
              app: reranking-dependency-deploy
      containers:
        - envFrom:
            - configMapRef:
                name: qna-config
          image: tei_gaudi:rerank
          name: reranking-dependency-deploy
          args:
            - --model-id
            - $(RERANK_MODEL_ID)
            - --auto-truncate
          volumeMounts:
            - mountPath: /data
              name: model-volume
            - mountPath: /dev/shm
              name: shm
          ports:
            - containerPort: 80
          resources:
            limits:
              habana.ai/gaudi: 1
          env:
            - name: OMPI_MCA_btl_vader_single_copy_mechanism
              value: none
            - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
              value: 'true'
            - name: runtime
              value: habana
            - name: HABANA_VISIBLE_DEVICES
              value: all
            - name: HF_TOKEN
              value: $(HF_TOKEN)
            - name: MAX_WARMUP_SEQUENCE_LENGTH
              value: '512'
      serviceAccountName: default
      volumes:
        - name: model-volume
          hostPath:
            path: /home/sdp/cesg
            type: Directory
        - name: shm
          emptyDir:
            medium: Memory
            sizeLimit: 1Gi
---
kind: Service
apiVersion: v1
metadata:
  name: reranking-dependency-svc
spec:
  type: ClusterIP
  selector:
    app: reranking-dependency-deploy
  ports:
    - name: service
      port: 8808
      targetPort: 80
57 ChatQnA/benchmark/two_gaudi/reranking-microservice_run.yaml Normal file
@@ -0,0 +1,57 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: apps/v1
kind: Deployment
metadata:
  name: reranking-deploy
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: reranking-deploy
  template:
    metadata:
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
      labels:
        app: reranking-deploy
    spec:
      topologySpreadConstraints:
        - maxSkew: 1
          topologyKey: kubernetes.io/hostname
          whenUnsatisfiable: ScheduleAnyway
          labelSelector:
            matchLabels:
              app: reranking-deploy
      hostIPC: true
      containers:
        - envFrom:
            - configMapRef:
                name: qna-config
          image: opea/reranking-tei:latest
          imagePullPolicy: IfNotPresent
          name: reranking-deploy
          args: null
          ports:
            - containerPort: 8000
          resources:
            limits:
              cpu: 4
            requests:
              cpu: 4
      serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
  name: reranking-svc
spec:
  type: ClusterIP
  selector:
    app: reranking-deploy
  ports:
    - name: service
      port: 8000
      targetPort: 8000
67 ChatQnA/benchmark/two_gaudi/retrieval-microservice_run.yaml Normal file
@@ -0,0 +1,67 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: apps/v1
kind: Deployment
metadata:
  name: retriever-deploy
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: retriever-deploy
  template:
    metadata:
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
      labels:
        app: retriever-deploy
    spec:
      topologySpreadConstraints:
        - maxSkew: 1
          topologyKey: kubernetes.io/hostname
          whenUnsatisfiable: ScheduleAnyway
          labelSelector:
            matchLabels:
              app: retriever-deploy
      hostIPC: true
      containers:
        - env:
            - name: REDIS_URL
              valueFrom:
                configMapKeyRef:
                  name: qna-config
                  key: REDIS_URL
            - name: INDEX_NAME
              valueFrom:
                configMapKeyRef:
                  name: qna-config
                  key: INDEX_NAME
          image: opea/retriever-redis:latest
          imagePullPolicy: IfNotPresent
          name: retriever-deploy
          args: null
          ports:
            - containerPort: 7000
          resources:
            limits:
              cpu: 8
              memory: 2500Mi
            requests:
              cpu: 8
              memory: 2500Mi
      serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
  name: retriever-svc
spec:
  type: ClusterIP
  selector:
    app: retriever-deploy
  ports:
    - name: service
      port: 7000
      targetPort: 7000
46 ChatQnA/benchmark/two_gaudi/vector-db_run.yaml Normal file
@@ -0,0 +1,46 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: apps/v1
kind: Deployment
metadata:
  name: vector-db
spec:
  replicas: 1
  selector:
    matchLabels:
      app: vector-db
  template:
    metadata:
      labels:
        app: vector-db
    spec:
      topologySpreadConstraints:
        - maxSkew: 1
          topologyKey: kubernetes.io/hostname
          whenUnsatisfiable: ScheduleAnyway
          labelSelector:
            matchLabels:
              app: vector-db
      containers:
        - name: vector-db
          image: redis/redis-stack:7.2.0-v9
          ports:
            - containerPort: 6379
            - containerPort: 8001
---
apiVersion: v1
kind: Service
metadata:
  name: vector-db
spec:
  type: ClusterIP
  selector:
    app: vector-db
  ports:
    - name: vector-db-service
      port: 6379
      targetPort: 6379
    - name: vector-db-insight
      port: 8001
      targetPort: 8001
@@ -45,10 +45,10 @@ opea_micro_services:
     ports: ${RERANK_SERVICE_PORT}
     image: opea/reranking-tei:latest
     endpoint: /v1/reranking
-  tgi_service:
+  tgi-service:
     host: ${TGI_SERVICE_IP}
     ports: ${TGI_SERVICE_PORT}
-    image: ghcr.io/huggingface/tgi-gaudi:1.2.1
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.1
     volumes:
       - "./data:/data"
     runtime: habana
32 ChatQnA/docker/Dockerfile_guardrails Normal file
@@ -0,0 +1,32 @@


# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

FROM python:3.11-slim

RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
    libgl1-mesa-glx \
    libjemalloc-dev \
    vim \
    git

RUN useradd -m -s /bin/bash user && \
    mkdir -p /home/user && \
    chown -R user /home/user/

RUN cd /home/user/ && \
    git clone https://github.com/opea-project/GenAIComps.git

RUN cd /home/user/GenAIComps && pip install --no-cache-dir --upgrade pip && \
    pip install -r /home/user/GenAIComps/requirements.txt

COPY ./chatqna_guardrails.py /home/user/chatqna_guardrails.py

ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps

USER user

WORKDIR /home/user

ENTRYPOINT ["python", "chatqna_guardrails.py"]
270 ChatQnA/docker/aipc/README.md Normal file
@@ -0,0 +1,270 @@
# Build Mega Service of ChatQnA on AIPC

This document outlines the deployment process for a ChatQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on AIPC. The steps include Docker image creation, container deployment via Docker Compose, and service execution to integrate microservices such as `embedding`, `retriever`, `rerank`, and `llm`.

## 🚀 Build Docker Images

First of all, you need to build the Docker images locally from the GenAIComps repository.

```bash
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
```

### 1. Build Embedding Image

```bash
docker build --no-cache -t opea/embedding-tei:latest -f comps/embeddings/langchain/docker/Dockerfile .
```

### 2. Build Retriever Image

```bash
docker build --no-cache -t opea/retriever-redis:latest -f comps/retrievers/langchain/redis/docker/Dockerfile .
```

### 3. Build Rerank Image

```bash
docker build --no-cache -t opea/reranking-tei:latest -f comps/reranks/tei/docker/Dockerfile .
```

### 4. Build LLM Image

We use [Ollama](https://ollama.com/) as our LLM service for AIPC. Please pre-download Ollama on your PC.

```bash
docker build --no-cache -t opea/llm-ollama:latest -f comps/llms/text-generation/ollama/Dockerfile .
```

### 5. Build Dataprep Image

```bash
docker build --no-cache -t opea/dataprep-redis:latest -f comps/dataprep/redis/langchain/docker/Dockerfile .
cd ..
```

### 6. Build MegaService Docker Image

To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `chatqna.py` Python script. Build the MegaService Docker image via the command below:

```bash
git clone https://github.com/opea-project/GenAIExamples.git
cd GenAIExamples/ChatQnA/docker
docker build --no-cache -t opea/chatqna:latest -f Dockerfile .
cd ../../..
```

### 7. Build UI Docker Image

Build the frontend Docker image via the command below:

```bash
cd GenAIExamples/ChatQnA/docker/ui/
docker build --no-cache -t opea/chatqna-ui:latest -f ./docker/Dockerfile .
cd ../../../..
```

Then run the command `docker images`; you will have the following 7 Docker images:

1. `opea/dataprep-redis:latest`
2. `opea/embedding-tei:latest`
3. `opea/retriever-redis:latest`
4. `opea/reranking-tei:latest`
5. `opea/llm-ollama:latest`
6. `opea/chatqna:latest`
7. `opea/chatqna-ui:latest`
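To confirm all seven images are present before starting the stack, you can filter the local image list (a quick check; the `grep` pattern is only a convenience and assumes your other images do not share the `opea/` prefix):

```bash
docker images | grep 'opea/'
```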
## 🚀 Start Microservices

### Setup Environment Variables

Since the `compose.yaml` will consume some environment variables, you need to set them up in advance as below.

**Export the value of the public IP address of your AIPC to the `host_ip` environment variable**

> Change the External_Public_IP below with the actual IPV4 value

```
export host_ip="External_Public_IP"
```

For Linux users, please run `hostname -I | awk '{print $1}'`. For Windows users, please run `ipconfig | findstr /i "IPv4"` to get the external public IP.

**Export the value of your Huggingface API token to the `your_hf_api_token` environment variable**

> Change the Your_Huggingface_API_Token below with your actual Huggingface API Token value

```
export your_hf_api_token="Your_Huggingface_API_Token"
```

**Append the value of the public IP address to the no_proxy list**

```
export your_no_proxy=${your_no_proxy},"External_Public_IP"
```

- Linux PC

```bash
export no_proxy=${your_no_proxy}
export http_proxy=${your_http_proxy}
export https_proxy=${your_http_proxy}
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006"
export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
export REDIS_URL="redis://${host_ip}:6379"
export INDEX_NAME="rag-redis"
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
export MEGA_SERVICE_HOST_IP=${host_ip}
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
export RERANK_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/chatqna"
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"

export OLLAMA_ENDPOINT=http://${host_ip}:11434
export OLLAMA_MODEL="llama3"
```

- Windows PC

```bash
set EMBEDDING_MODEL_ID=BAAI/bge-base-en-v1.5
set RERANK_MODEL_ID=BAAI/bge-reranker-base
set TEI_EMBEDDING_ENDPOINT=http://%host_ip%:6006
set TEI_RERANKING_ENDPOINT=http://%host_ip%:8808
set REDIS_URL=redis://%host_ip%:6379
set INDEX_NAME=rag-redis
set HUGGINGFACEHUB_API_TOKEN=%your_hf_api_token%
set MEGA_SERVICE_HOST_IP=%host_ip%
set EMBEDDING_SERVICE_HOST_IP=%host_ip%
set RETRIEVER_SERVICE_HOST_IP=%host_ip%
set RERANK_SERVICE_HOST_IP=%host_ip%
set LLM_SERVICE_HOST_IP=%host_ip%
set BACKEND_SERVICE_ENDPOINT=http://%host_ip%:8888/v1/chatqna
set DATAPREP_SERVICE_ENDPOINT=http://%host_ip%:6007/v1/dataprep

set OLLAMA_ENDPOINT=http://host.docker.internal:11434
set OLLAMA_MODEL="llama3"
```

Note: Please replace `host_ip` with your external IP address; do not use localhost.
### Start all the services Docker Containers

> Before running the docker compose command, you need to be in the folder that has the docker compose yaml file

```bash
cd GenAIExamples/ChatQnA/docker/aipc/
docker compose up -d

# run the ollama service
# e.g. ollama run llama3
ollama run $OLLAMA_MODEL
# for windows
# ollama run %OLLAMA_MODEL%
```
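Before validating the individual services, it is worth confirming that every container started and stayed up. A quick sanity check (using the standard Docker Compose CLI; the backend container name comes from the `compose.yaml` shown later on this page):

```bash
# list service status for this compose project
docker compose ps

# tail the logs of one service if it is restarting, e.g. the backend
docker logs chatqna-aipc-backend-server
```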
### Validate Microservices

1. TEI Embedding Service

```bash
curl ${host_ip}:6006/embed \
    -X POST \
    -d '{"inputs":"What is Deep Learning?"}' \
    -H 'Content-Type: application/json'
```

2. Embedding Microservice

```bash
curl http://${host_ip}:6000/v1/embeddings \
    -X POST \
    -d '{"text":"hello"}' \
    -H 'Content-Type: application/json'
```

3. Retriever Microservice

To validate the retriever microservice, you need to generate a mock embedding vector of length 768 with a Python script:

```bash
export your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
curl http://${host_ip}:7000/v1/retrieval \
    -X POST \
    -d '{"text":"What is the revenue of Nike in 2023?","embedding":"'"${your_embedding}"'"}' \
    -H 'Content-Type: application/json'
```

4. TEI Reranking Service

```bash
curl http://${host_ip}:8808/rerank \
    -X POST \
    -d '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' \
    -H 'Content-Type: application/json'
```

5. Reranking Microservice

```bash
curl http://${host_ip}:8000/v1/reranking \
    -X POST \
    -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
    -H 'Content-Type: application/json'
```

6. Ollama Service

```bash
curl http://${host_ip}:11434/api/generate -d '{"model": "llama3", "prompt":"What is Deep Learning?"}'
```

7. LLM Microservice

```bash
curl http://${host_ip}:9000/v1/chat/completions \
    -X POST \
    -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
    -H 'Content-Type: application/json'
```

8. MegaService

```bash
curl http://${host_ip}:8888/v1/chatqna -H "Content-Type: application/json" -d '{
     "messages": "What is the revenue of Nike in 2023?", "model": "'"${OLLAMA_MODEL}"'"
     }'
```

9. Dataprep Microservice (Optional)

If you want to update the default knowledge base, you can use the following commands:

Update Knowledge Base via Local File Upload:

```bash
curl -X POST "http://${host_ip}:6007/v1/dataprep" \
     -H "Content-Type: multipart/form-data" \
     -F "files=@./nke-10k-2023.pdf"
```

This command updates a knowledge base by uploading a local file for processing. Update the file path according to your environment.

Add Knowledge Base via HTTP Links:

```bash
curl -X POST "http://${host_ip}:6007/v1/dataprep" \
     -H "Content-Type: multipart/form-data" \
     -F 'link_list=["https://opea.dev"]'
```

This command updates a knowledge base by submitting a list of HTTP links for processing.
## 🚀 Launch the UI

To access the frontend, open the following URL in your browser: http://{host_ip}:5173.
171 ChatQnA/docker/aipc/compose.yaml Normal file
@@ -0,0 +1,171 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

version: "3.8"

services:
  redis-vector-db:
    image: redis/redis-stack:7.2.0-v9
    container_name: redis-vector-db
    ports:
      - "6379:6379"
      - "8001:8001"
  dataprep-redis-service:
    image: opea/dataprep-redis:latest
    container_name: dataprep-redis-server
    depends_on:
      - redis-vector-db
    ports:
      - "6007:6007"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      REDIS_URL: ${REDIS_URL}
      INDEX_NAME: ${INDEX_NAME}
  tei-embedding-service:
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
    container_name: tei-embedding-server
    ports:
      - "6006:80"
    volumes:
      - "./data:/data"
    shm_size: 1g
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
    command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
  embedding:
    image: opea/embedding-tei:latest
    container_name: embedding-tei-server
    depends_on:
      - tei-embedding-service
    ports:
      - "6000:6000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-embedding-service"
    restart: unless-stopped
  retriever:
    image: opea/retriever-redis:latest
    container_name: retriever-redis-server
    depends_on:
      - redis-vector-db
    ports:
      - "7000:7000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      REDIS_URL: ${REDIS_URL}
      INDEX_NAME: ${INDEX_NAME}
      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-retriever-service"
    restart: unless-stopped
  tei-reranking-service:
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
    container_name: tei-reranking-server
    ports:
      - "8808:80"
    volumes:
      - "./data:/data"
    shm_size: 1g
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
    command: --model-id ${RERANK_MODEL_ID} --auto-truncate
  reranking:
    image: opea/reranking-tei:latest
    container_name: reranking-tei-aipc-server
    depends_on:
      - tei-reranking-service
    ports:
      - "8000:8000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-reranking-service"
    restart: unless-stopped
  llm:
    image: opea/llm-ollama
    container_name: llm-ollama
    ports:
      - "9000:9000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-llm-service"
      OLLAMA_ENDPOINT: ${OLLAMA_ENDPOINT}
  chaqna-aipc-backend-server:
    image: opea/chatqna:latest
    container_name: chatqna-aipc-backend-server
    depends_on:
      - redis-vector-db
      - tei-embedding-service
      - embedding
      - retriever
      - tei-reranking-service
      - reranking
      - llm
    ports:
      - "8888:8888"
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
      - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
      - EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
      - RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
      - RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP}
      - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
    ipc: host
    restart: always
  chaqna-aipc-ui-server:
    image: opea/chatqna-ui:latest
    container_name: chatqna-aipc-ui-server
    depends_on:
      - chaqna-aipc-backend-server
    ports:
      - "5173:5173"
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
      - CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
      - UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT}
    ipc: host
    restart: always

networks:
  default:
    driver: bridge
@@ -1,7 +1,6 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0

-import asyncio
 import os

 from comps import ChatQnAGateway, MicroService, ServiceOrchestrator, ServiceType
88 ChatQnA/docker/chatqna_guardrails.py Normal file
@@ -0,0 +1,88 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import os

from comps import ChatQnAGateway, MicroService, ServiceOrchestrator, ServiceType

MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "0.0.0.0")
MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))
GUARDRAIL_SERVICE_HOST_IP = os.getenv("GUARDRAIL_SERVICE_HOST_IP", "0.0.0.0")
GUARDRAIL_SERVICE_PORT = int(os.getenv("GUARDRAIL_SERVICE_PORT", 9090))
EMBEDDING_SERVICE_HOST_IP = os.getenv("EMBEDDING_SERVICE_HOST_IP", "0.0.0.0")
EMBEDDING_SERVICE_PORT = int(os.getenv("EMBEDDING_SERVICE_PORT", 6000))
RETRIEVER_SERVICE_HOST_IP = os.getenv("RETRIEVER_SERVICE_HOST_IP", "0.0.0.0")
RETRIEVER_SERVICE_PORT = int(os.getenv("RETRIEVER_SERVICE_PORT", 7000))
RERANK_SERVICE_HOST_IP = os.getenv("RERANK_SERVICE_HOST_IP", "0.0.0.0")
RERANK_SERVICE_PORT = int(os.getenv("RERANK_SERVICE_PORT", 8000))
LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0")
LLM_SERVICE_PORT = int(os.getenv("LLM_SERVICE_PORT", 9000))


class ChatQnAService:
    def __init__(self, host="0.0.0.0", port=8000):
        self.host = host
        self.port = port
        self.megaservice = ServiceOrchestrator()

    def add_remote_service(self):
        guardrail_in = MicroService(
            name="guardrail_in",
            host=GUARDRAIL_SERVICE_HOST_IP,
            port=GUARDRAIL_SERVICE_PORT,
            endpoint="/v1/guardrails",
            use_remote_service=True,
            service_type=ServiceType.GUARDRAIL,
        )
        embedding = MicroService(
            name="embedding",
            host=EMBEDDING_SERVICE_HOST_IP,
            port=EMBEDDING_SERVICE_PORT,
            endpoint="/v1/embeddings",
            use_remote_service=True,
            service_type=ServiceType.EMBEDDING,
        )
        retriever = MicroService(
            name="retriever",
            host=RETRIEVER_SERVICE_HOST_IP,
            port=RETRIEVER_SERVICE_PORT,
            endpoint="/v1/retrieval",
            use_remote_service=True,
            service_type=ServiceType.RETRIEVER,
        )
        rerank = MicroService(
            name="rerank",
            host=RERANK_SERVICE_HOST_IP,
            port=RERANK_SERVICE_PORT,
            endpoint="/v1/reranking",
            use_remote_service=True,
            service_type=ServiceType.RERANK,
        )
        llm = MicroService(
            name="llm",
            host=LLM_SERVICE_HOST_IP,
            port=LLM_SERVICE_PORT,
            endpoint="/v1/chat/completions",
            use_remote_service=True,
            service_type=ServiceType.LLM,
        )
        guardrail_out = MicroService(
            name="guardrail_out",
            host=GUARDRAIL_SERVICE_HOST_IP,
            port=GUARDRAIL_SERVICE_PORT,
            endpoint="/v1/guardrails",
            use_remote_service=True,
            service_type=ServiceType.GUARDRAIL,
        )
        self.megaservice.add(guardrail_in).add(embedding).add(retriever).add(rerank).add(llm).add(guardrail_out)
        self.megaservice.flow_to(guardrail_in, embedding)
        self.megaservice.flow_to(embedding, retriever)
        self.megaservice.flow_to(retriever, rerank)
        self.megaservice.flow_to(rerank, llm)
        self.megaservice.flow_to(llm, guardrail_out)
        self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)


if __name__ == "__main__":
    chatqna = ChatQnAService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT)
    chatqna.add_remote_service()
@@ -28,15 +28,61 @@ docker build --no-cache -t opea/retriever-redis:latest --build-arg https_proxy=$
 ### 4. Build Rerank Image

 ```bash
-docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/langchain/docker/Dockerfile .
+docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/tei/docker/Dockerfile .
 ```

 ### 5. Build LLM Image

+You can use different LLM serving solutions; choose one of the following four options.
+
+#### 5.1 Use TGI
+
+```bash
+docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile .
+```
+
+#### 5.2 Use VLLM
+
+Build the vLLM docker image.
+
+```bash
+docker build --no-cache -t vllm:hpu --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/vllm/docker/Dockerfile.hpu .
+```
+
+Build the microservice docker image.
+
+```bash
+docker build --no-cache -t opea/llm-vllm:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/vllm/docker/Dockerfile.microservice .
+```
+
+#### 5.3 Use VLLM-on-Ray
+
+Build the vllm-on-ray docker image.
+
+```bash
+docker build --no-cache -t vllm_ray:habana --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/vllm-ray/docker/Dockerfile.vllmray .
+```
+
+Build the microservice docker image.
+
+```bash
+docker build --no-cache -t opea/llm-vllm-ray:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/vllm-ray/docker/Dockerfile.microservice .
+```
+
+#### 5.4 Use Ray Serve
+
+Build the Ray Serve docker image.
+
+```bash
+docker build --no-cache -t ray_serve:habana --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/ray_serve/docker/Dockerfile.rayserve .
+```
+
+Build the microservice docker image.
+
+```bash
+docker build --no-cache -t opea/llm-ray:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/ray_serve/docker/Dockerfile.microservice .
+```
+
 ### 6. Build Dataprep Image

 ```bash
@@ -65,6 +111,15 @@ docker build --no-cache -t opea/chatqna:latest --build-arg https_proxy=$https_pr
 cd ../../..
 ```

+If you want to enable the guardrails microservice in the pipeline, please use the below command instead:
+
+```bash
+git clone https://github.com/opea-project/GenAIExamples.git
+cd GenAIExamples/ChatQnA/docker
+docker build --no-cache -t opea/chatqna-guardrails:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile_guardrails .
+cd ../../..
+```
+
 ### 9. Build UI Docker Image

 Construct the frontend Docker image using the command below:
@@ -75,22 +130,54 @@ docker build --no-cache -t opea/chatqna-ui:latest --build-arg https_proxy=$https
 cd ../../../..
 ```

+### 10. Build Conversational React UI Docker Image (Optional)
+
+Build the frontend Docker image that enables a conversational experience with the ChatQnA megaservice via the command below:
+
+**Export the value of the public IP address of your Gaudi node to the `host_ip` environment variable**
+
+```bash
+cd GenAIExamples/ChatQnA/docker/ui/
+export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/chatqna"
+export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
+docker build --no-cache -t opea/chatqna-conversation-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT --build-arg DATAPREP_SERVICE_ENDPOINT=$DATAPREP_SERVICE_ENDPOINT -f ./docker/Dockerfile.react .
+cd ../../../..
+```
+
+### 11. Build Guardrails Docker Image (Optional)
+
+To fortify AI initiatives in production, the Guardrails microservice can secure model inputs and outputs, helping to build trustworthy, safe, and secure LLM-based applications.
+
+```bash
+cd GenAIExamples/ChatQnA/docker
+docker build -t opea/guardrails-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/guardrails/langchain/docker/Dockerfile .
+cd ../../..
+```
+
 Then run the command `docker images`; you will have the following 8 Docker images:

 1. `opea/embedding-tei:latest`
 2. `opea/retriever-redis:latest`
 3. `opea/reranking-tei:latest`
-4. `opea/llm-tgi:latest`
+4. `opea/llm-tgi:latest` or `opea/llm-vllm:latest` or `opea/llm-vllm-ray:latest` or `opea/llm-ray:latest`
 5. `opea/tei-gaudi:latest`
 6. `opea/dataprep-redis:latest`
-7. `opea/chatqna:latest`
+7. `opea/chatqna:latest` or `opea/chatqna-guardrails:latest`
 8. `opea/chatqna-ui:latest`

+If the Conversation React UI is built, you will find one more image:
+
+9. `opea/chatqna-conversation-ui:latest`
+
+If the Guardrails docker image is built, you will find one more image:
+
+10. `opea/guardrails-tgi:latest`
+
 ## 🚀 Start MicroServices and MegaService

 ### Setup Environment Variables

-Since the `docker_compose.yaml` will consume some environment variables, you need to setup them in advance as below.
+Since the `compose.yaml` will consume some environment variables, you need to set them up in advance as below.

 ```bash
 export no_proxy=${your_no_proxy}
@@ -99,9 +186,14 @@ export https_proxy=${your_http_proxy}
 export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
 export RERANK_MODEL_ID="BAAI/bge-reranker-base"
 export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
+export LLM_MODEL_ID_NAME="neural-chat-7b-v3-3"
 export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:8090"
 export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
 export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
+export vLLM_LLM_ENDPOINT="http://${host_ip}:8008"
+export vLLM_RAY_LLM_ENDPOINT="http://${host_ip}:8008"
+export RAY_Serve_LLM_ENDPOINT="http://${host_ip}:8008"
+export LLM_SERVICE_PORT=9000
 export REDIS_URL="redis://${host_ip}:6379"
 export INDEX_NAME="rag-redis"
 export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
@@ -112,6 +204,17 @@ export RERANK_SERVICE_HOST_IP=${host_ip}
 export LLM_SERVICE_HOST_IP=${host_ip}
 export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/chatqna"
 export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
+export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6008/v1/dataprep/get_file"
+export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6009/v1/dataprep/delete_file"
 ```

+If the guardrails microservice is enabled in the pipeline, the below environment variables are necessary to be set.
+
+```bash
+export GURADRAILS_MODEL_ID="meta-llama/Meta-Llama-Guard-2-8B"
+export SAFETY_GUARD_MODEL_ID="meta-llama/Meta-Llama-Guard-2-8B"
+export SAFETY_GUARD_ENDPOINT="http://${host_ip}:8088"
+export GUARDRAIL_SERVICE_HOST_IP=${host_ip}
+```
+
 Note: Please replace `host_ip` with your external IP address, do **NOT** use localhost.
@@ -120,11 +223,44 @@ Note: Please replace `host_ip` with your external IP address, do **NOT** use localhost.

 ```bash
 cd GenAIExamples/ChatQnA/docker/gaudi/
-docker compose -f docker_compose.yaml up -d
 ```

+If using TGI for the LLM backend:
+
+```bash
+docker compose -f compose.yaml up -d
+```
+
+If using vLLM for the LLM backend:
+
+```bash
+docker compose -f compose_vllm.yaml up -d
+```
+
+If using vLLM-on-Ray for the LLM backend:
+
+```bash
+docker compose -f compose_vllm_ray.yaml up -d
+```
+
+If using Ray Serve for the LLM backend:
+
+```bash
+docker compose -f compose_ray_serve.yaml up -d
+```
+
+If you want to enable the guardrails microservice in the pipeline, please follow the below command instead:
+
+```bash
+cd GenAIExamples/ChatQnA/docker/gaudi/
+docker compose -f compose_guardrails.yaml up -d
+```
+
 ### Validate MicroServices and MegaService

-Follow the instructions to validate MicroServices.
+For validation details, please refer to [how-to-validate_service](./how_to_validate_service.md).

 1. TEI Embedding Service

 ```bash
@@ -145,21 +281,17 @@ curl http://${host_ip}:6000/v1/embeddings \

 3. Retriever Microservice

-To consume the retriever microservice, you need to generate a mock embedding vector of length 768 in Python script:
+To consume the retriever microservice, you need to generate a mock embedding vector with a Python script. The length of the embedding vector
+is determined by the embedding model.
+Here we use the model `EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"`, whose vector size is 768.

-```python
-import random
-
-embedding = [random.uniform(-1, 1) for _ in range(768)]
-print(embedding)
-```
-
-Then substitute your mock embedding vector for the `${your_embedding}` in the following `curl` command:
+Check the vector dimension of your embedding model and set the `your_embedding` dimension to match it.

 ```bash
+export your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
 curl http://${host_ip}:7000/v1/retrieval \
     -X POST \
-    -d '{"text":"test", "embedding":${your_embedding}}' \
+    -d "{\"text\":\"test\",\"embedding\":${your_embedding}}" \
     -H 'Content-Type: application/json'
 ```
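If you are unsure of the dimension for a different embedding model, one way to discover it empirically is to embed a test string against the running TEI service and measure the returned vector (a sketch, assuming the TEI embedding endpoint from the environment setup above is reachable on port 8090):

```bash
# the TEI /embed route returns a list of vectors; the length of the first one is the model's dimension
curl -s ${host_ip}:8090/embed -X POST -d '{"inputs":"test"}' -H 'Content-Type: application/json' \
  | python3 -c "import sys, json; print(len(json.load(sys.stdin)[0]))"
```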
@@ -181,15 +313,42 @@ curl http://${host_ip}:8000/v1/reranking \
     -H 'Content-Type: application/json'
 ```

-6. TGI Service
+6. LLM backend Service

 ```bash
+# TGI Service
 curl http://${host_ip}:8008/generate \
     -X POST \
     -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":64, "do_sample": true}}' \
     -H 'Content-Type: application/json'
 ```

+```bash
+# vLLM Service
+curl http://${your_ip}:8008/v1/completions \
+    -H "Content-Type: application/json" \
+    -d '{
+        "model": "${LLM_MODEL_ID}",
+        "prompt": "What is Deep Learning?",
+        "max_tokens": 32,
+        "temperature": 0
+        }'
+```
+
+```bash
+# vLLM-on-Ray Service
+curl http://${your_ip}:8008/v1/chat/completions \
+    -H "Content-Type: application/json" \
+    -d '{"model": "${LLM_MODEL_ID}", "messages": [{"role": "user", "content": "What is Deep Learning?"}]}'
+```
+
+```bash
+# Ray Serve Service
+curl http://${your_ip}:8008/v1/chat/completions \
+    -H "Content-Type: application/json" \
+    -d '{"model": "${LLM_MODEL_ID_NAME}", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 32 }'
+```
+
 7. LLM Microservice

 ```bash
@@ -231,9 +390,44 @@ curl -X POST "http://${host_ip}:6007/v1/dataprep" \

 This command updates a knowledge base by submitting a list of HTTP links for processing.

+Also, you are able to get the list of files/links that you uploaded:
+
+```bash
+curl -X POST "http://${host_ip}:6008/v1/dataprep/get_file" \
+     -H "Content-Type: application/json"
+```
+
+To delete the file/link you uploaded:
+
+```bash
+# delete link
+curl -X POST "http://${host_ip}:6009/v1/dataprep/delete_file" \
+     -d '{"file_path": "https://opea.dev"}' \
+     -H "Content-Type: application/json"
+
+# delete file
+curl -X POST "http://${host_ip}:6009/v1/dataprep/delete_file" \
+     -d '{"file_path": "nke-10k-2023.pdf"}' \
+     -H "Content-Type: application/json"
+
+# delete all uploaded files and links
+curl -X POST "http://${host_ip}:6009/v1/dataprep/delete_file" \
+     -d '{"file_path": "all"}' \
+     -H "Content-Type: application/json"
+```
+
+10. Guardrails (Optional)
+
+```bash
+curl http://${host_ip}:9090/v1/guardrails \
+    -X POST \
+    -d '{"text":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}' \
+    -H 'Content-Type: application/json'
+```
+
 ## Enable LangSmith for Monitoring Application (Optional)

-LangSmith offers tools to debug, evaluate, and monitor language models and intelligent agents. It can be used to assess benchmark data for each microservice. Before launching your services with `docker compose -f docker_compose.yaml up -d`, you need to enable LangSmith tracing by setting the `LANGCHAIN_TRACING_V2` environment variable to true and configuring your LangChain API key.
+LangSmith offers tools to debug, evaluate, and monitor language models and intelligent agents. It can be used to assess benchmark data for each microservice. Before launching your services with `docker compose -f compose.yaml up -d`, you need to enable LangSmith tracing by setting the `LANGCHAIN_TRACING_V2` environment variable to true and configuring your LangChain API key.

 Here's how you can do it:

@@ -252,7 +446,7 @@ export LANGCHAIN_API_KEY=ls_...

 ## 🚀 Launch the UI

-To access the frontend, open the following URL in your browser: http://{host_ip}:5173. By default, the UI runs on port 5173 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `docker_compose.yaml` file as shown below:
+To access the frontend, open the following URL in your browser: http://{host_ip}:5173. By default, the UI runs on port 5173 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `compose.yaml` file as shown below:

 ```yaml
 chaqna-gaudi-ui-server:
@@ -263,3 +457,41 @@ To access the frontend, open the following URL in your browser: http://{host_ip}
 ```

 

+Here is an example of running ChatQnA:
+
+
+
+## 🚀 Launch the Conversational UI (Optional)
+
+To access the Conversational UI (React-based) frontend, modify the UI service in the `compose.yaml` file. Replace the `chaqna-gaudi-ui-server` service with the `chatqna-gaudi-conversation-ui-server` service as per the config below:
+
+```yaml
+chaqna-gaudi-conversation-ui-server:
+  image: opea/chatqna-conversation-ui:latest
+  container_name: chatqna-gaudi-conversation-ui-server
+  environment:
+    - no_proxy=${no_proxy}
+    - https_proxy=${https_proxy}
+    - http_proxy=${http_proxy}
+  ports:
+    - "5174:80"
+  depends_on:
+    - chaqna-gaudi-backend-server
+  ipc: host
+  restart: always
+```
+
+Once the services are up, open the following URL in your browser: http://{host_ip}:5174. By default, the UI runs on port 80 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `compose.yaml` file as shown below:
+
+```yaml
+chaqna-gaudi-conversation-ui-server:
+  image: opea/chatqna-conversation-ui:latest
+  ...
+  ports:
+    - "80:80"
+```
+
+Here is an example of running ChatQnA with Conversational UI (React):
+
+
@@ -18,6 +18,8 @@ services:
      - redis-vector-db
    ports:
      - "6007:6007"
      - "6008:6008"
      - "6009:6009"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
@@ -41,7 +43,10 @@ services:
      https_proxy: ${https_proxy}
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
    command: --model-id ${EMBEDDING_MODEL_ID}
      MAX_WARMUP_SEQUENCE_LENGTH: 512
      INIT_HCCL_ON_ACQUIRE: 0
      ENABLE_EXPERIMENTAL_FLAGS: true
    command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
  embedding:
    image: opea/embedding-tei:latest
    container_name: embedding-tei-server
@@ -78,7 +83,7 @@ services:
      LANGCHAIN_PROJECT: "opea-retriever-service"
    restart: unless-stopped
  tei-reranking-service:
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
    container_name: tei-reranking-gaudi-server
    ports:
      - "8808:80"
@@ -114,7 +119,7 @@ services:
      LANGCHAIN_PROJECT: "opea-reranking-service"
    restart: unless-stopped
  tgi-service:
    image: ghcr.io/huggingface/tgi-gaudi:1.2.1
    image: ghcr.io/huggingface/tgi-gaudi:2.0.1
    container_name: tgi-gaudi-server
    ports:
      - "8008:80"
@@ -133,7 +138,7 @@ services:
    cap_add:
      - SYS_NICE
    ipc: host
    command: --model-id ${LLM_MODEL_ID}
    command: --model-id ${LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048
  llm:
    image: opea/llm-tgi:latest
    container_name: llm-tgi-gaudi-server
@@ -192,6 +197,8 @@ services:
      - http_proxy=${http_proxy}
      - CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
      - UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT}
      - GET_FILE=${DATAPREP_GET_FILE_ENDPOINT}
      - DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT}
    ipc: host
    restart: always
244  ChatQnA/docker/gaudi/compose_guardrails.yaml  Normal file
@@ -0,0 +1,244 @@

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

version: "3.8"

services:
  redis-vector-db:
    image: redis/redis-stack:7.2.0-v9
    container_name: redis-vector-db
    ports:
      - "6379:6379"
      - "8001:8001"
  dataprep-redis-service:
    image: opea/dataprep-redis:latest
    container_name: dataprep-redis-server
    depends_on:
      - redis-vector-db
    ports:
      - "6007:6007"
      - "6008:6008"
      - "6009:6009"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      REDIS_URL: ${REDIS_URL}
      INDEX_NAME: ${INDEX_NAME}
  tgi-guardrails-service:
    image: ghcr.io/huggingface/tgi-gaudi:2.0.1
    container_name: tgi-guardrails-server
    ports:
      - "8088:80"
    volumes:
      - "./data:/data"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
    runtime: habana
    cap_add:
      - SYS_NICE
    ipc: host
    command: --model-id ${GURADRAILS_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048
  guardrails:
    image: opea/guardrails-tgi:latest
    container_name: guardrails-tgi-gaudi-server
    ports:
      - "9090:9090"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      SAFETY_GUARD_MODEL_ID: ${SAFETY_GUARD_MODEL_ID}
      SAFETY_GUARD_ENDPOINT: ${SAFETY_GUARD_ENDPOINT}
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
    restart: unless-stopped
  tei-embedding-service:
    image: opea/tei-gaudi:latest
    container_name: tei-embedding-gaudi-server
    ports:
      - "8090:80"
    volumes:
      - "./data:/data"
    runtime: habana
    cap_add:
      - SYS_NICE
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
      MAX_WARMUP_SEQUENCE_LENGTH: 512
    command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
  embedding:
    image: opea/embedding-tei:latest
    container_name: embedding-tei-server
    depends_on:
      - tei-embedding-service
    ports:
      - "6000:6000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-embedding-service"
    restart: unless-stopped
  retriever:
    image: opea/retriever-redis:latest
    container_name: retriever-redis-server
    depends_on:
      - redis-vector-db
    ports:
      - "7000:7000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      REDIS_URL: ${REDIS_URL}
      INDEX_NAME: ${INDEX_NAME}
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-retriever-service"
    restart: unless-stopped
  tei-reranking-service:
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
    container_name: tei-reranking-gaudi-server
    ports:
      - "8808:80"
    volumes:
      - "./data:/data"
    shm_size: 1g
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
    command: --model-id ${RERANK_MODEL_ID} --auto-truncate
  reranking:
    image: opea/reranking-tei:latest
    container_name: reranking-tei-gaudi-server
    depends_on:
      - tei-reranking-service
    ports:
      - "8000:8000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-reranking-service"
    restart: unless-stopped
  tgi-service:
    image: ghcr.io/huggingface/tgi-gaudi:2.0.1
    container_name: tgi-gaudi-server
    ports:
      - "8008:80"
    volumes:
      - "./data:/data"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
    runtime: habana
    cap_add:
      - SYS_NICE
    ipc: host
    command: --model-id ${LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048
  llm:
    image: opea/llm-tgi:latest
    container_name: llm-tgi-gaudi-server
    depends_on:
      - tgi-service
    ports:
      - "9000:9000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-llm-service"
    restart: unless-stopped
  chaqna-gaudi-backend-server:
    image: opea/chatqna-guardrails:latest
    container_name: chatqna-gaudi-guardrails-server
    depends_on:
      - redis-vector-db
      - tgi-guardrails-service
      - guardrails
      - tei-embedding-service
      - embedding
      - retriever
      - tei-reranking-service
      - reranking
      - tgi-service
      - llm
    ports:
      - "8888:8888"
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
      - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
      - GUARDRAIL_SERVICE_HOST_IP=${GUARDRAIL_SERVICE_HOST_IP}
      - EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
      - RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
      - RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP}
      - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
    ipc: host
    restart: always
  chaqna-gaudi-ui-server:
    image: opea/chatqna-ui:latest
    container_name: chatqna-gaudi-ui-server
    depends_on:
      - chaqna-gaudi-backend-server
    ports:
      - "5173:5173"
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
      - CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
      - UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT}
      - GET_FILE=${DATAPREP_GET_FILE_ENDPOINT}
      - DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT}
    ipc: host
    restart: always

networks:
  default:
    driver: bridge
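To bring up this guardrails variant instead of the default stack, a launch along these lines should work (a sketch; run from `ChatQnA/docker/gaudi` with the environment variables above exported):

```bash
docker compose -f compose_guardrails.yaml up -d
```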
202  ChatQnA/docker/gaudi/compose_ray_serve.yaml  Normal file
@@ -0,0 +1,202 @@

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

version: "3.8"

services:
  redis-vector-db:
    image: redis/redis-stack:7.2.0-v9
    container_name: redis-vector-db
    ports:
      - "6379:6379"
      - "8001:8001"
  dataprep-redis-service:
    image: opea/dataprep-redis:latest
    container_name: dataprep-redis-server
    depends_on:
      - redis-vector-db
    ports:
      - "6007:6007"
      - "6008:6008"
      - "6009:6009"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      REDIS_URL: ${REDIS_URL}
      INDEX_NAME: ${INDEX_NAME}
  tei-embedding-service:
    image: opea/tei-gaudi:latest
    container_name: tei-embedding-gaudi-server
    ports:
      - "8090:80"
    volumes:
      - "./data:/data"
    runtime: habana
    cap_add:
      - SYS_NICE
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
      MAX_WARMUP_SEQUENCE_LENGTH: 512
    command: --model-id ${EMBEDDING_MODEL_ID}
  embedding:
    image: opea/embedding-tei:latest
    container_name: embedding-tei-server
    depends_on:
      - tei-embedding-service
    ports:
      - "6000:6000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-embedding-service"
    restart: unless-stopped
  retriever:
    image: opea/retriever-redis:latest
    container_name: retriever-redis-server
    depends_on:
      - redis-vector-db
    ports:
      - "7000:7000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      REDIS_URL: ${REDIS_URL}
      INDEX_NAME: ${INDEX_NAME}
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-retriever-service"
    restart: unless-stopped
  tei-reranking-service:
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
    container_name: tei-reranking-gaudi-server
    ports:
      - "8808:80"
    volumes:
      - "./data:/data"
    shm_size: 1g
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
    command: --model-id ${RERANK_MODEL_ID} --auto-truncate
  reranking:
    image: opea/reranking-tei:latest
    container_name: reranking-tei-gaudi-server
    depends_on:
      - tei-reranking-service
    ports:
      - "8000:8000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-reranking-service"
    restart: unless-stopped
  ray-service:
    image: ray_serve:habana
    container_name: ray-gaudi-server
    ports:
      - "8008:80"
    volumes:
      - "./data:/data"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
      LLM_MODEL: ${LLM_MODEL_ID}
      TRUST_REMOTE_CODE: True
    runtime: habana
    cap_add:
      - SYS_NICE
    ipc: host
    command: /bin/bash -c "ray start --head && python api_server_openai.py --port_number 80 --model_id_or_path $LLM_MODEL --chat_processor ChatModelLlama --num_cpus_per_worker 8 --num_hpus_per_worker 1"
  llm:
    image: opea/llm-ray:latest
    container_name: llm-ray-gaudi-server
    depends_on:
      - ray-service
    ports:
      - "9000:9000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      RAY_Serve_ENDPOINT: ${RAY_Serve_LLM_ENDPOINT}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      LLM_MODEL: ${LLM_MODEL_ID}
    restart: unless-stopped
  chaqna-gaudi-backend-server:
    image: opea/chatqna:latest
    container_name: chatqna-gaudi-backend-server
    depends_on:
      - redis-vector-db
      - tei-embedding-service
      - embedding
      - retriever
      - tei-reranking-service
      - reranking
      - ray-service
      - llm
    ports:
      - "8888:8888"
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
      - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
      - EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
      - RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
      - RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP}
      - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
      - LLM_SERVICE_PORT=${LLM_SERVICE_PORT}
    ipc: host
    restart: always
  chaqna-gaudi-ui-server:
    image: opea/chatqna-ui:latest
    container_name: chatqna-gaudi-ui-server
    depends_on:
      - chaqna-gaudi-backend-server
    ports:
      - "5173:5173"
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
      - CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
      - UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT}
      - GET_FILE=${DATAPREP_GET_FILE_ENDPOINT}
      - DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT}
    ipc: host
    restart: always

networks:
  default:
    driver: bridge
201  ChatQnA/docker/gaudi/compose_vllm.yaml  Normal file
@@ -0,0 +1,201 @@

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

version: "3.8"

services:
  redis-vector-db:
    image: redis/redis-stack:7.2.0-v9
    container_name: redis-vector-db
    ports:
      - "6379:6379"
      - "8001:8001"
  dataprep-redis-service:
    image: opea/dataprep-redis:latest
    container_name: dataprep-redis-server
    depends_on:
      - redis-vector-db
    ports:
      - "6007:6007"
      - "6008:6008"
      - "6009:6009"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      REDIS_URL: ${REDIS_URL}
      INDEX_NAME: ${INDEX_NAME}
  tei-embedding-service:
    image: opea/tei-gaudi:latest
    container_name: tei-embedding-gaudi-server
    ports:
      - "8090:80"
    volumes:
      - "./data:/data"
    runtime: habana
    cap_add:
      - SYS_NICE
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
      MAX_WARMUP_SEQUENCE_LENGTH: 512
    command: --model-id ${EMBEDDING_MODEL_ID}
  embedding:
    image: opea/embedding-tei:latest
    container_name: embedding-tei-server
    depends_on:
      - tei-embedding-service
    ports:
      - "6000:6000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-embedding-service"
    restart: unless-stopped
  retriever:
    image: opea/retriever-redis:latest
    container_name: retriever-redis-server
    depends_on:
      - redis-vector-db
    ports:
      - "7000:7000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      REDIS_URL: ${REDIS_URL}
      INDEX_NAME: ${INDEX_NAME}
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-retriever-service"
    restart: unless-stopped
  tei-reranking-service:
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
    container_name: tei-reranking-gaudi-server
    ports:
      - "8808:80"
    volumes:
      - "./data:/data"
    shm_size: 1g
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
    command: --model-id ${RERANK_MODEL_ID} --auto-truncate
  reranking:
    image: opea/reranking-tei:latest
    container_name: reranking-tei-gaudi-server
    depends_on:
      - tei-reranking-service
    ports:
      - "8000:8000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-reranking-service"
    restart: unless-stopped
  vllm-service:
    image: vllm:hpu
    container_name: vllm-gaudi-server
    ports:
      - "8008:80"
    volumes:
      - "./data:/data"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
      LLM_MODEL: ${LLM_MODEL_ID}
    runtime: habana
    cap_add:
      - SYS_NICE
    ipc: host
    command: /bin/bash -c "export VLLM_CPU_KVCACHE_SPACE=40 && python3 -m vllm.entrypoints.openai.api_server --enforce-eager --model $LLM_MODEL --tensor-parallel-size 1 --host 0.0.0.0 --port 80"
  llm:
    image: opea/llm-vllm:latest
    container_name: llm-vllm-gaudi-server
    depends_on:
      - vllm-service
    ports:
      - "9000:9000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      vLLM_ENDPOINT: ${vLLM_LLM_ENDPOINT}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      LLM_MODEL: ${LLM_MODEL_ID}
    restart: unless-stopped
  chaqna-gaudi-backend-server:
    image: opea/chatqna:latest
    container_name: chatqna-gaudi-backend-server
    depends_on:
      - redis-vector-db
      - tei-embedding-service
      - embedding
      - retriever
      - tei-reranking-service
      - reranking
      - vllm-service
      - llm
    ports:
      - "8888:8888"
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
      - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
      - EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
      - RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
      - RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP}
      - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
      - LLM_SERVICE_PORT=${LLM_SERVICE_PORT}
    ipc: host
    restart: always
  chaqna-gaudi-ui-server:
    image: opea/chatqna-ui:latest
    container_name: chatqna-gaudi-ui-server
    depends_on:
      - chaqna-gaudi-backend-server
    ports:
      - "5173:5173"
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
      - CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
      - UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT}
      - GET_FILE=${DATAPREP_GET_FILE_ENDPOINT}
      - DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT}
    ipc: host
    restart: always

networks:
  default:
    driver: bridge
201  ChatQnA/docker/gaudi/compose_vllm_ray.yaml  Normal file
@@ -0,0 +1,201 @@

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

version: "3.8"

services:
  redis-vector-db:
    image: redis/redis-stack:7.2.0-v9
    container_name: redis-vector-db
    ports:
      - "6379:6379"
      - "8001:8001"
  dataprep-redis-service:
    image: opea/dataprep-redis:latest
    container_name: dataprep-redis-server
    depends_on:
      - redis-vector-db
    ports:
      - "6007:6007"
      - "6008:6008"
      - "6009:6009"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      REDIS_URL: ${REDIS_URL}
      INDEX_NAME: ${INDEX_NAME}
  tei-embedding-service:
    image: opea/tei-gaudi:latest
    container_name: tei-embedding-gaudi-server
    ports:
      - "8090:80"
    volumes:
      - "./data:/data"
    runtime: habana
    cap_add:
      - SYS_NICE
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
      MAX_WARMUP_SEQUENCE_LENGTH: 512
    command: --model-id ${EMBEDDING_MODEL_ID}
  embedding:
    image: opea/embedding-tei:latest
    container_name: embedding-tei-server
    depends_on:
      - tei-embedding-service
    ports:
      - "6000:6000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-embedding-service"
    restart: unless-stopped
  retriever:
    image: opea/retriever-redis:latest
    container_name: retriever-redis-server
    depends_on:
      - redis-vector-db
    ports:
      - "7000:7000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      REDIS_URL: ${REDIS_URL}
      INDEX_NAME: ${INDEX_NAME}
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-retriever-service"
    restart: unless-stopped
  tei-reranking-service:
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
    container_name: tei-reranking-gaudi-server
    ports:
      - "8808:80"
    volumes:
      - "./data:/data"
    shm_size: 1g
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
    command: --model-id ${RERANK_MODEL_ID} --auto-truncate
  reranking:
    image: opea/reranking-tei:latest
    container_name: reranking-tei-gaudi-server
    depends_on:
      - tei-reranking-service
    ports:
      - "8000:8000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-reranking-service"
    restart: unless-stopped
  vllm-ray-service:
    image: vllm_ray:habana
    container_name: vllm-ray-gaudi-server
    ports:
      - "8008:8000"
    volumes:
      - "./data:/data"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
      LLM_MODEL: ${LLM_MODEL_ID}
    runtime: habana
    cap_add:
      - SYS_NICE
    ipc: host
    command: /bin/bash -c "ray start --head && python vllm_ray_openai.py --port_number 8000 --model_id_or_path $LLM_MODEL --tensor_parallel_size 2 --enforce_eager True"
  llm:
    image: opea/llm-vllm-ray:latest
    container_name: llm-vllm-ray-gaudi-server
    depends_on:
      - vllm-ray-service
    ports:
      - "9000:9000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      vLLM_RAY_ENDPOINT: ${vLLM_RAY_LLM_ENDPOINT}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      LLM_MODEL: ${LLM_MODEL_ID}
    restart: unless-stopped
  chaqna-gaudi-backend-server:
    image: opea/chatqna:latest
    container_name: chatqna-gaudi-backend-server
    depends_on:
      - redis-vector-db
      - tei-embedding-service
      - embedding
      - retriever
      - tei-reranking-service
      - reranking
      - vllm-ray-service
      - llm
    ports:
      - "8888:8888"
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
      - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
      - EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
      - RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
      - RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP}
      - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
      - LLM_SERVICE_PORT=${LLM_SERVICE_PORT}
    ipc: host
    restart: always
  chaqna-gaudi-ui-server:
    image: opea/chatqna-ui:latest
    container_name: chatqna-gaudi-ui-server
    depends_on:
      - chaqna-gaudi-backend-server
    ports:
      - "5173:5173"
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
      - CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
      - UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT}
      - GET_FILE=${DATAPREP_GET_FILE_ENDPOINT}
      - DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT}
    ipc: host
    restart: always

networks:
  default:
    driver: bridge
405  ChatQnA/docker/gaudi/how_to_validate_service.md  Normal file
@@ -0,0 +1,405 @@

# How to Check and Validate Micro Service in the GenAI Example

GenAI examples build mega-services on top of microservices and servers.
For the mega-service to work, each microservice and server must work as expected.

Taking ChatQnA as an example, this document shows how to check and validate a GenAI example.

Assumption: all Docker images have already been built.

Here are the steps to check the microservices and servers.

## 1. Check environment variables

Make sure the environment variables are set.

Start the Docker containers:

```
cd ./GenAIExamples/ChatQnA/docker/gaudi
docker compose up -d
```

Check the startup log with `docker compose -f ./docker/gaudi/compose.yaml logs`,
where the compose.yaml file is the mega-service docker-compose configuration.
The warning messages point out which variables are **NOT** set.

```
ubuntu@gaudi-vm:~/GenAIExamples/ChatQnA/docker/gaudi$ docker compose -f ./compose.yaml up -d
WARN[0000] The "LANGCHAIN_API_KEY" variable is not set. Defaulting to a blank string.
WARN[0000] The "LANGCHAIN_TRACING_V2" variable is not set. Defaulting to a blank string.
WARN[0000] The "LANGCHAIN_API_KEY" variable is not set. Defaulting to a blank string.
WARN[0000] The "LANGCHAIN_TRACING_V2" variable is not set. Defaulting to a blank string.
WARN[0000] The "LANGCHAIN_API_KEY" variable is not set. Defaulting to a blank string.
WARN[0000] The "LANGCHAIN_TRACING_V2" variable is not set. Defaulting to a blank string.
WARN[0000] The "LANGCHAIN_API_KEY" variable is not set. Defaulting to a blank string.
WARN[0000] The "LANGCHAIN_TRACING_V2" variable is not set. Defaulting to a blank string.
WARN[0000] /home/ubuntu/GenAIExamples/ChatQnA/docker/gaudi/compose.yaml: `version` is obsolete
```
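If a variable is flagged, you can verify what the shell will pass to Compose before retrying; a quick sketch:

```
# print the variables this deployment consumes (empty output means unset)
echo "LANGCHAIN_API_KEY=${LANGCHAIN_API_KEY}"
echo "LANGCHAIN_TRACING_V2=${LANGCHAIN_TRACING_V2}"
echo "LLM_MODEL_ID=${LLM_MODEL_ID}"
```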
## 2. Check the docker container status

Check that the Docker containers have started.

For example, the ChatQnA example starts 11 Docker containers (services). Check that these containers are all running, i.e., that every container's `STATUS` is `Up`.

Run the command `docker ps -a`.

Here is the output:

```
CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
28d9a5570246 opea/chatqna-ui:latest "docker-entrypoint.s…" 2 minutes ago Up 2 minutes 0.0.0.0:5173->5173/tcp, :::5173->5173/tcp chatqna-gaudi-ui-server
bee1132464cd opea/chatqna:latest "python chatqna.py" 2 minutes ago Up 2 minutes 0.0.0.0:8888->8888/tcp, :::8888->8888/tcp chatqna-gaudi-backend-server
f810f3b4d329 opea/embedding-tei:latest "python embedding_te…" 2 minutes ago Up 2 minutes 0.0.0.0:6000->6000/tcp, :::6000->6000/tcp embedding-tei-server
325236a01f9b opea/llm-tgi:latest "python llm.py" 2 minutes ago Up 2 minutes 0.0.0.0:9000->9000/tcp, :::9000->9000/tcp llm-tgi-gaudi-server
2fa17d84605f opea/dataprep-redis:latest "python prepare_doc_…" 2 minutes ago Up 2 minutes 0.0.0.0:6007->6007/tcp, :::6007->6007/tcp dataprep-redis-server
69e1fb59e92c opea/retriever-redis:latest "/home/user/comps/re…" 2 minutes ago Up 2 minutes 0.0.0.0:7000->7000/tcp, :::7000->7000/tcp retriever-redis-server
313b9d14928a opea/reranking-tei:latest "python reranking_te…" 2 minutes ago Up 2 minutes 0.0.0.0:8000->8000/tcp, :::8000->8000/tcp reranking-tei-gaudi-server
05c40b636239 ghcr.io/huggingface/tgi-gaudi:1.2.1 "text-generation-lau…" 2 minutes ago Exited (1) About a minute ago tgi-gaudi-server
174bd43fa6b5 opea/tei-gaudi:latest "text-embeddings-rou…" 2 minutes ago Up 2 minutes 0.0.0.0:8090->80/tcp, :::8090->80/tcp tei-embedding-gaudi-server
74084469aa33 redis/redis-stack:7.2.0-v9 "/entrypoint.sh" 2 minutes ago Up 2 minutes 0.0.0.0:6379->6379/tcp, :::6379->6379/tcp, 0.0.0.0:8001->8001/tcp, :::8001->8001/tcp redis-vector-db
88399dbc9e43 ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 "text-embeddings-rou…" 2 minutes ago Up 2 minutes 0.0.0.0:8808->80/tcp, :::8808->80/tcp tei-reranking-gaudi-server
```

In this case, `ghcr.io/huggingface/tgi-gaudi:1.2.1` has exited:

```
05c40b636239 ghcr.io/huggingface/tgi-gaudi:1.2.1 "text-generation-lau…" 2 minutes ago Exited (1) About a minute ago tgi-gaudi-server
```

Next, we can check the container logs to learn what happened during startup.
## 3. Check docker container log

Check a container's log with:

`docker logs <CONTAINER ID> -t`

View the logs of `ghcr.io/huggingface/tgi-gaudi:1.2.1` with `docker logs 05c40b636239 -t`:

```
...
2024-06-05T01:56:48.959581881Z File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 833, in _apply
2024-06-05T01:56:48.959583925Z param_applied = fn(param)
2024-06-05T01:56:48.959585811Z
2024-06-05T01:56:48.959587629Z File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1161, in convert
2024-06-05T01:56:48.959589733Z return t.to(device, dtype if t.is_floating_point() or t.is_complex() else None, non_blocking)
2024-06-05T01:56:48.959591795Z
2024-06-05T01:56:48.959593607Z File "/usr/local/lib/python3.10/dist-packages/habana_frameworks/torch/core/weight_sharing.py", line 53, in __torch_function__
2024-06-05T01:56:48.959595769Z return super().__torch_function__(func, types, new_args, kwargs)
2024-06-05T01:56:48.959597800Z
2024-06-05T01:56:48.959599622Z RuntimeError: synStatus=9 [Device-type mismatch] Device acquire failed.
2024-06-05T01:56:48.959601665Z rank=0
2024-06-05T01:56:49.053352819Z 2024-06-05T01:56:49.053251Z ERROR text_generation_launcher: Shard 0 failed to start
2024-06-05T01:56:49.053373989Z 2024-06-05T01:56:49.053279Z INFO text_generation_launcher: Shutting down shards
2024-06-05T01:56:49.053385371Z Error: ShardCannotStart
```

The log shows `RuntimeError: synStatus=9 [Device-type mismatch] Device acquire failed.` This means the service failed to acquire the device.

So make sure the devices are available.

Here is another failure example:

```
f7a08f9867f9 ghcr.io/huggingface/tgi-gaudi:1.2.1 "text-generation-lau…" 16 seconds ago Exited (2) 14 seconds ago tgi-gaudi-server
```

Check the log by `docker logs f7a08f9867f9 -t`.

```
2024-06-05T01:30:30.695934928Z error: a value is required for '--model-id <MODEL_ID>' but none was supplied
2024-06-05T01:30:30.697123534Z
2024-06-05T01:30:30.697148330Z For more information, try '--help'.
```

The log indicates that the `MODEL_ID` is not set.

View the Docker input parameters in `./ChatQnA/docker/gaudi/compose.yaml`:

```
  tgi-service:
    image: ghcr.io/huggingface/tgi-gaudi:1.2.1
    container_name: tgi-gaudi-server
    ports:
      - "8008:80"
    volumes:
      - "./data:/data"
    environment:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
    runtime: habana
    cap_add:
      - SYS_NICE
    ipc: host
    command: --model-id ${LLM_MODEL_ID}
```

The input model ID is `${LLM_MODEL_ID}`.

Check that the environment variable `LLM_MODEL_ID` is set, and spelled correctly.
Set the `LLM_MODEL_ID`, then restart the containers.
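For example, with the model used elsewhere in this guide (see `set_env.sh`), the fix is a sketch like:

```
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
docker compose up -d
```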
Also, you can check the overall logs with the following command, where compose.yaml is the mega-service docker-compose configuration file.

```
docker compose -f ./docker/gaudi/compose.yaml logs
```

## 4. Check each micro service used by the Mega Service

### 1 TEI Embedding Service

```
curl ${host_ip}:8090/embed \
  -X POST \
  -d '{"inputs":"What is Deep Learning?"}' \
  -H 'Content-Type: application/json'
```

This tests the embedding service. It sends "What is Deep Learning?" to the embedding service; the output is the embedding of the sentence, a vector of floating-point numbers:
`[[0.00030903306,-0.06356524,0.0025720573,-0.012404448,0.050649878, ... , -0.02776986,-0.0246678,0.03999176,0.037477136,-0.006806653,0.02261455,-0.04570737,-0.033122733,0.022785513,0.0160026,-0.021343587,-0.029969815,-0.0049176104]]`

**Note**: The vector dimension is decided by the embedding model, and the output values depend on the model and the input data.
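To confirm the dimension quickly, count the returned values; a sketch assuming `python3` is available on the host:

```
curl -s ${host_ip}:8090/embed \
  -X POST \
  -d '{"inputs":"What is Deep Learning?"}' \
  -H 'Content-Type: application/json' |
  python3 -c "import sys, json; print(len(json.load(sys.stdin)[0]))"
```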
### 2 Embedding Microservice

```
curl http://${host_ip}:6000/v1/embeddings \
  -X POST \
  -d '{"text":"What is Deep Learning?"}' \
  -H 'Content-Type: application/json'
```

This tests the embedding microservice. In this test, it sends `What is Deep Learning?` to the embedding microservice.
The embedding microservice takes the input data and calls the embedding service to embed it.
The embedding server is stateless, while the microservice keeps state; note the `id` field in the output of the embedding microservice.

```
{"id":"e8c85e588a235a4bc4747a23b3a71d8f","text":"What is Deep Learning?","embedding":[0.00030903306,-0.06356524,0.0025720573,-0.012404448,0.050649878, ..., 0.02776986,-0.0246678,0.03999176,0.037477136,-0.006806653,0.02261455,-0.04570737,-0.033122733,0.022785513,0.0160026,-0.021343587,-0.029969815,-0.0049176104]}
```

### 3 Retriever Microservice

To consume the retriever microservice, you need to generate a mock embedding vector with a Python script.
The length of the embedding vector is determined by the embedding model.
Here we use the model `EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"`, [whose dimension is 768](https://huggingface.co/BAAI/bge-base-en-v1.5).

Check the vector dimension of your embedding model and set the `your_embedding` dimension to match it.

```
export your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
curl http://${host_ip}:7000/v1/retrieval \
  -X POST \
  -d "{\"text\":\"test\",\"embedding\":${your_embedding}}" \
  -H 'Content-Type: application/json'
```

The output is retrieved text relevant to the input data:

```
{"id":"27210945c7c6c054fa7355bdd4cde818","retrieved_docs":[{"id":"0c1dd04b31ab87a5468d65f98e33a9f6","text":"Company: Nike. financial instruments are subject to master netting arrangements that allow for the offset of assets and liabilities in the event of default or early termination of the contract.\nAny amounts of cash collateral received related to these instruments associated with the Company's credit-related contingent features are recorded in Cash and\nequivalents and Accrued liabilities, the latter of which would further offset against the Company's derivative asset balance. Any amounts of cash collateral posted related\nto these instruments associated with the Company's credit-related contingent features are recorded in Prepaid expenses and other current assets, which would further\noffset against the Company's derivative liability balance. Cash collateral received or posted related to the Company's credit-related contingent features is presented in the\nCash provided by operations component of the Consolidated Statements of Cash Flows. The Company does not recognize amounts of non-cash collateral received, such\nas securities, on the Consolidated Balance Sheets. For further information related to credit risk, refer to Note 12 — Risk Management and Derivatives.\n2023 FORM 10-K 68Table of Contents\nThe following tables present information about the Company's derivative assets and liabilities measured at fair value on a recurring basis and indicate the level in the fair\nvalue hierarchy in which the Company classifies the fair value measurement:\nMAY 31, 2023\nDERIVATIVE ASSETS\nDERIVATIVE LIABILITIES"},{"id":"1d742199fb1a86aa8c3f7bcd580d94af","text": ... }
```
### 4 TEI Reranking Service

Test the reranking service:

```
curl http://${host_ip}:8808/rerank \
  -X POST \
  -d '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' \
  -H 'Content-Type: application/json'
```

Output is:
`[{"index":1,"score":0.9988041},{"index":0,"score":0.022948774}]`

It scores each candidate text against the query.

### 5 Reranking Microservice

```
curl http://${host_ip}:8000/v1/reranking \
  -X POST \
  -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
  -H 'Content-Type: application/json'
```

Here is the output:

```
{"id":"e1eb0e44f56059fc01aa0334b1dac313","query":"Human: Answer the question based only on the following context:\n Deep learning is...\n Question: What is Deep Learning?","max_new_tokens":1024,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}
```

You may notice the reranking microservice is stateful (`id` and other metadata), while the reranking service is not.
### 6 TGI Service

```
curl http://${host_ip}:8008/generate \
  -X POST \
  -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":64, "do_sample": true}}' \
  -H 'Content-Type: application/json'
```

The TGI service generates text for the input prompt. Here is an example of TGI output:

```
{"generated_text":"We have all heard the buzzword, but our understanding of it is still growing. It’s a sub-field of Machine Learning, and it’s the cornerstone of today’s Machine Learning breakthroughs.\n\nDeep Learning makes machines act more like humans through their ability to generalize from very large"}
```

**NOTE**: After launching TGI, it takes a few minutes for the TGI server to load the LLM model and warm up.

If you get

```
curl: (7) Failed to connect to 100.81.104.168 port 8008 after 0 ms: Connection refused
```

and the log shows the model is warming up, please wait a while and try again later.

```
2024-06-05T05:45:27.707509646Z 2024-06-05T05:45:27.707361Z WARN text_generation_router: router/src/main.rs:357: `--revision` is not set
2024-06-05T05:45:27.707539740Z 2024-06-05T05:45:27.707379Z WARN text_generation_router: router/src/main.rs:358: We strongly advise to set it to a known supported commit.
2024-06-05T05:45:27.852525522Z 2024-06-05T05:45:27.852437Z INFO text_generation_router: router/src/main.rs:379: Serving revision bdd31cf498d13782cc7497cba5896996ce429f91 of model Intel/neural-chat-7b-v3-3
2024-06-05T05:45:27.867833811Z 2024-06-05T05:45:27.867759Z INFO text_generation_router: router/src/main.rs:221: Warming up model
```
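A small sketch that polls until TGI is ready before you run the test (this assumes TGI's `/health` endpoint; the 10-second interval is arbitrary):

```
until curl -sf http://${host_ip}:8008/health > /dev/null; do
  echo "waiting for TGI to finish warming up..."
  sleep 10
done
echo "TGI is ready"
```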
### 7 LLM Microservice

```
curl http://${host_ip}:9000/v1/chat/completions \
  -X POST \
  -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
  -H 'Content-Type: application/json'
```

You will get generated text from the LLM:

```
data: b'\n'

data: b'\n'

data: b'Deep'

data: b' learning'

data: b' is'

data: b' a'

data: b' subset'

data: b' of'

data: b' machine'

data: b' learning'

data: b' that'

data: b' uses'

data: b' algorithms'

data: b' to'

data: b' learn'

data: b' from'

data: b' data'

data: [DONE]
```
### 8 MegaService

```
curl http://${host_ip}:8888/v1/chatqna -H "Content-Type: application/json" -d '{
  "model": "Intel/neural-chat-7b-v3-3",
  "messages": "What is the revenue of Nike in 2023?"
}'
```

Here is the output for your reference:

```
data: b'\n'

data: b'An'

data: b'swer'

data: b':'

data: b' In'

data: b' fiscal'

data: b' '

data: b'2'

data: b'0'

data: b'2'

data: b'3'

data: b','

data: b' N'

data: b'I'

data: b'KE'

data: b','

data: b' Inc'

data: b'.'

data: b' achieved'

data: b' record'

data: b' Rev'

data: b'en'

data: b'ues'

data: b' of'

data: b' $'

data: b'5'

data: b'1'

data: b'.'

data: b'2'

data: b' billion'

data: b'.'

data: b'</s>'

data: [DONE]
```

**[Finished]** Congratulations! All your services now work as expected!
23  ChatQnA/docker/gaudi/set_env.sh  Normal file
@@ -0,0 +1,23 @@
#!/usr/bin/env bash

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0


export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:8090"
export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
export REDIS_URL="redis://${host_ip}:6379"
export INDEX_NAME="rag-redis"
export MEGA_SERVICE_HOST_IP=${host_ip}
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
export RERANK_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/chatqna"
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6008/v1/dataprep/get_file"
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6009/v1/dataprep/delete_file"
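The script reads `${host_ip}`, so set it before sourcing; a sketch (the `hostname -I` trick assumes Linux):

```bash
export host_ip=$(hostname -I | awk '{print $1}')
source ./set_env.sh
```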
@@ -28,7 +28,7 @@ docker build --no-cache -t opea/retriever-redis:latest --build-arg https_proxy=$
### 4. Build Rerank Image

```bash
docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/langchain/docker/Dockerfile .
docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/tei/docker/Dockerfile .
```

### 5. Build LLM Image
@@ -78,7 +78,7 @@ Then run the command `docker images`, you will have the following 7 Docker Image

### Setup Environment Variables

Since the `docker_compose.yaml` will consume some environment variables, you need to set them up in advance as below.
Since the `compose.yaml` will consume some environment variables, you need to set them up in advance as below.

```bash
export no_proxy=${your_no_proxy}
@@ -100,6 +100,8 @@ export RERANK_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/chatqna"
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6008/v1/dataprep/get_file"
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6009/v1/dataprep/delete_file"
```

Note: Please replace `host_ip` with your external IP address; do **NOT** use localhost.
@@ -108,7 +110,7 @@ Note: Please replace with `host_ip` with you external IP address, do **NOT** use

```bash
cd GenAIExamples/ChatQnA/docker/gpu/
docker compose -f docker_compose.yaml up -d
docker compose up -d
```

### Validate MicroServices and MegaService
@@ -133,21 +135,17 @@ curl http://${host_ip}:6000/v1/embeddings \

3. Retriever Microservice

To consume the retriever microservice, you need to generate a mock embedding vector of length 768 in Python script:
To consume the retriever microservice, you need to generate a mock embedding vector with a Python script. The length of the embedding vector
is determined by the embedding model.
Here we use the model `EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"`, whose vector size is 768.

```python
import random

embedding = [random.uniform(-1, 1) for _ in range(768)]
print(embedding)
```

Then substitute your mock embedding vector for the `${your_embedding}` in the following `curl` command:
Check the vector dimension of your embedding model and set the `your_embedding` dimension to match it.

```bash
export your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
curl http://${host_ip}:7000/v1/retrieval \
  -X POST \
  -d '{"text":"test", "embedding":${your_embedding}}' \
  -d "{\"text\":\"test\",\"embedding\":${your_embedding}}" \
  -H 'Content-Type: application/json'
```
@@ -219,9 +217,35 @@ curl -X POST "http://${host_ip}:6007/v1/dataprep" \

This command updates a knowledge base by submitting a list of HTTP links for processing.

You can also retrieve the list of files you have uploaded:

```bash
curl -X POST "http://${host_ip}:6008/v1/dataprep/get_file" \
  -H "Content-Type: application/json"
```

To delete a file or link you uploaded:

```bash
# delete link
curl -X POST "http://${host_ip}:6009/v1/dataprep/delete_file" \
  -d '{"file_path": "https://opea.dev"}' \
  -H "Content-Type: application/json"

# delete file
curl -X POST "http://${host_ip}:6009/v1/dataprep/delete_file" \
  -d '{"file_path": "nke-10k-2023.pdf"}' \
  -H "Content-Type: application/json"

# delete all uploaded files and links
curl -X POST "http://${host_ip}:6009/v1/dataprep/delete_file" \
  -d '{"file_path": "all"}' \
  -H "Content-Type: application/json"
```

## Enable LangSmith for Monitoring Application (Optional)

LangSmith offers tools to debug, evaluate, and monitor language models and intelligent agents. It can be used to assess benchmark data for each microservice. Before launching your services with `docker compose -f docker_compose.yaml up -d`, you need to enable LangSmith tracing by setting the `LANGCHAIN_TRACING_V2` environment variable to true and configuring your LangChain API key.
LangSmith offers tools to debug, evaluate, and monitor language models and intelligent agents. It can be used to assess benchmark data for each microservice. Before launching your services with `docker compose -f compose.yaml up -d`, you need to enable LangSmith tracing by setting the `LANGCHAIN_TRACING_V2` environment variable to true and configuring your LangChain API key.

Here's how you can do it:

@@ -240,7 +264,7 @@ export LANGCHAIN_API_KEY=ls_...

## 🚀 Launch the UI

To access the frontend, open the following URL in your browser: http://{host_ip}:5173. By default, the UI runs on port 5173 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `docker_compose.yaml` file as shown below:
To access the frontend, open the following URL in your browser: http://{host_ip}:5173. By default, the UI runs on port 5173 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `compose.yaml` file as shown below:

```yaml
chaqna-ui-server:
@@ -18,6 +18,8 @@ services:
      - redis-vector-db
    ports:
      - "6007:6007"
      - "6008:6008"
      - "6009:6009"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
@@ -25,7 +27,7 @@ services:
      REDIS_URL: ${REDIS_URL}
      INDEX_NAME: ${INDEX_NAME}
  tei-embedding-service:
    image: ghcr.io/huggingface/text-embeddings-inference:1.2
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
    container_name: tei-embedding-server
    ports:
      - "8090:80"
@@ -37,7 +39,7 @@ services:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
    command: --model-id ${EMBEDDING_MODEL_ID}
    command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
    deploy:
      resources:
        reservations:
@@ -81,7 +83,7 @@ services:
      LANGCHAIN_PROJECT: "opea-retriever-service"
    restart: unless-stopped
  tei-reranking-service:
    image: ghcr.io/huggingface/text-embeddings-inference:1.2
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
    container_name: tei-reranking-server
    ports:
      - "8808:80"
@@ -95,7 +97,7 @@ services:
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
    command: --model-id ${RERANK_MODEL_ID}
    command: --model-id ${RERANK_MODEL_ID} --auto-truncate
    deploy:
      resources:
        reservations:
@@ -204,6 +206,8 @@ services:
      - http_proxy=${http_proxy}
      - CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
      - UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT}
      - GET_FILE=${DATAPREP_GET_FILE_ENDPOINT}
      - DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT}
    ipc: host
    restart: always
23
ChatQnA/docker/gpu/set_env.sh
Normal file
@@ -0,0 +1,23 @@

#!/usr/bin/env bash

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:8090"
export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
export REDIS_URL="redis://${host_ip}:6379"
export INDEX_NAME="rag-redis"
export MEGA_SERVICE_HOST_IP=${host_ip}
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
export RERANK_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/chatqna"
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6008/v1/dataprep/get_file"
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6009/v1/dataprep/delete_file"
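
Because every endpoint in this script interpolates `${host_ip}`, that variable must be set before the script is sourced; a typical invocation (path taken from the file header above, run from the repo root) would be:

```bash
export host_ip="External_Public_IP"   # your server's public IPv4 address
source ChatQnA/docker/gpu/set_env.sh
```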
@@ -1,4 +1,8 @@

FROM node as vite-app
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Use node 20.11.1 as the base image
FROM node:20.11.1 as vite-app

COPY . /usr/app
WORKDIR /usr/app/react
@@ -1,12 +1,12 @@

<h1 align="center" id="title"> ChatQnA Conversational UI</h1>
# ChatQnA Conversational UI

### 📸 Project Screenshots
## 📸 Project Screenshots




<h2>🧐 Features</h2>
## 🧐 Features

Here are some of the project's features:

@@ -17,7 +17,7 @@ Here are some of the project's features:
- Chat history: Historical chat records can still be retained after refreshing, making it easier for users to view the context.
- Conversational Chat: The application maintains a history of the conversation, allowing users to review previous messages and the AI to refer back to earlier points in the dialogue when necessary.

<h2>🛠️ Get it Running:</h2>
## 🛠️ Get it Running

1. Clone the repo.
@@ -1,39 +1,39 @@

<?xml version="1.0" encoding="utf-8"?>
<!-- Generator: Adobe Illustrator 28.4.1, SVG Export Plug-In . SVG Version: 6.00 Build 0) -->
<svg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
width="800px" height="800px" viewBox="0 0 800 800" style="enable-background:new 0 0 800 800;" xml:space="preserve">
<style type="text/css">
.Drop_x0020_Shadow{fill:none;}
.Outer_x0020_Glow_x0020_5_x0020_pt{fill:none;}
.Blue_x0020_Neon{fill:none;stroke:#8AACDA;stroke-width:7;stroke-linecap:round;stroke-linejoin:round;}
.Chrome_x0020_Highlight{fill:url(#SVGID_1_);stroke:#FFFFFF;stroke-width:0.3629;stroke-miterlimit:1;}
.Jive_GS{fill:#FFDD00;}
.Alyssa_GS{fill:#A6D0E4;}
.st0{fill:#FF6900;}
.st1{fill:#FFB500;}
.st2{fill:#FFFFFF;}
</style>
<linearGradient id="SVGID_1_" gradientUnits="userSpaceOnUse" x1="-1640" y1="-1640" x2="-1640" y2="-1641">
<stop offset="0" style="stop-color:#656565"/>
<stop offset="0.618" style="stop-color:#1B1B1B"/>
<stop offset="0.6292" style="stop-color:#545454"/>
<stop offset="0.9831" style="stop-color:#3E3E3E"/>
</linearGradient>
<g>
<polygon points="400,0 737.5,181.7 607.3,252.8 269.7,71.1 "/>
<path d="M708.3,414.7l29.2,15.7l-130.3,71.1l-44.9-24.2v-31.7l40,21.5c1.5,0.8,3.2,1.2,4.9,1.2c1.7,0,3.4-0.4,5-1.3l0,0
L708.3,414.7z"/>
<path d="M557.3,532.1c-0.1-0.1-0.3-0.1-0.4-0.2l0,0l-34.2-18.4l31.2-17l42.9,23.1v169.9l-34.5-18.6V541
C562.4,537.3,560.4,533.9,557.3,532.1z"/>
<polygon points="410.4,381.3 541.6,309.7 541.6,479.5 410.4,551.2 "/>
<path d="M258.4,88.5l338.6,182.3v169.9l-34.5-18.6V292.2c0-3.7-1.9-7-5.1-8.9c-0.1-0.1-0.3-0.1-0.4-0.2l0,0L258.4,122.3V88.5z"/>
<polygon points="192.7,110.6 530.3,292.3 400,363.4 62.5,181.6 "/>
<polygon points="51.1,369 51.1,199 389.6,381.3 389.6,551.3 96.6,393.5 "/>
<path d="M91.7,414.4l303.4,163.3c1.5,0.8,3.2,1.2,4.9,1.2c1.7,0,3.4-0.4,5-1.3l0,0l96.1-52.4l29.2,15.7L400,612.1L62.5,430.4
L91.7,414.4z"/>
<polygon points="51.1,447.8 389.6,630.1 389.6,800 51.1,617.7 "/>
<polygon points="541.6,728.3 410.4,799.9 410.4,630 541.6,558.4 "/>
<polygon points="748.9,617.7 617.6,689.3 617.6,519.5 748.9,447.9 "/>
<polygon points="748.9,369 617.6,440.6 617.6,270.7 748.9,199.1 "/>
</g>
</svg>
Before Width: | Height: | Size: 2.2 KiB After Width: | Height: | Size: 2.2 KiB |
@@ -1,3 +1,5 @@

CHAT_BASE_URL = 'http://backend_address:8888/v1/chatqna'

UPLOAD_FILE_BASE_URL = 'http://backend_address:6007/v1/dataprep'
UPLOAD_FILE_BASE_URL = 'http://backend_address:6002/v1/dataprep'

GET_FILE = 'http://backend_address:6001/v1/dataprep/get_file'
@@ -1,12 +1,12 @@

<h1 align="center" id="title"> ChatQnA Customized UI</h1>
# ChatQnA Customized UI

### 📸 Project Screenshots
## 📸 Project Screenshots




<h2>🧐 Features</h2>
## 🧐 Features

Here are some of the project's features:

@@ -17,16 +17,20 @@ Here are some of the project's features:
- Scroll to Bottom / Top: The chat automatically slides to the bottom. Users can also click the top icon to slide to the top of the chat record.
- End to End Time: Shows the time spent on the current conversation.

<h2>🛠️ Get it Running:</h2>
## 🛠️ Get it Running

1. Clone the repo.

2. `cd` into the current folder.

3. Modify the required .env variables.

```
DOC_BASE_URL = ''

UPLOAD_FILE_BASE_URL = ''
```

4. Execute `npm install` to install the corresponding dependencies.

5. Execute `npm run dev` in both environments
@@ -0,0 +1,36 @@

<!--
Copyright (C) 2024 Intel Corporation
SPDX-License-Identifier: Apache-2.0
-->

<script>
export let className = "w-16 h-16";
</script>

<svg
t="1713775351763"
class="icon {className}"
viewBox="0 0 1024 1024"
version="1.1"
xmlns="http://www.w3.org/2000/svg"
p-id="12834"
width="32"
height="32"
><path
d="M192 160h253.728a64 64 0 0 1 53.312 28.576l25.824 38.848A64 64 0 0 0 578.176 256H832a64 64 0 0 1 64 64v480a64 64 0 0 1-64 64H192a64 64 0 0 1-64-64V224a64 64 0 0 1 64-64z"
fill="#1989FA"
p-id="12835"
/><path
d="M192 352h640a64 64 0 0 1 64 64v384a64 64 0 0 1-64 64H192a64 64 0 0 1-64-64V416a64 64 0 0 1 64-64z"
fill="#8BC4FC"
p-id="12836"
/><path
d="M422.624 768a70.656 70.656 0 0 1-49.888-120.672l30.112-30.112a7.488 7.488 0 0 1 5.28-2.208c5.152 1.28 7.104 3.616 7.552 6.4a93.76 93.76 0 0 0 5.472 22.144 7.68 7.68 0 0 1-1.696 8.032l-21.312 21.312a34.912 34.912 0 0 0 0 48.928 34.24 34.24 0 0 0 24.352 10.08 34.944 34.944 0 0 0 24.544-10.08l89.312-89.376a34.688 34.688 0 0 0 0-48.896 7.488 7.488 0 0 1 0-10.56l15.008-15.04a7.488 7.488 0 0 1 5.344-2.208 7.616 7.616 0 0 1 5.312 2.144 70.688 70.688 0 0 1 0 100.032l-89.312 89.28a70.4 70.4 0 0 1-49.76 20.736z"
fill="#FFFFFF"
p-id="12837"
/><path
d="M467.168 660.128a7.456 7.456 0 0 1-5.12-2.112 70.72 70.72 0 0 1 0-100l89.312-89.312a70.656 70.656 0 1 1 99.904 99.968l-30.112 30.112a7.488 7.488 0 0 1-5.248 2.208c-5.184-1.28-7.136-3.616-7.552-6.4a97.504 97.504 0 0 0-5.504-22.176 7.648 7.648 0 0 1 1.696-8l21.312-21.312a34.848 34.848 0 0 0 0-48.928 34.24 34.24 0 0 0-24.352-10.08 34.944 34.944 0 0 0-24.544 10.08l-89.312 89.344a34.752 34.752 0 0 0 0 48.896 7.584 7.584 0 0 1 0 10.688l-14.848 14.912a7.52 7.52 0 0 1-5.248 2.176z"
fill="#FFFFFF"
p-id="12838"
/></svg
>
@@ -0,0 +1,30 @@

<!--
Copyright (C) 2024 Intel Corporation
SPDX-License-Identifier: Apache-2.0
-->

<script>
export let className = "w-16 h-16";
</script>

<svg
t="1711440565760"
class="icon {className}"
viewBox="0 0 1024 1024"
version="1.1"
xmlns="http://www.w3.org/2000/svg"
p-id="23643"
><path
d="M913.29536 941.04064c0.0256 24.82688-16.54784 44.96384-37.0176 44.98432l-708.23936 0.6912c-20.46464 0.02048-37.07904-20.08576-37.10464-44.91264l-0.83968-859.02848c-0.0256-24.82688 16.54784-44.96384 37.0176-44.98432l521.10848-0.50688 224.39424 210.50368 0.68096 693.25312z"
fill="#E6E4E2"
p-id="23644"
/><path
d="M913.29536 253.26592l-189.11744 0.18432c-20.46464 0.02048-37.07904-20.08576-37.10464-44.91264l-0.16384-165.77024 226.38592 210.49856z"
fill="#C4BCB1"
p-id="23645"
/><path
d="M720.72192 396.84096a22.54848 22.54848 0 0 1-22.54848 22.54848H326.13376a22.54848 22.54848 0 0 1 0-45.09696h372.0448a22.54848 22.54848 0 0 1 22.54336 22.54848zM720.72192 565.95456a22.54848 22.54848 0 0 1-22.54848 22.54848H326.13376a22.54848 22.54848 0 0 1 0-45.09696h372.0448a22.54848 22.54848 0 0 1 22.54336 22.54848zM720.72192 746.33728a22.54848 22.54848 0 0 1-22.54848 22.54848H326.13376a22.54848 22.54848 0 0 1 0-45.09696h372.0448a22.54848 22.54848 0 0 1 22.54336 22.54848z"
fill="#C4BCB1"
p-id="23646"
/></svg
>
@@ -0,0 +1,30 @@

<!--
Copyright (C) 2024 Intel Corporation
SPDX-License-Identifier: Apache-2.0
-->

<script>
export let className = "w-16 h-16";
</script>

<svg
t="1711440048470"
class="icon {className}"
viewBox="0 0 1024 1024"
version="1.1"
xmlns="http://www.w3.org/2000/svg"
p-id="7455"
><path
d="M848.8576 199.1936H415.7568c0-26.5728-21.5424-48.128-48.128-48.128H175.1424c-26.5728 0-48.128 21.5424-48.128 48.128V343.5648c0 26.5984 21.5424 48.1408 48.128 48.1408h673.728c26.5728 0 48.128-21.5424 48.128-48.1408v-96.2432c-0.0128-26.5856-21.5552-48.128-48.1408-48.128z"
fill="#CCA352"
p-id="7456"
/><path
d="M800.7424 247.3088H223.2576c-26.5728 0-48.128 21.5424-48.128 48.128v48.128c0 26.5984 21.5424 48.1408 48.128 48.1408h577.472c26.5728 0 48.128-21.5424 48.128-48.1408v-48.128c0-26.5728-21.5424-48.128-48.1152-48.128z"
fill="#FFFFFF"
p-id="7457"
/><path
d="M848.8576 295.4368H175.1424c-26.5728 0-48.128 21.5424-48.128 48.128v481.2544c0 26.5472 21.5424 48.128 48.128 48.128h673.728c26.5728 0 48.128-21.568 48.128-48.128V343.552c-0.0128-26.5728-21.5552-48.1152-48.1408-48.1152z"
fill="#FFCC66"
p-id="7458"
/></svg
>
@@ -0,0 +1,25 @@

<!--
Copyright (C) 2024 Intel Corporation
SPDX-License-Identifier: Apache-2.0
-->

<svg
class="animate-spin -ml-1 mr-3 h-5 w-5 text-gray-500"
xmlns="http://www.w3.org/2000/svg"
fill="none"
viewBox="0 0 24 24"
>
<circle
class="opacity-25"
cx="12"
cy="12"
r="10"
stroke="#0597ff"
stroke-width="4"
/>
<path
class="opacity-75"
fill="#0597ff"
d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"
/>
</svg>
After Width: | Height: | Size: 520 B |
@@ -0,0 +1,37 @@

<!--
Copyright (C) 2024 Intel Corporation
SPDX-License-Identifier: Apache-2.0
-->

<svg
t="1697618332186"
class="w-16 h-16"
viewBox="0 0 1024 1024"
version="1.1"
xmlns="http://www.w3.org/2000/svg"
p-id="7984"
width="16"
height="16"
><path
d="M146.285714 146.285714a97.52381 97.52381 0 0 1 97.52381-97.523809h341.333333l292.571429 292.571428v536.380953a97.52381 97.52381 0 0 1-97.52381 97.523809H243.809524a97.52381 97.52381 0 0 1-97.52381-97.523809V146.285714z"
fill="#D1DDE5"
p-id="7985"
/><path
d="M585.142857 48.761905l292.571429 292.571428h-195.047619a97.52381 97.52381 0 0 1-97.52381-97.523809V48.761905z"
fill="#ABBBC7"
p-id="7986"
/><path
d="M73.142857 609.52381m48.761905 0l365.714286 0q48.761905 0 48.761904 48.761904l0 121.904762q0 48.761905-48.761904 48.761905l-365.714286 0q-48.761905 0-48.761905-48.761905l0-121.904762q0-48.761905 48.761905-48.761904Z"
fill="#ABBBC7"
p-id="7987"
/><path
d="M162.06019 674.133333v34.572191h51.321905v22.186666H162.06019v55.637334H136.533333v-134.582857h85.430857v22.186666H162.06019zM238.640762 651.946667h25.502476v134.582857H238.665143v-134.582857zM288.353524 651.946667h25.502476v112.39619h58.953143v22.186667h-84.455619v-134.582857zM414.427429 674.133333v33.426286h51.151238v22.186667h-51.151238v34.57219h59.928381v22.186667h-85.430858V651.946667h85.430858v22.186666h-59.904z"
fill="#FFFFFF"
p-id="7988"
/><path
d="M329.142857 231.619048m-60.952381 0a60.952381 60.952381 0 1 0 121.904762 0 60.952381 60.952381 0 1 0-121.904762 0Z"
fill="#FFFFFF"
opacity=".6"
p-id="7989"
/></svg
>
After Width: | Height: | Size: 1.5 KiB |
@@ -17,7 +17,7 @@ import { SSE } from "sse.js";

const CHAT_BASE_URL = env.CHAT_BASE_URL;

export async function fetchTextStream(query: string, knowledge_base_id: string) {
export async function fetchTextStream(query: string) {
let payload = {};
let url = "";

@@ -26,6 +26,7 @@ export async function fetchTextStream(query: string, knowledge_base_id: string)
messages: query,
};
url = `${CHAT_BASE_URL}`;
console.log("fetchTextStream", url);

return new SSE(url, {
headers: { "Content-Type": "application/json" },
@@ -15,6 +15,7 @@

import { env } from "$env/dynamic/public";

const UPLOAD_FILE_BASE_URL = env.UPLOAD_FILE_BASE_URL;
const GET_FILE = env.GET_FILE;

export async function fetchKnowledgeBaseId(file: Blob, fileName: string) {
const url = `${UPLOAD_FILE_BASE_URL}`;
@@ -35,7 +36,7 @@ export async function fetchKnowledgeBaseId(file: Blob, fileName: string) {
}
}

export async function fetchKnowledgeBaseIdByPaste(pasteUrlList: any, urlType: string | undefined) {
export async function fetchKnowledgeBaseIdByPaste(pasteUrlList: any) {
const url = `${UPLOAD_FILE_BASE_URL}`;
const formData = new FormData();
formData.append("link_list", JSON.stringify(pasteUrlList));
@@ -53,3 +54,24 @@ export async function fetchKnowledgeBaseIdByPaste(pasteUrlList: any, urlType: st
return undefined;
}
}

export async function fetchAllFile() {
const data = {
knowledge_base_id: "default",
};
const url = `${GET_FILE}`;
const init: RequestInit = {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify(data),
};

try {
const response = await fetch(url, init);
if (!response.ok) throw response.status;
return await response.json();
} catch (error) {
console.error("network error: ", error);
return undefined;
}
}
@@ -0,0 +1,84 @@

<!--
Copyright (C) 2024 Intel Corporation
SPDX-License-Identifier: Apache-2.0
-->

<script>
import FolderIcon from "$lib/assets/DocManagement/folderIcon.svelte";
import LinkfolderIcon from "$lib/assets/DocManagement/LinkfolderIcon.svelte";
import { Modal } from "flowbite-svelte";
import SvelteTree from "$lib/shared/components/doc_management/treeView/svelte-tree.svelte";
import FileIcon from "$lib/assets/DocManagement/fileIcon.svelte";
import { createEventDispatcher } from "svelte";

let dispatch = createEventDispatcher();
let showDirectory = false;
let chooseDir = undefined;
let currentIdx = 0;

export let files = [];

console.log("files", files);

function handleDirClick(file, index) {
chooseDir = file;
showDirectory = true;
currentIdx = index;
console.log("chooseDir", chooseDir);
}
</script>

<Modal
bind:open={showDirectory}
size="xs"
autoclose={true}
class="z-50 w-full"
outsideclose
>
<hr class="my-8 h-px border-0 bg-gray-200 dark:bg-gray-700" />
<SvelteTree data={chooseDir.children} {currentIdx} />
</Modal>

<div class="grid grid-cols-2 gap-5 max-h-[35rem] overflow-auto">
{#each files as file, index}
<div
class="group relative flex w-full flex-col items-center justify-center p-2 px-12 text-center hover:bg-[#d9eeff] focus:bg-[#d9eeff]"
>
{#if file.type === "File"}
<div class="flex-shrink-0">
<FileIcon />
</div>
<p class="w-[6rem] truncate">
{file.name}
</p>
{:else if file.type === "Directory" && file.id === "uploaded_links"}
<button
class="flex flex-col items-center"
on:click={() => handleDirClick(file, index)}
>
<div class="flex-shrink-0">
<LinkfolderIcon />
</div>
<p class="truncate">
{file.name}
</p>
</button>
{:else}
<button
class="flex flex-col items-center"
on:click={() => handleDirClick(file, index)}
>
<div class="flex-shrink-0">
<FolderIcon />
</div>
<p class="truncate">
{file.name}
</p>
</button>
{/if}
</div>
{/each}
</div>
@@ -0,0 +1,35 @@

<!--
Copyright (C) 2024 Intel Corporation
SPDX-License-Identifier: Apache-2.0
-->

<script lang="ts">
import TreeBranch from "./tree-branch.svelte";
import { createEventDispatcher } from "svelte";

let dispatch = createEventDispatcher();
type IData = {
name: string;
id: string;
type: string;
children: never[];
};

export let currentIdx;

export let collapse = false,
data: IData[] = [],
onClick = "";

console.log("data", data);
</script>

<div>
{#if data && data.length > 0}
<ul>
<TreeBranch {data} {collapse} {onClick} {currentIdx} />
</ul>
{:else}
<p>Folder is empty. Please upload a file.</p>
{/if}
</div>
@@ -0,0 +1,46 @@

<!--
Copyright (C) 2024 Intel Corporation
SPDX-License-Identifier: Apache-2.0
-->

<script lang="ts">
import { storageFiles } from "$lib/shared/stores/common/Store";
import TreeNode from "./tree-node.svelte";
import { createEventDispatcher } from "svelte";

let dispatch = createEventDispatcher();
type IData = {
name: string;
id: string;
type: string;
children: never[];
};
export let data: IData[] = [],
collapse = false,
onClick = "";

export let currentIdx;

function changeData() {
console.log('change', $storageFiles);

data = $storageFiles[currentIdx].children;
}

$: $storageFiles ? changeData() : console.log('No change', $storageFiles);

console.log(data);
</script>

{#if data && data.length > 0}
{#each data as item}
<TreeNode
bind:node={item}
{collapse}
{onClick}
{currentIdx}
/>
{/each}
{:else}
<p>Folder is empty. Please upload a file.</p>
{/if}
@@ -0,0 +1,111 @@

<!--
Copyright (C) 2024 Intel Corporation
SPDX-License-Identifier: Apache-2.0
-->

<script lang="ts">
import FileIcon from "$lib/assets/DocManagement/fileIcon.svelte";
import FolderIcon from "$lib/assets/DocManagement/folderIcon.svelte";
import LinkfolderIcon from "$lib/assets/DocManagement/LinkfolderIcon.svelte";
import { createEventDispatcher } from "svelte";

import { onMount } from "svelte";
type IData = {
name: string;
id: string;
type: string;
children: never[];
parent: IData;
currentIdx: number;
};
export let node: IData,
collapse = false,
onClick = "",
parent = "";

export let currentIdx;

let open = collapse;

function toggleOpen() {
open = !open;
}

function handleClickOpen() {
toggleOpen();
}

onMount(() => {
if (node) {
node.parent = parent;
}
});
</script>

<li class="relative ml-5">
<div
class="my-2 flex items-center gap-4 {node.type === 'File' ? 'ml-5' : ''}"
>
<!-- link -->
{#if node.type === "Directory"}
{#if open}
<!-- svelte-ignore a11y-click-events-have-key-events -->
<svg
on:click={handleClickOpen}
data-testid="caret-down-node"
xmlns="http://www.w3.org/2000/svg"
class="h-5 w-5 cursor-pointer"
viewBox="0 0 20 20"
fill="currentColor"
>
<path
fill-rule="evenodd"
d="M5.293 7.293a1 1 0 011.414 0L10 10.586l3.293-3.293a1 1 0 111.414 1.414l-4 4a1 1 0 01-1.414 0l-4-4a1 1 0 010-1.414z"
clip-rule="evenodd"
/>
</svg>
{:else}
<!-- svelte-ignore a11y-click-events-have-key-events -->
<svg
on:click={handleClickOpen}
data-testid="caret-up-node"
xmlns="http://www.w3.org/2000/svg"
class="h-5 w-5 cursor-pointer"
viewBox="0 0 20 20"
fill="currentColor"
>
<path
fill-rule="evenodd"
d="M7.293 14.707a1 1 0 010-1.414L10.586 10 7.293 6.707a1 1 0 011.414-1.414l4 4a1 1 0 010 1.414l-4 4a1 1 0 01-1.414 0z"
clip-rule="evenodd"
/>
</svg>
{/if}
{#if node.id === "uploaded_links"}
<LinkfolderIcon className={"w-12 h-12"} />
{:else}
<FolderIcon className={"w-12 h-12"} />
{/if}
{:else}
<FileIcon className={"w-10 h-10"} />
{/if}
<!-- link -->

<span>{node?.name}</span>
</div>

{#if open && node.type === "Directory"}
<ul>
{#each node.children as child}
<svelte:self
bind:node={child}
bind:parent={node}
{collapse}
{onClick}
/>
{/each}
</ul>
{/if}
</li>
@@ -43,6 +43,7 @@

bind:value
on:change={handleInput}
class="focus:border-blue-700 focus:ring-0"
data-testid="file-upload"
accept=".txt,.pdf,.json"
/>
</Label>
@@ -20,19 +20,27 @@

import { sineIn } from "svelte/easing";
import UploadFile from "./upload-knowledge.svelte";
import PasteURL from "./PasteKnowledge.svelte";
import { knowledge1, knowledgeName } from "$lib/shared/stores/common/Store";
import DeleteIcon from "$lib/assets/avatar/svelte/Delete.svelte";
import {
knowledge1,
knowledgeName,
storageFiles,
} from "$lib/shared/stores/common/Store";
import { getNotificationsContext } from "svelte-notifications";
import {
fetchAllFile,
fetchKnowledgeBaseId,
fetchKnowledgeBaseIdByPaste,
} from "$lib/network/upload/Network";
import DocCard from "../doc_management/docCard.svelte";
import NoFile from "$lib/assets/upload/no-file.svelte";
import LoadingButton from "$lib/assets/upload/loading-button.svelte";

const { addNotification } = getNotificationsContext();
console.log("allKnowledges", $knowledgeName);

$: files = $storageFiles ? $storageFiles : [];
let hidden6 = true;
let selectKnowledge = -1;
let uploading = false;

let transitionParamsRight = {
x: 320,
duration: 200,
@@ -42,9 +50,10 @@
async function handleKnowledgePaste(
e: CustomEvent<{ pasteUrlList: string[] }>
) {
uploading = true;
try {
const pasteUrlList = e.detail.pasteUrlList;
const res = await fetchKnowledgeBaseIdByPaste(pasteUrlList, "url1");
const res = await fetchKnowledgeBaseIdByPaste(pasteUrlList);
handleUploadResult(res, "knowledge_base");
} catch {
handleUploadError();
@@ -52,6 +61,7 @@
}

async function handleKnowledgeUpload(e: CustomEvent<any>) {
uploading = true;
try {
const blob = await fetch(e.detail.src).then((r) => r.blob());
const fileName = e.detail.fileName;
@@ -62,11 +72,20 @@
}
}

function handleUploadResult(res: Response, fileName: string) {
async function handleUploadResult(res: Response, fileName: string) {
if (res.status === 200) {
knowledge1.set({ id: "default" });
knowledgeName.set(fileName);
showNotification("Uploaded successfully", "success");
// update fileStructure
const res = await fetchAllFile();
uploading = false;
console.log('handleUploadResult', res);

if (res) {
storageFiles.set(res);
files = $storageFiles;
}
} else {
showNotification("Uploaded failed", "error");
}
@@ -84,11 +103,6 @@
removeAfter: 3000,
});
}

function handleKnowledgeDelete() {
knowledge1.set({ id: "default" });
knowledgeName.set("");
}
</script>

<div class="text-center">
@@ -139,6 +153,7 @@
Please upload your local file or paste a remote file link, and Chat will
respond based on the content of the uploaded file.
</p>

<Tabs
style="full"
defaultClass="flex rounded-lg divide-x rtl:divide-x-reverse divide-gray-200 shadow dark:divide-gray-700 focus:ring-0"
@@ -152,10 +167,18 @@
<PasteURL on:paste={handleKnowledgePaste} />
</TabItem>
</Tabs>
{#if $knowledgeName && $knowledgeName !== ""}
<div class="relative">
<p class="border-b p-6 pb-2">{$knowledgeName}</p>
<DeleteIcon on:DeleteAvatar={() => handleKnowledgeDelete()} />
{#if uploading}
<div class="flex flex-col items-center justify-center">
<LoadingButton />
</div>
{/if}

{#if files.length > 0}
<DocCard {files} />
{:else}
<div class="flex flex-col items-center justify-center">
<NoFile />
<p class=" text-sm opacity-70">No files uploaded</p>
</div>
{/if}
</Drawer>

@@ -37,3 +37,5 @@ export const knowledge1 = writable<{
}>();

export const knowledgeName = writable("");

export const storageFiles = writable([]);
@@ -16,7 +16,7 @@

<script lang="ts">
export let data;
import { ifStoreMsg, knowledge1 } from "$lib/shared/stores/common/Store";
import { knowledge1, storageFiles } from "$lib/shared/stores/common/Store";
import { onMount } from "svelte";
import {
LOCAL_STORAGE_KEY,
@@ -25,27 +25,27 @@
type Message,
} from "$lib/shared/constant/Interface";
import {
fromTimeStampToTime,
getCurrentTimeStamp,
scrollToBottom,
scrollToTop,
} from "$lib/shared/Utils";
import { fetchTextStream } from "$lib/network/chat/Network";
import LoadingAnimation from "$lib/shared/components/loading/Loading.svelte";
import { browser } from "$app/environment";
import "driver.js/dist/driver.css";
import "$lib/assets/layout/css/driver.css";
import UploadFile from "$lib/shared/components/upload/uploadFile.svelte";
import PaperAirplane from "$lib/assets/chat/svelte/PaperAirplane.svelte";
import Gallery from "$lib/shared/components/chat/gallery.svelte";
import Scrollbar from "$lib/shared/components/scrollbar/Scrollbar.svelte";
import ChatMessage from "$lib/modules/chat/ChatMessage.svelte";
import { fetchAllFile } from "$lib/network/upload/Network.js";
import { getNotificationsContext } from "svelte-notifications";

let query: string = "";
let loading: boolean = false;
let scrollToDiv: HTMLDivElement;
let chatMessages: Message[] = data.chatMsg ? data.chatMsg : [];
const { addNotification } = getNotificationsContext();

@@ -55,8 +55,22 @@
scrollToDiv = document
.querySelector(".chat-scrollbar")
?.querySelector(".svlr-viewport")!;

const res = await fetchAllFile();
if (res) {
storageFiles.set(res);
}
});

function showNotification(text: string, type: string) {
addNotification({
text: text,
position: "top-left",
type: type,
removeAfter: 3000,
});
}

function handleTop() {
scrollToTop(scrollToDiv);
}
@@ -87,63 +101,83 @@

}

const callTextStream = async (query: string, startSendTime: number) => {
const eventSource = await fetchTextStream(query, knowledge_1);

eventSource.addEventListener("message", (e: any) => {
let Msg = e.data;
if (Msg.startsWith("b")) {
let currentMsg = Msg.slice(2, -1);
const containsNewLine = /\\n/.test(currentMsg);
let requiresDecoding = false;

currentMsg = currentMsg.replace(/\\n/g, "\n");

if (/\\x[\dA-Fa-f]{2}/.test(currentMsg)) {
currentMsg = decodeEscapedBytes(currentMsg);
requiresDecoding = true;
} else if (/\\u[\dA-Fa-f]{4}/.test(currentMsg)) {
currentMsg = decodeUnicode(currentMsg);
requiresDecoding = true;
try {
const eventSource = await fetchTextStream(query);
eventSource.addEventListener("error", (e: any) => {
if (e.type === "error") {
showNotification("Failed to load chat content.", "error");
loading = false;
}
});

if (containsNewLine && requiresDecoding) {
currentMsg += "\n";
}
if (chatMessages[chatMessages.length - 1].role == MessageRole.User) {
chatMessages = [
...chatMessages,
{
eventSource.addEventListener("message", (e: any) => {
let msg = e.data;
console.log("msg", msg);

const handleDecodedMessage = (decodedMsg: string) => {
if (decodedMsg !== "</s>") {
decodedMsg = decodedMsg.replace(/\\n/g, "\n");
}

if (chatMessages[chatMessages.length - 1].role === MessageRole.User) {
chatMessages.push({
role: MessageRole.Assistant,
type: MessageType.Text,
content: currentMsg,
content: decodedMsg,
time: startSendTime,
},
];
});
} else {
chatMessages[chatMessages.length - 1].content += decodedMsg;
}

scrollToBottom(scrollToDiv);
};

if (msg.startsWith("b")) {
let currentMsg = msg.slice(2, -1);

if (/\\x[\dA-Fa-f]{2}/.test(currentMsg)) {
currentMsg = decodeEscapedBytes(currentMsg);
} else if (/\\u[\dA-Fa-f]{4}/.test(currentMsg)) {
currentMsg = decodeUnicode(currentMsg);
}

handleDecodedMessage(currentMsg);
} else if (msg === "[DONE]") {
console.log("Done");

let startTime = chatMessages[chatMessages.length - 1].time;
loading = false;
let totalTime = parseFloat(
((getCurrentTimeStamp() - startTime) / 1000).toFixed(2)
);

if (chatMessages.length - 1 !== -1) {
chatMessages[chatMessages.length - 1].time = totalTime;
}

storeMessages();
} else {
let content = chatMessages[chatMessages.length - 1].content as string;
chatMessages[chatMessages.length - 1].content = content + currentMsg;
if (/\\x[\dA-Fa-f]{2}/.test(msg)) {
msg = decodeEscapedBytes(msg);
} else if (/\\u[\dA-Fa-f]{4}/.test(msg)) {
msg = decodeUnicode(msg);
}

let currentMsg = msg.replace(/"/g, "").replace(/\\n/g, "\n");

handleDecodedMessage(currentMsg);
}
scrollToBottom(scrollToDiv);
} else if (Msg === "[DONE]") {
let startTime = chatMessages[chatMessages.length - 1].time;
});

loading = false;
let totalTime = parseFloat(
((getCurrentTimeStamp() - startTime) / 1000).toFixed(2)
);

if (chatMessages.length - 1 !== -1) {
chatMessages[chatMessages.length - 1].time = totalTime;
}

storeMessages();
}
});
eventSource.stream();
eventSource.stream();
} catch (error: any) {
showNotification("Failed to load chat content.", "error");
loading = false;
}
};
const handleTextSubmit = async () => {
loading = true;
const newMessage = {
role: MessageRole.User,
@@ -166,15 +200,6 @@
localStorage.removeItem(LOCAL_STORAGE_KEY.STORAGE_CHAT_KEY);
chatMessages = [];
}

function isEmptyObject(obj: any): boolean {
for (let key in obj) {
if (obj.hasOwnProperty(key)) {
return false;
}
}
return true;
}
</script>
<!-- <DropZone on:drop={handleImageSubmit}> -->
@@ -240,8 +265,7 @@
><path
d="M12.6 12 10 9.4 7.4 12 6 10.6 8.6 8 6 5.4 7.4 4 10 6.6 12.6 4 14 5.4 11.4 8l2.6 2.6zm7.4 8V2q0-.824-.587-1.412A1.93 1.93 0 0 0 18 0H2Q1.176 0 .588.588A1.93 1.93 0 0 0 0 2v12q0 .825.588 1.412Q1.175 16 2 16h14zm-3.15-6H2V2h16v13.125z"
/></svg
><span class="font-medium text-[#0597ff]">CLEAR</span
></button
><span class="font-medium text-[#0597ff]">CLEAR</span></button
>
</div>
</div>
@@ -40,7 +40,7 @@ reranking

=========
Port 8000 - Open to 0.0.0.0/0

tgi_service
tgi-service or vLLM_service
===========
Port 9009 - Open to 0.0.0.0/0
@@ -81,15 +81,33 @@ docker build --no-cache -t opea/retriever-redis:latest --build-arg https_proxy=$

### 3. Build Rerank Image

```bash
docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/langchain/docker/Dockerfile .
docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/tei/docker/Dockerfile .
```

### 4. Build LLM Image

#### Use TGI as backend

```bash
docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile .
```

#### Use vLLM as backend

Build the vLLM Docker image.

```bash
git clone https://github.com/vllm-project/vllm.git
cd ./vllm/
docker build --no-cache -t vllm:cpu --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile.cpu .
```

Build the microservice image.

```bash
docker build --no-cache -t opea/llm-vllm:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/vllm/docker/Dockerfile.microservice .
```

### 5. Build Dataprep Image

```bash
@@ -128,7 +146,8 @@ Build frontend Docker image that enables Conversational experience with ChatQnA

cd GenAIExamples/ChatQnA/docker/ui/
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/chatqna"
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
docker build --no-cache -t opea/chatqna-conversation-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT --build-arg DATAPREP_SERVICE_ENDPOINT=$DATAPREP_SERVICE_ENDPOINT -f ./docker/Dockerfile.react .
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6008/v1/dataprep/get_file"
docker build --no-cache -t opea/chatqna-conversation-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT --build-arg DATAPREP_SERVICE_ENDPOINT=$DATAPREP_SERVICE_ENDPOINT --build-arg DATAPREP_GET_FILE_ENDPOINT=$DATAPREP_GET_FILE_ENDPOINT -f ./docker/Dockerfile.react .
cd ../../../..
```
@@ -138,7 +157,7 @@ Then run the command `docker images`, you will have the following 7 Docker Image

2. `opea/embedding-tei:latest`
3. `opea/retriever-redis:latest`
4. `opea/reranking-tei:latest`
5. `opea/llm-tgi:latest`
5. `opea/llm-tgi:latest` or `opea/llm-vllm:latest`
6. `opea/chatqna:latest`
7. `opea/chatqna-ui:latest`
@@ -146,7 +165,7 @@ Then run the command `docker images`, you will have the following 7 Docker Image

### Setup Environment Variables

Since the `docker_compose.yaml` will consume some environment variables, you need to set them up in advance as below.
Since the `compose.yaml` will consume some environment variables, you need to set them up in advance as below.

**Export the value of the public IP address of your Xeon server to the `host_ip` environment variable**
@@ -180,6 +199,8 @@ export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"

export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006"
export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
export TGI_LLM_ENDPOINT="http://${host_ip}:9009"
export vLLM_LLM_ENDPOINT="http://${host_ip}:9009"
export LLM_SERVICE_PORT=9000
export REDIS_URL="redis://${host_ip}:6379"
export INDEX_NAME="rag-redis"
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
@@ -190,6 +211,8 @@ export RERANK_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/chatqna"
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6008/v1/dataprep/get_file"
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6009/v1/dataprep/delete_file"
```

Note: Please replace `host_ip` with your external IP address; do not use localhost.
@@ -200,7 +223,18 @@ Note: Please replace with `host_ip` with you external IP address, do not use loc

```bash
cd GenAIExamples/ChatQnA/docker/xeon/
docker compose -f docker_compose.yaml up -d
```

If you use the TGI backend:

```bash
docker compose -f compose.yaml up -d
```

If you use the vLLM backend:

```bash
docker compose -f compose_vllm.yaml up -d
```
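
Whichever backend you choose, it is worth confirming that all containers started cleanly before moving on; the service names below match those in the compose file (e.g. `tgi-service`):

```bash
docker compose ps                      # every service should show as running
docker logs tgi-service 2>&1 | tail    # model download/load progress for the LLM backend
```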

### Validate Microservices
@@ -223,21 +257,19 @@ curl http://${host_ip}:6000/v1/embeddings\

-H 'Content-Type: application/json'
```

3. Retriever Microservice
   To validate the retriever microservice, you need to generate a mock embedding vector of length 768 in a Python script:
3. Retriever Microservice

```Python
import random
embedding = [random.uniform(-1, 1) for _ in range(768)]
print(embedding)
```
To consume the retriever microservice, you need to generate a mock embedding vector with a Python script. The length of the embedding vector is determined by the embedding model. Here we use `EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"`, whose vector size is 768.

Then substitute your mock embedding vector for the `${your_embedding}` in the following cURL command:
Check the vector dimension of your embedding model and set the `your_embedding` dimension to match it.

```bash
export your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
curl http://${host_ip}:7000/v1/retrieval \
-X POST \
-d '{"text":"What is the revenue of Nike in 2023?","embedding":"'"${your_embedding}"'"}' \
-d "{\"text\":\"test\",\"embedding\":${your_embedding}}" \
-H 'Content-Type: application/json'
```
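
If you swap in a different embedding model and are unsure of its output dimension, one way to check it empirically (a sketch that assumes the TEI embedding service from step 1 is already up on port 6006) is to embed a test string and count the returned floats:

```bash
# request one embedding from the running TEI service and print its length
curl -s http://${host_ip}:6006/embed \
-X POST \
-d '{"inputs":"What is Deep Learning?"}' \
-H 'Content-Type: application/json' | python3 -c "import sys, json; print(len(json.load(sys.stdin)[0]))"
```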
@@ -259,15 +291,23 @@ curl http://${host_ip}:8000/v1/reranking\

-H 'Content-Type: application/json'
```

6. TGI Service
6. LLM backend Service

```bash
# TGI service
curl http://${host_ip}:9009/generate \
-X POST \
-d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
-H 'Content-Type: application/json'
```

```bash
# vLLM Service
curl http://${your_ip}:9009/v1/completions \
-H "Content-Type: application/json" \
-d '{"model": "Intel/neural-chat-7b-v3-3", "prompt": "What is Deep Learning?", "max_tokens": 32, "temperature": 0}'
```
7. LLM Microservice

```bash
@@ -309,9 +349,35 @@ curl -X POST "http://${host_ip}:6007/v1/dataprep" \

This command updates a knowledge base by submitting a list of HTTP links for processing.
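
Uploading a local file goes through the same `dataprep` endpoint. A sketch (assuming a PDF such as `nke-10k-2023.pdf` sits in the current directory, and that the service accepts multipart form uploads):

```bash
curl -X POST "http://${host_ip}:6007/v1/dataprep" \
-H "Content-Type: multipart/form-data" \
-F "files=@./nke-10k-2023.pdf"
```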

You can also get the list of files you uploaded:

```bash
curl -X POST "http://${host_ip}:6008/v1/dataprep/get_file" \
-H "Content-Type: application/json"
```

To delete the file/link you uploaded:

```bash
# delete link
curl -X POST "http://${host_ip}:6009/v1/dataprep/delete_file" \
-d '{"file_path": "https://opea.dev"}' \
-H "Content-Type: application/json"

# delete file
curl -X POST "http://${host_ip}:6009/v1/dataprep/delete_file" \
-d '{"file_path": "nke-10k-2023.pdf"}' \
-H "Content-Type: application/json"

# delete all uploaded files and links
curl -X POST "http://${host_ip}:6009/v1/dataprep/delete_file" \
-d '{"file_path": "all"}' \
-H "Content-Type: application/json"
```
## Enable LangSmith for Monitoring Application (Optional)

LangSmith offers tools to debug, evaluate, and monitor language models and intelligent agents. It can be used to assess benchmark data for each microservice. Before launching your services with `docker compose -f docker_compose.yaml up -d`, you need to enable LangSmith tracing by setting the `LANGCHAIN_TRACING_V2` environment variable to true and configuring your LangChain API key.
LangSmith offers tools to debug, evaluate, and monitor language models and intelligent agents. It can be used to assess benchmark data for each microservice. Before launching your services with `docker compose -f compose.yaml up -d`, you need to enable LangSmith tracing by setting the `LANGCHAIN_TRACING_V2` environment variable to true and configuring your LangChain API key.

Here's how you can do it:

@@ -330,7 +396,7 @@ export LANGCHAIN_API_KEY=ls_...
## 🚀 Launch the UI

To access the frontend, open the following URL in your browser: http://{host_ip}:5173. By default, the UI runs on port 5173 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `docker_compose.yaml` file as shown below:
To access the frontend, open the following URL in your browser: http://{host_ip}:5173. By default, the UI runs on port 5173 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `compose.yaml` file as shown below:

```yaml
chaqna-gaudi-ui-server:
@@ -340,12 +406,30 @@ To access the frontend, open the following URL in your browser: http://{host_ip}
  - "80:5173"
```
## 🚀 Launch the Conversational UI (react)
## 🚀 Launch the Conversational UI (Optional)

To access the Conversational UI frontend, open the following URL in your browser: http://{host_ip}:5174. By default, the UI runs on port 80 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `docker_compose.yaml` file as shown below:
To access the Conversational UI (react based) frontend, modify the UI service in the `compose.yaml` file. Replace `chaqna-gaudi-ui-server` service with the `chatqna-gaudi-conversation-ui-server` service as per the config below:

```yaml
chaqna-xeon-conversation-ui-server:
chaqna-gaudi-conversation-ui-server:
  image: opea/chatqna-conversation-ui:latest
  container_name: chatqna-gaudi-conversation-ui-server
  environment:
    - no_proxy=${no_proxy}
    - https_proxy=${https_proxy}
    - http_proxy=${http_proxy}
  ports:
    - "5174:80"
  depends_on:
    - chaqna-gaudi-backend-server
  ipc: host
  restart: always
```

Once the services are up, open the following URL in your browser: http://{host_ip}:5174. By default, the UI runs on port 80 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `compose.yaml` file as shown below:

```yaml
chaqna-gaudi-conversation-ui-server:
  image: opea/chatqna-conversation-ui:latest
  ...
  ports:
393
ChatQnA/docker/xeon/README_qdrant.md
Normal file
@@ -0,0 +1,393 @@

# Build Mega Service of ChatQnA (with Qdrant) on Xeon

This document outlines the deployment process for a ChatQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on an Intel Xeon server. The steps include Docker image creation, container deployment via Docker Compose, and service execution to integrate microservices such as `embedding`, `retriever`, `rerank`, and `llm`. We will publish the Docker images to Docker Hub soon, which will simplify the deployment process for this service.

## 🚀 Apply Xeon Server on AWS

To apply a Xeon server on AWS, start by creating an AWS account if you don't have one already. Then, head to the [EC2 Console](https://console.aws.amazon.com/ec2/v2/home) to begin the process. Within the EC2 service, select the Amazon EC2 M7i or M7i-flex instance type to leverage the power of 4th Generation Intel Xeon Scalable processors. These instances are optimized for high-performance computing and demanding workloads.

For detailed information about these instance types, you can refer to this [link](https://aws.amazon.com/ec2/instance-types/m7i/). Once you've chosen the appropriate instance type, proceed with configuring your instance settings, including network configurations, security groups, and storage options.

After launching your instance, you can connect to it using SSH (for Linux instances) or Remote Desktop Protocol (RDP) (for Windows instances). From there, you'll have full access to your Xeon server, allowing you to install, configure, and manage your applications as needed.

**Certain ports in the EC2 instance need to be opened up in the security group for the microservices to work with the curl commands**

> See one example below. Please open up these ports in the EC2 instance based on the IP addresses you want to allow
```
qdrant-vector-db
===============
Port 6333 - Open to 0.0.0.0/0
Port 6334 - Open to 0.0.0.0/0

tei_embedding_service
=====================
Port 6006 - Open to 0.0.0.0/0

embedding
=========
Port 6000 - Open to 0.0.0.0/0

retriever
=========
Port 7000 - Open to 0.0.0.0/0

tei_xeon_service
================
Port 8808 - Open to 0.0.0.0/0

reranking
=========
Port 8000 - Open to 0.0.0.0/0

tgi-service
===========
Port 9009 - Open to 0.0.0.0/0

llm
===
Port 9000 - Open to 0.0.0.0/0

chaqna-xeon-backend-server
==========================
Port 8888 - Open to 0.0.0.0/0

chaqna-xeon-ui-server
=====================
Port 5173 - Open to 0.0.0.0/0
```
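
If you prefer to script this rather than click through the console, the standard AWS CLI call for opening one of these ports looks like the sketch below (the security-group ID is a placeholder; repeat per port and CIDR you want to allow):

```bash
aws ec2 authorize-security-group-ingress \
--group-id sg-0123456789abcdef0 \
--protocol tcp \
--port 6333 \
--cidr 0.0.0.0/0
```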
## 🚀 Build Docker Images

First of all, you need to build the Docker images locally from the GenAIComps source:

```bash
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
```
### 1. Build Embedding Image

```bash
docker build --no-cache -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/langchain/docker/Dockerfile .
```

### 2. Build Retriever Image

```bash
docker build --no-cache -t opea/retriever-qdrant:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/haystack/qdrant/docker/Dockerfile .
```

### 3. Build Rerank Image

```bash
docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/tei/docker/Dockerfile .
```

### 4. Build LLM Image

```bash
docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile .
```

### 5. Build Dataprep Image

```bash
docker build --no-cache -t opea/dataprep-qdrant:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/qdrant/docker/Dockerfile .
cd ..
```
### 6. Build MegaService Docker Image

To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `chatqna.py` Python script. Build the MegaService Docker image via the command below:

```bash
git clone https://github.com/opea-project/GenAIExamples.git
cd GenAIExamples/ChatQnA/docker
docker build --no-cache -t opea/chatqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
cd ../../..
```

### 7. Build UI Docker Image

Build the frontend Docker image via the command below:

```bash
cd GenAIExamples/ChatQnA/docker/ui/
docker build --no-cache -t opea/chatqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile .
cd ../../../..
```

### 8. Build Conversational React UI Docker Image (Optional)

Build the frontend Docker image that enables a conversational experience with the ChatQnA megaservice via the command below:

**Export the value of the public IP address of your Xeon server to the `host_ip` environment variable**

```bash
cd GenAIExamples/ChatQnA/docker/ui/
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/chatqna"
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6008/v1/dataprep/get_file"
docker build --no-cache -t opea/chatqna-conversation-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT --build-arg DATAPREP_SERVICE_ENDPOINT=$DATAPREP_SERVICE_ENDPOINT --build-arg DATAPREP_GET_FILE_ENDPOINT=$DATAPREP_GET_FILE_ENDPOINT -f ./docker/Dockerfile.react .
cd ../../../..
```

Then run the command `docker images`; you will see the following 7 Docker images:

1. `opea/dataprep-qdrant:latest`
2. `opea/embedding-tei:latest`
3. `opea/retriever-qdrant:latest`
4. `opea/reranking-tei:latest`
5. `opea/llm-tgi:latest`
6. `opea/chatqna:latest`
7. `opea/chatqna-ui:latest`
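
A quick way to confirm that all of them were built:

```bash
docker images | grep opea
```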
## 🚀 Start Microservices

### Setup Environment Variables

Since the `compose.yaml` will consume some environment variables, you need to set them up in advance as below.

**Export the value of the public IP address of your Xeon server to the `host_ip` environment variable**

> Replace External_Public_IP below with the actual IPv4 value

```
export host_ip="External_Public_IP"
```

**Export the value of your Huggingface API token to the `your_hf_api_token` environment variable**

> Replace Your_Huggingface_API_Token below with your actual Huggingface API token value

```
export your_hf_api_token="Your_Huggingface_API_Token"
```

**Append the value of the public IP address to the no_proxy list**

```
export your_no_proxy=${your_no_proxy},"External_Public_IP"
```
```bash
|
||||
export no_proxy=${your_no_proxy}
|
||||
export http_proxy=${your_http_proxy}
|
||||
export https_proxy=${your_http_proxy}
|
||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
|
||||
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006"
|
||||
export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
|
||||
export TGI_LLM_ENDPOINT="http://${host_ip}:9009"
|
||||
export QDRANT_HOST=${host_ip}
|
||||
export QDRANT_PORT=6333
|
||||
export INDEX_NAME="rag-qdrant"
|
||||
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
|
||||
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
|
||||
export RERANK_SERVICE_HOST_IP=${host_ip}
|
||||
export LLM_SERVICE_HOST_IP=${host_ip}
|
||||
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/chatqna"
|
||||
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
|
||||
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6008/v1/dataprep/get_file"
|
||||
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6009/v1/dataprep/delete_file"
|
||||
```
|
||||
|
||||
Note: Please replace with `host_ip` with you external IP address, do not use localhost.
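
Before moving on, it can save a debugging round-trip to confirm the variables actually landed in your shell. A minimal sanity-check sketch (adjust the variable list to match the set you exported above):

```bash
# Print each required variable, warning if any is empty (uses bash indirect expansion).
for var in host_ip HUGGINGFACEHUB_API_TOKEN EMBEDDING_MODEL_ID RERANK_MODEL_ID LLM_MODEL_ID TEI_EMBEDDING_ENDPOINT; do
  if [ -z "${!var}" ]; then echo "WARNING: $var is not set"; else echo "$var=${!var}"; fi
done
```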

### Start All the Service Containers

> Before running the `docker compose` command, you need to be in the folder that contains the Docker Compose yaml file.

```bash
cd GenAIExamples/ChatQnA/docker/xeon/
docker compose up -d
```
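
Before validating the endpoints, it is worth confirming that all containers came up cleanly. A quick status/log sketch, assuming the container names from `compose.yaml` (for example `tgi-service`):

```bash
# List container states; then tail the logs of any service that is restarting or unhealthy.
docker compose ps
docker logs tgi-service 2>&1 | tail -n 20
```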

### Validate Microservices

1. TEI Embedding Service

```bash
curl ${host_ip}:6006/embed \
    -X POST \
    -d '{"inputs":"What is Deep Learning?"}' \
    -H 'Content-Type: application/json'
```

2. Embedding Microservice

```bash
curl http://${host_ip}:6000/v1/embeddings \
    -X POST \
    -d '{"text":"hello"}' \
    -H 'Content-Type: application/json'
```

3. Retriever Microservice

To validate the retriever microservice, you need to generate a mock embedding vector of length 768 with a Python script:

```python
import random

embedding = [random.uniform(-1, 1) for _ in range(768)]
print(embedding)
```

Then substitute your mock embedding vector for `${your_embedding}` in the following cURL command (the vector must be passed as a JSON array, not a quoted string):

```bash
curl http://${host_ip}:7000/v1/retrieval \
    -X POST \
    -d '{"text":"What is the revenue of Nike in 2023?","embedding":'"${your_embedding}"'}' \
    -H 'Content-Type: application/json'
```
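
If you would rather not paste the vector by hand, the generation and the request can be combined. A one-shot sketch, assuming `python3` is on the PATH:

```bash
# Generate the 768-dim mock embedding and substitute it directly into the request body.
your_embedding=$(python3 -c "import random; print([random.uniform(-1, 1) for _ in range(768)])")
curl http://${host_ip}:7000/v1/retrieval \
    -X POST \
    -d '{"text":"What is the revenue of Nike in 2023?","embedding":'"${your_embedding}"'}' \
    -H 'Content-Type: application/json'
```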

4. TEI Reranking Service

```bash
curl http://${host_ip}:8808/rerank \
    -X POST \
    -d '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' \
    -H 'Content-Type: application/json'
```

5. Reranking Microservice

```bash
curl http://${host_ip}:8000/v1/reranking \
    -X POST \
    -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
    -H 'Content-Type: application/json'
```

6. TGI Service

```bash
curl http://${host_ip}:9009/generate \
    -X POST \
    -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
    -H 'Content-Type: application/json'
```

7. LLM Microservice

```bash
curl http://${host_ip}:9000/v1/chat/completions \
    -X POST \
    -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
    -H 'Content-Type: application/json'
```

8. MegaService

```bash
curl http://${host_ip}:8888/v1/chatqna -H "Content-Type: application/json" -d '{
    "messages": "What is the revenue of Nike in 2023?"
    }'
```

9. Dataprep Microservice (Optional)

If you want to update the default knowledge base, you can use the following commands:

Update the knowledge base via local file upload:

```bash
curl -X POST "http://${host_ip}:6007/v1/dataprep" \
    -H "Content-Type: multipart/form-data" \
    -F "files=@./nke-10k-2023.pdf"
```

This command updates the knowledge base by uploading a local file for processing. Update the file path according to your environment.

Add to the knowledge base via HTTP links:

```bash
curl -X POST "http://${host_ip}:6007/v1/dataprep" \
    -H "Content-Type: multipart/form-data" \
    -F 'link_list=["https://opea.dev"]'
```

This command updates the knowledge base by submitting a list of HTTP links for processing.

You can also retrieve the list of files you uploaded:

```bash
curl -X POST "http://${host_ip}:6008/v1/dataprep/get_file" \
    -H "Content-Type: application/json"
```

To delete a file or link you uploaded:

```bash
# delete link
curl -X POST "http://${host_ip}:6009/v1/dataprep/delete_file" \
    -d '{"file_path": "https://opea.dev"}' \
    -H "Content-Type: application/json"

# delete file
curl -X POST "http://${host_ip}:6009/v1/dataprep/delete_file" \
    -d '{"file_path": "nke-10k-2023.pdf"}' \
    -H "Content-Type: application/json"

# delete all uploaded files and links
curl -X POST "http://${host_ip}:6009/v1/dataprep/delete_file" \
    -d '{"file_path": "all"}' \
    -H "Content-Type: application/json"
```
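
After any delete, you can re-query the file list to confirm the knowledge base is in the expected state. A verification sketch using the `get_file` endpoint shown above:

```bash
# Should return an empty list after deleting all uploaded files and links.
curl -X POST "http://${host_ip}:6008/v1/dataprep/get_file" \
    -H "Content-Type: application/json"
```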

## Enable LangSmith for Monitoring the Application (Optional)

LangSmith offers tools to debug, evaluate, and monitor language models and intelligent agents. It can be used to assess benchmark data for each microservice. Before launching your services with `docker compose -f compose.yaml up -d`, you need to enable LangSmith tracing by setting the `LANGCHAIN_TRACING_V2` environment variable to true and configuring your LangChain API key.

Here's how you can do it:

1. Install the latest version of LangSmith:

```bash
pip install -U langsmith
```

2. Set the necessary environment variables:

```bash
export LANGCHAIN_TRACING_V2=true
export LANGCHAIN_API_KEY=ls_...
```
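
The `compose.yaml` file forwards `LANGCHAIN_API_KEY` and `LANGCHAIN_TRACING_V2` into the embedding, retriever, reranking, and llm containers, so exporting them before `docker compose up` is sufficient. If you want to double-check what the services will receive, a quick sketch:

```bash
# Render the effective compose configuration and inspect the LangChain settings.
docker compose config | grep LANGCHAIN
```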

## 🚀 Launch the UI

To access the frontend, open the following URL in your browser: http://{host_ip}:5173. By default, the UI runs on port 5173 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `compose.yaml` file as shown below:

```yaml
  chaqna-xeon-ui-server:
    image: opea/chatqna-ui:latest
    ...
    ports:
      - "80:5173"
```
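
After editing the port mapping, recreate the UI container so the change takes effect. A sketch, assuming the service name from the snippet above:

```bash
docker compose up -d chaqna-xeon-ui-server
```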

## 🚀 Launch the Conversational UI (React)

To access the Conversational UI frontend, open the following URL in your browser: http://{host_ip}:5174. By default, the UI runs on port 80 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `compose.yaml` file as shown below:

```yaml
  chaqna-xeon-conversation-ui-server:
    image: opea/chatqna-conversation-ui:latest
    ...
    ports:
      - "80:80"
```



Here is an example of running ChatQnA:



Here is an example of running ChatQnA with Conversational UI (React):


195
ChatQnA/docker/xeon/compose.yaml
Normal file
@@ -0,0 +1,195 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

version: "3.8"

services:
  redis-vector-db:
    image: redis/redis-stack:7.2.0-v9
    container_name: redis-vector-db
    ports:
      - "6379:6379"
      - "8001:8001"
  dataprep-redis-service:
    image: opea/dataprep-redis:latest
    container_name: dataprep-redis-server
    depends_on:
      - redis-vector-db
    ports:
      - "6007:6007"
      - "6008:6008"
      - "6009:6009"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      REDIS_URL: ${REDIS_URL}
      INDEX_NAME: ${INDEX_NAME}
  tei-embedding-service:
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
    container_name: tei-embedding-server
    ports:
      - "6006:80"
    volumes:
      - "./data:/data"
    shm_size: 1g
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
    command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
  embedding:
    image: opea/embedding-tei:latest
    container_name: embedding-tei-server
    depends_on:
      - tei-embedding-service
    ports:
      - "6000:6000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-embedding-service"
    restart: unless-stopped
  retriever:
    image: opea/retriever-redis:latest
    container_name: retriever-redis-server
    depends_on:
      - redis-vector-db
    ports:
      - "7000:7000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      REDIS_URL: ${REDIS_URL}
      INDEX_NAME: ${INDEX_NAME}
      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-retriever-service"
    restart: unless-stopped
  tei-reranking-service:
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
    container_name: tei-reranking-server
    ports:
      - "8808:80"
    volumes:
      - "./data:/data"
    shm_size: 1g
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
    command: --model-id ${RERANK_MODEL_ID} --auto-truncate
  reranking:
    image: opea/reranking-tei:latest
    container_name: reranking-tei-xeon-server
    depends_on:
      - tei-reranking-service
    ports:
      - "8000:8000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-reranking-service"
    restart: unless-stopped
  tgi-service:
    image: ghcr.io/huggingface/text-generation-inference:2.1.0
    container_name: tgi-service
    ports:
      - "9009:80"
    volumes:
      - "./data:/data"
    shm_size: 1g
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
    command: --model-id ${LLM_MODEL_ID}
  llm:
    image: opea/llm-tgi:latest
    container_name: llm-tgi-server
    depends_on:
      - tgi-service
    ports:
      - "9000:9000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-llm-service"
    restart: unless-stopped
  chaqna-xeon-backend-server:
    image: opea/chatqna:latest
    container_name: chatqna-xeon-backend-server
    depends_on:
      - redis-vector-db
      - tei-embedding-service
      - embedding
      - retriever
      - tei-reranking-service
      - reranking
      - tgi-service
      - llm
    ports:
      - "8888:8888"
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
      - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
      - EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
      - RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
      - RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP}
      - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
    ipc: host
    restart: always
  chaqna-xeon-ui-server:
    image: opea/chatqna-ui:latest
    container_name: chatqna-xeon-ui-server
    depends_on:
      - chaqna-xeon-backend-server
    ports:
      - "5173:5173"
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
      - CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
      - UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT}
      - GET_FILE=${DATAPREP_GET_FILE_ENDPOINT}
      - DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT}
    ipc: host
    restart: always

networks:
  default:
    driver: bridge
205
ChatQnA/docker/xeon/docker_compose_qdrant.yaml
Normal file
@@ -0,0 +1,205 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

version: "3.8"

services:
  qdrant-vector-db:
    image: qdrant/qdrant
    container_name: qdrant-vector-db
    ports:
      - "6333:6333"
      - "6334:6334"
  dataprep-qdrant-service:
    image: opea/dataprep-qdrant:latest
    container_name: dataprep-qdrant-server
    depends_on:
      - qdrant-vector-db
    ports:
      - "6000:6000"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      QDRANT: ${host_ip}
      QDRANT_PORT: 6333
      COLLECTION_NAME: ${INDEX_NAME}
  tei-embedding-service:
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
    container_name: tei-embedding-server
    ports:
      - "6006:80"
    volumes:
      - "./data:/data"
    shm_size: 1g
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
    command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
  embedding:
    image: opea/embedding-tei:latest
    container_name: embedding-tei-server
    depends_on:
      - tei-embedding-service
    ports:
      - "6000:6000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-embedding-service"
    restart: unless-stopped
  retriever:
    image: opea/retriever-qdrant:latest
    container_name: retriever-qdrant-server
    depends_on:
      - qdrant-vector-db
    ports:
      - "7000:7000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      QDRANT_HOST: ${host_ip}
      QDRANT_PORT: 6333
      INDEX_NAME: ${INDEX_NAME}
      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
    restart: unless-stopped
  tei-reranking-service:
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
    container_name: tei-reranking-server
    ports:
      - "8808:80"
    volumes:
      - "./data:/data"
    shm_size: 1g
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
    command: --model-id ${RERANK_MODEL_ID} --auto-truncate
  reranking:
    image: opea/reranking-tei:latest
    container_name: reranking-tei-xeon-server
    depends_on:
      - tei-reranking-service
    ports:
      - "8000:8000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-reranking-service"
    restart: unless-stopped
  tgi-service:
    image: ghcr.io/huggingface/text-generation-inference:2.1.0
    container_name: tgi-service
    ports:
      - "9009:80"
    volumes:
      - "./data:/data"
    shm_size: 1g
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
    command: --model-id ${LLM_MODEL_ID}
  llm:
    image: opea/llm-tgi:latest
    container_name: llm-tgi-server
    depends_on:
      - tgi-service
    ports:
      - "9000:9000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-llm-service"
    restart: unless-stopped
  chaqna-xeon-backend-server:
    image: opea/chatqna:latest
    container_name: chatqna-xeon-backend-server
    depends_on:
      - qdrant-vector-db
      - tei-embedding-service
      - embedding
      - retriever
      - tei-reranking-service
      - reranking
      - tgi-service
      - llm
    ports:
      - "8888:8888"
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
      - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
      - EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
      - RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
      - RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP}
      - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
    ipc: host
    restart: always
  chaqna-xeon-ui-server:
    image: opea/chatqna-ui:latest
    container_name: chatqna-xeon-ui-server
    depends_on:
      - chaqna-xeon-backend-server
    ports:
      - "5173:5173"
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
      - CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
      - UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT}
      - GET_FILE=${DATAPREP_GET_FILE_ENDPOINT}
      - DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT}
    ipc: host
    restart: always
  chaqna-xeon-conversation-ui-server:
    image: opea/chatqna-conversation-ui:latest
    container_name: chatqna-xeon-conversation-ui-server
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
    ports:
      - 5174:80
    depends_on:
      - chaqna-xeon-backend-server
    ipc: host
    restart: always

networks:
  default:
    driver: bridge
@@ -18,6 +18,8 @@ services:
      - redis-vector-db
    ports:
      - "6007:6007"
      - "6008:6008"
      - "6009:6009"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
@@ -109,27 +111,26 @@ services:
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-reranking-service"
    restart: unless-stopped
  tgi_service:
    image: ghcr.io/huggingface/text-generation-inference:1.4
    container_name: tgi-service
  vllm_service:
    image: vllm:cpu
    container_name: vllm-service
    ports:
      - "9009:80"
    volumes:
      - "./data:/data"
    shm_size: 1g
    shm_size: 128g
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
    command: --model-id ${LLM_MODEL_ID}
      LLM_MODEL_ID: ${LLM_MODEL_ID}
    command: --model $LLM_MODEL_ID --host 0.0.0.0 --port 80
  llm:
    image: opea/llm-tgi:latest
    container_name: llm-tgi-server
    image: opea/llm-vllm:latest
    container_name: llm-vllm-server
    depends_on:
      - tgi_service
      - vllm_service
    ports:
      - "9000:9000"
    ipc: host
@@ -137,8 +138,9 @@ services:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
      vLLM_ENDPOINT: ${vLLM_LLM_ENDPOINT}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      LLM_MODEL: ${LLM_MODEL_ID}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
@@ -155,7 +157,7 @@ services:
      - retriever
      - tei-reranking-service
      - reranking
      - tgi_service
      - vllm_service
      - llm
    ports:
      - "8888:8888"
@@ -183,6 +185,8 @@ services:
      - http_proxy=${http_proxy}
      - CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
      - UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT}
      - GET_FILE=${DATAPREP_GET_FILE_ENDPOINT}
      - DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT}
    ipc: host
    restart: always
  chaqna-xeon-conversation-ui-server:
23
ChatQnA/docker/xeon/set_env.sh
Normal file
@@ -0,0 +1,23 @@
#!/usr/bin/env bash

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0


export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006"
export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
export TGI_LLM_ENDPOINT="http://${host_ip}:9009"
export REDIS_URL="redis://${host_ip}:6379"
export INDEX_NAME="rag-redis"
export MEGA_SERVICE_HOST_IP=${host_ip}
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
export RERANK_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/chatqna"
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6008/v1/dataprep/get_file"
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6009/v1/dataprep/delete_file"
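Because this script only exports variables, it must be sourced into the current shell rather than executed as a child process. A usage sketch, noting that `host_ip` has to be set first since every endpoint above is derived from it:

```bash
export host_ip="External_Public_IP"  # replace with your server's IPv4 address
source ./set_env.sh
```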
106
ChatQnA/kubernetes/README.md
Normal file
@@ -0,0 +1,106 @@
# Deploy ChatQnA in Kubernetes Cluster on Xeon and Gaudi

This document outlines the deployment process for a ChatQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline components on Intel Xeon server and Gaudi machines.

The ChatQnA Service leverages a Kubernetes operator called genai-microservices-connector (GMC). GMC supports connecting microservices to create pipelines based on the specification in the pipeline yaml file, in addition to allowing the user to dynamically control which model is used in a service such as an LLM or embedder. The underlying pipeline language also supports using external services that may be running in public or private clouds elsewhere.

Install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in the "Getting Started" section at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector). We will soon publish images to Docker Hub, at which point no builds will be required, simplifying installation.

The ChatQnA application is defined as a Custom Resource (CR) file that the above GMC operator acts upon. It first checks if the microservices listed in the CR yaml file are running; if not, it starts them and then proceeds to connect them. When the ChatQnA RAG pipeline is ready, the service endpoint details are returned, letting you use the application. Should you use `kubectl get pods` commands, you will see all the component microservices, in particular `embedding`, `retriever`, `rerank`, and `llm`.

## Using prebuilt images

ChatQnA uses the prebuilt images below if you choose a Xeon deployment:

- redis-vector-db: redis/redis-stack:7.2.0-v9
- tei_embedding_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
- embedding: opea/embedding-tei:latest
- retriever: opea/retriever-redis:latest
- tei_xeon_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
- reranking: opea/reranking-tei:latest
- tgi-service: ghcr.io/huggingface/text-generation-inference:1.4
- llm: opea/llm-tgi:latest
- chaqna-xeon-backend-server: opea/chatqna:latest

Should you desire to use the Gaudi accelerator, two alternate images are used for the embedding and llm services.
For Gaudi:

- tei-embedding-service: opea/tei-gaudi:latest
- tgi-service: ghcr.io/huggingface/tgi-gaudi:1.2.1

> [NOTE]
> Please refer to [Xeon README](https://github.com/opea-project/GenAIExamples/blob/main/ChatQnA/docker/xeon/README.md) or [Gaudi README](https://github.com/opea-project/GenAIExamples/blob/main/ChatQnA/docker/gaudi/README.md) to build the OPEA images. These too will be available on Docker Hub soon to simplify use.

## Deploy ChatQnA pipeline

This involves deploying the ChatQnA custom resource. You can use chatQnA_xeon.yaml or, if you have a Gaudi cluster, chatQnA_gaudi.yaml.

1. Create namespace and deploy application

   ```sh
   kubectl create ns chatqa
   kubectl apply -f $(pwd)/chatQnA_xeon.yaml
   ```

2. GMC will reconcile the ChatQnA custom resource and get all related components/services ready. Check if the services are up.

   ```sh
   kubectl get service -n chatqa
   ```

3. Retrieve the application access URL

   ```sh
   kubectl get gmconnectors.gmc.opea.io -n chatqa
   NAME     URL                                                   READY   AGE
   chatqa   http://router-service.chatqa.svc.cluster.local:8080   8/0/8   3m
   ```

4. Deploy a client pod to test the application

   ```sh
   kubectl create deployment client-test -n chatqa --image=python:3.8.13 -- sleep infinity
   ```

5. Access the application using the above URL from the client pod

   ```sh
   export CLIENT_POD=$(kubectl get pod -n chatqa -l app=client-test -o jsonpath={.items..metadata.name})
   export accessUrl=$(kubectl get gmc -n chatqa -o jsonpath="{.items[?(@.metadata.name=='chatqa')].status.accessUrl}")
   kubectl exec "$CLIENT_POD" -n chatqa -- curl $accessUrl -X POST -d '{"text":"What is the revenue of Nike in 2023?","parameters":{"max_new_tokens":17, "do_sample": true}}' -H 'Content-Type: application/json'
   ```

6. Perhaps you want to try another LLM model? Just modify the application custom resource to use another LLM model.

   Should you, for instance, want to change the LLM model you are using in the ChatQnA pipeline, just edit the custom resource file.
   For example, to use Llama-2-7b-chat-hf, make the following edit:

   ```yaml
   - name: Tgi
     internalService:
       serviceName: tgi-service-m
       config:
         LLM_MODEL_ID: Llama-2-7b-chat-hf
   ```

7. Apply the change

   ```
   kubectl apply -f $(pwd)/chatQnA_xeon.yaml
   ```

8. Check that the tgi-svc-deployment has been changed to use the new LLM model

   ```sh
   kubectl get deployment tgi-service-m-deployment -n chatqa -o jsonpath="{.spec.template.spec.containers[*].env[?(@.name=='LLM_MODEL_ID')].value}"
   ```

9. Access the updated pipeline using the same URL from above using the client pod

   ```sh
   kubectl exec "$CLIENT_POD" -n chatqa -- curl $accessUrl -X POST -d '{"text":"What is the revenue of Nike in 2023?","parameters":{"max_new_tokens":17, "do_sample": true}}' -H 'Content-Type: application/json'
   ```

> [NOTE]
> You can remove your ChatQnA pipeline by executing standard Kubernetes `kubectl` commands to remove a custom resource. Verify it was removed by executing `kubectl get pods` in the chatqa namespace. A sketch follows this note.
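A minimal removal sketch, assuming the pipeline was created from chatQnA_xeon.yaml in the chatqa namespace as above:

```sh
# Delete the custom resource that defines the pipeline, then confirm its pods are gone.
kubectl delete -f $(pwd)/chatQnA_xeon.yaml
kubectl get pods -n chatqa
```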
@@ -23,6 +23,7 @@ spec:
          serviceName: embedding-svc
          config:
            endpoint: /v1/embeddings
            TEI_EMBEDDING_ENDPOINT: tei-embedding-gaudi-svc
      - name: TeiEmbeddingGaudi
        internalService:
          serviceName: tei-embedding-gaudi-svc
@@ -33,6 +34,8 @@ spec:
          serviceName: retriever-svc
          config:
            endpoint: /v1/retrieval
            REDIS_URL: redis-vector-db
            TEI_EMBEDDING_ENDPOINT: tei-embedding-gaudi-svc
      - name: VectorDB
        internalService:
          serviceName: redis-vector-db
@@ -43,6 +46,7 @@ spec:
          serviceName: reranking-svc
          config:
            endpoint: /v1/reranking
            TEI_RERANKING_ENDPOINT: tei-reranking-svc
      - name: TeiReranking
        internalService:
          serviceName: tei-reranking-svc
@@ -55,6 +59,7 @@ spec:
          serviceName: llm-svc
          config:
            endpoint: /v1/chat/completions
            TGI_LLM_ENDPOINT: tgi-gaudi-svc
      - name: TgiGaudi
        internalService:
          serviceName: tgi-gaudi-svc
124
ChatQnA/kubernetes/chatQnA_switch_gaudi.yaml
Normal file
@@ -0,0 +1,124 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: gmc.opea.io/v1alpha3
kind: GMConnector
metadata:
  labels:
    app.kubernetes.io/name: gmconnector
    app.kubernetes.io/managed-by: kustomize
    gmc/platform: gaudi
  name: switch
  namespace: switch
spec:
  routerConfig:
    name: router
    serviceName: router-service
  nodes:
    root:
      routerType: Sequence
      steps:
        - name: Embedding
          nodeName: node1
        - name: Reranking
          data: $response
          internalService:
            serviceName: reranking-svc
            config:
              endpoint: /v1/reranking
              TEI_RERANKING_ENDPOINT: tei-reranking-svc
        - name: TeiReranking
          internalService:
            serviceName: tei-reranking-svc
            config:
              endpoint: /rerank
            isDownstreamService: true
        - name: Llm
          data: $response
          nodeName: node2
    node1:
      routerType: Switch
      steps:
        - name: Embedding
          condition: embedding-model-id==large
          internalService:
            serviceName: embedding-svc-large
            config:
              endpoint: /v1/embeddings
              TEI_EMBEDDING_ENDPOINT: tei-embedding-gaudi-svc-bge15
        - name: Embedding
          condition: embedding-model-id==small
          internalService:
            serviceName: embedding-svc-small
            config:
              endpoint: /v1/embeddings
              TEI_EMBEDDING_ENDPOINT: tei-embedding-gaudi-svc-bge-small
        - name: TeiEmbeddingGaudi
          internalService:
            serviceName: tei-embedding-gaudi-svc-bge15
            config:
              MODEL_ID: BAAI/bge-base-en-v1.5
            isDownstreamService: true
        - name: TeiEmbeddingGaudi
          internalService:
            serviceName: tei-embedding-gaudi-svc-bge-small
            config:
              MODEL_ID: BAAI/bge-base-en-v1.5
            isDownstreamService: true
        - name: Retriever
          condition: embedding-model-id==large
          data: $response
          internalService:
            serviceName: retriever-svc-large
            config:
              endpoint: /v1/retrieval
              REDIS_URL: redis-vector-db-large
              TEI_EMBEDDING_ENDPOINT: tei-embedding-gaudi-svc-bge15
        - name: Retriever
          condition: embedding-model-id==small
          data: $response
          internalService:
            serviceName: retriever-svc-small
            config:
              endpoint: /v1/retrieval
              REDIS_URL: redis-vector-db-small
              TEI_EMBEDDING_ENDPOINT: tei-embedding-gaudi-svc-bge-small
        - name: VectorDB
          internalService:
            serviceName: redis-vector-db-large
            isDownstreamService: true
        - name: VectorDB
          internalService:
            serviceName: redis-vector-db-small
            isDownstreamService: true
    node2:
      routerType: Switch
      steps:
        - name: Llm
          condition: model-id==intel
          internalService:
            serviceName: llm-svc-intel
            config:
              endpoint: /v1/chat/completions
              TGI_LLM_ENDPOINT: tgi-gaudi-service-intel
        - name: Llm
          condition: model-id==llama
          internalService:
            serviceName: llm-svc-llama
            config:
              endpoint: /v1/chat/completions
              TGI_LLM_ENDPOINT: tgi-gaudi-service-llama
        - name: TgiGaudi
          internalService:
            serviceName: tgi-gaudi-service-intel
            config:
              endpoint: /generate
              MODEL_ID: Intel/neural-chat-7b-v3-3
            isDownstreamService: true
        - name: TgiGaudi
          internalService:
            serviceName: tgi-gaudi-service-llama
            config:
              endpoint: /generate
              MODEL_ID: openlm-research/open_llama_3b
            isDownstreamService: true
124
ChatQnA/kubernetes/chatQnA_switch_xeon.yaml
Normal file
@@ -0,0 +1,124 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: gmc.opea.io/v1alpha3
kind: GMConnector
metadata:
  labels:
    app.kubernetes.io/name: gmconnector
    app.kubernetes.io/managed-by: kustomize
    gmc/platform: xeon
  name: switch
  namespace: switch
spec:
  routerConfig:
    name: router
    serviceName: router-service
  nodes:
    root:
      routerType: Sequence
      steps:
        - name: Embedding
          nodeName: node1
        - name: Reranking
          data: $response
          internalService:
            serviceName: reranking-svc
            config:
              endpoint: /v1/reranking
              TEI_RERANKING_ENDPOINT: tei-reranking-svc
        - name: TeiReranking
          internalService:
            serviceName: tei-reranking-svc
            config:
              endpoint: /rerank
            isDownstreamService: true
        - name: Llm
          data: $response
          nodeName: node2
    node1:
      routerType: Switch
      steps:
        - name: Embedding
          condition: embedding-model-id==large
          internalService:
            serviceName: embedding-svc-large
            config:
              endpoint: /v1/embeddings
              TEI_EMBEDDING_ENDPOINT: tei-embedding-svc-bge15
        - name: Embedding
          condition: embedding-model-id==small
          internalService:
            serviceName: embedding-svc-small
            config:
              endpoint: /v1/embeddings
              TEI_EMBEDDING_ENDPOINT: tei-embedding-svc-bge-small
        - name: TeiEmbedding
          internalService:
            serviceName: tei-embedding-svc-bge15
            config:
              MODEL_ID: BAAI/bge-base-en-v1.5
            isDownstreamService: true
        - name: TeiEmbedding
          internalService:
            serviceName: tei-embedding-svc-bge-small
            config:
              MODEL_ID: BAAI/bge-base-en-v1.5
            isDownstreamService: true
        - name: Retriever
          condition: embedding-model-id==large
          data: $response
          internalService:
            serviceName: retriever-svc-large
            config:
              endpoint: /v1/retrieval
              REDIS_URL: redis-vector-db-large
              TEI_EMBEDDING_ENDPOINT: tei-embedding-svc-bge15
        - name: Retriever
          condition: embedding-model-id==small
          data: $response
          internalService:
            serviceName: retriever-svc-small
            config:
              endpoint: /v1/retrieval
              REDIS_URL: redis-vector-db-small
              TEI_EMBEDDING_ENDPOINT: tei-embedding-svc-bge-small
        - name: VectorDB
          internalService:
            serviceName: redis-vector-db-large
            isDownstreamService: true
        - name: VectorDB
          internalService:
            serviceName: redis-vector-db-small
            isDownstreamService: true
    node2:
      routerType: Switch
      steps:
        - name: Llm
          condition: model-id==intel
          internalService:
            serviceName: llm-svc-intel
            config:
              endpoint: /v1/chat/completions
              TGI_LLM_ENDPOINT: tgi-service-intel
        - name: Llm
          condition: model-id==llama
          internalService:
            serviceName: llm-svc-llama
            config:
              endpoint: /v1/chat/completions
              TGI_LLM_ENDPOINT: tgi-service-llama
        - name: Tgi
          internalService:
            serviceName: tgi-service-intel
            config:
              endpoint: /generate
              MODEL_ID: Intel/neural-chat-7b-v3-3
            isDownstreamService: true
        - name: Tgi
          internalService:
            serviceName: tgi-service-llama
            config:
              endpoint: /generate
              MODEL_ID: bigscience/bloom-560m
            isDownstreamService: true
@@ -23,6 +23,7 @@ spec:
          serviceName: embedding-svc
          config:
            endpoint: /v1/embeddings
            TEI_EMBEDDING_ENDPOINT: tei-embedding-svc
      - name: TeiEmbedding
        internalService:
          serviceName: tei-embedding-svc
@@ -33,6 +34,8 @@ spec:
          serviceName: retriever-svc
          config:
            endpoint: /v1/retrieval
            REDIS_URL: redis-vector-db
            TEI_EMBEDDING_ENDPOINT: tei-embedding-svc
      - name: VectorDB
        internalService:
          serviceName: redis-vector-db
@@ -43,6 +46,7 @@ spec:
          serviceName: reranking-svc
          config:
            endpoint: /v1/reranking
            TEI_RERANKING_ENDPOINT: tei-reranking-svc
      - name: TeiReranking
        internalService:
          serviceName: tei-reranking-svc
@@ -55,6 +59,7 @@ spec:
          serviceName: llm-svc
          config:
            endpoint: /v1/chat/completions
            TGI_LLM_ENDPOINT: tgi-service-m
      - name: Tgi
        internalService:
          serviceName: tgi-service-m
@@ -1,105 +1,41 @@
<h1 align="center" id="title">Deploy ChatQnA in Kubernetes Cluster on Xeon and Gaudi</h1>
This document outlines the deployment process for a ChatQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline components on Intel Xeon server and Gaudi machines.

The ChatQnA Service leverages a Kubernetes operator called genai-microservices-connector (GMC). GMC supports connecting microservices to create pipelines based on the specification in the pipeline yaml file, in addition to allowing the user to dynamically control which model is used in a service such as an LLM or embedder. The underlying pipeline language also supports using external services that may be running in public or private clouds elsewhere.

Please install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in the "Getting Started" section at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector). We will soon publish images to Docker Hub, at which point no builds will be required, simplifying installation.

The ChatQnA application is defined as a Custom Resource (CR) file that the above GMC operator acts upon. It first checks if the microservices listed in the CR yaml file are running; if not, it starts them and then proceeds to connect them. When the ChatQnA RAG pipeline is ready, the service endpoint details are returned, letting you use the application. Should you use `kubectl get pods` commands, you will see all the component microservices, in particular `embedding`, `retriever`, `rerank`, and `llm`.

## Using prebuilt images

ChatQnA uses the prebuilt images below if you choose a Xeon deployment:

- redis-vector-db: redis/redis-stack:7.2.0-v9
- tei_embedding_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
- embedding: opea/embedding-tei:latest
- retriever: opea/retriever-redis:latest
- tei_xeon_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
- reranking: opea/reranking-tei:latest
- tgi_service: ghcr.io/huggingface/text-generation-inference:1.4
- llm: opea/llm-tgi:latest
- chaqna-xeon-backend-server: opea/chatqna:latest

Should you desire to use the Gaudi accelerator, two alternate images are used for the embedding and llm services.
For Gaudi:

- tei-embedding-service: opea/tei-gaudi:latest
- tgi-service: ghcr.io/huggingface/tgi-gaudi:1.2.1

> [NOTE]
> Please refer to [Xeon README](https://github.com/opea-project/GenAIExamples/blob/main/ChatQnA/docker/xeon/README.md) or [Gaudi README](https://github.com/opea-project/GenAIExamples/blob/main/ChatQnA/docker/gaudi/README.md) to build the OPEA images. These too will be available on Docker Hub soon to simplify use.

## Deploy ChatQnA pipeline
This involves deploying the ChatQnA custom resource. You can use chatQnA_xeon.yaml or, if you have a Gaudi cluster, chatQnA_gaudi.yaml.

```sh
kubectl create ns chatqa
kubectl apply -f $(pwd)/chatQnA_xeon.yaml
```

**GMC will reconcile the ChatQnA custom resource and get all related components/services ready**

```sh
kubectl get service -n chatqa
```

**Obtain the ChatQnA custom resource/pipeline access URL**

```sh
kubectl get gmconnectors.gmc.opea.io -n chatqa
NAME     URL                                                   READY   AGE
chatqa   http://router-service.chatqa.svc.cluster.local:8080   8/0/8   3m
```

**Deploy a client pod to test the ChatQnA application**

```sh
kubectl create deployment client-test -n chatqa --image=python:3.8.13 -- sleep infinity
```

**Access the pipeline using the above URL from the client pod**

```sh
export CLIENT_POD=$(kubectl get pod -l app=client-test -o jsonpath={.items..metadata.name})
export accessUrl=$(kubectl get gmc -n chatqa -o jsonpath="{.items[?(@.metadata.name=='chatqa')].status.accessUrl}")
kubectl exec "$CLIENT_POD" -n chatqa -- curl $accessUrl -X POST -d '{"text":"What is the revenue of Nike in 2023?","parameters":{"max_new_tokens":17, "do_sample": true}}' -H 'Content-Type: application/json'
```

**Modify ChatQnA custom resource to use another LLM model**

Should you, for instance, want to change the LLM model you are using in the ChatQnA pipeline, just edit the custom resource file.
For example, to use Llama-2-7b-chat-hf, make the following edit:

```yaml
- name: Tgi
  internalService:
    serviceName: tgi-svc
    config:
      LLM_MODEL_ID: Llama-2-7b-chat-hf
```

Apply the change using

```
kubectl apply -f $(pwd)/chatQnA_xeon.yaml
```

**Check that the tgi-svc-deployment has been changed to use the new LLM model**

```sh
kubectl get deployment tgi-svc-deployment -n chatqa -o jsonpath="{.spec.template.spec.containers[*].env[?(@.name=='LLM_MODEL_ID')].value}"
```

**Access the updated pipeline using the same URL from above from within the client pod**

```sh
kubectl exec "$CLIENT_POD" -n chatqa -- curl $accessUrl -X POST -d '{"text":"What is the revenue of Nike in 2023?","parameters":{"max_new_tokens":17, "do_sample": true}}' -H 'Content-Type: application/json'
```
# Deploy ChatQnA in Kubernetes Cluster

> [NOTE]
> The following values must be set before you can deploy:
> HUGGINGFACEHUB_API_TOKEN

You can remove your ChatQnA pipeline by executing standard Kubernetes kubectl commands to remove a custom resource. Verify it was removed by executing kubectl get pods in the chatqa namespace.
> You can also customize the "MODEL_ID" if needed.

> You need to make sure you have created the directory `/mnt/opea-models` to save the cached model on the node where the ChatQnA workload is running. Otherwise, you need to modify the `chatqna.yaml` file to change the `model-volume` to a directory that exists on the node.

## Deploy On Xeon

```
cd GenAIExamples/ChatQnA/kubernetes/manifests/xeon
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" chatqna.yaml
kubectl apply -f chatqna.yaml
```

## Deploy On Gaudi

```
cd GenAIExamples/ChatQnA/kubernetes/manifests/gaudi
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" chatqna.yaml
kubectl apply -f chatqna.yaml
```

## Verify Services

To verify the installation, run the command `kubectl get pod` to make sure all pods are running.

Then run the command `kubectl port-forward svc/chatqna 8888:8888` to expose the ChatQnA service for access.

Open another terminal and run the following command to verify the service is working:

```console
curl http://localhost:8888/v1/chatqna \
    -H 'Content-Type: application/json' \
    -d '{"messages": "What is the revenue of Nike in 2023?"}'
```
@@ -1,45 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: chaqna-xeon-backend-server-deploy
spec:
  replicas: 1
  selector:
    matchLabels:
      app: chaqna-xeon-backend-server-deploy
  template:
    metadata:
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: "true"
      labels:
        app: chaqna-xeon-backend-server-deploy
    spec:
      hostIPC: true
      containers:
        - envFrom:
            - configMapRef:
                name: qna-config
          image: opea/chatqna:latest
          imagePullPolicy: IfNotPresent
          name: chaqna-xeon-backend-server-deploy
          args:
          ports:
            - containerPort: 8888
      serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
  name: chaqna-xeon-backend-server-svc
spec:
  type: NodePort
  selector:
    app: chaqna-xeon-backend-server-deploy
  ports:
    - name: service
      port: 8888
      targetPort: 8888
@@ -1,74 +0,0 @@
# Source: llm-uservice/charts/tgi/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: v1
kind: Service
metadata:
  name: docsum-llm-uservice
  labels:
    helm.sh/chart: llm-uservice-0.1.0
    app.kubernetes.io/name: llm-uservice
    app.kubernetes.io/instance: docsum
    app.kubernetes.io/version: "1.0.0"
    app.kubernetes.io/managed-by: Helm
spec:
  type: ClusterIP
  ports:
    - port: 9000
      targetPort: 9000
      protocol: TCP
      name: llm-uservice
  selector:
    app.kubernetes.io/name: llm-uservice
    app.kubernetes.io/instance: docsum
---
# Source: llm-uservice/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: apps/v1
kind: Deployment
metadata:
  name: docsum-llm-uservice
  labels:
    helm.sh/chart: llm-uservice-0.1.0
    app.kubernetes.io/name: llm-uservice
    app.kubernetes.io/instance: docsum
    app.kubernetes.io/version: "1.0.0"
    app.kubernetes.io/managed-by: Helm
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: llm-uservice
      app.kubernetes.io/instance: docsum
  template:
    metadata:
      labels:
        app.kubernetes.io/name: llm-uservice
        app.kubernetes.io/instance: docsum
    spec:
      securityContext: {}
      containers:
        - name: docsum
          envFrom:
            - configMapRef:
                name: qna-config
          env:
            - name: HUGGING_FACE_HUB_TOKEN
              value: $(HUGGINGFACEHUB_API_TOKEN)
            - name: HF_TOKEN
              value: $(HUGGINGFACEHUB_API_TOKEN)
            - name: LANGCHAIN_TRACING_V2
              value: "false"
            - name: LANGCHAIN_PROJECT
              value: "opea-llm-service"
          securityContext: {}
          image: "opea/llm-docsum-tgi:latest"
          imagePullPolicy: IfNotPresent
          ports:
            - name: llm-uservice
              containerPort: 9000
              protocol: TCP
          resources: {}
@@ -1,74 +0,0 @@
# Source: llm-uservice/charts/tgi/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: v1
kind: Service
metadata:
  name: docsum-llm-uservice
  labels:
    helm.sh/chart: llm-uservice-0.1.0
    app.kubernetes.io/name: llm-uservice
    app.kubernetes.io/instance: docsum
    app.kubernetes.io/version: "1.0.0"
    app.kubernetes.io/managed-by: Helm
spec:
  type: ClusterIP
  ports:
    - port: 9000
      targetPort: 9000
      protocol: TCP
      name: llm-uservice
  selector:
    app.kubernetes.io/name: llm-uservice
    app.kubernetes.io/instance: docsum
---
# Source: llm-uservice/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: apps/v1
kind: Deployment
metadata:
  name: docsum-llm-uservice
  labels:
    helm.sh/chart: llm-uservice-0.1.0
    app.kubernetes.io/name: llm-uservice
    app.kubernetes.io/instance: docsum
    app.kubernetes.io/version: "1.0.0"
    app.kubernetes.io/managed-by: Helm
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: llm-uservice
      app.kubernetes.io/instance: docsum
  template:
    metadata:
      labels:
        app.kubernetes.io/name: llm-uservice
        app.kubernetes.io/instance: docsum
    spec:
      securityContext: {}
      containers:
        - name: docsum
          envFrom:
            - configMapRef:
                name: qna-config
          env:
            - name: HUGGING_FACE_HUB_TOKEN
              value: $(HUGGINGFACEHUB_API_TOKEN)
            - name: HF_TOKEN
              value: $(HUGGINGFACEHUB_API_TOKEN)
            - name: LANGCHAIN_TRACING_V2
              value: "false"
            - name: LANGCHAIN_PROJECT
              value: "opea-llm-service"
          securityContext: {}
          image: "opea/llm-docsum-tgi:latest"
          imagePullPolicy: IfNotPresent
          ports:
            - name: llm-uservice
              containerPort: 9000
              protocol: TCP
          resources: {}
1097
ChatQnA/kubernetes/manifests/gaudi/chatqna.yaml
Normal file
@@ -1,12 +0,0 @@
#!/bin/bash

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Array of YAML file names
yaml_files=("qna_configmap_gaudi" "redis-vector-db" "tei_embedding_gaudi_service" "tei_reranking_service" "tgi_gaudi_service" "retriever" "embedding" "reranking" "llm" "chaqna-xeon-backend-server")
for element in ${yaml_files[@]}
do
    echo "Applying manifest from ${element}.yaml"
    kubectl apply -f "${element}.yaml"
done
@@ -1,12 +0,0 @@
#!/bin/bash

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Array of YAML file names
yaml_files=("qna_configmap_xeon" "redis-vector-db" "tei_embedding_service" "tei_reranking_service" "tgi_service" "retriever" "embedding" "reranking" "llm" "chaqna-xeon-backend-server")
for element in ${yaml_files[@]}
do
    echo "Applying manifest from ${element}.yaml"
    kubectl apply -f "${element}.yaml"
done
@@ -1,21 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: v1
kind: ConfigMap
metadata:
  name: qna-config
data:
  EMBEDDING_MODEL_ID: "BAAI/bge-base-en-v1.5"
  RERANK_MODEL_ID: "BAAI/bge-reranker-base"
  LLM_MODEL_ID: "Intel/neural-chat-7b-v3-3"
  TEI_EMBEDDING_ENDPOINT: "http://tei-embedding-gaudi-svc.default.svc.cluster.local:6006"
  TEI_RERANKING_ENDPOINT: "http://tei-reranking-svc.default.svc.cluster.local:8808"
  TGI_LLM_ENDPOINT: "http://tgi-gaudi-svc.default.svc.cluster.local:9009"
  REDIS_URL: "redis://redis-vector-db.default.svc.cluster.local:6379"
  INDEX_NAME: "rag-redis"
  HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
  EMBEDDING_SERVICE_HOST_IP: embedding-svc
  RETRIEVER_SERVICE_HOST_IP: retriever-svc
  RERANK_SERVICE_HOST_IP: reranking-svc
  LLM_SERVICE_HOST_IP: llm-svc