Compare commits


120 Commits
v0.1.0 ... v0.6

Author SHA1 Message Date
chensuyue
aa6b0e88fa remove the examples not target v0.6
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-05-31 21:38:46 +08:00
WenjiaoYue
5019911165 Update the front end to adapt to the new return format (#237)
Signed-off-by: Yue, Wenjiao <wenjiao.yue@intel.com>
2024-05-31 21:23:15 +08:00
Sun, Xuehao
dbee190e64 add UI test for codegen (#244)
Signed-off-by: Yue, Wenjiao <wenjiao.yue@intel.com>
Signed-off-by: Sun, Xuehao <xuehao.sun@intel.com>
2024-05-31 21:22:59 +08:00
Sun, Xuehao
3237ee4c58 Add CodeTrans UI test (#240)
Signed-off-by: Yue, Wenjiao <wenjiao.yue@intel.com>
Signed-off-by: Sun, Xuehao <xuehao.sun@intel.com>
2024-05-31 20:59:18 +08:00
Sun, Xuehao
9fb917be9a Add UI test for DocSum (#245)
Signed-off-by: Yue, Wenjiao <wenjiao.yue@intel.com>
Signed-off-by: Sun, Xuehao <xuehao.sun@intel.com>
2024-05-31 20:57:49 +08:00
zehao-intel
b160010ad3 Modify UI Port for CodeGen to avoid unsafe_port issue (#249)
Signed-off-by: zehao-intel <zehao.huang@intel.com>
2024-05-31 20:55:28 +08:00
zehao-intel
ecb273216b Refine DocSum Test Script (#247)
Signed-off-by: zehao-intel <zehao.huang@intel.com>
2024-05-31 20:07:38 +08:00
Letong Han
6ebfea1f10 Refine CodeTrans Test Scripts (#239)
Signed-off-by: letonghan <letong.han@intel.com>
2024-05-31 18:09:23 +08:00
Letong Han
b012161eb3 refine codegen test scripts (#241)
Signed-off-by: letonghan <letong.han@intel.com>
2024-05-31 18:06:06 +08:00
chen, suyue
8641314d56 bump release version into v0.6 (#238)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-05-31 17:54:53 +08:00
chen, suyue
509d5c66ca unify license copyright (#234)
Signed-off-by: chensuyue <suyue.chen@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-05-31 17:19:30 +08:00
lvliang-intel
e29ce9c0b3 Add HF token for TGI Gated model (#233)
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
Co-authored-by: chen, suyue <suyue.chen@intel.com>
2024-05-31 16:11:52 +08:00
chen, suyue
3efb26cd0a expand CI examples test timeout (#236)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-05-31 14:52:55 +08:00
Letong Han
4eb998633f Refine ChatQnA Test Script (#227)
* refine chatqna test script

Signed-off-by: letonghan <letong.han@intel.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* delete comments

Signed-off-by: letonghan <letong.han@intel.com>

* modify expected result of embedding

Signed-off-by: letonghan <letong.han@intel.com>

* update rerank expected result

Signed-off-by: letonghan <letong.han@intel.com>

* update llm expected result

Signed-off-by: letonghan <letong.han@intel.com>

* update docker compose yaml

Signed-off-by: letonghan <letong.han@intel.com>

* fix conda issue

Signed-off-by: letonghan <letong.han@intel.com>

* add log_path for log collection

Signed-off-by: letonghan <letong.han@intel.com>

---------

Signed-off-by: letonghan <letong.han@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-05-31 10:17:24 +08:00
Sihan Chen
5cf70c3527 Update tts_server.py (#223)
Co-authored-by: chen, suyue <suyue.chen@intel.com>
2024-05-31 00:10:25 +08:00
chen, suyue
7eb402e95b Revert hf_token setting (#226)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-05-30 23:12:03 +08:00
Ying Chun Guo
d659c04a68 Enable manifest integration test in CI (#220)
Signed-off-by: Yingchun Guo <yingchun.guo@intel.com>
2024-05-30 18:08:28 +08:00
lvliang-intel
4ed48a36ab Fix README link issue (#217)
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
2024-05-30 16:09:56 +08:00
lvliang-intel
c54705e57e Replace Reranking model with BGE base (#218)
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
2024-05-30 16:09:09 +08:00
Sihan Chen
fe23d4cc52 fix readme issue (#219)
2024-05-30 16:07:44 +08:00
lvliang-intel
9d3bc0e00c Fix huggingface hub token environment variable (#214)
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
2024-05-30 16:04:59 +08:00
WenjiaoYue
f23acc0077 Update chatQnA upload ut and yaml config (#222)
Signed-off-by: Yue, Wenjiao <wenjiao.yue@intel.com>
2024-05-30 15:58:19 +08:00
Tian, Feng
169fe96332 GenAIExample code structure reorg (#207)
Signed-off-by: Tian, Feng <feng.tian@intel.com>
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-05-30 00:13:49 +08:00
Sun, Xuehao
f3d61d4246 Add ChatQnA E2E test workflow (#144)
Signed-off-by: Sun, Xuehao <xuehao.sun@intel.com>
2024-05-29 22:38:09 +08:00
zehao-intel
c1c6d3387e Change the LLM for CodeGen Example (#206)
Signed-off-by: zehao-intel <zehao.huang@intel.com>
2024-05-29 20:02:36 +08:00
Neo Zhang Jianyu
edf16cbd79 Fix code trans readme (#199)
Co-authored-by: arthw <14088817+arthw@users.noreply.github.com>
2024-05-29 19:26:20 +08:00
WenjiaoYue
4ba95705ad Update ChatQnA upload file logic (#211)
Signed-off-by: Yue, Wenjiao <wenjiao.yue@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-05-29 19:24:42 +08:00
zehao-intel
5d918600ec Add Environment Variables for Dataprep MicroService of ChatQnA (#212)
Signed-off-by: zehao-intel <zehao.huang@intel.com>
2024-05-29 19:23:44 +08:00
lvliang-intel
ee6debe54f Remove model info in curl request (#209)
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
2024-05-29 19:12:27 +08:00
Ying Chun Guo
b509a1907a add manifest validation test to CI (#210)
Signed-off-by: Yingchun Guo <yingchun.guo@intel.com>
2024-05-29 19:03:16 +08:00
Ying Chun Guo
3255392dff improve ChatQnA manifests (#213)
Signed-off-by: Yingchun Guo <yingchun.guo@intel.com>
2024-05-29 18:53:21 +08:00
ZhaoqiongZ
f55398379b update README with format correction (#200)
Signed-off-by: Zheng, Zhaoqiong <zhaoqiong.zheng@intel.com>
2024-05-29 18:52:49 +08:00
Wang, Kai Lawrence
2d9a9da7b4 Refine the CodeGen App Readme (#205)
Signed-off-by: Wang, Kai Lawrence <kai.lawrence.wang@intel.com>
2024-05-29 16:35:33 +08:00
xiguiw
e48f2ab63e [DOC] update ChatQnA README (#201)
Signed-off-by: Wang, Xigui <xigui.wang@intel.com>
2024-05-29 16:32:57 +08:00
dolpher
eadaacc41c Manifests to deploy CodeGen into Kubernetes (#204)
Signed-off-by: Dolpher Du <Dolpher.Du@intel.com>
2024-05-29 14:06:29 +08:00
Ying Chun Guo
9ed3f46258 Change pre-commit-config to support helm charts and k8s manifest yamls (#208)
Signed-off-by: Yingchun Guo <yingchun.guo@intel.com>
2024-05-29 14:06:10 +08:00
Ying Hu
ccd160261b DocSum example doc review update (#203)
Signed-off-by: ying.hu <ying.hu@intel.com>
2024-05-29 11:19:02 +08:00
leslieluyu
f106dd9f03 manifests for deploy ChatQnA into Kubernetes(Gaudi&Xeon) (#191)
* upload manifests for deploy ChatQnA on kubernetes

Signed-off-by: leslieluyu <leslie.luyu@gmail.com>

* add index for deploy into kubernetes

Signed-off-by: leslieluyu <leslie.luyu@gmail.com>

* modify pre-commit-config.yaml for charts

Signed-off-by: leslieluyu <leslie.luyu@gmail.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Signed-off-by: leslieluyu <leslie.luyu@gmail.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-05-29 11:01:55 +08:00
chen, suyue
259cd50ef1 update ui dockerfile copy path (#196)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-05-29 00:44:49 +08:00
Rajiv Mandal
3f26d06d3a Update Readme.md for clarity, security group and curl command (#156)
* Update Readme.md for clarity, security group and curl command

Signed-off-by: mandalrajiv <rajiv.mandal@intel.com>

---------

Signed-off-by: mandalrajiv <rajiv.mandal@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-05-28 13:27:19 +08:00
chen, suyue
2ef00b546e fix typo in test scripts (#195)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-05-28 09:21:47 +08:00
lvliang-intel
924fa8cdc5 Enable LangSmith Tracing for benchmark (#192)
* Enable LangSmith Tracing for benchmark

Signed-off-by: lvliang-intel <liang1.lv@intel.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Signed-off-by: lvliang-intel <liang1.lv@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: chen, suyue <suyue.chen@intel.com>
2024-05-28 08:50:05 +08:00
chen, suyue
979f05fb17 fix checkout in CI for pull_request_target (#193)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-05-27 21:22:21 +08:00
chen, suyue
381189445b optimize docker compose CI workflow (#190)
Signed-off-by: chensuyue <suyue.chen@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-05-27 13:49:20 +08:00
lvliang-intel
8ae28ebfde Format opea docker images name (#186)
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
2024-05-25 22:57:00 +08:00
Letong Han
26ae49c9dd Fix TGI-Gaudi Docker Issue (#177)
Signed-off-by: letonghan <letong.han@intel.com>
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
Signed-off-by: Yue, Wenjiao <wenjiao.yue@intel.com>
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-05-25 22:53:57 +08:00
lvliang-intel
a6b3caf128 Refactor example code (#183)
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
Signed-off-by: Yue, Wenjiao <wenjiao.yue@intel.com>
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-05-24 13:32:14 +08:00
WenjiaoYue
b91a9d10af modify file name (#182)
Signed-off-by: Yue, Wenjiao <wenjiao.yue@intel.com>
2024-05-23 22:00:11 +08:00
chen, suyue
49df2346a9 remove the job id for internal docker image build (#178)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-05-23 19:11:39 +08:00
WenjiaoYue
bb06fa64fa update front_end docker (#176)
Signed-off-by: Yue, Wenjiao <wenjiao.yue@intel.com>
2024-05-23 15:52:45 +08:00
Tyler Titsworth
760397fd8f Add Container Build Action (#166)
Signed-off-by: tylertitsworth <tyler.titsworth@intel.com>
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-05-23 11:28:48 +08:00
lvliang-intel
e22b41e667 Update HW support in README (#169)
* Update HW support in README

Signed-off-by: lvliang-intel <liang1.lv@intel.com>

---------

Signed-off-by: lvliang-intel <liang1.lv@intel.com>
2024-05-22 15:39:21 +08:00
chen, suyue
fb7fef6bd3 Add log collect for CI (#168)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-05-22 14:36:55 +08:00
WenjiaoYue
131a112aa9 Adapting the backend to the new structure (#162)
* Adapting the backend to the new structure

Signed-off-by: Yue, Wenjiao <wenjiao.yue@intel.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Signed-off-by: Yue, Wenjiao <wenjiao.yue@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-05-21 18:46:30 +08:00
zehao-intel
396e88ca4c Fix DocSum UI svelte Folder (#161)
Signed-off-by: zehao-intel <zehao.huang@intel.com>
2024-05-21 16:30:21 +08:00
lvliang-intel
3f285923e4 Add dataprep megaservice in README (#158)
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
2024-05-21 11:27:53 +08:00
chen, suyue
8dc89e2327 expand service waiting time (#160)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-05-20 22:30:34 +08:00
zehao-intel
2f36e57236 Refactor Document Summarization Example with Microservice (#151)
Signed-off-by: zehao-intel <zehao.huang@intel.com>
Co-authored-by: lvliang-intel <liang1.lv@intel.com>
Co-authored-by: chensuyue <suyue.chen@intel.com>
2024-05-20 22:06:24 +08:00
lvliang-intel
6792bc10ca Refactor CodeGen example with microservice (#152)
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
2024-05-20 21:52:24 +08:00
Letong Han
d53eb69ac9 Refactor CodeTrans with GenAIComps Microservices (#115)
Signed-off-by: letonghan <letong.han@intel.com>
Signed-off-by: Spycsh <sihan.chen@intel.com>
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
Co-authored-by: chensuyue <suyue.chen@intel.com>
2024-05-20 21:04:26 +08:00
chen, suyue
4b16f4088e update CodeGen CI workflow for microservice (#159)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-05-20 19:44:03 +08:00
Abolfazl Shahbazi
3efa905e7e Misc style updates (#154)
Signed-off-by: Abolfazl Shahbazi <abolfazl.shahbazi@intel.com>
2024-05-20 09:19:45 +08:00
WenjiaoYue
49999e0935 update Code Gen ui (#150)
Signed-off-by: Yue, Wenjiao <wenjiao.yue@intel.com>
2024-05-17 12:05:05 +08:00
WenjiaoYue
bd3e86f8f7 update chatQnA playwright script (#149)
Signed-off-by: Yue, Wenjiao <wenjiao.yue@intel.com>
2024-05-17 10:24:47 +08:00
chen, suyue
9f1a881dac collect logs in CI test (#147)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-05-16 23:14:22 +08:00
chen, suyue
811c698ef7 update pr template (#148)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-05-16 22:10:47 +08:00
lvliang-intel
14307757ed Fix ChatQnA UI interworking issue (#145)
* Fix ChatQnA UI interworking issue

Signed-off-by: lvliang-intel <liang1.lv@intel.com>

* refine doc

Signed-off-by: lvliang-intel <liang1.lv@intel.com>

* fix time issue

Signed-off-by: lvliang-intel <liang1.lv@intel.com>

---------

Signed-off-by: lvliang-intel <liang1.lv@intel.com>
2024-05-16 16:09:22 +08:00
WenjiaoYue
988581cb05 Add UI test for ChatQnA (#134)
Signed-off-by: Yue, Wenjiao <wenjiao.yue@intel.com>
2024-05-16 10:47:43 +08:00
WenjiaoYue
faab721a10 Create separate two for each api (#143)
* Downgrade flowbite-svelte

* Language Translation

* modify code language

* modify response

* update highlight code

* move ui code into CodeTrans folder

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Create separate two for each api

* Modify the name of the URL

Signed-off-by: Yue, Wenjiao <wenjiao.yue@intel.com>

---------

Signed-off-by: Yue, Wenjiao <wenjiao.yue@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: chen, suyue <suyue.chen@intel.com>
Co-authored-by: lvliang-intel <liang1.lv@intel.com>
2024-05-16 10:46:49 +08:00
chen, suyue
3ff57be004 CI test run megaservice in container (#140)
Signed-off-by: chensuyue <suyue.chen@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-05-16 01:22:12 +08:00
Letong Han
53de8294f8 Add megaservice & frontend in Docker Compose (#141)
Signed-off-by: letonghan <letong.han@intel.com>
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
2024-05-15 23:25:02 +08:00
lvliang-intel
4d067d5a9a Support megaservice streaming output (#138)
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
2024-05-15 14:09:37 +08:00
WenjiaoYue
3cac9c223a Add docker for ChatQnA Front_end (#139)
Signed-off-by: Yue, Wenjiao <wenjiao.yue@intel.com>
2024-05-15 13:58:31 +08:00
lvliang-intel
f593708458 Add restful api access for ChatQnA (#128)
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-05-14 22:12:17 +08:00
Letong Han
f46cae8a3a add env for tgi servie in YAML (#133)
Signed-off-by: letonghan <letong.han@intel.com>
2024-05-14 21:37:47 +08:00
Letong Han
3aace639f1 Add docker for ChatQnA Mega Service on Xeon (#136)
Signed-off-by: letonghan <letong.han@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-05-14 21:37:12 +08:00
chen, suyue
aecfc920dd fix dockerfile path for rerank (#135)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-05-14 21:31:56 +08:00
chen, suyue
c6b6a32d29 add docker container clean up in CI (#131)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-05-14 17:19:25 +08:00
chen, suyue
2887cad65f Add ChatQnA Xeon workflow (#127)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-05-13 23:05:34 +08:00
chen, suyue
7748f64e21 Refine ChatQnA test with microservice structure (#120)
Signed-off-by: chensuyue <suyue.chen@intel.com>
Signed-off-by: letonghan <letong.han@intel.com>
2024-05-13 20:54:29 +08:00
lvliang-intel
e080c267b0 Update ChatQnA Guadi microservice (#126)
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
2024-05-13 17:41:23 +08:00
Letong Han
4885e6d8c7 Fix ChatQnA microservice issues on Xeon (#125)
Signed-off-by: letonghan <letong.han@intel.com>
2024-05-13 15:38:58 +08:00
WenjiaoYue
355d0b64de ChatQnA UT Playwright (#121)
Signed-off-by: Yue, Wenjiao <wenjiao.yue@intel.com>
2024-05-13 11:51:04 +08:00
lvliang-intel
8195bc81ea Fix ChatQnA microservice issues on Gaudi (#123)
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
2024-05-13 11:23:02 +08:00
lvliang-intel
4a9a497bb2 Add ChatQnA microservice implementation on Gaudi (#112)
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
Signed-off-by: letonghan <letong.han@intel.com>
2024-05-10 13:55:59 +08:00
chen, suyue
d2497af362 update license_template.txt (#114)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-05-10 11:24:01 +08:00
Letong Han
997e835852 Support mega service on Xeon of ChatQnA (#111)
* support mega service on xeon of ChatQnA

Signed-off-by: letonghan <letong.han@intel.com>
2024-05-10 08:53:28 +08:00
WenjiaoYue
54c1508074 Code translation UI (#109)
Co-authored-by: chen, suyue <suyue.chen@intel.com>
2024-05-09 13:48:26 +08:00
chen, suyue
f9e9d1c9af fix typo in AudioQnA CI test (#110)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-05-08 11:35:12 +08:00
lvliang-intel
91d9180520 Move ui directory of Language Translation example and add it in README (#107)
* Move ui to LanguageTranslation and add it in README

Signed-off-by: lvliang-intel <liang1.lv@intel.com>
2024-05-06 08:58:54 +08:00
chen, suyue
75949aaf55 add workspace clean up (#106)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-04-30 18:25:30 +08:00
chen, suyue
547b17f56d support trellix scan (#105)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-04-30 18:10:45 +08:00
zehao-intel
5c71489921 Add the Language Translation Example (#94)
Signed-off-by: zehao-intel <zehao.huang@intel.com>
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-04-30 17:54:29 +08:00
Anush
f1b4aef062 Added QNA chat using Qdrant (#100)
Signed-off-by: Anush008 <anushshetty90@gmail.com>
Co-authored-by: lvliang-intel <liang1.lv@intel.com>
2024-04-30 14:52:49 +08:00
chen, suyue
04c5e64287 add hadolint scan in CI (#104)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-04-29 13:41:38 +08:00
qgao007
6bfee2d861 Enable WikiQnA to empower enterprise to use internal wiki resources for RAG. (#98)
Signed-off-by: Qun Gao <qun.gao@intel.com>
2024-04-26 21:58:10 +08:00
Letong Han
3cc035634d [Code Translation] Modify prompt and streaming format (#99)
Signed-off-by: letonghan <letong.han@intel.com>
2024-04-26 21:56:48 +08:00
Sihan Chen
cce82832cb Add AudioQnA example (#82)
Co-authored-by: chensuyue <suyue.chen@intel.com>
2024-04-26 21:55:23 +08:00
WenjiaoYue
5f8108dc78 language translation UI (#96)
Signed-off-by: Yue, Wenjiao <wenjiao.yue@intel.com>
2024-04-26 17:21:46 +08:00
chen, suyue
9bec478461 update license check (#101)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-04-26 13:28:33 +08:00
lvliang-intel
c64105a93a Fix UI issue (#95)
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
2024-04-21 08:57:00 +08:00
lvliang-intel
2aa6b7e49c Rename UI code directory (#93)
* Rename UI code directory

Signed-off-by: lvliang-intel <liang1.lv@intel.com>
2024-04-19 22:50:26 +08:00
WenjiaoYue
99727926b0 Add AudioQnA UI (#92)
* add AudioQnA UI

Signed-off-by: Yue, Wenjiao <wenjiao.yue@intel.com>
2024-04-19 22:48:35 +08:00
lvliang-intel
bd6be32b89 Update README for CodeTrans (#86)
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
2024-04-18 14:37:50 +08:00
pallavijaini0525
2ada2c86d5 Added the logic in ingest python files to use the TEI_ENDPOINT to ingest the data into the redis vector DB (#84)
Signed-off-by: Pallavi Jaini <pallavi.jaini@intel.com>
2024-04-18 14:37:26 +08:00
chen, suyue
f0b73ef8ff setup test on new node (#89)
2024-04-18 10:27:20 +08:00
lvliang-intel
9aa89ecfbd Fix ChatQnA streaming response issue (#88)
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
2024-04-18 08:23:00 +08:00
Letong Han
0ac6fd4fef Add code translation example (#81)
* add example of Code Translation

Signed-off-by: LetongHan <letong.han@intel.com>
2024-04-17 10:03:06 +08:00
chen, suyue
2b4f9b8de0 update CI test scope (#80)
Co-authored-by: Sun, Xuehao <xuehao.sun@intel.com>
2024-04-15 22:13:26 +08:00
WenjiaoYue
f1fd096a5b remove intel logo (#79)
Signed-off-by: Yue, Wenjiao <wenjiao.yue@intel.com>
2024-04-15 11:32:23 +08:00
lvliang-intel
071784d716 Revert side by side UI (#76)
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
2024-04-12 10:08:15 +08:00
lvliang-intel
cfd3a1525f Remove NeuralChatEndpoint (#75)
* Revert NeuralChatEndpoint

Signed-off-by: lvliang-intel <liang1.lv@intel.com>
2024-04-12 10:01:16 +08:00
Sihan Chen
ec50098224 fix proxy issue (#67)
2024-04-11 16:29:15 +08:00
lvliang-intel
71fd893ae4 Add Vision demo side by side UI (#70)
* Add Vision demo side by side UI

Signed-off-by: lvliang-intel <liang1.lv@intel.com>
2024-04-11 16:28:53 +08:00
lvliang-intel
a2ea9217da Remove Intel wording for GenAIExamples (#71)
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
Co-authored-by: chen, suyue <suyue.chen@intel.com>
2024-04-11 14:19:06 +08:00
Sun, Xuehao
69ee30e50c Update CI secrets access (#72)
Signed-off-by: Sun, Xuehao <xuehao.sun@intel.com>
2024-04-11 14:18:30 +08:00
chen, suyue
346c3cf292 fix readme typo (#69)
2024-04-11 11:02:41 +08:00
chen, suyue
e3795918ea Update contribution readme (#68)
2024-04-11 10:58:18 +08:00
chen, suyue
524f168684 add codegen response checking (#63)
Co-authored-by: Jitendra Patil <jitendra.patil@intel.com>
2024-04-10 23:46:03 +08:00
WenjiaoYue
db9f5c4115 fix panelExchange logic (#66)
Signed-off-by: Yue, Wenjiao <wenjiao.yue@intel.com>
2024-04-10 15:47:38 +08:00
392 changed files with 10521 additions and 4607 deletions


@@ -1,13 +1,2 @@
Copyright (c) 2024 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Copyright (C) 2024 Intel Corporation
SPDX-License-Identifier: Apache-2.0

.github/pull_request_template.md (new file, 23 lines)

@@ -0,0 +1,23 @@
## Description
The summary of the proposed changes as well as the relevant motivation and context.
## Issues
List the issue or RFC link this PR is working on. If there is no such link, please mark it as `n/a`.
## Type of change
List the type of change like below. Please delete options that are not relevant.
- [ ] Bug fix (non-breaking change which fixes an issue)
- [ ] New feature (non-breaking change which adds new functionality)
- [ ] Breaking change (fix or feature that would break existing design and interface)
## Dependencies
List any newly introduced 3rd party dependencies.
## Tests
Describe the tests that you ran to verify your changes.


@@ -1,44 +0,0 @@
name: ChatQnA-test
on:
pull_request:
branches: [main]
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
paths:
- ChatQnA/**
- "!**.md"
- .github/workflows/ChatQnA.yml
workflow_dispatch:
# If there is a new commit, the previous jobs will be canceled
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
ChatQnA:
runs-on: gaudi2
strategy:
matrix:
job_name: ["langchain"]
fail-fast: false
steps:
- name: Clean Up Working Directory
run: sudo rm -rf ${{github.workspace}}/*
- name: Checkout out Repo
uses: actions/checkout@v4
- name: Run Test
env:
HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
run: |
cd ${{ github.workspace }}/ChatQnA/tests
bash test_${{ matrix.job_name }}_inference.sh
- name: Publish pipeline artifact
if: ${{ !cancelled() }}
uses: actions/upload-artifact@v4
with:
name: ${{ matrix.job_name }}
path: ${{ github.workspace }}/ChatQnA/tests/*.log


@@ -1,44 +0,0 @@
name: CodeGen-test
on:
pull_request:
branches: [main]
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
paths:
- CodeGen/**
- "!**.md"
- .github/workflows/CodeGen.yml
workflow_dispatch:
# If there is a new commit, the previous jobs will be canceled
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
CodeGen:
runs-on: gaudi2
strategy:
matrix:
job_name: ["codegen"]
fail-fast: false
steps:
- name: Clean Up Working Directory
run: sudo rm -rf ${{github.workspace}}/*
- name: Checkout out Repo
uses: actions/checkout@v4
- name: Run Test
env:
HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
run: |
cd ${{ github.workspace }}/CodeGen/tests
bash test_${{ matrix.job_name }}_inference.sh
- name: Publish pipeline artifact
if: ${{ !cancelled() }}
uses: actions/upload-artifact@v4
with:
name: ${{ matrix.job_name }}
path: ${{ github.workspace }}/CodeGen/tests/*.log


@@ -1,44 +0,0 @@
name: DocSum-test
on:
pull_request:
branches: [main]
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
paths:
- DocSum/**
- "!**.md"
- .github/workflows/DocSum.yml
workflow_dispatch:
# If there is a new commit, the previous jobs will be canceled
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
DocSum:
runs-on: gaudi2
strategy:
matrix:
job_name: ["langchain"]
fail-fast: false
steps:
- name: Clean Up Working Directory
run: sudo rm -rf ${{github.workspace}}/*
- name: Checkout out Repo
uses: actions/checkout@v4
- name: Run Test
env:
HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
run: |
cd ${{ github.workspace }}/DocSum/tests
bash test_${{ matrix.job_name }}_inference.sh
- name: Publish pipeline artifact
if: ${{ !cancelled() }}
uses: actions/upload-artifact@v4
with:
name: ${{ matrix.job_name }}
path: ${{ github.workspace }}/DocSum/tests/*.log


@@ -0,0 +1,106 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: E2E test with docker compose
on:
pull_request_target:
branches: [main]
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
paths:
- "**/docker/**"
- "**/tests/**"
- "**/ui/**"
- "!**.md"
- "!**.txt"
- .github/workflows/E2E_test_with_compose.yml
workflow_dispatch:
# If there is a new commit, the previous jobs will be canceled
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
job1:
name: Get-test-matrix
runs-on: ubuntu-latest
outputs:
run_matrix: ${{ steps.get-test-matrix.outputs.run_matrix }}
steps:
- name: Checkout out Repo
uses: actions/checkout@v4
with:
ref: "refs/pull/${{ github.event.number }}/merge"
fetch-depth: 0
- name: Get test matrix
id: get-test-matrix
run: |
set -xe
merged_commit=$(git log -1 --format='%H')
changed_files=$(git diff --name-only ${{ github.event.pull_request.base.sha }} ${merged_commit} | \
grep -vE '.github|README.md|*.txt|deprecate|kubernetes|manifest')
examples=$(printf '%s\n' "${changed_files[@]}" | grep '/' | cut -d'/' -f1 | sort -u)
run_matrix="{\"include\":["
for example in ${examples}; do
run_hardware=""
if [ $(printf '%s\n' "${changed_files[@]}" | grep ${example} | grep -c gaudi) != 0 ]; then run_hardware="gaudi"; fi
if [ $(printf '%s\n' "${changed_files[@]}" | grep ${example} | grep -c xeon) != 0 ]; then run_hardware="xeon ${run_hardware}"; fi
if [ "$run_hardware" = "" ]; then run_hardware="xeon"; fi
for hw in ${run_hardware}; do
run_matrix="${run_matrix}{\"example\":\"${example}\",\"hardware\":\"${hw}\"},"
done
done
run_matrix=$run_matrix"]}"
echo "run_matrix=${run_matrix}" >> $GITHUB_OUTPUT
Example-test:
needs: job1
strategy:
matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }}
runs-on: ${{ matrix.hardware }}
continue-on-error: true
steps:
- name: Test example
run: |
echo "Matrix - example ${{ matrix.example }}, hardware ${{ matrix.hardware }}"
- name: Clean Up Working Directory
run: sudo rm -rf ${{github.workspace}}/*
- name: Checkout out Repo
uses: actions/checkout@v4
with:
ref: "refs/pull/${{ github.event.number }}/merge"
- name: Run test
env:
HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
example: ${{ matrix.example }}
hardware: ${{ matrix.hardware }}
run: |
cd ${{ github.workspace }}/$example/tests
example_l=$(echo $example | tr '[:upper:]' '[:lower:]')
if [ -f test_${example_l}_on_${hardware}.sh ]; then timeout 30m bash test_${example_l}_on_${hardware}.sh; else echo "Test script not found, skip test!"; fi
- name: Clean up container
env:
example: ${{ matrix.example }}
hardware: ${{ matrix.hardware }}
if: cancelled() || failure()
run: |
cd ${{ github.workspace }}/$example/docker/$hardware
container_list=$(cat docker_compose.yaml | grep container_name | cut -d':' -f2)
for container_name in $container_list; do
cid=$(docker ps -aq --filter "name=$container_name")
if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
done
echo y | docker system prune
- name: Publish pipeline artifact
if: ${{ !cancelled() }}
uses: actions/upload-artifact@v4
with:
name: ${{ matrix.example }}-${{ matrix.hardware }}
path: ${{ github.workspace }}/${{ matrix.example }}/tests/*.log
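
For orientation, here is a minimal sketch (assumed, not copied from a real run) of the `run_matrix` value the Get-test-matrix step computes for a hypothetical PR that only touches `ChatQnA/docker/gaudi/**` and `CodeGen/docker/xeon/**`; the Example-test job then fans out one runner per entry:

```bash
# Hypothetical matrix JSON for the PR described above (illustration only).
run_matrix='{"include":[{"example":"ChatQnA","hardware":"gaudi"},{"example":"CodeGen","hardware":"xeon"}]}'
echo "run_matrix=${run_matrix}"   # in CI this line is appended to $GITHUB_OUTPUT instead
```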


@@ -1,47 +0,0 @@
name: SearchQnA-test
on:
pull_request:
branches: [main]
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
paths:
- SearchQnA/**
- "!**.md"
- .github/workflows/SearchQnA.yml
workflow_dispatch:
# If there is a new commit, the previous jobs will be canceled
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
SearchQnA:
runs-on: gaudi2
strategy:
matrix:
job_name: ["langchain"]
fail-fast: false
steps:
- name: Clean Up Working Directory
run: sudo rm -rf ${{github.workspace}}/*
- name: Checkout out Repo
uses: actions/checkout@v4
- name: Run Test
env:
HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
AISE_GAUDI_00_IP: ${{ secrets.AISE_GAUDI_00_IP }}
run: |
cd ${{ github.workspace }}/SearchQnA/tests
bash test_${{ matrix.job_name }}_inference.sh
- name: Publish pipeline artifact
if: ${{ !cancelled() }}
uses: actions/upload-artifact@v4
with:
name: ${{ matrix.job_name }}
path: ${{ github.workspace }}/SearchQnA/tests/*.log


@@ -1,44 +0,0 @@
name: VisualQnA-test
on:
pull_request:
branches: [main]
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
paths:
- VisualQnA/**
- "!**.md"
- .github/workflows/VisualQnA.yml
workflow_dispatch:
# If there is a new commit, the previous jobs will be canceled
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
VisualQnA:
runs-on: gaudi2
strategy:
matrix:
job_name: ["basic"]
fail-fast: false
steps:
- name: Clean Up Working Directory
run: sudo rm -rf ${{github.workspace}}/*
- name: Checkout out Repo
uses: actions/checkout@v4
- name: Run Test
env:
HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
run: |
cd ${{ github.workspace }}/VisualQnA/tests
bash test_${{ matrix.job_name }}_inference.sh
- name: Publish pipeline artifact
if: ${{ !cancelled() }}
uses: actions/upload-artifact@v4
with:
name: ${{ matrix.job_name }}
path: ${{ github.workspace }}/VisualQnA/tests/*.log


@@ -1,4 +1,7 @@
name: Format Scan
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: Code Scan
on:
pull_request:
@@ -15,17 +18,17 @@ concurrency:
env:
DOCKER_CONFIG_NAME: "commonDockerConfig"
REPO_NAME: "format-scan"
REPO_NAME: "code-scan"
REPO_TAG: "1.0"
DOCKER_FILE_NAME: "format-scan"
CONTAINER_NAME: "format-scan"
DOCKER_FILE_NAME: "code-scan"
CONTAINER_NAME: "code-scan"
jobs:
format-scan:
code-scan:
runs-on: ubuntu-latest
strategy:
matrix:
job_name: ["bandit"]
job_name: ["bandit", "hadolint"]
fail-fast: false
steps:
- name: Checkout out Repo
@@ -48,11 +51,11 @@ jobs:
- name: Code scan check
run: |
docker exec ${{ env.CONTAINER_NAME }} \
bash -c "bash /GenAIExamples/.github/workflows/scripts/formatScan/${{ matrix.job_name }}.sh"
bash -c "bash /GenAIExamples/.github/workflows/scripts/codeScan/${{ matrix.job_name }}.sh"
- name: Publish pipeline artifact
if: ${{ !cancelled() }}
uses: actions/upload-artifact@v4
with:
name: ${{ matrix.job_name }}
path: ${{ github.workspace }}/.github/workflows/scripts/formatScan/${{ matrix.job_name }}.*
path: ${{ github.workspace }}/.github/workflows/scripts/codeScan/${{ matrix.job_name }}.*

.github/workflows/container-build.yml (new file, 44 lines)

@@ -0,0 +1,44 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: Container Build
permissions: read-all
on:
workflow_dispatch:
schedule:
- cron: "35 1 * * 5"
jobs:
# https://github.com/intel/ai-containers/blob/main/.github/action.yml
build-containers:
runs-on: docker
env:
REGISTRY: ${{ secrets.REGISTRY }}
REPO: ${{ secrets.REPO }}
steps:
- uses: step-security/harden-runner@v2
with:
egress-policy: audit
- uses: actions/checkout@v4
- uses: docker/login-action@v3
with:
registry: ${{ secrets.REGISTRY }}
username: ${{ secrets.REGISTRY_USER }}
password: ${{ secrets.REGISTRY_TOKEN }}
- name: Build Containers
run: |
docker compose -p ${GITHUB_RUN_NUMBER} build --no-cache
working-directory: .github/workflows/docker
- name: Print Containers to Summary
run: |
docker compose -p ${GITHUB_RUN_NUMBER} images --format json | jq -r --arg registry "$REGISTRY" '.[] | select(.Repository | contains($registry)) | .Tag' >> $GITHUB_STEP_SUMMARY
- name: Push Containers
run: |
docker compose -p ${GITHUB_RUN_NUMBER} push
working-directory: .github/workflows/docker
- name: Un-Tag Containers
run: |
docker compose -p ${GITHUB_RUN_NUMBER} down --rmi all
working-directory: .github/workflows/docker
- name: Remove Containers
if: always()
run: docker system prune --force


@@ -0,0 +1,25 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
ARG UBUNTU_VER=22.04
FROM ubuntu:${UBUNTU_VER} as devel
ENV LANG C.UTF-8
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
aspell \
aspell-en \
build-essential \
python3 \
python3-pip \
python3-dev \
python3-distutils \
wget
RUN ln -sf $(which python3) /usr/bin/python
RUN python -m pip install --no-cache-dir bandit==1.7.8
RUN wget -O /bin/hadolint https://github.com/hadolint/hadolint/releases/download/v2.12.0/hadolint-Linux-x86_64
RUN chmod +x /bin/hadolint
WORKDIR /
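
A hedged sketch of how this scanner image can be exercised by hand; the Dockerfile path and the `/GenAIExamples` mount point are assumptions inferred from the code-scan workflow above, not confirmed paths:

```bash
# Build the scanner image and run the Bandit scan script inside it (sketch).
docker build -t code-scan:1.0 -f .github/workflows/docker/code-scan.dockerfile .   # path assumed
docker run --rm -v "$PWD:/GenAIExamples" code-scan:1.0 \
  bash /GenAIExamples/.github/workflows/scripts/codeScan/bandit.sh
```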


@@ -0,0 +1,29 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
services:
chatqna-megaservice-server:
build:
args:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
no_proxy: ${no_proxy}
context: ../../../ChatQnA/microservice/xeon
dockerfile: docker/Dockerfile
image: ${REGISTRY}/${REPO}:chatqna-megaservice-server
pull_policy: always
chatqna-ui-server:
build:
context: ../../../ChatQnA/ui
extends: chatqna-megaservice-server
image: ${REGISTRY}/${REPO}:chatqna-ui-server
codegen-megaservice-server:
build:
context: ../../../CodeGen/microservice/xeon
extends: chatqna-megaservice-server
image: ${REGISTRY}/${REPO}:codegen-megaservice-server
codegen-ui-server:
build:
context: ../../../CodeGen/ui
extends: chatqna-megaservice-server
image: ${REGISTRY}/${REPO}:codegen-ui-server
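
A hedged sketch of building one of these images locally. `REGISTRY` and `REPO` come from CI secrets, so the values below are placeholders, and the compose file is assumed to be the one `container-build.yml` invokes from `.github/workflows/docker`:

```bash
# Build and list a single service image from the CI compose file (sketch, placeholder registry).
cd .github/workflows/docker
REGISTRY=registry.example.com REPO=opea docker compose build chatqna-ui-server
docker images "registry.example.com/opea"
```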


@@ -1,36 +0,0 @@
#
# Copyright (c) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
ARG UBUNTU_VER=22.04
FROM ubuntu:${UBUNTU_VER} as devel
ENV LANG C.UTF-8
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
aspell \
aspell-en \
build-essential \
python3 \
python3-pip \
python3-dev \
python3-distutils \
wget
RUN ln -sf $(which python3) /usr/bin/python
RUN python -m pip install --no-cache-dir pylint==2.12.1\
bandit
WORKDIR /

.github/workflows/manifest-e2e.yaml (new file, 127 lines)

@@ -0,0 +1,127 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: E2E test with manifests
on:
pull_request:
branches: [main]
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
paths:
- "**/kubernetes/manifests/**"
- "**/tests/**"
- "!**.md"
- "!**.txt"
- .github/workflows/manifest-e2e.yml
workflow_dispatch:
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
job1:
name: Get-test-matrix
runs-on: ubuntu-latest
outputs:
run_matrix: ${{ steps.get-test-matrix.outputs.run_matrix }}
steps:
- name: Checkout out Repo
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Get test matrix
id: get-test-matrix
run: |
set -xe
changed_files="$(git diff --name-only ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }} | \
grep "/kubernetes/manifests/" | \
grep -vE '.github|deprecated|docker')" || true
examples=$(printf '%s\n' "${changed_files[@]}" | grep '/' | cut -d'/' -f1 | sort -u)
run_matrix="{\"include\":["
for example in ${examples}; do
run_hardware=""
if [ $(printf '%s\n' "${changed_files[@]}" | grep ${example} | grep -c gaudi) != 0 ]; then run_hardware="gaudi"; fi
if [ $(printf '%s\n' "${changed_files[@]}" | grep ${example} | grep -c xeon) != 0 ]; then run_hardware="xeon ${run_hardware}"; fi
if [[ -z "$run_hardware" ]]; then run_hardware="xeon"; fi
for hw in ${run_hardware}; do
if [ $hw = "gaudi" ]; then
continue # skip gaudi for K8s test temporarily
else
#lower_example=$(echo "${example}" | tr '[:upper:]' '[:lower:]')
run_matrix="${run_matrix}{\"example\":\"${example}\",\"hardware\":\"inspur-icx-1\"},"
fi
done
done
run_matrix=$run_matrix"]}"
echo "run_matrix=${run_matrix}" >> $GITHUB_OUTPUT
manifest-test:
needs: job1
if: always() && ${{ needs.job1.outputs.run_matrix.include.length }} > 0
strategy:
matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }}
runs-on: ${{ matrix.hardware }}
continue-on-error: true
steps:
- name: E2e test manifest
run: |
echo "Matrix - manifest: ${{ matrix.example }}"
- name: Clean Up Working Directory
run: sudo rm -rf ${{github.workspace}}/*
- name: Checkout out Repo
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Set variables
run: |
lower_example=$(echo "${{ matrix.example }}" | tr '[:upper:]' '[:lower:]')
echo "NAMESPACE=$lower_example-$(date +%Y%m%d%H%M%S)" >> $GITHUB_ENV
echo "ROLLOUT_TIMEOUT_SECONDS=1800s" >> $GITHUB_ENV
echo "KUBECTL_TIMEOUT_SECONDS=60s" >> $GITHUB_ENV
echo "should_cleanup=false" >> $GITHUB_ENV
echo "skip_validate=false" >> $GITHUB_ENV
echo "NAMESPACE=$NAMESPACE"
- name: Initialize manifest testing
run: |
${{ github.workspace }}/${{ matrix.example }}/tests/test_manifest_on_xeon.sh init_${{ matrix.example }}
- name: Kubectl install
id: install
run: |
echo "should_cleanup=true" >> $GITHUB_ENV
kubectl create ns $NAMESPACE
${{ github.workspace }}/${{ matrix.example }}/tests/test_manifest_on_xeon.sh install_${{ matrix.example }} $NAMESPACE
echo "Testing ${{ matrix.example }}, waiting for pod ready..."
if kubectl rollout status deployment --namespace "$NAMESPACE" --timeout "$ROLLOUT_TIMEOUT_SECONDS"; then
echo "Testing manifests ${{ matrix.example }}, waiting for pod ready done!"
else
echo "Timeout waiting for pods in namespace $NAMESPACE to be ready!"
echo "skip_validate=true" >> $GITHUB_ENV
exit 1
fi
sleep 60
- name: Validate e2e test
if: always()
run: |
if $skip_validate; then
echo "Skip validate"
else
${{ github.workspace }}/${{ matrix.example }}/tests/test_manifest_on_xeon.sh validate_${{ matrix.example }} $NAMESPACE
fi
- name: Kubectl uninstall
if: always()
run: |
if $should_cleanup; then
if ! kubectl delete ns $NAMESPACE --timeout=$KUBECTL_TIMEOUT_SECONDS; then
kubectl delete pods --namespace $NAMESPACE --force --grace-period=0 --all
kubectl delete ns $NAMESPACE --force --grace-period=0 --timeout=$KUBECTL_TIMEOUT_SECONDS
fi
fi
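
For local debugging, a hedged sketch of the same flow run by hand against a Xeon cluster, with ChatQnA as an illustrative example; the namespace name is made up, while the `init_*`/`install_*`/`validate_*` entry points and timeouts mirror the workflow steps above:

```bash
# Manual equivalent of the manifest E2E steps (sketch).
NAMESPACE=chatqna-$(date +%Y%m%d%H%M%S)
ChatQnA/tests/test_manifest_on_xeon.sh init_ChatQnA
kubectl create ns "$NAMESPACE"
ChatQnA/tests/test_manifest_on_xeon.sh install_ChatQnA "$NAMESPACE"
kubectl rollout status deployment --namespace "$NAMESPACE" --timeout 1800s
ChatQnA/tests/test_manifest_on_xeon.sh validate_ChatQnA "$NAMESPACE"
kubectl delete ns "$NAMESPACE" --timeout=60s
```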


@@ -0,0 +1,54 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: Manifests Validate
on:
pull_request:
branches: [main]
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
paths:
- "**/kubernetes/manifests/**"
- .github/workflows/manifest-validate.yaml
workflow_dispatch:
# If there is a new commit, the previous jobs will be canceled
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
env:
MANIFEST_DIR: "manifests"
jobs:
manifests-validate:
runs-on: ubuntu-latest
steps:
- name: Checkout out Repo
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: changed files
id: changed_files
run: |
set -xe
changed_folder=$(git diff --name-only ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }} | \
grep "kubernetes/manifests" | grep -vE '.github|README.md|*.txt|*.sh' | cut -d'/' -f1 | sort -u )
echo "changed_folder: $changed_folder"
if [ -z "$changed_folder" ]; then
echo "No changes in manifests folder"
echo "SKIP=true" >> $GITHUB_OUTPUT
exit 0
fi
echo "SKIP=false" >> $GITHUB_OUTPUT
for folder in $changed_folder; do
folder_str="$folder_str $folder/kubernetes/manifests/"
done
echo "folder_str=$folder_str"
echo "folder_str=$folder_str" >> $GITHUB_ENV
- uses: docker://ghcr.io/yannh/kubeconform:latest
if: steps.changed_files.outputs.SKIP == 'false'
with:
args: "-summary -output json ${{env.folder_str}}"
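
The same check can be reproduced locally; a minimal sketch using the kubeconform container with the arguments shown above (the example folder is illustrative):

```bash
# Validate one example's Kubernetes manifests with kubeconform (sketch).
docker run --rm -v "$PWD:/repo" -w /repo ghcr.io/yannh/kubeconform:latest \
  -summary -output json ChatQnA/kubernetes/manifests/
```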


@@ -1,7 +1,10 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
source /GenAIExamples/.github/workflows/scripts/change_color
pip install bandit==1.7.8
log_dir=/GenAIExamples/.github/workflows/scripts/formatScan
log_dir=/GenAIExamples/.github/workflows/scripts/codeScan
python -m bandit -r -lll -iii /GenAIExamples >${log_dir}/bandit.log
exit_code=$?
@@ -10,7 +13,7 @@ cat ${log_dir}/bandit.log
$BOLD_YELLOW && echo " ----------------- Current log file output end --------------------------" && $RESET
if [ ${exit_code} -ne 0 ]; then
$BOLD_RED && echo "Error!! Please Click on the artifact button to download and view Bandit error details." && $RESET
$BOLD_RED && echo "Error!! Please Click on the artifact button to download and check error details." && $RESET
exit 1
fi


@@ -0,0 +1,17 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
source /GenAIExamples/.github/workflows/scripts/change_color
log_dir=/GenAIExamples/.github/workflows/scripts/codeScan
find . -type f \( -name "Dockerfile*" \) -print -exec hadolint --ignore DL3006 --ignore DL3007 --ignore DL3008 {} \; 2>&1 | tee ${log_dir}/hadolint.log
if [[ $(grep -c "error" ${log_dir}/hadolint.log) != 0 ]]; then
$BOLD_RED && echo "Error!! Please Click on the artifact button to download and check error details." && $RESET
exit 1
fi
$BOLD_PURPLE && echo "Congratulations, Hadolint check passed!" && $LIGHT_PURPLE && echo " You can click on the artifact button to see the log details." && $RESET
exit 0


@@ -0,0 +1,50 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
source ${workspace}/.github/workflows/scripts/change_color
log_dir=${workspace}/.github/workflows/scripts/codeScan
echo "---Updating definition (DAT) files ---"
DEFS_URL=https://update.nai.com/products/commonupdater/current/vscandat1000/dat/0000
echo "Finding latest defs at $DEFS_URL/avvdat.ini..." \
&& wget -q $DEFS_URL/avvdat.ini \
&& echo "SUCCESS" || fail
inifile="avvdat.ini"
filename=`awk -F"=" '$2 ~ /avvdat.*zip/ { print $2 } ' $inifile`
filename2="$(echo -e "${filename}" | tr -d '[:space:]')"
if [ -z "$filename2" ]
then
echo "Cannot get defs information from INI file:"
cat $inifile
fail
fi
echo "Downloading latest defs from $DEFS_URL/$filename2..." \
&& wget -q $DEFS_URL/$filename2 \
&& echo "SUCCESS" || fail
echo "Extracting latest defs..." \
&& unzip -o $filename2 -d /usr/local/uvscan \
&& echo "SUCCESS" || fail
echo "--- Scanning ---"
ENV_SCAN_OPTS="--analyze --mime --program --recursive --unzip --threads 4 --summary --verbose --html=${workspace}/.github/workflows/scripts/codeScan/report.html"
echo "Scan Options: $ENV_SCAN_OPTS"
rm -r ${workspace}/avvdat*
rm -r ${workspace}/.git
uvscan $ENV_SCAN_OPTS ${workspace} 2>&1 | tee ${log_dir}/trellix.log
if [[ $(grep "Possibly Infected" ${log_dir}/trellix.log | sed 's/[^0-9]//g') != 0 ]]; then
$BOLD_RED && echo "Error!! Please Click on the artifact button to download and check error details." && $RESET
exit 1
fi
$BOLD_PURPLE && echo "Congratulations, Trellix Scan passed!" && $LIGHT_PURPLE && echo " You can click on the artifact button to see the log details." && $RESET
exit 0

.github/workflows/trellix.yml (new file, 30 lines)

@@ -0,0 +1,30 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: Trellix Command Line Scanner
on:
workflow_dispatch:
schedule:
- cron: "35 1 * * 6"
jobs:
Trellix:
runs-on: trellix
steps:
- name: Clean Up Working Directory
run: sudo rm -rf ${{github.workspace}}/*
- name: Checkout out Repo
uses: actions/checkout@v4
- name: Run Trellix Scanner
env:
workspace: ${{ github.workspace }}
run: bash .github/workflows/scripts/codeScan/trellix.sh
- name: Publish pipeline artifact
if: ${{ !cancelled() }}
uses: actions/upload-artifact@v4
with:
path: ${{ github.workspace }}/.github/workflows/scripts/codeScan/report.html

.gitignore (8 changed lines)

@@ -1,2 +1,8 @@
**/node_modules
**/.svelte-kit
**/.svelte-kit
**/package-lock.json
**/playwright-report/
**/playwright/.cache/
**/test-results/
__pycache__/


@@ -1,3 +1,6 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
ci:
autofix_prs: true
autoupdate_schedule: quarterly
@@ -11,10 +14,12 @@ repos:
- id: check-json
exclude: |
(?x)^(
ChatQnA/ui/tsconfig.json|
SearchQnA/ui/tsconfig.json
ChatQnA/docker/ui/svelte/tsconfig.json|
SearchQnA/ui/svelte/tsconfig.json|
DocSum/docker/ui/svelte/tsconfig.json
)$
- id: check-yaml
args: [--allow-multiple-documents]
- id: debug-statements
- id: requirements-txt-fixer
- id: trailing-whitespace
@@ -24,40 +29,40 @@ repos:
rev: v1.5.5
hooks:
- id: insert-license
files: |
(?x)^(
(ChatQnA|CodeGen|DocSum|SearchQnA|VisualQnA)/.*(py|yaml|yml|sh)|
)$
files: (Dockerfile)$
args:
[
--license-filepath=.github/license_template.txt,
--use-current-year,
--detect-license-in-X-top-lines=40,
--detect-license-in-X-top-lines=5,
--skip-license-insertion-comment=Copyright,
]
- id: insert-license
files: |
(?x)^(
(ChatQnA|CodeGen|DocSum|SearchQnA|VisualQnA)/.*(ts|js)|
)$
files: (.*\.(py|yaml|yml|sh))$
args:
[
--license-filepath=.github/license_template.txt,
--use-current-year,
--detect-license-in-X-top-lines=40,
--detect-license-in-X-top-lines=5,
--skip-license-insertion-comment=Copyright,
]
- id: insert-license
files: (.*\.(ts|js))$
args:
[
--license-filepath=.github/license_template.txt,
--use-current-year,
--detect-license-in-X-top-lines=5,
--skip-license-insertion-comment=Copyright,
--comment-style=//,
]
- id: insert-license
files: |
(?x)^(
(ChatQnA|CodeGen|DocSum|SearchQnA|VisualQnA)/.*(html|svelte)|
)$
files: (.*\.(html|svelte))$
args:
[
--license-filepath=.github/license_template.txt,
--use-current-year,
--detect-license-in-X-top-lines=40,
--detect-license-in-X-top-lines=5,
--skip-license-insertion-comment=Copyright,
--comment-style=<!--| |-->,
]
@@ -90,7 +95,7 @@ repos:
hooks:
- id: prettier
args: [--print-width=120]
types_or: [yaml, markdown, html, css, scss, javascript, json, ts, shell, sh]
types_or: [markdown, html, css, scss, javascript, json, ts, shell, sh]
additional_dependencies:
- prettier@3.2.5
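
These hooks can also be exercised locally before opening a PR; a minimal sketch, assuming `pre-commit` is installed from PyPI:

```bash
# Run every configured hook (license insertion, prettier, YAML checks, ...) against the whole tree.
pip install pre-commit
pre-commit run --all-files
```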

.prettierignore (new file, 1 line)

@@ -0,0 +1 @@
**/kubernetes/


@@ -59,8 +59,7 @@ representative at an online or offline event.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported to the community leaders responsible for enforcement at
CommunityCodeOfConduct AT intel DOT com.
reported to the community leaders.
All complaints will be reviewed and investigated promptly and fairly.
All community leaders are obligated to respect the privacy and security of the


@@ -1,57 +1,53 @@
# Contributing
- ### License
+ ## License
- Intel Generative AI Examples is licensed under the terms in [LICENSE](/LICENSE). By contributing to the project, you agree to the license and copyright terms therein and release your contribution under these terms.
+ Generative AI Examples is licensed under the terms in [LICENSE](/LICENSE).
+ By contributing to the project, you agree to the license and copyright terms therein and release your contribution under these terms.
- ### Sign your work
+ ## Create Pull Request
- Please use the sign-off line at the end of the patch. Your signature certifies that you wrote the patch or otherwise have the right to pass it on as an open-source patch. The rules are pretty simple: if you can certify
- the below (from [developercertificate.org](http://developercertificate.org/)):
+ If you have improvements to Generative AI Examples, send your pull requests for
+ [review](https://github.com/opea-project/GenAIExamples/pulls).
+ If you are new to GitHub, view the pull request [How To](https://help.github.com/articles/using-pull-requests/).
- ```
- Developer Certificate of Origin
- Version 1.1
+ ### Step-by-Step guidelines
- Copyright (C) 2004, 2006 The Linux Foundation and its contributors.
- 660 York Street, Suite 102,
- San Francisco, CA 94110 USA
+ - Star this repository using the button `Star` in the top right corner.
+ - Fork this Repository using the button `Fork` in the top right corner.
+ - Clone your forked repository to your pc.
+ `git clone "url to your repo"`
+ - Create a new branch for your modifications.
+ `git checkout -b new-branch`
+ - Add your files with `git add -A`, commit with `git commit -s -m "This is my commit message"` and push `git push origin new-branch`.
+ - Create a [pull request](https://github.com/opea-project/GenAIExamples/pulls).
- Everyone is permitted to copy and distribute verbatim copies of this
- license document, but changing it is not allowed.
+ ## Pull Request Template
- Developer's Certificate of Origin 1.1
+ See [PR template](/.github/pull_request_template.md)
- By making a contribution to this project, I certify that:
+ ## Pull Request Acceptance Criteria
- (a) The contribution was created in whole or in part by me and I
- have the right to submit it under the open source license
- indicated in the file; or
+ - At least two approvals from reviewers
- (b) The contribution is based upon previous work that, to the best
- of my knowledge, is covered under an appropriate open source
- license and I have the right under that license to submit that
- work with modifications, whether created in whole or in part
- by me, under the same open source license (unless I am
- permitted to submit under a different license), as indicated
- in the file; or
+ - All detected status checks pass
- (c) The contribution was provided directly to me by some other
- person who certified (a), (b) or (c) and I have not modified
- it.
+ - All conversations solved
- (d) I understand and agree that this project and the contribution
- are public and that a record of the contribution (including all
- personal information I submit with it, including my sign-off) is
- maintained indefinitely and may be redistributed consistent with
- this project or the open source license(s) involved.
- ```
+ - Third-party dependency license compatible
- Then you just add a line to every git commit message:
+ ## Pull Request Status Checks Overview
- Signed-off-by: Joe Smith <joe.smith@email.com>
+ Generative AI Examples use [Actions](https://github.com/opea-project/GenAIExamples/actions) for CI test.
+ | Test Name | Test Scope | Test Pass Criteria |
+ |-------------------------------|-----------------------------------------------|---------------------------|
+ | Security Scan | Dependabot/Bandit | PASS |
+ | Format Scan | pre-commit.ci | PASS |
+ | Examples Test | Cases under Examples/tests folder | PASS |
+ | DCO | Use `git commit -s` to sign off | PASS |
- Use your real name (sorry, no pseudonyms or anonymous contributions.)
+ > Notes: [Developer Certificate of Origin (DCO)](https://en.wikipedia.org/wiki/Developer_Certificate_of_Origin), you must agree to the terms of Developer Certificate of Origin by signing off each of your commits with `-s`, e.g. `git commit -s -m 'This is my commit message'`.
- If you set your `user.name` and `user.email` git configs, you can sign your
- commit automatically with `git commit -s`.
+ ## Support
+ Submit your questions, feature requests, and bug reports to the [GitHub issues](https://github.com/opea-project/GenAIExamples/issues) page.


@@ -1,271 +1,27 @@
# ChatQnA Application
Chatbots are the most widely adopted use case for leveraging the powerful chat and reasoning capabilities of large language models (LLM). The retrieval augmented generation (RAG) architecture is quickly becoming the industry standard for developing chatbots because it combines the benefits of a knowledge base (via a vector store) and generative models to reduce hallucinations, maintain up-to-date information, and leverage domain-specific knowledge.
Chatbots are the most widely adopted use case for leveraging the powerful chat and reasoning capabilities of large language models (LLMs). The retrieval augmented generation (RAG) architecture is quickly becoming the industry standard for chatbots development. It combines the benefits of a knowledge base (via a vector store) and generative models to reduce hallucinations, maintain up-to-date information, and leverage domain-specific knowledge.
RAG bridges the knowledge gap by dynamically fetching relevant information from external sources, ensuring that responses generated remain factual and current. At the heart of this architecture are vector databases, instrumental in enabling efficient and semantic retrieval of information. These databases store data as vectors, allowing RAG to swiftly access the most pertinent documents or data points based on semantic similarity.
RAG bridges the knowledge gap by dynamically fetching relevant information from external sources, ensuring that responses generated remain factual and current. The core of this architecture are vector databases, which are instrumental in enabling efficient and semantic retrieval of information. These databases store data as vectors, allowing RAG to swiftly access the most pertinent documents or data points based on semantic similarity.
ChatQnA architecture shows below:
![architecture](https://i.imgur.com/lLOnQio.png)
This ChatQnA use case performs RAG using LangChain, Redis vectordb and Text Generation Inference on Intel Gaudi2. The Intel Gaudi2 accelerator supports both training and inference for deep learning models in particular for LLMs. Please visit [Habana AI products](https://habana.ai/products) for more details.
This ChatQnA use case performs RAG using LangChain, Redis VectorDB and Text Generation Inference on Intel Gaudi2 or Intel XEON Scalable Processors. The Intel Gaudi2 accelerator supports both training and inference for deep learning models in particular for LLMs. Please visit [Habana AI products](https://habana.ai/products) for more details.
# Solution Overview
# Deploy ChatQnA Service
Steps to implement the solution are as follows
The ChatQnA service can be effortlessly deployed on either Intel Gaudi2 or Intel XEON Scalable Processors.
## In Intel Gaudi2 Platform
## Deploy ChatQnA on Gaudi
1. [Deploy a TGI container with LLM model of your choice](#launch-tgi-gaudi-service) (Solution uses 70B model by default)
Refer to the [Gaudi Guide](./docker/gaudi/README.md) for instructions on deploying ChatQnA on Gaudi.
## In Intel Xeon Platform
## Deploy ChatQnA on Xeon
1. [Export the TGI endpoint as an environment variable](#customize-tgi-gaudi-service)
2. [Deploy a TEI container for the embedding model service and export the endpoint](#enable-tei-for-embedding-model)
3. [Launch a Redis container and a LangChain container](#launch-redis-and-langchain-backend-service)
4. [Ingest data into Redis](#ingest-data-into-redis); this example provides a few sample PDF documents
5. [Start the backend service](#start-the-backend-service) to accept queries to LangChain
6. [Start the GUI](#start-the-frontend-service)-based chatbot service to experiment with the RAG-based chatbot
Refer to the [Xeon Guide](./docker/xeon/README.md) for instructions on deploying ChatQnA on Xeon.
To use [🤗 text-generation-inference](https://github.com/huggingface/text-generation-inference) on Habana Gaudi/Gaudi2, please follow these steps:
## Deploy ChatQnA into Kubernetes on Xeon & Gaudi
## Prepare TGI Docker
Getting started is straightforward with the official Docker container. Simply pull the image using:
```bash
docker pull ghcr.io/huggingface/tgi-gaudi:1.2.1
```
Alternatively, you can build the Docker image yourself using the latest [TGI-Gaudi](https://github.com/huggingface/tgi-gaudi) code with the command below:
```bash
bash ./serving/tgi_gaudi/build_docker.sh
```
## Launch TGI Gaudi Service
### Launch a local server instance on 1 Gaudi card:
```bash
bash ./serving/tgi_gaudi/launch_tgi_service.sh
```
For gated models such as `LLAMA-2`, you will have to pass `-e HUGGING_FACE_HUB_TOKEN=<token>` to the `docker run` command above with a valid Hugging Face Hub read token.
Please follow this link to the [Hugging Face token documentation](https://huggingface.co/docs/hub/security-tokens) to get an access token, and export it via the `HUGGINGFACEHUB_API_TOKEN` environment variable:
```bash
export HUGGINGFACEHUB_API_TOKEN=<token>
```
### Launch a local server instance on 8 Gaudi cards:
```bash
bash ./serving/tgi_gaudi/launch_tgi_service.sh 8
```
You can then make requests like the one below to check the service status:
```bash
curl 127.0.0.1:8080/generate \
-X POST \
-d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":32}}' \
-H 'Content-Type: application/json'
```
### Customize TGI Gaudi Service
The `./serving/tgi_gaudi/launch_tgi_service.sh` script accepts three parameters:
- `num_cards`: The number of Gaudi cards to be utilized, ranging from 1 to 8. The default is set to 1.
- `port_number`: The port number assigned to the TGI Gaudi endpoint, with the default being 8080.
- `model_name`: The model name used for the LLM, with the default set to "Intel/neural-chat-7b-v3-3".
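For example, a hypothetical invocation that passes all three parameters positionally (card count, port, model) could look like this:
```bash
# Illustrative only: assumes the script accepts num_cards, port_number, and model_name as positional arguments
bash ./serving/tgi_gaudi/launch_tgi_service.sh 2 8080 "Intel/neural-chat-7b-v3-3"
```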
You have the flexibility to customize these parameters according to your specific needs. Additionally, you can set the TGI Gaudi endpoint by exporting the environment variable `TGI_LLM_ENDPOINT`:
```bash
export TGI_LLM_ENDPOINT="http://xxx.xxx.xxx.xxx:8080"
```
## Enable TEI for embedding model
Text Embeddings Inference (TEI) is a toolkit designed for deploying and serving open-source text embeddings and sequence classification models efficiently. With TEI, users can extract high-performance features using various popular models. It supports token-based dynamic batching for enhanced performance.
To launch the TEI service, you can use the following commands:
```bash
model=BAAI/bge-large-en-v1.5
revision=refs/pr/5
volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run
docker run -p 9090:80 -v $volume:/data -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 --model-id $model --revision $revision
export TEI_ENDPOINT="http://xxx.xxx.xxx.xxx:9090"
```
You can then make requests like the one below to check the service status:
```bash
curl 127.0.0.1:9090/embed \
-X POST \
-d '{"inputs":"What is Deep Learning?"}' \
-H 'Content-Type: application/json'
```
Note: If you want to integrate the TEI service into the LangChain application, you'll need to restart the LangChain backend service after launching the TEI service.
## Launch Redis and LangChain Backend Service
Update the `HUGGINGFACEHUB_API_TOKEN` environment variable with your Hugging Face token in `docker-compose.yml`.
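If the compose file picks the token up as a `${HUGGINGFACEHUB_API_TOKEN}` substitution, a minimal sketch is to export it in your shell before bringing the stack up:
```bash
# Assumes docker-compose.yml references ${HUGGINGFACEHUB_API_TOKEN}; replace <token> with your real token
export HUGGINGFACEHUB_API_TOKEN=<token>
```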
```bash
cd langchain/docker
docker compose -f docker-compose.yml up -d
cd ../../
```
> [!NOTE]
> If you modified any files and want that change introduced in this step, add `--build` to the end of the command to build the container image instead of pulling it from Docker Hub.
## Ingest data into Redis
Each time the Redis container is launched, data should be ingested into the container using the following commands:
```bash
docker exec -it qna-rag-redis-server bash
cd /ws
python ingest.py
```
Note: `ingest.py` will download the embedding model. Please set the proxy if necessary.
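If your environment requires a proxy, a minimal sketch (with a placeholder proxy address) is to export the proxy variables inside the container before running the ingestion:
```bash
# Placeholder proxy address; adjust or skip if no proxy is needed
export http_proxy=http://proxy.example.com:8080
export https_proxy=http://proxy.example.com:8080
python ingest.py
```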
# Start LangChain Server
## Enable GuardRails using Meta's Llama Guard model (Optional)
We offer content moderation support utilizing Meta's [Llama Guard](https://huggingface.co/meta-llama/LlamaGuard-7b) model. To activate GuardRails, follow the instructions below to deploy the Llama Guard model on TGI Gaudi.
```bash
volume=$PWD/data
model_id="meta-llama/LlamaGuard-7b"
docker run -p 8088:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HUGGING_FACE_HUB_TOKEN=<your HuggingFace token> -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$http_proxy tgi_gaudi --model-id $model_id
export SAFETY_GUARD_ENDPOINT="http://xxx.xxx.xxx.xxx:8088"
```
You can then make requests like the one below to check the service status:
```bash
curl 127.0.0.1:8088/generate \
-X POST \
-d '{"inputs":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}' \
-H 'Content-Type: application/json'
```
## Start the Backend Service
Make sure the TGI-Gaudi service is running and the data has been ingested into Redis, then launch the backend service:
```bash
docker exec -it qna-rag-redis-server bash
nohup python app/server.py &
```
The LangChain backend service listens on port 8000; you can customize it by changing the code in `docker/qna-app/app/server.py`.
You can then make requests like the ones below to check the LangChain backend service status:
```bash
# non-streaming endpoint
curl 127.0.0.1:8000/v1/rag/chat \
-X POST \
-d '{"query":"What is the total revenue of Nike in 2023?"}' \
-H 'Content-Type: application/json'
```
```bash
# streaming endpoint
curl 127.0.0.1:8000/v1/rag/chat_stream \
-X POST \
-d '{"query":"What is the total revenue of Nike in 2023?"}' \
-H 'Content-Type: application/json'
```
## Start the Frontend Service
Navigate to the "ui" folder and execute the following commands to start the frontend GUI:
```bash
cd ui
sudo apt-get install npm && \
npm install -g n && \
n stable && \
hash -r && \
npm install -g npm@latest
```
For CentOS, please use the following commands instead:
```bash
curl -sL https://rpm.nodesource.com/setup_20.x | sudo bash -
sudo yum install -y nodejs
```
Update the `DOC_BASE_URL` environment variable in the `.env` file by replacing the IP address '127.0.0.1' with the actual IP address.
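For example, assuming the backend listens on the default port 8000 mentioned above and the host's address is 192.168.1.100 (a placeholder), the `.env` entry would change along these lines:
```bash
# .env (illustrative host address; keep whatever port/path your deployment actually uses)
DOC_BASE_URL=http://192.168.1.100:8000
```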
Run the following command to install the required dependencies:
```bash
npm install
```
Start the development server by executing the following command:
```bash
nohup npm run dev &
```
This will initiate the frontend service and launch the application.
# Enable TGI Gaudi FP8 for higher throughput (Optional)
TGI Gaudi uses BFLOAT16 optimization as the default setting. To achieve higher throughput, you can enable FP8 quantization on TGI Gaudi. Note that currently only the Llama2 and Mistral model series support FP8 quantization. Please follow the steps below to enable FP8 quantization.
## Prepare Metadata for FP8 Quantization
Enter the TGI Gaudi Docker container and run the commands below:
```bash
pip install git+https://github.com/huggingface/optimum-habana.git
git clone https://github.com/huggingface/optimum-habana.git
cd optimum-habana/examples/text-generation
pip install -r requirements_lm_eval.txt
QUANT_CONFIG=./quantization_config/maxabs_measure.json python ../gaudi_spawn.py run_lm_eval.py -o acc_7b_bs1_measure.txt --model_name_or_path Intel/neural-chat-7b-v3-3 --attn_softmax_bf16 --use_hpu_graphs --trim_logits --use_kv_cache --reuse_cache --bf16 --batch_size 1
QUANT_CONFIG=./quantization_config/maxabs_quant.json python ../gaudi_spawn.py run_lm_eval.py -o acc_7b_bs1_quant.txt --model_name_or_path Intel/neural-chat-7b-v3-3 --attn_softmax_bf16 --use_hpu_graphs --trim_logits --use_kv_cache --reuse_cache --bf16 --batch_size 1 --fp8
```
After the above commands finish, the quantization metadata is generated. Copy the metadata directory `./hqt_output/` and the quantization JSON file to the host (under …/data). Adapt the following commands to your Docker container ID and directory path.
```bash
docker cp 262e04bbe466:/usr/src/optimum-habana/examples/text-generation/hqt_output data/
docker cp 262e04bbe466:/usr/src/optimum-habana/examples/text-generation/quantization_config/maxabs_quant.json data/
```
Then modify `dump_stats_path` to "/data/hqt_output/measure" and update `dump_stats_xlsx_path` to "/data/hqt_output/measure/fp8stats.xlsx" in the `maxabs_quant.json` file.
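If you prefer to patch the copied file from the shell, a hypothetical sketch (assuming both keys appear as plain JSON string fields in `data/maxabs_quant.json`) is:
```bash
# Illustrative only; verify the resulting JSON after editing
sed -i 's#"dump_stats_path": *"[^"]*"#"dump_stats_path": "/data/hqt_output/measure"#' data/maxabs_quant.json
sed -i 's#"dump_stats_xlsx_path": *"[^"]*"#"dump_stats_xlsx_path": "/data/hqt_output/measure/fp8stats.xlsx"#' data/maxabs_quant.json
```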
## Restart the TGI Gaudi server with all the metadata mapped
```bash
docker run -p 8080:80 -e QUANT_CONFIG=/data/maxabs_quant.json -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:1.2.1 --model-id Intel/neural-chat-7b-v3-3
```
Now TGI Gaudi will launch the FP8 model by default, and you can make requests like the one below to check the service status:
```bash
curl 127.0.0.1:8080/generate \
-X POST \
-d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":32}}' \
-H 'Content-Type: application/json'
```
#
SCRIPT USAGE NOTICE:  By downloading and using any script file included with the associated software package (such as files with .bat, .cmd, or .JS extensions, Docker files, or any other type of file that, when executed, automatically downloads and/or installs files onto your system) (the “Script File”), it is your obligation to review the Script File to understand what files (e.g.,  other software, AI models, AI Datasets) the Script File will download to your system (“Downloaded Files”). Furthermore, by downloading and using the Downloaded Files, even if they are installed through a silent install, you agree to any and all terms and conditions associated with such files, including but not limited to, license terms, notices, or disclaimers.
Refer to the [Kubernetes Guide](./kubernetes/manifests/README.md) for instructions on deploying ChatQnA into Kubernetes on Xeon & Gaudi.

ChatQnA/chatqna.yaml Normal file
View File

@@ -0,0 +1,84 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
opea_micro_services:
redis-vector-db:
host: ${REDIS_SERVICE_HOST_IP}
ports:
- "6379:6379"
- "8001:8001"
image: redis/redis-stack:7.2.0-v9
dataprep-redis-service:
host: ${DATAPREP_SERVICE_HOST_IP}
ports: ${DATAPREP_SERVICE_PORT}
image: opea/dataprep-redis:latest
environment:
REDIS_URL: ${REDIS_URL}
INDEX_NAME: ${INDEX_NAME}
tei-embedding-service:
host: ${TEI_EMBEDDING_SERVICE_IP}
ports: ${TEI_EMBEDDING_SERVICE_PORT}
image: opea/tei-gaudi:latest
volumes:
- "./data:/data"
runtime: habana
cap_add:
- SYS_NICE
ipc: host
environment:
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
model-id: ${EMBEDDING_MODEL_ID}
embedding:
host: ${EMBEDDING_SERVICE_HOST_IP}
ports: ${EMBEDDING_SERVICE_PORT}
image: opea/embedding-tei:latest
endpoint: /v1/embeddings
retrieval:
host: ${RETRIEVER_SERVICE_HOST_IP}
ports: ${RETRIEVER_SERVICE_PORT}
image: opea/retriever-redis:latest
endpoint: /v1/retrieval
reranking:
host: ${RERANK_SERVICE_HOST_IP}
ports: ${RERANK_SERVICE_PORT}
image: opea/reranking-tei:latest
endpoint: /v1/reranking
tgi_service:
host: ${TGI_SERVICE_IP}
ports: ${TGI_SERVICE_PORT}
image: ghcr.io/huggingface/tgi-gaudi:1.2.1
volumes:
- "./data:/data"
runtime: habana
cap_add:
- SYS_NICE
ipc: host
environment:
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
model-id: ${LLM_MODEL_ID}
llm:
host: ${LLM_SERVICE_HOST_IP}
ports: ${LLM_SERVICE_PORT}
image: opea/llm-tgi:latest
endpoint: /v1/chat/completions
ui:
host: ${UI_SERVICE_HOST_IP}
ports:
- "5173:5173"
image: opea/chatqna-ui:latest
environment:
- CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
- UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT}
opea_mega_service:
host: ${MEGA_SERVICE_HOST_IP}
ports: ${MEGA_SERVICE_PORT}
image: opea/chatqna:latest
endpoint: /v1/chatqna
mega_flow:
- embedding >> retrieval >> reranking >> llm

View File

@@ -1,25 +0,0 @@
# Copyright (c) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# FROM nginx
# RUN rm /etc/nginx/conf.d/default.conf
# COPY nginx.conf /etc/nginx/conf.d/default.conf
FROM nginx:latest
RUN rm /etc/nginx/conf.d/default.conf
COPY nginx.conf /etc/nginx/conf.d/default.conf
EXPOSE 80
CMD ["nginx", "-g", "daemon off;"]

View File

@@ -0,0 +1,279 @@
# ChatQnA Application
Chatbots are the most widely adopted use case for leveraging the powerful chat and reasoning capabilities of large language models (LLMs). The retrieval augmented generation (RAG) architecture is quickly becoming the industry standard for developing chatbots because it combines the benefits of a knowledge base (via a vector store) and generative models to reduce hallucinations, maintain up-to-date information, and leverage domain-specific knowledge.
RAG bridges the knowledge gap by dynamically fetching relevant information from external sources, ensuring that responses generated remain factual and current. At the heart of this architecture are vector databases, instrumental in enabling efficient and semantic retrieval of information. These databases store data as vectors, allowing RAG to swiftly access the most pertinent documents or data points based on semantic similarity.
The ChatQnA architecture is shown below:
![architecture](https://i.imgur.com/lLOnQio.png)
This ChatQnA use case performs RAG using LangChain, Redis VectorDB and Text Generation Inference on Intel Gaudi2. The Intel Gaudi2 accelerator supports both training and inference for deep learning models, in particular LLMs. Please visit [Habana AI products](https://habana.ai/products) for more details.
# Solution Overview
Steps to implement the solution are as follows:
## In Intel Gaudi2 Platform
1. [Deploy a TGI container with the LLM model of your choice](#launch-tgi-gaudi-service) (the solution uses a 70B model by default)
## In Intel Xeon Platform
1. [Export the TGI endpoint as an environment variable](#customize-tgi-gaudi-service)
2. [Deploy a TEI container for the embedding model service and export the endpoint](#enable-tei-for-embedding-model)
3. [Launch a Redis container and a LangChain container](#launch-redis-and-langchain-backend-service)
4. [Ingest data into Redis](#ingest-data-into-redis); this example provides a few sample PDF documents
5. [Start the backend service](#start-the-backend-service) to accept queries to LangChain
6. [Start the GUI](#start-the-frontend-service)-based chatbot service to experiment with the RAG-based chatbot
To use [🤗 text-generation-inference](https://github.com/huggingface/text-generation-inference) on Habana Gaudi/Gaudi2, please follow these steps:
## Prepare TGI Docker
Getting started is straightforward with the official Docker container. Simply pull the image using:
```bash
docker pull ghcr.io/huggingface/tgi-gaudi:1.2.1
```
Alternatively, you can build the Docker image yourself using the latest [TGI-Gaudi](https://github.com/huggingface/tgi-gaudi) code with the command below:
```bash
bash ./serving/tgi_gaudi/build_docker.sh
```
## Launch TGI Gaudi Service
### Launch a local server instance on 1 Gaudi card:
```bash
bash ./serving/tgi_gaudi/launch_tgi_service.sh
```
For gated models such as `LLAMA-2`, you will have to pass `-e HUGGING_FACE_HUB_TOKEN=<token>` to the `docker run` command above with a valid Hugging Face Hub read token.
Please follow this link to the [Hugging Face token documentation](https://huggingface.co/docs/hub/security-tokens) to get an access token, and export it via the `HUGGINGFACEHUB_API_TOKEN` environment variable:
```bash
export HUGGINGFACEHUB_API_TOKEN=<token>
```
### Launch a local server instance on 8 Gaudi cards:
```bash
bash ./serving/tgi_gaudi/launch_tgi_service.sh 8
```
You can then make requests like the one below to check the service status:
```bash
curl 127.0.0.1:8080/generate \
-X POST \
-d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":32}}' \
-H 'Content-Type: application/json'
```
### Customize TGI Gaudi Service
The `./serving/tgi_gaudi/launch_tgi_service.sh` script accepts three parameters:
- `num_cards`: The number of Gaudi cards to be utilized, ranging from 1 to 8. The default is set to 1.
- `port_number`: The port number assigned to the TGI Gaudi endpoint, with the default being 8080.
- `model_name`: The model name used for the LLM, with the default set to "Intel/neural-chat-7b-v3-3".
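For example, a hypothetical invocation that passes all three parameters positionally (card count, port, model) could look like this:
```bash
# Illustrative only: assumes the script accepts num_cards, port_number, and model_name as positional arguments
bash ./serving/tgi_gaudi/launch_tgi_service.sh 2 8080 "Intel/neural-chat-7b-v3-3"
```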
You have the flexibility to customize these parameters according to your specific needs. Additionally, you can set the TGI Gaudi endpoint by exporting the environment variable `TGI_LLM_ENDPOINT`:
```bash
export TGI_LLM_ENDPOINT="http://xxx.xxx.xxx.xxx:8080"
```
## Enable TEI for embedding model
Text Embeddings Inference (TEI) is a toolkit designed for deploying and serving open-source text embeddings and sequence classification models efficiently. With TEI, users can extract high-performance features using various popular models. It supports token-based dynamic batching for enhanced performance.
To launch the TEI service, you can use the following commands:
```bash
model=BAAI/bge-large-en-v1.5
revision=refs/pr/5
volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run
docker run -p 9090:80 -v $volume:/data -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 --model-id $model --revision $revision
export TEI_ENDPOINT="http://xxx.xxx.xxx.xxx:9090"
```
You can then make requests like the one below to check the service status:
```bash
curl 127.0.0.1:9090/embed \
-X POST \
-d '{"inputs":"What is Deep Learning?"}' \
-H 'Content-Type: application/json'
```
Note: If you want to integrate the TEI service into the LangChain application, you'll need to restart the LangChain backend service after launching the TEI service.
## Launch Vector Database and LangChain Backend Service
Update the `HUGGINGFACEHUB_API_TOKEN` environment variable with your Hugging Face token in `docker-compose.yml`.
By default, Redis is used as the vector store. To use Qdrant, use the `docker-compose-qdrant.yml` file instead.
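Whichever compose file you use, if it picks the token up as a `${HUGGINGFACEHUB_API_TOKEN}` substitution, a minimal sketch is to export it in your shell before bringing the stack up:
```bash
# Assumes the compose file references ${HUGGINGFACEHUB_API_TOKEN}; replace <token> with your real token
export HUGGINGFACEHUB_API_TOKEN=<token>
```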
```bash
cd langchain/docker
docker compose -f docker-compose.yml up -d
# To use Qdrant, run
# docker compose -f docker-compose-qdrant.yml up -d
cd ../../
```
> [!NOTE]
> If you modified any files and want that change introduced in this step, add `--build` to the end of the command to build the container image instead of pulling it from Docker Hub.
## Ingest Data Into Vector Database
Each time the vector database container is launched, data should be ingested into the container using the following commands:
```bash
docker exec -it qna-rag-redis-server bash
# To use Qdrant, run
# docker exec -it qna-rag-qdrant-server bash
cd /ws
python ingest.py
```
Note: `ingest.py` will download the embedding model. Please set the proxy if necessary.
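If your environment requires a proxy, a minimal sketch (with a placeholder proxy address) is to export the proxy variables inside the container before running the ingestion:
```bash
# Placeholder proxy address; adjust or skip if no proxy is needed
export http_proxy=http://proxy.example.com:8080
export https_proxy=http://proxy.example.com:8080
python ingest.py
```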
# Start LangChain Server
## Enable GuardRails using Meta's Llama Guard model (Optional)
We offer content moderation support utilizing Meta's [Llama Guard](https://huggingface.co/meta-llama/LlamaGuard-7b) model. To activate GuardRails, follow the instructions below to deploy the Llama Guard model on TGI Gaudi.
```bash
volume=$PWD/data
model_id="meta-llama/LlamaGuard-7b"
docker run -p 8088:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HUGGING_FACE_HUB_TOKEN=<your HuggingFace token> -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$http_proxy tgi_gaudi --model-id $model_id
export SAFETY_GUARD_ENDPOINT="http://xxx.xxx.xxx.xxx:8088"
```
You can then make requests like the one below to check the service status:
```bash
curl 127.0.0.1:8088/generate \
-X POST \
-d '{"inputs":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}' \
-H 'Content-Type: application/json'
```
## Start the Backend Service
Make sure the TGI-Gaudi service is running and the data has been ingested into Redis, then launch the backend service:
```bash
docker exec -it qna-rag-redis-server bash
# export TGI_LLM_ENDPOINT="http://xxx.xxx.xxx.xxx:8080" - can be omitted if set before in docker-compose.yml
# export TEI_ENDPOINT="http://xxx.xxx.xxx.xxx:9090" - Needs to be added only if TEI to be used and can be omitted if set before in docker-compose.yml
nohup python app/server.py &
```
The LangChain backend service listens on port 8000; you can customize it by changing the code in `docker/qna-app/app/server.py`.
You can then make requests like the ones below to check the LangChain backend service status:
```bash
# non-streaming endpoint
curl 127.0.0.1:8000/v1/rag/chat \
-X POST \
-d '{"query":"What is the total revenue of Nike in 2023?"}' \
-H 'Content-Type: application/json'
```
```bash
# streaming endpoint
curl 127.0.0.1:8000/v1/rag/chat_stream \
-X POST \
-d '{"query":"What is the total revenue of Nike in 2023?"}' \
-H 'Content-Type: application/json'
```
## Start the Frontend Service
Navigate to the "ui" folder and execute the following commands to start the frontend GUI:
```bash
cd ui
sudo apt-get install npm && \
npm install -g n && \
n stable && \
hash -r && \
npm install -g npm@latest
```
For CentOS, please use the following commands instead:
```bash
curl -sL https://rpm.nodesource.com/setup_20.x | sudo bash -
sudo yum install -y nodejs
```
Update the `DOC_BASE_URL` environment variable in the `.env` file by replacing the IP address '127.0.0.1' with the actual IP address.
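For example, assuming the backend listens on the default port 8000 mentioned above and the host's address is 192.168.1.100 (a placeholder), the `.env` entry would change along these lines:
```bash
# .env (illustrative host address; keep whatever port/path your deployment actually uses)
DOC_BASE_URL=http://192.168.1.100:8000
```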
Run the following command to install the required dependencies:
```bash
npm install
```
Start the development server by executing the following command:
```bash
nohup npm run dev &
```
This will initiate the frontend service and launch the application.
# Enable TGI Gaudi FP8 for higher throughput (Optional)
TGI Gaudi uses BFLOAT16 optimization as the default setting. To achieve higher throughput, you can enable FP8 quantization on TGI Gaudi. Note that currently only the Llama2 and Mistral model series support FP8 quantization. Please follow the steps below to enable FP8 quantization.
## Prepare Metadata for FP8 Quantization
Enter the TGI Gaudi Docker container and run the commands below:
```bash
pip install git+https://github.com/huggingface/optimum-habana.git
git clone https://github.com/huggingface/optimum-habana.git
cd optimum-habana/examples/text-generation
pip install -r requirements_lm_eval.txt
QUANT_CONFIG=./quantization_config/maxabs_measure.json python ../gaudi_spawn.py run_lm_eval.py -o acc_7b_bs1_measure.txt --model_name_or_path Intel/neural-chat-7b-v3-3 --attn_softmax_bf16 --use_hpu_graphs --trim_logits --use_kv_cache --reuse_cache --bf16 --batch_size 1
QUANT_CONFIG=./quantization_config/maxabs_quant.json python ../gaudi_spawn.py run_lm_eval.py -o acc_7b_bs1_quant.txt --model_name_or_path Intel/neural-chat-7b-v3-3 --attn_softmax_bf16 --use_hpu_graphs --trim_logits --use_kv_cache --reuse_cache --bf16 --batch_size 1 --fp8
```
After the above commands finish, the quantization metadata is generated. Copy the metadata directory `./hqt_output/` and the quantization JSON file to the host (under …/data). Adapt the following commands to your Docker container ID and directory path.
```bash
docker cp 262e04bbe466:/usr/src/optimum-habana/examples/text-generation/hqt_output data/
docker cp 262e04bbe466:/usr/src/optimum-habana/examples/text-generation/quantization_config/maxabs_quant.json data/
```
Then modify `dump_stats_path` to "/data/hqt_output/measure" and update `dump_stats_xlsx_path` to "/data/hqt_output/measure/fp8stats.xlsx" in the `maxabs_quant.json` file.
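If you prefer to patch the copied file from the shell, a hypothetical sketch (assuming both keys appear as plain JSON string fields in `data/maxabs_quant.json`) is:
```bash
# Illustrative only; verify the resulting JSON after editing
sed -i 's#"dump_stats_path": *"[^"]*"#"dump_stats_path": "/data/hqt_output/measure"#' data/maxabs_quant.json
sed -i 's#"dump_stats_xlsx_path": *"[^"]*"#"dump_stats_xlsx_path": "/data/hqt_output/measure/fp8stats.xlsx"#' data/maxabs_quant.json
```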
## Restart the TGI Gaudi server with all the metadata mapped
```bash
docker run -p 8080:80 -e QUANT_CONFIG=/data/maxabs_quant.json -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:1.2.1 --model-id Intel/neural-chat-7b-v3-3
```
Now TGI Gaudi will launch the FP8 model by default, and you can make requests like the one below to check the service status:
```bash
curl 127.0.0.1:8080/generate \
-X POST \
-d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":32}}' \
-H 'Content-Type: application/json'
```
#
SCRIPT USAGE NOTICE:  By downloading and using any script file included with the associated software package (such as files with .bat, .cmd, or .JS extensions, Docker files, or any other type of file that, when executed, automatically downloads and/or installs files onto your system) (the “Script File”), it is your obligation to review the Script File to understand what files (e.g.,  other software, AI models, AI Datasets) the Script File will download to your system (“Downloaded Files”). Furthermore, by downloading and using the Downloaded Files, even if they are installed through a silent install, you agree to any and all terms and conditions associated with such files, including but not limited to, license terms, notices, or disclaimers.

View File

@@ -1,19 +1,9 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
# Copyright (c) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import concurrent.futures

View File

@@ -1,16 +1,6 @@
# Copyright (c) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
version: "3"
services:

View File

@@ -0,0 +1,15 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# FROM nginx
# RUN rm /etc/nginx/conf.d/default.conf
# COPY nginx.conf /etc/nginx/conf.d/default.conf
FROM nginx:latest
RUN rm /etc/nginx/conf.d/default.conf
COPY nginx.conf /etc/nginx/conf.d/default.conf
EXPOSE 80
CMD ["nginx", "-g", "daemon off;"]

View File

@@ -1,16 +1,6 @@
# Copyright (c) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# SCRIPT USAGE NOTICE: By downloading and using any script file included
# with the associated software package (such as files with .bat, .cmd, or

View File

@@ -0,0 +1,35 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
services:
qdrant-vector-db:
image: qdrant/qdrant:v1.9.0
container_name: qdrant-vector-db
ports:
- "6333:6333"
- "6334:6334"
qna-rag-qdrant-server:
build:
args:
https_proxy: ${https_proxy}
http_proxy: ${http_proxy}
dockerfile: Dockerfile
context: .
image: intel/gen-ai-examples:qna-rag-qdrant-server
container_name: qna-rag-qdrant-server
environment:
- https_proxy=${https_proxy}
- HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
- "EMBED_MODEL=BAAI/bge-base-en-v1.5"
- "VECTOR_DATABASE=QDRANT"
- "TGI_LLM_ENDPOINT=http://localhost:8080"
# "TEI_ENDPOINT="http://xxx.xxx.xxx.xxx:9090" - To use a custom TEI endpoint
ulimits:
memlock:
soft: -1 # Set memlock to unlimited (no soft or hard limit)
hard: -1
volumes:
- ../qdrant:/ws
- ../test:/test
network_mode: "host"

View File

@@ -1,16 +1,6 @@
# Copyright (c) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
services:
redis-vector-db:
@@ -23,15 +13,27 @@ services:
build:
args:
https_proxy: ${https_proxy}
http_proxy: ${http_proxy}
dockerfile: Dockerfile
context: .
image: intel/gen-ai-examples:qna-rag-redis-server
container_name: qna-rag-redis-server
environment:
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- HTTP_PROXY=${HTTP_PROXY}
- HTTPS_PROXY=${HTTPS_PROXY}
- no_proxy=${no_proxy}
- SOCKS_PROXY=${SOCKS_PROXY}
- socks_proxy=${socks_proxy}
- FTP_PROXY=${FTP_PROXY}
- ftp_proxy=${ftp_proxy}
- HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
- CONFLUENCE_ACCESS_TOKEN=${CONFLUENCE_ACCESS_TOKEN}
- "REDIS_PORT=6379"
- "EMBED_MODEL=BAAI/bge-base-en-v1.5"
- "REDIS_SCHEMA=schema_dim_768.yml"
- "VECTOR_DATABASE=REDIS"
ulimits:
memlock:
soft: -1 # Set memlock to unlimited (no soft or hard limit)

View File

@@ -0,0 +1,25 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
FROM python:3.11-slim
RUN pip install --no-cache-dir poetry==1.6.1
RUN poetry config virtualenvs.create false
WORKDIR /code
COPY ./pyproject.toml ./README.md ./poetry.lock* ./
COPY ./package[s] ./packages
RUN poetry install --no-interaction --no-ansi --no-root
COPY ./app ./app
RUN poetry install --no-interaction --no-ansi
EXPOSE 8080
CMD exec uvicorn app.server:app --host 0.0.0.0 --port 8080

View File

@@ -0,0 +1,6 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#

View File

@@ -1,19 +1,9 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
# Copyright (c) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
unsafe_categories = """O1: Violence and Hate.
Should not

View File

@@ -1,19 +1,9 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
# Copyright (c) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

View File

@@ -1,20 +1,11 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
import argparse
import os
from fastapi import APIRouter, FastAPI, File, Request, UploadFile
@@ -22,15 +13,14 @@ from fastapi.responses import JSONResponse, RedirectResponse, StreamingResponse
from guardrails import moderation_prompt_for_chat, unsafe_dict
from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings
from langchain_community.llms import HuggingFaceEndpoint
from langchain_community.vectorstores import Redis
from langchain_core.messages import HumanMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langserve import add_routes
from prompts import contextualize_q_prompt, qa_prompt
from rag_redis.config import EMBED_MODEL, INDEX_NAME, INDEX_SCHEMA, REDIS_URL
from prompts import contextualize_q_prompt, prompt, qa_prompt
from starlette.middleware.cors import CORSMiddleware
from utils import (
VECTOR_DATABASE,
create_kb_folder,
create_retriever_from_files,
create_retriever_from_links,
@@ -39,6 +29,15 @@ from utils import (
reload_retriever,
)
if VECTOR_DATABASE == "REDIS":
from rag_redis.config import INDEX_NAME
elif VECTOR_DATABASE == "QDRANT":
from rag_qdrant.config import COLLECTION_NAME as INDEX_NAME
parser = argparse.ArgumentParser(description="Server Configuration")
parser.add_argument("--chathistory", action="store_true", help="Enable debug mode")
args = parser.parse_args()
app = FastAPI()
app.add_middleware(
@@ -47,7 +46,6 @@ app.add_middleware(
class RAGAPIRouter(APIRouter):
def __init__(self, upload_dir, entrypoint, safety_guard_endpoint, tei_endpoint=None) -> None:
super().__init__()
self.upload_dir = upload_dir
@@ -69,19 +67,7 @@ class RAGAPIRouter(APIRouter):
repetition_penalty=1.03,
streaming=True,
)
# for NeuralChatEndpoint:
"""
self.llm = NeuralChatEndpoint(
endpoint_url=entrypoint,
max_new_tokens=1024,
top_k=10,
top_p=0.95,
typical_p=0.95,
temperature=0.01,
repetition_penalty=1.03,
streaming=True,
)
"""
if self.safety_guard_endpoint:
self.llm_guard = HuggingFaceEndpoint(
endpoint_url=safety_guard_endpoint,
@@ -100,23 +86,45 @@ class RAGAPIRouter(APIRouter):
self.embeddings = HuggingFaceHubEmbeddings(model=tei_endpoint)
else:
# create embeddings using local embedding model
EMBED_MODEL = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
self.embeddings = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL)
rds = Redis.from_existing_index(
self.embeddings,
index_name=INDEX_NAME,
redis_url=REDIS_URL,
schema=INDEX_SCHEMA,
)
retriever = rds.as_retriever(search_type="mmr")
if VECTOR_DATABASE == "REDIS":
from langchain_community.vectorstores import Redis
from rag_redis.config import INDEX_SCHEMA, REDIS_URL
vdb = Redis.from_existing_index(
self.embeddings,
index_name=INDEX_NAME,
redis_url=REDIS_URL,
schema=INDEX_SCHEMA,
)
elif VECTOR_DATABASE == "QDRANT":
from langchain_community.vectorstores import Qdrant
from qdrant_client import QdrantClient
from rag_qdrant.config import QDRANT_HOST, QDRANT_PORT
client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT)
vdb = Qdrant(
embeddings=self.embeddings,
collection_name=INDEX_NAME,
client=client,
)
retriever = vdb.as_retriever(search_type="mmr")
# Define contextualize chain
self.contextualize_q_chain = contextualize_q_prompt | self.llm | StrOutputParser()
# Define LLM chain
self.llm_chain = (
RunnablePassthrough.assign(context=self.contextualized_question | retriever) | qa_prompt | self.llm
)
if args.chathistory:
self.llm_chain = (
RunnablePassthrough.assign(context=self.contextualized_question | retriever) | qa_prompt | self.llm
)
else:
self.llm_chain = (
RunnablePassthrough.assign(context=self.contextualized_question | retriever) | prompt | self.llm
)
print("[rag - router] LLM chain initialized.")
# Define chat history
@@ -129,9 +137,12 @@ class RAGAPIRouter(APIRouter):
return input["question"]
def handle_rag_chat(self, query: str):
response = self.llm_chain.invoke({"question": query, "chat_history": self.chat_history})
response = self.llm_chain.invoke(
{"question": query, "chat_history": self.chat_history} if args.chathistory else {"question": query}
)
result = response.split("</s>")[0]
self.chat_history.extend([HumanMessage(content=query), response])
if args.chathistory:
self.chat_history.extend([HumanMessage(content=query), response])
# output guardrails
if self.safety_guard_endpoint:
response_output_guard = self.llm_guard(
@@ -160,7 +171,6 @@ async def rag_chat(request: Request):
print(f"[rag - chat] POST request: /v1/rag/chat, params:{params}")
query = params["query"]
kb_id = params.get("knowledge_base_id", "default")
print(f"[rag - chat] history: {router.chat_history}")
# prompt guardrails
if router.safety_guard_endpoint:
@@ -174,16 +184,26 @@ async def rag_chat(request: Request):
if kb_id == "default":
print("[rag - chat] use default knowledge base")
retriever = reload_retriever(router.embeddings, INDEX_NAME)
router.llm_chain = (
RunnablePassthrough.assign(context=router.contextualized_question | retriever) | qa_prompt | router.llm
)
if args.chathistory:
router.llm_chain = (
RunnablePassthrough.assign(context=router.contextualized_question | retriever) | qa_prompt | router.llm
)
else:
router.llm_chain = (
RunnablePassthrough.assign(context=router.contextualized_question | retriever) | prompt | router.llm
)
elif kb_id.startswith("kb"):
new_index_name = INDEX_NAME + kb_id
print(f"[rag - chat] use knowledge base {kb_id}, index name is {new_index_name}")
retriever = reload_retriever(router.embeddings, new_index_name)
router.llm_chain = (
RunnablePassthrough.assign(context=router.contextualized_question | retriever) | qa_prompt | router.llm
)
if args.chathistory:
router.llm_chain = (
RunnablePassthrough.assign(context=router.contextualized_question | retriever) | qa_prompt | router.llm
)
else:
router.llm_chain = (
RunnablePassthrough.assign(context=router.contextualized_question | retriever) | prompt | router.llm
)
else:
return JSONResponse(status_code=400, content={"message": "Wrong knowledge base id."})
return router.handle_rag_chat(query=query)
@@ -195,7 +215,6 @@ async def rag_chat_stream(request: Request):
print(f"[rag - chat_stream] POST request: /v1/rag/chat_stream, params:{params}")
query = params["query"]
kb_id = params.get("knowledge_base_id", "default")
print(f"[rag - chat_stream] history: {router.chat_history}")
# prompt guardrails
if router.safety_guard_endpoint:
@@ -214,28 +233,41 @@ async def rag_chat_stream(request: Request):
if kb_id == "default":
retriever = reload_retriever(router.embeddings, INDEX_NAME)
router.llm_chain = (
RunnablePassthrough.assign(context=router.contextualized_question | retriever) | qa_prompt | router.llm
)
if args.chathistory:
router.llm_chain = (
RunnablePassthrough.assign(context=router.contextualized_question | retriever) | qa_prompt | router.llm
)
else:
router.llm_chain = (
RunnablePassthrough.assign(context=router.contextualized_question | retriever) | prompt | router.llm
)
elif kb_id.startswith("kb"):
new_index_name = INDEX_NAME + kb_id
retriever = reload_retriever(router.embeddings, new_index_name)
router.llm_chain = (
RunnablePassthrough.assign(context=router.contextualized_question | retriever) | qa_prompt | router.llm
)
if args.chathistory:
router.llm_chain = (
RunnablePassthrough.assign(context=router.contextualized_question | retriever) | qa_prompt | router.llm
)
else:
router.llm_chain = (
RunnablePassthrough.assign(context=router.contextualized_question | retriever) | prompt | router.llm
)
else:
return JSONResponse(status_code=400, content={"message": "Wrong knowledge base id."})
def stream_generator():
chat_response = ""
for text in router.llm_chain.stream({"question": query, "chat_history": router.chat_history}):
for text in router.llm_chain.stream(
{"question": query, "chat_history": router.chat_history} if args.chathistory else {"question": query}
):
chat_response += text
processed_text = post_process_text(text)
if text is not None:
if text and processed_text:
yield processed_text
chat_response = chat_response.split("</s>")[0]
print(f"[rag - chat_stream] stream response: {chat_response}")
router.chat_history.extend([HumanMessage(content=query), chat_response])
if args.chathistory:
router.chat_history.extend([HumanMessage(content=query), chat_response])
yield "data: [DONE]\n\n"
return StreamingResponse(stream_generator(), media_type="text/event-stream")
@@ -263,9 +295,14 @@ async def rag_create(file: UploadFile = File(...)):
print("[rag - create] starting to create local db...")
index_name = INDEX_NAME + kb_id
retriever = create_retriever_from_files(save_file_name, router.embeddings, index_name)
router.llm_chain = (
RunnablePassthrough.assign(context=router.contextualized_question | retriever) | qa_prompt | router.llm
)
if args.chathistory:
router.llm_chain = (
RunnablePassthrough.assign(context=router.contextualized_question | retriever) | qa_prompt | router.llm
)
else:
router.llm_chain = (
RunnablePassthrough.assign(context=router.contextualized_question | retriever) | prompt | router.llm
)
print("[rag - create] kb created successfully")
except Exception as e:
print(f"[rag - create] create knowledge base failed! {e}")
@@ -286,9 +323,14 @@ async def rag_upload_link(request: Request):
print("[rag - upload_link] starting to create local db...")
index_name = INDEX_NAME + kb_id
retriever = create_retriever_from_links(router.embeddings, link_list, index_name)
router.llm_chain = (
RunnablePassthrough.assign(context=router.contextualized_question | retriever) | qa_prompt | router.llm
)
if args.chathistory:
router.llm_chain = (
RunnablePassthrough.assign(context=router.contextualized_question | retriever) | qa_prompt | router.llm
)
else:
router.llm_chain = (
RunnablePassthrough.assign(context=router.contextualized_question | retriever) | prompt | router.llm
)
print("[rag - upload_link] kb created successfully")
except Exception as e:
print(f"[rag - upload_link] create knowledge base failed! {e}")

View File

@@ -1,19 +1,9 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
# Copyright (c) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import multiprocessing
import os
@@ -28,9 +18,13 @@ import requests
from bs4 import BeautifulSoup
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import UnstructuredFileLoader
from langchain_community.vectorstores import Redis
from langchain_core.documents import Document
from rag_redis.config import INDEX_SCHEMA, REDIS_URL
SUPPORTED_VECTOR_DATABASES = ["REDIS", "QDRANT"]
VECTOR_DATABASE = str(os.getenv("VECTOR_DATABASE", "redis")).upper()
assert VECTOR_DATABASE in SUPPORTED_VECTOR_DATABASES, f"Invalid VECTOR_DATABASE: {VECTOR_DATABASE}"
def get_current_beijing_time():
@@ -57,7 +51,6 @@ def create_kb_folder(upload_dir):
class Crawler:
def __init__(self, pool=None):
if pool:
assert isinstance(pool, (str, list, tuple)), "url pool should be str, list or tuple"
@@ -292,16 +285,33 @@ def create_retriever_from_files(doc, embeddings, index_name: str):
loader = UnstructuredFileLoader(doc, mode="single", strategy="fast")
chunks = loader.load_and_split(text_splitter)
rds = Redis.from_texts(
texts=[chunk.page_content for chunk in chunks],
metadatas=[chunk.metadata for chunk in chunks],
embedding=embeddings,
index_name=index_name,
redis_url=REDIS_URL,
index_schema=INDEX_SCHEMA,
)
if VECTOR_DATABASE == "REDIS":
from langchain_community.vectorstores import Redis
from rag_redis.config import INDEX_SCHEMA, REDIS_URL
retriever = rds.as_retriever(search_type="mmr")
vdb = Redis.from_texts(
texts=[chunk.page_content for chunk in chunks],
metadatas=[chunk.metadata for chunk in chunks],
embedding=embeddings,
index_name=index_name,
redis_url=REDIS_URL,
index_schema=INDEX_SCHEMA,
)
elif VECTOR_DATABASE == "QDRANT":
from langchain_community.vectorstores import Qdrant
from rag_qdrant.config import COLLECTION_NAME, QDRANT_HOST, QDRANT_PORT
vdb = Qdrant.from_texts(
texts=[chunk.page_content for chunk in chunks],
metadatas=[chunk.metadata for chunk in chunks],
embedding=embeddings,
collection_name=COLLECTION_NAME,
host=QDRANT_HOST,
port=QDRANT_PORT,
)
retriever = vdb.as_retriever(search_type="mmr")
return retriever
@@ -315,38 +325,72 @@ def create_retriever_from_links(embeddings, link_list: list, index_name):
texts.append(data)
metadatas.append(metadata)
rds = Redis.from_texts(
texts=texts,
metadatas=metadatas,
embedding=embeddings,
index_name=index_name,
redis_url=REDIS_URL,
index_schema=INDEX_SCHEMA,
)
if VECTOR_DATABASE == "REDIS":
from langchain_community.vectorstores import Redis
from rag_redis.config import INDEX_SCHEMA, REDIS_URL
retriever = rds.as_retriever(search_type="mmr")
vdb = Redis.from_texts(
texts=texts,
metadatas=metadatas,
embedding=embeddings,
index_name=index_name,
redis_url=REDIS_URL,
index_schema=INDEX_SCHEMA,
)
elif VECTOR_DATABASE == "QDRANT":
from langchain_community.vectorstores import Qdrant
from rag_qdrant.config import COLLECTION_NAME, QDRANT_HOST, QDRANT_PORT
vdb = Qdrant.from_texts(
texts=texts,
metadatas=metadatas,
embedding=embeddings,
collection_name=COLLECTION_NAME,
host=QDRANT_HOST,
port=QDRANT_PORT,
)
retriever = vdb.as_retriever(search_type="mmr")
return retriever
def reload_retriever(embeddings, index_name):
print(f"[rag - reload retriever] reload with index: {index_name}")
rds = Redis.from_existing_index(
embeddings,
index_name=index_name,
redis_url=REDIS_URL,
schema=INDEX_SCHEMA,
)
retriever = rds.as_retriever(search_type="mmr")
if VECTOR_DATABASE == "REDIS":
from langchain_community.vectorstores import Redis
from rag_redis.config import INDEX_SCHEMA, REDIS_URL
vdb = Redis.from_existing_index(
embeddings,
index_name=index_name,
redis_url=REDIS_URL,
schema=INDEX_SCHEMA,
)
elif VECTOR_DATABASE == "QDRANT":
from langchain_community.vectorstores import Qdrant
from qdrant_client import QdrantClient
from rag_qdrant.config import COLLECTION_NAME, QDRANT_HOST, QDRANT_PORT
client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT)
vdb = Qdrant(
embeddings=embeddings,
collection_name=COLLECTION_NAME,
client=client,
)
retriever = vdb.as_retriever(search_type="mmr")
return retriever
def post_process_text(text: str):
if text == " ":
return "data: @#$\n\n"
if text.isspace():
return None
if text == "\n":
return "data: <br/>\n\n"
if text.isspace():
return None
new_text = text.replace(" ", "@#$")
return f"data: {new_text}\n\n"

View File

@@ -1,4 +1,5 @@
-f https://download.pytorch.org/whl/torch_stable.html
atlassian-python-api
cryptography==42.0.4
easyocr
intel-extension-for-pytorch
@@ -11,6 +12,7 @@ poetry
pyarrow
pydantic==1.10.13
pymupdf
qdrant-client==1.9.0
redis
sentence-transformers
unstructured

View File

@@ -0,0 +1,95 @@
#!/usr/bin/env python
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import io
import os
import numpy as np
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings
from langchain_community.vectorstores import Qdrant
from PIL import Image
from rag_qdrant.config import COLLECTION_NAME, EMBED_MODEL, QDRANT_HOST, QDRANT_PORT, TEI_EMBEDDING_ENDPOINT
def pdf_loader(file_path):
try:
import easyocr
import fitz
except ImportError:
raise ImportError(
"`PyMuPDF` or 'easyocr' package is not found, please install it with "
"`pip install pymupdf or pip install easyocr.`"
)
doc = fitz.open(file_path)
reader = easyocr.Reader(["en"])
result = ""
for i in range(doc.page_count):
page = doc.load_page(i)
pagetext = page.get_text().strip()
if pagetext:
result = result + pagetext
if len(doc.get_page_images(i)) > 0:
for img in doc.get_page_images(i):
if img:
pageimg = ""
xref = img[0]
img_data = doc.extract_image(xref)
img_bytes = img_data["image"]
pil_image = Image.open(io.BytesIO(img_bytes))
img = np.array(pil_image)
img_result = reader.readtext(img, paragraph=True, detail=0)
pageimg = pageimg + ", ".join(img_result).strip()
if pageimg.endswith("!") or pageimg.endswith("?") or pageimg.endswith("."):
pass
else:
pageimg = pageimg + "."
result = result + pageimg
return result
def ingest_documents():
"""Ingest PDF to Qdrant from the data/ directory that
contains Edgar 10k filings data for Nike."""
# Load list of pdfs
company_name = "Nike"
data_path = "data/"
doc_path = [os.path.join(data_path, file) for file in os.listdir(data_path)][0]
print("Parsing 10k filing doc for NIKE", doc_path)
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=100, add_start_index=True)
content = pdf_loader(doc_path)
chunks = text_splitter.split_text(content)
print("Done preprocessing. Created ", len(chunks), " chunks of the original pdf")
# Create vectorstore
if TEI_EMBEDDING_ENDPOINT:
# create embeddings using TEI endpoint service
embedder = HuggingFaceHubEmbeddings(model=TEI_EMBEDDING_ENDPOINT)
else:
# create embeddings using local embedding model
embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL)
# Batch size
batch_size = 32
num_chunks = len(chunks)
for i in range(0, num_chunks, batch_size):
batch_chunks = chunks[i : i + batch_size]
batch_texts = [f"Company: {company_name}. " + chunk for chunk in batch_chunks]
_ = Qdrant.from_texts(
texts=batch_texts,
embedding=embedder,
collection_name=COLLECTION_NAME,
host=QDRANT_HOST,
port=QDRANT_PORT,
)
print(f"Processed batch {i//batch_size + 1}/{(num_chunks-1)//batch_size + 1}")
if __name__ == "__main__":
ingest_documents()

View File

@@ -0,0 +1,94 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "fe1adb29",
"metadata": {},
"source": []
},
{
"cell_type": "markdown",
"id": "681a5d1e",
"metadata": {},
"source": [
"## Connect to RAG App\n",
"\n",
"Assuming you are already running this server:\n",
"```bash\n",
"langserve start\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "d774be2a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Nike's revenue in 2023 was $51.2 billion. \n",
"\n",
"Source: 'data/nke-10k-2023.pdf', Start Index: '146100'\n"
]
}
],
"source": [
"from langserve.client import RemoteRunnable\n",
"\n",
"rag_qdrant = RemoteRunnable(\"http://localhost:8000/rag-qdrant\")\n",
"\n",
"print(rag_qdrant.invoke(\"What was Nike's revenue in 2023?\"))"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "07ae0005",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"As of May 31, 2023, Nike had approximately 83,700 employees worldwide. This information can be found in the first piece of context provided. (source: data/nke-10k-2023.pdf, start_index: 32532)\n"
]
}
],
"source": [
"print(rag_qdrant.invoke(\"How many employees work at Nike?\"))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4a6b9f00",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,2 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

View File

@@ -0,0 +1,69 @@
#!/usr/bin/env python
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFaceEndpoint
from langchain_community.vectorstores import Qdrant
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from qdrant_client import QdrantClient
from rag_qdrant.config import COLLECTION_NAME, EMBED_MODEL, QDRANT_HOST, QDRANT_PORT, TGI_LLM_ENDPOINT
# Make this look better in the docs.
class Question(BaseModel):
__root__: str
# Init Embeddings
embedder = HuggingFaceEmbeddings(model_name=EMBED_MODEL)
# Connect to pre-loaded vectorstore
# run the ingest.py script to populate this
client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT)
vectorstore = Qdrant(embeddings=embedder, collection_name=COLLECTION_NAME, client=client)
# TODO allow user to change parameters
retriever = vectorstore.as_retriever(search_type="mmr")
# Define our prompt
template = """
Use the following pieces of context from retrieved
dataset to answer the question. Do not make up an answer if there is no
context provided to help answer it. Include the 'source' and 'start_index'
from the metadata included in the context you used to answer the question
Context:
---------
{context}
---------
Question: {question}
---------
Answer:
"""
prompt = ChatPromptTemplate.from_template(template)
# RAG Chain
model = HuggingFaceEndpoint(
endpoint_url=TGI_LLM_ENDPOINT,
max_new_tokens=512,
top_k=10,
top_p=0.95,
typical_p=0.95,
temperature=0.01,
repetition_penalty=1.03,
streaming=True,
truncate=1024,
)
chain = (
RunnableParallel({"context": retriever, "question": RunnablePassthrough()}) | prompt | model | StrOutputParser()
).with_types(input_type=Question)

View File

@@ -0,0 +1,17 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import os
# Embedding model
EMBED_MODEL = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
# Qdrant configuration
QDRANT_HOST = os.getenv("QDRANT", "localhost")
QDRANT_PORT = int(os.getenv("QDRANT_PORT", 6333))
COLLECTION_NAME = os.getenv("COLLECTION_NAME", "rag-qdrant")
# LLM/Embedding endpoints
TGI_LLM_ENDPOINT = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080")
TGI_LLM_ENDPOINT_NO_RAG = os.getenv("TGI_LLM_ENDPOINT_NO_RAG", "http://localhost:8081")
TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_ENDPOINT")

View File

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2023 LangChain, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@@ -1,30 +1,22 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
# Copyright (c) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import io
import os
import numpy as np
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceEmbeddings, HuggingFaceHubEmbeddings
from langchain_community.vectorstores import Redis
from PIL import Image
from rag_redis.config import EMBED_MODEL, INDEX_NAME, INDEX_SCHEMA, REDIS_URL
tei_embedding_endpoint = os.getenv("TEI_ENDPOINT")
def pdf_loader(file_path):
try:
@@ -79,17 +71,28 @@ def ingest_documents():
print("Done preprocessing. Created ", len(chunks), " chunks of the original pdf")
# Create vectorstore
embedder = HuggingFaceEmbeddings(model_name=EMBED_MODEL)
if tei_embedding_endpoint:
# create embeddings using TEI endpoint service
embedder = HuggingFaceHubEmbeddings(model=tei_embedding_endpoint)
else:
# create embeddings using local embedding model
embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL)
_ = Redis.from_texts(
# appending this little bit can sometimes help with semantic retrieval
# especially with multiple companies
texts=[f"Company: {company_name}. " + chunk for chunk in chunks],
embedding=embedder,
index_name=INDEX_NAME,
index_schema=INDEX_SCHEMA,
redis_url=REDIS_URL,
)
# Batch size
batch_size = 32
num_chunks = len(chunks)
for i in range(0, num_chunks, batch_size):
batch_chunks = chunks[i : i + batch_size]
batch_texts = [f"Company: {company_name}. " + chunk for chunk in batch_chunks]
_ = Redis.from_texts(
texts=batch_texts,
embedding=embedder,
index_name=INDEX_NAME,
index_schema=INDEX_SCHEMA,
redis_url=REDIS_URL,
)
print(f"Processed batch {i//batch_size + 1}/{(num_chunks-1)//batch_size + 1}")
if __name__ == "__main__":

View File

@@ -1,19 +1,9 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
# Copyright (c) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import DirectoryLoader, TextLoader, UnstructuredFileLoader

View File

@@ -1,30 +1,22 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
# Copyright (c) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import io
import os
import numpy as np
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceEmbeddings, HuggingFaceHubEmbeddings
from langchain_community.vectorstores import Redis
from PIL import Image
from rag_redis.config import EMBED_MODEL, INDEX_NAME, INDEX_SCHEMA, REDIS_URL
tei_embedding_endpoint = os.getenv("TEI_ENDPOINT")
def pdf_loader(file_path):
try:
@@ -79,17 +71,29 @@ def ingest_documents():
print("Done preprocessing. Created", len(chunks), "chunks of the original pdf")
# Create vectorstore
embedder = HuggingFaceEmbeddings(model_name=EMBED_MODEL)
# Create vectorstore
if tei_embedding_endpoint:
# create embeddings using TEI endpoint service
embedder = HuggingFaceHubEmbeddings(model=tei_embedding_endpoint)
else:
# create embeddings using local embedding model
embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL)
_ = Redis.from_texts(
# appending this little bit can sometimes help with semantic retrieval
# especially with multiple companies
texts=[f"Company: {company_name}. " + chunk for chunk in chunks],
embedding=embedder,
index_name=INDEX_NAME,
index_schema=INDEX_SCHEMA,
redis_url=REDIS_URL,
)
# Batch size
batch_size = 32
num_chunks = len(chunks)
for i in range(0, num_chunks, batch_size):
batch_chunks = chunks[i : i + batch_size]
batch_texts = [f"Company: {company_name}. " + chunk for chunk in batch_chunks]
_ = Redis.from_texts(
texts=batch_texts,
embedding=embedder,
index_name=INDEX_NAME,
index_schema=INDEX_SCHEMA,
redis_url=REDIS_URL,
)
print(f"Processed batch {i//batch_size + 1}/{(num_chunks-1)//batch_size + 1}")
if __name__ == "__main__":

View File

@@ -0,0 +1,82 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
import io
import os
import numpy as np
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import ConfluenceLoader
from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceEmbeddings, HuggingFaceHubEmbeddings
from langchain_community.vectorstores import Redis
# from PIL import Image
from rag_redis.config import EMBED_MODEL, INDEX_NAME, INDEX_SCHEMA, REDIS_URL
tei_embedding_endpoint = os.getenv("TEI_ENDPOINT")
confluence_access_token = os.getenv("CONFLUENCE_ACCESS_TOKEN")
def wiki_loader(wiki_url, page_ids):
loader = ConfluenceLoader(
url=wiki_url,
token=confluence_access_token,
confluence_kwargs={"verify_ssl": False},
)
print(wiki_url)
print(page_ids)
documents = loader.load(page_ids=page_ids, include_attachments=True, limit=50, max_pages=50)
return documents
def ingest_documents(wiki_url, page_ids):
"""Ingest Wiki Pages to Redis from the variables (wiki_url, page_ids) that
contains your contents of interest."""
# Load list of wiki pages
company_name = "Intel"
print("Parsing Intel wiki pages", page_ids)
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=100, add_start_index=True)
documents = wiki_loader(wiki_url, page_ids)
content = ""
for doc in documents:
content += doc.page_content
chunks = text_splitter.split_text(content)
print("Done preprocessing. Created", len(chunks), "chunks of the original pdf")
# Create vectorstore
# Create vectorstore
if tei_embedding_endpoint:
# create embeddings using TEI endpoint service
embedder = HuggingFaceHubEmbeddings(model=tei_embedding_endpoint)
else:
# create embeddings using local embedding model
embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL)
# Batch size
batch_size = 2
num_chunks = len(chunks)
for i in range(0, num_chunks, batch_size):
batch_chunks = chunks[i : i + batch_size]
batch_texts = [f"Company: {company_name}. " + chunk for chunk in batch_chunks]
_ = Redis.from_texts(
texts=batch_texts,
embedding=embedder,
index_name=INDEX_NAME,
index_schema=INDEX_SCHEMA,
redis_url=REDIS_URL,
)
print(f"Processed batch {i//batch_size + 1}/{(num_chunks-1)//batch_size + 1}")
if __name__ == "__main__":
wiki_url = "https://wiki.ith.intel.com/"
page_ids = [3458609323, 3467299836]
ingest_documents(wiki_url, page_ids)

View File

@@ -0,0 +1,2 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

View File

@@ -1,19 +1,9 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
# Copyright (c) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFaceEndpoint

View File

@@ -1,19 +1,9 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
# Copyright (c) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os

View File

@@ -0,0 +1,15 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
text:
- name: content
- name: source
numeric:
- name: start_index
vector:
- name: content_vector
algorithm: HNSW
datatype: FLOAT32
dims: 384
distance_metric: COSINE

View File

@@ -0,0 +1,15 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
text:
- name: content
- name: source
numeric:
- name: start_index
vector:
- name: content_vector
algorithm: HNSW
datatype: FLOAT32
dims: 1024
distance_metric: COSINE

View File

@@ -0,0 +1,15 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
text:
- name: content
- name: source
numeric:
- name: start_index
vector:
- name: content_vector
algorithm: HNSW
datatype: FLOAT32
dims: 768
distance_metric: COSINE

View File

@@ -0,0 +1,19 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
text:
- name: content
- name: changefreq
- name: description
- name: language
- name: loc
- name: priority
- name: source
- name: title
vector:
- name: content_vector
algorithm: HNSW
datatype: FLOAT32
dims: 768
distance_metric: COSINE

View File

@@ -1,18 +1,8 @@
#!/usr/bin/env python
# Copyright (c) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import argparse
import os

View File

@@ -0,0 +1,9 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
git clone https://github.com/huggingface/tgi-gaudi.git
cd ./tgi-gaudi/
docker build -t ghcr.io/huggingface/tgi-gaudi:1.2.1 . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy

View File

@@ -1,18 +1,8 @@
#!/bin/bash
# Copyright (c) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# Set default values
default_port=8080

View File

@@ -1,22 +1,12 @@
#!/bin/bash
# Copyright (c) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -xe
function test_env_setup() {
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests/langchain.log"
LOG_PATH="$WORKPATH/tests"
REDIS_CONTAINER_NAME="test-redis-vector-db"
LANGCHAIN_CONTAINER_NAME="test-qna-rag-redis-server"
@@ -75,23 +65,33 @@ function run_tests() {
curl 127.0.0.1:$port/v1/rag/chat \
-X POST \
-d "{\"query\":\"What is the total revenue of Nike in 2023?\"}" \
-H 'Content-Type: application/json' > $LOG_PATH
-H 'Content-Type: application/json' > $LOG_PATH/langchain.log
curl 127.0.0.1:$port/v1/rag/chat_stream \
-X POST \
-d "{\"query\":\"What is the total revenue of Nike in 2023?\"}" \
-H 'Content-Type: application/json' > $LOG_PATH/langchain_stream.log
}
function check_response() {
cd $WORKPATH
echo "Checking response"
local status=false
if [[ -f $LOG_PATH ]] && [[ $(grep -c "\$51.2 billion" $LOG_PATH) != 0 ]]; then
if [[ -f $LOG_PATH/langchain.log ]] && [[ $(grep -c "\$51.2 billion" $LOG_PATH/langchain.log) != 0 ]]; then
status=true
fi
if [[ ! -f $LOG_PATH/langchain_stream.log ]] || [[ $(grep -c "billion" $LOG_PATH/langchain_stream.log) == 0 ]]; then
status=false
fi
if [ $status == false ]; then
echo "Response check failed"
echo "Response check failed, please check the logs in artifacts!"
exit 1
else
echo "Response check succeed"
echo "Response check succeed!"
fi
}
function docker_stop() {
@@ -110,11 +110,10 @@ function main() {
start_backend_service
run_tests
check_response
docker_stop $CHATQNA_CONTAINER_NAME && docker_stop $LANGCHAIN_CONTAINER_NAME && docker_stop $REDIS_CONTAINER_NAME && sleep 5s
echo y | docker system prune
check_response
}
main

ChatQnA/docker/Dockerfile
View File

@@ -0,0 +1,32 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
FROM python:3.11-slim
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
libgl1-mesa-glx \
libjemalloc-dev \
vim \
git
RUN useradd -m -s /bin/bash user && \
mkdir -p /home/user && \
chown -R user /home/user/
RUN cd /home/user/ && \
git clone https://github.com/opea-project/GenAIComps.git
RUN cd /home/user/GenAIComps && pip install --no-cache-dir --upgrade pip && \
pip install -r /home/user/GenAIComps/requirements.txt
COPY ./chatqna.py /home/user/chatqna.py
ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps
USER user
WORKDIR /home/user
ENTRYPOINT ["python", "chatqna.py"]

ChatQnA/docker/chatqna.py
View File

@@ -0,0 +1,75 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import asyncio
import os
from comps import ChatQnAGateway, MicroService, ServiceOrchestrator, ServiceType
MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "0.0.0.0")
MEGA_SERVICE_PORT = os.getenv("MEGA_SERVICE_PORT", 8888)
EMBEDDING_SERVICE_HOST_IP = os.getenv("EMBEDDING_SERVICE_HOST_IP", "0.0.0.0")
EMBEDDING_SERVICE_PORT = os.getenv("EMBEDDING_SERVICE_PORT", 6000)
RETRIEVER_SERVICE_HOST_IP = os.getenv("RETRIEVER_SERVICE_HOST_IP", "0.0.0.0")
RETRIEVER_SERVICE_PORT = os.getenv("RETRIEVER_SERVICE_PORT", 7000)
RERANK_SERVICE_HOST_IP = os.getenv("RERANK_SERVICE_HOST_IP", "0.0.0.0")
RERANK_SERVICE_PORT = os.getenv("RERANK_SERVICE_PORT", 8000)
LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0")
LLM_SERVICE_PORT = os.getenv("LLM_SERVICE_PORT", 9000)
class ChatQnAService:
def __init__(self, host="0.0.0.0", port=8000):
self.host = host
self.port = port
self.megaservice = ServiceOrchestrator()
def add_remote_service(self):
embedding = MicroService(
name="embedding",
host=EMBEDDING_SERVICE_HOST_IP,
port=EMBEDDING_SERVICE_PORT,
endpoint="/v1/embeddings",
use_remote_service=True,
service_type=ServiceType.EMBEDDING,
)
retriever = MicroService(
name="retriever",
host=RETRIEVER_SERVICE_HOST_IP,
port=RETRIEVER_SERVICE_PORT,
endpoint="/v1/retrieval",
use_remote_service=True,
service_type=ServiceType.RETRIEVER,
)
rerank = MicroService(
name="rerank",
host=RERANK_SERVICE_HOST_IP,
port=RERANK_SERVICE_PORT,
endpoint="/v1/reranking",
use_remote_service=True,
service_type=ServiceType.RERANK,
)
llm = MicroService(
name="llm",
host=LLM_SERVICE_HOST_IP,
port=LLM_SERVICE_PORT,
endpoint="/v1/chat/completions",
use_remote_service=True,
service_type=ServiceType.LLM,
)
self.megaservice.add(embedding).add(retriever).add(rerank).add(llm)
self.megaservice.flow_to(embedding, retriever)
self.megaservice.flow_to(retriever, rerank)
self.megaservice.flow_to(rerank, llm)
self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)
async def schedule(self):
await self.megaservice.schedule(initial_inputs={"text": "What is the revenue of Nike in 2023?"})
result_dict = self.megaservice.result_dict
print(result_dict)
if __name__ == "__main__":
chatqna = ChatQnAService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT)
chatqna.add_remote_service()
asyncio.run(chatqna.schedule())

View File

@@ -0,0 +1,261 @@
# Build MegaService of ChatQnA on Gaudi
This document outlines the deployment process for a ChatQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on an Intel Gaudi server. The steps include Docker image creation, container deployment via Docker Compose, and service execution to integrate microservices such as embedding, retriever, rerank, and llm. We will publish the Docker images to Docker Hub, which will simplify the deployment process for this service.
## 🚀 Build Docker Images
First of all, you need to build the Docker images locally. This step can be skipped once the Docker images have been published to Docker Hub.
### 1. Clone GenAIComps Source Code
```bash
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
```
### 2. Build Embedding Image
```bash
docker build --no-cache -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/langchain/docker/Dockerfile .
```
### 3. Build Retriever Image
```bash
docker build --no-cache -t opea/retriever-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/langchain/docker/Dockerfile .
```
### 4. Build Rerank Image
```bash
docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/langchain/docker/Dockerfile .
```
### 5. Build LLM Image
```bash
docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile .
```
### 6. Build Dataprep Image
```bash
docker build --no-cache -t opea/dataprep-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/docker/Dockerfile .
```
### 7. Build TEI Gaudi Image
Since a TEI Gaudi Docker image hasn't been published, we'll need to build it from the [tei-gaudi](https://github.com/huggingface/tei-gaudi) repository.
```bash
git clone https://github.com/huggingface/tei-gaudi
cd tei-gaudi/
docker build --no-cache -f Dockerfile-hpu -t opea/tei-gaudi:latest .
```
### 8. Build MegaService Docker Image
To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `chatqna.py` Python script. Build the MegaService Docker image using the command below:
```bash
git clone https://github.com/opea-project/GenAIExamples.git
cd GenAIExamples/ChatQnA/docker
docker build --no-cache -t opea/chatqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
```
### 9. Build UI Docker Image
Construct the frontend Docker image using the command below:
```bash
cd GenAIExamples/ChatQnA/ui/
docker build --no-cache -t opea/chatqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile .
```
Then run the command `docker images`; you will have the following 8 Docker images:
1. `opea/embedding-tei:latest`
2. `opea/retriever-redis:latest`
3. `opea/reranking-tei:latest`
4. `opea/llm-tgi:latest`
5. `opea/tei-gaudi:latest`
6. `opea/dataprep-redis:latest`
7. `opea/chatqna:latest`
8. `opea/chatqna-ui:latest`
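To quickly confirm that all of the images above were built, you can filter the local image list. This is just a convenience check, assuming every image was tagged under the `opea/` namespace as in the commands above:
```bash
# List only the locally built OPEA images
docker images | grep '^opea/'
```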
## 🚀 Start MicroServices and MegaService
### Setup Environment Variables
Since the `docker_compose.yaml` will consume some environment variables, you need to set them up in advance as shown below.
```bash
export http_proxy=${your_http_proxy}
export https_proxy=${your_http_proxy}
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:8090"
export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
export REDIS_URL="redis://${host_ip}:6379"
export INDEX_NAME="rag-redis"
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
export MEGA_SERVICE_HOST_IP=${host_ip}
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
export RERANK_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/chatqna"
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
```
Note: Please replace `host_ip` with your external IP address; do **NOT** use localhost.
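If you are not sure what the host's external IP is, one common way to look it up on Linux is shown below. This is only a sketch; confirm that the detected address belongs to the interface you want to expose:
```bash
# Pick the first address reported for this host and export it as host_ip
export host_ip=$(hostname -I | awk '{print $1}')
```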
### Start all the services Docker Containers
```bash
cd GenAIExamples/ChatQnA/docker-composer/gaudi/
docker compose -f docker_compose.yaml up -d
```
### Validate MicroServices and MegaService
1. TEI Embedding Service
```bash
curl ${host_ip}:8090/embed \
-X POST \
-d '{"inputs":"What is Deep Learning?"}' \
-H 'Content-Type: application/json'
```
2. Embedding Microservice
```bash
curl http://${host_ip}:6000/v1/embeddings \
-X POST \
-d '{"text":"hello"}' \
-H 'Content-Type: application/json'
```
3. Retriever Microservice
To consume the retriever microservice, you need to generate a mock embedding vector of length 768 with a short Python script:
```python
import random
embedding = [random.uniform(-1, 1) for _ in range(768)]
print(embedding)
```
Then substitute your mock embedding vector for `${your_embedding}` in the following `curl` command:
```bash
curl http://${host_ip}:7000/v1/retrieval \
-X POST \
-d '{"text":"test", "embedding":${your_embedding}}' \
-H 'Content-Type: application/json'
```
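If you prefer not to paste the vector manually, the two steps can be combined in one shell snippet. This is a sketch that assumes `python` is available on the host and that `host_ip` is already exported:
```bash
# Generate a 768-dimension mock embedding and post it to the retriever in one call
your_embedding=$(python -c "import random; print([random.uniform(-1, 1) for _ in range(768)])")
curl http://${host_ip}:7000/v1/retrieval \
    -X POST \
    -d "{\"text\":\"test\", \"embedding\":${your_embedding}}" \
    -H 'Content-Type: application/json'
```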
4. TEI Reranking Service
```bash
curl http://${host_ip}:8808/rerank \
-X POST \
-d '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' \
-H 'Content-Type: application/json'
```
5. Reranking Microservice
```bash
curl http://${host_ip}:8000/v1/reranking \
-X POST \
-d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
-H 'Content-Type: application/json'
```
6. TGI Service
```bash
curl http://${host_ip}:8008/generate \
-X POST \
-d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":64, "do_sample": true}}' \
-H 'Content-Type: application/json'
```
7. LLM Microservice
```bash
curl http://${host_ip}:9000/v1/chat/completions \
-X POST \
-d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
-H 'Content-Type: application/json'
```
8. MegaService
```bash
curl http://${host_ip}:8888/v1/chatqna -H "Content-Type: application/json" -d '{
"messages": "What is the revenue of Nike in 2023?"
}'
```
9. Dataprep Microservice (Optional)
If you want to update the default knowledge base, you can use the following commands:
Update Knowledge Base via Local File Upload:
```bash
curl -X POST "http://${host_ip}:6007/v1/dataprep" \
-H "Content-Type: multipart/form-data" \
-F "files=@./nke-10k-2023.pdf"
```
This command updates a knowledge base by uploading a local file for processing. Update the file path according to your environment.
Add Knowledge Base via HTTP Links:
```bash
curl -X POST "http://${host_ip}:6007/v1/dataprep" \
-H "Content-Type: multipart/form-data" \
-F 'link_list=["https://opea.dev"]'
```
This command updates a knowledge base by submitting a list of HTTP links for processing.
## Enable LangSmith for Monitoring the Application (Optional)
LangSmith offers tools to debug, evaluate, and monitor language models and intelligent agents. It can be used to assess benchmark data for each microservice. Before launching your services with `docker compose -f docker_compose.yaml up -d`, you need to enable LangSmith tracing by setting the `LANGCHAIN_TRACING_V2` environment variable to true and configuring your LangChain API key.
Here's how you can do it:
1. Install the latest version of LangSmith:
```bash
pip install -U langsmith
```
2. Set the necessary environment variables:
```bash
export LANGCHAIN_TRACING_V2=true
export LANGCHAIN_API_KEY=ls_...
```
## 🚀 Launch the UI
To access the frontend, open the following URL in your browser: http://{host_ip}:5173. By default, the UI runs on port 5173 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `docker_compose.yaml` file as shown below:
```yaml
chaqna-gaudi-ui-server:
image: opea/chatqna-ui:latest
...
ports:
- "80:5173"
```
![project-screenshot](https://i.imgur.com/26zMnEr.png)

View File

@@ -0,0 +1,181 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
version: "3.8"
services:
redis-vector-db:
image: redis/redis-stack:7.2.0-v9
container_name: redis-vector-db
ports:
- "6379:6379"
- "8001:8001"
dataprep-redis-service:
image: opea/dataprep-redis:latest
container_name: dataprep-redis-server
depends_on:
- redis-vector-db
ports:
- "6007:6007"
environment:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
REDIS_URL: ${REDIS_URL}
INDEX_NAME: ${INDEX_NAME}
tei-embedding-service:
image: opea/tei-gaudi:latest
container_name: tei-embedding-gaudi-server
ports:
- "8090:80"
volumes:
- "./data:/data"
runtime: habana
cap_add:
- SYS_NICE
ipc: host
environment:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
command: --model-id ${EMBEDDING_MODEL_ID}
embedding:
image: opea/embedding-tei:latest
container_name: embedding-tei-server
depends_on:
- tei-embedding-service
ports:
- "6000:6000"
ipc: host
environment:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-embedding-service"
restart: unless-stopped
retriever:
image: opea/retriever-redis:latest
container_name: retriever-redis-server
depends_on:
- redis-vector-db
ports:
- "7000:7000"
ipc: host
environment:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
REDIS_URL: ${REDIS_URL}
INDEX_NAME: ${INDEX_NAME}
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-retriever-service"
restart: unless-stopped
tei-reranking-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
container_name: tei-reranking-gaudi-server
ports:
- "8808:80"
volumes:
- "./data:/data"
shm_size: 1g
environment:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
command: --model-id ${RERANK_MODEL_ID}
reranking:
image: opea/reranking-tei:latest
container_name: reranking-tei-gaudi-server
depends_on:
- tei-reranking-service
ports:
- "8000:8000"
ipc: host
environment:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-reranking-service"
restart: unless-stopped
tgi-service:
image: ghcr.io/huggingface/tgi-gaudi:1.2.1
container_name: tgi-gaudi-server
ports:
- "8008:80"
volumes:
- "./data:/data"
environment:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
runtime: habana
cap_add:
- SYS_NICE
ipc: host
command: --model-id ${LLM_MODEL_ID}
llm:
image: opea/llm-tgi:latest
container_name: llm-tgi-gaudi-server
depends_on:
- tgi-service
ports:
- "9000:9000"
ipc: host
environment:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-llm-service"
restart: unless-stopped
chaqna-gaudi-backend-server:
image: opea/chatqna:latest
container_name: chatqna-gaudi-backend-server
depends_on:
- redis-vector-db
- tei-embedding-service
- embedding
- retriever
- tei-reranking-service
- reranking
- tgi-service
- llm
ports:
- "8888:8888"
environment:
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
- EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
- RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
- RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP}
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
ipc: host
restart: always
chaqna-gaudi-ui-server:
image: opea/chatqna-ui:latest
container_name: chatqna-gaudi-ui-server
depends_on:
- chaqna-gaudi-backend-server
ports:
- "5173:5173"
environment:
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
- UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT}
ipc: host
restart: always
networks:
default:
driver: bridge

View File

@@ -0,0 +1,26 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# Use node 20.11.1 as the base image
FROM node:20.11.1
# Update package manager and install Git
RUN apt-get update -y && apt-get install -y git
# Copy the front-end code repository
COPY svelte /home/user/svelte
# Set the working directory
WORKDIR /home/user/svelte
# Install front-end dependencies
RUN npm install
# Build the front-end application
RUN npm run build
# Expose the port of the front-end application
EXPOSE 5173
# Run the front-end application in preview mode
CMD ["npm", "run", "preview", "--", "--port", "5173", "--host", "0.0.0.0"]

View File

@@ -0,0 +1,3 @@
CHAT_BASE_URL = 'http://backend_address:8888/v1/chatqna'
UPLOAD_FILE_BASE_URL = 'http://backend_address:6007/v1/dataprep'

View File

@@ -0,0 +1 @@
{"pluginSearchDirs": ["."], "overrides": [{"files": "*.svelte", "options": {"parser": "svelte"}}]}

View File

@@ -3,7 +3,7 @@
"version": "0.0.1",
"private": true,
"scripts": {
"dev": "vite dev --port 80 --host 0.0.0.0",
"dev": "vite dev",
"build": "vite build",
"preview": "vite preview",
"check": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json",
@@ -13,10 +13,12 @@
},
"devDependencies": {
"@fortawesome/free-solid-svg-icons": "6.2.0",
"@playwright/test": "^1.33.0",
"@sveltejs/adapter-auto": "1.0.0-next.75",
"@sveltejs/kit": "^1.30.4",
"@tailwindcss/typography": "0.5.7",
"@types/debug": "4.1.7",
"@types/node": "^20.12.13",
"@typescript-eslint/eslint-plugin": "^5.27.0",
"@typescript-eslint/parser": "^5.27.0",
"autoprefixer": "^10.4.7",
@@ -50,6 +52,7 @@
"flowbite-svelte-icons": "^1.4.0",
"fuse.js": "^6.6.2",
"lodash": "^4.17.21",
"playwright": "^1.44.0",
"ramda": "^0.29.0",
"sse.js": "^0.6.1",
"svelte-notifications": "^0.9.98",

View File

@@ -0,0 +1,87 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
import { defineConfig, devices } from "@playwright/test";
/**
* Read environment variables from file.
* https://github.com/motdotla/dotenv
*/
// require('dotenv').config();
/**
* See https://playwright.dev/docs/test-configuration.
*/
export default defineConfig({
testDir: "./tests",
/* Maximum time one test can run for. */
timeout: 30 * 1000,
expect: {
/**
* Maximum time expect() should wait for the condition to be met.
* For example in `await expect(locator).toHaveText();`
*/
timeout: 5000,
},
/* Run tests in files in parallel */
fullyParallel: true,
/* Fail the build on CI if you accidentally left test.only in the source code. */
forbidOnly: !!process.env.CI,
/* Retry on CI only */
retries: process.env.CI ? 2 : 0,
/* Opt out of parallel tests on CI. */
workers: process.env.CI ? 1 : undefined,
/* Reporter to use. See https://playwright.dev/docs/test-reporters */
reporter: [["html", { open: "never" }]],
/* Shared settings for all the projects below. See https://playwright.dev/docs/api/class-testoptions. */
use: {
/* Maximum time each action such as `click()` can take. Defaults to 0 (no limit). */
actionTimeout: 0,
/* Base URL to use in actions like `await page.goto('/')`. */
baseURL: "http://localhost:5173",
/* Collect trace when retrying the failed test. See https://playwright.dev/docs/trace-viewer */
trace: "on-first-retry",
},
/* Configure projects for major browsers */
projects: [
// {
// name: "chromium",
// use: { ...devices["Desktop Chrome"] },
// },
/* Test against mobile viewports. */
// {
// name: 'Mobile Chrome',
// use: { ...devices['Pixel 5'] },
// },
// {
// name: 'Mobile Safari',
// use: { ...devices['iPhone 12'] },
// },
/* Test against branded browsers. */
// {
// name: 'Microsoft Edge',
// use: { channel: 'msedge' },
// },
{
name: "webkit",
use: { ...devices["Desktop Safari"] },
},
// {
// name: 'Google Chrome',
// use: { channel: 'chrome' },
// },
],
/* Folder for test artifacts such as screenshots, videos, traces, etc. */
// outputDir: 'test-results/',
/* Run your local dev server before starting the tests */
// webServer: {
// command: 'npm run start',
// port: 3000,
// },
});

Some files were not shown because too many files have changed in this diff.