Compare commits

...

15 Commits

Author SHA1 Message Date
NeuralChatBot
6a705ad3d4 Update third party images tag
Signed-off-by: NeuralChatBot <grp_neural_chat_bot@intel.com>
2024-08-29 02:45:32 +00:00
WenjiaoYue
32afb6501c update env (#678)
Signed-off-by: Yue, Wenjiao <wenjiao.yue@intel.com>
2024-08-29 10:29:35 +08:00
Sun, Xuehao
035f39f0d9 weekly update images tag (#667)
Signed-off-by: Sun, Xuehao <xuehao.sun@intel.com>
2024-08-29 10:27:06 +08:00
Abolfazl Shahbazi
6f3e54a22a Always upload scan artifacts (#680)
Signed-off-by: Abolfazl Shahbazi <abolfazl.shahbazi@intel.com>
2024-08-29 09:35:47 +08:00
Abolfazl Shahbazi
1874dfd148 Remove 'vim' from all Dockerfiles (#663)
Signed-off-by: Abolfazl Shahbazi <abolfazl.shahbazi@intel.com>
Co-authored-by: lvliang-intel <liang1.lv@intel.com>
2024-08-28 08:30:49 -07:00
David Kinder
7a0fca73e6 doc: fix headings (#656)
* doc: fix headings

* Fix incorrect uses of heading levels
* fix indenting within lists

Signed-off-by: David B. Kinder <david.b.kinder@intel.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Signed-off-by: David B. Kinder <david.b.kinder@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-08-28 20:45:18 +08:00
Ying Hu
beda609b4b Yinghu5 patch 1 (#676)
* Update set_env.sh

to fix #670

* Update README.md

fixed your_ip and host_ip

* Update README.md

fix your_ip and host_ip
2024-08-28 20:42:44 +08:00
jotpalch
993688ac91 fix: Resolve port conflict in llava-tgi-service (#665)
- Changed the external port of llava-tgi-service from 9399 to 8399
- This resolves the port conflict with the lvm-tgi service
- Internal port mapping remains unchanged (80)
2024-08-28 17:07:15 +08:00
chen, suyue
5fde666c43 enhance image publish action (#659)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-08-28 09:24:57 +08:00
Steve Zhang
4133757642 Change docs of kubernetes for curl commands in README (#661)
* change docs for curl commands in README.
Signed-off-by: zhlsunshine <huailong.zhang@intel.com>

* The Namespace 'CT' is invalid.
Signed-off-by: zhlsunshine <huailong.zhang@intel.com>
2024-08-27 19:36:37 +08:00
lvliang-intel
10c81f1c57 Update ollama run command (#668)
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
2024-08-27 14:54:53 +08:00
xiguiw
dad8eb4b82 [Doc] Update ChatQnA flow chart (#542)
* Update flow chart

Signed-off-by: Wang, Xigui <xigui.wang@intel.com>

* Updated Flowchart

Signed-off-by: srinarayan-srikanthan <srinarayan.srikanthan@intel.com>

---------

Signed-off-by: Wang, Xigui <xigui.wang@intel.com>
Signed-off-by: srinarayan-srikanthan <srinarayan.srikanthan@intel.com>
Co-authored-by: Louie Tsai <louie.tsai@intel.com>
2024-08-26 12:20:21 -07:00
lvliang-intel
af21e94a29 Add benchmark README for ChatQnA (#662)
* Add benchmark README for ChatQnA

Signed-off-by: lvliang-intel <liang1.lv@intel.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* add benchmark.yaml

Signed-off-by: lvliang-intel <liang1.lv@intel.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* update yaml path

Signed-off-by: lvliang-intel <liang1.lv@intel.com>

* fix preci issue

Signed-off-by: lvliang-intel <liang1.lv@intel.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* update title

Signed-off-by: lvliang-intel <liang1.lv@intel.com>

---------

Signed-off-by: lvliang-intel <liang1.lv@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-08-26 22:39:57 +08:00
chen, suyue
f78aa9ee2f add env for chatqna vllm (#655)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-08-23 22:10:10 +08:00
Dina Suehiro Jones
c25063f4bb Minor fixes for CodeGen Xeon and Gaudi Kubernetes codegen.yaml and doc updates (#613)
* Minor fixes for CodeGen Xeon and Gaudi Kubernetes codegen.yaml and doc updates

Signed-off-by: dmsuehir <dina.s.jones@intel.com>
2024-08-23 16:04:57 +08:00
60 changed files with 1035 additions and 311 deletions

View File

@@ -59,6 +59,7 @@ jobs:
- name: SBOM Scan Container
uses: anchore/sbom-action@v0.17.1
if: always()
with:
image: ${{ env.OPEA_IMAGE_REPO }}opea/${{ matrix.image }}:${{ inputs.tag }}
output-file: ${{ matrix.image }}-sbom-scan.txt
@@ -66,6 +67,7 @@ jobs:
- name: Security Scan Container
uses: aquasecurity/trivy-action@0.24.0
if: always()
with:
image-ref: ${{ env.OPEA_IMAGE_REPO }}opea/${{ matrix.image }}:${{ inputs.tag }}
output: ${{ matrix.image }}-trivy-scan.txt
@@ -80,6 +82,7 @@ jobs:
run: docker rmi -f ${OPEA_IMAGE_REPO}opea/${{ matrix.image }}:${{ inputs.tag }}
- uses: actions/upload-artifact@v4.3.4
if: always()
with:
name: ${{ matrix.image }}-scan
path: ${{ matrix.image }}-*-scan.txt

View File

@@ -5,28 +5,28 @@ name: Examples publish docker image on manual event
on:
workflow_dispatch:
inputs:
nodes:
node:
default: "gaudi"
description: "Hardware to run test"
required: true
type: string
examples:
default: "ChatQnA"
default: "Translation"
description: 'List of examples to test [AudioQnA,ChatQnA,CodeGen,CodeTrans,DocSum,FaqGen,SearchQnA,Translation]'
required: true
type: string
tag:
default: "latest"
description: "Tag to apply to images"
required: true
type: string
publish:
gmc:
default: false
description: 'Publish images to docker hub'
description: 'Publish gmc images'
required: false
type: boolean
tag:
default: "v0.9"
description: "Tag to publish"
required: true
type: string
publish_tags:
default: "latest,v1.0"
default: "latest,v0.9"
description: 'Tag list apply to publish images'
required: false
type: string
@@ -34,7 +34,7 @@ on:
permissions: read-all
jobs:
get-image-list:
runs-on: ${{ inputs.node }}
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.scan-matrix.outputs.matrix }}
steps:
@@ -44,13 +44,20 @@ jobs:
- name: Set Matrix
id: scan-matrix
run: |
pip install yq
examples=($(echo ${{ inputs.examples }} | tr ',' ' '))
image_list=[]
for example in ${examples[@]}
do
echo ${example}
images=$(cat ${{ github.workspace }}/${example}/docker/docker_build_compose.yaml | yq -r '.[]' | jq 'keys' | jq -c '.')
image_list=$(echo ${image_list} | jq -s '.[0] + .[1] | unique' - <(echo ${images}))
done
if [ "${{ inputs.gmc }}" == "true" ]; then
image_list=$(echo ${image_list} | jq -c '. + ["gmcmanager","gmcrouter"]')
fi
echo $image_list
echo "matrix=$(echo ${image_list} | jq -c '.')" >> $GITHUB_OUTPUT
publish:
@@ -60,6 +67,11 @@ jobs:
image: ${{ fromJSON(needs.get-image-list.outputs.matrix) }}
runs-on: "docker-build-${{ inputs.node }}"
steps:
- uses: docker/login-action@v3.2.0
with:
username: ${{ secrets.DOCKERHUB_USER }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Image Publish
uses: opea-project/validation/actions/image-publish@main
with:

View File

@@ -1,43 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: Freeze base images and 3rd party images on manual event
on:
workflow_dispatch:
jobs:
freeze-images:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
fetch-depth: 0
ref: ${{ github.ref }}
- uses: actions/setup-python@v5
with:
python-version: "3.10"
- name: install skopeo
run: |
sudo apt update
sudo apt -y install skopeo
- name: Set up Git
run: |
git config --global user.name "NeuralChatBot"
git config --global user.email "grp_neural_chat_bot@intel.com"
git remote set-url origin https://NeuralChatBot:"${{ secrets.ACTION_TOKEN }}"@github.com/opea-project/GenAIExamples.git
- name: Run script
run: |
bash .github/workflows/scripts/freeze_images.sh
- name: Commit changes
run: |
git add .
git commit -s -m "Freeze third party images tag"
git push

View File

@@ -4,14 +4,18 @@
# SPDX-License-Identifier: Apache-2.0
declare -A dict
dict["langchain/langchain"]="docker://docker.io/langchain/langchain"
dict["ghcr.io/huggingface/text-generation-inference"]="docker://ghcr.io/huggingface/text-generation-inference"
dict["ghcr.io/huggingface/text-generation-inference"]="docker://ghcr.io/huggingface/text-generation-inference:latest-intel-cpu"
function get_latest_version() {
repo_image=$1
versions=$(skopeo list-tags ${dict[$repo_image]} | jq -r '.Tags[]')
printf "version list:\n$versions\n"
latest_version=$(printf "%s\n" "${versions[@]}" | grep -E '^[\.0-9\-]+$' | sort -V | tail -n 1)
if [[ $repo_image == *"huggingface"* ]]; then
revision=$(skopeo inspect --config ${dict[$repo_image]} | jq -r '.config.Labels["org.opencontainers.image.revision"][:7]')
latest_version="sha-$revision-intel-cpu"
else
versions=$(skopeo list-tags ${dict[$repo_image]} | jq -r '.Tags[]')
printf "version list:\n$versions\n"
latest_version=$(printf "%s\n" "${versions[@]}" | grep -E '^[\.0-9\-]+$' | sort -V | tail -n 1)
fi
echo "latest version: $latest_version"
replace_image_version $repo_image $latest_version
}
@@ -22,10 +26,10 @@ function replace_image_version() {
if [[ -z "$version" ]]; then
echo "version is empty"
else
echo "replace $repo_image:latest with $repo_image:$version"
find . -name "Dockerfile" | xargs sed -i "s|$repo_image:latest.*|$repo_image:$version|g"
find . -name "*.yaml" | xargs sed -i "s|$repo_image:latest[A-Za-z0-9\-]*|$repo_image:$version|g"
find . -name "*.md" | xargs sed -i "s|$repo_image:latest[A-Za-z0-9\-]*|$repo_image:$version|g"
echo "replace $repo_image:tag with $repo_image:$version"
find . -name "Dockerfile" | xargs sed -i "s|$repo_image:sha[A-Za-z0-9\-]*|$repo_image:$version|g"
find . -name "*.yaml" | xargs sed -i "s|$repo_image:sha[A-Za-z0-9\-]*|$repo_image:$version|g"
find . -name "*.md" | xargs sed -i "s|$repo_image:sha[A-Za-z0-9\-]*|$repo_image:$version|g"
fi
}
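The updated script pins the Hugging Face TGI image to a `sha-<revision>-intel-cpu` tag derived from the image's OCI labels instead of sorting numeric tags. For reference, the same lookup can be reproduced by hand with the tools the script already relies on (`skopeo` and `jq`); a minimal sketch, not part of the diff:

```bash
# List the available tags for the TGI image (same call the script makes)
skopeo list-tags docker://ghcr.io/huggingface/text-generation-inference | jq -r '.Tags[]' | head

# Derive the short revision used to build the "sha-<rev>-intel-cpu" tag
skopeo inspect --config docker://ghcr.io/huggingface/text-generation-inference:latest-intel-cpu \
  | jq -r '.config.Labels["org.opencontainers.image.revision"][:7]'
```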

View File

@@ -0,0 +1,54 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: Weekly update base images and 3rd party images
on:
schedule:
- cron: "0 0 * * 0"
workflow_dispatch:
permissions:
contents: write
pull-requests: write
jobs:
freeze-images:
runs-on: ubuntu-latest
env:
USER_NAME: "NeuralChatBot"
USER_EMAIL: "grp_neural_chat_bot@intel.com"
BRANCH_NAME: "update_images_tag"
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
fetch-depth: 0
ref: "main"
- name: Install skopeo
run: |
sudo apt update
sudo apt -y install skopeo
- name: Set up Git
run: |
git config --global user.name ${{ env.USER_NAME }}
git config --global user.email ${{ env.USER_EMAIL }}
git remote set-url origin https://${{ env.USER_NAME }}:"${{ secrets.ACTION_TOKEN }}"@github.com/opea-project/GenAIExamples.git
git checkout -b ${{ env.BRANCH_NAME }}
- name: Run script
run: |
bash .github/workflows/scripts/update_images_tag.sh
- name: Commit changes
run: |
git add .
git commit -s -m "Update third party images tag"
git push --set-upstream origin update_images_tag
- name: create pull request
run: gh pr create -B main -H ${{ env.BRANCH_NAME }} --title 'Update ghcr.io/huggingface/text-generation-inference image tag' --body 'Created by Github action'
env:
GITHUB_TOKEN: ${{ secrets.ACTION_TOKEN }}

View File

@@ -26,50 +26,50 @@ This example showcases a hierarchical multi-agent system for question-answering
1. Build agent docker image </br>
First, clone the opea GenAIComps repo
```
export WORKDIR=<your-work-directory>
cd $WORKDIR
git clone https://github.com/opea-project/GenAIComps.git
```
Then build the agent docker image. Both the supervisor agent and the worker agent will use the same docker image, but when we launch the two agents we will specify different strategies and register different tools.
```
cd GenAIComps
docker build -t opea/comps-agent-langchain:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/agent/langchain/docker/Dockerfile .
```
2. Launch tool services </br>
In this example, we will use some of the mock APIs provided in the Meta CRAG KDD Challenge to demonstrate the benefits of gaining additional context from mock knowledge graphs.
```
docker run -d -p=8080:8000 docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0
```
3. Set up environment for this example </br>
First, clone this repo
```
cd $WORKDIR
git clone https://github.com/opea-project/GenAIExamples.git
```
Second, set up env vars
```
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
# optional: OPENAI_API_KEY
export OPENAI_API_KEY=<your-openai-key>
```
4. Launch agent services</br>
The configurations of the supervisor agent and the worker agent are defined in the docker-compose yaml file. We currently use OpenAI GPT-4o-mini as the LLM, and we plan to add support for llama3.1-70B-instruct (served by TGI-Gaudi) in a subsequent release.
To use the OpenAI LLM, run the command below.
```
cd docker/openai/
bash launch_agent_service_openai.sh
```
## Validate services

View File

@@ -8,7 +8,6 @@ FROM python:3.11-slim
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
libgl1-mesa-glx \
libjemalloc-dev \
vim \
git
RUN useradd -m -s /bin/bash user && \

View File

@@ -41,7 +41,7 @@ services:
environment:
TTS_ENDPOINT: ${TTS_ENDPOINT}
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
image: ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu
container_name: tgi-service
ports:
- "3006:80"

View File

@@ -66,7 +66,7 @@ This involves deploying the AudioQnA custom resource. You can use audioQnA_xeon.
```sh
export CLIENT_POD=$(kubectl get pod -n audioqa -l app=client-test -o jsonpath={.items..metadata.name})
export accessUrl=$(kubectl get gmc -n audioqa -o jsonpath="{.items[?(@.metadata.name=='audioqa')].status.accessUrl}")
kubectl exec "$CLIENT_POD" -n audioqa -- curl $accessUrl -X POST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "parameters":{"max_new_tokens":64, "do_sample": true, "streaming":false}}' -H 'Content-Type: application/json'
kubectl exec "$CLIENT_POD" -n audioqa -- curl -s --no-buffer $accessUrl -X POST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "parameters":{"max_new_tokens":64, "do_sample": true, "streaming":false}}' -H 'Content-Type: application/json'
```
> [NOTE]

View File

@@ -10,7 +10,90 @@ ChatQnA architecture shows below:
ChatQnA is implemented on top of [GenAIComps](https://github.com/opea-project/GenAIComps), the ChatQnA Flow Chart shows below:
![Flow Chart](./assets/img/chatqna_flow_chart.png)
```mermaid
---
config:
flowchart:
nodeSpacing: 100
rankSpacing: 100
curve: linear
theme: base
themeVariables:
fontSize: 42px
---
flowchart LR
%% Colors %%
classDef blue fill:#ADD8E6,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
classDef orange fill:#FBAA60,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
classDef orchid fill:#C26DBC,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
classDef invisible fill:transparent,stroke:transparent;
style ChatQnA-MegaService stroke:#000000
%% Subgraphs %%
subgraph ChatQnA-MegaService["ChatQnA-MegaService"]
direction LR
EM([Embedding <br>]):::blue
RET([Retrieval <br>]):::blue
RER([Rerank <br>]):::blue
LLM([LLM <br>]):::blue
end
subgraph User Interface
direction TB
a([User Input Query]):::orchid
Ingest([Ingest data]):::orchid
UI([UI server<br>]):::orchid
end
subgraph ChatQnA GateWay
direction LR
invisible1[ ]:::invisible
GW([ChatQnA GateWay<br>]):::orange
end
subgraph .
X([OPEA Microservice]):::blue
Y{{Open Source Service}}
Z([OPEA Gateway]):::orange
Z1([UI]):::orchid
end
TEI_RER{{Reranking service<br>'TEI'<br>}}
TEI_EM{{Embedding service <br>'TEI LangChain'<br>}}
VDB{{Vector DB<br>'Redis'<br>}}
R_RET{{Retriever service <br>'LangChain Redis'<br>}}
DP([Data Preparation<br>'LangChain Redis'<br>]):::blue
LLM_gen{{LLM Service <br>'TGI'<br>}}
%% Data Preparation flow
%% Ingest data flow
direction LR
Ingest[Ingest data] -->|a| UI
UI -->|b| DP
DP <-.->|c| TEI_EM
%% Questions interaction
direction LR
a[User Input Query] -->|1| UI
UI -->|2| GW
GW <==>|3| ChatQnA-MegaService
EM ==>|4| RET
RET ==>|5| RER
RER ==>|6| LLM
%% Embedding service flow
direction TB
EM <-.->|3'| TEI_EM
RET <-.->|4'| R_RET
RER <-.->|5'| TEI_RER
LLM <-.->|6'| LLM_gen
direction TB
%% Vector DB interaction
R_RET <-.->|d|VDB
DP <-.->|d|VDB
```
This ChatQnA use case performs RAG using LangChain, Redis VectorDB and Text Generation Inference on Intel Gaudi2 or Intel XEON Scalable Processors. The Intel Gaudi2 accelerator supports both training and inference for deep learning models in particular for LLMs. Visit [Habana AI products](https://habana.ai/products) for more details.

ChatQnA/benchmark/README.md (new file, 546 lines)
View File

@@ -0,0 +1,546 @@
# ChatQnA Benchmarking
This folder contains a collection of Kubernetes manifest files for deploying the ChatQnA service across scalable nodes. It includes a comprehensive [benchmarking tool](https://github.com/opea-project/GenAIEval/blob/main/evals/benchmark/README.md) that enables throughput analysis to assess inference performance.
By following this guide, you can run benchmarks on your deployment and share the results with the OPEA community.
# Purpose
We aim to run these benchmarks and share them with the OPEA community for three primary reasons:
- To offer insights on inference throughput in real-world scenarios, helping you choose the best service or deployment for your needs.
- To establish a baseline for validating optimization solutions across different implementations, providing clear guidance on which methods are most effective for your use case.
- To inspire the community to build upon our benchmarks, allowing us to better quantify new solutions in conjunction with current leading LLMs, serving frameworks, etc.
# Metrics
The benchmark reports the following metrics:
- Number of Concurrent Requests
- End-to-End Latency: P50, P90, P99 (in milliseconds)
- End-to-End First Token Latency: P50, P90, P99 (in milliseconds)
- Average Next Token Latency (in milliseconds)
- Average Token Latency (in milliseconds)
- Requests Per Second (RPS)
- Output Tokens Per Second
- Input Tokens Per Second
Results are displayed in the terminal and saved as a CSV file named `1_stats.csv` for easy export to spreadsheets.
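For a quick look at the saved results without exporting them, a generic shell one-liner such as the sketch below works; the exact column names depend on the benchmark tool version, so treat it as illustrative only.

```bash
# Render the benchmark CSV as an aligned table in the terminal (illustrative)
column -s, -t < 1_stats.csv | less -S
```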
# Getting Started
## Prerequisites
- Install Kubernetes by following [this guide](https://github.com/opea-project/docs/blob/main/guide/installation/k8s_install/k8s_install_kubespray.md).
- Every node has direct internet access
- Set up kubectl on the master node with access to the Kubernetes cluster.
- Install Python 3.8+ on the master node for running the stress tool.
- Ensure all nodes have a local /mnt/models folder, which will be mounted by the pods.
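The last prerequisite can be handled with a short loop from the master node; the node names below are placeholders, so adjust them to your cluster.

```bash
# Create the model cache folder on every worker node (node names are examples)
for node in k8s-worker1 k8s-worker2 k8s-worker3; do
  ssh "$node" 'sudo mkdir -p /mnt/models'
done
```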
## Kubernetes Cluster Example
```bash
$ kubectl get nodes
NAME STATUS ROLES AGE VERSION
k8s-master Ready control-plane 35d v1.29.6
k8s-work1 Ready <none> 35d v1.29.5
k8s-work2 Ready <none> 35d v1.29.6
k8s-work3 Ready <none> 35d v1.29.6
```
## Manifest preparation
We provide [BKC manifests](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/benchmark) for single-node, two-node, and four-node K8s clusters. Before applying them, check out the repo and configure a few values.
```bash
# on k8s-master node
git clone https://github.com/opea-project/GenAIExamples.git
cd GenAIExamples/ChatQnA/benchmark
# replace the image tag from latest to v0.9 since we want to test with v0.9 release
IMAGE_TAG=v0.9
find . -name '*.yaml' -type f -exec sed -i "s#image: opea/\(.*\):latest#image: opea/\1:${IMAGE_TAG}#g" {} \;
# set the huggingface token
HUGGINGFACE_TOKEN=<your token>
find . -name '*.yaml' -type f -exec sed -i "s#\${HF_TOKEN}#${HUGGINGFACE_TOKEN}#g" {} \;
# set models
LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
EMBEDDING_MODEL_ID=BAAI/bge-base-en-v1.5
RERANK_MODEL_ID=BAAI/bge-reranker-base
find . -name '*.yaml' -type f -exec sed -i "s#\$(LLM_MODEL_ID)#${LLM_MODEL_ID}#g" {} \;
find . -name '*.yaml' -type f -exec sed -i "s#\$(EMBEDDING_MODEL_ID)#${EMBEDDING_MODEL_ID}#g" {} \;
find . -name '*.yaml' -type f -exec sed -i "s#\$(RERANK_MODEL_ID)#${RERANK_MODEL_ID}#g" {} \;
```
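As a sanity check (illustrative, assuming the commands above were run from this benchmark folder), confirm that no manifest still references a `latest` tag or an unexpanded placeholder:

```bash
# Both commands should print nothing if the substitutions succeeded
grep -rn "image: opea/" --include='*.yaml' . | grep ':latest'
grep -rnF '${HF_TOKEN}' --include='*.yaml' .
```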
## Benchmark tool preparation
The test uses the [benchmark tool](https://github.com/opea-project/GenAIEval/tree/main/evals/benchmark) to run the performance test. Set up the benchmark tool on the Kubernetes master node, which is k8s-master.
```bash
# on k8s-master node
git clone https://github.com/opea-project/GenAIEval.git
cd GenAIEval
python3 -m venv stress_venv
source stress_venv/bin/activate
pip install -r requirements.txt
```
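Optionally, verify the tool environment before running tests. The example spec later in this README lists `tool: locust`, so a quick, illustrative check for that dependency can catch an incomplete install early:

```bash
# Confirm the load-generation dependency landed in the virtual env (illustrative)
pip list | grep -i locust
```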
## Test Configurations
Workload configuration:
| Key | Value |
| -------- | ------- |
| Workload | ChatQnA |
| Tag | V0.9 |
Models configuration
| Key | Value |
| ---------- | ------------------ |
| Embedding | BAAI/bge-base-en-v1.5 |
| Reranking | BAAI/bge-reranker-base |
| Inference | Intel/neural-chat-7b-v3-3 |
Benchmark parameters
| Key | Value |
| ---------- | ------------------ |
| LLM input tokens | 1024 |
| LLM output tokens | 128 |
Number of test requests for each scheduled node count:
| Node count | Concurrency | Query number |
| ----- | -------- | -------- |
| 1 | 128 | 640 |
| 2 | 256 | 1280 |
| 4 | 512 | 2560 |
More detailed configuration can be found in configuration file [benchmark.yaml](./benchmark.yaml).
## Test Steps
### Single node test
#### 1. Preparation
Add a label to one Kubernetes node so that all pods are scheduled to that node:
```bash
kubectl label nodes k8s-worker1 node-type=chatqna-opea
```
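To confirm the label landed on the intended node before installing, list nodes by that selector:

```bash
# Only the labeled node should be returned
kubectl get nodes -l node-type=chatqna-opea
```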
#### 2. Install ChatQnA
Go to the [BKC manifest](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/benchmark/single_gaudi) directory and apply it to K8s.
```bash
# on k8s-master node
cd GenAIExamples/ChatQnA/benchmark/single_gaudi
kubectl apply -f .
```
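Before running the benchmark, wait until every pod is ready; one illustrative way, assuming the manifests deploy into the default namespace, is:

```bash
# Block until all ChatQnA pods report Ready (adjust the timeout to your environment)
kubectl wait --for=condition=Ready pod --all --timeout=20m
```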
#### 3. Run tests
Copy the configuration file [benchmark.yaml](./benchmark.yaml) to `GenAIEval/evals/benchmark/benchmark.yaml` and configure `test_suite_config.user_queries` and `test_suite_config.test_output_dir`:
```bash
export USER_QUERIES="[4, 8, 16, 640]"
export TEST_OUTPUT_DIR="/home/sdp/benchmark_output/node_1"
envsubst < ./benchmark.yaml > GenAIEval/evals/benchmark/benchmark.yaml
```
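A quick, illustrative check that `envsubst` filled in both values:

```bash
grep -E 'user_queries|test_output_dir' GenAIEval/evals/benchmark/benchmark.yaml
```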
Then run the benchmark tool:
```bash
cd GenAIEval/evals/benchmark
python benchmark.py
```
#### 4. Data collection
All test results are written to `/home/sdp/benchmark_output/node_1`, the folder configured via the `TEST_OUTPUT_DIR` environment variable in the previous step.
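For example, to list the generated files and locate the summary CSV (illustrative):

```bash
ls -R "$TEST_OUTPUT_DIR"
find "$TEST_OUTPUT_DIR" -name '*stats.csv'
```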
#### 5. Clean up
```bash
# on k8s-master node
cd GenAIExamples/ChatQnA/benchmark/single_gaudi
kubectl delete -f .
kubectl label nodes k8s-worker1 node-type-
```
### Two node test
#### 1. Preparation
Add a label to two Kubernetes nodes so that all pods are scheduled to those nodes:
```bash
kubectl label nodes k8s-worker1 k8s-worker2 node-type=chatqna-opea
```
#### 2. Install ChatQnA
Go to the [BKC manifest](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/benchmark/two_gaudi) directory and apply it to K8s.
```bash
# on k8s-master node
cd GenAIExamples/ChatQnA/benchmark/two_gaudi
kubectl apply -f .
```
#### 3. Run tests
Copy the configuration file [benchmark.yaml](./benchmark.yaml) to `GenAIEval/evals/benchmark/benchmark.yaml` and configure `test_suite_config.user_queries` and `test_suite_config.test_output_dir`:
```bash
export USER_QUERIES="[4, 8, 16, 1280]"
export TEST_OUTPUT_DIR="/home/sdp/benchmark_output/node_2"
envsubst < ./benchmark.yaml > GenAIEval/evals/benchmark/benchmark.yaml
```
Then run the benchmark tool:
```bash
cd GenAIEval/evals/benchmark
python benchmark.py
```
#### 4. Data collection
All test results are written to `/home/sdp/benchmark_output/node_2`, the folder configured via the `TEST_OUTPUT_DIR` environment variable in the previous step.
#### 5. Clean up
```bash
# on k8s-master node
kubectl delete -f .
kubectl label nodes k8s-worker1 k8s-worker2 node-type-
```
### Four node test
#### 1. Preparation
Add a label to four Kubernetes nodes so that all pods are scheduled to those nodes:
```bash
kubectl label nodes k8s-master k8s-worker1 k8s-worker2 k8s-worker3 node-type=chatqna-opea
```
#### 2. Install ChatQnA
Go to the [BKC manifest](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/benchmark/four_gaudi) directory and apply it to K8s.
```bash
# on k8s-master node
cd GenAIExamples/ChatQnA/benchmark/four_gaudi
kubectl apply -f .
```
#### 3. Run tests
Copy the configuration file [benchmark.yaml](./benchmark.yaml) to `GenAIEval/evals/benchmark/benchmark.yaml` and configure `test_suite_config.user_queries` and `test_suite_config.test_output_dir`:
```bash
export USER_QUERIES="[4, 8, 16, 2560]"
export TEST_OUTPUT_DIR="/home/sdp/benchmark_output/node_4"
envsubst < ./benchmark.yaml > GenAIEval/evals/benchmark/benchmark.yaml
```
Then run the benchmark tool:
```bash
cd GenAIEval/evals/benchmark
python benchmark.py
```
#### 4. Data collection
All test results are written to `/home/sdp/benchmark_output/node_4`, the folder configured via the `TEST_OUTPUT_DIR` environment variable in the previous step.
#### 5. Clean up
```bash
# on k8s-master node
cd GenAIExamples/ChatQnA/benchmark/four_gaudi
kubectl delete -f .
kubectl label nodes k8s-master k8s-worker1 k8s-worker2 k8s-worker3 node-type-
```
### Example Result
The following is a summary of the test result, with files saved at `TEST_OUTPUT_DIR`.
```statistics
Concurrency : 512
Max request count : 2560
Http timeout : 60000
Benchmark target : chatqnafixed
=================Total statistics=====================
Succeed Response: 2560 (Total 2560, 100.0% Success), Duration: 26.44s, Input Tokens: 61440, Output Tokens: 255985, RPS: 96.82, Input Tokens per Second: 2323.71, Output Tokens per Second: 9681.57
End to End latency(ms), P50: 3576.34, P90: 4242.19, P99: 5252.23, Avg: 3581.55
First token latency(ms), P50: 726.64, P90: 1128.27, P99: 1796.09, Avg: 769.58
Average Next token latency(ms): 28.41
Average token latency(ms) : 35.85
======================================================
```
```test spec
benchmarkresult:
Average_Next_token_latency: '28.41'
Average_token_latency: '35.85'
Duration: '26.44'
End_to_End_latency_Avg: '3581.55'
End_to_End_latency_P50: '3576.34'
End_to_End_latency_P90: '4242.19'
End_to_End_latency_P99: '5252.23'
First_token_latency_Avg: '769.58'
First_token_latency_P50: '726.64'
First_token_latency_P90: '1128.27'
First_token_latency_P99: '1796.09'
Input_Tokens: '61440'
Input_Tokens_per_Second: '2323.71'
Onput_Tokens: '255985'
Output_Tokens_per_Second: '9681.57'
RPS: '96.82'
Succeed_Response: '2560'
locust_P50: '160'
locust_P99: '810'
locust_num_failures: '0'
locust_num_requests: '2560'
benchmarkspec:
bench-target: chatqnafixed
endtest_time: '2024-08-25T14:19:25.955973'
host: http://10.110.105.197:8888
llm-model: Intel/neural-chat-7b-v3-3
locustfile: /home/sdp/lvl/GenAIEval/evals/benchmark/stresscli/locust/aistress.py
max_requests: 2560
namespace: default
processes: 2
run_name: benchmark
runtime: 60m
starttest_time: '2024-08-25T14:18:50.366514'
stop_timeout: 120
tool: locust
users: 512
hardwarespec:
aise-gaudi-00:
architecture: amd64
containerRuntimeVersion: containerd://1.7.18
cpu: '160'
habana.ai/gaudi: '8'
kernelVersion: 5.15.0-92-generic
kubeProxyVersion: v1.29.7
kubeletVersion: v1.29.7
memory: 1056375272Ki
operatingSystem: linux
osImage: Ubuntu 22.04.3 LTS
aise-gaudi-01:
architecture: amd64
containerRuntimeVersion: containerd://1.7.18
cpu: '160'
habana.ai/gaudi: '8'
kernelVersion: 5.15.0-92-generic
kubeProxyVersion: v1.29.7
kubeletVersion: v1.29.7
memory: 1056375256Ki
operatingSystem: linux
osImage: Ubuntu 22.04.3 LTS
aise-gaudi-02:
architecture: amd64
containerRuntimeVersion: containerd://1.7.18
cpu: '160'
habana.ai/gaudi: '8'
kernelVersion: 5.15.0-92-generic
kubeProxyVersion: v1.29.7
kubeletVersion: v1.29.7
memory: 1056375260Ki
operatingSystem: linux
osImage: Ubuntu 22.04.3 LTS
aise-gaudi-03:
architecture: amd64
containerRuntimeVersion: containerd://1.6.8
cpu: '160'
habana.ai/gaudi: '8'
kernelVersion: 5.15.0-112-generic
kubeProxyVersion: v1.29.7
kubeletVersion: v1.29.7
memory: 1056374404Ki
operatingSystem: linux
osImage: Ubuntu 22.04.4 LTS
workloadspec:
aise-gaudi-00:
chatqna-backend-server-deploy:
replica: 1
resources:
limits:
cpu: '8'
memory: 4000Mi
requests:
cpu: '8'
memory: 4000Mi
embedding-dependency-deploy:
replica: 1
resources:
limits:
cpu: '80'
memory: 20000Mi
requests:
cpu: '80'
memory: 20000Mi
embedding-deploy:
replica: 1
llm-dependency-deploy:
replica: 8
resources:
limits:
habana.ai/gaudi: '1'
requests:
habana.ai/gaudi: '1'
llm-deploy:
replica: 1
retriever-deploy:
replica: 1
resources:
limits:
cpu: '8'
memory: 2500Mi
requests:
cpu: '8'
memory: 2500Mi
aise-gaudi-01:
chatqna-backend-server-deploy:
replica: 1
resources:
limits:
cpu: '8'
memory: 4000Mi
requests:
cpu: '8'
memory: 4000Mi
embedding-dependency-deploy:
replica: 1
resources:
limits:
cpu: '80'
memory: 20000Mi
requests:
cpu: '80'
memory: 20000Mi
embedding-deploy:
replica: 1
llm-dependency-deploy:
replica: 8
resources:
limits:
habana.ai/gaudi: '1'
requests:
habana.ai/gaudi: '1'
llm-deploy:
replica: 1
prometheus-operator:
replica: 1
resources:
limits:
cpu: 200m
memory: 200Mi
requests:
cpu: 100m
memory: 100Mi
retriever-deploy:
replica: 1
resources:
limits:
cpu: '8'
memory: 2500Mi
requests:
cpu: '8'
memory: 2500Mi
aise-gaudi-02:
chatqna-backend-server-deploy:
replica: 1
resources:
limits:
cpu: '8'
memory: 4000Mi
requests:
cpu: '8'
memory: 4000Mi
embedding-dependency-deploy:
replica: 1
resources:
limits:
cpu: '80'
memory: 20000Mi
requests:
cpu: '80'
memory: 20000Mi
embedding-deploy:
replica: 1
llm-dependency-deploy:
replica: 8
resources:
limits:
habana.ai/gaudi: '1'
requests:
habana.ai/gaudi: '1'
llm-deploy:
replica: 1
retriever-deploy:
replica: 1
resources:
limits:
cpu: '8'
memory: 2500Mi
requests:
cpu: '8'
memory: 2500Mi
aise-gaudi-03:
chatqna-backend-server-deploy:
replica: 1
resources:
limits:
cpu: '8'
memory: 4000Mi
requests:
cpu: '8'
memory: 4000Mi
dataprep-deploy:
replica: 1
embedding-dependency-deploy:
replica: 1
resources:
limits:
cpu: '80'
memory: 20000Mi
requests:
cpu: '80'
memory: 20000Mi
embedding-deploy:
replica: 1
llm-dependency-deploy:
replica: 8
resources:
limits:
habana.ai/gaudi: '1'
requests:
habana.ai/gaudi: '1'
llm-deploy:
replica: 1
retriever-deploy:
replica: 1
resources:
limits:
cpu: '8'
memory: 2500Mi
requests:
cpu: '8'
memory: 2500Mi
vector-db:
replica: 1
```

View File

@@ -0,0 +1,55 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
test_suite_config: # Overall configuration settings for the test suite
examples: ["chatqna"] # The specific test cases being tested, e.g., chatqna, codegen, codetrans, faqgen, audioqna, visualqna
concurrent_level: 5 # The concurrency level, adjustable based on requirements
user_queries: ${USER_QUERIES} # Number of test requests at each concurrency level
random_prompt: false # Use random prompts if true, fixed prompts if false
run_time: 60m # The max total run time for the test suite
collect_service_metric: false # Collect service metrics if true, do not collect service metrics if false
data_visualization: false # Generate data visualization if true, do not generate data visualization if false
llm_model: "Intel/neural-chat-7b-v3-3" # The LLM model used for the test
test_output_dir: "${TEST_OUTPUT_DIR}" # The directory to store the test output
test_cases:
chatqna:
embedding:
run_test: false
service_name: "embedding-svc" # Replace with your service name
embedserve:
run_test: false
service_name: "embedding-dependency-svc" # Replace with your service name
retriever:
run_test: false
service_name: "retriever-svc" # Replace with your service name
parameters:
search_type: "similarity"
k: 4
fetch_k: 20
lambda_mult: 0.5
score_threshold: 0.2
reranking:
run_test: false
service_name: "reranking-svc" # Replace with your service name
parameters:
top_n: 1
rerankserve:
run_test: false
service_name: "reranking-dependency-svc" # Replace with your service name
llm:
run_test: false
service_name: "llm-svc" # Replace with your service name
parameters:
max_new_tokens: 128
temperature: 0.01
top_k: 10
top_p: 0.95
repetition_penalty: 1.03
streaming: true
llmserve:
run_test: false
service_name: "llm-dependency-svc" # Replace with your service name
e2e:
run_test: true
service_name: "chatqna-backend-server-svc" # Replace with your service name

View File

@@ -8,7 +8,6 @@ FROM python:3.11-slim
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
libgl1-mesa-glx \
libjemalloc-dev \
vim \
git
RUN useradd -m -s /bin/bash user && \

View File

@@ -8,7 +8,6 @@ FROM python:3.11-slim
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
libgl1-mesa-glx \
libjemalloc-dev \
vim \
git
RUN useradd -m -s /bin/bash user && \

View File

@@ -8,7 +8,6 @@ FROM python:3.11-slim
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
libgl1-mesa-glx \
libjemalloc-dev \
vim \
git
RUN useradd -m -s /bin/bash user && \

View File

@@ -164,7 +164,7 @@ docker compose up -d
# let ollama service runs
# e.g. ollama run llama3
ollama run $OLLAMA_MODEL
OLLAMA_HOST=${host_ip}:11434 ollama run $OLLAMA_MODEL
# for windows
# ollama run %OLLAMA_MODEL%
```
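The change above points the Ollama client at a remote host and port. A quick, hedged way to confirm the server is reachable at that address before running the model is to query Ollama's model-listing endpoint:

```bash
# Should return a JSON list of locally available models if the Ollama server is up (illustrative)
curl http://${host_ip}:11434/api/tags
```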

View File

@@ -1,7 +1,7 @@
CHAT_BASE_URL = 'http://backend_address:8888/v1/chatqna'
CHAT_BASE_URL = '/v1/chatqna'
UPLOAD_FILE_BASE_URL = 'http://backend_address:6007/v1/dataprep'
UPLOAD_FILE_BASE_URL = '/v1/dataprep'
GET_FILE = 'http://backend_address:6007/v1/dataprep/get_file'
GET_FILE = '/v1/dataprep/get_file'
DELETE_FILE = 'http://backend_address:6007/v1/dataprep/delete_file'
DELETE_FILE = '/v1/dataprep/delete_file'

View File

@@ -103,7 +103,7 @@ services:
HF_HUB_ENABLE_HF_TRANSFER: 0
restart: unless-stopped
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
image: ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu
container_name: tgi-service
ports:
- "9009:80"

View File

@@ -102,7 +102,7 @@ services:
HF_HUB_ENABLE_HF_TRANSFER: 0
restart: unless-stopped
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
image: ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu
container_name: tgi-service
ports:
- "6042:80"

View File

@@ -72,6 +72,7 @@ services:
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-retriever-service"
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
tei-reranking-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2

View File

@@ -70,7 +70,7 @@ services:
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
image: ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu
container_name: tgi-service
ports:
- "9009:80"

View File

@@ -20,7 +20,7 @@ The ChatQnA uses the below prebuilt images if you choose a Xeon deployment
- retriever: opea/retriever-redis:latest
- tei_xeon_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
- reranking: opea/reranking-tei:latest
- tgi-service: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
- tgi-service: ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu
- llm: opea/llm-tgi:latest
- chaqna-xeon-backend-server: opea/chatqna:latest
@@ -67,7 +67,7 @@ This involves deploying the ChatQnA custom resource. You can use chatQnA_xeon.ya
```sh
export CLIENT_POD=$(kubectl get pod -n chatqa -l app=client-test -o jsonpath={.items..metadata.name})
export accessUrl=$(kubectl get gmc -n chatqa -o jsonpath="{.items[?(@.metadata.name=='chatqa')].status.accessUrl}")
kubectl exec "$CLIENT_POD" -n chatqa -- curl $accessUrl -X POST -d '{"text":"What is the revenue of Nike in 2023?","parameters":{"max_new_tokens":17, "do_sample": true}}' -H 'Content-Type: application/json'
kubectl exec "$CLIENT_POD" -n chatqa -- curl -s --no-buffer $accessUrl -X POST -d '{"text":"What is the revenue of Nike in 2023?","parameters":{"max_new_tokens":17, "do_sample": true}}' -H 'Content-Type: application/json'
```
6. Perhaps you want to try another LLM model? Just modify the application custom resource to use another LLM model
@@ -98,7 +98,7 @@ This involves deploying the ChatQnA custom resource. You can use chatQnA_xeon.ya
9. Access the updated pipeline using the same URL from above using the client pod
```sh
kubectl exec "$CLIENT_POD" -n chatqa -- curl $accessUrl -X POST -d '{"text":"What is the revenue of Nike in 2023?","parameters":{"max_new_tokens":17, "do_sample": true}}' -H 'Content-Type: application/json'
kubectl exec "$CLIENT_POD" -n chatqa -- curl -s --no-buffer $accessUrl -X POST -d '{"text":"What is the revenue of Nike in 2023?","parameters":{"max_new_tokens":17, "do_sample": true}}' -H 'Content-Type: application/json'
```
> [NOTE]

View File

@@ -1122,7 +1122,7 @@ spec:
name: chatqna-tgi-config
securityContext:
{}
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
image: "ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data

View File

@@ -10,7 +10,6 @@ ENV LANG=C.UTF-8
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
libgl1-mesa-glx \
libjemalloc-dev \
vim \
git
RUN useradd -m -s /bin/bash user && \

View File

@@ -3,7 +3,7 @@
services:
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
image: ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu
container_name: tgi-service
ports:
- "8028:80"

View File

@@ -36,5 +36,5 @@ In the below example we illustrate on Xeon.
```bash
export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name})
export accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='codegen')].status.accessUrl}")
kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -X POST -d '{"query": "def print_hello_world():"}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_codegen.log
kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl -s --no-buffer $accessUrl -X POST -d '{"query": "def print_hello_world():"}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_codegen.log
```

View File

@@ -6,7 +6,8 @@
> You can also customize the "MODEL_ID" if needed.
> You need to make sure you have created the directory `/mnt/opea-models` to save the cached model on the node where the CodeGEn workload is running. Otherwise, you need to modify the `codegen.yaml` file to change the `model-volume` to a directory that exists on the node.
> You need to make sure you have created the directory `/mnt/opea-models` to save the cached model on the node where the CodeGen workload is running. Otherwise, you need to modify the `codegen.yaml` file to change the `model-volume` to a directory that exists on the node.
> Alternatively, you can change the `codegen.yaml` to use a different type of volume, such as a persistent volume claim.
## Deploy On Xeon
@@ -30,10 +31,13 @@ kubectl apply -f codegen.yaml
To verify the installation, run the command `kubectl get pod` to make sure all pods are running.
Then run the command `kubectl port-forward svc/codegen 7778:7778` to expose the CodeGEn service for access.
Then run the command `kubectl port-forward svc/codegen 7778:7778` to expose the CodeGen service for access.
Open another terminal and run the following command to verify the service if working:
> Note that it may take a couple of minutes for the service to be ready. If the `curl` command below fails, you
> can check the logs of the codegen-tgi pod to see its status or check for errors.
```
kubectl get pods
curl http://localhost:7778/v1/codegen -H "Content-Type: application/json" -d '{

View File

@@ -271,6 +271,8 @@ spec:
resources:
limits:
habana.ai/gaudi: 1
memory: 64Gi
hugepages-2Mi: 500Mi
volumes:
- name: model-volume
hostPath:

View File

@@ -239,7 +239,7 @@ spec:
name: codegen-tgi-config
securityContext:
{}
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
image: "ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data

View File

@@ -126,7 +126,7 @@ spec:
- name: no_proxy
value:
securityContext: {}
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
image: "ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data

View File

@@ -8,7 +8,6 @@ FROM python:3.11-slim
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
libgl1-mesa-glx \
libjemalloc-dev \
vim \
git
RUN useradd -m -s /bin/bash user && \

View File

@@ -3,7 +3,7 @@
services:
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
image: ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu
container_name: codetrans-tgi-service
ports:
- "8008:80"

View File

@@ -239,7 +239,7 @@ spec:
name: codetrans-tgi-config
securityContext:
{}
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
image: "ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data

View File

@@ -8,7 +8,6 @@ COPY GenAIComps /home/user/GenAIComps
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
libgl1-mesa-glx \
libjemalloc-dev \
vim \
git
RUN useradd -m -s /bin/bash user && \
@@ -27,4 +26,4 @@ USER user
WORKDIR /home/user
ENTRYPOINT ["python", "retrieval_tool.py"]
ENTRYPOINT ["python", "retrieval_tool.py"]

View File

@@ -1,36 +1,36 @@
# DocRetriever Application
# DocRetriever Application with Docker
DocRetriever is the most widely adopted use case for leveraging different methodologies to match a user query against a set of free-text records. DocRetriever is essential to a RAG system, which bridges the knowledge gap by dynamically fetching relevant information from external sources, ensuring that generated responses remain factual and current. At the core of this architecture are vector databases, which are instrumental in enabling efficient, semantic retrieval of information. These databases store data as vectors, allowing RAG to swiftly access the most pertinent documents or data points based on semantic similarity.
### 1. Build Images for necessary microservices. (This step will not be needed after the docker images are released)
## 1. Build Images for necessary microservices. (This step will not be needed after the docker images are released)
- Embedding TEI Image
```bash
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
docker build -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/langchain/docker/Dockerfile .
```
- Retriever Vector store Image
```bash
docker build -t opea/retriever-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/langchain/redis/docker/Dockerfile .
```
- Rerank TEI Image
```bash
docker build -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/tei/docker/Dockerfile .
```
- Dataprep Image
```bash
docker build -t opea/dataprep-on-ray-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain_ray/docker/Dockerfile .
```
### 2. Build Images for MegaService
## 2. Build Images for MegaService
```bash
cd ..
@@ -38,7 +38,7 @@ git clone https://github.com/opea-project/GenAIExamples.git
docker build --no-cache -t opea/doc-index-retriever:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f GenAIExamples/DocIndexRetriever/docker/Dockerfile .
```
### 3. Start all the services Docker Containers
## 3. Start all the services Docker Containers
```bash
export host_ip="YOUR IP ADDR"
@@ -62,7 +62,7 @@ cd GenAIExamples/DocIndexRetriever/docker/${llm_hardware}/
docker compose -f docker-compose.yaml up -d
```
### 3. Validation
## 3. Validation
Add Knowledge Base via HTTP Links:
@@ -86,41 +86,41 @@ curl http://${host_ip}:8889/v1/retrievaltool -X POST -H "Content-Type: applicati
{"id":"354e62c703caac8c547b3061433ec5e8","reranked_docs":[{"id":"06d5a5cefc06cf9a9e0b5fa74a9f233c","text":"Close SearchsearchMenu WikiNewsCommunity Daysx-twitter linkedin github searchStreamlining implementation of enterprise-grade Generative AIEfficiently integrate secure, performant, and cost-effective Generative AI workflows into business value.TODAYOPEA..."}],"initial_query":"Explain the OPEA project?"}
```
### 4. Troubleshooting
## 4. Troubleshooting
1. check all containers are alive
```bash
# redis vector store
docker container logs redis-vector-db
# dataprep to redis microservice, input document files
docker container logs dataprep-redis-server
# embedding microservice
curl http://${host_ip}:6000/v1/embeddings \
-X POST \
-d '{"text":"Explain the OPEA project"}' \
-H 'Content-Type: application/json' > query
docker container logs embedding-tei-server
# if you used tei-gaudi
docker container logs tei-embedding-gaudi-server
# retriever microservice, input embedding output docs
curl http://${host_ip}:7000/v1/retrieval \
-X POST \
-d @query \
-H 'Content-Type: application/json' > rerank_query
docker container logs retriever-redis-server
# reranking microservice
curl http://${host_ip}:8000/v1/reranking \
-X POST \
-d @rerank_query \
-H 'Content-Type: application/json' > output
docker container logs reranking-tei-server
# megaservice gateway
docker container logs doc-index-retriever-server
```

View File

@@ -8,7 +8,6 @@ FROM python:3.11-slim
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
libgl1-mesa-glx \
libjemalloc-dev \
vim \
git
RUN useradd -m -s /bin/bash user && \

View File

@@ -73,7 +73,7 @@ export no_proxy=${your_no_proxy}
export http_proxy=${your_http_proxy}
export https_proxy=${your_http_proxy}
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export TGI_LLM_ENDPOINT="http://${your_ip}:8008"
export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
export MEGA_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}

View File

@@ -5,7 +5,7 @@
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export TGI_LLM_ENDPOINT="http://${your_ip}:8008"
export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
export MEGA_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum"

View File

@@ -60,6 +60,8 @@ Build the frontend Docker image via below command:
cd GenAIExamples/DocSum/docker/ui/
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum"
docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT -f ./docker/Dockerfile.react .
docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.react .
```
Then run the command `docker images`, you will have the following Docker Images:
@@ -80,7 +82,7 @@ export no_proxy=${your_no_proxy}
export http_proxy=${your_http_proxy}
export https_proxy=${your_http_proxy}
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export TGI_LLM_ENDPOINT="http://${your_ip}:8008"
export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
export MEGA_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}

View File

@@ -3,7 +3,7 @@
services:
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
image: ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu
container_name: tgi-service
ports:
- "8008:80"

View File

@@ -8,7 +8,7 @@ Install GMC in your Kubernetes cluster, if you have not already done so, by foll
The DocSum application is defined as a Custom Resource (CR) file that the above GMC operator acts upon. It first checks if the microservices listed in the CR yaml file are running, if not it starts them and then proceeds to connect them. When the DocSum RAG pipeline is ready, the service endpoint details are returned, letting you use the application. Should you use "kubectl get pods" commands you will see all the component microservices, in particular embedding, retriever, rerank, and llm.
The DocSum pipeline uses prebuilt images. The Xeon version uses the prebuilt image llm-docsum-tgi:latest which internally leverages the
the image ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu. The service is called tgi-svc. Meanwhile, the Gaudi version launches the
the image ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu. The service is called tgi-svc. Meanwhile, the Gaudi version launches the
service tgi-gaudi-svc, which uses the image ghcr.io/huggingface/tgi-gaudi:1.2.1. Both TGI model services serve the model specified in the LLM_MODEL_ID variable that is exported by you. In the below example we use Intel/neural-chat-7b-v3-3.
[NOTE]
@@ -60,7 +60,7 @@ This involves deploying the application pipeline custom resource. You can use do
```bash
export CLIENT_POD=$(kubectl get pod -n ${ns} -l app=client-test -o jsonpath={.items..metadata.name})
export accessUrl=$(kubectl get gmc -n $ns -o jsonpath="{.items[?(@.metadata.name=='docsum')].status.accessUrl}")
kubectl exec "$CLIENT_POD" -n $ns -- curl $accessUrl -X POST -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' -H 'Content-Type: application/json'
kubectl exec "$CLIENT_POD" -n $ns -- curl -s --no-buffer $accessUrl -X POST -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' -H 'Content-Type: application/json'
```
7. Clean up. Use standard Kubernetes custom resource remove commands. Confirm cleaned by retrieving pods in application namespace.

View File

@@ -239,7 +239,7 @@ spec:
name: docsum-tgi-config
securityContext:
{}
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
image: "ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data

View File

@@ -126,7 +126,7 @@ spec:
- name: no_proxy
value:
securityContext: {}
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
image: "ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data

View File

@@ -9,7 +9,6 @@ FROM python:3.11-slim
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
libgl1-mesa-glx \
libjemalloc-dev \
vim \
git
RUN useradd -m -s /bin/bash user && \

View File

@@ -3,7 +3,7 @@
services:
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
image: ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu
container_name: tgi-xeon-server
ports:
- "8008:80"

View File

@@ -126,7 +126,7 @@ spec:
- name: no_proxy
value:
securityContext: {}
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
image: "ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data

View File

@@ -2,7 +2,7 @@
OPEA Productivity Suite, is a powerful tool designed to streamline your workflow and boost productivity. This application leverages the cutting-edge OPEA microservices to provide a comprehensive suite of features that cater to the diverse needs of modern enterprises.
### Key Features
## Key Features
- Chat with Documents: Engage in intelligent conversations with your documents using our advanced RAG Capabilities. Our Retrieval-Augmented Generation (RAG) model allows you to ask questions, receive relevant information, and gain insights from your documents in real-time.

View File

@@ -1,66 +1,72 @@
<h1 align="center" id="title"> Productivity Suite React UI</h1>
# Productivity Suite React UI
### 📸 Project Screenshots
## 📸 Project Screenshots
![project-screenshot](../../../assets/img/chat_qna_init.png)
![project-screenshot](../../../assets/img/Login_page.png)
<h2>🧐 Features</h2>
## 🧐 Features
Here're some of the project's features:
#### CHAT QNA
### CHAT QNA
- Start a Text Chat: Initiate a text chat with the ability to input written conversations, where the dialogue content can also be customized based on uploaded files.
- Context Awareness: The AI assistant maintains the context of the conversation, understanding references to previous statements or questions. This allows for more natural and coherent exchanges.
##### DATA SOURCE
#### DATA SOURCE
- The choice between uploading locally or copying a remote link. Chat according to uploaded knowledge base.
- Uploaded File would get listed and user would be able add or remove file/links
###### Screen Shot
##### Screen Shot
![project-screenshot](../../../assets/img/data_source.png)
- Clear: Clear the record of the current dialog box without retaining the contents of the dialog box.
- Chat history: Historical chat records can still be retained after refreshing, making it easier for users to view the context.
- Conversational Chat : The application maintains a history of the conversation, allowing users to review previous messages and the AI to refer back to earlier points in the dialogue when necessary.
-###### Screen Shots
+##### Screen Shots
![project-screenshot](../../../assets/img/chat_qna_init.png)
![project-screenshot](../../../assets/img/chatqna_with_conversation.png)
-#### CODEGEN
+### CODEGEN
- Generate code: generate the corresponding code based on the current user's input.
###### Screen Shot
![project-screenshot](../../../assets/img/codegen.png)
-#### DOC SUMMARY
+### DOC SUMMARY
- Summarizing Uploaded Files: Upload files from your local device, then click 'Generate Summary' to summarize the content of the uploaded file. The summary will be displayed in the 'Summary' box.
- Summarizing Text via Pasting: Paste the text to be summarized into the text box, then click 'Generate Summary' to produce a condensed summary of the content, which will be displayed in the 'Summary' box on the right.
- Scroll to Bottom: The summarized content will automatically scroll to the bottom.
-###### Screen Shot
+#### Screen Shot
![project-screenshot](../../../assets/img/doc_summary_paste.png)
![project-screenshot](../../../assets/img/doc_summary_file.png)
-#### FAQ Generator
+### FAQ Generator
- Generate FAQs from Text via Pasting: Paste the text into the text box, then click 'Generate FAQ' to produce a condensed FAQ of the content, which will be displayed in the 'FAQ' box below.
- Generate FAQs from Text via txt file Upload: Upload the file in the Upload bar, then click 'Generate FAQ' to produce a condensed FAQ of the content, which will be displayed in the 'FAQ' box below.
-###### Screen Shot
+#### Screen Shot
![project-screenshot](../../../assets/img/faq_generator.png)
-<h2>🛠️ Get it Running:</h2>
+## 🛠️ Get it Running:
1. Clone the repo.
2. `cd` into this folder.
3. Create a `.env` file and add the following variables and values (example values are included as comments at the end of the block below).
-```env
+```
VITE_BACKEND_SERVICE_ENDPOINT_CHATQNA=''
VITE_BACKEND_SERVICE_ENDPOINT_CODEGEN=''
VITE_BACKEND_SERVICE_ENDPOINT_DOCSUM=''
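# Example values (illustrative only; the host and ports below are hypothetical).
# Point each variable at the corresponding megaservice endpoint of your own deployment:
# VITE_BACKEND_SERVICE_ENDPOINT_CHATQNA='http://192.168.1.2:8888/v1/chatqna'
# VITE_BACKEND_SERVICE_ENDPOINT_CODEGEN='http://192.168.1.2:7778/v1/codegen'
# VITE_BACKEND_SERVICE_ENDPOINT_DOCSUM='http://192.168.1.2:8890/v1/docsum'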

View File

@@ -63,7 +63,7 @@ cd ..
The Productivity Suite is composed of multiple GenAIExample reference solutions composed together.
-### 8.1 Build ChatQnA MegaService Docker Images
+#### 8.1 Build ChatQnA MegaService Docker Images
```bash
git clone https://github.com/opea-project/GenAIExamples.git
@@ -72,7 +72,7 @@ docker build --no-cache -t opea/chatqna:latest --build-arg https_proxy=$https_pr
cd ../../..
```
-### 8.2 Build DocSum Megaservice Docker Images
+#### 8.2 Build DocSum Megaservice Docker Images
```bash
cd GenAIExamples/DocSum/docker
@@ -80,7 +80,7 @@ docker build --no-cache -t opea/docsum:latest --build-arg https_proxy=$https_pro
cd ../../..
```
-### 8.3 Build CodeGen Megaservice Docker Images
+#### 8.3 Build CodeGen Megaservice Docker Images
```bash
cd GenAIExamples/CodeGen/docker
@@ -88,7 +88,7 @@ docker build --no-cache -t opea/codegen:latest --build-arg https_proxy=$https_pr
cd ../../..
```
-### 8.4 Build FAQGen Megaservice Docker Images
+#### 8.4 Build FAQGen Megaservice Docker Images
```bash
cd GenAIExamples/FaqGen/docker
@@ -206,84 +206,84 @@ Please refer to [keycloak_setup_guide](keycloak_setup_guide.md) for more detail
1. TEI Embedding Service
```bash
curl ${host_ip}:6006/embed \
-X POST \
-d '{"inputs":"What is Deep Learning?"}' \
-H 'Content-Type: application/json'
```
2. Embedding Microservice
```bash
curl http://${host_ip}:6000/v1/embeddings\
-X POST \
-d '{"text":"hello"}' \
-H 'Content-Type: application/json'
```
3. Retriever Microservice
To consume the retriever microservice, you need to generate a mock embedding vector with a Python script. The length of the embedding vector is determined by the embedding model.
Here we use the model `EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"`, whose vector size is 768.
Check the vector dimension of your embedding model and set the dimension of `your_embedding` to match it (a quick way to verify the size is shown after the curl example below).
```bash
export your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
curl http://${host_ip}:7000/v1/retrieval \
-X POST \
-d "{\"text\":\"test\",\"embedding\":${your_embedding}}" \
-H 'Content-Type: application/json'
```
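If you are unsure of the vector size, one way to confirm it (a minimal sketch, assuming the TEI embedding service from step 1 is reachable at `${host_ip}:6006` as above) is to embed a test string and count the returned values:
```bash
# Embed a test string via TEI and print the embedding length (expect 768 for bge-base-en-v1.5).
curl -s ${host_ip}:6006/embed \
  -X POST \
  -d '{"inputs":"dimension check"}' \
  -H 'Content-Type: application/json' \
  | python3 -c "import sys, json; print(len(json.load(sys.stdin)[0]))"
```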
4. TEI Reranking Service
```bash
curl http://${host_ip}:8808/rerank \
-X POST \
-d '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' \
-H 'Content-Type: application/json'
```
5. Reranking Microservice
```bash
curl http://${host_ip}:8000/v1/reranking\
-X POST \
-d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
-H 'Content-Type: application/json'
```
6. LLM backend Service (ChatQnA, DocSum, FAQGen)
```bash
curl http://${host_ip}:9009/generate \
-X POST \
-d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
-H 'Content-Type: application/json'
```
-8. LLM backend Service (CodeGen)
+7. LLM backend Service (CodeGen)
```bash
curl http://${host_ip}:8028/generate \
-X POST \
-d '{"inputs":"def print_hello_world():","parameters":{"max_new_tokens":256, "do_sample": true}}' \
-H 'Content-Type: application/json'
```
-9. ChatQnA LLM Microservice
+8. ChatQnA LLM Microservice
```bash
curl http://${host_ip}:9000/v1/chat/completions\
-X POST \
-d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
-H 'Content-Type: application/json'
```
-10. CodeGen LLM Microservice
+9. CodeGen LLM Microservice
```bash
curl http://${host_ip}:9001/v1/chat/completions\
@@ -498,50 +498,56 @@ Here is an example of running Productivity Suite
![project-screenshot](../../assets/img/chat_qna_init.png)
![project-screenshot](../../assets/img/Login_page.png)
-<h2>🧐 Features</h2>
+## 🧐 Features
Here are some of the project's features:
-#### CHAT QNA
+### CHAT QNA
- Start a Text Chat: Initiate a text chat with the ability to input written conversations, where the dialogue content can also be customized based on uploaded files.
- Context Awareness: The AI assistant maintains the context of the conversation, understanding references to previous statements or questions. This allows for more natural and coherent exchanges.
-##### DATA SOURCE
+### DATA SOURCE
- Choose between uploading files locally or copying a remote link, and chat against the uploaded knowledge base.
- Uploaded files and links are listed, and the user can add or remove them.
-###### Screen Shot
+#### Screen Shot
![project-screenshot](../../assets/img/data_source.png)
- Clear: Clear the record of the current dialog box without retaining the contents of the dialog box.
- Chat history: Historical chat records can still be retained after refreshing, making it easier for users to view the context.
- Conversational Chat: The application maintains a history of the conversation, allowing users to review previous messages and the AI to refer back to earlier points in the dialogue when necessary.
-###### Screen Shots
+#### Screen Shots
![project-screenshot](../../assets/img/chat_qna_init.png)
![project-screenshot](../../assets/img/chatqna_with_conversation.png)
-#### CODEGEN
+### CODEGEN
- Generate code: generate the corresponding code based on the current user's input.
###### Screen Shot
![project-screenshot](../../assets/img/codegen.png)
-#### DOC SUMMARY
+### DOC SUMMARY
- Summarizing Uploaded Files: Upload files from your local device, then click 'Generate Summary' to summarize the content of the uploaded file. The summary will be displayed in the 'Summary' box.
- Summarizing Text via Pasting: Paste the text to be summarized into the text box, then click 'Generate Summary' to produce a condensed summary of the content, which will be displayed in the 'Summary' box on the right.
- Scroll to Bottom: The summarized content will automatically scroll to the bottom.
-###### Screen Shot
+#### Screen Shot
![project-screenshot](../../assets/img/doc_summary_paste.png)
![project-screenshot](../../assets/img/doc_summary_file.png)
-#### FAQ Generator
+### FAQ Generator
- Generate FAQs from Text via Pasting: Paste the text into the text box, then click 'Generate FAQ' to produce a condensed FAQ of the content, which will be displayed in the 'FAQ' box below.
- Generate FAQs from Text via txt file Upload: Upload the file in the Upload bar, then click 'Generate FAQ' to produce a condensed FAQ of the content, which will be displayed in the 'FAQ' box below.
-###### Screen Shot
+#### Screen Shot
![project-screenshot](../../assets/img/faq_generator.png)

View File

@@ -22,24 +22,26 @@ To begin with, ensure that you have following prerequisites in place:
1. Kubernetes installation: Make sure that you have Kubernetes installed.
2. Images: Make sure you have all the images ready for the examples and components stated above. You may refer to [README](../../docker/xeon/README.md) for steps to build the images.
3. Configuration Values: Set the following values in all the yaml files before proceeding with the deployment:
-#### a. HUGGINGFACEHUB_API_TOKEN (Your HuggingFace token to download your desired model from HuggingFace):
+a. HUGGINGFACEHUB_API_TOKEN (Your HuggingFace token to download your desired model from HuggingFace):
```
# You may set the HUGGINGFACEHUB_API_TOKEN via this method:
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
cd GenAIExamples/ProductivitySuite/kubernetes/manifests/xeon/
sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" *.yaml
```
-#### b. Set the proxies based on your network configuration
+b. Set the proxies based on your network configuration
```
# Look for the http_proxy, https_proxy and no_proxy keys and fill in the values in all the yaml files with your system proxy configuration.
```
-#### c. Set all the backend service endpoints for the REACT UI service
+c. Set all the backend service endpoints for the REACT UI service
```
# Set up all the backend service endpoints in productivity_suite_reactui.yaml for the UI to consume.
# Look for ENDPOINT in the yaml and insert the url endpoints for all the required backend services.
```
4. MODEL_ID and model-volume (OPTIONAL): You may also customize "MODEL_ID" to use a different model, and change the model-volume to control which volume is mounted (see the sketch after this list).
5. After finishing the steps above, you can proceed with deploying the yaml files.
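For step 4, the snippet below is a minimal sketch of how you might locate and swap the model across the manifests; the model IDs in the `sed` command are placeholders, so check the yaml files for the actual current value before substituting.
```bash
cd GenAIExamples/ProductivitySuite/kubernetes/manifests/xeon/
# See which manifests define MODEL_ID and the model volume:
grep -n "MODEL_ID\|model-volume" *.yaml
# Swap the model (both IDs below are placeholders; use the real current value and your target model):
sed -i "s|<current-model-id>|<your-model-id>|g" *.yaml
```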

View File

@@ -8,7 +8,6 @@ FROM python:3.11-slim
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
libgl1-mesa-glx \
libjemalloc-dev \
-    vim \
git
RUN useradd -m -s /bin/bash user && \

View File

@@ -73,7 +73,7 @@ services:
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu
container_name: tgi-service
ports:
- "3006:80"

View File

@@ -38,5 +38,5 @@ In the below example we illustrate on Xeon.
```bash
export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name})
export accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='searchqa')].status.accessUrl}")
kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -X POST -d '{"text":"What is the latest news? Give me also the source link."}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_searchqa.log
kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl -s --no-buffer $accessUrl -X POST -d '{"text":"What is the latest news? Give me also the source link."}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_searchqa.log
```

View File

@@ -18,7 +18,6 @@ FROM python:3.11-slim
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
libgl1-mesa-glx \
libjemalloc-dev \
-    vim \
git
RUN useradd -m -s /bin/bash user && \

View File

@@ -3,7 +3,7 @@
services:
tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu
container_name: tgi-service
ports:
- "8008:80"

View File

@@ -36,5 +36,5 @@ In the below example we illustrate on Xeon.
```bash
export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name})
export accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='translation')].status.accessUrl}")
kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -X POST -d '{"query":"Translate this from Chinese to English:\nChinese: 我爱机器翻译。\nEnglish:"}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_translation.log
kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl -s --no-buffer $accessUrl -X POST -d '{"query":"Translate this from Chinese to English:\nChinese: 我爱机器翻译。\nEnglish:"}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_translation.log
```

View File

@@ -8,7 +8,6 @@ FROM python:3.11-slim
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
libgl1-mesa-glx \
libjemalloc-dev \
-    vim \
git
RUN useradd -m -s /bin/bash user && \

View File

@@ -71,12 +71,12 @@ cd ../../../..
### 4. Pull TGI Xeon Image
```bash
-docker pull ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+docker pull ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu
```
Then run the command `docker images`; you should see the following 4 Docker images:
-1. `ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu`
+1. `ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu`
2. `opea/lvm-tgi:latest`
3. `opea/visualqna:latest`
4. `opea/visualqna-ui:latest`

View File

@@ -3,10 +3,10 @@
services:
llava-tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu
container_name: tgi-llava-xeon-server
ports:
- "9399:80"
- "8399:80"
volumes:
- "./data:/data"
shm_size: 1g
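Since this change exists to avoid a host-port clash, it can be worth confirming that nothing else is already bound to the new external port before starting the stack. The check below is optional and not part of the compose change itself; it assumes `ss` is available on the host.
```bash
# Should print the conflicting listener if one exists, or report that the port is free.
ss -ltn | grep -w 8399 || echo "host port 8399 looks free"
```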

View File

@@ -11,9 +11,8 @@ In the below example we illustrate on Xeon.
1. Create the desired namespace if it does not already exist and deploy the application
```bash
-export APP_NAMESPACE=CT
+export APP_NAMESPACE=visualqna
kubectl create ns $APP_NAMESPACE
sed -i "s|namespace: visualqna|namespace: $APP_NAMESPACE|g" ./visualqna_xeon.yaml
kubectl apply -f ./visualqna_xeon.yaml
```
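As an optional follow-up (not part of the original instructions), you can confirm the workload is up in the new namespace before issuing requests:
```bash
# List the pods, then wait for all of them to become Ready (the timeout is arbitrary).
kubectl get pods -n $APP_NAMESPACE
kubectl wait --for=condition=Ready pod --all -n $APP_NAMESPACE --timeout=300s
```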