Compare commits
15 Commits
v0.9rc...update_ima

| Author | SHA1 | Date |
| --- | --- | --- |
|  | 6a705ad3d4 |  |
|  | 32afb6501c |  |
|  | 035f39f0d9 |  |
|  | 6f3e54a22a |  |
|  | 1874dfd148 |  |
|  | 7a0fca73e6 |  |
|  | beda609b4b |  |
|  | 993688ac91 |  |
|  | 5fde666c43 |  |
|  | 4133757642 |  |
|  | 10c81f1c57 |  |
|  | dad8eb4b82 |  |
|  | af21e94a29 |  |
|  | f78aa9ee2f |  |
|  | c25063f4bb |  |
.github/workflows/manual-bom-scan.yml (3 changes, vendored)
@@ -59,6 +59,7 @@ jobs:
- name: SBOM Scan Container
uses: anchore/sbom-action@v0.17.1
if: always()
with:
image: ${{ env.OPEA_IMAGE_REPO }}opea/${{ matrix.image }}:${{ inputs.tag }}
output-file: ${{ matrix.image }}-sbom-scan.txt

@@ -66,6 +67,7 @@ jobs:
- name: Security Scan Container
uses: aquasecurity/trivy-action@0.24.0
if: always()
with:
image-ref: ${{ env.OPEA_IMAGE_REPO }}opea/${{ matrix.image }}:${{ inputs.tag }}
output: ${{ matrix.image }}-trivy-scan.txt

@@ -80,6 +82,7 @@ jobs:
run: docker rmi -f ${OPEA_IMAGE_REPO}opea/${{ matrix.image }}:${{ inputs.tag }}
- uses: actions/upload-artifact@v4.3.4
if: always()
with:
name: ${{ matrix.image }}-scan
path: ${{ matrix.image }}-*-scan.txt
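For reference, the two scan steps above can be reproduced locally with the same engines the actions wrap; a rough sketch (the image name is a placeholder, and it assumes the `syft` and `trivy` CLIs are installed):

```bash
# SBOM scan with syft, the engine behind anchore/sbom-action (image name is illustrative)
syft opea/chatqna:latest -o table > chatqna-sbom-scan.txt

# Vulnerability scan with trivy, the engine behind aquasecurity/trivy-action
trivy image --output chatqna-trivy-scan.txt opea/chatqna:latest
```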
.github/workflows/manual-docker-publish.yml (34 changes, vendored)
@@ -5,28 +5,28 @@ name: Examples publish docker image on manual event
on:
workflow_dispatch:
inputs:
nodes:
node:
default: "gaudi"
description: "Hardware to run test"
required: true
type: string
examples:
default: "ChatQnA"
default: "Translation"
description: 'List of examples to test [AudioQnA,ChatQnA,CodeGen,CodeTrans,DocSum,FaqGen,SearchQnA,Translation]'
required: true
type: string
tag:
default: "latest"
description: "Tag to apply to images"
required: true
type: string
publish:
gmc:
default: false
description: 'Publish images to docker hub'
description: 'Publish gmc images'
required: false
type: boolean
tag:
default: "v0.9"
description: "Tag to publish"
required: true
type: string
publish_tags:
default: "latest,v1.0"
default: "latest,v0.9"
description: 'Tag list apply to publish images'
required: false
type: string

@@ -34,7 +34,7 @@ on:
permissions: read-all
jobs:
get-image-list:
runs-on: ${{ inputs.node }}
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.scan-matrix.outputs.matrix }}
steps:

@@ -44,13 +44,20 @@ jobs:
- name: Set Matrix
id: scan-matrix
run: |
pip install yq
examples=($(echo ${{ inputs.examples }} | tr ',' ' '))
image_list=[]
for example in ${examples[@]}
do
echo ${example}
images=$(cat ${{ github.workspace }}/${example}/docker/docker_build_compose.yaml | yq -r '.[]' | jq 'keys' | jq -c '.')
image_list=$(echo ${image_list} | jq -s '.[0] + .[1] | unique' - <(echo ${images}))
done
if [ "${{ inputs.gmc }}" == "true" ]; then
image_list=$(echo ${image_list} | jq -c '. + ["gmcmanager","gmcrouter"]')
fi
echo $image_list
echo "matrix=$(echo ${image_list} | jq -c '.')" >> $GITHUB_OUTPUT

publish:

@@ -60,6 +67,11 @@ jobs:
image: ${{ fromJSON(needs.get-image-list.outputs.matrix) }}
runs-on: "docker-build-${{ inputs.node }}"
steps:
- uses: docker/login-action@v3.2.0
with:
username: ${{ secrets.DOCKERHUB_USER }}
password: ${{ secrets.DOCKERHUB_TOKEN }}

- name: Image Publish
uses: opea-project/validation/actions/image-publish@main
with:
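The Set Matrix step above builds one deduplicated JSON array of image names by unioning the keys found in each example's docker_build_compose.yaml. A small sketch of the jq call it relies on (the two input arrays are made up for illustration):

```bash
# Union two JSON arrays of image names, mirroring the Set Matrix step (inputs are made up)
image_list='["chatqna","embedding-tei"]'
images='["chatqna","llm-tgi"]'
image_list=$(echo ${image_list} | jq -s '.[0] + .[1] | unique' - <(echo ${images}))
echo "matrix=$(echo ${image_list} | jq -c '.')"
# -> matrix=["chatqna","embedding-tei","llm-tgi"]
```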
.github/workflows/manual-freeze-images.yml (43 changes, vendored)
@@ -1,43 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

name: Freeze base images and 3rd party images on manual event

on:
workflow_dispatch:

jobs:
freeze-images:
runs-on: ubuntu-latest

steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
fetch-depth: 0
ref: ${{ github.ref }}

- uses: actions/setup-python@v5
with:
python-version: "3.10"

- name: install skopeo
run: |
sudo apt update
sudo apt -y install skopeo

- name: Set up Git
run: |
git config --global user.name "NeuralChatBot"
git config --global user.email "grp_neural_chat_bot@intel.com"
git remote set-url origin https://NeuralChatBot:"${{ secrets.ACTION_TOKEN }}"@github.com/opea-project/GenAIExamples.git

- name: Run script
run: |
bash .github/workflows/scripts/freeze_images.sh

- name: Commit changes
run: |
git add .
git commit -s -m "Freeze third party images tag"
git push
@@ -4,14 +4,18 @@
# SPDX-License-Identifier: Apache-2.0

declare -A dict
dict["langchain/langchain"]="docker://docker.io/langchain/langchain"
dict["ghcr.io/huggingface/text-generation-inference"]="docker://ghcr.io/huggingface/text-generation-inference"
dict["ghcr.io/huggingface/text-generation-inference"]="docker://ghcr.io/huggingface/text-generation-inference:latest-intel-cpu"

function get_latest_version() {
repo_image=$1
versions=$(skopeo list-tags ${dict[$repo_image]} | jq -r '.Tags[]')
printf "version list:\n$versions\n"
latest_version=$(printf "%s\n" "${versions[@]}" | grep -E '^[\.0-9\-]+$' | sort -V | tail -n 1)
if [[ $repo_image == *"huggingface"* ]]; then
revision=$(skopeo inspect --config ${dict[$repo_image]} | jq -r '.config.Labels["org.opencontainers.image.revision"][:7]')
latest_version="sha-$revision-intel-cpu"
else
versions=$(skopeo list-tags ${dict[$repo_image]} | jq -r '.Tags[]')
printf "version list:\n$versions\n"
latest_version=$(printf "%s\n" "${versions[@]}" | grep -E '^[\.0-9\-]+$' | sort -V | tail -n 1)
fi
echo "latest version: $latest_version"
replace_image_version $repo_image $latest_version
}

@@ -22,10 +26,10 @@ function replace_image_version() {
if [[ -z "$version" ]]; then
echo "version is empty"
else
echo "replace $repo_image:latest with $repo_image:$version"
find . -name "Dockerfile" | xargs sed -i "s|$repo_image:latest.*|$repo_image:$version|g"
find . -name "*.yaml" | xargs sed -i "s|$repo_image:latest[A-Za-z0-9\-]*|$repo_image:$version|g"
find . -name "*.md" | xargs sed -i "s|$repo_image:latest[A-Za-z0-9\-]*|$repo_image:$version|g"
echo "replace $repo_image:tag with $repo_image:$version"
find . -name "Dockerfile" | xargs sed -i "s|$repo_image:sha[A-Za-z0-9\-]*|$repo_image:$version|g"
find . -name "*.yaml" | xargs sed -i "s|$repo_image:sha[A-Za-z0-9\-]*|$repo_image:$version|g"
find . -name "*.md" | xargs sed -i "s|$repo_image:sha[A-Za-z0-9\-]*|$repo_image:$version|g"
fi
}
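One of the sed patterns above matches `sha[A-Za-z0-9\-]*`, so an already-pinned TGI tag gets rewritten in place rather than only `latest` tags. A quick way to see that expression in action (the tags below are illustrative only):

```bash
# Illustrative only: the sha pattern rewrites an existing sha-pinned tag in place
repo_image="ghcr.io/huggingface/text-generation-inference"
version="sha-8f99f16-intel-cpu"
echo "image: ${repo_image}:sha-e4201f4-intel-cpu" \
  | sed "s|$repo_image:sha[A-Za-z0-9\-]*|$repo_image:$version|g"
# -> image: ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu
```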
.github/workflows/weekly-update-images.yml (54 changes, vendored, new file)
@@ -0,0 +1,54 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

name: Weekly update base images and 3rd party images

on:
schedule:
- cron: "0 0 * * 0"
workflow_dispatch:

permissions:
contents: write
pull-requests: write

jobs:
freeze-images:
runs-on: ubuntu-latest
env:
USER_NAME: "NeuralChatBot"
USER_EMAIL: "grp_neural_chat_bot@intel.com"
BRANCH_NAME: "update_images_tag"
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
fetch-depth: 0
ref: "main"

- name: Install skopeo
run: |
sudo apt update
sudo apt -y install skopeo

- name: Set up Git
run: |
git config --global user.name ${{ env.USER_NAME }}
git config --global user.email ${{ env.USER_EMAIL }}
git remote set-url origin https://${{ env.USER_NAME }}:"${{ secrets.ACTION_TOKEN }}"@github.com/opea-project/GenAIExamples.git
git checkout -b ${{ env.BRANCH_NAME }}

- name: Run script
run: |
bash .github/workflows/scripts/update_images_tag.sh

- name: Commit changes
run: |
git add .
git commit -s -m "Update third party images tag"
git push --set-upstream origin update_images_tag

- name: create pull request
run: gh pr create -B main -H ${{ env.BRANCH_NAME }} --title 'Update ghcr.io/huggingface/text-generation-inference image tag' --body 'Created by Github action'
env:
GITHUB_TOKEN: ${{ secrets.ACTION_TOKEN }}
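Besides the Sunday cron, the `workflow_dispatch` trigger means the update can also be kicked off on demand; with the GitHub CLI that looks roughly like this (assuming `gh` is authenticated against the repository):

```bash
# Trigger the weekly image-update workflow manually
gh workflow run weekly-update-images.yml --ref main

# Check the run it started
gh run list --workflow=weekly-update-images.yml --limit 1
```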
@@ -26,50 +26,50 @@ This example showcases a hierarchical multi-agent system for question-answering

1. Build agent docker image </br>
First, clone the opea GenAIComps repo

```
export WORKDIR=<your-work-directory>
cd $WORKDIR
git clone https://github.com/opea-project/GenAIComps.git
```

Then build the agent docker image. Both the supervisor agent and the worker agent will use the same docker image, but when we launch the two agents we will specify different strategies and register different tools.

```
cd GenAIComps
docker build -t opea/comps-agent-langchain:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/agent/langchain/docker/Dockerfile .
```

2. Launch tool services </br>
In this example, we will use some of the mock APIs provided in the Meta CRAG KDD Challenge to demonstrate the benefits of gaining additional context from mock knowledge graphs.

```
docker run -d -p=8080:8000 docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0
```

3. Set up environment for this example </br>
First, clone this repo

```
cd $WORKDIR
git clone https://github.com/opea-project/GenAIExamples.git
```

Second, set up env vars

```
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
# optional: OPANAI_API_KEY
export OPENAI_API_KEY=<your-openai-key>
```

4. Launch agent services</br>
The configurations of the supervisor agent and the worker agent are defined in the docker-compose yaml file. We currently use openAI GPT-4o-mini as LLM, and we plan to add support for llama3.1-70B-instruct (served by TGI-Gaudi) in a subsequent release.
To use openai llm, run command below.

```
cd docker/openai/
bash launch_agent_service_openai.sh
```

## Validate services
@@ -8,7 +8,6 @@ FROM python:3.11-slim
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
libgl1-mesa-glx \
libjemalloc-dev \
vim \
git

RUN useradd -m -s /bin/bash user && \
@@ -81,7 +81,7 @@ export LLM_SERVICE_PORT=3007

```bash
cd GenAIExamples/AudioQnA/docker/gaudi/
TAG=v0.9 docker compose up -d
docker compose up -d
```

## 🚀 Test MicroServices
@@ -81,7 +81,7 @@ export LLM_SERVICE_PORT=3007

```bash
cd GenAIExamples/AudioQnA/docker/xeon/
TAG=v0.9 docker compose up -d
docker compose up -d
```

## 🚀 Test MicroServices
@@ -41,7 +41,7 @@ services:
environment:
TTS_ENDPOINT: ${TTS_ENDPOINT}
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
image: ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu
container_name: tgi-service
ports:
- "3006:80"
@@ -15,19 +15,19 @@ The AudioQnA application is defined as a Custom Resource (CR) file that the abov
The AudioQnA uses the below prebuilt images if you choose a Xeon deployment

- tgi-service: ghcr.io/huggingface/text-generation-inference:1.4
- llm: opea/llm-tgi:v0.9
- asr: opea/asr:v0.9
- whisper: opea/whisper:v0.9
- tts: opea/tts:v0.9
- speecht5: opea/speecht5:v0.9
- llm: opea/llm-tgi:latest
- asr: opea/asr:latest
- whisper: opea/whisper:latest
- tts: opea/tts:latest
- speecht5: opea/speecht5:latest

Should you desire to use the Gaudi accelerator, two alternate images are used for the embedding and llm services.
For Gaudi:

- tgi-service: ghcr.io/huggingface/tgi-gaudi:1.2.1
- whisper-gaudi: opea/whisper-gaudi:v0.9
- speecht5-gaudi: opea/speecht5-gaudi:v0.9
- whisper-gaudi: opea/whisper-gaudi:latest
- speecht5-gaudi: opea/speecht5-gaudi:latest

> [NOTE]
> Please refer to [Xeon README](https://github.com/opea-project/GenAIExamples/blob/main/AudioQnA/docker/xeon/README.md) or [Gaudi README](https://github.com/opea-project/GenAIExamples/blob/main/AudioQnA/docker/gaudi/README.md) to build the OPEA images. These too will be available on Docker Hub soon to simplify use.
@@ -66,7 +66,7 @@ This involves deploying the AudioQnA custom resource. You can use audioQnA_xeon.

```sh
export CLIENT_POD=$(kubectl get pod -n audioqa -l app=client-test -o jsonpath={.items..metadata.name})
export accessUrl=$(kubectl get gmc -n audioqa -o jsonpath="{.items[?(@.metadata.name=='audioqa')].status.accessUrl}")
kubectl exec "$CLIENT_POD" -n audioqa -- curl $accessUrl -X POST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "parameters":{"max_new_tokens":64, "do_sample": true, "streaming":false}}' -H 'Content-Type: application/json'
kubectl exec "$CLIENT_POD" -n audioqa -- curl -s --no-buffer $accessUrl -X POST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "parameters":{"max_new_tokens":64, "do_sample": true, "streaming":false}}' -H 'Content-Type: application/json'
```

> [NOTE]
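The `byte_str` in the request above is just a base64-encoded WAV clip; to test with your own audio, something like the following can produce the payload (a sketch that assumes a short `sample.wav` and GNU coreutils `base64`):

```bash
# Encode a short WAV file into the base64 string used as "byte_str"
base64 -w 0 sample.wav > sample.b64
# Paste the contents of sample.b64 into the request body shown above
```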
@@ -50,7 +50,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: audio-qna-config
|
||||
image: opea/asr:v0.9
|
||||
image: opea/asr:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: asr-deploy
|
||||
args: null
|
||||
@@ -101,7 +101,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: audio-qna-config
|
||||
image: opea/whisper-gaudi:v0.9
|
||||
image: opea/whisper-gaudi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: whisper-deploy
|
||||
args: null
|
||||
@@ -164,7 +164,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: audio-qna-config
|
||||
image: opea/tts:v0.9
|
||||
image: opea/tts:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: tts-deploy
|
||||
args: null
|
||||
@@ -215,7 +215,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: audio-qna-config
|
||||
image: opea/speecht5-gaudi:v0.9
|
||||
image: opea/speecht5-gaudi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: speecht5-deploy
|
||||
args: null
|
||||
@@ -365,7 +365,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: audio-qna-config
|
||||
image: opea/llm-tgi:v0.9
|
||||
image: opea/llm-tgi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-deploy
|
||||
args: null
|
||||
@@ -416,7 +416,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: audio-qna-config
|
||||
image: opea/audioqna:v0.9
|
||||
image: opea/audioqna:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: audioqna-backend-server-deploy
|
||||
args: null
|
||||
|
||||
@@ -50,7 +50,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: audio-qna-config
|
||||
image: opea/asr:v0.9
|
||||
image: opea/asr:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: asr-deploy
|
||||
args: null
|
||||
@@ -101,7 +101,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: audio-qna-config
|
||||
image: opea/whisper:v0.9
|
||||
image: opea/whisper:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: whisper-deploy
|
||||
args: null
|
||||
@@ -152,7 +152,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: audio-qna-config
|
||||
image: opea/tts:v0.9
|
||||
image: opea/tts:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: tts-deploy
|
||||
args: null
|
||||
@@ -203,7 +203,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: audio-qna-config
|
||||
image: opea/speecht5:v0.9
|
||||
image: opea/speecht5:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: speecht5-deploy
|
||||
args: null
|
||||
@@ -321,7 +321,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: audio-qna-config
|
||||
image: opea/llm-tgi:v0.9
|
||||
image: opea/llm-tgi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-deploy
|
||||
args: null
|
||||
@@ -372,7 +372,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: audio-qna-config
|
||||
image: opea/audioqna:v0.9
|
||||
image: opea/audioqna:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: audioqna-backend-server-deploy
|
||||
args: null
|
||||
|
||||
@@ -161,7 +161,7 @@ Find the corresponding [compose.yaml](./docker/gaudi/compose.yaml).
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/ChatQnA/docker/gaudi/
|
||||
TAG=v0.9 docker compose up -d
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
> Notice: Currently only the **Habana Driver 1.16.x** is supported for Gaudi.
|
||||
@@ -174,7 +174,7 @@ Find the corresponding [compose.yaml](./docker/xeon/compose.yaml).
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/ChatQnA/docker/xeon/
|
||||
TAG=v0.9 docker compose up -d
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
Refer to the [Xeon Guide](./docker/xeon/README.md) for more instructions on building docker images from source.
|
||||
@@ -183,7 +183,7 @@ Refer to the [Xeon Guide](./docker/xeon/README.md) for more instructions on buil
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/ChatQnA/docker/gpu/
|
||||
TAG=v0.9 docker compose up -d
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
Refer to the [NVIDIA GPU Guide](./docker/gpu/README.md) for more instructions on building docker images from source.
|
||||
|
||||
@@ -32,7 +32,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna:v0.9
|
||||
image: opea/chatqna:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
args: null
|
||||
|
||||
@@ -40,7 +40,7 @@ spec:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: INDEX_NAME
|
||||
image: opea/dataprep-redis:v0.9
|
||||
image: opea/dataprep-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: dataprep-deploy
|
||||
args: null
|
||||
|
||||
@@ -32,7 +32,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/embedding-tei:v0.9
|
||||
image: opea/embedding-tei:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-deploy
|
||||
args: null
|
||||
|
||||
@@ -32,7 +32,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/llm-tgi:v0.9
|
||||
image: opea/llm-tgi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-deploy
|
||||
args: null
|
||||
|
||||
@@ -31,7 +31,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/tei-gaudi:v0.9
|
||||
image: opea/tei-gaudi:latest
|
||||
name: reranking-dependency-deploy
|
||||
args:
|
||||
- --model-id
|
||||
|
||||
@@ -32,7 +32,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/reranking-tei:v0.9
|
||||
image: opea/reranking-tei:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-deploy
|
||||
args: null
|
||||
|
||||
@@ -40,7 +40,7 @@ spec:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: INDEX_NAME
|
||||
image: opea/retriever-redis:v0.9
|
||||
image: opea/retriever-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: retriever-deploy
|
||||
args: null
|
||||
|
||||
@@ -32,7 +32,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna:v0.9
|
||||
image: opea/chatqna:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
args: null
|
||||
|
||||
@@ -40,7 +40,7 @@ spec:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: INDEX_NAME
|
||||
image: opea/dataprep-redis:v0.9
|
||||
image: opea/dataprep-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: dataprep-deploy
|
||||
args: null
|
||||
|
||||
@@ -32,7 +32,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/embedding-tei:v0.9
|
||||
image: opea/embedding-tei:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-deploy
|
||||
args: null
|
||||
|
||||
@@ -32,7 +32,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/llm-tgi:v0.9
|
||||
image: opea/llm-tgi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-deploy
|
||||
args: null
|
||||
|
||||
@@ -31,7 +31,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/tei-gaudi:v0.9
|
||||
image: opea/tei-gaudi:latest
|
||||
name: reranking-dependency-deploy
|
||||
args:
|
||||
- --model-id
|
||||
|
||||
@@ -32,7 +32,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/reranking-tei:v0.9
|
||||
image: opea/reranking-tei:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-deploy
|
||||
args: null
|
||||
|
||||
@@ -40,7 +40,7 @@ spec:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: INDEX_NAME
|
||||
image: opea/retriever-redis:v0.9
|
||||
image: opea/retriever-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: retriever-deploy
|
||||
args: null
|
||||
|
||||
@@ -32,7 +32,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/chatqna:v0.9
|
||||
image: opea/chatqna:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: chatqna-backend-server-deploy
|
||||
args: null
|
||||
|
||||
@@ -40,7 +40,7 @@ spec:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: INDEX_NAME
|
||||
image: opea/dataprep-redis:v0.9
|
||||
image: opea/dataprep-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: dataprep-deploy
|
||||
args: null
|
||||
|
||||
@@ -32,7 +32,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/embedding-tei:v0.9
|
||||
image: opea/embedding-tei:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: embedding-deploy
|
||||
args: null
|
||||
|
||||
@@ -32,7 +32,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/llm-tgi:v0.9
|
||||
image: opea/llm-tgi:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: llm-deploy
|
||||
args: null
|
||||
|
||||
@@ -31,7 +31,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/tei-gaudi:v0.9
|
||||
image: opea/tei-gaudi:latest
|
||||
name: reranking-dependency-deploy
|
||||
args:
|
||||
- --model-id
|
||||
|
||||
@@ -32,7 +32,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: qna-config
|
||||
image: opea/reranking-tei:v0.9
|
||||
image: opea/reranking-tei:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: reranking-deploy
|
||||
args: null
|
||||
|
||||
@@ -40,7 +40,7 @@ spec:
|
||||
configMapKeyRef:
|
||||
name: qna-config
|
||||
key: INDEX_NAME
|
||||
image: opea/retriever-redis:v0.9
|
||||
image: opea/retriever-redis:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: retriever-deploy
|
||||
args: null
|
||||
|
||||
@@ -8,7 +8,6 @@ FROM python:3.11-slim
|
||||
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
|
||||
libgl1-mesa-glx \
|
||||
libjemalloc-dev \
|
||||
vim \
|
||||
git
|
||||
|
||||
RUN useradd -m -s /bin/bash user && \
|
||||
|
||||
@@ -8,7 +8,6 @@ FROM python:3.11-slim
|
||||
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
|
||||
libgl1-mesa-glx \
|
||||
libjemalloc-dev \
|
||||
vim \
|
||||
git
|
||||
|
||||
RUN useradd -m -s /bin/bash user && \
|
||||
|
||||
@@ -8,7 +8,6 @@ FROM python:3.11-slim
|
||||
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
|
||||
libgl1-mesa-glx \
|
||||
libjemalloc-dev \
|
||||
vim \
|
||||
git
|
||||
|
||||
RUN useradd -m -s /bin/bash user && \
|
||||
|
||||
@@ -160,11 +160,11 @@ Note: Please replace with `host_ip` with you external IP address, do not use loc

```bash
cd GenAIExamples/ChatQnA/docker/aipc/
TAG=v0.9 docker compose up -d
docker compose up -d

# let ollama service runs
# e.g. ollama run llama3
ollama run $OLLAMA_MODEL
OLLAMA_HOST=${host_ip}:11434 ollama run $OLLAMA_MODEL
# for windows
# ollama run %OLLAMA_MODEL%
```
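With Ollama bound to the host IP as above, a quick reachability check from another machine or container is to query its REST API (assuming the default 11434 port shown in this setup):

```bash
# List the models the Ollama server is currently serving
curl http://${host_ip}:11434/api/tags
```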
@@ -211,26 +211,26 @@ cd GenAIExamples/ChatQnA/docker/gaudi/
|
||||
If use tgi for llm backend.
|
||||
|
||||
```bash
|
||||
TAG=v0.9 docker compose -f compose.yaml up -d
|
||||
docker compose -f compose.yaml up -d
|
||||
```
|
||||
|
||||
If use vllm for llm backend.
|
||||
|
||||
```bash
|
||||
TAG=v0.9 docker compose -f compose_vllm.yaml up -d
|
||||
docker compose -f compose_vllm.yaml up -d
|
||||
```
|
||||
|
||||
If use vllm-on-ray for llm backend.
|
||||
|
||||
```bash
|
||||
TAG=v0.9 docker compose -f compose_vllm_ray.yaml up -d
|
||||
docker compose -f compose_vllm_ray.yaml up -d
|
||||
```
|
||||
|
||||
If you want to enable guardrails microservice in the pipeline, please follow the below command instead:
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/ChatQnA/docker/gaudi/
|
||||
TAG=v0.9 docker compose -f compose_guardrails.yaml up -d
|
||||
docker compose -f compose_guardrails.yaml up -d
|
||||
```
|
||||
|
||||
> **_NOTE:_** Users need at least two Gaudi cards to run the ChatQnA successfully.
|
||||
|
||||
@@ -17,7 +17,7 @@ start the docker containers
|
||||
|
||||
```
|
||||
cd ./GenAIExamples/ChatQnA/docker/gaudi
|
||||
TAG=v0.9 docker compose up -d
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
Check the start up log by `docker compose -f ./docker/gaudi/compose.yaml logs`.
|
||||
@@ -149,7 +149,7 @@ Set the LLM_MODEL_ID then restart the containers.
|
||||
Also you can check overall logs with the following command, where the compose.yaml is the mega service docker-compose configuration file.
|
||||
|
||||
```
|
||||
TAG=v0.9 docker compose -f ./docker-composer/gaudi/compose.yaml logs
|
||||
docker compose -f ./docker-composer/gaudi/compose.yaml logs
|
||||
```
|
||||
|
||||
## 4. Check each micro service used by the Mega Service
|
||||
|
||||
@@ -121,7 +121,7 @@ Note: Please replace with `host_ip` with you external IP address, do **NOT** use
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/ChatQnA/docker/gpu/
|
||||
TAG=v0.9 docker compose up -d
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
### Validate MicroServices and MegaService
|
||||
|
||||
@@ -1,7 +1,7 @@
CHAT_BASE_URL = 'http://backend_address:8888/v1/chatqna'
CHAT_BASE_URL = '/v1/chatqna'

UPLOAD_FILE_BASE_URL = 'http://backend_address:6007/v1/dataprep'
UPLOAD_FILE_BASE_URL = '/v1/dataprep'

GET_FILE = 'http://backend_address:6007/v1/dataprep/get_file'
GET_FILE = '/v1/dataprep/get_file'

DELETE_FILE = 'http://backend_address:6007/v1/dataprep/delete_file'
DELETE_FILE = '/v1/dataprep/delete_file'
@@ -226,13 +226,13 @@ cd GenAIExamples/ChatQnA/docker/xeon/
If use TGI backend.

```bash
TAG=v0.9 docker compose -f compose.yaml up -d
docker compose -f compose.yaml up -d
```

If use vLLM backend.

```bash
TAG=v0.9 docker compose -f compose_vllm.yaml up -d
docker compose -f compose_vllm.yaml up -d
```

### Validate Microservices
@@ -205,7 +205,7 @@ Note: Please replace with `host_ip` with you external IP address, do not use loc
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/ChatQnA/docker/xeon/
|
||||
TAG=v0.9 docker compose -f compose_qdrant.yaml up -d
|
||||
docker compose -f compose_qdrant.yaml up -d
|
||||
```
|
||||
|
||||
### Validate Microservices
|
||||
|
||||
@@ -103,7 +103,7 @@ services:
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
restart: unless-stopped
|
||||
tgi-service:
|
||||
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
|
||||
image: ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu
|
||||
container_name: tgi-service
|
||||
ports:
|
||||
- "9009:80"
|
||||
|
||||
@@ -102,7 +102,7 @@ services:
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
restart: unless-stopped
|
||||
tgi-service:
|
||||
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
|
||||
image: ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu
|
||||
container_name: tgi-service
|
||||
ports:
|
||||
- "6042:80"
|
||||
|
||||
@@ -70,7 +70,7 @@ services:
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
restart: unless-stopped
|
||||
tgi-service:
|
||||
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
|
||||
image: ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu
|
||||
container_name: tgi-service
|
||||
ports:
|
||||
- "9009:80"
|
||||
|
||||
@@ -16,18 +16,18 @@ The ChatQnA uses the below prebuilt images if you choose a Xeon deployment

- redis-vector-db: redis/redis-stack:7.2.0-v9
- tei_embedding_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
- embedding: opea/embedding-tei:v0.9
- retriever: opea/retriever-redis:v0.9
- embedding: opea/embedding-tei:latest
- retriever: opea/retriever-redis:latest
- tei_xeon_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
- reranking: opea/reranking-tei:v0.9
- tgi-service: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
- llm: opea/llm-tgi:v0.9
- chaqna-xeon-backend-server: opea/chatqna:v0.9
- reranking: opea/reranking-tei:latest
- tgi-service: ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu
- llm: opea/llm-tgi:latest
- chaqna-xeon-backend-server: opea/chatqna:latest

Should you desire to use the Gaudi accelerator, two alternate images are used for the embedding and llm services.
For Gaudi:

- tei-embedding-service: opea/tei-gaudi:v0.9
- tei-embedding-service: opea/tei-gaudi:latest
- tgi-service: ghcr.io/huggingface/tgi-gaudi:1.2.1

> [NOTE]

@@ -67,7 +67,7 @@ This involves deploying the ChatQnA custom resource. You can use chatQnA_xeon.ya
```sh
export CLIENT_POD=$(kubectl get pod -n chatqa -l app=client-test -o jsonpath={.items..metadata.name})
export accessUrl=$(kubectl get gmc -n chatqa -o jsonpath="{.items[?(@.metadata.name=='chatqa')].status.accessUrl}")
kubectl exec "$CLIENT_POD" -n chatqa -- curl $accessUrl -X POST -d '{"text":"What is the revenue of Nike in 2023?","parameters":{"max_new_tokens":17, "do_sample": true}}' -H 'Content-Type: application/json'
kubectl exec "$CLIENT_POD" -n chatqa -- curl -s --no-buffer $accessUrl -X POST -d '{"text":"What is the revenue of Nike in 2023?","parameters":{"max_new_tokens":17, "do_sample": true}}' -H 'Content-Type: application/json'
```

6. Perhaps you want to try another LLM model? Just modify the application custom resource to use another LLM model

@@ -98,7 +98,7 @@ This involves deploying the ChatQnA custom resource. You can use chatQnA_xeon.ya
9. Access the updated pipeline using the same URL from above using the client pod

```sh
kubectl exec "$CLIENT_POD" -n chatqa -- curl $accessUrl -X POST -d '{"text":"What is the revenue of Nike in 2023?","parameters":{"max_new_tokens":17, "do_sample": true}}' -H 'Content-Type: application/json'
kubectl exec "$CLIENT_POD" -n chatqa -- curl -s --no-buffer $accessUrl -X POST -d '{"text":"What is the revenue of Nike in 2023?","parameters":{"max_new_tokens":17, "do_sample": true}}' -H 'Content-Type: application/json'
```

> [NOTE]
@@ -501,7 +501,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/dataprep-redis:v0.9"
|
||||
image: "opea/dataprep-redis:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: data-prep
|
||||
@@ -579,7 +579,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/embedding-tei:v0.9"
|
||||
image: "opea/embedding-tei:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: embedding-usvc
|
||||
@@ -657,7 +657,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/llm-tgi:v0.9"
|
||||
image: "opea/llm-tgi:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: llm-uservice
|
||||
@@ -807,7 +807,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/reranking-tei:v0.9"
|
||||
image: "opea/reranking-tei:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: reranking-usvc
|
||||
@@ -885,7 +885,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/retriever-redis:v0.9"
|
||||
image: "opea/retriever-redis:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: retriever-usvc
|
||||
@@ -1212,7 +1212,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/chatqna:v0.9"
|
||||
image: "opea/chatqna:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
|
||||
@@ -500,7 +500,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/dataprep-redis:v0.9"
|
||||
image: "opea/dataprep-redis:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: data-prep
|
||||
@@ -578,7 +578,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/embedding-tei:v0.9"
|
||||
image: "opea/embedding-tei:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: embedding-usvc
|
||||
@@ -656,7 +656,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/llm-tgi:v0.9"
|
||||
image: "opea/llm-tgi:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: llm-uservice
|
||||
@@ -806,7 +806,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/reranking-tei:v0.9"
|
||||
image: "opea/reranking-tei:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: reranking-usvc
|
||||
@@ -884,7 +884,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/retriever-redis:v0.9"
|
||||
image: "opea/retriever-redis:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: retriever-usvc
|
||||
@@ -1122,7 +1122,7 @@ spec:
|
||||
name: chatqna-tgi-config
|
||||
securityContext:
|
||||
{}
|
||||
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
|
||||
image: "ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
@@ -1209,7 +1209,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/chatqna:v0.9"
|
||||
image: "opea/chatqna:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
|
||||
@@ -71,7 +71,7 @@ Find the corresponding [compose.yaml](./docker/gaudi/compose.yaml).
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/CodeGen/docker/gaudi
|
||||
TAG=v0.9 docker compose up -d
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
> Notice: Currently only the **Habana Driver 1.16.x** is supported for Gaudi.
|
||||
@@ -84,7 +84,7 @@ Find the corresponding [compose.yaml](./docker/xeon/compose.yaml).
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/CodeGen/docker/xeon
|
||||
TAG=v0.9 docker compose up -d
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
Refer to the [Xeon Guide](./docker/xeon/README.md) for more instructions on building docker images from source.
|
||||
|
||||
@@ -10,7 +10,6 @@ ENV LANG=C.UTF-8
|
||||
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
|
||||
libgl1-mesa-glx \
|
||||
libjemalloc-dev \
|
||||
vim \
|
||||
git
|
||||
|
||||
RUN useradd -m -s /bin/bash user && \
|
||||
|
||||
@@ -103,7 +103,7 @@ export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:7778/v1/codegen"
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/CodeGen/docker/gaudi
|
||||
TAG=v0.9 docker compose up -d
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
### Validate the MicroServices and MegaService
|
||||
|
||||
@@ -106,7 +106,7 @@ Note: Please replace the `host_ip` with you external IP address, do not use `loc
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/CodeGen/docker/xeon
|
||||
TAG=v0.9 docker compose up -d
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
### Validate the MicroServices and MegaService
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
|
||||
services:
|
||||
tgi-service:
|
||||
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
|
||||
image: ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu
|
||||
container_name: tgi-service
|
||||
ports:
|
||||
- "8028:80"
|
||||
|
||||
@@ -36,5 +36,5 @@ In the below example we illustrate on Xeon.
|
||||
```bash
|
||||
export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name})
|
||||
export accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='codegen')].status.accessUrl}")
|
||||
kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -X POST -d '{"query": "def print_hello_world():"}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_codegen.log
|
||||
kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl -s --no-buffer $accessUrl -X POST -d '{"query": "def print_hello_world():"}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_codegen.log
|
||||
```
|
||||
|
||||
@@ -170,7 +170,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/llm-tgi:v0.9"
|
||||
image: "opea/llm-tgi:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: llm-uservice
|
||||
@@ -326,7 +326,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/codegen:v0.9"
|
||||
image: "opea/codegen:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
|
||||
@@ -169,7 +169,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/llm-tgi:v0.9"
|
||||
image: "opea/llm-tgi:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: llm-uservice
|
||||
@@ -239,7 +239,7 @@ spec:
|
||||
name: codegen-tgi-config
|
||||
securityContext:
|
||||
{}
|
||||
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
|
||||
image: "ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
@@ -322,7 +322,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/codegen:v0.9"
|
||||
image: "opea/codegen:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
|
||||
@@ -126,7 +126,7 @@ spec:
|
||||
- name: no_proxy
|
||||
value:
|
||||
securityContext: {}
|
||||
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
|
||||
image: "ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
@@ -179,7 +179,7 @@ spec:
|
||||
- name: no_proxy
|
||||
value:
|
||||
securityContext: {}
|
||||
image: "opea/llm-tgi:v0.9"
|
||||
image: "opea/llm-tgi:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: llm-uservice
|
||||
@@ -230,7 +230,7 @@ spec:
|
||||
- name: no_proxy
|
||||
value:
|
||||
securityContext: null
|
||||
image: "opea/codegen:v0.9"
|
||||
image: "opea/codegen:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: codegen
|
||||
@@ -273,7 +273,7 @@ spec:
|
||||
- name: no_proxy
|
||||
value:
|
||||
securityContext: null
|
||||
image: "opea/codegen-react-ui:v0.9"
|
||||
image: "opea/codegen-react-ui:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: react-ui
|
||||
|
||||
@@ -57,7 +57,7 @@ Find the corresponding [compose.yaml](./docker/gaudi/compose.yaml).
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/CodeTrans/docker/gaudi
|
||||
TAG=v0.9 docker compose up -d
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
> Notice: Currently only the **Habana Driver 1.16.x** is supported for Gaudi.
|
||||
@@ -70,7 +70,7 @@ Find the corresponding [compose.yaml](./docker/xeon/compose.yaml).
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/CodeTrans/docker/xeon
|
||||
TAG=v0.9 docker compose up -d
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
Refer to the [Xeon Guide](./docker/xeon/README.md) for more instructions on building docker images from source.
|
||||
|
||||
@@ -8,7 +8,6 @@ FROM python:3.11-slim
|
||||
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
|
||||
libgl1-mesa-glx \
|
||||
libjemalloc-dev \
|
||||
vim \
|
||||
git
|
||||
|
||||
RUN useradd -m -s /bin/bash user && \
|
||||
|
||||
@@ -62,7 +62,7 @@ export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:7777/v1/codetrans"
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/CodeTrans/docker/gaudi
|
||||
TAG=v0.9 docker compose up -d
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
### Validate Microservices
|
||||
|
||||
@@ -70,7 +70,7 @@ export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:7777/v1/codetrans"
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/CodeTrans/docker/xeon
|
||||
TAG=v0.9 docker compose up -d
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
### Validate Microservices
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
|
||||
services:
|
||||
tgi-service:
|
||||
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
|
||||
image: ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu
|
||||
container_name: codetrans-tgi-service
|
||||
ports:
|
||||
- "8008:80"
|
||||
|
||||
@@ -170,7 +170,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/llm-tgi:v0.9"
|
||||
image: "opea/llm-tgi:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: llm-uservice
|
||||
@@ -324,7 +324,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/codetrans:v0.9"
|
||||
image: "opea/codetrans:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
|
||||
@@ -169,7 +169,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/llm-tgi:v0.9"
|
||||
image: "opea/llm-tgi:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: llm-uservice
|
||||
@@ -239,7 +239,7 @@ spec:
|
||||
name: codetrans-tgi-config
|
||||
securityContext:
|
||||
{}
|
||||
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
|
||||
image: "ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
@@ -322,7 +322,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/codetrans:v0.9"
|
||||
image: "opea/codetrans:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
|
||||
@@ -8,7 +8,6 @@ COPY GenAIComps /home/user/GenAIComps
|
||||
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
|
||||
libgl1-mesa-glx \
|
||||
libjemalloc-dev \
|
||||
vim \
|
||||
git
|
||||
|
||||
RUN useradd -m -s /bin/bash user && \
|
||||
@@ -27,4 +26,4 @@ USER user
|
||||
|
||||
WORKDIR /home/user
|
||||
|
||||
ENTRYPOINT ["python", "retrieval_tool.py"]
|
||||
ENTRYPOINT ["python", "retrieval_tool.py"]
|
||||
|
||||
@@ -1,36 +1,36 @@
# DocRetriever Application
# DocRetriever Application with Docker

DocRetriever are the most widely adopted use case for leveraging the different methodologies to match user query against a set of free-text records. DocRetriever is essential to RAG system, which bridges the knowledge gap by dynamically fetching relevant information from external sources, ensuring that responses generated remain factual and current. The core of this architecture are vector databases, which are instrumental in enabling efficient and semantic retrieval of information. These databases store data as vectors, allowing RAG to swiftly access the most pertinent documents or data points based on semantic similarity.

### 1. Build Images for necessary microservices. (This step will not needed after docker image released)
## 1. Build Images for necessary microservices. (This step will not needed after docker image released)

- Embedding TEI Image

```bash
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
docker build -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/langchain/docker/Dockerfile .
```

- Retriever Vector store Image

```bash
docker build -t opea/retriever-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/langchain/redis/docker/Dockerfile .
```

- Rerank TEI Image

```bash
docker build -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/tei/docker/Dockerfile .
```

- Dataprep Image

```bash
docker build -t opea/dataprep-on-ray-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain_ray/docker/Dockerfile .
```

### 2. Build Images for MegaService
## 2. Build Images for MegaService

```bash
cd ..
@@ -38,7 +38,7 @@ git clone https://github.com/opea-project/GenAIExamples.git
docker build --no-cache -t opea/doc-index-retriever:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f GenAIExamples/DocIndexRetriever/docker/Dockerfile .
```

### 3. Start all the services Docker Containers
## 3. Start all the services Docker Containers

```bash
export host_ip="YOUR IP ADDR"
@@ -59,10 +59,10 @@ export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8000/v1/retrievaltool"
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
export llm_hardware='xeon' #xeon, xpu, gaudi
cd GenAIExamples/DocIndexRetriever/docker/${llm_hardware}/
TAG=v0.9 docker compose -f docker-compose.yaml up -d
docker compose -f docker-compose.yaml up -d
```

### 3. Validation
## 3. Validation

Add Knowledge Base via HTTP Links:
@@ -86,41 +86,41 @@ curl http://${host_ip}:8889/v1/retrievaltool -X POST -H "Content-Type: applicati
{"id":"354e62c703caac8c547b3061433ec5e8","reranked_docs":[{"id":"06d5a5cefc06cf9a9e0b5fa74a9f233c","text":"Close SearchsearchMenu WikiNewsCommunity Daysx-twitter linkedin github searchStreamlining implementation of enterprise-grade Generative AIEfficiently integrate secure, performant, and cost-effective Generative AI workflows into business value.TODAYOPEA..."}],"initial_query":"Explain the OPEA project?"}
```

### 4. Trouble shooting
## 4. Trouble shooting

1. check all containers are alive

```bash
# redis vector store
docker container logs redis-vector-db
# dataprep to redis microservice, input document files
docker container logs dataprep-redis-server

# embedding microservice
curl http://${host_ip}:6000/v1/embeddings \
-X POST \
-d '{"text":"Explain the OPEA project"}' \
-H 'Content-Type: application/json' > query
docker container logs embedding-tei-server

# if you used tei-gaudi
docker container logs tei-embedding-gaudi-server

# retriever microservice, input embedding output docs
curl http://${host_ip}:7000/v1/retrieval \
-X POST \
-d @query \
-H 'Content-Type: application/json' > rerank_query
docker container logs retriever-redis-server

# reranking microservice
curl http://${host_ip}:8000/v1/reranking \
-X POST \
-d @rerank_query \
-H 'Content-Type: application/json' > output
docker container logs reranking-tei-server

# megaservice gateway
docker container logs doc-index-retriever-server
```
@@ -58,7 +58,7 @@ Find the corresponding [compose.yaml](./docker/gaudi/compose.yaml).
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/DocSum/docker/gaudi/
|
||||
TAG=v0.9 docker compose -f compose.yaml up -d
|
||||
docker compose -f compose.yaml up -d
|
||||
```
|
||||
|
||||
> Notice: Currently only the **Habana Driver 1.16.x** is supported for Gaudi.
|
||||
@@ -71,7 +71,7 @@ Find the corresponding [compose.yaml](./docker/xeon/compose.yaml).
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/DocSum/docker/xeon/
|
||||
TAG=v0.9 docker compose up -d
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
Refer to the [Xeon Guide](./docker/xeon/README.md) for more instructions on building docker images from source.
|
||||
|
||||
@@ -8,7 +8,6 @@ FROM python:3.11-slim
|
||||
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
|
||||
libgl1-mesa-glx \
|
||||
libjemalloc-dev \
|
||||
vim \
|
||||
git
|
||||
|
||||
RUN useradd -m -s /bin/bash user && \
|
||||
|
||||
@@ -73,7 +73,7 @@ export no_proxy=${your_no_proxy}
|
||||
export http_proxy=${your_http_proxy}
|
||||
export https_proxy=${your_http_proxy}
|
||||
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
|
||||
export TGI_LLM_ENDPOINT="http://${your_ip}:8008"
|
||||
export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
|
||||
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export LLM_SERVICE_HOST_IP=${host_ip}
|
||||
@@ -86,7 +86,7 @@ Note: Please replace with `host_ip` with your external IP address, do not use lo
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/DocSum/docker/gaudi
|
||||
TAG=v0.9 docker compose up -d
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
### Validate Microservices
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
|
||||
|
||||
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
|
||||
export TGI_LLM_ENDPOINT="http://${your_ip}:8008"
|
||||
export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export LLM_SERVICE_HOST_IP=${host_ip}
|
||||
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum"
|
||||
|
||||
@@ -82,7 +82,7 @@ export no_proxy=${your_no_proxy}
|
||||
export http_proxy=${your_http_proxy}
|
||||
export https_proxy=${your_http_proxy}
|
||||
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
|
||||
export TGI_LLM_ENDPOINT="http://${your_ip}:8008"
|
||||
export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
|
||||
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export LLM_SERVICE_HOST_IP=${host_ip}
|
||||
@@ -95,7 +95,7 @@ Note: Please replace with `host_ip` with your external IP address, do not use lo
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/DocSum/docker/xeon
|
||||
TAG=v0.9 docker compose up -d
|
||||
docker compose up -d
|
||||
```

### Validate Microservices

@@ -3,7 +3,7 @@

services:
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
image: ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu
container_name: tgi-service
ports:
- "8008:80"

@@ -8,7 +8,7 @@ Install GMC in your Kubernetes cluster, if you have not already done so, by foll
The DocSum application is defined as a Custom Resource (CR) file that the above GMC operator acts upon. It first checks whether the microservices listed in the CR yaml file are running; if not, it starts them and then connects them. When the DocSum RAG pipeline is ready, the service endpoint details are returned, letting you use the application. If you run the "kubectl get pods" command you will see all the component microservices, in particular embedding, retriever, rerank, and llm.

The DocSum pipeline uses prebuilt images. The Xeon version uses the prebuilt image llm-docsum-tgi:latest which internally leverages
the image ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu. The service is called tgi-svc. Meanwhile, the Gaudi version launches the
the image ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu. The service is called tgi-svc. Meanwhile, the Gaudi version launches the
service tgi-gaudi-svc, which uses the image ghcr.io/huggingface/tgi-gaudi:1.2.1. Both TGI model services serve the model specified in the LLM_MODEL_ID variable that is exported by you. In the below example we use Intel/neural-chat-7b-v3-3.
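
For example, exporting that variable before deploying the pipeline can be as simple as:

```bash
# Serve the model referenced in the example above from both TGI services.
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
```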

[NOTE]
@@ -60,7 +60,7 @@ This involves deploying the application pipeline custom resource. You can use do
```bash
export CLIENT_POD=$(kubectl get pod -n ${ns} -l app=client-test -o jsonpath={.items..metadata.name})
export accessUrl=$(kubectl get gmc -n $ns -o jsonpath="{.items[?(@.metadata.name=='docsum')].status.accessUrl}")
kubectl exec "$CLIENT_POD" -n $ns -- curl $accessUrl -X POST -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' -H 'Content-Type: application/json'
kubectl exec "$CLIENT_POD" -n $ns -- curl -s --no-buffer $accessUrl -X POST -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' -H 'Content-Type: application/json'
```

7. Clean up. Use standard Kubernetes custom resource remove commands. Confirm the clean-up by listing the pods in the application namespace.
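
A minimal sketch of that clean-up, assuming the CR was applied from a manifest named `docsum_xeon.yaml` (the file name is only a placeholder for whichever manifest you applied):

```bash
# Remove the DocSum custom resource and its pipeline (substitute your actual CR file).
kubectl delete -f docsum_xeon.yaml -n $ns
# Verify that the component pods have been removed.
kubectl get pods -n $ns
```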

@@ -170,7 +170,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/llm-docsum-tgi:v0.9"
image: "opea/llm-docsum-tgi:latest"
imagePullPolicy: IfNotPresent
ports:
- name: llm-uservice
@@ -324,7 +324,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/docsum:v0.9"
image: "opea/docsum:latest"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /tmp

@@ -169,7 +169,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/llm-docsum-tgi:v0.9"
image: "opea/llm-docsum-tgi:latest"
imagePullPolicy: IfNotPresent
ports:
- name: llm-uservice
@@ -239,7 +239,7 @@ spec:
name: docsum-tgi-config
securityContext:
{}
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
image: "ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data
@@ -322,7 +322,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/docsum:v0.9"
image: "opea/docsum:latest"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /tmp

@@ -126,7 +126,7 @@ spec:
- name: no_proxy
value:
securityContext: {}
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
image: "ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data
@@ -180,7 +180,7 @@ spec:
value:

securityContext: {}
image: "opea/llm-docsum-tgi:v0.9"
image: "opea/llm-docsum-tgi:latest"
imagePullPolicy: IfNotPresent
ports:
- name: llm-uservice
@@ -231,7 +231,7 @@ spec:
- name: no_proxy
value:
securityContext: null
image: "opea/docsum:v0.9"
image: "opea/docsum:latest"
imagePullPolicy: IfNotPresent
ports:
- name: docsum
@@ -274,7 +274,7 @@ spec:
- name: no_proxy
value:
securityContext: null
image: "opea/docsum-react-ui:v0.9"
image: "opea/docsum-react-ui:latest"
imagePullPolicy: IfNotPresent
ports:
- name: react-ui

@@ -9,7 +9,6 @@ FROM python:3.11-slim
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
libgl1-mesa-glx \
libjemalloc-dev \
vim \
git

RUN useradd -m -s /bin/bash user && \

@@ -86,7 +86,7 @@ Note: Please replace with `host_ip` with your external IP address, do not use lo

```bash
cd GenAIExamples/FaqGen/docker/gaudi
TAG=v0.9 docker compose up -d
docker compose up -d
```

### Validate Microservices

@@ -85,7 +85,7 @@ Note: Please replace with `host_ip` with your external IP address, do not use lo

```bash
cd GenAIExamples/FaqGen/docker/xeon
TAG=v0.9 docker compose up -d
docker compose up -d
```

### Validate Microservices

@@ -3,7 +3,7 @@

services:
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
image: ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu
container_name: tgi-xeon-server
ports:
- "8008:80"

@@ -117,7 +117,7 @@ spec:
value: "http://faq-tgi-svc.default.svc.cluster.local:8010"
- name: HUGGINGFACEHUB_API_TOKEN
value: "insert-your-huggingface-token-here"
image: opea/llm-faqgen-tgi:v0.9
image: opea/llm-faqgen-tgi:latest
imagePullPolicy: IfNotPresent
args: null
ports:
@@ -166,7 +166,7 @@ spec:
value: faq-mega-server-svc
- name: MEGA_SERVICE_PORT
value: "7777"
image: opea/faqgen:v0.9
image: opea/faqgen:latest
imagePullPolicy: IfNotPresent
args: null
ports:

@@ -24,7 +24,7 @@ spec:
env:
- name: DOC_BASE_URL
value: http://{insert_your_ip_here}:7779/v1/faqgen
image: opea/faqgen-ui:v0.9
image: opea/faqgen-ui:latest
imagePullPolicy: IfNotPresent
args: null
ports:

@@ -96,7 +96,7 @@ spec:
value: "http://faq-tgi-cpu-svc.default.svc.cluster.local:8011"
- name: HUGGINGFACEHUB_API_TOKEN
value: "insert-your-huggingface-token-here"
image: opea/llm-faqgen-tgi:v0.9
image: opea/llm-faqgen-tgi:latest
imagePullPolicy: IfNotPresent
args: null
ports:
@@ -145,7 +145,7 @@ spec:
value: faq-mega-server-cpu-svc
- name: MEGA_SERVICE_PORT
value: "7777"
image: opea/faqgen:v0.9
image: opea/faqgen:latest
imagePullPolicy: IfNotPresent
args: null
ports:

@@ -126,7 +126,7 @@ spec:
- name: no_proxy
value:
securityContext: {}
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
image: "ghcr.io/huggingface/text-generation-inference:sha-8f99f16-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data
@@ -179,7 +179,7 @@ spec:
- name: no_proxy
value:
securityContext: {}
image: "opea/llm-faqgen-tgi:v0.9"
image: "opea/llm-faqgen-tgi:latest"
imagePullPolicy: IfNotPresent
ports:
- name: llm-uservice
@@ -230,7 +230,7 @@ spec:
- name: no_proxy
value:
securityContext: null
image: "opea/faqgen:v0.9"
image: "opea/faqgen:latest"
imagePullPolicy: IfNotPresent
ports:
- name: faqgen
@@ -273,7 +273,7 @@ spec:
- name: no_proxy
value:
securityContext: null
image: "opea/faqgen-react-ui:v0.9"
image: "opea/faqgen-react-ui:latest"
imagePullPolicy: IfNotPresent
ports:
- name: react-ui

@@ -2,7 +2,7 @@

OPEA Productivity Suite is a powerful tool designed to streamline your workflow and boost productivity. This application leverages the cutting-edge OPEA microservices to provide a comprehensive suite of features that cater to the diverse needs of modern enterprises.

### Key Features
## Key Features

- Chat with Documents: Engage in intelligent conversations with your documents using our advanced RAG Capabilities. Our Retrieval-Augmented Generation (RAG) model allows you to ask questions, receive relevant information, and gain insights from your documents in real-time.

@@ -1,66 +1,72 @@
<h1 align="center" id="title"> Productivity Suite React UI</h1>
# Productivity Suite React UI

### 📸 Project Screenshots
## 📸 Project Screenshots

![project-screenshot](../../assets/img/chat_qna_init.png)
![project-screenshot](../../assets/img/Login_page.png)

<h2>🧐 Features</h2>
## 🧐 Features

Here are some of the project's features:

#### CHAT QNA
### CHAT QNA

- Start a Text Chat: Initiate a text chat with the ability to input written conversations, where the dialogue content can also be customized based on uploaded files.
- Context Awareness: The AI assistant maintains the context of the conversation, understanding references to previous statements or questions. This allows for more natural and coherent exchanges.

##### DATA SOURCE
#### DATA SOURCE

- The choice between uploading locally or copying a remote link. Chat according to the uploaded knowledge base.
- Uploaded files get listed and the user is able to add or remove files/links.
- The choice between uploading locally or copying a remote link. Chat according to the uploaded knowledge base.
- Uploaded files get listed and the user is able to add or remove files/links.

###### Screen Shot
##### Screen Shot

![project-screenshot](../../assets/img/data_source.png)
![project-screenshot](../../assets/img/chat_qna_init.png)

- Clear: Clear the record of the current dialog box without retaining the contents of the dialog box.
- Chat history: Historical chat records can still be retained after refreshing, making it easier for users to view the context.
- Conversational Chat: The application maintains a history of the conversation, allowing users to review previous messages and the AI to refer back to earlier points in the dialogue when necessary.
###### Screen Shots
![project-screenshot](../../assets/img/chat_qna_init.png)
![project-screenshot](../../assets/img/chatqna_with_conversation.png)

#### CODEGEN
##### Screen Shots

![project-screenshot](../../assets/img/data_source.png)
![project-screenshot](../../assets/img/chat_qna_init.png)

### CODEGEN

- Generate code: generate the corresponding code based on the current user's input.
###### Screen Shot
![project-screenshot](../../assets/img/codegen.png)

#### DOC SUMMARY
### DOC SUMMARY

- Summarizing Uploaded Files: Upload files from their local device, then click 'Generate Summary' to summarize the content of the uploaded file. The summary will be displayed in the 'Summary' box.
- Summarizing Text via Pasting: Paste the text to be summarized into the text box, then click 'Generate Summary' to produce a condensed summary of the content, which will be displayed in the 'Summary' box on the right.
- Scroll to Bottom: The summarized content will automatically scroll to the bottom.
###### Screen Shot
![project-screenshot](../../assets/img/doc_summary_paste.png)
![project-screenshot](../../assets/img/doc_summary_file.png)

#### FAQ Generator
#### Screen Shot

![project-screenshot](../../assets/img/doc_summary_paste.png)
![project-screenshot](../../assets/img/doc_summary_file.png)

### FAQ Generator

- Generate FAQs from Text via Pasting: Paste the text into the text box, then click 'Generate FAQ' to produce a condensed FAQ of the content, which will be displayed in the 'FAQ' box below.

- Generate FAQs from Text via txt file Upload: Upload the file in the Upload bar, then click 'Generate FAQ' to produce a condensed FAQ of the content, which will be displayed in the 'FAQ' box below.
###### Screen Shot
![project-screenshot](../../assets/img/faq_generator.png)

<h2>🛠️ Get it Running:</h2>
#### Screen Shot

![project-screenshot](../../assets/img/faq_generator.png)

## 🛠️ Get it Running:

1. Clone the repo.

2. `cd` into the current folder.

3. Create a `.env` file and add the following variables and values.
```env
```
VITE_BACKEND_SERVICE_ENDPOINT_CHATQNA=''
VITE_BACKEND_SERVICE_ENDPOINT_CODEGEN=''
VITE_BACKEND_SERVICE_ENDPOINT_DOCSUM=''

@@ -63,7 +63,7 @@ cd ..

The Productivity Suite is composed of multiple GenAIExample reference solutions working together.

### 8.1 Build ChatQnA MegaService Docker Images
#### 8.1 Build ChatQnA MegaService Docker Images

```bash
git clone https://github.com/opea-project/GenAIExamples.git
@@ -72,7 +72,7 @@ docker build --no-cache -t opea/chatqna:latest --build-arg https_proxy=$https_pr
cd ../../..
```

### 8.2 Build DocSum Megaservice Docker Images
#### 8.2 Build DocSum Megaservice Docker Images

```bash
cd GenAIExamples/DocSum/docker
@@ -80,7 +80,7 @@ docker build --no-cache -t opea/docsum:latest --build-arg https_proxy=$https_pro
cd ../../..
```

### 8.3 Build CodeGen Megaservice Docker Images
#### 8.3 Build CodeGen Megaservice Docker Images

```bash
cd GenAIExamples/CodeGen/docker
@@ -88,7 +88,7 @@ docker build --no-cache -t opea/codegen:latest --build-arg https_proxy=$https_pr
cd ../../..
```

### 8.4 Build FAQGen Megaservice Docker Images
#### 8.4 Build FAQGen Megaservice Docker Images

```bash
cd GenAIExamples/FaqGen/docker
@@ -195,7 +195,7 @@ cd GenAIExamples/ProductivitySuite/docker/xeon/
```

```bash
TAG=v0.9 docker compose -f compose.yaml up -d
docker compose -f compose.yaml up -d
```

### Setup Keycloak
@@ -206,7 +206,7 @@ Please refer to [keycloak_setup_guide](keycloak_setup_guide.md) for more detail

1. TEI Embedding Service

```bash
curl ${host_ip}:6006/embed \
-X POST \
-d '{"inputs":"What is Deep Learning?"}' \
-H 'Content-Type: application/json'
```
```bash
curl ${host_ip}:6006/embed \
-X POST \
-d '{"inputs":"What is Deep Learning?"}' \
-H 'Content-Type: application/json'
```

2. Embedding Microservice

```bash
curl http://${host_ip}:6000/v1/embeddings\
-X POST \
-d '{"text":"hello"}' \
-H 'Content-Type: application/json'
```
```bash
curl http://${host_ip}:6000/v1/embeddings\
-X POST \
-d '{"text":"hello"}' \
-H 'Content-Type: application/json'
```

3. Retriever Microservice

To consume the retriever microservice, you need to generate a mock embedding vector with a Python script. The length of the embedding vector
is determined by the embedding model.
Here we use the model `EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"`, whose vector size is 768.
To consume the retriever microservice, you need to generate a mock embedding vector with a Python script. The length of the embedding vector
is determined by the embedding model.
Here we use the model `EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"`, whose vector size is 768.

Check the vector dimension of your embedding model and set the `your_embedding` dimension to match.
Check the vector dimension of your embedding model and set the `your_embedding` dimension to match.

```bash
export your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
curl http://${host_ip}:7000/v1/retrieval \
-X POST \
-d "{\"text\":\"test\",\"embedding\":${your_embedding}}" \
-H 'Content-Type: application/json'
```
```bash
export your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
curl http://${host_ip}:7000/v1/retrieval \
-X POST \
-d "{\"text\":\"test\",\"embedding\":${your_embedding}}" \
-H 'Content-Type: application/json'
```

4. TEI Reranking Service

```bash
curl http://${host_ip}:8808/rerank \
-X POST \
-d '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' \
-H 'Content-Type: application/json'
```
```bash
curl http://${host_ip}:8808/rerank \
-X POST \
-d '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' \
-H 'Content-Type: application/json'
```

5. Reranking Microservice

```bash
curl http://${host_ip}:8000/v1/reranking\
-X POST \
-d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
-H 'Content-Type: application/json'
```
```bash
curl http://${host_ip}:8000/v1/reranking\
-X POST \
-d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
-H 'Content-Type: application/json'
```

6. LLM backend Service (ChatQnA, DocSum, FAQGen)

```bash
curl http://${host_ip}:9009/generate \
-X POST \
-d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
-H 'Content-Type: application/json'
```
```bash
curl http://${host_ip}:9009/generate \
-X POST \
-d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
-H 'Content-Type: application/json'
```

8. LLM backend Service (CodeGen)
7. LLM backend Service (CodeGen)

```bash
curl http://${host_ip}:8028/generate \
-X POST \
-d '{"inputs":"def print_hello_world():","parameters":{"max_new_tokens":256, "do_sample": true}}' \
-H 'Content-Type: application/json'
```
```bash
curl http://${host_ip}:8028/generate \
-X POST \
-d '{"inputs":"def print_hello_world():","parameters":{"max_new_tokens":256, "do_sample": true}}' \
-H 'Content-Type: application/json'
```

9. ChatQnA LLM Microservice
8. ChatQnA LLM Microservice

```bash
curl http://${host_ip}:9000/v1/chat/completions\
-X POST \
-d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
-H 'Content-Type: application/json'
```
```bash
curl http://${host_ip}:9000/v1/chat/completions\
-X POST \
-d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
-H 'Content-Type: application/json'
```

10. CodeGen LLM Microservice
9. CodeGen LLM Microservice

```bash
curl http://${host_ip}:9001/v1/chat/completions\
@@ -498,50 +498,56 @@ Here is an example of running Productivity Suite
![project-screenshot](../../assets/img/chat_qna_init.png)
![project-screenshot](../../assets/img/Login_page.png)

<h2>🧐 Features</h2>
## 🧐 Features

Here are some of the project's features:

#### CHAT QNA
### CHAT QNA

- Start a Text Chat: Initiate a text chat with the ability to input written conversations, where the dialogue content can also be customized based on uploaded files.
- Context Awareness: The AI assistant maintains the context of the conversation, understanding references to previous statements or questions. This allows for more natural and coherent exchanges.

##### DATA SOURCE
### DATA SOURCE

- The choice between uploading locally or copying a remote link. Chat according to the uploaded knowledge base.
- Uploaded files get listed and the user is able to add or remove files/links.
- The choice between uploading locally or copying a remote link. Chat according to the uploaded knowledge base.
- Uploaded files get listed and the user is able to add or remove files/links.

###### Screen Shot
#### Screen Shot

![project-screenshot](../../assets/img/data_source.png)
![project-screenshot](../../assets/img/chat_qna_init.png)

- Clear: Clear the record of the current dialog box without retaining the contents of the dialog box.
- Chat history: Historical chat records can still be retained after refreshing, making it easier for users to view the context.
- Conversational Chat: The application maintains a history of the conversation, allowing users to review previous messages and the AI to refer back to earlier points in the dialogue when necessary.
###### Screen Shots
![project-screenshot](../../assets/img/chat_qna_init.png)
![project-screenshot](../../assets/img/chatqna_with_conversation.png)

#### CODEGEN
#### Screen Shots

![project-screenshot](../../assets/img/data_source.png)
![project-screenshot](../../assets/img/chat_qna_init.png)

### CODEGEN

- Generate code: generate the corresponding code based on the current user's input.
###### Screen Shot
![project-screenshot](../../assets/img/codegen.png)

#### DOC SUMMARY
### DOC SUMMARY

- Summarizing Uploaded Files: Upload files from their local device, then click 'Generate Summary' to summarize the content of the uploaded file. The summary will be displayed in the 'Summary' box.
- Summarizing Text via Pasting: Paste the text to be summarized into the text box, then click 'Generate Summary' to produce a condensed summary of the content, which will be displayed in the 'Summary' box on the right.
- Scroll to Bottom: The summarized content will automatically scroll to the bottom.
###### Screen Shot
![project-screenshot](../../assets/img/doc_summary_paste.png)
![project-screenshot](../../assets/img/doc_summary_file.png)

#### FAQ Generator
#### Screen Shot

![project-screenshot](../../assets/img/doc_summary_paste.png)
![project-screenshot](../../assets/img/doc_summary_file.png)

### FAQ Generator

- Generate FAQs from Text via Pasting: Paste the text into the text box, then click 'Generate FAQ' to produce a condensed FAQ of the content, which will be displayed in the 'FAQ' box below.

- Generate FAQs from Text via txt file Upload: Upload the file in the Upload bar, then click 'Generate FAQ' to produce a condensed FAQ of the content, which will be displayed in the 'FAQ' box below.
###### Screen Shot
![project-screenshot](../../assets/img/faq_generator.png)

#### Screen Shot

![project-screenshot](../../assets/img/faq_generator.png)

@@ -22,24 +22,26 @@ To begin with, ensure that you have following prerequisites in place:
1. Kubernetes installation: Make sure that you have Kubernetes installed.
2. Images: Make sure you have all the images ready for the examples and components stated above. You may refer to [README](../../docker/xeon/README.md) for steps to build the images.
3. Configuration Values: Set the following values in all the yaml files before proceeding with the deployment:
#### a. HUGGINGFACEHUB_API_TOKEN (Your HuggingFace token to download your desired model from HuggingFace):
```
# You may set the HUGGINGFACEHUB_API_TOKEN via this method:
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
cd GenAIExamples/ProductivitySuite/kubernetes/manifests/xeon/
sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" *.yaml
```

#### b. Set the proxies based on your network configuration
```
# Look for the http_proxy, https_proxy and no_proxy keys and fill in the values for all the yaml files with your system proxy configuration.
```
a. HUGGINGFACEHUB_API_TOKEN (Your HuggingFace token to download your desired model from HuggingFace):
```
# You may set the HUGGINGFACEHUB_API_TOKEN via this method:
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
cd GenAIExamples/ProductivitySuite/kubernetes/manifests/xeon/
sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" *.yaml
```

b. Set the proxies based on your network configuration
```
# Look for the http_proxy, https_proxy and no_proxy keys and fill in the values for all the yaml files with your system proxy configuration.
```

c. Set all the backend service endpoints for the REACT UI service
```
# Set up all the backend service endpoints in productivity_suite_reactui.yaml for the UI to consume.
# Look for ENDPOINT in the yaml and insert the url endpoints for all the required backend services.
```

#### c. Set all the backend service endpoints for the REACT UI service
```
# Set up all the backend service endpoints in productivity_suite_reactui.yaml for the UI to consume.
# Look for ENDPOINT in the yaml and insert the url endpoints for all the required backend services.
```
4. MODEL_ID and model-volume (OPTIONAL): You may also customize "MODEL_ID" to use a different model and model-volume for the volume to be mounted (see the sketch after this list).
5. After finishing the steps above, you can proceed with deploying the yaml files.
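
A minimal sketch of where to find those two settings for step 4, assuming the manifests expose the model id and the model cache volume under the names used above:

```bash
# Locate the MODEL_ID values and model-volume definitions to customize before applying the manifests.
cd GenAIExamples/ProductivitySuite/kubernetes/manifests/xeon/
grep -n "MODEL_ID\|model-volume" *.yaml
```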

@@ -65,7 +65,7 @@ spec:
- configMapRef:
name: chat-history-config
securityContext: null
image: "opea/chathistory-mongo-server:v0.9"
image: "opea/chathistory-mongo-server:latest"
imagePullPolicy: IfNotPresent
ports:
- name: chat-history

@@ -499,7 +499,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/dataprep-redis:v0.9"
image: "opea/dataprep-redis:latest"
imagePullPolicy: IfNotPresent
ports:
- name: data-prep
@@ -557,7 +557,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/embedding-tei:v0.9"
image: "opea/embedding-tei:latest"
imagePullPolicy: IfNotPresent
ports:
- name: embedding-usvc
@@ -615,7 +615,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/llm-tgi:v0.9"
image: "opea/llm-tgi:latest"
imagePullPolicy: IfNotPresent
ports:
- name: llm-uservice
@@ -753,7 +753,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/reranking-tei:v0.9"
image: "opea/reranking-tei:latest"
imagePullPolicy: IfNotPresent
ports:
- name: reranking-usvc
@@ -811,7 +811,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/retriever-redis:v0.9"
image: "opea/retriever-redis:latest"
imagePullPolicy: IfNotPresent
ports:
- name: retriever-usvc
@@ -1069,7 +1069,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/chatqna:v0.9"
image: "opea/chatqna:latest"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /tmp

@@ -171,7 +171,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/llm-tgi:v0.9"
image: "opea/llm-tgi:latest"
imagePullPolicy: IfNotPresent
ports:
- name: llm-uservice
@@ -301,7 +301,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/codegen:v0.9"
image: "opea/codegen:latest"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /tmp

@@ -171,7 +171,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/llm-docsum-tgi:v0.9"
image: "opea/llm-docsum-tgi:latest"
imagePullPolicy: IfNotPresent
ports:
- name: llm-uservice
@@ -301,7 +301,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/docsum:v0.9"
image: "opea/docsum:latest"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /tmp

@@ -183,7 +183,7 @@ spec:
- configMapRef:
name: faqgen-llm-uservice-config
securityContext: {}
image: "opea/llm-faqgen-tgi:v0.9"
image: "opea/llm-faqgen-tgi:latest"
imagePullPolicy: IfNotPresent
ports:
- name: llm-uservice
@@ -234,7 +234,7 @@ spec:
- name: no_proxy
value: ""
securityContext: null
image: "opea/faqgen:v0.9"
image: "opea/faqgen:latest"
imagePullPolicy: IfNotPresent
ports:
- name: faqgen

@@ -82,7 +82,7 @@ spec:
- name: APP_KEYCLOAK_SERVICE_ENDPOINT
value: ""
securityContext: null
image: "opea/productivity-suite-react-ui-server:v0.9"
image: "opea/productivity-suite-react-ui-server:latest"
imagePullPolicy: IfNotPresent
ports:
- name: react-ui

@@ -65,7 +65,7 @@ spec:
- configMapRef:
name: prompt-registry-config
securityContext: null
image: "opea/promptregistry-mongo-server:v0.9"
image: "opea/promptregistry-mongo-server:latest"
imagePullPolicy: IfNotPresent
ports:
- name: prompt-registry

@@ -69,7 +69,7 @@ If your version of `Habana Driver` < 1.16.0 (check with `hl-smi`), run the follo

```bash
cd GenAIExamples/SearchQnA/docker/gaudi/
TAG=v0.9 docker compose up -d
docker compose up -d
```

> Notice: Currently only the **Habana Driver 1.16.x** is supported for Gaudi.
@@ -82,7 +82,7 @@ Find the corresponding [compose.yaml](./docker/xeon/compose.yaml).

```bash
cd GenAIExamples/SearchQnA/docker/xeon/
TAG=v0.9 docker compose up -d
docker compose up -d
```

Refer to the [Xeon Guide](./docker/xeon/README.md) for more instructions on building docker images from source.

@@ -8,7 +8,6 @@ FROM python:3.11-slim
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
libgl1-mesa-glx \
libjemalloc-dev \
vim \
git

RUN useradd -m -s /bin/bash user && \

Some files were not shown because too many files have changed in this diff.