Compare commits


1 Commit

Author SHA1 Message Date
Jaswanth Karani
1e8c34c574 fixed ISSUE-528 (#590)
* fixed ISSUE-528

Signed-off-by: jaswanth8888 <karani.jaswanth@gmail.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Signed-off-by: jaswanth8888 <karani.jaswanth@gmail.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>

(cherry picked from commit 45cf553d36)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-08-15 10:40:16 +08:00
391 changed files with 3474 additions and 19429 deletions

.github/workflows/VisualQnA.yml

@@ -0,0 +1,50 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: VisualQnA-test
on:
pull_request_target:
branches: [main]
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
paths:
- VisualQnA/**
- "!**.md"
- "!**/ui/**"
- .github/workflows/VisualQnA.yml
workflow_dispatch:
# If there is a new commit, the previous jobs will be canceled
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
VisualQnA:
runs-on: aise-cluster
strategy:
matrix:
job_name: ["basic"]
fail-fast: false
steps:
- name: Clean Up Working Directory
run: sudo rm -rf ${{github.workspace}}/*
- name: Checkout Repo
uses: actions/checkout@v4
with:
ref: "refs/pull/${{ github.event.number }}/merge"
- name: Run Test
env:
HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
run: |
cd ${{ github.workspace }}/VisualQnA/tests
bash test_${{ matrix.job_name }}_inference.sh
- name: Publish pipeline artifact
if: ${{ !cancelled() }}
uses: actions/upload-artifact@v4
with:
name: ${{ matrix.job_name }}
path: ${{ github.workspace }}/VisualQnA/tests/*.log


@@ -1,166 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: Example jobs
permissions: read-all
on:
workflow_call:
inputs:
node:
required: true
type: string
example:
required: true
type: string
tag:
default: "latest"
required: false
type: string
build:
default: true
required: false
type: boolean
scan:
default: true
required: false
type: boolean
test_compose:
default: false
required: false
type: boolean
test_k8s:
default: false
required: false
type: boolean
test_gmc:
default: false
required: false
type: boolean
opea_branch:
default: "main"
required: false
type: string
jobs:
####################################################################################################
# Image Build
####################################################################################################
build-images:
runs-on: "docker-build-${{ inputs.node }}"
steps:
- name: Clean Up Working Directory
run: sudo rm -rf ${{github.workspace}}/*
- name: Checkout Repo
uses: actions/checkout@v4
- name: Clone required repos
run: |
cd ${{ github.workspace }}/${{ inputs.example }}/docker
docker_compose_path=${{ github.workspace }}/${{ inputs.example }}/docker/docker_build_compose.yaml
if [[ $(grep -c "tei-gaudi:" ${docker_compose_path}) != 0 ]]; then
git clone https://github.com/huggingface/tei-gaudi.git
fi
if [[ $(grep -c "vllm:" ${docker_compose_path}) != 0 ]]; then
git clone https://github.com/vllm-project/vllm.git
fi
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps && git checkout ${{ inputs.opea_branch }} && cd ../
- name: Build Image
if: ${{ fromJSON(inputs.build) }}
uses: opea-project/validation/actions/image-build@main
with:
work_dir: ${{ github.workspace }}/${{ inputs.example }}/docker
docker_compose_path: ${{ github.workspace }}/${{ inputs.example }}/docker/docker_build_compose.yaml
registry: ${OPEA_IMAGE_REPO}opea
tag: ${{ inputs.tag }}
####################################################################################################
# Trivy Scan
####################################################################################################
get-image-list:
needs: [build-images]
if: ${{ fromJSON(inputs.scan) && inputs.node == 'gaudi' }}
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.scan-matrix.outputs.matrix }}
steps:
- name: Checkout Repo
uses: actions/checkout@v4
- name: Set Matrix
id: scan-matrix
run: |
pip install yq
compose_path=${{ github.workspace }}/${{ inputs.example }}/docker/docker_build_compose.yaml
echo "matrix=$(cat ${compose_path} | yq -r '.[]' | jq 'keys' | jq -c '.')" >> $GITHUB_OUTPUT
scan-images:
needs: [get-image-list, build-images]
if: ${{ fromJSON(inputs.scan) && inputs.node == 'gaudi'}}
runs-on: "docker-build-${{ inputs.node }}"
strategy:
matrix:
image: ${{ fromJSON(needs.get-image-list.outputs.matrix) }}
fail-fast: false
steps:
- name: Pull Image
run: |
docker pull ${OPEA_IMAGE_REPO}opea/${{ matrix.image }}:${{ inputs.tag }}
echo "OPEA_IMAGE_REPO=${OPEA_IMAGE_REPO}" >> $GITHUB_ENV
- name: Scan Container
uses: opea-project/validation/actions/trivy-scan@main
with:
image-ref: ${{ env.OPEA_IMAGE_REPO }}opea/${{ matrix.image }}:${{ inputs.tag }}
output: ${{ matrix.image }}-scan.txt
- name: Cleanup
if: always()
run: docker rmi -f ${OPEA_IMAGE_REPO}opea/${{ matrix.image }}:${{ inputs.tag }}
- uses: actions/upload-artifact@v4.3.4
with:
name: ${{ matrix.image }}-scan
path: ${{ matrix.image }}-scan.txt
overwrite: true
####################################################################################################
# Docker Compose Test
####################################################################################################
test-example-compose:
needs: [build-images]
if: ${{ fromJSON(inputs.test_compose) }}
uses: ./.github/workflows/_run-docker-compose.yml
with:
tag: ${{ inputs.tag }}
example: ${{ inputs.example }}
hardware: ${{ inputs.node }}
secrets: inherit
####################################################################################################
# K8S Test
####################################################################################################
test-k8s-manifest:
needs: [build-images]
if: ${{ fromJSON(inputs.test_k8s) }}
uses: ./.github/workflows/_manifest-e2e.yml
with:
example: ${{ inputs.example }}
hardware: ${{ inputs.node }}
tag: ${{ inputs.tag }}
context: "CD"
secrets: inherit
####################################################################################################
# GMC Test
####################################################################################################
test-gmc-pipeline:
needs: [build-images]
if: ${{ fromJSON(inputs.test_gmc) }}
uses: ./.github/workflows/_gmc-e2e.yml
with:
example: ${{ inputs.example }}
hardware: ${{ inputs.node }}
secrets: inherit


@@ -1,146 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: Build and deploy GMC system on call and manual
on:
workflow_dispatch:
inputs:
tag:
default: "latest"
required: true
type: string
description: "Tag to apply to images"
node:
default: "xeon"
required: true
type: string
description: "Hardware to run test"
opea_branch:
default: "main"
required: false
type: string
description: 'OPEA branch for image build'
workflow_call:
inputs:
tag:
default: "latest"
required: true
type: string
description: "Tag to apply to images"
node:
default: "xeon"
required: true
type: string
description: "Hardware to run test"
opea_branch:
default: "main"
required: false
type: string
description: 'OPEA branch for image build'
jobs:
####################################################################################################
# Image Build and Scan
####################################################################################################
image-build:
runs-on: "docker-build-${{ inputs.node }}"
steps:
- name: Checkout GenAIInfra repository
uses: actions/checkout@v4
with:
repository: opea-project/GenAIInfra
ref: ${{ inputs.opea_branch }}
path: GenAIInfra
- name: Set variables
id: set_variables
run: |
echo "DOCKER_REGISTRY=${OPEA_IMAGE_REPO}opea" >> $GITHUB_ENV
echo "IMAGE_REPO=${OPEA_IMAGE_REPO}" >> $GITHUB_OUTPUT
echo "VERSION=${{ inputs.tag }}" >> $GITHUB_ENV
echo "VERSION=${{ inputs.tag }}" >> $GITHUB_OUTPUT
- name: Build image and push
run: |
cd ${{github.workspace}}/GenAIInfra/microservices-connector
make docker.build
make docker.push
- name: Scan gmcmanager
if: ${{ inputs.node == 'gaudi' }}
uses: opea-project/validation/actions/trivy-scan@main
with:
image-ref: ${{ env.DOCKER_REGISTRY }}/gmcmanager:${{ env.VERSION }}
output: gmcmanager-scan.txt
- name: Upload gmcmanager scan result
if: ${{ inputs.node == 'gaudi' }}
uses: actions/upload-artifact@v4.3.4
with:
name: gmcmanager-scan
path: gmcmanager-scan.txt
overwrite: true
- name: Scan gmcrouter
if: ${{ inputs.node == 'gaudi' }}
uses: opea-project/validation/actions/trivy-scan@main
with:
image-ref: ${{ env.DOCKER_REGISTRY }}/gmcrouter:${{ env.VERSION }}
output: gmcrouter-scan.txt
- name: Upload gmcrouter scan result
if: ${{ inputs.node == 'gaudi' }}
uses: actions/upload-artifact@v4.3.4
with:
name: gmcrouter-scan
path: gmcrouter-scan.txt
overwrite: true
- name: Clean up images
if: always()
run: |
docker rmi ${{ env.DOCKER_REGISTRY }}/gmcrouter:${{ env.VERSION }}
docker rmi ${{ env.DOCKER_REGISTRY }}/gmcmanager:${{ env.VERSION }}
- name: Clean up GenAIInfra source code
if: always()
run: |
rm -rf ${{github.workspace}}/GenAIInfra
####################################################################################################
# GMC Install
####################################################################################################
gmc-install:
needs: image-build
runs-on: "k8s-${{ inputs.node }}"
steps:
- name: Checkout GenAIInfra repository
uses: actions/checkout@v4
with:
repository: opea-project/GenAIInfra
ref: ${{ inputs.opea_branch }}
path: GenAIInfra
- name: Set variables
run: |
echo "SYSTEM_NAMESPACE=opea-system" >> $GITHUB_ENV
echo "VERSION=${{ inputs.tag }}" >> $GITHUB_ENV
echo "SET_VERSION=true" >> $GITHUB_ENV # to change the tag of microservice images
- name: Cleanup existing GMC
run: |
cd GenAIInfra
.github/workflows/scripts/e2e/gmc_install.sh cleanup_gmc
cd ..
- name: Install GMC
run: |
cd GenAIInfra
.github/workflows/scripts/e2e/gmc_install.sh install_gmc
cd ..
- name: Clean up GenAIInfra source code
if: always()
run: |
rm -rf ${{github.workspace}}/GenAIInfra


@@ -1,105 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: Single Kubernetes Manifest E2e Test For Call
on:
workflow_call:
inputs:
example:
default: "ChatQnA"
description: "The example to test on K8s"
required: true
type: string
hardware:
default: "xeon"
description: "Nodes to run the test, xeon or gaudi"
required: true
type: string
tag:
default: "latest"
description: "Tag to apply to images, default is latest"
required: false
type: string
context:
default: "CI"
description: "CI or CD"
required: false
type: string
jobs:
manifest-test:
runs-on: "k8s-${{ inputs.hardware }}"
continue-on-error: true
steps:
- name: Clean Up Working Directory
run: sudo rm -rf ${{github.workspace}}/*
- name: Get checkout ref
run: |
if [ "${{ github.event_name }}" == "pull_request" ] || [ "${{ github.event_name }}" == "pull_request_target" ]; then
echo "CHECKOUT_REF=refs/pull/${{ github.event.number }}/merge" >> $GITHUB_ENV
else
echo "CHECKOUT_REF=${{ github.ref }}" >> $GITHUB_ENV
fi
echo "checkout ref ${{ env.CHECKOUT_REF }}"
- name: Checkout Repo
uses: actions/checkout@v4
with:
ref: ${{ env.CHECKOUT_REF }}
fetch-depth: 0
- name: Set variables
run: |
echo "IMAGE_REPO=$OPEA_IMAGE_REPO" >> $GITHUB_ENV
echo "IMAGE_TAG=${{ inputs.tag }}" >> $GITHUB_ENV
lower_example=$(echo "${{ inputs.example }}" | tr '[:upper:]' '[:lower:]')
echo "NAMESPACE=$lower_example-$(tr -dc a-z0-9 </dev/urandom | head -c 16)" >> $GITHUB_ENV
echo "ROLLOUT_TIMEOUT_SECONDS=1800s" >> $GITHUB_ENV
echo "KUBECTL_TIMEOUT_SECONDS=60s" >> $GITHUB_ENV
echo "continue_test=true" >> $GITHUB_ENV
echo "should_cleanup=false" >> $GITHUB_ENV
echo "skip_validate=true" >> $GITHUB_ENV
echo "CONTEXT=${{ inputs.context }}" >> $GITHUB_ENV
echo "NAMESPACE=$NAMESPACE"
- name: Kubectl install
id: install
run: |
if [[ ! -f ${{ github.workspace }}/${{ inputs.example }}/tests/test_manifest_on_${{ inputs.hardware }}.sh ]]; then
echo "No test script found, exist test!"
exit 0
else
${{ github.workspace }}/${{ inputs.example }}/tests/test_manifest_on_${{ inputs.hardware }}.sh init_${{ inputs.example }}
echo "should_cleanup=true" >> $GITHUB_ENV
kubectl create ns $NAMESPACE
${{ github.workspace }}/${{ inputs.example }}/tests/test_manifest_on_${{ inputs.hardware }}.sh install_${{ inputs.example }} $NAMESPACE
echo "Testing ${{ inputs.example }}, waiting for pod ready..."
if kubectl rollout status deployment --namespace "$NAMESPACE" --timeout "$ROLLOUT_TIMEOUT_SECONDS"; then
echo "Testing manifests ${{ inputs.example }}, waiting for pod ready done!"
echo "skip_validate=false" >> $GITHUB_ENV
else
echo "Timeout waiting for pods in namespace $NAMESPACE to be ready!"
exit 1
fi
sleep 60
fi
- name: Validate e2e test
if: always()
run: |
if $skip_validate; then
echo "Skip validate"
else
${{ github.workspace }}/${{ inputs.example }}/tests/test_manifest_on_${{ inputs.hardware }}.sh validate_${{ inputs.example }} $NAMESPACE
fi
- name: Kubectl uninstall
if: always()
run: |
if $should_cleanup; then
if ! kubectl delete ns $NAMESPACE --timeout=$KUBECTL_TIMEOUT_SECONDS; then
kubectl delete pods --namespace $NAMESPACE --force --grace-period=0 --all
kubectl delete ns $NAMESPACE --force --grace-period=0 --timeout=$KUBECTL_TIMEOUT_SECONDS
fi
fi


@@ -1,117 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: Image Build
permissions: read-all
on:
workflow_call:
inputs:
registry:
description: Container Registry URL
required: false
default: ""
type: string
tag:
description: Container Tag
required: false
default: "latest"
type: string
example:
description: Example to test
required: true
type: string
hardware:
description: Hardware to run the test on
required: true
type: string
jobs:
get-test-case:
runs-on: ubuntu-latest
outputs:
test_cases: ${{ steps.test-case-matrix.outputs.test_cases }}
CHECKOUT_REF: ${{ steps.get-checkout-ref.outputs.CHECKOUT_REF }}
steps:
- name: Get checkout ref
id: get-checkout-ref
run: |
if [ "${{ github.event_name }}" == "pull_request" ] || [ "${{ github.event_name }}" == "pull_request_target" ]; then
CHECKOUT_REF=refs/pull/${{ github.event.number }}/merge
else
CHECKOUT_REF=${{ github.ref }}
fi
echo "CHECKOUT_REF=${CHECKOUT_REF}" >> $GITHUB_OUTPUT
echo "checkout ref ${CHECKOUT_REF}"
- name: Checkout Repo
uses: actions/checkout@v4
with:
ref: ${{ steps.get-checkout-ref.outputs.CHECKOUT_REF }}
fetch-depth: 0
- name: Get test matrix
shell: bash
id: test-case-matrix
run: |
set -x
example_l=$(echo ${{ inputs.example }} | tr '[:upper:]' '[:lower:]')
cd ${{ github.workspace }}/${{ inputs.example }}/tests
test_cases=$(find . -type f -name "test_${example_l}*on_${{ inputs.hardware }}.sh" -print | cut -d/ -f2 | jq -R '.' | jq -sc '.')
echo "test_cases=$test_cases" >> $GITHUB_OUTPUT
run-test:
needs: [get-test-case]
strategy:
matrix:
test_case: ${{ fromJSON(needs.get-test-case.outputs.test_cases) }}
fail-fast: false
runs-on: ${{ inputs.hardware }}
continue-on-error: true
steps:
- name: Clean up Working Directory
run: |
sudo rm -rf ${{github.workspace}}/* || true
docker system prune -f
docker rmi $(docker images --filter reference="*/*/*:latest" -q) || true
docker rmi $(docker images --filter reference="*/*:ci" -q) || true
- name: Checkout Repo
uses: actions/checkout@v4
with:
ref: ${{ needs.get-test-case.outputs.CHECKOUT_REF }}
fetch-depth: 0
- name: Run test
shell: bash
env:
HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
PINECONE_KEY: ${{ secrets.PINECONE_KEY }}
IMAGE_REPO: ${{ inputs.registry }}
IMAGE_TAG: ${{ inputs.tag }}
example: ${{ inputs.example }}
hardware: ${{ inputs.hardware }}
test_case: ${{ matrix.test_case }}
run: |
cd ${{ github.workspace }}/$example/tests
if [[ "$IMAGE_REPO" == "" ]]; then export IMAGE_REPO="${OPEA_IMAGE_REPO}opea"; fi
if [ -f ${test_case} ]; then timeout 30m bash ${test_case}; else echo "Test script ${test_case} not found, skip test!"; fi
- name: Clean up container
shell: bash
if: cancelled() || failure()
run: |
cd ${{ github.workspace }}/${{ inputs.example }}/docker/${{ inputs.hardware }}
yaml_files=$(find . -type f -name "*compose*yaml")
for file in $yaml_files; do
docker compose -f ${file} stop && docker compose -f ${file} rm -f || true
done
docker system prune -f
docker rmi $(docker images --filter reference="*:5000/*/*" -q) || true
- name: Publish pipeline artifact
if: ${{ !cancelled() }}
uses: actions/upload-artifact@v4
with:
name: ${{ matrix.test_case }}
path: ${{ github.workspace }}/${{ inputs.example }}/tests/*.log


@@ -0,0 +1,91 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: E2E test with docker compose
on:
pull_request_target:
branches: [main]
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
paths:
- "**/docker/**"
- "**/tests/**"
- "**/ui/**"
- "!**.md"
- "!**.txt"
- .github/workflows/docker-compose-e2e.yml
workflow_dispatch:
# If there is a new commit, the previous jobs will be canceled
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
job1:
uses: ./.github/workflows/reuse-get-test-matrix.yml
with:
diff_excluded_files: '.github|README.md|*.txt|deprecate|kubernetes|manifest|gmc|assets'
mega-image-build:
needs: job1
strategy:
matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }}
uses: ./.github/workflows/reuse-image-build.yml
with:
image_tag: ${{ github.event.pull_request.head.sha }}
mega_service: "${{ matrix.example }}"
runner_label: "docker-build-${{ matrix.hardware }}"
Example-test:
needs: [job1, mega-image-build]
strategy:
matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }}
runs-on: ${{ matrix.hardware }}
continue-on-error: true
steps:
- name: Test example
run: |
echo "Matrix - example ${{ matrix.example }}, hardware ${{ matrix.hardware }}"
- name: Clean Up Working Directory
run: sudo rm -rf ${{github.workspace}}/*
- name: Checkout Repo
uses: actions/checkout@v4
with:
ref: "refs/pull/${{ github.event.number }}/merge"
- name: Run test
env:
HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
example: ${{ matrix.example }}
hardware: ${{ matrix.hardware }}
IMAGE_TAG: ${{ needs.mega-image-build.outputs.image_tag }}
IMAGE_REPO_GAUDI: ${{ vars.IMAGE_REPO_GAUDI }}
IMAGE_REPO_XEON: ${{ vars.IMAGE_REPO_XEON }}
run: |
cd ${{ github.workspace }}/$example/tests
if [ "$hardware" == "gaudi" ]; then IMAGE_REPO=$IMAGE_REPO_GAUDI; else IMAGE_REPO=$IMAGE_REPO_XEON; fi
export IMAGE_REPO=${IMAGE_REPO}
example_l=$(echo $example | tr '[:upper:]' '[:lower:]')
if [ -f test_${example_l}_on_${hardware}.sh ]; then timeout 30m bash test_${example_l}_on_${hardware}.sh; else echo "Test script not found, skip test!"; fi
- name: Clean up container
env:
example: ${{ matrix.example }}
hardware: ${{ matrix.hardware }}
if: cancelled() || failure()
run: |
cd ${{ github.workspace }}/$example/docker/$hardware
docker compose stop && docker compose rm -f
echo y | docker system prune
- name: Publish pipeline artifact
if: ${{ !cancelled() }}
uses: actions/upload-artifact@v4
with:
name: ${{ matrix.example }}-${{ matrix.hardware }}
path: ${{ github.workspace }}/${{ matrix.example }}/tests/*.log


@@ -1,10 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# this file should be run in the root of the repo
services:
audioqna:
build:
context: docker
dockerfile: ./Dockerfile
image: ${REGISTRY:-opea}/audioqna:${TAG:-latest}


@@ -1,20 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# this file should be run in the root of the repo
services:
chatqna:
build:
context: docker
dockerfile: ./Dockerfile
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
chatqna-ui:
build:
context: docker/ui
dockerfile: ./docker/Dockerfile
image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest}
chatqna-conversation-ui:
build:
context: docker/ui
dockerfile: ./docker/Dockerfile.react
image: ${REGISTRY:-opea}/chatqna-conversation-ui:${TAG:-latest}


@@ -1,20 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# this file should be run in the root of the repo
services:
codegen:
build:
context: docker
dockerfile: ./Dockerfile
image: ${REGISTRY:-opea}/codegen:${TAG:-latest}
codegen-ui:
build:
context: docker/ui
dockerfile: ./docker/Dockerfile
image: ${REGISTRY:-opea}/codegen-ui:${TAG:-latest}
codegen-react-ui:
build:
context: docker/ui
dockerfile: ./docker/Dockerfile.react
image: ${REGISTRY:-opea}/codegen-conversation-ui:${TAG:-latest}


@@ -1,15 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# this file should be run in the root of the repo
services:
codetrans:
build:
context: docker
dockerfile: ./Dockerfile
image: ${REGISTRY:-opea}/codetrans:${TAG:-latest}
codetrans-ui:
build:
context: docker/ui
dockerfile: ./docker/Dockerfile
image: ${REGISTRY:-opea}/codetrans-ui:${TAG:-latest}


@@ -1,20 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# this file should be run in the root of the repo
services:
docsum:
build:
context: docker
dockerfile: ./Dockerfile
image: ${REGISTRY:-opea}/docsum:${TAG:-latest}
docsum-ui:
build:
context: docker/ui
dockerfile: ./docker/Dockerfile
image: ${REGISTRY:-opea}/docsum-ui:${TAG:-latest}
docsum-react-ui:
build:
context: docker/ui
dockerfile: ./docker/Dockerfile.react
image: ${REGISTRY:-opea}/docsum-react-ui:${TAG:-latest}


@@ -1,20 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# this file should be run in the root of the repo
services:
faqgen:
build:
context: docker
dockerfile: ./Dockerfile
image: ${REGISTRY:-opea}/faqgen:${TAG:-latest}
faqgen-ui:
build:
context: docker/ui
dockerfile: ./docker/Dockerfile
image: ${REGISTRY:-opea}/faqgen-ui:${TAG:-latest}
faqgen-react-ui:
build:
context: docker/ui
dockerfile: ./docker/Dockerfile.react
image: ${REGISTRY:-opea}/faqgen-react-ui:${TAG:-latest}


@@ -1,15 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# this file should be run in the root of the repo
services:
searchqna:
build:
context: docker
dockerfile: ./Dockerfile
image: ${REGISTRY:-opea}/searchqna:${TAG:-latest}
searchqna-ui:
build:
context: docker/ui
dockerfile: ./docker/Dockerfile
image: ${REGISTRY:-opea}/searchqna-ui:${TAG:-latest}


@@ -1,15 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# this file should be run in the root of the repo
services:
translation:
build:
context: docker
dockerfile: ./Dockerfile
image: ${REGISTRY:-opea}/translation:${TAG:-latest}
translation-ui:
build:
context: docker/ui
dockerfile: ./docker/Dockerfile
image: ${REGISTRY:-opea}/translation-ui:${TAG:-latest}


@@ -1,51 +1,57 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# This workflow only tests the GMC pipeline and no longer installs GMC
name: Single GMC E2e Test For CD Workflow Call
name: E2E test with GMC
on:
workflow_call:
inputs:
example:
default: "ChatQnA"
description: "The example to test on K8s"
required: true
type: string
hardware:
default: "xeon"
description: "Nodes to run the test, xeon or gaudi"
required: true
type: string
pull_request_target:
branches: [main]
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
paths:
- "**/kubernetes/**"
- "**/tests/test_gmc**"
- "!**.md"
- "!**.txt"
- "!**/kubernetes/manifests/**"
workflow_dispatch:
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
job1:
uses: ./.github/workflows/reuse-get-test-matrix.yml
with:
diff_excluded_files: '.github|deprecated|docker|assets|*.md|*.txt'
xeon_server_label: 'xeon'
gaudi_server_label: 'gaudi'
gmc-test:
runs-on: "k8s-${{ inputs.hardware }}"
needs: [job1]
strategy:
matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }}
runs-on: "k8s-${{ matrix.hardware }}"
continue-on-error: true
steps:
- name: E2e test gmc
run: |
echo "Matrix - gmc: ${{ matrix.example }}"
- name: Clean Up Working Directory
run: sudo rm -rf ${{github.workspace}}/*
- name: Get checkout ref
run: |
if [ "${{ github.event_name }}" == "pull_request" ] || [ "${{ github.event_name }}" == "pull_request_target" ]; then
echo "CHECKOUT_REF=refs/pull/${{ github.event.number }}/merge" >> $GITHUB_ENV
else
echo "CHECKOUT_REF=${{ github.ref }}" >> $GITHUB_ENV
fi
echo "checkout ref ${{ env.CHECKOUT_REF }}"
- name: Checkout Repo
uses: actions/checkout@v4
with:
ref: ${{ env.CHECKOUT_REF }}
fetch-depth: 0
ref: "refs/pull/${{ github.event.number }}/merge"
- name: Set variables
run: |
if [ ${{ matrix.hardware }} == "gaudi" ]; then IMAGE_REPO=${{ vars.IMAGE_REPO_GAUDI }}; else IMAGE_REPO=${{ vars.IMAGE_REPO_XEON }}; fi
echo "IMAGE_REPO=$OPEA_IMAGE_REPO" >> $GITHUB_ENV
lower_example=$(echo "${{ inputs.example }}" | tr '[:upper:]' '[:lower:]')
echo "APP_NAMESPACE=$lower_example-$(tr -dc a-z0-9 </dev/urandom | head -c 16)" >> $GITHUB_ENV
lower_example=$(echo "${{ matrix.example }}" | tr '[:upper:]' '[:lower:]')
echo "APP_NAMESPACE=$lower_example-$(date +%Y%m%d%H%M%S)" >> $GITHUB_ENV
echo "ROLLOUT_TIMEOUT_SECONDS=1800s" >> $GITHUB_ENV
echo "KUBECTL_TIMEOUT_SECONDS=60s" >> $GITHUB_ENV
echo "continue_test=true" >> $GITHUB_ENV
@@ -59,16 +65,16 @@ jobs:
GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
run: |
if [[ ! -f ${{ github.workspace }}/${{ inputs.example }}/tests/test_gmc_on_${{ inputs.hardware }}.sh ]]; then
if [[ ! -f ${{ github.workspace }}/${{ matrix.example }}/tests/test_gmc_on_${{ matrix.hardware }}.sh ]]; then
echo "No test script found, exist test!"
exit 0
else
echo "should_cleanup=true" >> $GITHUB_ENV
${{ github.workspace }}/${{ inputs.example }}/tests/test_gmc_on_${{ inputs.hardware }}.sh install_${{ inputs.example }}
echo "Testing ${{ inputs.example }}, waiting for pod ready..."
${{ github.workspace }}/${{ matrix.example }}/tests/test_gmc_on_${{ matrix.hardware }}.sh install_${{ matrix.example }}
echo "Testing ${{ matrix.example }}, waiting for pod ready..."
if kubectl rollout status deployment --namespace "$APP_NAMESPACE" --timeout "$ROLLOUT_TIMEOUT_SECONDS"; then
echo "Testing gmc ${{ inputs.example }}, running validation test..."
${{ github.workspace }}/${{ inputs.example }}/tests/test_gmc_on_${{ inputs.hardware }}.sh validate_${{ inputs.example }}
echo "Testing gmc ${{ matrix.example }}, running validation test..."
${{ github.workspace }}/${{ matrix.example }}/tests/test_gmc_on_${{ matrix.hardware }}.sh validate_${{ matrix.example }}
else
echo "Timeout waiting for pods in namespace $APP_NAMESPACE to be ready!"
exit 1


@@ -0,0 +1,33 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# Test
name: Build latest images on push event
on:
push:
branches: [ 'main' ]
paths:
- "**/docker/*.py"
- "**/docker/Dockerfile"
- "**/docker/ui/**"
workflow_dispatch:
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}-on-push
cancel-in-progress: true
jobs:
job1:
uses: ./.github/workflows/reuse-get-test-matrix.yml
mega-image-build:
needs: job1
strategy:
matrix:
workload: ${{ fromJSON(needs.job1.outputs.run_matrix).include.*.example }}
hardware: ["gaudi","xeon"]
uses: ./.github/workflows/reuse-image-build.yml
with:
image_tag: latest
mega_service: "${{ matrix.workload }}"
runner_label: docker-build-${{ matrix.hardware }}

.github/workflows/manifest-e2e.yml

@@ -0,0 +1,111 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: E2E test with manifests
on:
pull_request:
branches: [main]
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
paths:
- "**/kubernetes/manifests/**"
- "**/tests/test_manifest**"
- "!**.md"
- "!**.txt"
workflow_dispatch:
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
job1:
uses: ./.github/workflows/reuse-get-test-matrix.yml
with:
diff_excluded_files: '.github|deprecated|docker|assets|*.md|*.txt'
xeon_server_label: 'xeon'
gaudi_server_label: 'gaudi'
mega-image-build:
needs: job1
strategy:
matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }}
uses: ./.github/workflows/reuse-image-build.yml
with:
image_tag: ${{ github.event.pull_request.head.sha }}
mega_service: "${{ matrix.example }}"
runner_label: "docker-build-${{ matrix.hardware }}"
manifest-test:
needs: [job1, mega-image-build]
strategy:
matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }}
runs-on: "k8s-${{ matrix.hardware }}"
continue-on-error: true
steps:
- name: E2e test manifest
run: |
echo "Matrix - manifest: ${{ matrix.example }}"
- name: Clean Up Working Directory
run: sudo rm -rf ${{github.workspace}}/*
- name: Checkout Repo
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Set variables
run: |
if [ ${{ matrix.hardware }} == "gaudi" ]; then IMAGE_REPO=${{ vars.IMAGE_REPO_GAUDI }}; else IMAGE_REPO=${{ vars.IMAGE_REPO_XEON }}; fi
echo "IMAGE_REPO=$OPEA_IMAGE_REPO" >> $GITHUB_ENV
echo "IMAGE_TAG=${{needs.mega-image-build.outputs.image_tag}}" >> $GITHUB_ENV
lower_example=$(echo "${{ matrix.example }}" | tr '[:upper:]' '[:lower:]')
echo "NAMESPACE=$lower_example-$(date +%Y%m%d%H%M%S)" >> $GITHUB_ENV
echo "ROLLOUT_TIMEOUT_SECONDS=1800s" >> $GITHUB_ENV
echo "KUBECTL_TIMEOUT_SECONDS=60s" >> $GITHUB_ENV
echo "continue_test=true" >> $GITHUB_ENV
echo "should_cleanup=false" >> $GITHUB_ENV
echo "skip_validate=true" >> $GITHUB_ENV
echo "NAMESPACE=$NAMESPACE"
- name: Kubectl install
id: install
run: |
if [[ ! -f ${{ github.workspace }}/${{ matrix.example }}/tests/test_manifest_on_${{ matrix.hardware }}.sh ]]; then
echo "No test script found, exist test!"
exit 0
else
${{ github.workspace }}/${{ matrix.example }}/tests/test_manifest_on_${{ matrix.hardware }}.sh init_${{ matrix.example }}
echo "should_cleanup=true" >> $GITHUB_ENV
kubectl create ns $NAMESPACE
${{ github.workspace }}/${{ matrix.example }}/tests/test_manifest_on_${{ matrix.hardware }}.sh install_${{ matrix.example }} $NAMESPACE
echo "Testing ${{ matrix.example }}, waiting for pod ready..."
if kubectl rollout status deployment --namespace "$NAMESPACE" --timeout "$ROLLOUT_TIMEOUT_SECONDS"; then
echo "Testing manifests ${{ matrix.example }}, waiting for pod ready done!"
echo "skip_validate=false" >> $GITHUB_ENV
else
echo "Timeout waiting for pods in namespace $NAMESPACE to be ready!"
exit 1
fi
sleep 60
fi
- name: Validate e2e test
if: always()
run: |
if $skip_validate; then
echo "Skip validate"
else
${{ github.workspace }}/${{ matrix.example }}/tests/test_manifest_on_${{ matrix.hardware }}.sh validate_${{ matrix.example }} $NAMESPACE
fi
- name: Kubectl uninstall
if: always()
run: |
if $should_cleanup; then
if ! kubectl delete ns $NAMESPACE --timeout=$KUBECTL_TIMEOUT_SECONDS; then
kubectl delete pods --namespace $NAMESPACE --force --grace-period=0 --all
kubectl delete ns $NAMESPACE --force --grace-period=0 --timeout=$KUBECTL_TIMEOUT_SECONDS
fi
fi


@@ -1,86 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: Examples docker images BoM scan on manual event
on:
workflow_dispatch:
inputs:
node:
default: "gaudi"
description: "Hardware to run test"
required: true
type: string
examples:
default: "ChatQnA"
description: 'List of examples to test [AudioQnA,ChatQnA,CodeGen,CodeTrans,DocSum,FaqGen,SearchQnA,Translation]'
required: true
type: string
tag:
default: "latest"
description: "Tag to apply to images"
required: true
type: string
permissions: read-all
jobs:
get-image-list:
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.scan-matrix.outputs.matrix }}
steps:
- name: Checkout Repo
uses: actions/checkout@v4
- name: Set Matrix
id: scan-matrix
run: |
pip install yq
examples=($(echo ${{ inputs.examples }} | tr ',' ' '))
image_list=[]
for example in ${examples[@]}
do
images=$(cat ${{ github.workspace }}/${example}/docker/docker_build_compose.yaml | yq -r '.[]' | jq 'keys' | jq -c '.')
image_list=$(echo ${image_list} | jq -s '.[0] + .[1] | unique' - <(echo ${images}))
done
echo "matrix=$(echo ${image_list} | jq -c '.')" >> $GITHUB_OUTPUT
scan-license:
needs: get-image-list
runs-on: "docker-build-${{ inputs.node }}"
strategy:
matrix:
image: ${{ fromJson(needs.get-image-list.outputs.matrix) }}
fail-fast: false
steps:
- name: Pull Image
run: |
docker pull ${OPEA_IMAGE_REPO}opea/${{ matrix.image }}:${{ inputs.tag }}
echo "OPEA_IMAGE_REPO=${OPEA_IMAGE_REPO}" >> $GITHUB_ENV
- name: SBOM Scan Container
uses: anchore/sbom-action@v0.17.1
with:
image: ${{ env.OPEA_IMAGE_REPO }}opea/${{ matrix.image }}:${{ inputs.tag }}
output-file: ${{ matrix.image }}-sbom-scan.txt
format: 'spdx-json'
- name: Security Scan Container
uses: aquasecurity/trivy-action@0.24.0
with:
image-ref: ${{ env.OPEA_IMAGE_REPO }}opea/${{ matrix.image }}:${{ inputs.tag }}
output: ${{ matrix.image }}-trivy-scan.txt
format: 'table'
exit-code: '1'
ignore-unfixed: true
vuln-type: 'os,library'
severity: 'CRITICAL,HIGH'
- name: Cleanup
if: always()
run: docker rmi -f ${OPEA_IMAGE_REPO}opea/${{ matrix.image }}:${{ inputs.tag }}
- uses: actions/upload-artifact@v4.3.4
with:
name: ${{ matrix.image }}-scan
path: ${{ matrix.image }}-*-scan.txt
overwrite: true


@@ -1,68 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: Examples publish docker image on manual event
on:
workflow_dispatch:
inputs:
node:
default: "gaudi"
description: "Hardware to run test"
required: true
type: string
examples:
default: "ChatQnA"
description: 'List of examples to test [AudioQnA,ChatQnA,CodeGen,CodeTrans,DocSum,FaqGen,SearchQnA,Translation]'
required: true
type: string
tag:
default: "latest"
description: "Tag to apply to images"
required: true
type: string
publish:
default: false
description: 'Publish images to docker hub'
required: false
type: boolean
publish_tags:
default: "latest,v1.0"
description: 'List of tags to apply to published images'
required: false
type: string
permissions: read-all
jobs:
get-image-list:
runs-on: ${{ inputs.node }}
outputs:
matrix: ${{ steps.scan-matrix.outputs.matrix }}
steps:
- name: Checkout Repo
uses: actions/checkout@v4
- name: Set Matrix
id: scan-matrix
run: |
examples=($(echo ${{ inputs.examples }} | tr ',' ' '))
image_list=[]
for example in ${examples[@]}
do
images=$(cat ${{ github.workspace }}/${example}/docker/docker_build_compose.yaml | yq -r '.[]' | jq 'keys' | jq -c '.')
image_list=$(echo ${image_list} | jq -s '.[0] + .[1] | unique' - <(echo ${images}))
done
echo "matrix=$(echo ${image_list} | jq -c '.')" >> $GITHUB_OUTPUT
publish:
needs: [get-image-list]
strategy:
matrix:
image: ${{ fromJSON(needs.get-image-list.outputs.matrix) }}
runs-on: "docker-build-${{ inputs.node }}"
steps:
- name: Image Publish
uses: opea-project/validation/actions/image-publish@main
with:
local_image_ref: ${OPEA_IMAGE_REPO}opea/${{ matrix.image }}:${{ inputs.tag }}
image_name: opea/${{ matrix.image }}
publish_tags: ${{ inputs.publish_tags }}


@@ -1,110 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: Examples CD workflow on manual event
on:
workflow_dispatch:
inputs:
nodes:
default: "gaudi,xeon"
description: "Hardware to run test"
required: true
type: string
examples:
default: "ChatQnA"
description: 'List of examples to test [AudioQnA,ChatQnA,CodeGen,CodeTrans,DocSum,FaqGen,SearchQnA,Translation]'
required: true
type: string
tag:
default: "latest"
description: "Tag to apply to images"
required: true
type: string
deploy_gmc:
default: false
description: 'Whether to deploy gmc'
required: true
type: boolean
build:
default: true
description: 'Build test required images for Examples'
required: false
type: boolean
scan:
default: true
description: 'Scan all images with Trivy'
required: false
type: boolean
test_compose:
default: true
description: 'Test examples with docker compose'
required: false
type: boolean
test_k8s:
default: false
description: 'Test examples with k8s'
required: false
type: boolean
test_gmc:
default: false
description: 'Test examples with gmc'
required: false
type: boolean
opea_branch:
default: "main"
description: 'OPEA branch for image build'
required: false
type: string
permissions: read-all
jobs:
get-test-matrix:
runs-on: ubuntu-latest
outputs:
examples: ${{ steps.get-matrix.outputs.examples }}
nodes: ${{ steps.get-matrix.outputs.nodes }}
steps:
- name: Create Matrix
id: get-matrix
run: |
examples=($(echo ${{ inputs.examples }} | tr ',' ' '))
examples_json=$(printf '%s\n' "${examples[@]}" | sort -u | jq -R '.' | jq -sc '.')
echo "examples=$examples_json" >> $GITHUB_OUTPUT
nodes=($(echo ${{ inputs.nodes }} | tr ',' ' '))
nodes_json=$(printf '%s\n' "${nodes[@]}" | sort -u | jq -R '.' | jq -sc '.')
echo "nodes=$nodes_json" >> $GITHUB_OUTPUT
build-deploy-gmc:
needs: [get-test-matrix]
if: ${{ fromJSON(inputs.deploy_gmc) }}
strategy:
matrix:
node: ${{ fromJson(needs.get-test-matrix.outputs.nodes) }}
fail-fast: false
uses: ./.github/workflows/_gmc-workflow.yml
with:
node: ${{ matrix.node }}
tag: ${{ inputs.tag }}
opea_branch: ${{ inputs.opea_branch }}
secrets: inherit
run-examples:
needs: [get-test-matrix, build-deploy-gmc]
if: always()
strategy:
matrix:
example: ${{ fromJson(needs.get-test-matrix.outputs.examples) }}
node: ${{ fromJson(needs.get-test-matrix.outputs.nodes) }}
fail-fast: false
uses: ./.github/workflows/_example-workflow.yml
with:
node: ${{ matrix.node }}
example: ${{ matrix.example }}
tag: ${{ inputs.tag }}
build: ${{ fromJSON(inputs.build) }}
scan: ${{ fromJSON(inputs.scan) }}
test_compose: ${{ fromJSON(inputs.test_compose) }}
test_k8s: ${{ fromJSON(inputs.test_k8s) }}
test_gmc: ${{ fromJSON(inputs.test_gmc) }}
opea_branch: ${{ inputs.opea_branch }}
secrets: inherit


@@ -1,43 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: Freeze base images and 3rd party images on manual event
on:
workflow_dispatch:
jobs:
freeze-images:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
fetch-depth: 0
ref: ${{ github.ref }}
- uses: actions/setup-python@v5
with:
python-version: "3.10"
- name: Install skopeo
run: |
sudo apt update
sudo apt -y install skopeo
- name: Set up Git
run: |
git config --global user.name "NeuralChatBot"
git config --global user.email "grp_neural_chat_bot@intel.com"
git remote set-url origin https://NeuralChatBot:"${{ secrets.ACTION_TOKEN }}"@github.com/opea-project/GenAIExamples.git
- name: Run script
run: |
bash .github/workflows/scripts/freeze_images.sh
- name: Commit changes
run: |
git add .
git commit -s -m "Freeze third party images tag"
git push


@@ -1,46 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: Freeze OPEA images release tag in readme on manual event
on:
workflow_dispatch:
inputs:
tag:
default: "latest"
description: "Tag to apply to images"
required: true
type: string
jobs:
freeze-tag:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
fetch-depth: 0
ref: ${{ github.ref }}
- uses: actions/setup-python@v5
with:
python-version: "3.10"
- name: Set up Git
run: |
git config --global user.name "NeuralChatBot"
git config --global user.email "grp_neural_chat_bot@intel.com"
git remote set-url origin https://NeuralChatBot:"${{ secrets.ACTION_TOKEN }}"@github.com/opea-project/GenAIExamples.git
- name: Run script
run: |
find . -name "*.md" | xargs sed -i "s|^docker\ compose|TAG=${{ github.event.inputs.tag }}\ docker\ compose|g"
find . -type f -name "*.yaml" \( -path "*/benchmark/*" -o -path "*/kubernetes/*" \) | xargs sed -i -E 's/(opea\/[A-Za-z0-9\-]*:)latest/\1${{ github.event.inputs.tag }}/g'
find . -type f -name "*.md" \( -path "*/benchmark/*" -o -path "*/kubernetes/*" \) | xargs sed -i -E 's/(opea\/[A-Za-z0-9\-]*:)latest/\1${{ github.event.inputs.tag }}/g'
- name: Commit changes
run: |
git add .
git commit -s -m "Freeze OPEA images tag"
git push


@@ -1,78 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: Build latest images on manual event
on:
workflow_dispatch:
inputs:
registry:
default: ""
description: "Registry to store images,e.g., docker.io, default is empty"
required: false
type: string
services:
default: "AudioQnA,ChatQnA,CodeGen,CodeTrans,DocSum,FaqGen,SearchQnA,Translation"
description: "List of examples to build"
required: true
type: string
tag:
default: "latest"
description: "Tag to apply to images"
required: true
type: string
nodes:
default: "docker-build-xeon,docker-build-gaudi"
description: "List of node to run the build on"
required: true
type: string
jobs:
get-build-matrix:
runs-on: ubuntu-latest
outputs:
services: ${{ steps.get-services.outputs.services }}
nodes: ${{ steps.get-services.outputs.nodes }}
steps:
- name: Get test Services
id: get-services
run: |
set -x
service_list=($(echo ${{ github.event.inputs.services }} | tr ',' ' '))
services=$(printf '%s\n' "${service_list[@]}" | sort -u | jq -R '.' | jq -sc '.')
echo "services=$services" >> $GITHUB_OUTPUT
node_list=($(echo ${{ github.event.inputs.nodes }} | tr ',' ' '))
nodes=$(printf '%s\n' "${node_list[@]}" | sort -u | jq -R '.' | jq -sc '.')
echo "nodes=$nodes" >> $GITHUB_OUTPUT
image-build:
needs: get-build-matrix
strategy:
matrix:
service: ${{ fromJSON(needs.get-build-matrix.outputs.services) }}
node: ${{ fromJSON(needs.get-build-matrix.outputs.nodes) }}
runs-on: ${{ matrix.node }}
continue-on-error: true
steps:
- name: Clean Up Working Directory
run: |
sudo rm -rf ${{github.workspace}}/*
- name: Checkout Repo
uses: actions/checkout@v4
- name: Config image repo
run: |
if [[ -z "${{ github.event.inputs.registry }}" ]]; then
echo "image_repo=${OPEA_IMAGE_REPO}" >> $GITHUB_ENV
else
echo "image_repo=${{ github.event.inputs.registry }}/" >> $GITHUB_ENV
fi
- name: Build image
uses: opea-project/validation/actions/image-build@main
with:
work_dir: ${{ github.workspace }}/${{ matrix.service }}
docker_compose_path: ${{ github.workspace }}/.github/workflows/docker/compose/${{ matrix.service }}-compose.yaml
registry: ${{ env.image_repo }}opea
tag: ${{ github.event.inputs.tag }}


@@ -1,40 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: E2E test with docker compose
on:
pull_request_target:
branches: [main]
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
paths:
- "**/docker/**"
- "**/tests/**"
- "**/ui/**"
- "!**.md"
- "!**.txt"
- .github/workflows/pr-docker-compose-e2e.yml
# If there is a new commit, the previous jobs will be canceled
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
get-test-matrix:
uses: ./.github/workflows/_get-test-matrix.yml
with:
diff_excluded_files: '.github|README.md|*.txt|deprecate|kubernetes|manifest|gmc|assets'
example-test:
needs: [get-test-matrix]
strategy:
matrix: ${{ fromJSON(needs.get-test-matrix.outputs.run_matrix) }}
fail-fast: false
uses: ./.github/workflows/_run-docker-compose.yml
with:
registry: "opea"
tag: "ci"
example: ${{ matrix.example }}
hardware: ${{ matrix.hardware }}
secrets: inherit


@@ -1,38 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: E2E test with GMC
on:
pull_request_target:
branches: [main]
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
paths:
- "**/kubernetes/**"
- "**/tests/test_gmc**"
- "!**.md"
- "!**.txt"
- "!**/kubernetes/manifests/**"
workflow_dispatch:
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
job1:
uses: ./.github/workflows/_get-test-matrix.yml
with:
diff_excluded_files: '.github|deprecated|docker|assets|*.md|*.txt'
xeon_server_label: 'xeon'
gaudi_server_label: 'gaudi'
gmc-test:
needs: [job1]
strategy:
matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }}
uses: ./.github/workflows/_gmc-e2e.yml
with:
example: ${{ matrix.example }}
hardware: ${{ matrix.hardware }}
secrets: inherit


@@ -1,48 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: E2E test with manifests
on:
pull_request:
branches: [main]
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
paths:
- "**/kubernetes/manifests/**"
- "**/tests/test_manifest**"
- "!**.md"
- "!**.txt"
workflow_dispatch:
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
job1:
uses: ./.github/workflows/_get-test-matrix.yml
with:
diff_excluded_files: '.github|deprecated|docker|assets|*.md|*.txt'
xeon_server_label: 'xeon'
gaudi_server_label: 'gaudi'
mega-image-build:
needs: job1
strategy:
matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }}
uses: ./.github/workflows/_image-build.yml
with:
image_tag: ${{ github.event.pull_request.head.sha }}
mega_service: "${{ matrix.example }}"
runner_label: "docker-build-${{ matrix.hardware }}"
manifest-test:
needs: [job1, mega-image-build]
strategy:
matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }}
uses: ./.github/workflows/_manifest-e2e.yml
with:
example: ${{ matrix.example }}
hardware: ${{ matrix.hardware }}
tag: ${{ needs.mega-image-build.outputs.image_tag }}
secrets: inherit


@@ -1,57 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# Test
name: Build latest images on push event
on:
push:
branches: [ 'main' ]
paths:
- "**/docker/*.py"
- "**/docker/Dockerfile"
- "**/docker/ui/**"
workflow_dispatch:
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}-on-push
cancel-in-progress: true
jobs:
job1:
uses: ./.github/workflows/_get-test-matrix.yml
mega-image-build:
needs: job1
strategy:
matrix:
workload: ${{ fromJSON(needs.job1.outputs.run_matrix).include.*.example }}
hardware: ["gaudi","xeon"]
runs-on: docker-build-${{ matrix.hardware }}
steps:
- name: Clean up Working Directory
run: |
sudo rm -rf ${{github.workspace}}/*
- name: Checkout Repo
uses: actions/checkout@v4
- name: Check Docker Compose File Exists
env:
service: ${{ matrix.workload }}
run: |
docker_compose_path="${{ github.workspace }}/.github/workflows/docker/compose/${service}-compose.yaml"
if [ -e $docker_compose_path ]; then
echo "file_exists=true" >> $GITHUB_ENV
echo "docker_compose_path=${docker_compose_path}" >> $GITHUB_ENV
else
echo "file_exists=false" >> $GITHUB_ENV
echo "docker_compose_path=${docker_compose_path} for this service does not exist, so skipping image build for this service!!!"
fi
- name: Build Image
if: env.file_exists == 'true'
uses: opea-project/validation/actions/image-build@main
with:
work_dir: ${{ github.workspace }}/${{ matrix.workload }}
docker_compose_path: ${{ env.docker_compose_path }}
registry: ${OPEA_IMAGE_REPO}opea


@@ -51,10 +51,7 @@ jobs:
run: |
set -xe
if [ "${{ github.event_name }}" == "pull_request" ] || [ "${{ github.event_name }}" == "pull_request_target" ]; then
LATEST_COMMIT_SHA=$(curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \
"https://api.github.com/repos/opea-project/GenAIExamples/commits?sha=main" | jq -r '.[0].sha')
echo "Latest commit SHA is $LATEST_COMMIT_SHA"
base_commit=$LATEST_COMMIT_SHA
base_commit=${{ github.event.pull_request.base.sha }}
else
base_commit=$(git rev-parse HEAD~1) # push event
fi


@@ -34,10 +34,6 @@ jobs:
image_repo: ${{ steps.build-megaservice-image.outputs.image_repo }}
image_tag: ${{ steps.build-megaservice-image.outputs.image_tag }}
steps:
- name: Clean up Working Directory
run: |
sudo rm -rf ${{github.workspace}}/* || true
- name: Get checkout ref
run: |
if [ "${{ github.event_name }}" == "pull_request" ] || [ "${{ github.event_name }}" == "pull_request_target" ]; then
@@ -66,4 +62,3 @@ jobs:
fi
echo "IMAGE_TAG=${IMAGE_TAG}"
echo "image_tag=$IMAGE_TAG" >> $GITHUB_OUTPUT
echo "image_repo=${IMAGE_REPO}" >> $GITHUB_OUTPUT


@@ -1,50 +0,0 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
declare -A dict
dict["langchain/langchain"]="docker://docker.io/langchain/langchain"
dict["ghcr.io/huggingface/text-generation-inference"]="docker://ghcr.io/huggingface/text-generation-inference"
function get_latest_version() {
repo_image=$1
versions=$(skopeo list-tags ${dict[$repo_image]} | jq -r '.Tags[]')
printf "version list:\n$versions\n"
latest_version=$(printf "%s\n" "${versions[@]}" | grep -E '^[\.0-9\-]+$' | sort -V | tail -n 1)
echo "latest version: $latest_version"
replace_image_version $repo_image $latest_version
}
function replace_image_version() {
repo_image=$1
version=$2
if [[ -z "$version" ]]; then
echo "version is empty"
else
echo "replace $repo_image:latest with $repo_image:$version"
find . -name "Dockerfile" | xargs sed -i "s|$repo_image:latest.*|$repo_image:$version|g"
find . -name "*.yaml" | xargs sed -i "s|$repo_image:latest[A-Za-z0-9\-]*|$repo_image:$version|g"
find . -name "*.md" | xargs sed -i "s|$repo_image:latest[A-Za-z0-9\-]*|$repo_image:$version|g"
fi
}
function check_branch_name() {
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
echo "$GITHUB_REF_NAME is protected branch"
exit 0
else
echo "branch name is $GITHUB_REF_NAME"
fi
}
function main() {
check_branch_name
for repo_image in "${!dict[@]}"; do
echo "::group::check $repo_image"
get_latest_version $repo_image
echo "::endgroup::"
done
}
main


@@ -1,106 +0,0 @@
# Agents for Question Answering
## Overview
This example showcases a hierarchical multi-agent system for question-answering applications. The architecture diagram is shown below. The supervisor agent interfaces with the user and dispatches tasks to the worker agent and other tools to gather information and come up with answers. The worker agent uses the retrieval tool to generate answers to the queries posted by the supervisor agent. Other tools used by the supervisor agent may include APIs to interface with knowledge graphs, SQL databases, external knowledge bases, etc.
![Architecture Overview](assets/agent_qna_arch.png)
### Why use agents for question answering?
1. Improve relevancy of retrieved context.
An agent can rephrase user queries, decompose them, and iterate to get the most relevant context for answering the user's question. Compared to conventional RAG, a RAG agent can significantly improve the correctness and relevancy of the answer.
2. Use tools to get additional knowledge.
For example, knowledge graphs and SQL databases can be exposed as APIs for Agents to gather knowledge that may be missing in the retrieval vector database.
3. A hierarchical agent architecture can further improve performance.
Expert worker agents, such as a retrieval agent, a knowledge graph agent, or a SQL agent, can provide high-quality output for different aspects of a complex query, and the supervisor agent can aggregate that information to provide a comprehensive answer.
### Roadmap
- v0.9: Worker agent uses an open-source web search tool (DuckDuckGo); agents use OpenAI GPT-4o-mini as the LLM backend.
- v1.0: Worker agent uses the OPEA retrieval megaservice as a tool.
- v1.0 or later: agents use an open-source LLM backend.
- v1.1 or later: add safeguards
## Getting started
1. Build the agent docker image <br/>
First, clone the OPEA GenAIComps repo:
```
export WORKDIR=<your-work-directory>
cd $WORKDIR
git clone https://github.com/opea-project/GenAIComps.git
```
Then build the agent docker image. Both the supervisor agent and the worker agent will use the same docker image, but when we launch the two agents we will specify different strategies and register different tools.
```
cd GenAIComps
docker build -t opea/comps-agent-langchain:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/agent/langchain/docker/Dockerfile .
```
2. Launch tool services <br/>
In this example, we will use some of the mock APIs provided in the Meta CRAG KDD Challenge to demonstrate the benefits of gaining additional context from mock knowledge graphs.
```
docker run -d -p=8080:8000 docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0
```
3. Set up the environment for this example <br/>
First, clone this repo:
```
cd $WORKDIR
git clone https://github.com/opea-project/GenAIExamples.git
```
Second, set up the environment variables:
```
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
# optional: OPENAI_API_KEY
export OPENAI_API_KEY=<your-openai-key>
```
4. Launch agent services <br/>
The configurations of the supervisor agent and the worker agent are defined in the docker-compose yaml file. We currently use OpenAI GPT-4o-mini as the LLM, and we plan to add support for llama3.1-70B-instruct (served by TGI-Gaudi) in a subsequent release.
To use the OpenAI LLM, run the commands below.
```
cd docker/openai/
bash launch_agent_service_openai.sh
```
## Validate services
First, look at the logs of the agent docker containers:
```
docker logs docgrader-agent-endpoint
```
```
docker logs react-agent-endpoint
```
You should see something like "HTTP server setup successful" if the docker containers started successfully.
Second, validate worker agent:
```
curl http://${ip_address}:9095/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
"query": "Most recent album by Taylor Swift"
}'
```
Third, validate supervisor agent:
```
curl http://${ip_address}:9090/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
"query": "Most recent album by Taylor Swift"
}'
```
## How to register your own tools with the agent
You can take a look at the tools YAML and Python files in this example; a hypothetical sketch of a tool entry follows below. For more details, please refer to the "Provide your own tools" section in the instructions [here](https://github.com/minmin-intel/GenAIComps/tree/agent-comp-dev/comps/agent/langchain#-4-provide-your-own-tools).
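As a minimal sketch, a tool entry in the tools YAML pairs a tool name and description with the Python callable that implements it, plus a schema for the tool's arguments. Everything below is a hypothetical placeholder (the `search_web` tool, its `query` argument, and the `tools.py:search_web` helper are not files shipped with this example); follow the linked instructions for the authoritative format.
```
# Hypothetical tool registration sketch; tool name, schema, and helper are placeholders.
search_web:
  description: Search the web for information about a query.
  callable_api: tools.py:search_web # module:function that implements the tool
  args_schema:
    query:
      type: str
      description: The search query string.
  return_output: search_results
```
The agent microservice is pointed at such a file via the `tools` environment variable in the docker-compose file above (e.g., `/home/user/tools/worker_agent_tools.yaml`), and each entry is exposed to the LLM as a callable tool.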

Binary file not shown (image, 69 KiB).


@@ -1,63 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
services:
worker-docgrader-agent:
image: opea/comps-agent-langchain:latest
container_name: docgrader-agent-endpoint
volumes:
- ${WORKDIR}/GenAIComps/comps/agent/langchain/:/home/user/comps/agent/langchain/
- ${TOOLSET_PATH}:/home/user/tools/
ports:
- "9095:9095"
ipc: host
environment:
ip_address: ${ip_address}
strategy: rag_agent
recursion_limit: ${recursion_limit}
llm_engine: openai
OPENAI_API_KEY: ${OPENAI_API_KEY}
model: ${model}
temperature: ${temperature}
max_new_tokens: ${max_new_tokens}
streaming: false
tools: /home/user/tools/worker_agent_tools.yaml
require_human_feedback: false
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-worker-agent-service"
port: 9095
supervisor-react-agent:
image: opea/comps-agent-langchain:latest
container_name: react-agent-endpoint
volumes:
- ${WORKDIR}/GenAIComps/comps/agent/langchain/:/home/user/comps/agent/langchain/
- ${TOOLSET_PATH}:/home/user/tools/
ports:
- "9090:9090"
ipc: host
environment:
ip_address: ${ip_address}
strategy: react_langgraph
recursion_limit: ${recursion_limit}
llm_engine: openai
OPENAI_API_KEY: ${OPENAI_API_KEY}
model: ${model}
temperature: ${temperature}
max_new_tokens: ${max_new_tokens}
streaming: ${streaming}
tools: /home/user/tools/supervisor_agent_tools.yaml
require_human_feedback: false
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-supervisor-agent-service"
CRAG_SERVER: $CRAG_SERVER
WORKER_AGENT_URL: $WORKER_AGENT_URL
port: 9090


@@ -1,13 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
export ip_address=$(hostname -I | awk '{print $1}')
export recursion_limit=12
export model="gpt-4o-mini-2024-07-18"
export temperature=0
export max_new_tokens=512
export OPENAI_API_KEY=${OPENAI_API_KEY}
export WORKER_AGENT_URL="http://${ip_address}:9095/v1/chat/completions"
export CRAG_SERVER=http://${ip_address}:8080
docker compose -f docker-compose-agent-openai.yaml up -d


@@ -1,75 +0,0 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -e
echo "IMAGE_REPO=${IMAGE_REPO}"
echo "OPENAI_API_KEY=${OPENAI_API_KEY}"
WORKPATH=$(dirname "$PWD")
export WORKDIR=$WORKPATH/../../
echo "WORKDIR=${WORKDIR}"
export ip_address=$(hostname -I | awk '{print $1}')
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
function build_agent_docker_image() {
cd $WORKDIR
if [ ! -d "GenAIComps" ] ; then
git clone https://github.com/opea-project/GenAIComps.git
fi
cd GenAIComps
echo PWD: $(pwd)
docker build -t opea/comps-agent-langchain:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/agent/langchain/docker/Dockerfile .
}
function start_services() {
echo "Starting CRAG server"
docker run -d -p=8080:8000 docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0
echo "Starting Agent services"
cd $WORKDIR/GenAIExamples/AgentQnA/docker/openai
bash launch_agent_service_openai.sh
}
function validate() {
local CONTENT="$1"
local EXPECTED_RESULT="$2"
local SERVICE_NAME="$3"
if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then
echo "[ $SERVICE_NAME ] Content is as expected: $CONTENT"
echo 0
else
echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT"
echo 1
fi
}
function run_tests() {
echo "----------------Test supervisor agent ----------------"
local CONTENT=$(http_proxy="" curl http://${ip_address}:9090/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
"query": "Most recent album by Taylor Swift"
}')
local EXIT_CODE=$(validate "$CONTENT" "Taylor" "react-agent-endpoint")
docker logs react-agent-endpoint
if [ "$EXIT_CODE" == "1" ]; then
exit 1
fi
}
function stop_services() {
echo "Stopping CRAG server"
docker stop $(docker ps -q --filter ancestor=docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0)
echo "Stopping Agent services"
docker stop $(docker ps -q --filter ancestor=opea/comps-agent-langchain:latest)
}
function main() {
build_agent_docker_image
start_services
run_tests
stop_services
}
main

View File

@@ -1,330 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
import json
import os
from typing import List
import requests
class CRAG(object):
"""A client for interacting with the CRAG server, offering methods to query various domains such as Open, Movie, Finance, Music, and Sports. Each method corresponds to an API endpoint on the CRAG server.
Attributes:
server (str): The base URL of the CRAG server. Defaults to "http://127.0.0.1:8080".
Methods:
open_search_entity_by_name(query: str) -> dict: Search for entities by name in the Open domain.
open_get_entity(entity: str) -> dict: Retrieve detailed information about an entity in the Open domain.
movie_get_person_info(person_name: str) -> dict: Get information about a person related to movies.
movie_get_movie_info(movie_name: str) -> dict: Get information about a movie.
movie_get_year_info(year: str) -> dict: Get information about movies released in a specific year.
movie_get_movie_info_by_id(movie_id: int) -> dict: Get movie information by its unique ID.
movie_get_person_info_by_id(person_id: int) -> dict: Get person information by their unique ID.
finance_get_company_name(query: str) -> dict: Search for company names in the finance domain.
finance_get_ticker_by_name(query: str) -> dict: Retrieve the ticker symbol for a given company name.
finance_get_price_history(ticker_name: str) -> dict: Get the price history for a given ticker symbol.
finance_get_detailed_price_history(ticker_name: str) -> dict: Get detailed price history for a ticker symbol.
finance_get_dividends_history(ticker_name: str) -> dict: Get dividend history for a ticker symbol.
finance_get_market_capitalization(ticker_name: str) -> dict: Retrieve market capitalization for a ticker symbol.
finance_get_eps(ticker_name: str) -> dict: Get earnings per share (EPS) for a ticker symbol.
finance_get_pe_ratio(ticker_name: str) -> dict: Get the price-to-earnings (PE) ratio for a ticker symbol.
finance_get_info(ticker_name: str) -> dict: Get financial information for a ticker symbol.
music_search_artist_entity_by_name(artist_name: str) -> dict: Search for music artists by name.
music_search_song_entity_by_name(song_name: str) -> dict: Search for songs by name.
music_get_billboard_rank_date(rank: int, date: str = None) -> dict: Get Billboard ranking for a specific rank and date.
music_get_billboard_attributes(date: str, attribute: str, song_name: str) -> dict: Get attributes of a song from Billboard rankings.
music_grammy_get_best_artist_by_year(year: int) -> dict: Get the Grammy Best New Artist for a specific year.
music_grammy_get_award_count_by_artist(artist_name: str) -> dict: Get the total Grammy awards won by an artist.
music_grammy_get_award_count_by_song(song_name: str) -> dict: Get the total Grammy awards won by a song.
music_grammy_get_best_song_by_year(year: int) -> dict: Get the Grammy Song of the Year for a specific year.
music_grammy_get_award_date_by_artist(artist_name: str) -> dict: Get the years an artist won a Grammy award.
music_grammy_get_best_album_by_year(year: int) -> dict: Get the Grammy Album of the Year for a specific year.
music_grammy_get_all_awarded_artists() -> dict: Get all artists awarded the Grammy Best New Artist.
music_get_artist_birth_place(artist_name: str) -> dict: Get the birthplace of an artist.
music_get_artist_birth_date(artist_name: str) -> dict: Get the birth date of an artist.
music_get_members(band_name: str) -> dict: Get the member list of a band.
music_get_lifespan(artist_name: str) -> dict: Get the lifespan of an artist.
music_get_song_author(song_name: str) -> dict: Get the author of a song.
music_get_song_release_country(song_name: str) -> dict: Get the release country of a song.
music_get_song_release_date(song_name: str) -> dict: Get the release date of a song.
music_get_artist_all_works(artist_name: str) -> dict: Get all works by an artist.
sports_soccer_get_games_on_date(date: str, team_name: str = None) -> dict: Get soccer games on a specific date, optionally filtered by team.
sports_nba_get_games_on_date(date: str, team_name: str = None) -> dict: Get NBA games on a specific date, optionally filtered by team.
sports_nba_get_play_by_play_data_by_game_ids(game_ids: List[str]) -> dict: Get NBA play by play data for a set of game ids.
Note:
Each method performs a POST request to the corresponding API endpoint and returns the response as a JSON dictionary.
"""
def __init__(self):
self.server = os.environ.get("CRAG_SERVER", "http://127.0.0.1:8080")
def open_search_entity_by_name(self, query: str):
url = self.server + "/open/search_entity_by_name"
headers = {"accept": "application/json"}
data = {"query": query}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def open_get_entity(self, entity: str):
url = self.server + "/open/get_entity"
headers = {"accept": "application/json"}
data = {"query": entity}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def movie_get_person_info(self, person_name: str):
url = self.server + "/movie/get_person_info"
headers = {"accept": "application/json"}
data = {"query": person_name}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def movie_get_movie_info(self, movie_name: str):
url = self.server + "/movie/get_movie_info"
headers = {"accept": "application/json"}
data = {"query": movie_name}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def movie_get_year_info(self, year: str):
url = self.server + "/movie/get_year_info"
headers = {"accept": "application/json"}
data = {"query": year}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def movie_get_movie_info_by_id(self, movie_id: int):
url = self.server + "/movie/get_movie_info_by_id"
headers = {"accept": "application/json"}
data = {"query": movie_id}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def movie_get_person_info_by_id(self, person_id: int):
url = self.server + "/movie/get_person_info_by_id"
headers = {"accept": "application/json"}
data = {"query": person_id}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def finance_get_company_name(self, query: str):
url = self.server + "/finance/get_company_name"
headers = {"accept": "application/json"}
data = {"query": query}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def finance_get_ticker_by_name(self, query: str):
url = self.server + "/finance/get_ticker_by_name"
headers = {"accept": "application/json"}
data = {"query": query}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def finance_get_price_history(self, ticker_name: str):
url = self.server + "/finance/get_price_history"
headers = {"accept": "application/json"}
data = {"query": ticker_name}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def finance_get_detailed_price_history(self, ticker_name: str):
url = self.server + "/finance/get_detailed_price_history"
headers = {"accept": "application/json"}
data = {"query": ticker_name}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def finance_get_dividends_history(self, ticker_name: str):
url = self.server + "/finance/get_dividends_history"
headers = {"accept": "application/json"}
data = {"query": ticker_name}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def finance_get_market_capitalization(self, ticker_name: str):
url = self.server + "/finance/get_market_capitalization"
headers = {"accept": "application/json"}
data = {"query": ticker_name}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def finance_get_eps(self, ticker_name: str):
url = self.server + "/finance/get_eps"
headers = {"accept": "application/json"}
data = {"query": ticker_name}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def finance_get_pe_ratio(self, ticker_name: str):
url = self.server + "/finance/get_pe_ratio"
headers = {"accept": "application/json"}
data = {"query": ticker_name}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def finance_get_info(self, ticker_name: str):
url = self.server + "/finance/get_info"
headers = {"accept": "application/json"}
data = {"query": ticker_name}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def music_search_artist_entity_by_name(self, artist_name: str):
url = self.server + "/music/search_artist_entity_by_name"
headers = {"accept": "application/json"}
data = {"query": artist_name}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def music_search_song_entity_by_name(self, song_name: str):
url = self.server + "/music/search_song_entity_by_name"
headers = {"accept": "application/json"}
data = {"query": song_name}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def music_get_billboard_rank_date(self, rank: int, date: str = None):
url = self.server + "/music/get_billboard_rank_date"
headers = {"accept": "application/json"}
data = {"rank": rank, "date": date}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def music_get_billboard_attributes(self, date: str, attribute: str, song_name: str):
url = self.server + "/music/get_billboard_attributes"
headers = {"accept": "application/json"}
data = {"date": date, "attribute": attribute, "song_name": song_name}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def music_grammy_get_best_artist_by_year(self, year: int):
url = self.server + "/music/grammy_get_best_artist_by_year"
headers = {"accept": "application/json"}
data = {"query": year}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def music_grammy_get_award_count_by_artist(self, artist_name: str):
url = self.server + "/music/grammy_get_award_count_by_artist"
headers = {"accept": "application/json"}
data = {"query": artist_name}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def music_grammy_get_award_count_by_song(self, song_name: str):
url = self.server + "/music/grammy_get_award_count_by_song"
headers = {"accept": "application/json"}
data = {"query": song_name}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def music_grammy_get_best_song_by_year(self, year: int):
url = self.server + "/music/grammy_get_best_song_by_year"
headers = {"accept": "application/json"}
data = {"query": year}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def music_grammy_get_award_date_by_artist(self, artist_name: str):
url = self.server + "/music/grammy_get_award_date_by_artist"
headers = {"accept": "application/json"}
data = {"query": artist_name}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def music_grammy_get_best_album_by_year(self, year: int):
url = self.server + "/music/grammy_get_best_album_by_year"
headers = {"accept": "application/json"}
data = {"query": year}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def music_grammy_get_all_awarded_artists(self):
url = self.server + "/music/grammy_get_all_awarded_artists"
headers = {"accept": "application/json"}
result = requests.post(url, headers=headers)
return json.loads(result.text)
def music_get_artist_birth_place(self, artist_name: str):
url = self.server + "/music/get_artist_birth_place"
headers = {"accept": "application/json"}
data = {"query": artist_name}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def music_get_artist_birth_date(self, artist_name: str):
url = self.server + "/music/get_artist_birth_date"
headers = {"accept": "application/json"}
data = {"query": artist_name}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def music_get_members(self, band_name: str):
url = self.server + "/music/get_members"
headers = {"accept": "application/json"}
data = {"query": band_name}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def music_get_lifespan(self, artist_name: str):
url = self.server + "/music/get_lifespan"
headers = {"accept": "application/json"}
data = {"query": artist_name}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def music_get_song_author(self, song_name: str):
url = self.server + "/music/get_song_author"
headers = {"accept": "application/json"}
data = {"query": song_name}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def music_get_song_release_country(self, song_name: str):
url = self.server + "/music/get_song_release_country"
headers = {"accept": "application/json"}
data = {"query": song_name}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def music_get_song_release_date(self, song_name: str):
url = self.server + "/music/get_song_release_date"
headers = {"accept": "application/json"}
data = {"query": song_name}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def music_get_artist_all_works(self, artist_name: str):
url = self.server + "/music/get_artist_all_works"
headers = {"accept": "application/json"}
data = {"query": artist_name}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def sports_soccer_get_games_on_date(self, date: str, team_name: str = None):
url = self.server + "/sports/soccer/get_games_on_date"
headers = {"accept": "application/json"}
data = {"team_name": team_name, "date": date}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def sports_nba_get_games_on_date(self, date: str, team_name: str = None):
url = self.server + "/sports/nba/get_games_on_date"
headers = {"accept": "application/json"}
data = {"team_name": team_name, "date": date}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
def sports_nba_get_play_by_play_data_by_game_ids(self, game_ids: List[str]):
url = self.server + "/sports/nba/get_play_by_play_data_by_game_ids"
headers = {"accept": "application/json"}
data = {"game_ids": game_ids}
result = requests.post(url, json=data, headers=headers)
return json.loads(result.text)
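For reference, a short usage sketch of the client above. It assumes the CRAG mock server is reachable at `CRAG_SERVER` (default `http://127.0.0.1:8080`) and that the date format below matches what the mock API expects:
```python
import os

os.environ.setdefault("CRAG_SERVER", "http://127.0.0.1:8080")
api = CRAG()

# Each call POSTs to the matching endpoint and returns the parsed JSON.
print(api.music_search_artist_entity_by_name("Taylor Swift"))
print(api.music_get_members("The Beatles"))
print(api.music_get_billboard_rank_date(1, "2024-03-16"))  # date format assumed
```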

View File

@@ -1,59 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
search_knowledge_base:
description: Search knowledge base for a given query. Returns text related to the query.
callable_api: tools.py:search_knowledge_base
args_schema:
query:
type: str
description: query
return_output: retrieved_data
get_artist_birth_place:
description: Get the birth place of an artist.
callable_api: tools.py:get_artist_birth_place
args_schema:
artist_name:
type: str
description: artist name
return_output: birth_place
get_billboard_rank_date:
description: Get Billboard ranking for a specific rank and date.
callable_api: tools.py:get_billboard_rank_date
args_schema:
rank:
type: int
description: rank
date:
type: str
description: date
return_output: billboard_info
get_song_release_date:
description: Get the release date of a song.
callable_api: tools.py:get_song_release_date
args_schema:
song_name:
type: str
description: song name
return_output: release_date
get_members:
description: Get the member list of a band.
callable_api: tools.py:get_members
args_schema:
band_name:
type: str
description: band name
return_output: members
get_grammy_best_artist_by_year:
description: Get the Grammy Best New Artist for a specific year.
callable_api: tools.py:get_grammy_best_artist_by_year
args_schema:
year:
type: int
description: year
return_output: grammy_best_new_artist

View File

@@ -1,52 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import os
import requests
from tools.pycragapi import CRAG
def search_knowledge_base(query: str) -> str:
"""Search the knowledge base for a specific query."""
# use worker agent (DocGrader) to search the knowledge base
url = os.environ.get("WORKER_AGENT_URL")
print(url)
proxies = {"http": ""}
payload = {
"query": query,
}
response = requests.post(url, json=payload, proxies=proxies)
return response.json()["text"]
def get_grammy_best_artist_by_year(year: int) -> dict:
"""Get the Grammy Best New Artist for a specific year."""
api = CRAG()
year = int(year)
return api.music_grammy_get_best_artist_by_year(year)
def get_members(band_name: str) -> dict:
"""Get the member list of a band."""
api = CRAG()
return api.music_get_members(band_name)
def get_artist_birth_place(artist_name: str) -> dict:
"""Get the birthplace of an artist."""
api = CRAG()
return api.music_get_artist_birth_place(artist_name)
def get_billboard_rank_date(rank: int, date: str = None) -> dict:
"""Get Billboard ranking for a specific rank and date."""
api = CRAG()
rank = int(rank)
return api.music_get_billboard_rank_date(rank, date)
def get_song_release_date(song_name: str) -> dict:
"""Get the release date of a song."""
api = CRAG()
return api.music_get_song_release_date(song_name)

View File

@@ -1,5 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
duckduckgo_search:
callable_api: ddg-search

View File

@@ -1,54 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
services:
audioqna:
build:
args:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
no_proxy: ${no_proxy}
dockerfile: ./Dockerfile
image: ${REGISTRY:-opea}/audioqna:${TAG:-latest}
whisper-gaudi:
build:
context: GenAIComps
dockerfile: comps/asr/whisper/Dockerfile_hpu
extends: audioqna
image: ${REGISTRY:-opea}/whisper-gaudi:${TAG:-latest}
whisper:
build:
context: GenAIComps
dockerfile: comps/asr/whisper/Dockerfile
extends: audioqna
image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
asr:
build:
context: GenAIComps
dockerfile: comps/asr/Dockerfile
extends: audioqna
image: ${REGISTRY:-opea}/asr:${TAG:-latest}
llm-tgi:
build:
context: GenAIComps
dockerfile: comps/llms/text-generation/tgi/Dockerfile
extends: audioqna
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
speecht5-gaudi:
build:
context: GenAIComps
dockerfile: comps/tts/speecht5/Dockerfile_hpu
extends: audioqna
image: ${REGISTRY:-opea}/speecht5-gaudi:${TAG:-latest}
speecht5:
build:
context: GenAIComps
dockerfile: comps/tts/speecht5/Dockerfile
extends: audioqna
image: ${REGISTRY:-opea}/speecht5:${TAG:-latest}
tts:
build:
context: GenAIComps
dockerfile: comps/tts/Dockerfile
extends: audioqna
image: ${REGISTRY:-opea}/tts:${TAG:-latest}

View File

@@ -81,7 +81,7 @@ export LLM_SERVICE_PORT=3007
```bash
cd GenAIExamples/AudioQnA/docker/gaudi/
TAG=v0.9 docker compose up -d
docker compose up -d
```
## 🚀 Test MicroServices

View File

@@ -1,9 +1,12 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
version: "3.8"
services:
whisper-service:
image: ${REGISTRY:-opea}/whisper-gaudi:${TAG:-latest}
image: opea/whisper-gaudi:latest
container_name: whisper-service
ports:
- "7066:7066"
@@ -19,7 +22,7 @@ services:
- SYS_NICE
restart: unless-stopped
asr:
image: ${REGISTRY:-opea}/asr:${TAG:-latest}
image: opea/asr:latest
container_name: asr-service
ports:
- "3001:9099"
@@ -27,7 +30,7 @@ services:
environment:
ASR_ENDPOINT: ${ASR_ENDPOINT}
speecht5-service:
image: ${REGISTRY:-opea}/speecht5-gaudi:${TAG:-latest}
image: opea/speecht5-gaudi:latest
container_name: speecht5-service
ports:
- "7055:7055"
@@ -43,7 +46,7 @@ services:
- SYS_NICE
restart: unless-stopped
tts:
image: ${REGISTRY:-opea}/tts:${TAG:-latest}
image: opea/tts:latest
container_name: tts-service
ports:
- "3002:9088"
@@ -72,7 +75,7 @@ services:
ipc: host
command: --model-id ${LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048
llm:
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
image: opea/llm-tgi:latest
container_name: llm-tgi-gaudi-server
depends_on:
- tgi-service
@@ -87,7 +90,7 @@ services:
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
audioqna-gaudi-backend-server:
image: ${REGISTRY:-opea}/audioqna:${TAG:-latest}
image: opea/audioqna:latest
container_name: audioqna-gaudi-backend-server
depends_on:
- asr

View File

@@ -81,7 +81,7 @@ export LLM_SERVICE_PORT=3007
```bash
cd GenAIExamples/AudioQnA/docker/xeon/
TAG=v0.9 docker compose up -d
docker compose up -d
```
## 🚀 Test MicroServices

View File

@@ -1,9 +1,12 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
version: "3.8"
services:
whisper-service:
image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
image: opea/whisper:latest
container_name: whisper-service
ports:
- "7066:7066"
@@ -14,7 +17,7 @@ services:
https_proxy: ${https_proxy}
restart: unless-stopped
asr:
image: ${REGISTRY:-opea}/asr:${TAG:-latest}
image: opea/asr:latest
container_name: asr-service
ports:
- "3001:9099"
@@ -22,7 +25,7 @@ services:
environment:
ASR_ENDPOINT: ${ASR_ENDPOINT}
speecht5-service:
image: ${REGISTRY:-opea}/speecht5:${TAG:-latest}
image: opea/speecht5:latest
container_name: speecht5-service
ports:
- "7055:7055"
@@ -33,7 +36,7 @@ services:
https_proxy: ${https_proxy}
restart: unless-stopped
tts:
image: ${REGISTRY:-opea}/tts:${TAG:-latest}
image: opea/tts:latest
container_name: tts-service
ports:
- "3002:9088"
@@ -41,7 +44,7 @@ services:
environment:
TTS_ENDPOINT: ${TTS_ENDPOINT}
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
image: ghcr.io/huggingface/text-generation-inference:1.4
container_name: tgi-service
ports:
- "3006:80"
@@ -53,9 +56,9 @@ services:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
command: --model-id ${LLM_MODEL_ID}
llm:
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
image: opea/llm-tgi:latest
container_name: llm-tgi-server
depends_on:
- tgi-service
@@ -70,7 +73,7 @@ services:
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
audioqna-xeon-backend-server:
image: ${REGISTRY:-opea}/audioqna:${TAG:-latest}
image: opea/audioqna:latest
container_name: audioqna-xeon-backend-server
depends_on:
- asr

View File

@@ -1,74 +0,0 @@
# Deploy AudioQnA in Kubernetes Cluster on Xeon and Gaudi
This document outlines the deployment process for an AudioQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline components on Intel Xeon servers and Gaudi machines.
The AudioQnA service leverages a Kubernetes operator called genai-microservices-connector (GMC). GMC supports connecting microservices to create pipelines based on the specification in the pipeline yaml file, in addition to allowing the user to dynamically control which model is used in a service such as an LLM or embedder. The underlying pipeline language also supports using external services that may be running elsewhere in a public or private cloud.
Install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in the "Getting Started" section at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector). We will soon publish images to Docker Hub, at which point no builds will be required, simplifying installation.
The AudioQnA application is defined as a Custom Resource (CR) file that the GMC operator acts upon. It first checks whether the microservices listed in the CR yaml file are running; if not, it starts them, and then it connects them. When the AudioQnA pipeline is ready, the service endpoint details are returned, letting you use the application. If you run "kubectl get pods" you will see all the component microservices, in particular `asr`, `tts`, and `llm`.
## Using prebuilt images
AudioQnA uses the following prebuilt images for a Xeon deployment:
- tgi-service: ghcr.io/huggingface/text-generation-inference:1.4
- llm: opea/llm-tgi:v0.9
- asr: opea/asr:v0.9
- whisper: opea/whisper:v0.9
- tts: opea/tts:v0.9
- speecht5: opea/speecht5:v0.9
If you choose the Gaudi accelerator instead, alternate images are used for the LLM, ASR, and TTS services.
For Gaudi:
- tgi-service: ghcr.io/huggingface/tgi-gaudi:1.2.1
- whisper-gaudi: opea/whisper-gaudi:v0.9
- speecht5-gaudi: opea/speecht5-gaudi:v0.9
> [NOTE]
> Please refer to [Xeon README](https://github.com/opea-project/GenAIExamples/blob/main/AudioQnA/docker/xeon/README.md) or [Gaudi README](https://github.com/opea-project/GenAIExamples/blob/main/AudioQnA/docker/gaudi/README.md) to build the OPEA images. These too will be available on Docker Hub soon to simplify use.
## Deploy AudioQnA pipeline
This involves deploying the AudioQnA custom resource. Use audioQnA_xeon.yaml, or audioQnA_gaudi.yaml if you have a Gaudi cluster.
1. Create namespace and deploy application
```sh
kubectl create ns audioqa
kubectl apply -f $(pwd)/audioQnA_xeon.yaml
```
2. GMC will reconcile the AudioQnA custom resource and get all related components/services ready. Check that the services are up.
```sh
kubectl get service -n audioqa
```
3. Retrieve the application access URL
```sh
kubectl get gmconnectors.gmc.opea.io -n audioqa
NAME URL READY AGE
audioqa http://router-service.audioqa.svc.cluster.local:8080 6/0/6 5m
```
4. Deploy a client pod to test the application
```sh
kubectl create deployment client-test -n audioqa --image=python:3.8.13 -- sleep infinity
```
5. Access the application using the above URL from the client pod
```sh
export CLIENT_POD=$(kubectl get pod -n audioqa -l app=client-test -o jsonpath={.items..metadata.name})
export accessUrl=$(kubectl get gmc -n audioqa -o jsonpath="{.items[?(@.metadata.name=='audioqa')].status.accessUrl}")
kubectl exec "$CLIENT_POD" -n audioqa -- curl $accessUrl -X POST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "parameters":{"max_new_tokens":64, "do_sample": true, "streaming":false}}' -H 'Content-Type: application/json'
```
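Both the request and the response carry audio as base64 in a `byte_str` field (the validation script in this repo extracts `.byte_str` with jq). A minimal Python sketch, run from inside the cluster (e.g., the client pod), for decoding the returned audio under that assumption:
```python
import base64
import os

import requests

# accessUrl as retrieved with kubectl above; adjust for your cluster.
access_url = os.environ.get("ACCESS_URL", "http://router-service.audioqa.svc.cluster.local:8080")

payload = {
    "byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA",
    "parameters": {"max_new_tokens": 64, "do_sample": True, "streaming": False},
}
resp = requests.post(access_url, json=payload, timeout=300).json()

# The pipeline returns the synthesized answer as base64 audio in "byte_str".
with open("answer.wav", "wb") as f:
    f.write(base64.b64decode(resp["byte_str"]))
```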
> [NOTE]
> You can remove your AudioQnA pipeline by executing standard Kubernetes kubectl commands to remove a custom resource. Verify it was removed by executing kubectl get pods in the audioqa namespace.

View File

@@ -1,58 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: gmc.opea.io/v1alpha3
kind: GMConnector
metadata:
labels:
app.kubernetes.io/name: gmconnector
app.kubernetes.io/managed-by: kustomize
gmc/platform: gaudi
name: audioqa
namespace: audioqa
spec:
routerConfig:
name: router
serviceName: router-service
nodes:
root:
routerType: Sequence
steps:
- name: Asr
internalService:
serviceName: asr-svc
config:
endpoint: /v1/audio/transcriptions
ASR_ENDPOINT: whisper-gaudi-svc
- name: WhisperGaudi
internalService:
serviceName: whisper-gaudi-svc
config:
endpoint: /v1/asr
isDownstreamService: true
- name: Llm
data: $response
internalService:
serviceName: llm-svc
config:
endpoint: /v1/chat/completions
TGI_LLM_ENDPOINT: tgi-gaudi-svc
- name: TgiGaudi
internalService:
serviceName: tgi-gaudi-svc
config:
endpoint: /generate
isDownstreamService: true
- name: Tts
data: $response
internalService:
serviceName: tts-svc
config:
endpoint: /v1/audio/speech
TTS_ENDPOINT: speecht5-gaudi-svc
- name: SpeechT5Gaudi
internalService:
serviceName: speecht5-gaudi-svc
config:
endpoint: /v1/tts
isDownstreamService: true

View File

@@ -1,58 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: gmc.opea.io/v1alpha3
kind: GMConnector
metadata:
labels:
app.kubernetes.io/name: gmconnector
app.kubernetes.io/managed-by: kustomize
gmc/platform: xeon
name: audioqa
namespace: audioqa
spec:
routerConfig:
name: router
serviceName: router-service
nodes:
root:
routerType: Sequence
steps:
- name: Asr
internalService:
serviceName: asr-svc
config:
endpoint: /v1/audio/transcriptions
ASR_ENDPOINT: whisper-svc
- name: Whisper
internalService:
serviceName: whisper-svc
config:
endpoint: /v1/asr
isDownstreamService: true
- name: Llm
data: $response
internalService:
serviceName: llm-svc
config:
endpoint: /v1/chat/completions
TGI_LLM_ENDPOINT: tgi-svc
- name: Tgi
internalService:
serviceName: tgi-svc
config:
endpoint: /generate
isDownstreamService: true
- name: Tts
data: $response
internalService:
serviceName: tts-svc
config:
endpoint: /v1/audio/speech
TTS_ENDPOINT: speecht5-svc
- name: SpeechT5
internalService:
serviceName: speecht5-svc
config:
endpoint: /v1/tts
isDownstreamService: true

View File

@@ -1,32 +0,0 @@
# Deploy AudioQnA in a Kubernetes Cluster
> [NOTE]
> The following values must be set before you can deploy:
> HUGGINGFACEHUB_API_TOKEN
> You can also customize the "MODEL_ID" and "model-volume"
## Deploy On Xeon
```
cd GenAIExamples/AudioQnA/kubernetes/manifests/xeon
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" audioqna.yaml
kubectl apply -f audioqna.yaml
```
## Deploy On Gaudi
```
cd GenAIExamples/AudioQnA/kubernetes/manifests/gaudi
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" audioqna.yaml
kubectl apply -f audioqna.yaml
```
## Verify Services
Make sure all the pods are running, and restart the audioqna-xxxx pod if necessary.
```bash
kubectl get pods
curl http://${host_ip}:3008/v1/audioqna -X POST -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64}' -H 'Content-Type: application/json'
```

View File

@@ -1,439 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: audio-qna-config
namespace: default
data:
ASR_ENDPOINT: http://whisper-svc.default.svc.cluster.local:7066
TTS_ENDPOINT: http://speecht5-svc.default.svc.cluster.local:7055
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:3006
MEGA_SERVICE_HOST_IP: audioqna-backend-server-svc
ASR_SERVICE_HOST_IP: asr-svc
ASR_SERVICE_PORT: "3001"
LLM_SERVICE_HOST_IP: llm-svc
LLM_SERVICE_PORT: "3007"
TTS_SERVICE_HOST_IP: tts-svc
TTS_SERVICE_PORT: "3002"
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: asr-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: asr-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: asr-deploy
spec:
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: asr-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: audio-qna-config
image: opea/asr:v0.9
imagePullPolicy: IfNotPresent
name: asr-deploy
args: null
ports:
- containerPort: 9099
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: asr-svc
spec:
type: ClusterIP
selector:
app: asr-deploy
ports:
- name: service
port: 3001
targetPort: 9099
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: whisper-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: whisper-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: whisper-deploy
spec:
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: whisper-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: audio-qna-config
image: opea/whisper-gaudi:v0.9
imagePullPolicy: IfNotPresent
name: whisper-deploy
args: null
ports:
- containerPort: 7066
resources:
limits:
habana.ai/gaudi: 1
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: whisper-svc
spec:
type: ClusterIP
selector:
app: whisper-deploy
ports:
- name: service
port: 7066
targetPort: 7066
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: tts-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: tts-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: tts-deploy
spec:
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: tts-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: audio-qna-config
image: opea/tts:v0.9
imagePullPolicy: IfNotPresent
name: tts-deploy
args: null
ports:
- containerPort: 9088
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: tts-svc
spec:
type: ClusterIP
selector:
app: tts-deploy
ports:
- name: service
port: 3002
targetPort: 9088
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: speecht5-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: speecht5-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: speecht5-deploy
spec:
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: speecht5-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: audio-qna-config
image: opea/speecht5-gaudi:v0.9
imagePullPolicy: IfNotPresent
name: speecht5-deploy
args: null
ports:
- containerPort: 7055
resources:
limits:
habana.ai/gaudi: 1
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: speecht5-svc
spec:
type: ClusterIP
selector:
app: speecht5-deploy
ports:
- name: service
port: 7055
targetPort: 7055
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-dependency-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: llm-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-dependency-deploy
spec:
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: audio-qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
name: llm-dependency-deploy-demo
securityContext:
capabilities:
add:
- SYS_NICE
args:
- --model-id
- $(LLM_MODEL_ID)
- --max-input-length
- '2048'
- --max-total-tokens
- '4096'
- --max-batch-total-tokens
- '65536'
- --max-batch-prefill-tokens
- '4096'
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: PREFILL_BATCH_BUCKET_SIZE
value: "1"
- name: BATCH_BUCKET_SIZE
value: "8"
serviceAccountName: default
volumes:
- name: model-volume
hostPath:
path: /home/sdp/cesg
type: Directory
- name: shm
emptyDir:
medium: Memory
sizeLimit: 1Gi
---
kind: Service
apiVersion: v1
metadata:
name: llm-dependency-svc
spec:
type: ClusterIP
selector:
app: llm-dependency-deploy
ports:
- name: service
port: 3006
targetPort: 80
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: llm-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-deploy
spec:
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: llm-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: audio-qna-config
image: opea/llm-tgi:v0.9
imagePullPolicy: IfNotPresent
name: llm-deploy
args: null
ports:
- containerPort: 9000
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: llm-svc
spec:
type: ClusterIP
selector:
app: llm-deploy
ports:
- name: service
port: 3007
targetPort: 9000
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: audioqna-backend-server-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: audioqna-backend-server-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: audioqna-backend-server-deploy
spec:
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: audioqna-backend-server-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: audio-qna-config
image: opea/audioqna:v0.9
imagePullPolicy: IfNotPresent
name: audioqna-backend-server-deploy
args: null
ports:
- containerPort: 8888
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: audioqna-backend-server-svc
spec:
type: NodePort
selector:
app: audioqna-backend-server-deploy
ports:
- name: service
port: 3008
targetPort: 8888
nodePort: 30666

View File

@@ -1,395 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: audio-qna-config
namespace: default
data:
ASR_ENDPOINT: http://whisper-svc.default.svc.cluster.local:7066
TTS_ENDPOINT: http://speecht5-svc.default.svc.cluster.local:7055
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:3006
MEGA_SERVICE_HOST_IP: audioqna-backend-server-svc
ASR_SERVICE_HOST_IP: asr-svc
ASR_SERVICE_PORT: "3001"
LLM_SERVICE_HOST_IP: llm-svc
LLM_SERVICE_PORT: "3007"
TTS_SERVICE_HOST_IP: tts-svc
TTS_SERVICE_PORT: "3002"
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: asr-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: asr-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: asr-deploy
spec:
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: asr-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: audio-qna-config
image: opea/asr:v0.9
imagePullPolicy: IfNotPresent
name: asr-deploy
args: null
ports:
- containerPort: 9099
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: asr-svc
spec:
type: ClusterIP
selector:
app: asr-deploy
ports:
- name: service
port: 3001
targetPort: 9099
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: whisper-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: whisper-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: whisper-deploy
spec:
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: whisper-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: audio-qna-config
image: opea/whisper:v0.9
imagePullPolicy: IfNotPresent
name: whisper-deploy
args: null
ports:
- containerPort: 7066
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: whisper-svc
spec:
type: ClusterIP
selector:
app: whisper-deploy
ports:
- name: service
port: 7066
targetPort: 7066
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: tts-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: tts-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: tts-deploy
spec:
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: tts-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: audio-qna-config
image: opea/tts:v0.9
imagePullPolicy: IfNotPresent
name: tts-deploy
args: null
ports:
- containerPort: 9088
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: tts-svc
spec:
type: ClusterIP
selector:
app: tts-deploy
ports:
- name: service
port: 3002
targetPort: 9088
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: speecht5-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: speecht5-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: speecht5-deploy
spec:
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: speecht5-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: audio-qna-config
image: opea/speecht5:v0.9
imagePullPolicy: IfNotPresent
name: speecht5-deploy
args: null
ports:
- containerPort: 7055
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: speecht5-svc
spec:
type: ClusterIP
selector:
app: speecht5-deploy
ports:
- name: service
port: 7055
targetPort: 7055
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-dependency-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: llm-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-dependency-deploy
spec:
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: audio-qna-config
image: ghcr.io/huggingface/text-generation-inference:2.2.0
name: llm-dependency-deploy-demo
securityContext:
capabilities:
add:
- SYS_NICE
args:
- --model-id
- $(LLM_MODEL_ID)
- --max-input-length
- '2048'
- --max-total-tokens
- '4096'
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
ports:
- containerPort: 80
serviceAccountName: default
volumes:
- name: model-volume
hostPath:
path: /home/sdp/cesg
type: Directory
- name: shm
emptyDir:
medium: Memory
sizeLimit: 1Gi
---
kind: Service
apiVersion: v1
metadata:
name: llm-dependency-svc
spec:
type: ClusterIP
selector:
app: llm-dependency-deploy
ports:
- name: service
port: 3006
targetPort: 80
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: llm-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-deploy
spec:
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: llm-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: audio-qna-config
image: opea/llm-tgi:v0.9
imagePullPolicy: IfNotPresent
name: llm-deploy
args: null
ports:
- containerPort: 9000
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: llm-svc
spec:
type: ClusterIP
selector:
app: llm-deploy
ports:
- name: service
port: 3007
targetPort: 9000
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: audioqna-backend-server-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: audioqna-backend-server-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: audioqna-backend-server-deploy
spec:
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: audioqna-backend-server-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: audio-qna-config
image: opea/audioqna:v0.9
imagePullPolicy: IfNotPresent
name: audioqna-backend-server-deploy
args: null
ports:
- containerPort: 8888
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: audioqna-backend-server-svc
spec:
type: NodePort
selector:
app: audioqna-backend-server-deploy
ports:
- name: service
port: 3008
targetPort: 8888
nodePort: 30666

View File

@@ -3,27 +3,35 @@
# SPDX-License-Identifier: Apache-2.0
set -e
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
echo "IMAGE_REPO=${IMAGE_REPO}"
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')
function build_docker_images() {
cd $WORKPATH/docker
cd $WORKPATH
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="audioqna whisper-gaudi asr llm-tgi speecht5-gaudi tts"
docker compose -f docker_build_compose.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker build -t opea/whisper-gaudi:latest -f comps/asr/whisper/Dockerfile_hpu .
docker build -t opea/asr:latest -f comps/asr/Dockerfile .
docker build -t opea/llm-tgi:latest -f comps/llms/text-generation/tgi/Dockerfile .
docker build -t opea/speecht5-gaudi:latest -f comps/tts/speecht5/Dockerfile_hpu .
docker build -t opea/tts:latest -f comps/tts/Dockerfile .
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
docker images && sleep 1s
cd ..
cd $WORKPATH/docker
docker build --no-cache -t opea/audioqna:latest -f Dockerfile .
# cd $WORKPATH/docker/ui
# docker build --no-cache -t opea/audioqna-ui:latest -f docker/Dockerfile .
docker images
}
function start_services() {
@@ -47,25 +55,25 @@ function start_services() {
# sed -i "s/backend_address/$ip_address/g" $WORKPATH/docker/ui/svelte/.env
if [[ "$IMAGE_REPO" != "" ]]; then
# Replace the container name with a test-specific name
echo "using image repository $IMAGE_REPO and image tag $IMAGE_TAG"
sed -i "s#image: opea/audioqna:latest#image: opea/audioqna:${IMAGE_TAG}#g" compose.yaml
sed -i "s#image: opea/audioqna-ui:latest#image: opea/audioqna-ui:${IMAGE_TAG}#g" compose.yaml
sed -i "s#image: opea/*#image: ${IMAGE_REPO}opea/#g" compose.yaml
echo "cat compose.yaml"
cat compose.yaml
fi
# Start Docker Containers
docker compose up -d > ${LOG_PATH}/start_services_with_compose.log
docker compose up -d
n=0
until [[ "$n" -ge 100 ]]; do
until [[ "$n" -ge 500 ]]; do
docker logs tgi-gaudi-server > $LOG_PATH/tgi_service_start.log
if grep -q Connected $LOG_PATH/tgi_service_start.log; then
break
fi
sleep 5s
n=$((n+1))
done
n=0
until [[ "$n" -ge 100 ]]; do
docker logs whisper-service > $LOG_PATH/whisper_service_start.log
if grep -q "Uvicorn server setup on port" $LOG_PATH/whisper_service_start.log; then
break
fi
sleep 5s
sleep 1s
n=$((n+1))
done
}
@@ -123,7 +131,7 @@ function stop_docker() {
function main() {
stop_docker
if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
if [[ "$IMAGE_REPO" == "" ]]; then build_docker_images; fi
start_services
# validate_microservices

View File

@@ -3,27 +3,32 @@
# SPDX-License-Identifier: Apache-2.0
set -e
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
echo "IMAGE_REPO=${IMAGE_REPO}"
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')
function build_docker_images() {
cd $WORKPATH/docker
cd $WORKPATH
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="audioqna whisper asr llm-tgi speecht5 tts"
docker compose -f docker_build_compose.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker build -t opea/whisper:latest -f comps/asr/whisper/Dockerfile .
docker build -t opea/asr:latest -f comps/asr/Dockerfile .
docker build -t opea/llm-tgi:latest -f comps/llms/text-generation/tgi/Dockerfile .
docker build -t opea/speecht5:latest -f comps/tts/speecht5/Dockerfile .
docker build -t opea/tts:latest -f comps/tts/Dockerfile .
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
docker images && sleep 1s
cd $WORKPATH/docker
docker build --no-cache -t opea/audioqna:latest -f Dockerfile .
# cd $WORKPATH/docker/ui
# docker build --no-cache -t opea/audioqna-ui:latest -f docker/Dockerfile .
docker images
}
function start_services() {
@@ -46,15 +51,25 @@ function start_services() {
# sed -i "s/backend_address/$ip_address/g" $WORKPATH/docker/ui/svelte/.env
if [[ "$IMAGE_REPO" != "" ]]; then
# Replace the container name with a test-specific name
echo "using image repository $IMAGE_REPO and image tag $IMAGE_TAG"
sed -i "s#image: opea/audioqna:latest#image: opea/audioqna:${IMAGE_TAG}#g" compose.yaml
sed -i "s#image: opea/audioqna-ui:latest#image: opea/audioqna-ui:${IMAGE_TAG}#g" compose.yaml
sed -i "s#image: opea/*#image: ${IMAGE_REPO}opea/#g" compose.yaml
echo "cat compose.yaml"
cat compose.yaml
fi
# Start Docker Containers
docker compose up -d > ${LOG_PATH}/start_services_with_compose.log
docker compose up -d
n=0
until [[ "$n" -ge 100 ]]; do
until [[ "$n" -ge 500 ]]; do
docker logs tgi-service > $LOG_PATH/tgi_service_start.log
if grep -q Connected $LOG_PATH/tgi_service_start.log; then
break
fi
sleep 5s
sleep 1s
n=$((n+1))
done
}
@@ -113,7 +128,7 @@ function stop_docker() {
function main() {
stop_docker
if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
if [[ "$IMAGE_REPO" == "" ]]; then build_docker_images; fi
start_services
validate_megaservice

View File

@@ -1,111 +0,0 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -xe
USER_ID=$(whoami)
LOG_PATH=/home/$(whoami)/logs
MOUNT_DIR=/home/$USER_ID/.cache/huggingface/hub
IMAGE_REPO=${IMAGE_REPO:-}
function install_audioqa() {
kubectl create ns $APP_NAMESPACE
sed -i "s|namespace: audioqa|namespace: $APP_NAMESPACE|g" ./audioQnA_gaudi.yaml
kubectl apply -f ./audioQnA_gaudi.yaml
# Wait until the router service is ready
echo "Waiting for the audioqa router service to be ready..."
wait_until_pod_ready "audioqa router" $APP_NAMESPACE "router-service"
output=$(kubectl get pods -n $APP_NAMESPACE)
echo $output
}
function validate_audioqa() {
# deploy client pod for testing
kubectl create deployment client-test -n $APP_NAMESPACE --image=python:3.8.13 -- sleep infinity
# wait for client pod ready
wait_until_pod_ready "client-test" $APP_NAMESPACE "client-test"
# giving time to populating data
sleep 60
kubectl get pods -n $APP_NAMESPACE
# send request to audioqa
export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name})
echo "$CLIENT_POD"
accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='audioqa')].status.accessUrl}")
byte_str=$(kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -s -X POST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "parameters":{"max_new_tokens":64, "do_sample": true, "streaming":false}}' -H 'Content-Type: application/json' | jq .byte_str)
echo "$byte_str" > $LOG_PATH/curl_audioqa.log
if [ -z "$byte_str" ]; then
echo "audioqa failed, please check the logs in ${LOG_PATH}!"
exit 1
fi
echo "Audioqa response check succeed!"
}
function wait_until_pod_ready() {
echo "Waiting for the $1 to be ready..."
max_retries=30
retry_count=0
while ! is_pod_ready $2 $3; do
if [ $retry_count -ge $max_retries ]; then
echo "$1 is not ready after waiting for a significant amount of time"
get_gmc_controller_logs
exit 1
fi
echo "$1 is not ready yet. Retrying in 10 seconds..."
sleep 10
output=$(kubectl get pods -n $2)
echo $output
retry_count=$((retry_count + 1))
done
}
function is_pod_ready() {
if [ "$2" == "gmc-controller" ]; then
pod_status=$(kubectl get pods -n $1 -o jsonpath='{.items[].status.conditions[?(@.type=="Ready")].status}')
else
pod_status=$(kubectl get pods -n $1 -l app=$2 -o jsonpath='{.items[].status.conditions[?(@.type=="Ready")].status}')
fi
if [ "$pod_status" == "True" ]; then
return 0
else
return 1
fi
}
function get_gmc_controller_logs() {
# Fetch the name of the pod with the app-name gmc-controller in the specified namespace
pod_name=$(kubectl get pods -n $SYSTEM_NAMESPACE -l control-plane=gmc-controller -o jsonpath='{.items[0].metadata.name}')
# Check if the pod name was found
if [ -z "$pod_name" ]; then
echo "No pod found with app-name gmc-controller in namespace $SYSTEM_NAMESPACE"
return 1
fi
# Get the logs of the found pod
echo "Fetching logs for pod $pod_name in namespace $SYSTEM_NAMESPACE..."
kubectl logs $pod_name -n $SYSTEM_NAMESPACE
}
if [ $# -eq 0 ]; then
echo "Usage: $0 <function_name>"
exit 1
fi
case "$1" in
install_AudioQnA)
pushd AudioQnA/kubernetes
install_audioqa
popd
;;
validate_AudioQnA)
pushd AudioQnA/kubernetes
validate_audioqa
popd
;;
*)
echo "Unknown function: $1"
;;
esac

View File

@@ -1,111 +0,0 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -xe
USER_ID=$(whoami)
LOG_PATH=/home/$(whoami)/logs
MOUNT_DIR=/home/$USER_ID/.cache/huggingface/hub
IMAGE_REPO=${IMAGE_REPO:-}
function install_audioqa() {
kubectl create ns $APP_NAMESPACE
sed -i "s|namespace: audioqa|namespace: $APP_NAMESPACE|g" ./audioQnA_xeon.yaml
kubectl apply -f ./audioQnA_xeon.yaml
# Wait until the router service is ready
echo "Waiting for the audioqa router service to be ready..."
wait_until_pod_ready "audioqa router" $APP_NAMESPACE "router-service"
output=$(kubectl get pods -n $APP_NAMESPACE)
echo $output
}
function validate_audioqa() {
# deploy client pod for testing
kubectl create deployment client-test -n $APP_NAMESPACE --image=python:3.8.13 -- sleep infinity
# wait for client pod ready
wait_until_pod_ready "client-test" $APP_NAMESPACE "client-test"
# giving time to populating data
sleep 60
kubectl get pods -n $APP_NAMESPACE
# send request to audioqa
export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name})
echo "$CLIENT_POD"
accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='audioqa')].status.accessUrl}")
byte_str=$(kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -s -X POST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "parameters":{"max_new_tokens":64, "do_sample": true, "streaming":false}}' -H 'Content-Type: application/json' | jq .byte_str)
echo "$byte_str" > $LOG_PATH/curl_audioqa.log
if [ -z "$byte_str" ]; then
echo "audioqa failed, please check the logs in ${LOG_PATH}!"
exit 1
fi
echo "Audioqa response check succeed!"
}
function wait_until_pod_ready() {
echo "Waiting for the $1 to be ready..."
max_retries=30
retry_count=0
while ! is_pod_ready $2 $3; do
if [ $retry_count -ge $max_retries ]; then
echo "$1 is not ready after waiting for a significant amount of time"
get_gmc_controller_logs
exit 1
fi
echo "$1 is not ready yet. Retrying in 10 seconds..."
sleep 10
output=$(kubectl get pods -n $2)
echo $output
retry_count=$((retry_count + 1))
done
}
function is_pod_ready() {
if [ "$2" == "gmc-controller" ]; then
pod_status=$(kubectl get pods -n $1 -o jsonpath='{.items[].status.conditions[?(@.type=="Ready")].status}')
else
pod_status=$(kubectl get pods -n $1 -l app=$2 -o jsonpath='{.items[].status.conditions[?(@.type=="Ready")].status}')
fi
if [ "$pod_status" == "True" ]; then
return 0
else
return 1
fi
}
function get_gmc_controller_logs() {
# Fetch the name of the pod labeled control-plane=gmc-controller in the specified namespace
pod_name=$(kubectl get pods -n $SYSTEM_NAMESPACE -l control-plane=gmc-controller -o jsonpath='{.items[0].metadata.name}')
# Check if the pod name was found
if [ -z "$pod_name" ]; then
echo "No pod found with app-name gmc-controller in namespace $SYSTEM_NAMESPACE"
return 1
fi
# Get the logs of the found pod
echo "Fetching logs for pod $pod_name in namespace $SYSTEM_NAMESPACE..."
kubectl logs $pod_name -n $SYSTEM_NAMESPACE
}
if [ $# -eq 0 ]; then
echo "Usage: $0 <function_name>"
exit 1
fi
case "$1" in
install_AudioQnA)
pushd AudioQnA/kubernetes
install_audioqa
popd
;;
validate_AudioQnA)
pushd AudioQnA/kubernetes
validate_audioqa
popd
;;
*)
echo "Unknown function: $1"
;;
esac

View File

@@ -10,90 +10,7 @@ ChatQnA architecture shows below:
ChatQnA is implemented on top of [GenAIComps](https://github.com/opea-project/GenAIComps); the ChatQnA flow chart is shown below:
```mermaid
---
config:
flowchart:
nodeSpacing: 100
rankSpacing: 100
curve: linear
theme: base
themeVariables:
fontSize: 42px
---
flowchart LR
%% Colors %%
classDef blue fill:#ADD8E6,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
classDef orange fill:#FBAA60,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
classDef orchid fill:#C26DBC,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
classDef invisible fill:transparent,stroke:transparent;
style ChatQnA-MegaService stroke:#000000
%% Subgraphs %%
subgraph ChatQnA-MegaService["ChatQnA-MegaService"]
direction LR
EM([Embedding <br>]):::blue
RET([Retrieval <br>]):::blue
RER([Rerank <br>]):::blue
LLM([LLM <br>]):::blue
end
subgraph User Interface
direction TB
a([User Input Query]):::orchid
Ingest([Ingest data]):::orchid
UI([UI server<br>]):::orchid
end
subgraph ChatQnA Gateway
direction LR
invisible1[ ]:::invisible
GW([ChatQnA Gateway<br>]):::orange
end
subgraph .
X([OPEA Microservice]):::blue
Y{{Open Source Service}}
Z([OPEA Gateway]):::orange
Z1([UI]):::orchid
end
TEI_RER{{Reranking service<br>'TEI'<br>}}
TEI_EM{{Embedding service <br>'TEI LangChain'<br>}}
VDB{{Vector DB<br>'Redis'<br>}}
R_RET{{Retriever service <br>'LangChain Redis'<br>}}
DP([Data Preparation<br>'LangChain Redis'<br>]):::blue
LLM_gen{{LLM Service <br>'TGI'<br>}}
%% Data Preparation flow
%% Ingest data flow
direction LR
Ingest[Ingest data] -->|a| UI
UI -->|b| DP
DP <-.->|c| TEI_EM
%% Questions interaction
direction LR
a[User Input Query] -->|1| UI
UI -->|2| GW
GW <==>|3| ChatQnA-MegaService
EM ==>|4| RET
RET ==>|5| RER
RER ==>|6| LLM
%% Embedding service flow
direction TB
EM <-.->|3'| TEI_EM
RET <-.->|4'| R_RET
RER <-.->|5'| TEI_RER
LLM <-.->|6'| LLM_gen
direction TB
%% Vector DB interaction
R_RET <-.->|d|VDB
DP <-.->|d|VDB
```
![Flow Chart](./assets/img/chatqna_flow_chart.png)
This ChatQnA use case performs RAG using LangChain, Redis VectorDB and Text Generation Inference on Intel Gaudi2 or Intel Xeon Scalable processors. The Intel Gaudi2 accelerator supports both training and inference for deep learning models, in particular LLMs. Visit [Habana AI products](https://habana.ai/products) for more details.
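Once deployed, the megaservice is consumed through its gateway. As a minimal sketch (the `/v1/chatqna` route, the `messages` payload and port 8888 are assumptions based on the default compose setup; replace `${host_ip}` with your external IP):

```bash
# Send a question to the ChatQnA gateway; the answer streams back as it is generated
curl http://${host_ip}:8888/v1/chatqna \
    -H "Content-Type: application/json" \
    -d '{"messages": "What is the revenue of Nike in 2023?"}'
```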
@@ -161,7 +78,7 @@ Find the corresponding [compose.yaml](./docker/gaudi/compose.yaml).
```bash
cd GenAIExamples/ChatQnA/docker/gaudi/
TAG=v0.9 docker compose up -d
docker compose up -d
```
> Notice: Currently only the **Habana Driver 1.16.x** is supported for Gaudi.
@@ -174,7 +91,7 @@ Find the corresponding [compose.yaml](./docker/xeon/compose.yaml).
```bash
cd GenAIExamples/ChatQnA/docker/xeon/
TAG=v0.9 docker compose up -d
docker compose up -d
```
Refer to the [Xeon Guide](./docker/xeon/README.md) for more instructions on building docker images from source.
@@ -183,7 +100,7 @@ Refer to the [Xeon Guide](./docker/xeon/README.md) for more instructions on buil
```bash
cd GenAIExamples/ChatQnA/docker/gpu/
TAG=v0.9 docker compose up -d
docker compose up -d
```
Refer to the [NVIDIA GPU Guide](./docker/gpu/README.md) for more instructions on building docker images from source.
@@ -206,10 +123,6 @@ Refer to the [ChatQnA helm chart](https://github.com/opea-project/GenAIInfra/tre
Refer to the [AI PC Guide](./docker/aipc/README.md) for instructions on deploying ChatQnA on AI PC.
### Deploy ChatQnA on Red Hat OpenShift Container Platform (RHOCP)
Refer to the [Intel Technology enabling for Openshift readme](https://github.com/intel/intel-technology-enabling-for-openshift/blob/main/workloads/opea/chatqna/README.md) for instructions to deploy ChatQnA prototype on RHOCP with [Red Hat OpenShift AI (RHOAI)](https://www.redhat.com/en/technologies/cloud-computing/openshift/openshift-ai).
## Consume ChatQnA Service
There are two ways to consume the ChatQnA service:

View File

@@ -1,546 +0,0 @@
# ChatQnA Benchmarking
This folder contains a collection of Kubernetes manifest files for deploying the ChatQnA service across scalable nodes. It includes a comprehensive [benchmarking tool](https://github.com/opea-project/GenAIEval/blob/main/evals/benchmark/README.md) that enables throughput analysis to assess inference performance.
By following this guide, you can run benchmarks on your deployment and share the results with the OPEA community.
# Purpose
We aim to run these benchmarks and share them with the OPEA community for three primary reasons:
- To offer insights on inference throughput in real-world scenarios, helping you choose the best service or deployment for your needs.
- To establish a baseline for validating optimization solutions across different implementations, providing clear guidance on which methods are most effective for your use case.
- To inspire the community to build upon our benchmarks, allowing us to better quantify new solutions in conjunction with the current leading LLMs, serving frameworks, etc.
# Metrics
The benchmark reports the following metrics:
- Number of Concurrent Requests
- End-to-End Latency: P50, P90, P99 (in milliseconds)
- End-to-End First Token Latency: P50, P90, P99 (in milliseconds)
- Average Next Token Latency (in milliseconds)
- Average Token Latency (in milliseconds)
- Requests Per Second (RPS)
- Output Tokens Per Second
- Input Tokens Per Second
Results are displayed in the terminal and saved as a CSV file named `1_stats.csv` for easy export to spreadsheets.
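For a quick look at the results without leaving the terminal, a simple sketch (assuming the CSV sits in the test output directory configured later in this guide):

```bash
# Render the comma-separated stats as an aligned table
column -s, -t < ${TEST_OUTPUT_DIR}/1_stats.csv
```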
# Getting Started
## Prerequisites
- Install Kubernetes by following [this guide](https://github.com/opea-project/docs/blob/main/guide/installation/k8s_install/k8s_install_kubespray.md).
- Ensure every node has direct internet access.
- Set up kubectl on the master node with access to the Kubernetes cluster.
- Install Python 3.8+ on the master node for running the stress tool.
- Ensure all nodes have a local /mnt/models folder, which will be mounted by the pods.
## Kubernetes Cluster Example
```bash
$ kubectl get nodes
NAME STATUS ROLES AGE VERSION
k8s-master Ready control-plane 35d v1.29.6
k8s-worker1 Ready <none> 35d v1.29.5
k8s-worker2 Ready <none> 35d v1.29.6
k8s-worker3 Ready <none> 35d v1.29.6
```
## Manifest preparation
We have created the [BKC manifest](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/benchmark) (best-known configuration) for single-node, two-node and four-node K8s clusters. Before applying it, check out the repository and configure a few values.
```bash
# on k8s-master node
git clone https://github.com/opea-project/GenAIExamples.git
cd GenAIExamples/ChatQnA/benchmark
# replace the image tag from latest to v0.9 since we want to test with v0.9 release
IMAGE_TAG=v0.9
find . -name '*.yaml' -type f -exec sed -i "s#image: opea/\(.*\):latest#image: opea/\1:${IMAGE_TAG}#g" {} \;
# set the huggingface token
HUGGINGFACE_TOKEN=<your token>
find . -name '*.yaml' -type f -exec sed -i "s#\${HF_TOKEN}#${HUGGINGFACE_TOKEN}#g" {} \;
# set models
LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
EMBEDDING_MODEL_ID=BAAI/bge-base-en-v1.5
RERANK_MODEL_ID=BAAI/bge-reranker-base
find . -name '*.yaml' -type f -exec sed -i "s#\$(LLM_MODEL_ID)#${LLM_MODEL_ID}#g" {} \;
find . -name '*.yaml' -type f -exec sed -i "s#\$(EMBEDDING_MODEL_ID)#${EMBEDDING_MODEL_ID}#g" {} \;
find . -name '*.yaml' -type f -exec sed -i "s#\$(RERANK_MODEL_ID)#${RERANK_MODEL_ID}#g" {} \;
```
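Before applying the manifests, it is worth checking that nothing escaped the substitutions above; a quick sanity check (a sketch, adjust the patterns if you substitute different values):

```bash
# Any remaining ':latest' images or unexpanded placeholders indicate a missed substitution
grep -rn -E 'image: opea/.*:latest|\$\{HF_TOKEN\}|\$\([A-Z_]*MODEL_ID\)' --include='*.yaml' . \
  || echo "all substitutions applied"
```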
## Benchmark tool preparation
The test uses the [benchmark tool](https://github.com/opea-project/GenAIEval/tree/main/evals/benchmark) to run the performance test. Set up the benchmark tool on the Kubernetes master node, which is k8s-master.
```bash
# on k8s-master node
git clone https://github.com/opea-project/GenAIEval.git
cd GenAIEval
python3 -m venv stress_venv
source stress_venv/bin/activate
pip install -r requirements.txt
```
## Test Configurations
Workload configuration:
| Key | Value |
| -------- | ------- |
| Workload | ChatQnA |
| Tag | v0.9 |
Model configuration:
| Key | Value |
| ---------- | ------------------ |
| Embedding | BAAI/bge-base-en-v1.5 |
| Reranking | BAAI/bge-reranker-base |
| Inference | Intel/neural-chat-7b-v3-3 |
Benchmark parameters:
| Key | Value |
| ---------- | ------------------ |
| LLM input tokens | 1024 |
| LLM output tokens | 128 |
Number of test requests for each scheduled node count (concurrency scales at 128 per node, and the query number is five times the concurrency, matching `concurrent_level: 5` in the benchmark configuration):
| Node count | Concurrency | Query number |
| ----- | -------- | -------- |
| 1 | 128 | 640 |
| 2 | 256 | 1280 |
| 4 | 512 | 2560 |
More detailed configuration can be found in the configuration file [benchmark.yaml](./benchmark.yaml).
## Test Steps
### Single node test
#### 1. Preparation
We add a label to 1 Kubernetes node to make sure all pods are scheduled to it:
```bash
kubectl label nodes k8s-worker1 node-type=chatqna-opea
```
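You can confirm the label took effect before installing the workload:

```bash
# Expect exactly the node(s) just labeled
kubectl get nodes -l node-type=chatqna-opea
```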
#### 2. Install ChatQnA
Go to the [BKC manifest](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/benchmark/single_gaudi) and apply it to K8s.
```bash
# on k8s-master node
cd GenAIExamples/ChatQnA/benchmark/single_gaudi
kubectl apply -f .
```
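Pulling images and warming up the models can take a while; one way to block until everything is up (a sketch assuming the manifests deploy into the `default` namespace):

```bash
# Wait until every pod in the namespace reports Ready
kubectl wait --for=condition=Ready pod --all -n default --timeout=15m
```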
#### 3. Run tests
Copy the configuration file [benchmark.yaml](./benchmark.yaml) to `GenAIEval/evals/benchmark/benchmark.yaml` and configure `test_suite_config.user_queries` and `test_suite_config.test_output_dir`:
```bash
export USER_QUERIES="[4, 8, 16, 640]"
export TEST_OUTPUT_DIR="/home/sdp/benchmark_output/node_1"
envsubst < ./benchmark.yaml > GenAIEval/evals/benchmark/benchmark.yaml
```
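A quick check that `envsubst` expanded both variables (run from the directory containing your GenAIEval checkout):

```bash
# Both lines should show concrete values rather than ${USER_QUERIES}/${TEST_OUTPUT_DIR}
grep -E 'user_queries|test_output_dir' GenAIEval/evals/benchmark/benchmark.yaml
```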
Then run the benchmark tool:
```bash
cd GenAIEval/evals/benchmark
python benchmark.py
```
#### 4. Data collection
All test results land in the folder `/home/sdp/benchmark_output/node_1`, configured through the `TEST_OUTPUT_DIR` environment variable in the previous step.
#### 5. Clean up
```bash
# on k8s-master node
cd GenAIExamples/ChatQnA/benchmark/single_gaudi
kubectl delete -f .
kubectl label nodes k8s-worker1 node-type-
```
### Two node test
#### 1. Preparation
We add a label to 2 Kubernetes nodes to make sure all pods are scheduled to them:
```bash
kubectl label nodes k8s-worker1 k8s-worker2 node-type=chatqna-opea
```
#### 2. Install ChatQnA
Go to the [BKC manifest](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/benchmark/two_gaudi) and apply it to K8s.
```bash
# on k8s-master node
cd GenAIExamples/ChatQnA/benchmark/two_gaudi
kubectl apply -f .
```
#### 3. Run tests
Copy the configuration file [benchmark.yaml](./benchmark.yaml) to `GenAIEval/evals/benchmark/benchmark.yaml` and configure `test_suite_config.user_queries` and `test_suite_config.test_output_dir`:
```bash
export USER_QUERIES="[4, 8, 16, 1280]"
export TEST_OUTPUT_DIR="/home/sdp/benchmark_output/node_2"
envsubst < ./benchmark.yaml > GenAIEval/evals/benchmark/benchmark.yaml
```
Then run the benchmark tool:
```bash
cd GenAIEval/evals/benchmark
python benchmark.py
```
#### 4. Data collection
All test results land in the folder `/home/sdp/benchmark_output/node_2`, configured through the `TEST_OUTPUT_DIR` environment variable in the previous step.
#### 5. Clean up
```bash
# on k8s-master node
cd GenAIExamples/ChatQnA/benchmark/two_gaudi
kubectl delete -f .
kubectl label nodes k8s-worker1 k8s-worker2 node-type-
```
### Four node test
#### 1. Preparation
We add a label to 4 Kubernetes nodes (including the master) to make sure all pods are scheduled to them:
```bash
kubectl label nodes k8s-master k8s-worker1 k8s-worker2 k8s-worker3 node-type=chatqna-opea
```
#### 2. Install ChatQnA
Go to the [BKC manifest](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/benchmark/four_gaudi) and apply it to K8s.
```bash
# on k8s-master node
cd GenAIExamples/ChatQnA/benchmark/four_gaudi
kubectl apply -f .
```
#### 3. Run tests
Copy the configuration file [benchmark.yaml](./benchmark.yaml) to `GenAIEval/evals/benchmark/benchmark.yaml` and configure `test_suite_config.user_queries` and `test_suite_config.test_output_dir`:
```bash
export USER_QUERIES="[4, 8, 16, 2560]"
export TEST_OUTPUT_DIR="/home/sdp/benchmark_output/node_4"
envsubst < ./benchmark.yaml > GenAIEval/evals/benchmark/benchmark.yaml
```
Then run the benchmark tool:
```bash
cd GenAIEval/evals/benchmark
python benchmark.py
```
#### 4. Data collection
All test results land in the folder `/home/sdp/benchmark_output/node_4`, configured through the `TEST_OUTPUT_DIR` environment variable in the previous step.
#### 5. Clean up
```bash
# on k8s-master node
cd GenAIExamples/ChatQnA/benchmark/four_gaudi
kubectl delete -f .
kubectl label nodes k8s-master k8s-worker1 k8s-worker2 k8s-worker3 node-type-
```
### Example Result
The following is a summary of the test result; the full output files are saved under `TEST_OUTPUT_DIR`.
```statistics
Concurrency : 512
Max request count : 2560
Http timeout : 60000
Benchmark target : chatqnafixed
=================Total statistics=====================
Succeed Response: 2560 (Total 2560, 100.0% Success), Duration: 26.44s, Input Tokens: 61440, Output Tokens: 255985, RPS: 96.82, Input Tokens per Second: 2323.71, Output Tokens per Second: 9681.57
End to End latency(ms), P50: 3576.34, P90: 4242.19, P99: 5252.23, Avg: 3581.55
First token latency(ms), P50: 726.64, P90: 1128.27, P99: 1796.09, Avg: 769.58
Average Next token latency(ms): 28.41
Average token latency(ms) : 35.85
======================================================
```
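The aggregate throughput figures follow directly from the counts and the duration; a small sketch reproducing them from this run's numbers:

```bash
# throughput = count / duration; tiny deviations from the report come from
# the duration being rounded to 26.44 s in the summary
awk 'BEGIN {
  d = 26.44
  printf "RPS                      : %.2f\n", 2560 / d
  printf "Input Tokens per Second  : %.2f\n", 61440 / d
  printf "Output Tokens per Second : %.2f\n", 255985 / d
}'
```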
```test spec
benchmarkresult:
Average_Next_token_latency: '28.41'
Average_token_latency: '35.85'
Duration: '26.44'
End_to_End_latency_Avg: '3581.55'
End_to_End_latency_P50: '3576.34'
End_to_End_latency_P90: '4242.19'
End_to_End_latency_P99: '5252.23'
First_token_latency_Avg: '769.58'
First_token_latency_P50: '726.64'
First_token_latency_P90: '1128.27'
First_token_latency_P99: '1796.09'
Input_Tokens: '61440'
Input_Tokens_per_Second: '2323.71'
Output_Tokens: '255985'
Output_Tokens_per_Second: '9681.57'
RPS: '96.82'
Succeed_Response: '2560'
locust_P50: '160'
locust_P99: '810'
locust_num_failures: '0'
locust_num_requests: '2560'
benchmarkspec:
bench-target: chatqnafixed
endtest_time: '2024-08-25T14:19:25.955973'
host: http://10.110.105.197:8888
llm-model: Intel/neural-chat-7b-v3-3
locustfile: /home/sdp/lvl/GenAIEval/evals/benchmark/stresscli/locust/aistress.py
max_requests: 2560
namespace: default
processes: 2
run_name: benchmark
runtime: 60m
starttest_time: '2024-08-25T14:18:50.366514'
stop_timeout: 120
tool: locust
users: 512
hardwarespec:
aise-gaudi-00:
architecture: amd64
containerRuntimeVersion: containerd://1.7.18
cpu: '160'
habana.ai/gaudi: '8'
kernelVersion: 5.15.0-92-generic
kubeProxyVersion: v1.29.7
kubeletVersion: v1.29.7
memory: 1056375272Ki
operatingSystem: linux
osImage: Ubuntu 22.04.3 LTS
aise-gaudi-01:
architecture: amd64
containerRuntimeVersion: containerd://1.7.18
cpu: '160'
habana.ai/gaudi: '8'
kernelVersion: 5.15.0-92-generic
kubeProxyVersion: v1.29.7
kubeletVersion: v1.29.7
memory: 1056375256Ki
operatingSystem: linux
osImage: Ubuntu 22.04.3 LTS
aise-gaudi-02:
architecture: amd64
containerRuntimeVersion: containerd://1.7.18
cpu: '160'
habana.ai/gaudi: '8'
kernelVersion: 5.15.0-92-generic
kubeProxyVersion: v1.29.7
kubeletVersion: v1.29.7
memory: 1056375260Ki
operatingSystem: linux
osImage: Ubuntu 22.04.3 LTS
aise-gaudi-03:
architecture: amd64
containerRuntimeVersion: containerd://1.6.8
cpu: '160'
habana.ai/gaudi: '8'
kernelVersion: 5.15.0-112-generic
kubeProxyVersion: v1.29.7
kubeletVersion: v1.29.7
memory: 1056374404Ki
operatingSystem: linux
osImage: Ubuntu 22.04.4 LTS
workloadspec:
aise-gaudi-00:
chatqna-backend-server-deploy:
replica: 1
resources:
limits:
cpu: '8'
memory: 4000Mi
requests:
cpu: '8'
memory: 4000Mi
embedding-dependency-deploy:
replica: 1
resources:
limits:
cpu: '80'
memory: 20000Mi
requests:
cpu: '80'
memory: 20000Mi
embedding-deploy:
replica: 1
llm-dependency-deploy:
replica: 8
resources:
limits:
habana.ai/gaudi: '1'
requests:
habana.ai/gaudi: '1'
llm-deploy:
replica: 1
retriever-deploy:
replica: 1
resources:
limits:
cpu: '8'
memory: 2500Mi
requests:
cpu: '8'
memory: 2500Mi
aise-gaudi-01:
chatqna-backend-server-deploy:
replica: 1
resources:
limits:
cpu: '8'
memory: 4000Mi
requests:
cpu: '8'
memory: 4000Mi
embedding-dependency-deploy:
replica: 1
resources:
limits:
cpu: '80'
memory: 20000Mi
requests:
cpu: '80'
memory: 20000Mi
embedding-deploy:
replica: 1
llm-dependency-deploy:
replica: 8
resources:
limits:
habana.ai/gaudi: '1'
requests:
habana.ai/gaudi: '1'
llm-deploy:
replica: 1
prometheus-operator:
replica: 1
resources:
limits:
cpu: 200m
memory: 200Mi
requests:
cpu: 100m
memory: 100Mi
retriever-deploy:
replica: 1
resources:
limits:
cpu: '8'
memory: 2500Mi
requests:
cpu: '8'
memory: 2500Mi
aise-gaudi-02:
chatqna-backend-server-deploy:
replica: 1
resources:
limits:
cpu: '8'
memory: 4000Mi
requests:
cpu: '8'
memory: 4000Mi
embedding-dependency-deploy:
replica: 1
resources:
limits:
cpu: '80'
memory: 20000Mi
requests:
cpu: '80'
memory: 20000Mi
embedding-deploy:
replica: 1
llm-dependency-deploy:
replica: 8
resources:
limits:
habana.ai/gaudi: '1'
requests:
habana.ai/gaudi: '1'
llm-deploy:
replica: 1
retriever-deploy:
replica: 1
resources:
limits:
cpu: '8'
memory: 2500Mi
requests:
cpu: '8'
memory: 2500Mi
aise-gaudi-03:
chatqna-backend-server-deploy:
replica: 1
resources:
limits:
cpu: '8'
memory: 4000Mi
requests:
cpu: '8'
memory: 4000Mi
dataprep-deploy:
replica: 1
embedding-dependency-deploy:
replica: 1
resources:
limits:
cpu: '80'
memory: 20000Mi
requests:
cpu: '80'
memory: 20000Mi
embedding-deploy:
replica: 1
llm-dependency-deploy:
replica: 8
resources:
limits:
habana.ai/gaudi: '1'
requests:
habana.ai/gaudi: '1'
llm-deploy:
replica: 1
retriever-deploy:
replica: 1
resources:
limits:
cpu: '8'
memory: 2500Mi
requests:
cpu: '8'
memory: 2500Mi
vector-db:
replica: 1
```

View File

@@ -1,55 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
test_suite_config: # Overall configuration settings for the test suite
examples: ["chatqna"] # The specific test cases being tested, e.g., chatqna, codegen, codetrans, faqgen, audioqna, visualqna
concurrent_level: 5 # The concurrency level, adjustable based on requirements
user_queries: ${USER_QUERIES} # Number of test requests at each concurrency level
random_prompt: false # Use random prompts if true, fixed prompts if false
run_time: 60m # The max total run time for the test suite
collect_service_metric: false # Collect service metrics if true, do not collect service metrics if false
data_visualization: false # Generate data visualization if true, do not generate data visualization if false
llm_model: "Intel/neural-chat-7b-v3-3" # The LLM model used for the test
test_output_dir: "${TEST_OUTPUT_DIR}" # The directory to store the test output
test_cases:
chatqna:
embedding:
run_test: false
service_name: "embedding-svc" # Replace with your service name
embedserve:
run_test: false
service_name: "embedding-dependency-svc" # Replace with your service name
retriever:
run_test: false
service_name: "retriever-svc" # Replace with your service name
parameters:
search_type: "similarity"
k: 4
fetch_k: 20
lambda_mult: 0.5
score_threshold: 0.2
reranking:
run_test: false
service_name: "reranking-svc" # Replace with your service name
parameters:
top_n: 1
rerankserve:
run_test: false
service_name: "reranking-dependency-svc" # Replace with your service name
llm:
run_test: false
service_name: "llm-svc" # Replace with your service name
parameters:
max_new_tokens: 128
temperature: 0.01
top_k: 10
top_p: 0.95
repetition_penalty: 1.03
streaming: true
llmserve:
run_test: false
service_name: "llm-dependency-svc" # Replace with your service name
e2e:
run_test: true
service_name: "chatqna-backend-server-svc" # Replace with your service name

View File

@@ -1,23 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: qna-config
namespace: default
data:
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
RERANK_MODEL_ID: BAAI/bge-reranker-base
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
INDEX_NAME: rag-redis
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
EMBEDDING_SERVICE_HOST_IP: embedding-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
RERANK_SERVICE_HOST_IP: reranking-svc
NODE_SELECTOR: chatqna-opea
LLM_SERVICE_HOST_IP: llm-svc

View File

@@ -1,62 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-backend-server-deploy
namespace: default
spec:
replicas: 4
selector:
matchLabels:
app: chatqna-backend-server-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: chatqna-backend-server-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: chatqna-backend-server-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna:v0.9
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
args: null
ports:
- containerPort: 8888
resources:
limits:
cpu: 8
memory: 4000Mi
requests:
cpu: 8
memory: 4000Mi
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: chatqna-backend-server-svc
spec:
type: NodePort
selector:
app: chatqna-backend-server-deploy
ports:
- name: service
port: 8888
targetPort: 8888
nodePort: 30888

View File

@@ -1,70 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: dataprep-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: dataprep-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: dataprep-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: dataprep-deploy
hostIPC: true
containers:
- env:
- name: REDIS_URL
valueFrom:
configMapKeyRef:
name: qna-config
key: REDIS_URL
- name: INDEX_NAME
valueFrom:
configMapKeyRef:
name: qna-config
key: INDEX_NAME
image: opea/dataprep-redis:v0.9
imagePullPolicy: IfNotPresent
name: dataprep-deploy
args: null
ports:
- containerPort: 6007
- containerPort: 6008
- containerPort: 6009
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: dataprep-svc
spec:
type: ClusterIP
selector:
app: dataprep-deploy
ports:
- name: port1
port: 6007
targetPort: 6007
- name: port2
port: 6008
targetPort: 6008
- name: port3
port: 6009
targetPort: 6009

View File

@@ -1,69 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-dependency-deploy
namespace: default
spec:
replicas: 4
selector:
matchLabels:
app: embedding-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
containers:
- envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
name: embedding-dependency-deploy
args:
- --model-id
- $(EMBEDDING_MODEL_ID)
- --auto-truncate
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
ports:
- containerPort: 80
resources:
limits:
cpu: 80
memory: 20000Mi
requests:
cpu: 80
memory: 20000Mi
serviceAccountName: default
volumes:
- name: model-volume
hostPath:
path: /mnt/models
type: Directory
- name: shm
emptyDir:
medium: Memory
sizeLimit: 1Gi
---
kind: Service
apiVersion: v1
metadata:
name: embedding-dependency-svc
spec:
type: ClusterIP
selector:
app: embedding-dependency-deploy
ports:
- name: service
port: 6006
targetPort: 80

View File

@@ -1,59 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-deploy
namespace: default
spec:
replicas: 4
selector:
matchLabels:
app: embedding-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: embedding-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/embedding-tei:v0.9
imagePullPolicy: IfNotPresent
name: embedding-deploy
args: null
ports:
- containerPort: 6000
resources:
limits:
cpu: 4
requests:
cpu: 4
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: embedding-svc
spec:
type: ClusterIP
selector:
app: embedding-deploy
ports:
- name: service
port: 6000
targetPort: 6000

View File

@@ -1,88 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-dependency-deploy
namespace: default
spec:
replicas: 31
selector:
matchLabels:
app: llm-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
name: llm-dependency-deploy-demo
securityContext:
capabilities:
add:
- SYS_NICE
args:
- --model-id
- $(LLM_MODEL_ID)
- --max-input-length
- '2048'
- --max-total-tokens
- '4096'
- --max-batch-total-tokens
- '65536'
- --max-batch-prefill-tokens
- '4096'
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
serviceAccountName: default
volumes:
- name: model-volume
hostPath:
path: /mnt/models
type: Directory
- name: shm
emptyDir:
medium: Memory
sizeLimit: 1Gi
---
kind: Service
apiVersion: v1
metadata:
name: llm-dependency-svc
spec:
type: ClusterIP
selector:
app: llm-dependency-deploy
ports:
- name: service
port: 9009
targetPort: 80

View File

@@ -1,59 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-deploy
namespace: default
spec:
replicas: 4
selector:
matchLabels:
app: llm-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: llm-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: llm-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/llm-tgi:v0.9
imagePullPolicy: IfNotPresent
name: llm-deploy
args: null
ports:
- containerPort: 9000
resources:
limits:
cpu: 4
requests:
cpu: 4
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: llm-svc
spec:
type: ClusterIP
selector:
app: llm-deploy
ports:
- name: service
port: 9000
targetPort: 9000

View File

@@ -1,85 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: reranking-dependency-deploy
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: reranking-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: reranking-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: reranking-dependency-deploy
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/tei-gaudi:v0.9
name: reranking-dependency-deploy
args:
- --model-id
- $(RERANK_MODEL_ID)
- --auto-truncate
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
- name: MAX_WARMUP_SEQUENCE_LENGTH
value: '512'
serviceAccountName: default
volumes:
- name: model-volume
hostPath:
path: /mnt/models
type: Directory
- name: shm
emptyDir:
medium: Memory
sizeLimit: 1Gi
---
kind: Service
apiVersion: v1
metadata:
name: reranking-dependency-svc
spec:
type: ClusterIP
selector:
app: reranking-dependency-deploy
ports:
- name: service
port: 8808
targetPort: 80

View File

@@ -1,59 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: reranking-deploy
namespace: default
spec:
replicas: 4
selector:
matchLabels:
app: reranking-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: reranking-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: reranking-deploy
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: opea/reranking-tei:v0.9
imagePullPolicy: IfNotPresent
name: reranking-deploy
args: null
ports:
- containerPort: 8000
resources:
limits:
cpu: 4
requests:
cpu: 4
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: reranking-svc
spec:
type: ClusterIP
selector:
app: reranking-deploy
ports:
- name: service
port: 8000
targetPort: 8000

View File

@@ -1,69 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: retriever-deploy
namespace: default
spec:
replicas: 4
selector:
matchLabels:
app: retriever-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: retriever-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: retriever-deploy
hostIPC: true
containers:
- env:
- name: REDIS_URL
valueFrom:
configMapKeyRef:
name: qna-config
key: REDIS_URL
- name: INDEX_NAME
valueFrom:
configMapKeyRef:
name: qna-config
key: INDEX_NAME
image: opea/retriever-redis:v0.9
imagePullPolicy: IfNotPresent
name: retriever-deploy
args: null
ports:
- containerPort: 7000
resources:
limits:
cpu: 8
memory: 2500Mi
requests:
cpu: 8
memory: 2500Mi
serviceAccountName: default
---
kind: Service
apiVersion: v1
metadata:
name: retriever-svc
spec:
type: ClusterIP
selector:
app: retriever-deploy
ports:
- name: service
port: 7000
targetPort: 7000

View File

@@ -1,48 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: vector-db
spec:
replicas: 1
selector:
matchLabels:
app: vector-db
template:
metadata:
labels:
app: vector-db
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: vector-db
containers:
- name: vector-db
image: redis/redis-stack:7.2.0-v9
ports:
- containerPort: 6379
- containerPort: 8001
---
apiVersion: v1
kind: Service
metadata:
name: vector-db
spec:
type: ClusterIP
selector:
app: vector-db
ports:
- name: vector-db-service
port: 6379
targetPort: 6379
- name: vector-db-insight
port: 8001
targetPort: 8001

View File

@@ -15,9 +15,7 @@ data:
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
INDEX_NAME: rag-redis
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
EMBEDDING_SERVICE_HOST_IP: embedding-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
RERANK_SERVICE_HOST_IP: reranking-svc
NODE_SELECTOR: chatqna-opea
LLM_SERVICE_HOST_IP: llm-svc

View File

@@ -18,8 +18,6 @@ spec:
labels:
app: chatqna-backend-server-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
@@ -32,7 +30,7 @@ spec:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna:v0.9
image: opea/chatqna:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
args: null
@@ -50,7 +48,7 @@ spec:
kind: Service
apiVersion: v1
metadata:
name: chatqna-backend-server-svc
name: chaqna-backend-server-svc
spec:
type: NodePort
selector:

View File

@@ -18,8 +18,6 @@ spec:
labels:
app: dataprep-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
@@ -40,7 +38,7 @@ spec:
configMapKeyRef:
name: qna-config
key: INDEX_NAME
image: opea/dataprep-redis:v0.9
image: opea/dataprep-redis:latest
imagePullPolicy: IfNotPresent
name: dataprep-deploy
args: null

View File

@@ -7,7 +7,7 @@ metadata:
name: embedding-dependency-deploy
namespace: default
spec:
replicas: 1
replicas: 4
selector:
matchLabels:
app: embedding-dependency-deploy
@@ -18,13 +18,11 @@ spec:
labels:
app: embedding-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
containers:
- envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
name: embedding-dependency-deploy
args:
- --model-id
@@ -39,16 +37,16 @@ spec:
- containerPort: 80
resources:
limits:
cpu: 80
memory: 20000Mi
cpu: 24
memory: 4000Mi
requests:
cpu: 80
memory: 20000Mi
cpu: 24
memory: 4000Mi
serviceAccountName: default
volumes:
- name: model-volume
hostPath:
path: /mnt/models
path: /home/sdp/cesg
type: Directory
- name: shm
emptyDir:

View File

@@ -18,8 +18,6 @@ spec:
labels:
app: embedding-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
@@ -32,7 +30,7 @@ spec:
- envFrom:
- configMapRef:
name: qna-config
image: opea/embedding-tei:v0.9
image: opea/embedding-tei:latest
imagePullPolicy: IfNotPresent
name: embedding-deploy
args: null

View File

@@ -18,14 +18,12 @@ spec:
labels:
app: llm-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
image: tgi_gaudi:2.0.1
name: llm-dependency-deploy-demo
securityContext:
capabilities:
@@ -38,10 +36,6 @@ spec:
- '2048'
- --max-total-tokens
- '4096'
- --max-batch-total-tokens
- '65536'
- --max-batch-prefill-tokens
- '4096'
volumeMounts:
- mountPath: /data
name: model-volume
@@ -62,12 +56,12 @@ spec:
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
value: $(HF_TOKEN)
serviceAccountName: default
volumes:
- name: model-volume
hostPath:
path: /mnt/models
path: /home/sdp/cesg
type: Directory
- name: shm
emptyDir:

View File

@@ -18,8 +18,6 @@ spec:
labels:
app: llm-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
@@ -32,7 +30,7 @@ spec:
- envFrom:
- configMapRef:
name: qna-config
image: opea/llm-tgi:v0.9
image: opea/llm-tgi:latest
imagePullPolicy: IfNotPresent
name: llm-deploy
args: null

View File

@@ -18,8 +18,6 @@ spec:
labels:
app: reranking-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
@@ -31,7 +29,7 @@ spec:
- envFrom:
- configMapRef:
name: qna-config
image: opea/tei-gaudi:v0.9
image: tei_gaudi:rerank
name: reranking-dependency-deploy
args:
- --model-id
@@ -57,14 +55,14 @@ spec:
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
value: $(HF_TOKEN)
- name: MAX_WARMUP_SEQUENCE_LENGTH
value: '512'
serviceAccountName: default
volumes:
- name: model-volume
hostPath:
path: /mnt/models
path: /home/sdp/cesg
type: Directory
- name: shm
emptyDir:

View File

@@ -18,8 +18,6 @@ spec:
labels:
app: reranking-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
@@ -32,7 +30,7 @@ spec:
- envFrom:
- configMapRef:
name: qna-config
image: opea/reranking-tei:v0.9
image: opea/reranking-tei:latest
imagePullPolicy: IfNotPresent
name: reranking-deploy
args: null

View File

@@ -18,8 +18,6 @@ spec:
labels:
app: retriever-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
@@ -40,7 +38,7 @@ spec:
configMapKeyRef:
name: qna-config
key: INDEX_NAME
image: opea/retriever-redis:v0.9
image: opea/retriever-redis:latest
imagePullPolicy: IfNotPresent
name: retriever-deploy
args: null

View File

@@ -15,8 +15,6 @@ spec:
labels:
app: vector-db
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname

View File

@@ -15,9 +15,8 @@ data:
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
INDEX_NAME: rag-redis
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: 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
EMBEDDING_SERVICE_HOST_IP: embedding-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
RERANK_SERVICE_HOST_IP: reranking-svc
NODE_SELECTOR: chatqna-opea
LLM_SERVICE_HOST_IP: llm-svc

View File

@@ -7,7 +7,7 @@ metadata:
name: chatqna-backend-server-deploy
namespace: default
spec:
replicas: 2
replicas: 1
selector:
matchLabels:
app: chatqna-backend-server-deploy
@@ -18,8 +18,6 @@ spec:
labels:
app: chatqna-backend-server-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
@@ -32,7 +30,7 @@ spec:
- envFrom:
- configMapRef:
name: qna-config
image: opea/chatqna:v0.9
image: opea/chatqna:latest
imagePullPolicy: IfNotPresent
name: chatqna-backend-server-deploy
args: null
@@ -50,7 +48,7 @@ spec:
kind: Service
apiVersion: v1
metadata:
name: chatqna-backend-server-svc
name: chaqna-backend-server-svc
spec:
type: NodePort
selector:

View File

@@ -18,8 +18,6 @@ spec:
labels:
app: dataprep-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
@@ -40,7 +38,7 @@ spec:
configMapKeyRef:
name: qna-config
key: INDEX_NAME
image: opea/dataprep-redis:v0.9
image: opea/dataprep-redis:latest
imagePullPolicy: IfNotPresent
name: dataprep-deploy
args: null

View File

@@ -7,7 +7,7 @@ metadata:
name: embedding-dependency-deploy
namespace: default
spec:
replicas: 2
replicas: 10
selector:
matchLabels:
app: embedding-dependency-deploy
@@ -18,13 +18,11 @@ spec:
labels:
app: embedding-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
containers:
- envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
name: embedding-dependency-deploy
args:
- --model-id
@@ -39,16 +37,16 @@ spec:
- containerPort: 80
resources:
limits:
cpu: 80
memory: 20000Mi
cpu: 24
memory: 4000Mi
requests:
cpu: 80
memory: 20000Mi
cpu: 24
memory: 4000Mi
serviceAccountName: default
volumes:
- name: model-volume
hostPath:
path: /mnt/models
path: /home/sdp/cesg
type: Directory
- name: shm
emptyDir:

View File

@@ -7,7 +7,7 @@ metadata:
name: embedding-deploy
namespace: default
spec:
replicas: 2
replicas: 1
selector:
matchLabels:
app: embedding-deploy
@@ -18,8 +18,6 @@ spec:
labels:
app: embedding-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
@@ -32,7 +30,7 @@ spec:
- envFrom:
- configMapRef:
name: qna-config
image: opea/embedding-tei:v0.9
image: opea/embedding-tei:latest
imagePullPolicy: IfNotPresent
name: embedding-deploy
args: null

View File

@@ -18,14 +18,12 @@ spec:
labels:
app: llm-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
hostIPC: true
containers:
- envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
image: tgi_gaudi:2.0.1
name: llm-dependency-deploy-demo
securityContext:
capabilities:
@@ -38,10 +36,6 @@ spec:
- '2048'
- --max-total-tokens
- '4096'
- --max-batch-total-tokens
- '65536'
- --max-batch-prefill-tokens
- '4096'
volumeMounts:
- mountPath: /data
name: model-volume
@@ -62,12 +56,12 @@ spec:
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
value: $(HF_TOKEN)
serviceAccountName: default
volumes:
- name: model-volume
hostPath:
path: /mnt/models
path: /home/sdp/cesg
type: Directory
- name: shm
emptyDir:

View File

@@ -7,7 +7,7 @@ metadata:
name: llm-deploy
namespace: default
spec:
replicas: 2
replicas: 1
selector:
matchLabels:
app: llm-deploy
@@ -18,8 +18,6 @@ spec:
labels:
app: llm-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
@@ -32,7 +30,7 @@ spec:
- envFrom:
- configMapRef:
name: qna-config
image: opea/llm-tgi:v0.9
image: opea/llm-tgi:latest
imagePullPolicy: IfNotPresent
name: llm-deploy
args: null

View File

@@ -18,8 +18,6 @@ spec:
labels:
app: reranking-dependency-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
@@ -31,7 +29,7 @@ spec:
- envFrom:
- configMapRef:
name: qna-config
image: opea/tei-gaudi:v0.9
image: tei_gaudi:rerank
name: reranking-dependency-deploy
args:
- --model-id
@@ -57,14 +55,14 @@ spec:
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: ${HF_TOKEN}
value: $(HF_TOKEN)
- name: MAX_WARMUP_SEQUENCE_LENGTH
value: '512'
serviceAccountName: default
volumes:
- name: model-volume
hostPath:
path: /mnt/models
path: /home/sdp/cesg
type: Directory
- name: shm
emptyDir:

View File

@@ -7,7 +7,7 @@ metadata:
name: reranking-deploy
namespace: default
spec:
replicas: 2
replicas: 1
selector:
matchLabels:
app: reranking-deploy
@@ -18,8 +18,6 @@ spec:
labels:
app: reranking-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
@@ -32,7 +30,7 @@ spec:
- envFrom:
- configMapRef:
name: qna-config
image: opea/reranking-tei:v0.9
image: opea/reranking-tei:latest
imagePullPolicy: IfNotPresent
name: reranking-deploy
args: null

View File

@@ -7,7 +7,7 @@ metadata:
name: retriever-deploy
namespace: default
spec:
replicas: 2
replicas: 1
selector:
matchLabels:
app: retriever-deploy
@@ -18,8 +18,6 @@ spec:
labels:
app: retriever-deploy
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
@@ -40,7 +38,7 @@ spec:
configMapKeyRef:
name: qna-config
key: INDEX_NAME
image: opea/retriever-redis:v0.9
image: opea/retriever-redis:latest
imagePullPolicy: IfNotPresent
name: retriever-deploy
args: null

View File

@@ -15,8 +15,6 @@ spec:
labels:
app: vector-db
spec:
nodeSelector:
node-type: chatqna-opea
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname

View File

@@ -1,33 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
FROM python:3.11-slim
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
libgl1-mesa-glx \
libjemalloc-dev \
vim \
git
RUN useradd -m -s /bin/bash user && \
mkdir -p /home/user && \
chown -R user /home/user/
WORKDIR /home/user/
RUN git clone https://github.com/opea-project/GenAIComps.git
WORKDIR /home/user/GenAIComps
RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt
COPY ./chatqna_without_rerank.py /home/user/chatqna_without_rerank.py
ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps
USER user
WORKDIR /home/user
ENTRYPOINT ["python", "chatqna_without_rerank.py"]

View File

@@ -160,7 +160,7 @@ Note: Please replace with `host_ip` with you external IP address, do not use loc
```bash
cd GenAIExamples/ChatQnA/docker/aipc/
TAG=v0.9 docker compose up -d
docker compose up -d
# let ollama service runs
# e.g. ollama run llama3

View File

@@ -1,6 +1,8 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
version: "3.8"
services:
redis-vector-db:
image: redis/redis-stack:7.2.0-v9
@@ -9,7 +11,7 @@ services:
- "6379:6379"
- "8001:8001"
dataprep-redis-service:
image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest}
image: opea/dataprep-redis:latest
container_name: dataprep-redis-server
depends_on:
- redis-vector-db
@@ -21,8 +23,6 @@ services:
https_proxy: ${https_proxy}
REDIS_URL: ${REDIS_URL}
INDEX_NAME: ${INDEX_NAME}
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
tei-embedding-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: tei-embedding-server
@@ -37,7 +37,7 @@ services:
https_proxy: ${https_proxy}
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
embedding:
image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
image: opea/embedding-tei:latest
container_name: embedding-tei-server
depends_on:
- tei-embedding-service
@@ -49,9 +49,12 @@ services:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-embedding-service"
restart: unless-stopped
retriever:
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
image: opea/retriever-redis:latest
container_name: retriever-redis-server
depends_on:
- redis-vector-db
@@ -65,6 +68,9 @@ services:
REDIS_URL: ${REDIS_URL}
INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-retriever-service"
restart: unless-stopped
tei-reranking-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
@@ -83,7 +89,7 @@ services:
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
reranking:
image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
image: opea/reranking-tei:latest
container_name: reranking-tei-aipc-server
depends_on:
- tei-reranking-service
@@ -98,9 +104,12 @@ services:
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-reranking-service"
restart: unless-stopped
llm:
image: ${REGISTRY:-opea}/llm-ollama
image: opea/llm-ollama
container_name: llm-ollama
ports:
- "9000:9000"
@@ -113,10 +122,12 @@ services:
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-llm-service"
OLLAMA_ENDPOINT: ${OLLAMA_ENDPOINT}
OLLAMA_MODEL: ${OLLAMA_MODEL}
chaqna-aipc-backend-server:
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
image: opea/chatqna:latest
container_name: chatqna-aipc-backend-server
depends_on:
- redis-vector-db
@@ -140,7 +151,7 @@ services:
ipc: host
restart: always
chaqna-aipc-ui-server:
image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest}
image: opea/chatqna-ui:latest
container_name: chatqna-aipc-ui-server
depends_on:
- chaqna-aipc-backend-server

Some files were not shown because too many files have changed in this diff.