From bcaffd7db4c05da34692f6d3da8f4eafce928c28 Mon Sep 17 00:00:00 2001
From: Zhenzhong1
Date: Mon, 21 Oct 2024 12:21:02 +0300
Subject: [PATCH] added more cases

---
Reviewer notes (text between the "---" separator and the first "diff --git"
line is not part of the commit message):

- Adds twelve values files under ChatQnA/benchmark/performance/helm_charts/,
  named <oob|tuned>_<single|two|four>_gaudi_<with|without>_rerank.yaml.
- Each file holds one top-level "podSpecs" list with a replica count per
  deployment; the tuned_* files additionally set "resources" limits/requests.
- The *_without_rerank files omit the reranking-dependency-deploy entry. In
  every other with/without pair the *_without_rerank file sets
  llm-dependency-deploy replicas one higher than its *_with_rerank twin.
- oob_four_gaudi_without_rerank.yaml originally repeated 63 from the
  *_with_rerank file; it is set to 64 here to follow that pattern (same value
  as tuned_four_gaudi_without_rerank.yaml).
  NOTE(review): confirm 64 against the target cluster. The blob id on that
  file's "index" line was generated before this change; regenerate the patch
  with git format-patch if exact blob ids matter.

 .../oob_four_gaudi_with_rerank.yaml        | 24 ++++++++++
 .../oob_four_gaudi_without_rerank.yaml     | 21 ++++++++
 .../oob_single_gaudi_with_rerank.yaml      | 24 ++++++++++
 .../oob_single_gaudi_without_rerank.yaml   | 21 ++++++++
 .../oob_two_gaudi_with_rerank.yaml         | 24 ++++++++++
 .../oob_two_gaudi_without_rerank.yaml      | 21 ++++++++
 .../tuned_four_gaudi_with_rerank.yaml      | 48 +++++++++++++++++++
 .../tuned_four_gaudi_without_rerank.yaml   | 42 ++++++++++++++++
 .../tuned_single_gaudi_with_rerank.yaml    | 48 +++++++++++++++++++
 .../tuned_single_gaudi_without_rerank.yaml | 42 ++++++++++++++++
 .../tuned_two_gaudi_with_rerank.yaml       | 48 +++++++++++++++++++
 .../tuned_two_gaudi_without_rerank.yaml    | 42 ++++++++++++++++
 12 files changed, 405 insertions(+)
 create mode 100644 ChatQnA/benchmark/performance/helm_charts/oob_four_gaudi_with_rerank.yaml
 create mode 100644 ChatQnA/benchmark/performance/helm_charts/oob_four_gaudi_without_rerank.yaml
 create mode 100644 ChatQnA/benchmark/performance/helm_charts/oob_single_gaudi_with_rerank.yaml
 create mode 100644 ChatQnA/benchmark/performance/helm_charts/oob_single_gaudi_without_rerank.yaml
 create mode 100644 ChatQnA/benchmark/performance/helm_charts/oob_two_gaudi_with_rerank.yaml
 create mode 100644 ChatQnA/benchmark/performance/helm_charts/oob_two_gaudi_without_rerank.yaml
 create mode 100644 ChatQnA/benchmark/performance/helm_charts/tuned_four_gaudi_with_rerank.yaml
 create mode 100644 ChatQnA/benchmark/performance/helm_charts/tuned_four_gaudi_without_rerank.yaml
 create mode 100644 ChatQnA/benchmark/performance/helm_charts/tuned_single_gaudi_with_rerank.yaml
 create mode 100644 ChatQnA/benchmark/performance/helm_charts/tuned_single_gaudi_without_rerank.yaml
 create mode 100644 ChatQnA/benchmark/performance/helm_charts/tuned_two_gaudi_with_rerank.yaml
 create mode 100644 ChatQnA/benchmark/performance/helm_charts/tuned_two_gaudi_without_rerank.yaml

diff --git a/ChatQnA/benchmark/performance/helm_charts/oob_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/performance/helm_charts/oob_four_gaudi_with_rerank.yaml
new file mode 100644
index 000000000..6b11d0024
--- /dev/null
+++ b/ChatQnA/benchmark/performance/helm_charts/oob_four_gaudi_with_rerank.yaml
@@ -0,0 +1,24 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+podSpecs:
+  - name: chatqna-backend-server-deploy
+    replicas: 4
+
+  - name: embedding-dependency-deploy
+    replicas: 4
+
+  - name: reranking-dependency-deploy
+    replicas: 1
+
+  - name: llm-dependency-deploy
+    replicas: 63
+
+  - name: dataprep-deploy
+    replicas: 1
+
+  - name: vector-db
+    replicas: 1
+
+  - name: retriever-deploy
+    replicas: 4
diff --git a/ChatQnA/benchmark/performance/helm_charts/oob_four_gaudi_without_rerank.yaml b/ChatQnA/benchmark/performance/helm_charts/oob_four_gaudi_without_rerank.yaml
new file mode 100644
index 000000000..e65e3da52
--- /dev/null
+++ b/ChatQnA/benchmark/performance/helm_charts/oob_four_gaudi_without_rerank.yaml
@@ -0,0 +1,21 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+podSpecs:
+  - name: chatqna-backend-server-deploy
+    replicas: 4
+
+  - name: embedding-dependency-deploy
+    replicas: 4
+
+  - name: llm-dependency-deploy
+    replicas: 64
+
+  - name: dataprep-deploy
+    replicas: 1
+
+  - name: vector-db
+    replicas: 1
+
+  - name: retriever-deploy
+    replicas: 4
diff --git a/ChatQnA/benchmark/performance/helm_charts/oob_single_gaudi_with_rerank.yaml b/ChatQnA/benchmark/performance/helm_charts/oob_single_gaudi_with_rerank.yaml
new file mode 100644
index 000000000..a8a3909cb
--- /dev/null
+++ b/ChatQnA/benchmark/performance/helm_charts/oob_single_gaudi_with_rerank.yaml
@@ -0,0 +1,24 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+podSpecs:
+  - name: chatqna-backend-server-deploy
+    replicas: 2
+
+  - name: embedding-dependency-deploy
+    replicas: 2
+
+  - name: reranking-dependency-deploy
+    replicas: 1
+
+  - name: llm-dependency-deploy
+    replicas: 15
+
+  - name: dataprep-deploy
+    replicas: 1
+
+  - name: vector-db
+    replicas: 1
+
+  - name: retriever-deploy
+    replicas: 2
diff --git a/ChatQnA/benchmark/performance/helm_charts/oob_single_gaudi_without_rerank.yaml b/ChatQnA/benchmark/performance/helm_charts/oob_single_gaudi_without_rerank.yaml
new file mode 100644
index 000000000..47ff36a89
--- /dev/null
+++ b/ChatQnA/benchmark/performance/helm_charts/oob_single_gaudi_without_rerank.yaml
@@ -0,0 +1,21 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+podSpecs:
+  - name: chatqna-backend-server-deploy
+    replicas: 2
+
+  - name: embedding-dependency-deploy
+    replicas: 2
+
+  - name: llm-dependency-deploy
+    replicas: 16
+
+  - name: dataprep-deploy
+    replicas: 1
+
+  - name: vector-db
+    replicas: 1
+
+  - name: retriever-deploy
+    replicas: 2
diff --git a/ChatQnA/benchmark/performance/helm_charts/oob_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/performance/helm_charts/oob_two_gaudi_with_rerank.yaml
new file mode 100644
index 000000000..0fdf5edc4
--- /dev/null
+++ b/ChatQnA/benchmark/performance/helm_charts/oob_two_gaudi_with_rerank.yaml
@@ -0,0 +1,24 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+podSpecs:
+  - name: chatqna-backend-server-deploy
+    replicas: 2
+
+  - name: embedding-dependency-deploy
+    replicas: 2
+
+  - name: reranking-dependency-deploy
+    replicas: 1
+
+  - name: llm-dependency-deploy
+    replicas: 31
+
+  - name: dataprep-deploy
+    replicas: 1
+
+  - name: vector-db
+    replicas: 1
+
+  - name: retriever-deploy
+    replicas: 2
diff --git a/ChatQnA/benchmark/performance/helm_charts/oob_two_gaudi_without_rerank.yaml b/ChatQnA/benchmark/performance/helm_charts/oob_two_gaudi_without_rerank.yaml
new file mode 100644
index 000000000..aeed536b1
--- /dev/null
+++ b/ChatQnA/benchmark/performance/helm_charts/oob_two_gaudi_without_rerank.yaml
@@ -0,0 +1,21 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+podSpecs:
+  - name: chatqna-backend-server-deploy
+    replicas: 2
+
+  - name: embedding-dependency-deploy
+    replicas: 2
+
+  - name: llm-dependency-deploy
+    replicas: 32
+
+  - name: dataprep-deploy
+    replicas: 1
+
+  - name: vector-db
+    replicas: 1
+
+  - name: retriever-deploy
+    replicas: 2
diff --git a/ChatQnA/benchmark/performance/helm_charts/tuned_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/performance/helm_charts/tuned_four_gaudi_with_rerank.yaml
new file mode 100644
index 000000000..402ada89d
--- /dev/null
+++ b/ChatQnA/benchmark/performance/helm_charts/tuned_four_gaudi_with_rerank.yaml
@@ -0,0 +1,48 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+podSpecs:
+  - name: chatqna-backend-server-deploy
+    replicas: 4
+    resources:
+      limits:
+        cpu: "16"
+        memory: "8000Mi"
+      requests:
+        cpu: "16"
+        memory: "8000Mi"
+
+  - name: embedding-dependency-deploy
+    replicas: 4
+    resources:
+      limits:
+        cpu: "80"
+        memory: "20000Mi"
+      requests:
+        cpu: "80"
+        memory: "20000Mi"
+
+  - name: reranking-dependency-deploy
+    replicas: 1
+    resources:
+      limits:
+        habana.ai/gaudi: 1
+
+  - name: llm-dependency-deploy
+    replicas: 63
+    resources:
+      limits:
+        habana.ai/gaudi: 1
+
+  - name: dataprep-deploy
+    replicas: 1
+
+  - name: vector-db
+    replicas: 1
+
+  - name: retriever-deploy
+    replicas: 4
+    resources:
+      requests:
+        cpu: "16"
+        memory: "4000Mi"
diff --git a/ChatQnA/benchmark/performance/helm_charts/tuned_four_gaudi_without_rerank.yaml b/ChatQnA/benchmark/performance/helm_charts/tuned_four_gaudi_without_rerank.yaml
new file mode 100644
index 000000000..9f16f91c8
--- /dev/null
+++ b/ChatQnA/benchmark/performance/helm_charts/tuned_four_gaudi_without_rerank.yaml
@@ -0,0 +1,42 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+podSpecs:
+  - name: chatqna-backend-server-deploy
+    replicas: 4
+    resources:
+      limits:
+        cpu: "16"
+        memory: "8000Mi"
+      requests:
+        cpu: "16"
+        memory: "8000Mi"
+
+  - name: embedding-dependency-deploy
+    replicas: 4
+    resources:
+      limits:
+        cpu: "80"
+        memory: "20000Mi"
+      requests:
+        cpu: "80"
+        memory: "20000Mi"
+
+  - name: llm-dependency-deploy
+    replicas: 64
+    resources:
+      limits:
+        habana.ai/gaudi: 1
+
+  - name: dataprep-deploy
+    replicas: 1
+
+  - name: vector-db
+    replicas: 1
+
+  - name: retriever-deploy
+    replicas: 4
+    resources:
+      requests:
+        cpu: "16"
+        memory: "4000Mi"
diff --git a/ChatQnA/benchmark/performance/helm_charts/tuned_single_gaudi_with_rerank.yaml b/ChatQnA/benchmark/performance/helm_charts/tuned_single_gaudi_with_rerank.yaml
new file mode 100644
index 000000000..e36ab3ff7
--- /dev/null
+++ b/ChatQnA/benchmark/performance/helm_charts/tuned_single_gaudi_with_rerank.yaml
@@ -0,0 +1,48 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+podSpecs:
+  - name: chatqna-backend-server-deploy
+    replicas: 2
+    resources:
+      limits:
+        cpu: "16"
+        memory: "8000Mi"
+      requests:
+        cpu: "16"
+        memory: "8000Mi"
+
+  - name: embedding-dependency-deploy
+    replicas: 1
+    resources:
+      limits:
+        cpu: "80"
+        memory: "20000Mi"
+      requests:
+        cpu: "80"
+        memory: "20000Mi"
+
+  - name: reranking-dependency-deploy
+    replicas: 1
+    resources:
+      limits:
+        habana.ai/gaudi: 1
+
+  - name: llm-dependency-deploy
+    replicas: 7
+    resources:
+      limits:
+        habana.ai/gaudi: 1
+
+  - name: dataprep-deploy
+    replicas: 1
+
+  - name: vector-db
+    replicas: 1
+
+  - name: retriever-deploy
+    replicas: 2
+    resources:
+      requests:
+        cpu: "16"
+        memory: "4000Mi"
diff --git a/ChatQnA/benchmark/performance/helm_charts/tuned_single_gaudi_without_rerank.yaml b/ChatQnA/benchmark/performance/helm_charts/tuned_single_gaudi_without_rerank.yaml
new file mode 100644
index 000000000..4743394fe
--- /dev/null
+++ b/ChatQnA/benchmark/performance/helm_charts/tuned_single_gaudi_without_rerank.yaml
@@ -0,0 +1,42 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+podSpecs:
+  - name: chatqna-backend-server-deploy
+    replicas: 2
+    resources:
+      limits:
+        cpu: "16"
+        memory: "8000Mi"
+      requests:
+        cpu: "16"
+        memory: "8000Mi"
+
+  - name: embedding-dependency-deploy
+    replicas: 1
+    resources:
+      limits:
+        cpu: "80"
+        memory: "20000Mi"
+      requests:
+        cpu: "80"
+        memory: "20000Mi"
+
+  - name: llm-dependency-deploy
+    replicas: 8
+    resources:
+      limits:
+        habana.ai/gaudi: 1
+
+  - name: dataprep-deploy
+    replicas: 1
+
+  - name: vector-db
+    replicas: 1
+
+  - name: retriever-deploy
+    replicas: 2
+    resources:
+      requests:
+        cpu: "16"
+        memory: "4000Mi"
diff --git a/ChatQnA/benchmark/performance/helm_charts/tuned_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/performance/helm_charts/tuned_two_gaudi_with_rerank.yaml
new file mode 100644
index 000000000..558f6c7b0
--- /dev/null
+++ b/ChatQnA/benchmark/performance/helm_charts/tuned_two_gaudi_with_rerank.yaml
@@ -0,0 +1,48 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+podSpecs:
+  - name: chatqna-backend-server-deploy
+    replicas: 2
+    resources:
+      limits:
+        cpu: "16"
+        memory: "8000Mi"
+      requests:
+        cpu: "16"
+        memory: "8000Mi"
+
+  - name: embedding-dependency-deploy
+    replicas: 2
+    resources:
+      limits:
+        cpu: "80"
+        memory: "20000Mi"
+      requests:
+        cpu: "80"
+        memory: "20000Mi"
+
+  - name: reranking-dependency-deploy
+    replicas: 1
+    resources:
+      limits:
+        habana.ai/gaudi: 1
+
+  - name: llm-dependency-deploy
+    replicas: 15
+    resources:
+      limits:
+        habana.ai/gaudi: 1
+
+  - name: dataprep-deploy
+    replicas: 1
+
+  - name: vector-db
+    replicas: 1
+
+  - name: retriever-deploy
+    replicas: 2
+    resources:
+      requests:
+        cpu: "16"
+        memory: "4000Mi"
diff --git a/ChatQnA/benchmark/performance/helm_charts/tuned_two_gaudi_without_rerank.yaml b/ChatQnA/benchmark/performance/helm_charts/tuned_two_gaudi_without_rerank.yaml
new file mode 100644
index 000000000..324a23ada
--- /dev/null
+++ b/ChatQnA/benchmark/performance/helm_charts/tuned_two_gaudi_without_rerank.yaml
@@ -0,0 +1,42 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+podSpecs:
+  - name: chatqna-backend-server-deploy
+    replicas: 2
+    resources:
+      limits:
+        cpu: "16"
+        memory: "8000Mi"
+      requests:
+        cpu: "16"
+        memory: "8000Mi"
+
+  - name: embedding-dependency-deploy
+    replicas: 2
+    resources:
+      limits:
+        cpu: "80"
+        memory: "20000Mi"
+      requests:
+        cpu: "80"
+        memory: "20000Mi"
+
+  - name: llm-dependency-deploy
+    replicas: 16
+    resources:
+      limits:
+        habana.ai/gaudi: 1
+
+  - name: dataprep-deploy
+    replicas: 1
+
+  - name: vector-db
+    replicas: 1
+
+  - name: retriever-deploy
+    replicas: 2
+    resources:
+      requests:
+        cpu: "16"
+        memory: "4000Mi"