From 1046aad26f9c55b3ec54a229b5a6ac8545f708a4 Mon Sep 17 00:00:00 2001 From: Zhenzhong1 Date: Wed, 23 Oct 2024 09:30:03 +0300 Subject: [PATCH] removed benchmark template --- .../performance/helm_charts/.helmignore | 23 -- .../performance/helm_charts/Chart.yaml | 27 --- .../performance/helm_charts/README.md | 25 --- .../performance/helm_charts/customize.yaml | 23 -- .../helm_charts/templates/configmap.yaml | 25 --- .../helm_charts/templates/deployment.yaml | 131 ------------ .../helm_charts/templates/service.yaml | 24 --- .../performance/helm_charts/values.yaml | 200 ------------------ .../performance/helm_charts/deployment.py | 4 +- .../performance/helm_charts/.helmignore | 23 -- .../performance/helm_charts/Chart.yaml | 27 --- .../performance/helm_charts/README.md | 25 --- .../performance/helm_charts/customize.yaml | 23 -- .../helm_charts/templates/configmap.yaml | 16 -- .../helm_charts/templates/deployment.yaml | 131 ------------ .../helm_charts/templates/service.yaml | 24 --- .../performance/helm_charts/values.yaml | 102 --------- .../performance/helm_charts/.helmignore | 23 -- .../performance/helm_charts/Chart.yaml | 27 --- .../performance/helm_charts/README.md | 25 --- .../performance/helm_charts/customize.yaml | 23 -- .../helm_charts/templates/configmap.yaml | 24 --- .../helm_charts/templates/deployment.yaml | 131 ------------ .../helm_charts/templates/service.yaml | 24 --- .../performance/helm_charts/values.yaml | 84 -------- 25 files changed, 2 insertions(+), 1212 deletions(-) delete mode 100644 AudioQnA/benchmark/performance/helm_charts/.helmignore delete mode 100644 AudioQnA/benchmark/performance/helm_charts/Chart.yaml delete mode 100644 AudioQnA/benchmark/performance/helm_charts/README.md delete mode 100644 AudioQnA/benchmark/performance/helm_charts/customize.yaml delete mode 100644 AudioQnA/benchmark/performance/helm_charts/templates/configmap.yaml delete mode 100644 AudioQnA/benchmark/performance/helm_charts/templates/deployment.yaml delete mode 100644 AudioQnA/benchmark/performance/helm_charts/templates/service.yaml delete mode 100644 AudioQnA/benchmark/performance/helm_charts/values.yaml delete mode 100644 FaqGen/benchmark/performance/helm_charts/.helmignore delete mode 100644 FaqGen/benchmark/performance/helm_charts/Chart.yaml delete mode 100644 FaqGen/benchmark/performance/helm_charts/README.md delete mode 100644 FaqGen/benchmark/performance/helm_charts/customize.yaml delete mode 100644 FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml delete mode 100644 FaqGen/benchmark/performance/helm_charts/templates/deployment.yaml delete mode 100644 FaqGen/benchmark/performance/helm_charts/templates/service.yaml delete mode 100644 FaqGen/benchmark/performance/helm_charts/values.yaml delete mode 100644 VisualQnA/benchmark/performance/helm_charts/.helmignore delete mode 100644 VisualQnA/benchmark/performance/helm_charts/Chart.yaml delete mode 100644 VisualQnA/benchmark/performance/helm_charts/README.md delete mode 100644 VisualQnA/benchmark/performance/helm_charts/customize.yaml delete mode 100644 VisualQnA/benchmark/performance/helm_charts/templates/configmap.yaml delete mode 100644 VisualQnA/benchmark/performance/helm_charts/templates/deployment.yaml delete mode 100644 VisualQnA/benchmark/performance/helm_charts/templates/service.yaml delete mode 100644 VisualQnA/benchmark/performance/helm_charts/values.yaml diff --git a/AudioQnA/benchmark/performance/helm_charts/.helmignore b/AudioQnA/benchmark/performance/helm_charts/.helmignore deleted file mode 100644 index 0e8a0eb36..000000000 --- a/AudioQnA/benchmark/performance/helm_charts/.helmignore +++ /dev/null @@ -1,23 +0,0 @@ -# Patterns to ignore when building packages. -# This supports shell glob matching, relative path matching, and -# negation (prefixed with !). Only one pattern per line. -.DS_Store -# Common VCS dirs -.git/ -.gitignore -.bzr/ -.bzrignore -.hg/ -.hgignore -.svn/ -# Common backup files -*.swp -*.bak -*.tmp -*.orig -*~ -# Various IDEs -.project -.idea/ -*.tmproj -.vscode/ diff --git a/AudioQnA/benchmark/performance/helm_charts/Chart.yaml b/AudioQnA/benchmark/performance/helm_charts/Chart.yaml deleted file mode 100644 index 67f950087..000000000 --- a/AudioQnA/benchmark/performance/helm_charts/Chart.yaml +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v2 -name: audioqna-charts -description: A Helm chart for Kubernetes - -# A chart can be either an 'application' or a 'library' chart. -# -# Application charts are a collection of templates that can be packaged into versioned archives -# to be deployed. -# -# Library charts provide useful utilities or functions for the chart developer. They're included as -# a dependency of application charts to inject those utilities and functions into the rendering -# pipeline. Library charts do not define any templates and therefore cannot be deployed. -type: application - -# This is the chart version. This version number should be incremented each time you make changes -# to the chart and its templates, including the app version. -# Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 1.0 - -# This is the version number of the application being deployed. This version number should be -# incremented each time you make changes to the application. Versions are not expected to -# follow Semantic Versioning. They should reflect the version the application is using. -# It is recommended to use it with quotes. -appVersion: "1.16.0" diff --git a/AudioQnA/benchmark/performance/helm_charts/README.md b/AudioQnA/benchmark/performance/helm_charts/README.md deleted file mode 100644 index 3b19eb373..000000000 --- a/AudioQnA/benchmark/performance/helm_charts/README.md +++ /dev/null @@ -1,25 +0,0 @@ -# Benchmarking Deployment - -This document guides you through deploying this example pipeline using Helm charts. Helm charts simplify managing Kubernetes applications by packaging configuration and resources. - -## Getting Started - -### Preparation - -```bash -# on k8s-master node -cd GenAIExamples/{example_name}/benchmark/performance/helm_charts - -# Replace the key of HUGGINGFACEHUB_API_TOKEN with your actual Hugging Face token: -# vim values.yaml -HUGGINGFACEHUB_API_TOKEN: hf_xxxxx -``` - -### Deployment - -```bash -# Deploy the pipeline -helm install {example_name} . -``` - -Note: Currently we only support the HPU version, because only HPU values.yaml is provided here. diff --git a/AudioQnA/benchmark/performance/helm_charts/customize.yaml b/AudioQnA/benchmark/performance/helm_charts/customize.yaml deleted file mode 100644 index 43e5dec83..000000000 --- a/AudioQnA/benchmark/performance/helm_charts/customize.yaml +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -podSpecs: - - name: audioqna-backend-server-deploy - replicas: 1 - - - name: asr-deploy - replicas: 1 - - - name: whisper-deploy - replicas: 1 - - - - name: tts-deploy - replicas: 1 - - - name: speecht5-deploy - replicas: 1 - - - - name: llm-dependency-deploy - replicas: 1 diff --git a/AudioQnA/benchmark/performance/helm_charts/templates/configmap.yaml b/AudioQnA/benchmark/performance/helm_charts/templates/configmap.yaml deleted file mode 100644 index 7aa21eb20..000000000 --- a/AudioQnA/benchmark/performance/helm_charts/templates/configmap.yaml +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ .Values.config.CONFIG_MAP_NAME }} - namespace: default -data: - HUGGINGFACEHUB_API_TOKEN: {{ .Values.config.HUGGINGFACEHUB_API_TOKEN }} - LLM_MODEL_ID: {{ .Values.config.LLM_MODEL_ID }} - NODE_SELECTOR: {{ .Values.config.NODE_SELECTOR }} - TGI_LLM_ENDPOINT: http://faq-tgi-svc.default.svc.cluster.local:8010 - - ASR_ENDPOINT: http://whisper-svc.default.svc.cluster.local:7066 - TTS_ENDPOINT: http://speecht5-svc.default.svc.cluster.local:7055 - TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:3006 - MEGA_SERVICE_HOST_IP: audioqna-backend-server-svc - ASR_SERVICE_HOST_IP: asr-svc - ASR_SERVICE_PORT: "3001" - LLM_SERVICE_HOST_IP: llm-svc - LLM_SERVICE_PORT: "3007" - TTS_SERVICE_HOST_IP: tts-svc - TTS_SERVICE_PORT: "3002" ---- diff --git a/AudioQnA/benchmark/performance/helm_charts/templates/deployment.yaml b/AudioQnA/benchmark/performance/helm_charts/templates/deployment.yaml deleted file mode 100644 index 4fe4fc581..000000000 --- a/AudioQnA/benchmark/performance/helm_charts/templates/deployment.yaml +++ /dev/null @@ -1,131 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -{{- $global := .Values }} -{{- range $microservice := .Values.microservices }} -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ $microservice.name }} - namespace: default -spec: - {{- $replicas := $microservice.replicas }} - {{- range $podSpec := $global.podSpecs }} - {{- if eq $podSpec.name $microservice.name }} - {{- $replicas = $podSpec.replicas | default $microservice.replicas }} - {{- end }} - {{- end }} - replicas: {{ $replicas }} - - selector: - matchLabels: - app: {{ $microservice.name }} - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: {{ $microservice.name }} - spec: - containers: - - envFrom: - - configMapRef: - name: {{ $global.config.CONFIG_MAP_NAME }} - {{- if $microservice.args }} - args: - {{- range $arg := $microservice.args }} - {{- if $arg.name }} - - {{ $arg.name }} - {{- end }} - {{- if $arg.value }} - - "{{ $arg.value }}" - {{- end }} - {{- end }} - {{- end }} - - {{- if $microservice.env }} - env: - {{- range $env := $microservice.env }} - - name: {{ $env.name }} - value: "{{ $env.value }}" - {{- end }} - {{- end }} - - {{- $image := $microservice.image }} - {{- range $podSpec := $global.podSpecs }} - {{- if eq $podSpec.name $microservice.name }} - {{- $image = $podSpec.image | default $microservice.image }} - {{- end }} - {{- end }} - image: {{ $image }} - - imagePullPolicy: IfNotPresent - name: {{ $microservice.name }} - - {{- if $microservice.ports }} - ports: - {{- range $port := $microservice.ports }} - {{- range $port_name, $port_id := $port }} - - {{ $port_name }}: {{ $port_id }} - {{- end }} - {{- end }} - {{- end }} - - {{- $resources := $microservice.resources }} - {{- range $podSpec := $global.podSpecs }} - {{- if eq $podSpec.name $microservice.name }} - {{- if $podSpec.resources }} - {{- $resources = $podSpec.resources }} - {{- end }} - {{- end }} - {{- end }} - - {{- if $resources }} - resources: - {{- range $resourceType, $resource := $resources }} - {{ $resourceType }}: - {{- range $limitType, $limit := $resource }} - {{ $limitType }}: {{ $limit }} - {{- end }} - {{- end }} - {{- end }} - - {{- if $microservice.volumeMounts }} - volumeMounts: - {{- range $volumeMount := $microservice.volumeMounts }} - - mountPath: {{ $volumeMount.mountPath }} - name: {{ $volumeMount.name }} - {{- end }} - {{- end }} - - hostIPC: true - nodeSelector: - node-type: {{ $global.config.NODE_SELECTOR }} - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: {{ $microservice.name }} - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - - - {{- if $microservice.volumes }} - volumes: - {{- range $index, $volume := $microservice.volumes }} - - name: {{ $volume.name }} - {{- if $volume.hostPath }} - hostPath: - path: {{ $volume.hostPath.path }} - type: {{ $volume.hostPath.type }} - {{- else if $volume.emptyDir }} - emptyDir: - medium: {{ $volume.emptyDir.medium }} - sizeLimit: {{ $volume.emptyDir.sizeLimit }} - {{- end }} - {{- end }} - {{- end }} - ---- -{{- end }} diff --git a/AudioQnA/benchmark/performance/helm_charts/templates/service.yaml b/AudioQnA/benchmark/performance/helm_charts/templates/service.yaml deleted file mode 100644 index 5a5896921..000000000 --- a/AudioQnA/benchmark/performance/helm_charts/templates/service.yaml +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -{{- range $service := .Values.services }} -apiVersion: v1 -kind: Service -metadata: - name: {{ $service.name }} - namespace: default -spec: - ports: - {{- range $port := $service.spec.ports }} - - name: {{ $port.name }} - {{- range $port_name, $port_id := $port }} - {{- if ne $port_name "name"}} - {{ $port_name }}: {{ $port_id }} - {{- end }} - {{- end }} - {{- end }} - selector: - app: {{ $service.spec.selector.app }} - type: {{ $service.spec.type }} ---- -{{- end }} diff --git a/AudioQnA/benchmark/performance/helm_charts/values.yaml b/AudioQnA/benchmark/performance/helm_charts/values.yaml deleted file mode 100644 index fd62df479..000000000 --- a/AudioQnA/benchmark/performance/helm_charts/values.yaml +++ /dev/null @@ -1,200 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -namespace: default - -config: - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 - CONFIG_MAP_NAME: audio-qna-config - NODE_SELECTOR: opea - ASR_ENDPOINT: http://whisper-svc.default.svc.cluster.local:7066 - TTS_ENDPOINT: http://speecht5-svc.default.svc.cluster.local:7055 - TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:3006 - MEGA_SERVICE_HOST_IP: audioqna-backend-server-svc - ASR_SERVICE_HOST_IP: asr-svc - ASR_SERVICE_PORT: "3001" - LLM_SERVICE_HOST_IP: llm-svc - LLM_SERVICE_PORT: "3007" - TTS_SERVICE_HOST_IP: tts-svc - TTS_SERVICE_PORT: "3002" - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - -microservices: - - name: audioqna-backend-server-deploy - image: opea/audioqna:latest - replicas: 1 - ports: - - containerPort: 8888 - - - name: asr-deploy - image: opea/asr:latest - replicas: 1 - ports: - - containerPort: 9099 - - - name: whisper-deploy - image: opea/whisper-gaudi:latest - replicas: 1 - ports: - - containerPort: 7066 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - resources: - limits: - habana.ai/gaudi: 1 - - - name: tts-deploy - image: opea/tts:latest - replicas: 1 - ports: - - containerPort: 9088 - - - name: speecht5-deploy - image: opea/speecht5-gaudi:latest - replicas: 1 - ports: - - containerPort: 7055 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - resources: - limits: - habana.ai/gaudi: 1 - - - name: llm-deploy - image: opea/llm-tgi:latest - replicas: 1 - ports: - - containerPort: 9000 - - - name: llm-dependency-deploy - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 - replicas: 1 - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - args: - - name: "--model-id" - value: $(LLM_MODEL_ID) - - name: "--max-input-length" - value: "2048" - - name: "--max-total-tokens" - value: "4096" - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: "true" - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: ENABLE_HPU_GRAPH - value: 'true' - - name: LIMIT_HPU_GRAPH - value: 'true' - - name: USE_FLASH_ATTENTION - value: 'true' - - name: FLASH_ATTENTION_RECOMPUTE - value: 'true' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - volumes: - - hostPath: - path: /mnt/models - type: Directory - name: model-volume - - emptyDir: - medium: Memory - sizeLimit: 1Gi - name: shm - -services: - - name: asr-svc - spec: - ports: - - name: service - port: 3001 - targetPort: 9099 - selector: - app: asr-deploy - type: ClusterIP - - - name: whisper-svc - spec: - ports: - - name: service - port: 7066 - targetPort: 7066 - selector: - app: whisper-deploy - type: ClusterIP - - - name: tts-svc - spec: - ports: - - name: service - port: 3002 - targetPort: 9088 - selector: - app: tts-deploy - type: ClusterIP - - - name: speecht5-svc - spec: - ports: - - name: service - port: 7055 - targetPort: 7055 - selector: - app: speecht5-deploy - type: ClusterIP - - - name: llm-dependency-svc - spec: - ports: - - name: service - port: 3006 - targetPort: 80 - selector: - app: llm-dependency-deploy - type: ClusterIP - - - name: llm-svc - spec: - ports: - - name: service - port: 3007 - targetPort: 9000 - selector: - app: llm-deploy - type: ClusterIP - - - name: audioqna-backend-server-svc - spec: - ports: - - name: service - port: 3088 - targetPort: 8888 - nodePort: 30666 - selector: - app: audioqna-backend-server-deploy - type: NodePort diff --git a/ChatQnA/benchmark/performance/helm_charts/deployment.py b/ChatQnA/benchmark/performance/helm_charts/deployment.py index 0c7649821..cec903e1d 100644 --- a/ChatQnA/benchmark/performance/helm_charts/deployment.py +++ b/ChatQnA/benchmark/performance/helm_charts/deployment.py @@ -59,7 +59,7 @@ def generate_yaml(num_nodes, mode="oob", with_rerank="True"): else None ), {"name": "llm-dependency-deploy", "resources": {"limits": {"habana.ai/gaudi": 1}}}, - {"name": "retriever-deploy", "resources": {"requests": {"cpu": "16", "memory": "8000Mi"}}}, + {"name": "retriever-deploy", "resources": {"requests": {"cpu": "8", "memory": "8000Mi"}}}, ] replicas = [replica for replica in replicas if replica] @@ -72,7 +72,7 @@ def generate_yaml(num_nodes, mode="oob", with_rerank="True"): {"name": "--model-id", "value": "$(LLM_MODEL_ID)"}, {"name": "--max-input-length", "value": 1280}, {"name": "--max-total-tokens", "value": 2048}, - {"name": "--max-batch-total-tokens", "value": 35536}, + {"name": "--max-batch-total-tokens", "value": 65536}, {"name": "--max-batch-prefill-tokens", "value": 4096}, ], }, diff --git a/FaqGen/benchmark/performance/helm_charts/.helmignore b/FaqGen/benchmark/performance/helm_charts/.helmignore deleted file mode 100644 index 0e8a0eb36..000000000 --- a/FaqGen/benchmark/performance/helm_charts/.helmignore +++ /dev/null @@ -1,23 +0,0 @@ -# Patterns to ignore when building packages. -# This supports shell glob matching, relative path matching, and -# negation (prefixed with !). Only one pattern per line. -.DS_Store -# Common VCS dirs -.git/ -.gitignore -.bzr/ -.bzrignore -.hg/ -.hgignore -.svn/ -# Common backup files -*.swp -*.bak -*.tmp -*.orig -*~ -# Various IDEs -.project -.idea/ -*.tmproj -.vscode/ diff --git a/FaqGen/benchmark/performance/helm_charts/Chart.yaml b/FaqGen/benchmark/performance/helm_charts/Chart.yaml deleted file mode 100644 index a810cf937..000000000 --- a/FaqGen/benchmark/performance/helm_charts/Chart.yaml +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v2 -name: faqgen-charts -description: A Helm chart for Kubernetes - -# A chart can be either an 'application' or a 'library' chart. -# -# Application charts are a collection of templates that can be packaged into versioned archives -# to be deployed. -# -# Library charts provide useful utilities or functions for the chart developer. They're included as -# a dependency of application charts to inject those utilities and functions into the rendering -# pipeline. Library charts do not define any templates and therefore cannot be deployed. -type: application - -# This is the chart version. This version number should be incremented each time you make changes -# to the chart and its templates, including the app version. -# Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 1.0 - -# This is the version number of the application being deployed. This version number should be -# incremented each time you make changes to the application. Versions are not expected to -# follow Semantic Versioning. They should reflect the version the application is using. -# It is recommended to use it with quotes. -appVersion: "1.16.0" diff --git a/FaqGen/benchmark/performance/helm_charts/README.md b/FaqGen/benchmark/performance/helm_charts/README.md deleted file mode 100644 index 3b19eb373..000000000 --- a/FaqGen/benchmark/performance/helm_charts/README.md +++ /dev/null @@ -1,25 +0,0 @@ -# Benchmarking Deployment - -This document guides you through deploying this example pipeline using Helm charts. Helm charts simplify managing Kubernetes applications by packaging configuration and resources. - -## Getting Started - -### Preparation - -```bash -# on k8s-master node -cd GenAIExamples/{example_name}/benchmark/performance/helm_charts - -# Replace the key of HUGGINGFACEHUB_API_TOKEN with your actual Hugging Face token: -# vim values.yaml -HUGGINGFACEHUB_API_TOKEN: hf_xxxxx -``` - -### Deployment - -```bash -# Deploy the pipeline -helm install {example_name} . -``` - -Note: Currently we only support the HPU version, because only HPU values.yaml is provided here. diff --git a/FaqGen/benchmark/performance/helm_charts/customize.yaml b/FaqGen/benchmark/performance/helm_charts/customize.yaml deleted file mode 100644 index 01388e66a..000000000 --- a/FaqGen/benchmark/performance/helm_charts/customize.yaml +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -podSpecs: - - name: faq-mega-server-deploy - replicas: 2 - resources: - limits: - cpu: "8" - memory: "8000Mi" - requests: - cpu: "8" - memory: "8000Mi" - - - - name: faq-tgi-deploy - replicas: 7 - resources: - limits: - habana.ai/gaudi: 1 - - - name: faq-micro-deploy - replicas: 1 diff --git a/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml b/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml deleted file mode 100644 index df3e61d20..000000000 --- a/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ .Values.config.CONFIG_MAP_NAME }} - namespace: default -data: - HUGGINGFACEHUB_API_TOKEN: {{ .Values.HUGGINGFACEHUB_API_TOKEN }} - LLM_MODEL_ID: {{ .Values.config.LLM_MODEL_ID }} - NODE_SELECTOR: {{ .Values.config.NODE_SELECTOR }} - TGI_LLM_ENDPOINT: http://faq-tgi-svc.default.svc.cluster.local:8010 - LLM_SERVICE_HOST_IP: faq-micro-svc - MEGA_SERVICE_HOST_IP: faq-mega-server-svc ---- diff --git a/FaqGen/benchmark/performance/helm_charts/templates/deployment.yaml b/FaqGen/benchmark/performance/helm_charts/templates/deployment.yaml deleted file mode 100644 index 4fe4fc581..000000000 --- a/FaqGen/benchmark/performance/helm_charts/templates/deployment.yaml +++ /dev/null @@ -1,131 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -{{- $global := .Values }} -{{- range $microservice := .Values.microservices }} -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ $microservice.name }} - namespace: default -spec: - {{- $replicas := $microservice.replicas }} - {{- range $podSpec := $global.podSpecs }} - {{- if eq $podSpec.name $microservice.name }} - {{- $replicas = $podSpec.replicas | default $microservice.replicas }} - {{- end }} - {{- end }} - replicas: {{ $replicas }} - - selector: - matchLabels: - app: {{ $microservice.name }} - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: {{ $microservice.name }} - spec: - containers: - - envFrom: - - configMapRef: - name: {{ $global.config.CONFIG_MAP_NAME }} - {{- if $microservice.args }} - args: - {{- range $arg := $microservice.args }} - {{- if $arg.name }} - - {{ $arg.name }} - {{- end }} - {{- if $arg.value }} - - "{{ $arg.value }}" - {{- end }} - {{- end }} - {{- end }} - - {{- if $microservice.env }} - env: - {{- range $env := $microservice.env }} - - name: {{ $env.name }} - value: "{{ $env.value }}" - {{- end }} - {{- end }} - - {{- $image := $microservice.image }} - {{- range $podSpec := $global.podSpecs }} - {{- if eq $podSpec.name $microservice.name }} - {{- $image = $podSpec.image | default $microservice.image }} - {{- end }} - {{- end }} - image: {{ $image }} - - imagePullPolicy: IfNotPresent - name: {{ $microservice.name }} - - {{- if $microservice.ports }} - ports: - {{- range $port := $microservice.ports }} - {{- range $port_name, $port_id := $port }} - - {{ $port_name }}: {{ $port_id }} - {{- end }} - {{- end }} - {{- end }} - - {{- $resources := $microservice.resources }} - {{- range $podSpec := $global.podSpecs }} - {{- if eq $podSpec.name $microservice.name }} - {{- if $podSpec.resources }} - {{- $resources = $podSpec.resources }} - {{- end }} - {{- end }} - {{- end }} - - {{- if $resources }} - resources: - {{- range $resourceType, $resource := $resources }} - {{ $resourceType }}: - {{- range $limitType, $limit := $resource }} - {{ $limitType }}: {{ $limit }} - {{- end }} - {{- end }} - {{- end }} - - {{- if $microservice.volumeMounts }} - volumeMounts: - {{- range $volumeMount := $microservice.volumeMounts }} - - mountPath: {{ $volumeMount.mountPath }} - name: {{ $volumeMount.name }} - {{- end }} - {{- end }} - - hostIPC: true - nodeSelector: - node-type: {{ $global.config.NODE_SELECTOR }} - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: {{ $microservice.name }} - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - - - {{- if $microservice.volumes }} - volumes: - {{- range $index, $volume := $microservice.volumes }} - - name: {{ $volume.name }} - {{- if $volume.hostPath }} - hostPath: - path: {{ $volume.hostPath.path }} - type: {{ $volume.hostPath.type }} - {{- else if $volume.emptyDir }} - emptyDir: - medium: {{ $volume.emptyDir.medium }} - sizeLimit: {{ $volume.emptyDir.sizeLimit }} - {{- end }} - {{- end }} - {{- end }} - ---- -{{- end }} diff --git a/FaqGen/benchmark/performance/helm_charts/templates/service.yaml b/FaqGen/benchmark/performance/helm_charts/templates/service.yaml deleted file mode 100644 index 5a5896921..000000000 --- a/FaqGen/benchmark/performance/helm_charts/templates/service.yaml +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -{{- range $service := .Values.services }} -apiVersion: v1 -kind: Service -metadata: - name: {{ $service.name }} - namespace: default -spec: - ports: - {{- range $port := $service.spec.ports }} - - name: {{ $port.name }} - {{- range $port_name, $port_id := $port }} - {{- if ne $port_name "name"}} - {{ $port_name }}: {{ $port_id }} - {{- end }} - {{- end }} - {{- end }} - selector: - app: {{ $service.spec.selector.app }} - type: {{ $service.spec.type }} ---- -{{- end }} diff --git a/FaqGen/benchmark/performance/helm_charts/values.yaml b/FaqGen/benchmark/performance/helm_charts/values.yaml deleted file mode 100644 index 1548a7eab..000000000 --- a/FaqGen/benchmark/performance/helm_charts/values.yaml +++ /dev/null @@ -1,102 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -namespace: default - -config: - LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct - CONFIG_MAP_NAME: faq-config - NODE_SELECTOR: opea - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - -microservices: - - name: faq-mega-server-deploy - image: opea/chatqna:latest - replicas: 1 - ports: - - containerPort: 7777 - - - name: faq-micro-deploy - image: opea/llm-faqgen-tgi:latest - replicas: 1 - ports: - - containerPort: 9000 - - - name: faq-tgi-deploy - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 - replicas: 1 - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - args: - - name: "--model-id" - value: $(LLM_MODEL_ID) - - name: "--max-input-length" - value: "2048" - - name: "--max-total-tokens" - value: "4096" - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: "true" - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: ENABLE_HPU_GRAPH - value: 'true' - - name: LIMIT_HPU_GRAPH - value: 'true' - - name: USE_FLASH_ATTENTION - value: 'true' - - name: FLASH_ATTENTION_RECOMPUTE - value: 'true' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - volumes: - - hostPath: - path: /mnt/models - type: Directory - name: model-volume - - emptyDir: - medium: Memory - sizeLimit: 1Gi - name: shm - -services: - - name: faq-micro-svc - spec: - ports: - - name: service - port: 9003 - targetPort: 9000 - selector: - app: faq-micro-deploy - type: ClusterIP - - - name: faq-tgi-svc - spec: - ports: - - name: service - port: 8010 - targetPort: 80 - selector: - app: faq-tgi-deploy - type: ClusterIP - - - name: faq-mega-server-svc - spec: - ports: - - name: service - port: 7779 - targetPort: 7777 - nodePort: 30779 - selector: - app: faq-mega-server-deploy - type: NodePort diff --git a/VisualQnA/benchmark/performance/helm_charts/.helmignore b/VisualQnA/benchmark/performance/helm_charts/.helmignore deleted file mode 100644 index 0e8a0eb36..000000000 --- a/VisualQnA/benchmark/performance/helm_charts/.helmignore +++ /dev/null @@ -1,23 +0,0 @@ -# Patterns to ignore when building packages. -# This supports shell glob matching, relative path matching, and -# negation (prefixed with !). Only one pattern per line. -.DS_Store -# Common VCS dirs -.git/ -.gitignore -.bzr/ -.bzrignore -.hg/ -.hgignore -.svn/ -# Common backup files -*.swp -*.bak -*.tmp -*.orig -*~ -# Various IDEs -.project -.idea/ -*.tmproj -.vscode/ diff --git a/VisualQnA/benchmark/performance/helm_charts/Chart.yaml b/VisualQnA/benchmark/performance/helm_charts/Chart.yaml deleted file mode 100644 index 5f6695c8c..000000000 --- a/VisualQnA/benchmark/performance/helm_charts/Chart.yaml +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v2 -name: visualqna-charts -description: A Helm chart for Kubernetes - -# A chart can be either an 'application' or a 'library' chart. -# -# Application charts are a collection of templates that can be packaged into versioned archives -# to be deployed. -# -# Library charts provide useful utilities or functions for the chart developer. They're included as -# a dependency of application charts to inject those utilities and functions into the rendering -# pipeline. Library charts do not define any templates and therefore cannot be deployed. -type: application - -# This is the chart version. This version number should be incremented each time you make changes -# to the chart and its templates, including the app version. -# Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 1.0 - -# This is the version number of the application being deployed. This version number should be -# incremented each time you make changes to the application. Versions are not expected to -# follow Semantic Versioning. They should reflect the version the application is using. -# It is recommended to use it with quotes. -appVersion: "1.16.0" diff --git a/VisualQnA/benchmark/performance/helm_charts/README.md b/VisualQnA/benchmark/performance/helm_charts/README.md deleted file mode 100644 index 3b19eb373..000000000 --- a/VisualQnA/benchmark/performance/helm_charts/README.md +++ /dev/null @@ -1,25 +0,0 @@ -# Benchmarking Deployment - -This document guides you through deploying this example pipeline using Helm charts. Helm charts simplify managing Kubernetes applications by packaging configuration and resources. - -## Getting Started - -### Preparation - -```bash -# on k8s-master node -cd GenAIExamples/{example_name}/benchmark/performance/helm_charts - -# Replace the key of HUGGINGFACEHUB_API_TOKEN with your actual Hugging Face token: -# vim values.yaml -HUGGINGFACEHUB_API_TOKEN: hf_xxxxx -``` - -### Deployment - -```bash -# Deploy the pipeline -helm install {example_name} . -``` - -Note: Currently we only support the HPU version, because only HPU values.yaml is provided here. diff --git a/VisualQnA/benchmark/performance/helm_charts/customize.yaml b/VisualQnA/benchmark/performance/helm_charts/customize.yaml deleted file mode 100644 index 01388e66a..000000000 --- a/VisualQnA/benchmark/performance/helm_charts/customize.yaml +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -podSpecs: - - name: faq-mega-server-deploy - replicas: 2 - resources: - limits: - cpu: "8" - memory: "8000Mi" - requests: - cpu: "8" - memory: "8000Mi" - - - - name: faq-tgi-deploy - replicas: 7 - resources: - limits: - habana.ai/gaudi: 1 - - - name: faq-micro-deploy - replicas: 1 diff --git a/VisualQnA/benchmark/performance/helm_charts/templates/configmap.yaml b/VisualQnA/benchmark/performance/helm_charts/templates/configmap.yaml deleted file mode 100644 index 824d9a4fc..000000000 --- a/VisualQnA/benchmark/performance/helm_charts/templates/configmap.yaml +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ .Values.config.CONFIG_MAP_NAME }} - namespace: default -data: - HUGGINGFACEHUB_API_TOKEN: {{ .Values.HUGGINGFACEHUB_API_TOKEN }} - LLM_MODEL_ID: {{ .Values.config.LLM_MODEL_ID }} - NODE_SELECTOR: {{ .Values.config.NODE_SELECTOR }} - - CONFIG_MAP_NAME: visualqna-config - NODE_SELECTOR: opea - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - - LVM_ENDPOINT: "http://visualqna-tgi" - MODEL_ID: "llava-hf/llava-v1.6-mistral-7b-hf" - LVM_SERVICE_HOST_IP: visualqna-lvm-uservice - PORT: "8399" - MAX_INPUT_TOKENS: "4096" - MAX_TOTAL_TOKENS: "8192" ---- diff --git a/VisualQnA/benchmark/performance/helm_charts/templates/deployment.yaml b/VisualQnA/benchmark/performance/helm_charts/templates/deployment.yaml deleted file mode 100644 index 4fe4fc581..000000000 --- a/VisualQnA/benchmark/performance/helm_charts/templates/deployment.yaml +++ /dev/null @@ -1,131 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -{{- $global := .Values }} -{{- range $microservice := .Values.microservices }} -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ $microservice.name }} - namespace: default -spec: - {{- $replicas := $microservice.replicas }} - {{- range $podSpec := $global.podSpecs }} - {{- if eq $podSpec.name $microservice.name }} - {{- $replicas = $podSpec.replicas | default $microservice.replicas }} - {{- end }} - {{- end }} - replicas: {{ $replicas }} - - selector: - matchLabels: - app: {{ $microservice.name }} - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: {{ $microservice.name }} - spec: - containers: - - envFrom: - - configMapRef: - name: {{ $global.config.CONFIG_MAP_NAME }} - {{- if $microservice.args }} - args: - {{- range $arg := $microservice.args }} - {{- if $arg.name }} - - {{ $arg.name }} - {{- end }} - {{- if $arg.value }} - - "{{ $arg.value }}" - {{- end }} - {{- end }} - {{- end }} - - {{- if $microservice.env }} - env: - {{- range $env := $microservice.env }} - - name: {{ $env.name }} - value: "{{ $env.value }}" - {{- end }} - {{- end }} - - {{- $image := $microservice.image }} - {{- range $podSpec := $global.podSpecs }} - {{- if eq $podSpec.name $microservice.name }} - {{- $image = $podSpec.image | default $microservice.image }} - {{- end }} - {{- end }} - image: {{ $image }} - - imagePullPolicy: IfNotPresent - name: {{ $microservice.name }} - - {{- if $microservice.ports }} - ports: - {{- range $port := $microservice.ports }} - {{- range $port_name, $port_id := $port }} - - {{ $port_name }}: {{ $port_id }} - {{- end }} - {{- end }} - {{- end }} - - {{- $resources := $microservice.resources }} - {{- range $podSpec := $global.podSpecs }} - {{- if eq $podSpec.name $microservice.name }} - {{- if $podSpec.resources }} - {{- $resources = $podSpec.resources }} - {{- end }} - {{- end }} - {{- end }} - - {{- if $resources }} - resources: - {{- range $resourceType, $resource := $resources }} - {{ $resourceType }}: - {{- range $limitType, $limit := $resource }} - {{ $limitType }}: {{ $limit }} - {{- end }} - {{- end }} - {{- end }} - - {{- if $microservice.volumeMounts }} - volumeMounts: - {{- range $volumeMount := $microservice.volumeMounts }} - - mountPath: {{ $volumeMount.mountPath }} - name: {{ $volumeMount.name }} - {{- end }} - {{- end }} - - hostIPC: true - nodeSelector: - node-type: {{ $global.config.NODE_SELECTOR }} - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: {{ $microservice.name }} - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - - - {{- if $microservice.volumes }} - volumes: - {{- range $index, $volume := $microservice.volumes }} - - name: {{ $volume.name }} - {{- if $volume.hostPath }} - hostPath: - path: {{ $volume.hostPath.path }} - type: {{ $volume.hostPath.type }} - {{- else if $volume.emptyDir }} - emptyDir: - medium: {{ $volume.emptyDir.medium }} - sizeLimit: {{ $volume.emptyDir.sizeLimit }} - {{- end }} - {{- end }} - {{- end }} - ---- -{{- end }} diff --git a/VisualQnA/benchmark/performance/helm_charts/templates/service.yaml b/VisualQnA/benchmark/performance/helm_charts/templates/service.yaml deleted file mode 100644 index 5a5896921..000000000 --- a/VisualQnA/benchmark/performance/helm_charts/templates/service.yaml +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -{{- range $service := .Values.services }} -apiVersion: v1 -kind: Service -metadata: - name: {{ $service.name }} - namespace: default -spec: - ports: - {{- range $port := $service.spec.ports }} - - name: {{ $port.name }} - {{- range $port_name, $port_id := $port }} - {{- if ne $port_name "name"}} - {{ $port_name }}: {{ $port_id }} - {{- end }} - {{- end }} - {{- end }} - selector: - app: {{ $service.spec.selector.app }} - type: {{ $service.spec.type }} ---- -{{- end }} diff --git a/VisualQnA/benchmark/performance/helm_charts/values.yaml b/VisualQnA/benchmark/performance/helm_charts/values.yaml deleted file mode 100644 index 7357a303a..000000000 --- a/VisualQnA/benchmark/performance/helm_charts/values.yaml +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -namespace: default - -config: - LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct - CONFIG_MAP_NAME: visualqna-config - NODE_SELECTOR: opea - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - - LVM_ENDPOINT: "http://visualqna-tgi" - MODEL_ID: "llava-hf/llava-v1.6-mistral-7b-hf" - LVM_SERVICE_HOST_IP: visualqna-lvm-uservice - PORT: "8399" - MAX_INPUT_TOKENS: "4096" - MAX_TOTAL_TOKENS: "8192" - -microservices: - - name: visualqna-lvm-uservice - image: opea/lvm-tgi:latest - replicas: 1 - ports: - - containerPort: 9399 - - - name: visualqna - image: opea/visualqna:latest - replicas: 1 - ports: - - containerPort: 8399 - - - name: visualqna-tgi - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 - replicas: 1 - ports: - - containerPort: 8399 - resources: - limits: - habana.ai/gaudi: 1 - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - volumes: - - hostPath: - path: /mnt/models - type: Directory - name: model-volume - - emptyDir: - medium: Memory - sizeLimit: 1Gi - name: shm - -services: - - name: visualqna-lvm-uservice - spec: - ports: - - name: service - port: 9399 - targetPort: 9399 - selector: - app: visualqna-lvm-uservice - type: ClusterIP - - - name: visualqna-tgi-service - spec: - ports: - - name: service - port: 80 - targetPort: 8399 - selector: - app: visualqna-tgi - type: ClusterIP - - - name: visualqna-service - spec: - ports: - - name: service - port: 8888 - targetPort: 8888 - selector: - app: visualqna - type: ClusterIP