From 048b4e1df9adb002aa789aff525a00f66b88f8ae Mon Sep 17 00:00:00 2001 From: Zhenzhong Xu Date: Mon, 21 Oct 2024 11:06:37 +0300 Subject: [PATCH] refactored AudioQNA --- AudioQnA/benchmark/helm_charts/customize.yaml | 43 +---- .../helm_charts/templates/configmap.yaml | 2 +- .../helm_charts/templates/deployment.yaml | 59 +++--- AudioQnA/benchmark/helm_charts/values.yaml | 182 ++++++++++-------- .../performance/helm_charts/values.yaml | 2 +- .../performance/helm_charts/values.yaml | 2 +- 6 files changed, 144 insertions(+), 146 deletions(-) diff --git a/AudioQnA/benchmark/helm_charts/customize.yaml b/AudioQnA/benchmark/helm_charts/customize.yaml index 31e1b6ca6..87fe3b280 100644 --- a/AudioQnA/benchmark/helm_charts/customize.yaml +++ b/AudioQnA/benchmark/helm_charts/customize.yaml @@ -1,50 +1,23 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - podSpecs: - name: audioqna-backend-server-deploy - spec: - image_name: opea/audioqna - image_tag: latest - replicas: 1 + replicas: 1 - name: asr-deploy - spec: - image_name: opea/asr - image_tag: latest - replicas: 1 + replicas: 1 - name: whisper-deploy - spec: - image_name: opea/whisper-gaudi - image_tag: latest - replicas: 1 - resources: - limits: - habana.ai/gaudi: 1 + replicas: 1 + - name: tts-deploy - spec: - image_name: opea/tts - image_tag: latest - replicas: 1 + replicas: 1 - name: speecht5-deploy - spec: - image_name: opea/speecht5-gaudi - image_tag: latest - replicas: 1 - resources: - limits: - habana.ai/gaudi: 1 + replicas: 1 + - name: llm-dependency-deploy - spec: - image_name: ghcr.io/huggingface/tgi-gaudi - image_tag: 2.0.5 - replicas: 1 - resources: - limits: - habana.ai/gaudi: 1 + replicas: 1 \ No newline at end of file diff --git a/AudioQnA/benchmark/helm_charts/templates/configmap.yaml b/AudioQnA/benchmark/helm_charts/templates/configmap.yaml index 79246763f..7aa21eb20 100644 --- a/AudioQnA/benchmark/helm_charts/templates/configmap.yaml +++ b/AudioQnA/benchmark/helm_charts/templates/configmap.yaml @@ -7,7 +7,7 @@ metadata: name: {{ .Values.config.CONFIG_MAP_NAME }} namespace: default data: - HUGGINGFACEHUB_API_TOKEN: {{ .Values.HUGGINGFACEHUB_API_TOKEN }} + HUGGINGFACEHUB_API_TOKEN: {{ .Values.config.HUGGINGFACEHUB_API_TOKEN }} LLM_MODEL_ID: {{ .Values.config.LLM_MODEL_ID }} NODE_SELECTOR: {{ .Values.config.NODE_SELECTOR }} TGI_LLM_ENDPOINT: http://faq-tgi-svc.default.svc.cluster.local:8010 diff --git a/AudioQnA/benchmark/helm_charts/templates/deployment.yaml b/AudioQnA/benchmark/helm_charts/templates/deployment.yaml index 7a9fe0c54..920e54685 100644 --- a/AudioQnA/benchmark/helm_charts/templates/deployment.yaml +++ b/AudioQnA/benchmark/helm_charts/templates/deployment.yaml @@ -2,33 +2,38 @@ # SPDX-License-Identifier: Apache-2.0 {{- $global := .Values }} -{{- range $deployment := .Values.deployments }} -{{- range $podSpec := $global.podSpecs }} -{{- if eq $podSpec.name $deployment.name }} +{{- range $microservice := .Values.microservices }} apiVersion: apps/v1 kind: Deployment metadata: - name: {{ $deployment.name }} + name: {{ $microservice.name }} namespace: default spec: - replicas: {{ $podSpec.spec.replicas }} + {{- $replicas := $microservice.replicas }} + {{- range $podSpec := $global.podSpecs }} + {{- if eq $podSpec.name $microservice.name }} + {{- $replicas = $podSpec.replicas | default $microservice.replicas }} + {{- end }} + {{- end }} + replicas: {{ $replicas }} + selector: matchLabels: - app: {{ $deployment.name }} + app: {{ $microservice.name }} template: metadata: annotations: sidecar.istio.io/rewriteAppHTTPProbers: 'true' labels: - app: {{ $deployment.name }} + app: {{ $microservice.name }} spec: containers: - envFrom: - configMapRef: name: {{ $global.config.CONFIG_MAP_NAME }} - {{- if $deployment.spec.args }} + {{- if $microservice.args }} args: - {{- range $arg := $deployment.spec.args }} + {{- range $arg := $microservice.args }} {{- if $arg.name }} - {{ $arg.name }} {{- end }} @@ -38,31 +43,39 @@ spec: {{- end }} {{- end }} - {{- if $deployment.spec.env }} + {{- if $microservice.env }} env: - {{- range $env := $deployment.spec.env }} + {{- range $env := $microservice.env }} - name: {{ $env.name }} value: "{{ $env.value }}" {{- end }} {{- end }} - image: {{ $podSpec.spec.image_name }}:{{ $podSpec.spec.image_tag }} + image: {{ $microservice.image }} imagePullPolicy: IfNotPresent - name: {{ $podSpec.name }} + name: {{ $microservice.name }} - {{- if $deployment.spec.ports }} + {{- if $microservice.ports }} ports: - {{- range $port := $deployment.spec.ports }} + {{- range $port := $microservice.ports }} {{- range $port_name, $port_id := $port }} - {{ $port_name }}: {{ $port_id }} {{- end }} {{- end }} {{- end }} + {{- $resources := $microservice.resources }} + {{- range $podSpec := $global.podSpecs }} + {{- if eq $podSpec.name $microservice.name }} + {{- if $podSpec.resources }} + {{- $resources = $podSpec.resources }} + {{- end }} + {{- end }} + {{- end }} - {{- if $podSpec.spec.resources }} + {{- if $resources }} resources: - {{- range $resourceType, $resource := $podSpec.spec.resources }} + {{- range $resourceType, $resource := $resources }} {{ $resourceType }}: {{- range $limitType, $limit := $resource }} {{ $limitType }}: {{ $limit }} @@ -70,9 +83,9 @@ spec: {{- end }} {{- end }} - {{- if $deployment.spec.volumeMounts }} + {{- if $microservice.volumeMounts }} volumeMounts: - {{- range $volumeMount := $deployment.spec.volumeMounts }} + {{- range $volumeMount := $microservice.volumeMounts }} - mountPath: {{ $volumeMount.mountPath }} name: {{ $volumeMount.name }} {{- end }} @@ -85,15 +98,15 @@ spec: topologySpreadConstraints: - labelSelector: matchLabels: - app: {{ $deployment.name }} + app: {{ $microservice.name }} maxSkew: 1 topologyKey: kubernetes.io/hostname whenUnsatisfiable: ScheduleAnyway - {{- if $deployment.spec.volumes }} + {{- if $microservice.volumes }} volumes: - {{- range $index, $volume := $deployment.spec.volumes }} + {{- range $index, $volume := $microservice.volumes }} - name: {{ $volume.name }} {{- if $volume.hostPath }} hostPath: @@ -109,5 +122,3 @@ spec: --- {{- end }} -{{- end }} -{{- end }} diff --git a/AudioQnA/benchmark/helm_charts/values.yaml b/AudioQnA/benchmark/helm_charts/values.yaml index e2f03da95..28d414c96 100644 --- a/AudioQnA/benchmark/helm_charts/values.yaml +++ b/AudioQnA/benchmark/helm_charts/values.yaml @@ -6,7 +6,7 @@ namespace: default config: LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 CONFIG_MAP_NAME: audio-qna-config - NODE_SELECTOR: audioqna-opea + NODE_SELECTOR: opea ASR_ENDPOINT: http://whisper-svc.default.svc.cluster.local:7066 TTS_ENDPOINT: http://speecht5-svc.default.svc.cluster.local:7055 TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:3006 @@ -17,101 +17,115 @@ config: LLM_SERVICE_PORT: "3007" TTS_SERVICE_HOST_IP: tts-svc TTS_SERVICE_PORT: "3002" + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} -deployments: +microservices: - name: audioqna-backend-server-deploy - spec: - ports: - - containerPort: 8888 + image: opea/audioqna:latest + replicas: 1 + ports: + - containerPort: 8888 - name: asr-deploy - spec: - ports: - - containerPort: 9099 + image: opea/asr:latest + replicas: 1 + ports: + - containerPort: 9099 - name: whisper-deploy - spec: - ports: - - containerPort: 7066 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all + image: opea/whisper-gaudi:latest + replicas: 1 + ports: + - containerPort: 7066 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + resources: + limits: + habana.ai/gaudi: 1 - name: tts-deploy - spec: - ports: - - containerPort: 9088 - - - name: llm-deploy - spec: - ports: - - containerPort: 9000 + image: opea/tts:latest + replicas: 1 + ports: + - containerPort: 9088 - name: speecht5-deploy - spec: - ports: - - containerPort: 7055 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all + image: opea/speecht5-gaudi:latest + replicas: 1 + ports: + - containerPort: 7055 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + resources: + limits: + habana.ai/gaudi: 1 + - name: llm-deploy + image: opea/llm-tgi:latest + replicas: 1 + ports: + - containerPort: 9000 + - name: llm-dependency-deploy - spec: - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - args: - - name: "--model-id" - value: $(LLM_MODEL_ID) - - name: "--max-input-length" - value: "2048" - - name: "--max-total-tokens" - value: "4096" - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: "true" - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: ENABLE_HPU_GRAPH - value: 'true' - - name: LIMIT_HPU_GRAPH - value: 'true' - - name: USE_FLASH_ATTENTION - value: 'true' - - name: FLASH_ATTENTION_RECOMPUTE - value: 'true' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - volumes: - - hostPath: - path: /mnt/models - type: Directory - name: model-volume - - emptyDir: - medium: Memory - sizeLimit: 1Gi - name: shm + image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + replicas: 1 + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + args: + - name: "--model-id" + value: $(LLM_MODEL_ID) + - name: "--max-input-length" + value: "2048" + - name: "--max-total-tokens" + value: "4096" + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: "true" + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: ENABLE_HPU_GRAPH + value: 'true' + - name: LIMIT_HPU_GRAPH + value: 'true' + - name: USE_FLASH_ATTENTION + value: 'true' + - name: FLASH_ATTENTION_RECOMPUTE + value: 'true' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm services: - name: asr-svc diff --git a/ChatQnA/benchmark/performance/helm_charts/values.yaml b/ChatQnA/benchmark/performance/helm_charts/values.yaml index ddb76b0bb..5cd34f578 100644 --- a/ChatQnA/benchmark/performance/helm_charts/values.yaml +++ b/ChatQnA/benchmark/performance/helm_charts/values.yaml @@ -5,7 +5,7 @@ namespace: default config: CONFIG_MAP_NAME: chatqna-config - NODE_SELECTOR: chatqna-opea + NODE_SELECTOR: opea EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 RERANK_MODEL_ID: BAAI/bge-reranker-base diff --git a/FaqGen/benchmark/performance/helm_charts/values.yaml b/FaqGen/benchmark/performance/helm_charts/values.yaml index 6f12074b8..1548a7eab 100644 --- a/FaqGen/benchmark/performance/helm_charts/values.yaml +++ b/FaqGen/benchmark/performance/helm_charts/values.yaml @@ -6,7 +6,7 @@ namespace: default config: LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct CONFIG_MAP_NAME: faq-config - NODE_SELECTOR: faq-opea + NODE_SELECTOR: opea HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} microservices: