refactored AudioQNA

This commit is contained in:
Zhenzhong Xu
2024-10-21 11:06:37 +03:00
parent fdb8a33a6e
commit 048b4e1df9
6 changed files with 144 additions and 146 deletions

View File

@@ -1,50 +1,23 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
podSpecs:
- name: audioqna-backend-server-deploy
spec:
image_name: opea/audioqna
image_tag: latest
replicas: 1
replicas: 1
- name: asr-deploy
spec:
image_name: opea/asr
image_tag: latest
replicas: 1
replicas: 1
- name: whisper-deploy
spec:
image_name: opea/whisper-gaudi
image_tag: latest
replicas: 1
resources:
limits:
habana.ai/gaudi: 1
replicas: 1
- name: tts-deploy
spec:
image_name: opea/tts
image_tag: latest
replicas: 1
replicas: 1
- name: speecht5-deploy
spec:
image_name: opea/speecht5-gaudi
image_tag: latest
replicas: 1
resources:
limits:
habana.ai/gaudi: 1
replicas: 1
- name: llm-dependency-deploy
spec:
image_name: ghcr.io/huggingface/tgi-gaudi
image_tag: 2.0.5
replicas: 1
resources:
limits:
habana.ai/gaudi: 1
replicas: 1

View File

@@ -7,7 +7,7 @@ metadata:
name: {{ .Values.config.CONFIG_MAP_NAME }}
namespace: default
data:
HUGGINGFACEHUB_API_TOKEN: {{ .Values.HUGGINGFACEHUB_API_TOKEN }}
HUGGINGFACEHUB_API_TOKEN: {{ .Values.config.HUGGINGFACEHUB_API_TOKEN }}
LLM_MODEL_ID: {{ .Values.config.LLM_MODEL_ID }}
NODE_SELECTOR: {{ .Values.config.NODE_SELECTOR }}
TGI_LLM_ENDPOINT: http://faq-tgi-svc.default.svc.cluster.local:8010

View File

@@ -2,33 +2,38 @@
# SPDX-License-Identifier: Apache-2.0
{{- $global := .Values }}
{{- range $deployment := .Values.deployments }}
{{- range $podSpec := $global.podSpecs }}
{{- if eq $podSpec.name $deployment.name }}
{{- range $microservice := .Values.microservices }}
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ $deployment.name }}
name: {{ $microservice.name }}
namespace: default
spec:
replicas: {{ $podSpec.spec.replicas }}
{{- $replicas := $microservice.replicas }}
{{- range $podSpec := $global.podSpecs }}
{{- if eq $podSpec.name $microservice.name }}
{{- $replicas = $podSpec.replicas | default $microservice.replicas }}
{{- end }}
{{- end }}
replicas: {{ $replicas }}
selector:
matchLabels:
app: {{ $deployment.name }}
app: {{ $microservice.name }}
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: {{ $deployment.name }}
app: {{ $microservice.name }}
spec:
containers:
- envFrom:
- configMapRef:
name: {{ $global.config.CONFIG_MAP_NAME }}
{{- if $deployment.spec.args }}
{{- if $microservice.args }}
args:
{{- range $arg := $deployment.spec.args }}
{{- range $arg := $microservice.args }}
{{- if $arg.name }}
- {{ $arg.name }}
{{- end }}
@@ -38,31 +43,39 @@ spec:
{{- end }}
{{- end }}
{{- if $deployment.spec.env }}
{{- if $microservice.env }}
env:
{{- range $env := $deployment.spec.env }}
{{- range $env := $microservice.env }}
- name: {{ $env.name }}
value: "{{ $env.value }}"
{{- end }}
{{- end }}
image: {{ $podSpec.spec.image_name }}:{{ $podSpec.spec.image_tag }}
image: {{ $microservice.image }}
imagePullPolicy: IfNotPresent
name: {{ $podSpec.name }}
name: {{ $microservice.name }}
{{- if $deployment.spec.ports }}
{{- if $microservice.ports }}
ports:
{{- range $port := $deployment.spec.ports }}
{{- range $port := $microservice.ports }}
{{- range $port_name, $port_id := $port }}
- {{ $port_name }}: {{ $port_id }}
{{- end }}
{{- end }}
{{- end }}
{{- $resources := $microservice.resources }}
{{- range $podSpec := $global.podSpecs }}
{{- if eq $podSpec.name $microservice.name }}
{{- if $podSpec.resources }}
{{- $resources = $podSpec.resources }}
{{- end }}
{{- end }}
{{- end }}
{{- if $podSpec.spec.resources }}
{{- if $resources }}
resources:
{{- range $resourceType, $resource := $podSpec.spec.resources }}
{{- range $resourceType, $resource := $resources }}
{{ $resourceType }}:
{{- range $limitType, $limit := $resource }}
{{ $limitType }}: {{ $limit }}
@@ -70,9 +83,9 @@ spec:
{{- end }}
{{- end }}
{{- if $deployment.spec.volumeMounts }}
{{- if $microservice.volumeMounts }}
volumeMounts:
{{- range $volumeMount := $deployment.spec.volumeMounts }}
{{- range $volumeMount := $microservice.volumeMounts }}
- mountPath: {{ $volumeMount.mountPath }}
name: {{ $volumeMount.name }}
{{- end }}
@@ -85,15 +98,15 @@ spec:
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: {{ $deployment.name }}
app: {{ $microservice.name }}
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
{{- if $deployment.spec.volumes }}
{{- if $microservice.volumes }}
volumes:
{{- range $index, $volume := $deployment.spec.volumes }}
{{- range $index, $volume := $microservice.volumes }}
- name: {{ $volume.name }}
{{- if $volume.hostPath }}
hostPath:
@@ -109,5 +122,3 @@ spec:
---
{{- end }}
{{- end }}
{{- end }}

View File

@@ -6,7 +6,7 @@ namespace: default
config:
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
CONFIG_MAP_NAME: audio-qna-config
NODE_SELECTOR: audioqna-opea
NODE_SELECTOR: opea
ASR_ENDPOINT: http://whisper-svc.default.svc.cluster.local:7066
TTS_ENDPOINT: http://speecht5-svc.default.svc.cluster.local:7055
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:3006
@@ -17,101 +17,115 @@ config:
LLM_SERVICE_PORT: "3007"
TTS_SERVICE_HOST_IP: tts-svc
TTS_SERVICE_PORT: "3002"
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
deployments:
microservices:
- name: audioqna-backend-server-deploy
spec:
ports:
- containerPort: 8888
image: opea/audioqna:latest
replicas: 1
ports:
- containerPort: 8888
- name: asr-deploy
spec:
ports:
- containerPort: 9099
image: opea/asr:latest
replicas: 1
ports:
- containerPort: 9099
- name: whisper-deploy
spec:
ports:
- containerPort: 7066
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
image: opea/whisper-gaudi:latest
replicas: 1
ports:
- containerPort: 7066
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
resources:
limits:
habana.ai/gaudi: 1
- name: tts-deploy
spec:
ports:
- containerPort: 9088
- name: llm-deploy
spec:
ports:
- containerPort: 9000
image: opea/tts:latest
replicas: 1
ports:
- containerPort: 9088
- name: speecht5-deploy
spec:
ports:
- containerPort: 7055
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
image: opea/speecht5-gaudi:latest
replicas: 1
ports:
- containerPort: 7055
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
resources:
limits:
habana.ai/gaudi: 1
- name: llm-deploy
image: opea/llm-tgi:latest
replicas: 1
ports:
- containerPort: 9000
- name: llm-dependency-deploy
spec:
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
args:
- name: "--model-id"
value: $(LLM_MODEL_ID)
- name: "--max-input-length"
value: "2048"
- name: "--max-total-tokens"
value: "4096"
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: "true"
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: ENABLE_HPU_GRAPH
value: 'true'
- name: LIMIT_HPU_GRAPH
value: 'true'
- name: USE_FLASH_ATTENTION
value: 'true'
- name: FLASH_ATTENTION_RECOMPUTE
value: 'true'
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
replicas: 1
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
args:
- name: "--model-id"
value: $(LLM_MODEL_ID)
- name: "--max-input-length"
value: "2048"
- name: "--max-total-tokens"
value: "4096"
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: "true"
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: ENABLE_HPU_GRAPH
value: 'true'
- name: LIMIT_HPU_GRAPH
value: 'true'
- name: USE_FLASH_ATTENTION
value: 'true'
- name: FLASH_ATTENTION_RECOMPUTE
value: 'true'
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
services:
- name: asr-svc

View File

@@ -5,7 +5,7 @@ namespace: default
config:
CONFIG_MAP_NAME: chatqna-config
NODE_SELECTOR: chatqna-opea
NODE_SELECTOR: opea
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
RERANK_MODEL_ID: BAAI/bge-reranker-base

View File

@@ -6,7 +6,7 @@ namespace: default
config:
LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct
CONFIG_MAP_NAME: faq-config
NODE_SELECTOR: faq-opea
NODE_SELECTOR: opea
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
microservices: