refactored AudioQNA
@@ -1,50 +1,23 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}

podSpecs:
- name: audioqna-backend-server-deploy
spec:
image_name: opea/audioqna
image_tag: latest
replicas: 1
replicas: 1

- name: asr-deploy
spec:
image_name: opea/asr
image_tag: latest
replicas: 1
replicas: 1

- name: whisper-deploy
spec:
image_name: opea/whisper-gaudi
image_tag: latest
replicas: 1
resources:
limits:
habana.ai/gaudi: 1
replicas: 1


- name: tts-deploy
spec:
image_name: opea/tts
image_tag: latest
replicas: 1
replicas: 1

- name: speecht5-deploy
spec:
image_name: opea/speecht5-gaudi
image_tag: latest
replicas: 1
resources:
limits:
habana.ai/gaudi: 1
replicas: 1


- name: llm-dependency-deploy
spec:
image_name: ghcr.io/huggingface/tgi-gaudi
image_tag: 2.0.5
replicas: 1
resources:
limits:
habana.ai/gaudi: 1
replicas: 1
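With the image names and tags moved into the microservices list in values.yaml, the Gaudi override is reduced to per-service replica counts and accelerator limits. A rough sketch of the new podSpecs shape, inferred from the podSpecs lookups in the deployment template below (indentation and the exact set of entries are assumptions):

    podSpecs:
      - name: audioqna-backend-server-deploy
        replicas: 1
      - name: whisper-deploy
        replicas: 1
        resources:
          limits:
            habana.ai/gaudi: 1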
@@ -7,7 +7,7 @@ metadata:
name: {{ .Values.config.CONFIG_MAP_NAME }}
namespace: default
data:
HUGGINGFACEHUB_API_TOKEN: {{ .Values.HUGGINGFACEHUB_API_TOKEN }}
HUGGINGFACEHUB_API_TOKEN: {{ .Values.config.HUGGINGFACEHUB_API_TOKEN }}
LLM_MODEL_ID: {{ .Values.config.LLM_MODEL_ID }}
NODE_SELECTOR: {{ .Values.config.NODE_SELECTOR }}
TGI_LLM_ENDPOINT: http://faq-tgi-svc.default.svc.cluster.local:8010

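The ConfigMap template now reads the Hugging Face token from the config block instead of the chart root, so a values file only needs to define it once alongside the other settings. A minimal values fragment that satisfies the new lookup (the ${HF_TOKEN} placeholder is assumed to be filled in from the environment before the chart is applied):

    config:
      CONFIG_MAP_NAME: audio-qna-config
      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}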
@@ -2,33 +2,38 @@
# SPDX-License-Identifier: Apache-2.0

{{- $global := .Values }}
{{- range $deployment := .Values.deployments }}
{{- range $podSpec := $global.podSpecs }}
{{- if eq $podSpec.name $deployment.name }}
{{- range $microservice := .Values.microservices }}
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ $deployment.name }}
name: {{ $microservice.name }}
namespace: default
spec:
replicas: {{ $podSpec.spec.replicas }}
{{- $replicas := $microservice.replicas }}
{{- range $podSpec := $global.podSpecs }}
{{- if eq $podSpec.name $microservice.name }}
{{- $replicas = $podSpec.replicas | default $microservice.replicas }}
{{- end }}
{{- end }}
replicas: {{ $replicas }}

selector:
matchLabels:
app: {{ $deployment.name }}
app: {{ $microservice.name }}
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: {{ $deployment.name }}
app: {{ $microservice.name }}
spec:
containers:
- envFrom:
- configMapRef:
name: {{ $global.config.CONFIG_MAP_NAME }}
{{- if $deployment.spec.args }}
{{- if $microservice.args }}
args:
{{- range $arg := $deployment.spec.args }}
{{- range $arg := $microservice.args }}
{{- if $arg.name }}
- {{ $arg.name }}
{{- end }}
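The replica count is now taken from the microservice entry and can be overridden per environment through a podSpecs entry with the same name; the `| default` keeps the base value when the override omits replicas. A sketch of how this resolves (the override value 3 is hypothetical):

    # values.yaml (base)
    microservices:
      - name: asr-deploy
        replicas: 1

    # hardware-specific override
    podSpecs:
      - name: asr-deploy
        replicas: 3

    # rendered Deployment
    spec:
      replicas: 3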
@@ -38,31 +43,39 @@ spec:
{{- end }}
{{- end }}

{{- if $deployment.spec.env }}
{{- if $microservice.env }}
env:
{{- range $env := $deployment.spec.env }}
{{- range $env := $microservice.env }}
- name: {{ $env.name }}
value: "{{ $env.value }}"
{{- end }}
{{- end }}

image: {{ $podSpec.spec.image_name }}:{{ $podSpec.spec.image_tag }}
image: {{ $microservice.image }}
imagePullPolicy: IfNotPresent
name: {{ $podSpec.name }}
name: {{ $microservice.name }}

{{- if $deployment.spec.ports }}
{{- if $microservice.ports }}
ports:
{{- range $port := $deployment.spec.ports }}
{{- range $port := $microservice.ports }}
{{- range $port_name, $port_id := $port }}
- {{ $port_name }}: {{ $port_id }}
{{- end }}
{{- end }}
{{- end }}

{{- $resources := $microservice.resources }}
{{- range $podSpec := $global.podSpecs }}
{{- if eq $podSpec.name $microservice.name }}
{{- if $podSpec.resources }}
{{- $resources = $podSpec.resources }}
{{- end }}
{{- end }}
{{- end }}

{{- if $podSpec.spec.resources }}
{{- if $resources }}
resources:
{{- range $resourceType, $resource := $podSpec.spec.resources }}
{{- range $resourceType, $resource := $resources }}
{{ $resourceType }}:
{{- range $limitType, $limit := $resource }}
{{ $limitType }}: {{ $limit }}
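Env vars and resources flow the same way: each env entry of a microservice is copied into the container with its value quoted, and the resources map (from the microservice entry or a matching podSpecs override) is walked type by type. For whisper-deploy on Gaudi the relevant part of the rendered container should look roughly like this (indentation assumed, two of the four env entries shown):

    env:
      - name: OMPI_MCA_btl_vader_single_copy_mechanism
        value: "none"
      - name: HABANA_VISIBLE_DEVICES
        value: "all"
    resources:
      limits:
        habana.ai/gaudi: 1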
@@ -70,9 +83,9 @@ spec:
{{- end }}
{{- end }}

{{- if $deployment.spec.volumeMounts }}
{{- if $microservice.volumeMounts }}
volumeMounts:
{{- range $volumeMount := $deployment.spec.volumeMounts }}
{{- range $volumeMount := $microservice.volumeMounts }}
- mountPath: {{ $volumeMount.mountPath }}
name: {{ $volumeMount.name }}
{{- end }}
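The volumeMounts loop emits one mountPath/name pair per entry, so the llm-dependency-deploy mounts from values.yaml should render roughly as:

    volumeMounts:
      - mountPath: /data
        name: model-volume
      - mountPath: /dev/shm
        name: shm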
@@ -85,15 +98,15 @@ spec:
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: {{ $deployment.name }}
app: {{ $microservice.name }}
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway


{{- if $deployment.spec.volumes }}
{{- if $microservice.volumes }}
volumes:
{{- range $index, $volume := $deployment.spec.volumes }}
{{- range $index, $volume := $microservice.volumes }}
- name: {{ $volume.name }}
{{- if $volume.hostPath }}
hostPath:
@@ -109,5 +122,3 @@ spec:

---
{{- end }}
{{- end }}
{{- end }}

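The volumes loop mirrors this at pod level. The hostPath branch is visible above; the emptyDir case falls outside this hunk, so the second entry below is an assumption based on the values file:

    volumes:
      - name: model-volume
        hostPath:
          path: /mnt/models
          type: Directory
      - name: shm
        emptyDir:
          medium: Memory
          sizeLimit: 1Gi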
@@ -6,7 +6,7 @@ namespace: default
config:
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
CONFIG_MAP_NAME: audio-qna-config
NODE_SELECTOR: audioqna-opea
NODE_SELECTOR: opea
ASR_ENDPOINT: http://whisper-svc.default.svc.cluster.local:7066
TTS_ENDPOINT: http://speecht5-svc.default.svc.cluster.local:7055
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:3006
@@ -17,101 +17,115 @@ config:
LLM_SERVICE_PORT: "3007"
TTS_SERVICE_HOST_IP: tts-svc
TTS_SERVICE_PORT: "3002"
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}

deployments:
microservices:
- name: audioqna-backend-server-deploy
spec:
ports:
- containerPort: 8888
image: opea/audioqna:latest
replicas: 1
ports:
- containerPort: 8888

- name: asr-deploy
spec:
ports:
- containerPort: 9099
image: opea/asr:latest
replicas: 1
ports:
- containerPort: 9099

- name: whisper-deploy
spec:
ports:
- containerPort: 7066
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
image: opea/whisper-gaudi:latest
replicas: 1
ports:
- containerPort: 7066
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
resources:
limits:
habana.ai/gaudi: 1

- name: tts-deploy
spec:
ports:
- containerPort: 9088

- name: llm-deploy
spec:
ports:
- containerPort: 9000
image: opea/tts:latest
replicas: 1
ports:
- containerPort: 9088

- name: speecht5-deploy
spec:
ports:
- containerPort: 7055
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
image: opea/speecht5-gaudi:latest
replicas: 1
ports:
- containerPort: 7055
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: 'true'
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
resources:
limits:
habana.ai/gaudi: 1

- name: llm-deploy
image: opea/llm-tgi:latest
replicas: 1
ports:
- containerPort: 9000

- name: llm-dependency-deploy
spec:
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
args:
- name: "--model-id"
value: $(LLM_MODEL_ID)
- name: "--max-input-length"
value: "2048"
- name: "--max-total-tokens"
value: "4096"
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: "true"
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: ENABLE_HPU_GRAPH
value: 'true'
- name: LIMIT_HPU_GRAPH
value: 'true'
- name: USE_FLASH_ATTENTION
value: 'true'
- name: FLASH_ATTENTION_RECOMPUTE
value: 'true'
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
replicas: 1
ports:
- containerPort: 80
resources:
limits:
habana.ai/gaudi: 1
args:
- name: "--model-id"
value: $(LLM_MODEL_ID)
- name: "--max-input-length"
value: "2048"
- name: "--max-total-tokens"
value: "4096"
env:
- name: OMPI_MCA_btl_vader_single_copy_mechanism
value: none
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
value: "true"
- name: runtime
value: habana
- name: HABANA_VISIBLE_DEVICES
value: all
- name: ENABLE_HPU_GRAPH
value: 'true'
- name: LIMIT_HPU_GRAPH
value: 'true'
- name: USE_FLASH_ATTENTION
value: 'true'
- name: FLASH_ATTENTION_RECOMPUTE
value: 'true'
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
volumes:
- hostPath:
path: /mnt/models
type: Directory
name: model-volume
- emptyDir:
medium: Memory
sizeLimit: 1Gi
name: shm

services:
- name: asr-svc

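The $(LLM_MODEL_ID) reference in the TGI args is resolved by Kubernetes, not by Helm: because the container pulls its environment from the shared ConfigMap via envFrom, the kubelet expands the reference when it builds the container command line. A sketch of the container spec this aims at (a reconstruction based on the values above, not the chart's literal output):

    containers:
      - name: llm-dependency-deploy
        image: ghcr.io/huggingface/tgi-gaudi:2.0.5
        envFrom:
          - configMapRef:
              name: audio-qna-config
        args:
          - "--model-id"
          - "$(LLM_MODEL_ID)"   # expanded by the kubelet to the ConfigMap's LLM_MODEL_ID
          - "--max-input-length"
          - "2048"
          - "--max-total-tokens"
          - "4096"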
@@ -5,7 +5,7 @@ namespace: default

config:
CONFIG_MAP_NAME: chatqna-config
NODE_SELECTOR: chatqna-opea
NODE_SELECTOR: opea
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
RERANK_MODEL_ID: BAAI/bge-reranker-base

@@ -6,7 +6,7 @@ namespace: default
config:
LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct
CONFIG_MAP_NAME: faq-config
NODE_SELECTOR: faq-opea
NODE_SELECTOR: opea
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}

microservices: