refactored AudioQNA

This commit is contained in:
Zhenzhong Xu
2024-10-21 11:06:37 +03:00
parent fdb8a33a6e
commit 048b4e1df9
6 changed files with 144 additions and 146 deletions

View File

@@ -1,50 +1,23 @@
# Copyright (C) 2024 Intel Corporation # Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
podSpecs: podSpecs:
- name: audioqna-backend-server-deploy - name: audioqna-backend-server-deploy
spec:
image_name: opea/audioqna
image_tag: latest
replicas: 1 replicas: 1
- name: asr-deploy - name: asr-deploy
spec:
image_name: opea/asr
image_tag: latest
replicas: 1 replicas: 1
- name: whisper-deploy - name: whisper-deploy
spec:
image_name: opea/whisper-gaudi
image_tag: latest
replicas: 1 replicas: 1
resources:
limits:
habana.ai/gaudi: 1
- name: tts-deploy - name: tts-deploy
spec:
image_name: opea/tts
image_tag: latest
replicas: 1 replicas: 1
- name: speecht5-deploy - name: speecht5-deploy
spec:
image_name: opea/speecht5-gaudi
image_tag: latest
replicas: 1 replicas: 1
resources:
limits:
habana.ai/gaudi: 1
- name: llm-dependency-deploy - name: llm-dependency-deploy
spec:
image_name: ghcr.io/huggingface/tgi-gaudi
image_tag: 2.0.5
replicas: 1 replicas: 1
resources:
limits:
habana.ai/gaudi: 1

View File

@@ -7,7 +7,7 @@ metadata:
name: {{ .Values.config.CONFIG_MAP_NAME }} name: {{ .Values.config.CONFIG_MAP_NAME }}
namespace: default namespace: default
data: data:
HUGGINGFACEHUB_API_TOKEN: {{ .Values.HUGGINGFACEHUB_API_TOKEN }} HUGGINGFACEHUB_API_TOKEN: {{ .Values.config.HUGGINGFACEHUB_API_TOKEN }}
LLM_MODEL_ID: {{ .Values.config.LLM_MODEL_ID }} LLM_MODEL_ID: {{ .Values.config.LLM_MODEL_ID }}
NODE_SELECTOR: {{ .Values.config.NODE_SELECTOR }} NODE_SELECTOR: {{ .Values.config.NODE_SELECTOR }}
TGI_LLM_ENDPOINT: http://faq-tgi-svc.default.svc.cluster.local:8010 TGI_LLM_ENDPOINT: http://faq-tgi-svc.default.svc.cluster.local:8010

View File

@@ -2,33 +2,38 @@
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
{{- $global := .Values }} {{- $global := .Values }}
{{- range $deployment := .Values.deployments }} {{- range $microservice := .Values.microservices }}
{{- range $podSpec := $global.podSpecs }}
{{- if eq $podSpec.name $deployment.name }}
apiVersion: apps/v1 apiVersion: apps/v1
kind: Deployment kind: Deployment
metadata: metadata:
name: {{ $deployment.name }} name: {{ $microservice.name }}
namespace: default namespace: default
spec: spec:
replicas: {{ $podSpec.spec.replicas }} {{- $replicas := $microservice.replicas }}
{{- range $podSpec := $global.podSpecs }}
{{- if eq $podSpec.name $microservice.name }}
{{- $replicas = $podSpec.replicas | default $microservice.replicas }}
{{- end }}
{{- end }}
replicas: {{ $replicas }}
selector: selector:
matchLabels: matchLabels:
app: {{ $deployment.name }} app: {{ $microservice.name }}
template: template:
metadata: metadata:
annotations: annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true' sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels: labels:
app: {{ $deployment.name }} app: {{ $microservice.name }}
spec: spec:
containers: containers:
- envFrom: - envFrom:
- configMapRef: - configMapRef:
name: {{ $global.config.CONFIG_MAP_NAME }} name: {{ $global.config.CONFIG_MAP_NAME }}
{{- if $deployment.spec.args }} {{- if $microservice.args }}
args: args:
{{- range $arg := $deployment.spec.args }} {{- range $arg := $microservice.args }}
{{- if $arg.name }} {{- if $arg.name }}
- {{ $arg.name }} - {{ $arg.name }}
{{- end }} {{- end }}
@@ -38,31 +43,39 @@ spec:
{{- end }} {{- end }}
{{- end }} {{- end }}
{{- if $deployment.spec.env }} {{- if $microservice.env }}
env: env:
{{- range $env := $deployment.spec.env }} {{- range $env := $microservice.env }}
- name: {{ $env.name }} - name: {{ $env.name }}
value: "{{ $env.value }}" value: "{{ $env.value }}"
{{- end }} {{- end }}
{{- end }} {{- end }}
image: {{ $podSpec.spec.image_name }}:{{ $podSpec.spec.image_tag }} image: {{ $microservice.image }}
imagePullPolicy: IfNotPresent imagePullPolicy: IfNotPresent
name: {{ $podSpec.name }} name: {{ $microservice.name }}
{{- if $deployment.spec.ports }} {{- if $microservice.ports }}
ports: ports:
{{- range $port := $deployment.spec.ports }} {{- range $port := $microservice.ports }}
{{- range $port_name, $port_id := $port }} {{- range $port_name, $port_id := $port }}
- {{ $port_name }}: {{ $port_id }} - {{ $port_name }}: {{ $port_id }}
{{- end }} {{- end }}
{{- end }} {{- end }}
{{- end }} {{- end }}
{{- $resources := $microservice.resources }}
{{- range $podSpec := $global.podSpecs }}
{{- if eq $podSpec.name $microservice.name }}
{{- if $podSpec.resources }}
{{- $resources = $podSpec.resources }}
{{- end }}
{{- end }}
{{- end }}
{{- if $podSpec.spec.resources }} {{- if $resources }}
resources: resources:
{{- range $resourceType, $resource := $podSpec.spec.resources }} {{- range $resourceType, $resource := $resources }}
{{ $resourceType }}: {{ $resourceType }}:
{{- range $limitType, $limit := $resource }} {{- range $limitType, $limit := $resource }}
{{ $limitType }}: {{ $limit }} {{ $limitType }}: {{ $limit }}
@@ -70,9 +83,9 @@ spec:
{{- end }} {{- end }}
{{- end }} {{- end }}
{{- if $deployment.spec.volumeMounts }} {{- if $microservice.volumeMounts }}
volumeMounts: volumeMounts:
{{- range $volumeMount := $deployment.spec.volumeMounts }} {{- range $volumeMount := $microservice.volumeMounts }}
- mountPath: {{ $volumeMount.mountPath }} - mountPath: {{ $volumeMount.mountPath }}
name: {{ $volumeMount.name }} name: {{ $volumeMount.name }}
{{- end }} {{- end }}
@@ -85,15 +98,15 @@ spec:
topologySpreadConstraints: topologySpreadConstraints:
- labelSelector: - labelSelector:
matchLabels: matchLabels:
app: {{ $deployment.name }} app: {{ $microservice.name }}
maxSkew: 1 maxSkew: 1
topologyKey: kubernetes.io/hostname topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway whenUnsatisfiable: ScheduleAnyway
{{- if $deployment.spec.volumes }} {{- if $microservice.volumes }}
volumes: volumes:
{{- range $index, $volume := $deployment.spec.volumes }} {{- range $index, $volume := $microservice.volumes }}
- name: {{ $volume.name }} - name: {{ $volume.name }}
{{- if $volume.hostPath }} {{- if $volume.hostPath }}
hostPath: hostPath:
@@ -109,5 +122,3 @@ spec:
--- ---
{{- end }} {{- end }}
{{- end }}
{{- end }}

View File

@@ -6,7 +6,7 @@ namespace: default
config: config:
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
CONFIG_MAP_NAME: audio-qna-config CONFIG_MAP_NAME: audio-qna-config
NODE_SELECTOR: audioqna-opea NODE_SELECTOR: opea
ASR_ENDPOINT: http://whisper-svc.default.svc.cluster.local:7066 ASR_ENDPOINT: http://whisper-svc.default.svc.cluster.local:7066
TTS_ENDPOINT: http://speecht5-svc.default.svc.cluster.local:7055 TTS_ENDPOINT: http://speecht5-svc.default.svc.cluster.local:7055
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:3006 TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:3006
@@ -17,20 +17,24 @@ config:
LLM_SERVICE_PORT: "3007" LLM_SERVICE_PORT: "3007"
TTS_SERVICE_HOST_IP: tts-svc TTS_SERVICE_HOST_IP: tts-svc
TTS_SERVICE_PORT: "3002" TTS_SERVICE_PORT: "3002"
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
deployments: microservices:
- name: audioqna-backend-server-deploy - name: audioqna-backend-server-deploy
spec: image: opea/audioqna:latest
replicas: 1
ports: ports:
- containerPort: 8888 - containerPort: 8888
- name: asr-deploy - name: asr-deploy
spec: image: opea/asr:latest
replicas: 1
ports: ports:
- containerPort: 9099 - containerPort: 9099
- name: whisper-deploy - name: whisper-deploy
spec: image: opea/whisper-gaudi:latest
replicas: 1
ports: ports:
- containerPort: 7066 - containerPort: 7066
env: env:
@@ -42,19 +46,19 @@ deployments:
value: habana value: habana
- name: HABANA_VISIBLE_DEVICES - name: HABANA_VISIBLE_DEVICES
value: all value: all
resources:
limits:
habana.ai/gaudi: 1
- name: tts-deploy - name: tts-deploy
spec: image: opea/tts:latest
replicas: 1
ports: ports:
- containerPort: 9088 - containerPort: 9088
- name: llm-deploy
spec:
ports:
- containerPort: 9000
- name: speecht5-deploy - name: speecht5-deploy
spec: image: opea/speecht5-gaudi:latest
replicas: 1
ports: ports:
- containerPort: 7055 - containerPort: 7055
env: env:
@@ -66,9 +70,19 @@ deployments:
value: habana value: habana
- name: HABANA_VISIBLE_DEVICES - name: HABANA_VISIBLE_DEVICES
value: all value: all
resources:
limits:
habana.ai/gaudi: 1
- name: llm-deploy
image: opea/llm-tgi:latest
replicas: 1
ports:
- containerPort: 9000
- name: llm-dependency-deploy - name: llm-dependency-deploy
spec: image: ghcr.io/huggingface/tgi-gaudi:2.0.5
replicas: 1
ports: ports:
- containerPort: 80 - containerPort: 80
resources: resources:

View File

@@ -5,7 +5,7 @@ namespace: default
config: config:
CONFIG_MAP_NAME: chatqna-config CONFIG_MAP_NAME: chatqna-config
NODE_SELECTOR: chatqna-opea NODE_SELECTOR: opea
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
RERANK_MODEL_ID: BAAI/bge-reranker-base RERANK_MODEL_ID: BAAI/bge-reranker-base

View File

@@ -6,7 +6,7 @@ namespace: default
config: config:
LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct
CONFIG_MAP_NAME: faq-config CONFIG_MAP_NAME: faq-config
NODE_SELECTOR: faq-opea NODE_SELECTOR: opea
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
microservices: microservices: