feat: add tts-streaming config and feature (#5492)

This commit is contained in:
chenxu9741
2024-07-09 11:33:58 +08:00
committed by GitHub
parent b29a36f461
commit 6ef401a9f0
44 changed files with 1280 additions and 358 deletions

View File

@@ -11,11 +11,13 @@ import { usePathname } from 'next/navigation'
import { useTranslation } from 'react-i18next'
import { Listbox, Transition } from '@headlessui/react'
import { CheckIcon, ChevronDownIcon } from '@heroicons/react/20/solid'
import RadioGroup from '@/app/components/app/configuration/config-vision/radio-group'
import type { Item } from '@/app/components/base/select'
import ConfigContext from '@/context/debug-configuration'
import { fetchAppVoices } from '@/service/apps'
import Tooltip from '@/app/components/base/tooltip'
import { languages } from '@/i18n/language'
import { TtsAutoPlay } from '@/types/app'
const VoiceParamConfig: FC = () => {
const { t } = useTranslation()
const pathname = usePathname()
@@ -27,12 +29,16 @@ const VoiceParamConfig: FC = () => {
setTextToSpeechConfig,
} = useContext(ConfigContext)
const languageItem = languages.find(item => item.value === textToSpeechConfig.language)
let languageItem = languages.find(item => item.value === textToSpeechConfig.language)
const localLanguagePlaceholder = languageItem?.name || t('common.placeholder.select')
if (languages && !languageItem)
languageItem = languages[0]
const language = languageItem?.value
const voiceItems = useSWR({ appId, language }, fetchAppVoices).data
const voiceItem = voiceItems?.find(item => item.value === textToSpeechConfig.voice)
let voiceItem = voiceItems?.find(item => item.value === textToSpeechConfig.voice)
if (voiceItems && !voiceItem)
voiceItem = voiceItems[0]
const localVoicePlaceholder = voiceItem?.name || t('common.placeholder.select')
return (
@@ -42,8 +48,9 @@ const VoiceParamConfig: FC = () => {
<div className='pt-3 space-y-6'>
<div>
<div className='mb-2 flex items-center space-x-1'>
<div className='leading-[18px] text-[13px] font-semibold text-gray-800'>{t('appDebug.voice.voiceSettings.language')}</div>
<Tooltip htmlContent={<div className='w-[180px]' >
<div
className='leading-[18px] text-[13px] font-semibold text-gray-800'>{t('appDebug.voice.voiceSettings.language')}</div>
<Tooltip htmlContent={<div className='w-[180px]'>
{t('appDebug.voice.voiceSettings.resolutionTooltip').split('\n').map(item => (
<div key={item}>{item}</div>
))}
@@ -61,7 +68,8 @@ const VoiceParamConfig: FC = () => {
}}
>
<div className={'relative h-9'}>
<Listbox.Button className={'w-full h-full rounded-lg border-0 bg-gray-100 py-1.5 pl-3 pr-10 sm:text-sm sm:leading-6 focus-visible:outline-none focus-visible:bg-gray-200 group-hover:bg-gray-200 cursor-pointer'}>
<Listbox.Button
className={'w-full h-full rounded-lg border-0 bg-gray-100 py-1.5 pl-3 pr-10 sm:text-sm sm:leading-6 focus-visible:outline-none focus-visible:bg-gray-200 group-hover:bg-gray-200 cursor-pointer'}>
<span className={classNames('block truncate text-left', !languageItem?.name && 'text-gray-400')}>
{languageItem?.name ? t(`common.voice.language.${languageItem?.value.replace('-', '')}`) : localLanguagePlaceholder}
</span>
@@ -79,7 +87,8 @@ const VoiceParamConfig: FC = () => {
leaveTo="opacity-0"
>
<Listbox.Options className="absolute z-10 mt-1 px-1 max-h-60 w-full overflow-auto rounded-md bg-white py-1 text-base shadow-lg border-gray-200 border-[0.5px] focus:outline-none sm:text-sm">
<Listbox.Options
className="absolute z-10 mt-1 px-1 max-h-60 w-full overflow-auto rounded-md bg-white py-1 text-base shadow-lg border-gray-200 border-[0.5px] focus:outline-none sm:text-sm">
{languages.map((item: Item) => (
<Listbox.Option
key={item.value}
@@ -100,7 +109,7 @@ const VoiceParamConfig: FC = () => {
'absolute inset-y-0 right-0 flex items-center pr-4 text-gray-700',
)}
>
<CheckIcon className="h-5 w-5" aria-hidden="true" />
<CheckIcon className="h-5 w-5" aria-hidden="true"/>
</span>
)}
</>
@@ -112,9 +121,9 @@ const VoiceParamConfig: FC = () => {
</div>
</Listbox>
</div>
<div>
<div className='mb-2 leading-[18px] text-[13px] font-semibold text-gray-800'>{t('appDebug.voice.voiceSettings.voice')}</div>
<div
className='mb-2 leading-[18px] text-[13px] font-semibold text-gray-800'>{t('appDebug.voice.voiceSettings.voice')}</div>
<Listbox
value={voiceItem}
disabled={!languageItem}
@@ -126,8 +135,10 @@ const VoiceParamConfig: FC = () => {
}}
>
<div className={'relative h-9'}>
<Listbox.Button className={'w-full h-full rounded-lg border-0 bg-gray-100 py-1.5 pl-3 pr-10 sm:text-sm sm:leading-6 focus-visible:outline-none focus-visible:bg-gray-200 group-hover:bg-gray-200 cursor-pointer'}>
<span className={classNames('block truncate text-left', !voiceItem?.name && 'text-gray-400')}>{voiceItem?.name ?? localVoicePlaceholder}</span>
<Listbox.Button
className={'w-full h-full rounded-lg border-0 bg-gray-100 py-1.5 pl-3 pr-10 sm:text-sm sm:leading-6 focus-visible:outline-none focus-visible:bg-gray-200 group-hover:bg-gray-200 cursor-pointer'}>
<span
className={classNames('block truncate text-left', !voiceItem?.name && 'text-gray-400')}>{voiceItem?.name ?? localVoicePlaceholder}</span>
<span className="pointer-events-none absolute inset-y-0 right-0 flex items-center pr-2">
<ChevronDownIcon
className="h-5 w-5 text-gray-400"
@@ -142,7 +153,8 @@ const VoiceParamConfig: FC = () => {
leaveTo="opacity-0"
>
<Listbox.Options className="absolute z-10 mt-1 px-1 max-h-60 w-full overflow-auto rounded-md bg-white py-1 text-base shadow-lg border-gray-200 border-[0.5px] focus:outline-none sm:text-sm">
<Listbox.Options
className="absolute z-10 mt-1 px-1 max-h-60 w-full overflow-auto rounded-md bg-white py-1 text-base shadow-lg border-gray-200 border-[0.5px] focus:outline-none sm:text-sm">
{voiceItems?.map((item: Item) => (
<Listbox.Option
key={item.value}
@@ -162,7 +174,7 @@ const VoiceParamConfig: FC = () => {
'absolute inset-y-0 right-0 flex items-center pr-4 text-gray-700',
)}
>
<CheckIcon className="h-5 w-5" aria-hidden="true" />
<CheckIcon className="h-5 w-5" aria-hidden="true"/>
</span>
)}
</>
@@ -174,6 +186,30 @@ const VoiceParamConfig: FC = () => {
</div>
</Listbox>
</div>
<div>
<div
className='mb-2 leading-[18px] text-[13px] font-semibold text-gray-800'>{t('appDebug.voice.voiceSettings.autoPlay')}</div>
<RadioGroup
className='space-x-3'
options={[
{
label: t('appDebug.voice.voiceSettings.autoPlayEnabled'),
value: TtsAutoPlay.enabled,
},
{
label: t('appDebug.voice.voiceSettings.autoPlayDisabled'),
value: TtsAutoPlay.disabled,
},
]}
value={textToSpeechConfig.autoPlay ? textToSpeechConfig.autoPlay : TtsAutoPlay.disabled}
onChange={(value: TtsAutoPlay) => {
setTextToSpeechConfig({
...textToSpeechConfig,
autoPlay: value,
})
}}
/>
</div>
</div>
</div>
</div>

View File

@@ -40,7 +40,6 @@ const TextToSpeech: FC = () => {
{ languageInfo?.example && (
<AudioBtn
value={languageInfo?.example}
voice={voiceItem?.value}
isAudition
noCache
/>

View File

@@ -428,8 +428,7 @@ const GenerationItem: FC<IGenerationItemProps> = ({
<>
<div className='ml-2 mr-2 h-[14px] w-[1px] bg-gray-200'></div>
<AudioBtn
value={content}
noCache={false}
id={messageId!}
className={'mr-1'}
/>
</>

View File

@@ -0,0 +1,53 @@
import AudioPlayer from '@/app/components/base/audio-btn/audio'
// Augments the global scope with an `AudioPlayerManager` interface that has a
// single `instance` property.
// NOTE(review): the exported class of the same name below is module-scoped and
// keeps its own private static `instance`, so this global interface does not
// appear to be merged with it — confirm whether the declaration is still needed.
declare global {
// eslint-disable-next-line @typescript-eslint/consistent-type-definitions
interface AudioPlayerManager {
instance: AudioPlayerManager
}
}
/**
 * Singleton that owns the one active {@link AudioPlayer}.
 *
 * At most one player exists at a time. Asking for a player with the id of the
 * current message returns the existing player; asking with any other id stops
 * the current player and replaces it with a new one.
 */
export class AudioPlayerManager {
  private static instance: AudioPlayerManager
  private audioPlayers: AudioPlayer | null = null
  private msgId: string | undefined

  private constructor() {
  }

  /** Returns the shared manager, creating it on first use. */
  public static getInstance(): AudioPlayerManager {
    if (!AudioPlayerManager.instance)
      AudioPlayerManager.instance = new AudioPlayerManager()

    return AudioPlayerManager.instance
  }

  /**
   * Returns the player for message `id`.
   *
   * @param url - text-to-audio endpoint handed to a newly created player
   * @param isPublic - forwarded to a newly created player
   * @param id - message id; when it equals the current one the existing player is reused
   * @param msgContent - text handed to a newly created player
   * @param voice - voice stored on a newly created player (sent with its audio request)
   * @param callback - receives playback events; replaces the callback of a reused player
   * @returns the reused or newly created player
   */
  public getAudioPlayer(url: string, isPublic: boolean, id: string | undefined, msgContent: string | null | undefined, voice: string | undefined, callback: ((event: string) => {}) | null): AudioPlayer {
    if (this.msgId && this.msgId === id && this.audioPlayers) {
      this.audioPlayers.setCallback(callback)
      return this.audioPlayers
    }

    this.stopCurrentPlayer()

    this.msgId = id
    const player = new AudioPlayer(url, isPublic, id, msgContent, callback)
    // The AudioPlayer constructor takes no voice argument, so the requested
    // voice has to be stored on the instance; previously it was dropped here.
    player.voice = voice
    this.audioPlayers = player
    return player
  }

  /**
   * Re-labels the current player with `msgId`, so that a later
   * `getAudioPlayer` call with that id reuses it.
   */
  public resetMsgId(msgId: string) {
    this.msgId = msgId
    this.audioPlayers?.resetMsgId(msgId)
  }

  /** Pauses the current player (if any) and discards its pending audio data. */
  private stopCurrentPlayer() {
    if (!this.audioPlayers)
      return

    try {
      this.audioPlayers.pauseAudio()
      this.audioPlayers.cacheBuffers = []
      this.audioPlayers.sourceBuffer?.abort()
    }
    catch (e) {
      // Best effort: the old player is being thrown away, so a failure while
      // tearing it down must not prevent the new player from being created.
    }
  }
}

View File

@@ -0,0 +1,263 @@
import Toast from '@/app/components/base/toast'
import { textToAudioStream } from '@/service/share'
// Adds `ManagedMediaSource` to the global `Window` type so the AudioPlayer
// constructor below can fall back to `window.ManagedMediaSource` when
// `window.MediaSource` is not available. Typed as `any` here.
declare global {
// eslint-disable-next-line @typescript-eslint/consistent-type-definitions
interface Window {
ManagedMediaSource: any
}
}
/**
 * Plays text-to-speech audio by feeding MP3 chunks into an `Audio` element
 * through a `MediaSource` / `SourceBuffer`.
 *
 * Audio data arrives either from `loadAudio()` (which streams the response of
 * `textToAudioStream`) or from `playAudioWithAudio()` (base64 chunks pushed by
 * the caller). Chunks are queued in `cacheBuffers` and appended to the source
 * buffer one at a time, because a `SourceBuffer` rejects an append while a
 * previous one is still updating.
 *
 * Playback events are reported to `callback` as plain strings
 * ('play', 'paused', 'ended', 'error', ...).
 */
export default class AudioPlayer {
  /**
   * Names of the `Audio` element events that are forwarded verbatim to `callback`.
   * NOTE(review): 'paused', 'loaded' and 'loadeddate' are not standard media
   * element event names (the standard ones are 'pause' and 'loadeddata'), so
   * those listeners presumably never fire. They are kept unchanged because
   * renaming them would alter what consumers receive; 'paused' is emitted
   * explicitly by `pauseAudio()`.
   */
  private static readonly FORWARDED_EVENTS = ['ended', 'paused', 'loaded', 'play', 'timeupdate', 'loadeddate', 'canplay', 'error']

  mediaSource: MediaSource | null
  audio: HTMLAudioElement
  audioContext: AudioContext
  sourceBuffer?: SourceBuffer
  // Chunks waiting to be appended to `sourceBuffer`.
  cacheBuffers: ArrayBuffer[] = []
  pauseTimer: number | null = null
  msgId: string | undefined
  msgContent: string | null | undefined = null
  voice: string | undefined = undefined
  // True once audio data has been requested or pushed for this player.
  isLoadData = false
  url: string
  isPublic: boolean
  callback: ((event: string) => {}) | null

  /**
   * @param streamUrl - endpoint passed to `textToAudioStream` by `loadAudio()`
   * @param isPublic - passed through to `textToAudioStream`
   * @param msgId - message id sent with the audio request
   * @param msgContent - text sent with the audio request
   * @param callback - receives playback event names; may be null
   */
  constructor(streamUrl: string, isPublic: boolean, msgId: string | undefined, msgContent: string | null | undefined, callback: ((event: string) => {}) | null) {
    this.audioContext = new AudioContext()
    this.msgId = msgId
    this.msgContent = msgContent
    this.url = streamUrl
    this.isPublic = isPublic
    this.callback = callback

    // Compatible with iphone ios17 ManagedMediaSource
    const MediaSource = window.MediaSource || window.ManagedMediaSource
    if (!MediaSource) {
      Toast.notify({
        message: 'Your browser does not support audio streaming, if you are using an iPhone, please update to iOS 17.1 or later.',
        type: 'error',
      })
    }
    this.mediaSource = MediaSource ? new MediaSource() : null
    this.audio = new Audio()
    this.bindAudioEvents()
    this.audio.src = this.mediaSource ? URL.createObjectURL(this.mediaSource) : ''
    this.audio.autoplay = true

    const source = this.audioContext.createMediaElementSource(this.audio)
    source.connect(this.audioContext.destination)
    this.listenMediaSource('audio/mpeg')
  }

  /** Changes the message id that `loadAudio()` sends with its request. */
  public resetMsgId(msgId: string) {
    this.msgId = msgId
  }

  /**
   * Replaces the event callback.
   *
   * The DOM listeners are registered once in the constructor and always call
   * the current `callback`, so calling this repeatedly neither stacks up
   * listeners nor leaves a previously set callback receiving events.
   */
  public setCallback(callback: ((event: string) => {}) | null) {
    this.callback = callback
  }

  /**
   * Starts or resumes playback.
   *
   * On the first call nothing has been loaded yet, so this only kicks off
   * `loadAudio()` (the element has `autoplay` set). On later calls it resumes a
   * suspended audio context or restarts an ended element, then reports 'play'.
   */
  public playAudio() {
    if (!this.isLoadData) {
      this.isLoadData = true
      this.loadAudio()
      return
    }

    if (this.audioContext.state === 'suspended') {
      this.audioContext.resume().then(() => {
        this.audio.play()
      })
    }
    else if (this.audio.ended) {
      this.audio.play()
    }
    this.emit('play')
  }

  /**
   * Feeds one base64-encoded audio chunk to the player.
   *
   * @param audio - base64 audio data; an empty value marks the end of the stream
   * @param play - when true, also make sure playback is running
   */
  public async playAudioWithAudio(audio: string, play = true) {
    if (!audio || !audio.length) {
      this.finishStream()
      return
    }

    const audioContent = Buffer.from(audio, 'base64')
    this.receiveAudioData(new Uint8Array(audioContent))
    if (!play)
      return

    this.isLoadData = true
    if (this.audio.paused) {
      this.audioContext.resume().then(() => {
        this.audio.play()
        this.emit('play')
      })
    }
    else if (this.audio.ended) {
      this.audio.play()
      this.emit('play')
    }
    // Otherwise the element is already playing and nothing needs to be done.
  }

  /** Reports 'paused', pauses the element and suspends the audio context. */
  public pauseAudio() {
    this.emit('paused')
    this.audio.pause()
    this.audioContext.suspend()
  }

  /** Invokes the current callback, if any, with `event`. */
  private emit(event: string) {
    this.callback?.(event)
  }

  /** Registers, once, the listeners that forward element events to `callback`. */
  private bindAudioEvents() {
    for (const event of AudioPlayer.FORWARDED_EVENTS)
      this.audio.addEventListener(event, () => this.emit(event))
  }

  /**
   * Creates the source buffer when the media source opens and keeps the chunk
   * queue draining: once right away (chunks may have arrived before the source
   * opened) and again every time an append finishes.
   */
  private listenMediaSource(contentType: string) {
    this.mediaSource?.addEventListener('sourceopen', () => {
      if (this.sourceBuffer)
        return

      this.sourceBuffer = this.mediaSource?.addSourceBuffer(contentType)
      this.sourceBuffer?.addEventListener('updateend', () => this.flushCache())
      this.flushCache()
    })
  }

  /** Requests the audio for this message and streams the response body into the player. */
  private async loadAudio() {
    try {
      const audioResponse: any = await textToAudioStream(this.url, this.isPublic, { content_type: 'audio/mpeg' }, {
        message_id: this.msgId,
        streaming: true,
        voice: this.voice,
        text: this.msgContent,
      })

      if (audioResponse.status !== 200) {
        this.isLoadData = false
        this.emit('error')
        // The body of a failed response is not audio; do not feed it to the buffer.
        return
      }

      const reader = audioResponse.body.getReader()
      while (true) {
        const { value, done } = await reader.read()
        // On the final read `value` is undefined, which ends the stream.
        this.receiveAudioData(value)
        if (done)
          break
      }
    }
    catch (error) {
      this.isLoadData = false
      this.emit('error')
    }
  }

  /**
   * Signals end-of-stream to the media source once the source buffer is idle.
   *
   * The interval is cleared before `endOfStream()` is attempted and the call
   * is skipped unless the media source is open, so a second finish request (or
   * a source that never opened) cannot leave a timer throwing every 10 ms.
   */
  private theEndOfStream() {
    const endTimer = setInterval(() => {
      if (this.sourceBuffer?.updating)
        return

      clearInterval(endTimer)
      if (this.mediaSource?.readyState === 'open')
        this.mediaSource.endOfStream()
    }, 10)
  }

  /** Drains whatever is still queued, then ends the stream. */
  private finishStream() {
    const timer = setInterval(() => {
      // Without a source buffer nothing can consume the queue; discard it so
      // the timer terminates instead of waiting indefinitely.
      if (!this.sourceBuffer)
        this.cacheBuffers = []

      this.flushCache()
      if (!this.cacheBuffers.length) {
        clearInterval(timer)
        this.theEndOfStream()
      }
    }, 10)
  }

  /**
   * Accepts one chunk of audio data.
   *
   * A missing chunk ends the stream; an empty chunk ends it only while the
   * media source is open. Anything else is queued and appended as soon as the
   * source buffer exists and is idle.
   */
  private receiveAudioData(unit8Array: Uint8Array | undefined) {
    if (!unit8Array) {
      this.finishStream()
      return
    }

    const audioData = this.byteArrayToArrayBuffer(unit8Array)
    if (!audioData.byteLength) {
      if (this.mediaSource?.readyState === 'open')
        this.finishStream()
      return
    }

    // No media source means streaming is unsupported here; nothing will ever
    // consume the data, so do not accumulate it.
    if (!this.mediaSource)
      return

    this.cacheBuffers.push(audioData)
    this.flushCache()
  }

  /** Appends the oldest queued chunk if the source buffer exists and is not busy. */
  private flushCache() {
    const sourceBuffer = this.sourceBuffer
    if (!sourceBuffer || sourceBuffer.updating)
      return

    const next = this.cacheBuffers.shift()
    if (next)
      sourceBuffer.appendBuffer(next)
  }

  /** Copies `byteArray` into a new, exactly sized `ArrayBuffer`. */
  private byteArrayToArrayBuffer(byteArray: Uint8Array): ArrayBuffer {
    const arrayBuffer = new ArrayBuffer(byteArray.length)
    const uint8Array = new Uint8Array(arrayBuffer)
    uint8Array.set(byteArray)
    return arrayBuffer
  }
}

View File

@@ -1,124 +1,78 @@
'use client'
import { useEffect, useRef, useState } from 'react'
import { useRef, useState } from 'react'
import { t } from 'i18next'
import { useParams, usePathname } from 'next/navigation'
import s from './style.module.css'
import Tooltip from '@/app/components/base/tooltip'
import { randomString } from '@/utils'
import { textToAudio } from '@/service/share'
import Loading from '@/app/components/base/loading'
import { AudioPlayerManager } from '@/app/components/base/audio-btn/audio.player.manager'
type AudioBtnProps = {
value: string
id?: string
voice?: string
value?: string
className?: string
isAudition?: boolean
noCache: boolean
noCache?: boolean
}
type AudioState = 'initial' | 'loading' | 'playing' | 'paused' | 'ended'
const AudioBtn = ({
value,
id,
voice,
value,
className,
isAudition,
noCache,
}: AudioBtnProps) => {
const audioRef = useRef<HTMLAudioElement | null>(null)
const [audioState, setAudioState] = useState<AudioState>('initial')
const selector = useRef(`play-tooltip-${randomString(4)}`)
const params = useParams()
const pathname = usePathname()
const removeCodeBlocks = (inputText: any) => {
const codeBlockRegex = /```[\s\S]*?```/g
if (inputText)
return inputText.replace(codeBlockRegex, '')
return ''
}
const loadAudio = async () => {
const formData = new FormData()
formData.append('text', removeCodeBlocks(value))
formData.append('voice', removeCodeBlocks(voice))
if (value !== '') {
setAudioState('loading')
let url = ''
let isPublic = false
if (params.token) {
url = '/text-to-audio'
isPublic = true
}
else if (params.appId) {
if (pathname.search('explore/installed') > -1)
url = `/installed-apps/${params.appId}/text-to-audio`
else
url = `/apps/${params.appId}/text-to-audio`
}
try {
const audioResponse = await textToAudio(url, isPublic, formData)
const blob_bytes = Buffer.from(audioResponse.data, 'latin1')
const blob = new Blob([blob_bytes], { type: 'audio/wav' })
const audioUrl = URL.createObjectURL(blob)
audioRef.current!.src = audioUrl
}
catch (error) {
setAudioState('initial')
console.error('Error playing audio:', error)
}
}
}
const handleToggle = async () => {
if (audioState === 'initial' || noCache) {
await loadAudio()
}
else if (audioRef.current) {
if (audioState === 'playing') {
audioRef.current.pause()
setAudioState('paused')
}
else {
audioRef.current.play()
const audio_finished_call = (event: string): any => {
switch (event) {
case 'ended':
setAudioState('ended')
break
case 'paused':
setAudioState('ended')
break
case 'loaded':
setAudioState('loading')
break
case 'play':
setAudioState('playing')
}
break
case 'error':
setAudioState('ended')
break
}
}
let url = ''
let isPublic = false
useEffect(() => {
const currentAudio = audioRef.current
const handleLoading = () => {
if (params.token) {
url = '/text-to-audio'
isPublic = true
}
else if (params.appId) {
if (pathname.search('explore/installed') > -1)
url = `/installed-apps/${params.appId}/text-to-audio`
else
url = `/apps/${params.appId}/text-to-audio`
}
const handleToggle = async () => {
if (audioState === 'playing' || audioState === 'loading') {
setAudioState('paused')
AudioPlayerManager.getInstance().getAudioPlayer(url, isPublic, id, value, voice, audio_finished_call).pauseAudio()
}
else {
setAudioState('loading')
AudioPlayerManager.getInstance().getAudioPlayer(url, isPublic, id, value, voice, audio_finished_call).playAudio()
}
const handlePlay = () => {
currentAudio?.play()
setAudioState('playing')
}
const handleEnded = () => {
setAudioState('ended')
}
currentAudio?.addEventListener('progress', handleLoading)
currentAudio?.addEventListener('canplaythrough', handlePlay)
currentAudio?.addEventListener('ended', handleEnded)
return () => {
currentAudio?.removeEventListener('progress', handleLoading)
currentAudio?.removeEventListener('canplaythrough', handlePlay)
currentAudio?.removeEventListener('ended', handleEnded)
URL.revokeObjectURL(currentAudio?.src || '')
currentAudio?.pause()
currentAudio?.setAttribute('src', '')
}
}, [])
}
const tooltipContent = {
initial: t('appApi.play'),
@@ -151,7 +105,6 @@ const AudioBtn = ({
)}
</button>
</Tooltip>
<audio ref={audioRef} src='' className='hidden' />
</div>
)
}

View File

@@ -8,6 +8,7 @@ import type {
ChatConfig,
ChatItem,
} from '../../types'
import { useChatContext } from '../context'
import Operation from './operation'
import AgentContent from './agent-content'
import BasicContent from './basic-content'
@@ -59,23 +60,25 @@ const Answer: FC<AnswerProps> = ({
} = item
const hasAgentThoughts = !!agent_thoughts?.length
const [containerWidth, setContainerWidth] = useState(0)
const [containerWidth] = useState(0)
const [contentWidth, setContentWidth] = useState(0)
const containerRef = useRef<HTMLDivElement>(null)
const contentRef = useRef<HTMLDivElement>(null)
const getContainerWidth = () => {
if (containerRef.current)
setContainerWidth(containerRef.current?.clientWidth + 16)
}
const {
config: chatContextConfig,
} = useChatContext()
const voiceRef = useRef(chatContextConfig?.text_to_speech?.voice)
const getContentWidth = () => {
if (contentRef.current)
setContentWidth(contentRef.current?.clientWidth)
}
useEffect(() => {
getContainerWidth()
}, [])
voiceRef.current = chatContextConfig?.text_to_speech?.voice
}
, [chatContextConfig?.text_to_speech?.voice])
useEffect(() => {
if (!responding)

View File

@@ -119,9 +119,9 @@ const Operation: FC<OperationProps> = ({
<>
<div className='mx-1 w-[1px] h-[14px] bg-gray-200'/>
<AudioBtn
id={id}
value={content}
noCache={false}
voice={config?.text_to_speech?.voice}
className='hidden group-hover:block'
/>
</>

View File

@@ -6,6 +6,8 @@ import {
} from 'react'
import { useTranslation } from 'react-i18next'
import { produce, setAutoFreeze } from 'immer'
import { useParams, usePathname } from 'next/navigation'
import { v4 as uuidV4 } from 'uuid'
import type {
ChatConfig,
ChatItem,
@@ -20,6 +22,7 @@ import { replaceStringWithValues } from '@/app/components/app/configuration/prom
import type { Annotation } from '@/models/log'
import { WorkflowRunningStatus } from '@/app/components/workflow/types'
import useTimestamp from '@/hooks/use-timestamp'
import { AudioPlayerManager } from '@/app/components/base/audio-btn/audio.player.manager'
type GetAbortController = (abortController: AbortController) => void
type SendCallback = {
@@ -91,7 +94,8 @@ export const useChat = (
const conversationMessagesAbortControllerRef = useRef<AbortController | null>(null)
const suggestedQuestionsAbortControllerRef = useRef<AbortController | null>(null)
const checkPromptVariables = useCheckPromptVariables()
const params = useParams()
const pathname = usePathname()
useEffect(() => {
setAutoFreeze(false)
return () => {
@@ -262,6 +266,19 @@ export const useChat = (
let isAgentMode = false
let hasSetResponseId = false
let ttsUrl = ''
let ttsIsPublic = false
if (params.token) {
ttsUrl = '/text-to-audio'
ttsIsPublic = true
}
else if (params.appId) {
if (pathname.search('explore/installed') > -1)
ttsUrl = `/installed-apps/${params.appId}/text-to-audio`
else
ttsUrl = `/apps/${params.appId}/text-to-audio`
}
const player = AudioPlayerManager.getInstance().getAudioPlayer(ttsUrl, ttsIsPublic, uuidV4(), 'none', 'none', (_: any): any => {})
ssePost(
url,
{
@@ -530,6 +547,15 @@ export const useChat = (
}
}))
},
onTTSChunk: (messageId: string, audio: string) => {
if (!audio || audio === '')
return
player.playAudioWithAudio(audio, true)
AudioPlayerManager.getInstance().resetMsgId(messageId)
},
onTTSEnd: (messageId: string, audio: string) => {
player.playAudioWithAudio(audio, false)
},
})
return true
}, [

View File

@@ -19,6 +19,8 @@ import type { Item } from '@/app/components/base/select'
import { fetchAppVoices } from '@/service/apps'
import Tooltip from '@/app/components/base/tooltip'
import { languages } from '@/i18n/language'
import RadioGroup from '@/app/components/app/configuration/config-vision/radio-group'
import { TtsAutoPlay } from '@/types/app'
type VoiceParamConfigProps = {
onChange?: OnFeaturesChange
@@ -33,12 +35,16 @@ const VoiceParamConfig = ({
const text2speech = useFeatures(state => state.features.text2speech)
const featuresStore = useFeaturesStore()
const languageItem = languages.find(item => item.value === text2speech.language)
let languageItem = languages.find(item => item.value === text2speech?.language)
if (languages && !languageItem)
languageItem = languages[0]
const localLanguagePlaceholder = languageItem?.name || t('common.placeholder.select')
const language = languageItem?.value
const voiceItems = useSWR({ appId, language }, fetchAppVoices).data
const voiceItem = voiceItems?.find(item => item.value === text2speech.voice)
let voiceItem = voiceItems?.find(item => item.value === text2speech?.voice)
if (voiceItems && !voiceItem)
voiceItem = voiceItems[0]
const localVoicePlaceholder = voiceItem?.name || t('common.placeholder.select')
const handleChange = (value: Record<string, string>) => {
@@ -66,13 +72,14 @@ const VoiceParamConfig = ({
<div className='pt-3 space-y-6'>
<div>
<div className='mb-2 flex items-center space-x-1'>
<div className='leading-[18px] text-[13px] font-semibold text-gray-800'>{t('appDebug.voice.voiceSettings.language')}</div>
<Tooltip htmlContent={<div className='w-[180px]' >
<div
className='leading-[18px] text-[13px] font-semibold text-gray-800'>{t('appDebug.voice.voiceSettings.language')}</div>
<Tooltip htmlContent={<div className='w-[180px]'>
{t('appDebug.voice.voiceSettings.resolutionTooltip').split('\n').map(item => (
<div key={item}>{item}</div>
))}
</div>} selector='config-resolution-tooltip'>
<RiQuestionLine className='w-[14px] h-[14px] text-gray-400' />
<RiQuestionLine className='w-[14px] h-[14px] text-gray-400'/>
</Tooltip>
</div>
<Listbox
@@ -84,7 +91,8 @@ const VoiceParamConfig = ({
}}
>
<div className={'relative h-9'}>
<Listbox.Button className={'w-full h-full rounded-lg border-0 bg-gray-100 py-1.5 pl-3 pr-10 sm:text-sm sm:leading-6 focus-visible:outline-none focus-visible:bg-gray-200 group-hover:bg-gray-200 cursor-pointer'}>
<Listbox.Button
className={'w-full h-full rounded-lg border-0 bg-gray-100 py-1.5 pl-3 pr-10 sm:text-sm sm:leading-6 focus-visible:outline-none focus-visible:bg-gray-200 group-hover:bg-gray-200 cursor-pointer'}>
<span className={classNames('block truncate text-left', !languageItem?.name && 'text-gray-400')}>
{languageItem?.name ? t(`common.voice.language.${languageItem?.value.replace('-', '')}`) : localLanguagePlaceholder}
</span>
@@ -102,7 +110,8 @@ const VoiceParamConfig = ({
leaveTo="opacity-0"
>
<Listbox.Options className="absolute z-10 mt-1 px-1 max-h-60 w-full overflow-auto rounded-md bg-white py-1 text-base shadow-lg border-gray-200 border-[0.5px] focus:outline-none sm:text-sm">
<Listbox.Options
className="absolute z-10 mt-1 px-1 max-h-60 w-full overflow-auto rounded-md bg-white py-1 text-base shadow-lg border-gray-200 border-[0.5px] focus:outline-none sm:text-sm">
{languages.map((item: Item) => (
<Listbox.Option
key={item.value}
@@ -117,13 +126,13 @@ const VoiceParamConfig = ({
<>
<span
className={classNames('block', selected && 'font-normal')}>{t(`common.voice.language.${(item.value).toString().replace('-', '')}`)}</span>
{(selected || item.value === text2speech.language) && (
{(selected || item.value === text2speech?.language) && (
<span
className={classNames(
'absolute inset-y-0 right-0 flex items-center pr-4 text-gray-700',
)}
>
<CheckIcon className="h-5 w-5" aria-hidden="true" />
<CheckIcon className="h-5 w-5" aria-hidden="true"/>
</span>
)}
</>
@@ -137,7 +146,8 @@ const VoiceParamConfig = ({
</div>
<div>
<div className='mb-2 leading-[18px] text-[13px] font-semibold text-gray-800'>{t('appDebug.voice.voiceSettings.voice')}</div>
<div
className='mb-2 leading-[18px] text-[13px] font-semibold text-gray-800'>{t('appDebug.voice.voiceSettings.voice')}</div>
<Listbox
value={voiceItem}
disabled={!languageItem}
@@ -148,8 +158,10 @@ const VoiceParamConfig = ({
}}
>
<div className={'relative h-9'}>
<Listbox.Button className={'w-full h-full rounded-lg border-0 bg-gray-100 py-1.5 pl-3 pr-10 sm:text-sm sm:leading-6 focus-visible:outline-none focus-visible:bg-gray-200 group-hover:bg-gray-200 cursor-pointer'}>
<span className={classNames('block truncate text-left', !voiceItem?.name && 'text-gray-400')}>{voiceItem?.name ?? localVoicePlaceholder}</span>
<Listbox.Button
className={'w-full h-full rounded-lg border-0 bg-gray-100 py-1.5 pl-3 pr-10 sm:text-sm sm:leading-6 focus-visible:outline-none focus-visible:bg-gray-200 group-hover:bg-gray-200 cursor-pointer'}>
<span
className={classNames('block truncate text-left', !voiceItem?.name && 'text-gray-400')}>{voiceItem?.name ?? localVoicePlaceholder}</span>
<span className="pointer-events-none absolute inset-y-0 right-0 flex items-center pr-2">
<ChevronDownIcon
className="h-5 w-5 text-gray-400"
@@ -164,7 +176,8 @@ const VoiceParamConfig = ({
leaveTo="opacity-0"
>
<Listbox.Options className="absolute z-10 mt-1 px-1 max-h-60 w-full overflow-auto rounded-md bg-white py-1 text-base shadow-lg border-gray-200 border-[0.5px] focus:outline-none sm:text-sm">
<Listbox.Options
className="absolute z-10 mt-1 px-1 max-h-60 w-full overflow-auto rounded-md bg-white py-1 text-base shadow-lg border-gray-200 border-[0.5px] focus:outline-none sm:text-sm">
{voiceItems?.map((item: Item) => (
<Listbox.Option
key={item.value}
@@ -178,13 +191,13 @@ const VoiceParamConfig = ({
{({ /* active, */ selected }) => (
<>
<span className={classNames('block', selected && 'font-normal')}>{item.name}</span>
{(selected || item.value === text2speech.voice) && (
{(selected || item.value === text2speech?.voice) && (
<span
className={classNames(
'absolute inset-y-0 right-0 flex items-center pr-4 text-gray-700',
)}
>
<CheckIcon className="h-5 w-5" aria-hidden="true" />
<CheckIcon className="h-5 w-5" aria-hidden="true"/>
</span>
)}
</>
@@ -196,6 +209,29 @@ const VoiceParamConfig = ({
</div>
</Listbox>
</div>
<div>
<div
className='mb-2 leading-[18px] text-[13px] font-semibold text-gray-800'>{t('appDebug.voice.voiceSettings.autoPlay')}</div>
<RadioGroup
className='space-x-3'
options={[
{
label: t('appDebug.voice.voiceSettings.autoPlayEnabled'),
value: TtsAutoPlay.enabled,
},
{
label: t('appDebug.voice.voiceSettings.autoPlayDisabled'),
value: TtsAutoPlay.disabled,
},
]}
value={text2speech?.autoPlay ? text2speech?.autoPlay : TtsAutoPlay.disabled}
onChange={(value: TtsAutoPlay) => {
handleChange({
autoPlay: value,
})
}}
/>
</div>
</div>
</div>
</div>

View File

@@ -1,4 +1,4 @@
import type { TransferMethod } from '@/types/app'
import type { TransferMethod, TtsAutoPlay } from '@/types/app'
export type EnabledOrDisabled = {
enabled?: boolean
@@ -14,6 +14,7 @@ export type SuggestedQuestionsAfterAnswer = EnabledOrDisabled
export type TextToSpeech = EnabledOrDisabled & {
language?: string
voice?: string
autoPlay?: TtsAutoPlay
}
export type SpeechToText = EnabledOrDisabled

View File

@@ -4,6 +4,8 @@ import {
useStoreApi,
} from 'reactflow'
import produce from 'immer'
import { v4 as uuidV4 } from 'uuid'
import { usePathname } from 'next/navigation'
import { useWorkflowStore } from '../store'
import { useNodesSyncDraft } from '../hooks'
import {
@@ -19,6 +21,7 @@ import {
stopWorkflowRun,
} from '@/service/workflow'
import { useFeaturesStore } from '@/app/components/base/features/hooks'
import { AudioPlayerManager } from '@/app/components/base/audio-btn/audio.player.manager'
export const useWorkflowRun = () => {
const store = useStoreApi()
@@ -27,6 +30,7 @@ export const useWorkflowRun = () => {
const featuresStore = useFeaturesStore()
const { doSyncWorkflowDraft } = useNodesSyncDraft()
const { handleUpdateWorkflowCanvas } = useWorkflowUpdate()
const pathname = usePathname()
const handleBackupDraft = useCallback(() => {
const {
@@ -134,6 +138,20 @@ export const useWorkflowRun = () => {
let isInIteration = false
let iterationLength = 0
let ttsUrl = ''
let ttsIsPublic = false
if (params.token) {
ttsUrl = '/text-to-audio'
ttsIsPublic = true
}
else if (params.appId) {
if (pathname.search('explore/installed') > -1)
ttsUrl = `/installed-apps/${params.appId}/text-to-audio`
else
ttsUrl = `/apps/${params.appId}/text-to-audio`
}
const player = AudioPlayerManager.getInstance().getAudioPlayer(ttsUrl, ttsIsPublic, uuidV4(), 'none', 'none', (_: any): any => {})
ssePost(
url,
{
@@ -468,6 +486,15 @@ export const useWorkflowRun = () => {
draft.resultText = text
}))
},
onTTSChunk: (messageId: string, audio: string, audioType?: string) => {
if (!audio || audio === '')
return
player.playAudioWithAudio(audio, true)
AudioPlayerManager.getInstance().resetMsgId(messageId)
},
onTTSEnd: (messageId: string, audio: string, audioType?: string) => {
player.playAudioWithAudio(audio, false)
},
...restCallback,
},
)

View File

@@ -323,6 +323,9 @@ const translation = {
language: 'Language',
resolutionTooltip: 'Text-to-speech voice support language。',
voice: 'Voice',
autoPlay: 'Auto Play',
autoPlayEnabled: 'Turn On',
autoPlayDisabled: 'Turn Off',
},
},
openingStatement: {

View File

@@ -319,6 +319,9 @@ const translation = {
language: '言語',
resolutionTooltip: 'テキスト読み上げの音声言語をサポートします。',
voice: '音声',
autoPlay: '自動再生',
autoPlayEnabled: '開ける',
autoPlayDisabled: '關閉',
},
},
openingStatement: {

View File

@@ -319,6 +319,9 @@ const translation = {
language: '语言',
resolutionTooltip: '文本转语音音色支持语言。',
voice: '音色',
autoPlay: '自动播放',
autoPlayEnabled: '开启',
autoPlayDisabled: '关闭',
},
},
openingStatement: {

View File

@@ -318,6 +318,9 @@ const translation = {
language: '語言',
resolutionTooltip: '文字轉語音音色支援語言。',
voice: '音色',
autoPlay: '自動播放',
autoPlayEnabled: '開啟',
autoPlayDisabled: '關閉',
},
},
openingStatement: {

View File

@@ -1,4 +1,4 @@
import type { AgentStrategy, ModelModeType, RETRIEVE_TYPE, ToolItem } from '@/types/app'
import type { AgentStrategy, ModelModeType, RETRIEVE_TYPE, ToolItem, TtsAutoPlay } from '@/types/app'
export type Inputs = Record<string, string | number | object>
export enum PromptMode {
@@ -79,6 +79,7 @@ export type TextToSpeechConfig = {
enabled: boolean
voice?: string
language?: string
autoPlay?: TtsAutoPlay
}
export type CitationConfig = MoreLikeThisConfig

View File

@@ -34,6 +34,7 @@ const nextConfig = {
// https://nextjs.org/docs/api-reference/next.config.js/ignoring-typescript-errors
ignoreBuildErrors: true,
},
reactStrictMode: true,
async redirects() {
return [
{

View File

@@ -120,6 +120,7 @@ export const generationIntroduction: Fetcher<GenerationIntroductionResponse, { u
}
/**
 * Requests the text-to-audio voice list for an app.
 *
 * @param appId - id of the app whose voices are requested
 * @param language - language code sent as the `language` query parameter;
 *   when undefined or empty, 'en-US' is sent instead
 * @returns the result of the GET request to `apps/{appId}/text-to-audio/voices`
 */
export const fetchAppVoices: Fetcher<AppVoicesListResponse, { appId: string; language?: string }> = ({ appId, language }) => {
  const voiceLanguage = language || 'en-US'
  const voicesUrl = `apps/${appId}/text-to-audio/voices?language=${voiceLanguage}`
  return get<AppVoicesListResponse>(voicesUrl)
}

View File

@@ -19,6 +19,7 @@ const TIME_OUT = 100000
const ContentType = {
json: 'application/json',
stream: 'text/event-stream',
audio: 'audio/mpeg',
form: 'application/x-www-form-urlencoded; charset=UTF-8',
download: 'application/octet-stream', // for download
upload: 'multipart/form-data', // for upload
@@ -59,6 +60,8 @@ export type IOnIterationStarted = (workflowStarted: IterationStartedResponse) =>
export type IOnIterationNexted = (workflowStarted: IterationNextedResponse) => void
export type IOnIterationFinished = (workflowFinished: IterationFinishedResponse) => void
export type IOnTextChunk = (textChunk: TextChunkResponse) => void
/**
 * Callback invoked by the stream handler for a `tts_message` event.
 * Receives the event's message id, its audio payload string and, optionally,
 * the audio type reported with the event.
 */
export type IOnTTSChunk = (messageId: string, audioStr: string, audioType?: string) => void
/**
 * Callback invoked by the stream handler for a `tts_message_end` event.
 * Same shape as IOnTTSChunk; `audioType` is optional and may be omitted by the caller.
 */
export type IOnTTSEnd = (messageId: string, audioStr: string, audioType?: string) => void
export type IOnTextReplace = (textReplace: TextReplaceResponse) => void
export type IOtherOptions = {
@@ -84,6 +87,8 @@ export type IOtherOptions = {
onIterationNext?: IOnIterationNexted
onIterationFinish?: IOnIterationFinished
onTextChunk?: IOnTextChunk
onTTSChunk?: IOnTTSChunk
onTTSEnd?: IOnTTSEnd
onTextReplace?: IOnTextReplace
}
@@ -135,6 +140,8 @@ const handleStream = (
onIterationNext?: IOnIterationNexted,
onIterationFinish?: IOnIterationFinished,
onTextChunk?: IOnTextChunk,
onTTSChunk?: IOnTTSChunk,
onTTSEnd?: IOnTTSEnd,
onTextReplace?: IOnTextReplace,
) => {
if (!response.ok)
@@ -227,6 +234,12 @@ const handleStream = (
else if (bufferObj.event === 'text_replace') {
onTextReplace?.(bufferObj as TextReplaceResponse)
}
else if (bufferObj.event === 'tts_message') {
onTTSChunk?.(bufferObj.message_id, bufferObj.audio, bufferObj.audio_type)
}
else if (bufferObj.event === 'tts_message_end') {
onTTSEnd?.(bufferObj.message_id, bufferObj.audio)
}
}
})
buffer = lines[lines.length - 1]
@@ -390,9 +403,10 @@ const baseFetch = <T>(
}
// return data
const data: Promise<T> = options.headers.get('Content-type') === ContentType.download ? res.blob() : res.json()
if (options.headers.get('Content-type') === ContentType.download || options.headers.get('Content-type') === ContentType.audio)
resolve(needAllResponseContent ? resClone : res.blob())
resolve(needAllResponseContent ? resClone : data)
else resolve(needAllResponseContent ? resClone : res.json())
})
.catch((err) => {
if (!silent)
@@ -475,6 +489,8 @@ export const ssePost = (
onIterationNext,
onIterationFinish,
onTextChunk,
onTTSChunk,
onTTSEnd,
onTextReplace,
onError,
getAbortController,
@@ -527,7 +543,7 @@ export const ssePost = (
return
}
onData?.(str, isFirstMessage, moreInfo)
}, onCompleted, onThought, onMessageEnd, onMessageReplace, onFile, onWorkflowStarted, onWorkflowFinished, onNodeStarted, onNodeFinished, onIterationStart, onIterationNext, onIterationFinish, onTextChunk, onTextReplace)
}, onCompleted, onThought, onMessageEnd, onMessageReplace, onFile, onWorkflowStarted, onWorkflowFinished, onNodeStarted, onNodeFinished, onIterationStart, onIterationNext, onIterationFinish, onTextChunk, onTTSChunk, onTTSEnd, onTextReplace)
}).catch((e) => {
if (e.toString() !== 'AbortError: The user aborted a request.')
Toast.notify({ type: 'error', message: e })

View File

@@ -1,4 +1,4 @@
import type { IOnCompleted, IOnData, IOnError, IOnFile, IOnIterationFinished, IOnIterationNexted, IOnIterationStarted, IOnMessageEnd, IOnMessageReplace, IOnNodeFinished, IOnNodeStarted, IOnTextChunk, IOnTextReplace, IOnThought, IOnWorkflowFinished, IOnWorkflowStarted } from './base'
import type { IOnCompleted, IOnData, IOnError, IOnFile, IOnIterationFinished, IOnIterationNexted, IOnIterationStarted, IOnMessageEnd, IOnMessageReplace, IOnNodeFinished, IOnNodeStarted, IOnTTSChunk, IOnTTSEnd, IOnTextChunk, IOnTextReplace, IOnThought, IOnWorkflowFinished, IOnWorkflowStarted } from './base'
import {
del as consoleDel, get as consoleGet, patch as consolePatch, post as consolePost,
delPublic as del, getPublic as get, patchPublic as patch, postPublic as post, ssePost,
@@ -30,7 +30,7 @@ export function getUrl(url: string, isInstalledApp: boolean, installedAppId: str
return isInstalledApp ? `installed-apps/${installedAppId}/${url.startsWith('/') ? url.slice(1) : url}` : url
}
export const sendChatMessage = async (body: Record<string, any>, { onData, onCompleted, onThought, onFile, onError, getAbortController, onMessageEnd, onMessageReplace }: {
export const sendChatMessage = async (body: Record<string, any>, { onData, onCompleted, onThought, onFile, onError, getAbortController, onMessageEnd, onMessageReplace, onTTSChunk, onTTSEnd }: {
onData: IOnData
onCompleted: IOnCompleted
onFile: IOnFile
@@ -39,13 +39,15 @@ export const sendChatMessage = async (body: Record<string, any>, { onData, onCom
onMessageEnd?: IOnMessageEnd
onMessageReplace?: IOnMessageReplace
getAbortController?: (abortController: AbortController) => void
onTTSChunk?: IOnTTSChunk
onTTSEnd?: IOnTTSEnd
}, isInstalledApp: boolean, installedAppId = '') => {
return ssePost(getUrl('chat-messages', isInstalledApp, installedAppId), {
body: {
...body,
response_mode: 'streaming',
},
}, { onData, onCompleted, onThought, onFile, isPublicAPI: !isInstalledApp, onError, getAbortController, onMessageEnd, onMessageReplace })
}, { onData, onCompleted, onThought, onFile, isPublicAPI: !isInstalledApp, onError, getAbortController, onMessageEnd, onMessageReplace, onTTSChunk, onTTSEnd })
}
export const stopChatMessageResponding = async (appId: string, taskId: string, isInstalledApp: boolean, installedAppId = '') => {
@@ -214,6 +216,10 @@ export const textToAudio = (url: string, isPublicAPI: boolean, body: FormData) =
return (getAction('post', !isPublicAPI))(url, { body }, { bodyStringify: false, deleteContentType: true }) as Promise<{ data: string }>
}
/**
 * Sends a POST text-to-audio request and asks the request helper for the
 * whole response (`needAllResponseContent: true`) instead of a parsed body.
 *
 * @param url - endpoint to post to
 * @param isPublicAPI - its negation is passed as the second argument of `getAction`
 * @param header - object carrying the desired content type
 * @param body - request payload (streaming flag plus optional voice, message id and text)
 * @returns whatever the selected POST action returns
 *
 * NOTE(review): the header object is forwarded under the key `header`;
 * confirm the request helper actually reads that key.
 */
export const textToAudioStream = (url: string, isPublicAPI: boolean, header: { content_type: string }, body: { streaming: boolean; voice?: string; message_id?: string; text?: string | null | undefined }) => {
  const postAction = getAction('post', !isPublicAPI)
  const requestOptions = { body, header }
  return postAction(url, requestOptions, { needAllResponseContent: true })
}
export const fetchAccessToken = async (appCode: string) => {
const headers = new Headers()
headers.append('X-App-Code', appCode)

View File

@@ -160,6 +160,7 @@ export type ModelConfig = {
enabled: boolean
voice?: string
language?: string
autoPlay?: TtsAutoPlay
}
retriever_resource: {
enabled: boolean
@@ -349,6 +350,11 @@ export enum TransferMethod {
remote_url = 'remote_url',
}
/**
 * Text-to-speech auto-play setting, used as the type of the optional
 * `autoPlay` field on the text-to-speech config types.
 * A string enum: each member's runtime value equals its name.
 */
export enum TtsAutoPlay {
// Auto-play switched on.
enabled = 'enabled',
// Auto-play switched off.
disabled = 'disabled',
}
export const ALLOW_FILE_EXTENSIONS = ['png', 'jpg', 'jpeg', 'webp', 'gif']
export type VisionSettings = {