Compare commits

...

4 Commits

Author SHA1 Message Date
raojianb
2855b9d9c5 feat: add language and voice rate 2024-09-23 15:54:42 -07:00
Joby
5dd784d63e feat: add task type (#18) 2024-09-23 13:38:15 -07:00
Joby
efb98f612b feat: simplify api (#14) 2024-09-22 01:54:14 -07:00
Joby
befb6228f5 feat: voice chat demo (#13) 2024-09-20 21:38:26 -07:00
2 changed files with 150 additions and 44 deletions

View File

@@ -20,3 +20,34 @@ export const AVATARS = [
name: "Joshua Heygen CEO",
},
];
// Label / language-code pairs, in the order they are listed to the user.
const STT_LANGUAGE_PAIRS: ReadonlyArray<readonly [string, string]> = [
  ["Bulgarian", "bg"],
  ["Chinese", "zh"],
  ["Czech", "cs"],
  ["Danish", "da"],
  ["Dutch", "nl"],
  ["English", "en"],
  ["Finnish", "fi"],
  ["French", "fr"],
  ["German", "de"],
  ["Greek", "el"],
  ["Hindi", "hi"],
  ["Hungarian", "hu"],
  ["Indonesian", "id"],
  ["Italian", "it"],
  ["Japanese", "ja"],
  ["Korean", "ko"],
  ["Malay", "ms"],
  ["Norwegian", "no"],
  ["Polish", "pl"],
  ["Portuguese", "pt"],
  ["Romanian", "ro"],
  ["Russian", "ru"],
  ["Slovak", "sk"],
  ["Spanish", "es"],
  ["Swedish", "sv"],
  ["Turkish", "tr"],
  ["Ukrainian", "uk"],
  ["Vietnamese", "vi"],
];

/**
 * Selectable language options.
 *
 * Every entry has a human-readable `label` and a short language code that is
 * exposed under both `value` and `key`, so the same object can feed a select
 * control's item key and its submitted value.
 */
export const STT_LANGUAGE_LIST = STT_LANGUAGE_PAIRS.map(([label, code]) => ({
  label,
  value: code,
  key: code,
}));

View File

@@ -1,5 +1,9 @@
import type { StartAvatarResponse } from "@heygen/streaming-avatar";
import StreamingAvatar, {AvatarQuality, StreamingEvents} from "@heygen/streaming-avatar";
import StreamingAvatar, {
AvatarQuality,
StreamingEvents, TaskType, VoiceEmotion,
} from "@heygen/streaming-avatar";
import {
Button,
Card,
@@ -11,11 +15,15 @@ import {
SelectItem,
Spinner,
Chip,
Tabs,
Tab,
} from "@nextui-org/react";
import { useEffect, useRef, useState } from "react";
import { usePrevious } from 'ahooks'
import { useMemoizedFn, usePrevious } from "ahooks";
import InteractiveAvatarTextInput from "./InteractiveAvatarTextInput";
import { AVATARS } from "@/app/lib/constants";
import {AVATARS, STT_LANGUAGE_LIST} from "@/app/lib/constants";
export default function InteractiveAvatar() {
const [isLoadingSession, setIsLoadingSession] = useState(false);
@@ -24,10 +32,14 @@ export default function InteractiveAvatar() {
const [debug, setDebug] = useState<string>();
const [knowledgeId, setKnowledgeId] = useState<string>("");
const [avatarId, setAvatarId] = useState<string>("");
const [language, setLanguage] = useState<string>('en');
const [data, setData] = useState<StartAvatarResponse>();
const [text, setText] = useState<string>("");
const mediaStream = useRef<HTMLVideoElement>(null);
const avatar = useRef<StreamingAvatar | null>(null);
const [chatMode, setChatMode] = useState("text_mode");
const [isUserTalking, setIsUserTalking] = useState(false);
async function fetchAccessToken() {
try {
@@ -35,6 +47,7 @@ export default function InteractiveAvatar() {
method: "POST",
});
const token = await response.text();
console.log("Access Token:", token); // Log the token to verify
return token;
@@ -48,6 +61,7 @@ export default function InteractiveAvatar() {
async function startSession() {
setIsLoadingSession(true);
const newToken = await fetchAccessToken();
avatar.current = new StreamingAvatar({
token: newToken,
});
@@ -61,18 +75,34 @@ export default function InteractiveAvatar() {
console.log("Stream disconnected");
endSession();
});
avatar.current?.on(StreamingEvents.STREAM_READY, (event) => {
console.log(">>>>> Stream ready:", event.detail);
setStream(event.detail);
});
avatar.current?.on(StreamingEvents.USER_START, (event) => {
console.log(">>>>> User started talking:", event);
setIsUserTalking(true);
});
avatar.current?.on(StreamingEvents.USER_STOP, (event) => {
console.log(">>>>> User stopped talking:", event);
setIsUserTalking(false);
});
try {
const res = await avatar.current.createStartAvatar({
quality: AvatarQuality.Low,
avatarName: avatarId,
knowledgeId: knowledgeId,
voice: {
rate: 1.5, // 0.5 ~ 1.5
emotion: VoiceEmotion.EXCITED,
},
language: language,
});
setData(res);
avatar.current?.on(StreamingEvents.STREAM_READY, (event) => {
console.log('Stream ready:', event.detail);
setStream(event.detail);
});
// default to voice mode
await avatar.current?.startVoiceChat();
setChatMode("voice_mode");
} catch (error) {
console.error("Error starting avatar session:", error);
} finally {
@@ -86,11 +116,10 @@ export default function InteractiveAvatar() {
return;
}
await avatar.current
.speak({ text: text, sessionId: data?.session_id! })
.catch((e) => {
setDebug(e.message);
});
// speak({ text: text, task_type: TaskType.REPEAT })
await avatar.current.speak({ text: text }).catch((e) => {
setDebug(e.message);
});
setIsLoadingRepeat(false);
}
async function handleInterrupt() {
@@ -100,28 +129,34 @@ export default function InteractiveAvatar() {
return;
}
await avatar.current
.interrupt({ sessionId: data?.session_id! })
.interrupt()
.catch((e) => {
setDebug(e.message);
});
}
async function endSession() {
if (!avatar.current) {
setDebug("Avatar API not initialized");
return;
}
await avatar.current.stopAvatar({
sessionId: data?.session_id!,
});
await avatar.current?.stopAvatar();
setStream(undefined);
}
/**
 * Switches the chat between "text_mode" and "voice_mode".
 *
 * Requesting the mode that is already active is a no-op. "text_mode" closes
 * the avatar's voice chat; any other value starts it. `chatMode` is updated
 * only after the switch succeeds, so a failed switch leaves the previously
 * selected mode in place.
 *
 * @param v - Key of the requested mode.
 */
const handleChangeChatMode = useMemoizedFn(async (v) => {
  if (v === chatMode) {
    return;
  }
  try {
    if (v === "text_mode") {
      avatar.current?.closeVoiceChat();
    } else {
      await avatar.current?.startVoiceChat();
    }
  } catch (error) {
    // This handler is invoked from a UI callback that does not await it, so
    // a rejection here would otherwise become an unhandled promise rejection.
    console.error("Error switching chat mode:", error);
    return;
  }
  setChatMode(v);
});
const previousText = usePrevious(text);
useEffect(() => {
if (!previousText && text) {
avatar.current?.startListening({ sessionId: data?.session_id! });
avatar.current?.startListening();
} else if (previousText && !text) {
avatar?.current?.stopListening({ sessionId: data?.session_id! });
avatar?.current?.stopListening();
}
}, [text, previousText]);
@@ -161,18 +196,18 @@ export default function InteractiveAvatar() {
</video>
<div className="flex flex-col gap-2 absolute bottom-3 right-3">
<Button
size="md"
onClick={handleInterrupt}
className="bg-gradient-to-tr from-indigo-500 to-indigo-300 text-white rounded-lg"
size="md"
variant="shadow"
onClick={handleInterrupt}
>
Interrupt task
</Button>
<Button
size="md"
onClick={endSession}
className="bg-gradient-to-tr from-indigo-500 to-indigo-300 text-white rounded-lg"
size="md"
variant="shadow"
onClick={endSession}
>
End session
</Button>
@@ -185,17 +220,17 @@ export default function InteractiveAvatar() {
Custom Knowledge ID (optional)
</p>
<Input
placeholder="Enter a custom knowledge ID"
value={knowledgeId}
onChange={(e) => setKnowledgeId(e.target.value)}
placeholder="Enter a custom knowledge ID"
/>
<p className="text-sm font-medium leading-none">
Custom Avatar ID (optional)
</p>
<Input
placeholder="Enter a custom avatar ID"
value={avatarId}
onChange={(e) => setAvatarId(e.target.value)}
placeholder="Enter a custom avatar ID"
/>
<Select
placeholder="Or select one from these example avatars"
@@ -213,34 +248,74 @@ export default function InteractiveAvatar() {
</SelectItem>
))}
</Select>
<Select
label="Select language"
placeholder="Select language"
className="max-w-xs"
selectedKeys={[language]}
onChange={(e) => {
setLanguage(e.target.value);
}}
>
{STT_LANGUAGE_LIST.map((lang) => (
<SelectItem key={lang.key}>
{lang.label}
</SelectItem>
))}
</Select>
</div>
<Button
size="md"
onClick={startSession}
className="bg-gradient-to-tr from-indigo-500 to-indigo-300 w-full text-white"
size="md"
variant="shadow"
onClick={startSession}
>
Start session
</Button>
</div>
) : (
<Spinner size="lg" color="default" />
<Spinner color="default" size="lg" />
)}
</CardBody>
<Divider />
<CardFooter className="flex flex-col gap-3 relative">
<InteractiveAvatarTextInput
label="Chat"
placeholder="Type something for the avatar to respond"
input={text}
onSubmit={handleSpeak}
setInput={setText}
disabled={!stream}
loading={isLoadingRepeat}
/>
{
text && <Chip className='absolute right-16 top-6'>Listening</Chip>
}
<Tabs
aria-label="Options"
selectedKey={chatMode}
onSelectionChange={(v) => {
handleChangeChatMode(v);
}}
>
<Tab key="text_mode" title="Text mode" />
<Tab key="voice_mode" title="Voice mode" />
</Tabs>
{chatMode === "text_mode" ? (
<div className="w-full flex relative">
<InteractiveAvatarTextInput
disabled={!stream}
input={text}
label="Chat"
loading={isLoadingRepeat}
placeholder="Type something for the avatar to respond"
setInput={setText}
onSubmit={handleSpeak}
/>
{text && (
<Chip className="absolute right-16 top-3">Listening</Chip>
)}
</div>
) : (
<div className="w-full text-center">
<Button
isDisabled={!isUserTalking}
className="bg-gradient-to-tr from-indigo-500 to-indigo-300 text-white"
size="md"
variant="shadow"
>
{isUserTalking ? "Listening" : "Voice chat"}
</Button>
</div>
)}
</CardFooter>
</Card>
<p className="font-mono text-right">