Compare commits
11 Commits
feat/voice
...
fix/incorr
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
654787b17b | ||
|
|
6fca8b4d42 | ||
|
|
431281d47c | ||
|
|
274a307e83 | ||
|
|
03ef24b031 | ||
|
|
21f6c6d468 | ||
|
|
d7a7e3174c | ||
|
|
e653fa74c4 | ||
|
|
5dd784d63e | ||
|
|
efb98f612b | ||
|
|
befb6228f5 |
@@ -20,3 +20,34 @@ export const AVATARS = [
|
|||||||
name: "Joshua Heygen CEO",
|
name: "Joshua Heygen CEO",
|
||||||
},
|
},
|
||||||
];
|
];
|
||||||
|
|
||||||
|
export const STT_LANGUAGE_LIST = [
|
||||||
|
{ label: 'Bulgarian', value: 'bg', key: 'bg' },
|
||||||
|
{ label: 'Chinese', value: 'zh', key: 'zh' },
|
||||||
|
{ label: 'Czech', value: 'cs', key: 'cs' },
|
||||||
|
{ label: 'Danish', value: 'da', key: 'da' },
|
||||||
|
{ label: 'Dutch', value: 'nl', key: 'nl' },
|
||||||
|
{ label: 'English', value: 'en', key: 'en' },
|
||||||
|
{ label: 'Finnish', value: 'fi', key: 'fi' },
|
||||||
|
{ label: 'French', value: 'fr', key: 'fr' },
|
||||||
|
{ label: 'German', value: 'de', key: 'de' },
|
||||||
|
{ label: 'Greek', value: 'el', key: 'el' },
|
||||||
|
{ label: 'Hindi', value: 'hi', key: 'hi' },
|
||||||
|
{ label: 'Hungarian', value: 'hu', key: 'hu' },
|
||||||
|
{ label: 'Indonesian', value: 'id', key: 'id' },
|
||||||
|
{ label: 'Italian', value: 'it', key: 'it' },
|
||||||
|
{ label: 'Japanese', value: 'ja', key: 'ja' },
|
||||||
|
{ label: 'Korean', value: 'ko', key: 'ko' },
|
||||||
|
{ label: 'Malay', value: 'ms', key: 'ms' },
|
||||||
|
{ label: 'Norwegian', value: 'no', key: 'no' },
|
||||||
|
{ label: 'Polish', value: 'pl', key: 'pl' },
|
||||||
|
{ label: 'Portuguese', value: 'pt', key: 'pt' },
|
||||||
|
{ label: 'Romanian', value: 'ro', key: 'ro' },
|
||||||
|
{ label: 'Russian', value: 'ru', key: 'ru' },
|
||||||
|
{ label: 'Slovak', value: 'sk', key: 'sk' },
|
||||||
|
{ label: 'Spanish', value: 'es', key: 'es' },
|
||||||
|
{ label: 'Swedish', value: 'sv', key: 'sv' },
|
||||||
|
{ label: 'Turkish', value: 'tr', key: 'tr' },
|
||||||
|
{ label: 'Ukrainian', value: 'uk', key: 'uk' },
|
||||||
|
{ label: 'Vietnamese', value: 'vi', key: 'vi' },
|
||||||
|
];
|
||||||
|
|||||||
@@ -1,5 +1,9 @@
|
|||||||
import type { StartAvatarResponse } from "@heygen/streaming-avatar";
|
import type { StartAvatarResponse } from "@heygen/streaming-avatar";
|
||||||
import StreamingAvatar, {AvatarQuality, StreamingEvents} from "@heygen/streaming-avatar";
|
|
||||||
|
import StreamingAvatar, {
|
||||||
|
AvatarQuality,
|
||||||
|
StreamingEvents, TaskMode, TaskType, VoiceEmotion,
|
||||||
|
} from "@heygen/streaming-avatar";
|
||||||
import {
|
import {
|
||||||
Button,
|
Button,
|
||||||
Card,
|
Card,
|
||||||
@@ -11,11 +15,15 @@ import {
|
|||||||
SelectItem,
|
SelectItem,
|
||||||
Spinner,
|
Spinner,
|
||||||
Chip,
|
Chip,
|
||||||
|
Tabs,
|
||||||
|
Tab,
|
||||||
} from "@nextui-org/react";
|
} from "@nextui-org/react";
|
||||||
import { useEffect, useRef, useState } from "react";
|
import { useEffect, useRef, useState } from "react";
|
||||||
import { usePrevious } from 'ahooks'
|
import { useMemoizedFn, usePrevious } from "ahooks";
|
||||||
|
|
||||||
import InteractiveAvatarTextInput from "./InteractiveAvatarTextInput";
|
import InteractiveAvatarTextInput from "./InteractiveAvatarTextInput";
|
||||||
import { AVATARS } from "@/app/lib/constants";
|
|
||||||
|
import {AVATARS, STT_LANGUAGE_LIST} from "@/app/lib/constants";
|
||||||
|
|
||||||
export default function InteractiveAvatar() {
|
export default function InteractiveAvatar() {
|
||||||
const [isLoadingSession, setIsLoadingSession] = useState(false);
|
const [isLoadingSession, setIsLoadingSession] = useState(false);
|
||||||
@@ -24,10 +32,14 @@ export default function InteractiveAvatar() {
|
|||||||
const [debug, setDebug] = useState<string>();
|
const [debug, setDebug] = useState<string>();
|
||||||
const [knowledgeId, setKnowledgeId] = useState<string>("");
|
const [knowledgeId, setKnowledgeId] = useState<string>("");
|
||||||
const [avatarId, setAvatarId] = useState<string>("");
|
const [avatarId, setAvatarId] = useState<string>("");
|
||||||
|
const [language, setLanguage] = useState<string>('en');
|
||||||
|
|
||||||
const [data, setData] = useState<StartAvatarResponse>();
|
const [data, setData] = useState<StartAvatarResponse>();
|
||||||
const [text, setText] = useState<string>("");
|
const [text, setText] = useState<string>("");
|
||||||
const mediaStream = useRef<HTMLVideoElement>(null);
|
const mediaStream = useRef<HTMLVideoElement>(null);
|
||||||
const avatar = useRef<StreamingAvatar | null>(null);
|
const avatar = useRef<StreamingAvatar | null>(null);
|
||||||
|
const [chatMode, setChatMode] = useState("text_mode");
|
||||||
|
const [isUserTalking, setIsUserTalking] = useState(false);
|
||||||
|
|
||||||
async function fetchAccessToken() {
|
async function fetchAccessToken() {
|
||||||
try {
|
try {
|
||||||
@@ -35,6 +47,7 @@ export default function InteractiveAvatar() {
|
|||||||
method: "POST",
|
method: "POST",
|
||||||
});
|
});
|
||||||
const token = await response.text();
|
const token = await response.text();
|
||||||
|
|
||||||
console.log("Access Token:", token); // Log the token to verify
|
console.log("Access Token:", token); // Log the token to verify
|
||||||
|
|
||||||
return token;
|
return token;
|
||||||
@@ -48,6 +61,7 @@ export default function InteractiveAvatar() {
|
|||||||
async function startSession() {
|
async function startSession() {
|
||||||
setIsLoadingSession(true);
|
setIsLoadingSession(true);
|
||||||
const newToken = await fetchAccessToken();
|
const newToken = await fetchAccessToken();
|
||||||
|
|
||||||
avatar.current = new StreamingAvatar({
|
avatar.current = new StreamingAvatar({
|
||||||
token: newToken,
|
token: newToken,
|
||||||
});
|
});
|
||||||
@@ -61,18 +75,37 @@ export default function InteractiveAvatar() {
|
|||||||
console.log("Stream disconnected");
|
console.log("Stream disconnected");
|
||||||
endSession();
|
endSession();
|
||||||
});
|
});
|
||||||
|
avatar.current?.on(StreamingEvents.STREAM_READY, (event) => {
|
||||||
|
console.log(">>>>> Stream ready:", event.detail);
|
||||||
|
setStream(event.detail);
|
||||||
|
});
|
||||||
|
avatar.current?.on(StreamingEvents.USER_START, (event) => {
|
||||||
|
console.log(">>>>> User started talking:", event);
|
||||||
|
setIsUserTalking(true);
|
||||||
|
});
|
||||||
|
avatar.current?.on(StreamingEvents.USER_STOP, (event) => {
|
||||||
|
console.log(">>>>> User stopped talking:", event);
|
||||||
|
setIsUserTalking(false);
|
||||||
|
});
|
||||||
try {
|
try {
|
||||||
const res = await avatar.current.createStartAvatar({
|
const res = await avatar.current.createStartAvatar({
|
||||||
quality: AvatarQuality.Low,
|
quality: AvatarQuality.Low,
|
||||||
avatarName: avatarId,
|
avatarName: avatarId,
|
||||||
knowledgeId: knowledgeId,
|
knowledgeId: knowledgeId, // Or use a custom `knowledgeBase`.
|
||||||
|
voice: {
|
||||||
|
rate: 1.5, // 0.5 ~ 1.5
|
||||||
|
emotion: VoiceEmotion.EXCITED,
|
||||||
|
},
|
||||||
|
language: language,
|
||||||
|
disableIdleTimeout: true,
|
||||||
});
|
});
|
||||||
|
|
||||||
setData(res);
|
setData(res);
|
||||||
avatar.current?.on(StreamingEvents.STREAM_READY, (event) => {
|
// default to voice mode
|
||||||
console.log('Stream ready:', event.detail);
|
await avatar.current?.startVoiceChat({
|
||||||
setStream(event.detail);
|
useSilencePrompt: false
|
||||||
});
|
});
|
||||||
|
setChatMode("voice_mode");
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error("Error starting avatar session:", error);
|
console.error("Error starting avatar session:", error);
|
||||||
} finally {
|
} finally {
|
||||||
@@ -86,11 +119,10 @@ export default function InteractiveAvatar() {
|
|||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
await avatar.current
|
// speak({ text: text, task_type: TaskType.REPEAT })
|
||||||
.speak({ text: text, sessionId: data?.session_id! })
|
await avatar.current.speak({ text: text, taskType: TaskType.REPEAT, taskMode: TaskMode.SYNC }).catch((e) => {
|
||||||
.catch((e) => {
|
setDebug(e.message);
|
||||||
setDebug(e.message);
|
});
|
||||||
});
|
|
||||||
setIsLoadingRepeat(false);
|
setIsLoadingRepeat(false);
|
||||||
}
|
}
|
||||||
async function handleInterrupt() {
|
async function handleInterrupt() {
|
||||||
@@ -100,28 +132,34 @@ export default function InteractiveAvatar() {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
await avatar.current
|
await avatar.current
|
||||||
.interrupt({ sessionId: data?.session_id! })
|
.interrupt()
|
||||||
.catch((e) => {
|
.catch((e) => {
|
||||||
setDebug(e.message);
|
setDebug(e.message);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
async function endSession() {
|
async function endSession() {
|
||||||
if (!avatar.current) {
|
await avatar.current?.stopAvatar();
|
||||||
setDebug("Avatar API not initialized");
|
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
await avatar.current.stopAvatar({
|
|
||||||
sessionId: data?.session_id!,
|
|
||||||
});
|
|
||||||
setStream(undefined);
|
setStream(undefined);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const handleChangeChatMode = useMemoizedFn(async (v) => {
|
||||||
|
if (v === chatMode) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (v === "text_mode") {
|
||||||
|
avatar.current?.closeVoiceChat();
|
||||||
|
} else {
|
||||||
|
await avatar.current?.startVoiceChat();
|
||||||
|
}
|
||||||
|
setChatMode(v);
|
||||||
|
});
|
||||||
|
|
||||||
const previousText = usePrevious(text);
|
const previousText = usePrevious(text);
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (!previousText && text) {
|
if (!previousText && text) {
|
||||||
avatar.current?.startListening({ sessionId: data?.session_id! });
|
avatar.current?.startListening();
|
||||||
} else if (previousText && !text) {
|
} else if (previousText && !text) {
|
||||||
avatar?.current?.stopListening({ sessionId: data?.session_id! });
|
avatar?.current?.stopListening();
|
||||||
}
|
}
|
||||||
}, [text, previousText]);
|
}, [text, previousText]);
|
||||||
|
|
||||||
@@ -161,18 +199,18 @@ export default function InteractiveAvatar() {
|
|||||||
</video>
|
</video>
|
||||||
<div className="flex flex-col gap-2 absolute bottom-3 right-3">
|
<div className="flex flex-col gap-2 absolute bottom-3 right-3">
|
||||||
<Button
|
<Button
|
||||||
size="md"
|
|
||||||
onClick={handleInterrupt}
|
|
||||||
className="bg-gradient-to-tr from-indigo-500 to-indigo-300 text-white rounded-lg"
|
className="bg-gradient-to-tr from-indigo-500 to-indigo-300 text-white rounded-lg"
|
||||||
|
size="md"
|
||||||
variant="shadow"
|
variant="shadow"
|
||||||
|
onClick={handleInterrupt}
|
||||||
>
|
>
|
||||||
Interrupt task
|
Interrupt task
|
||||||
</Button>
|
</Button>
|
||||||
<Button
|
<Button
|
||||||
size="md"
|
|
||||||
onClick={endSession}
|
|
||||||
className="bg-gradient-to-tr from-indigo-500 to-indigo-300 text-white rounded-lg"
|
className="bg-gradient-to-tr from-indigo-500 to-indigo-300 text-white rounded-lg"
|
||||||
|
size="md"
|
||||||
variant="shadow"
|
variant="shadow"
|
||||||
|
onClick={endSession}
|
||||||
>
|
>
|
||||||
End session
|
End session
|
||||||
</Button>
|
</Button>
|
||||||
@@ -185,17 +223,17 @@ export default function InteractiveAvatar() {
|
|||||||
Custom Knowledge ID (optional)
|
Custom Knowledge ID (optional)
|
||||||
</p>
|
</p>
|
||||||
<Input
|
<Input
|
||||||
|
placeholder="Enter a custom knowledge ID"
|
||||||
value={knowledgeId}
|
value={knowledgeId}
|
||||||
onChange={(e) => setKnowledgeId(e.target.value)}
|
onChange={(e) => setKnowledgeId(e.target.value)}
|
||||||
placeholder="Enter a custom knowledge ID"
|
|
||||||
/>
|
/>
|
||||||
<p className="text-sm font-medium leading-none">
|
<p className="text-sm font-medium leading-none">
|
||||||
Custom Avatar ID (optional)
|
Custom Avatar ID (optional)
|
||||||
</p>
|
</p>
|
||||||
<Input
|
<Input
|
||||||
|
placeholder="Enter a custom avatar ID"
|
||||||
value={avatarId}
|
value={avatarId}
|
||||||
onChange={(e) => setAvatarId(e.target.value)}
|
onChange={(e) => setAvatarId(e.target.value)}
|
||||||
placeholder="Enter a custom avatar ID"
|
|
||||||
/>
|
/>
|
||||||
<Select
|
<Select
|
||||||
placeholder="Or select one from these example avatars"
|
placeholder="Or select one from these example avatars"
|
||||||
@@ -213,34 +251,74 @@ export default function InteractiveAvatar() {
|
|||||||
</SelectItem>
|
</SelectItem>
|
||||||
))}
|
))}
|
||||||
</Select>
|
</Select>
|
||||||
|
<Select
|
||||||
|
label="Select language"
|
||||||
|
placeholder="Select language"
|
||||||
|
className="max-w-xs"
|
||||||
|
selectedKeys={[language]}
|
||||||
|
onChange={(e) => {
|
||||||
|
setLanguage(e.target.value);
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
{STT_LANGUAGE_LIST.map((lang) => (
|
||||||
|
<SelectItem key={lang.key}>
|
||||||
|
{lang.label}
|
||||||
|
</SelectItem>
|
||||||
|
))}
|
||||||
|
</Select>
|
||||||
</div>
|
</div>
|
||||||
<Button
|
<Button
|
||||||
size="md"
|
|
||||||
onClick={startSession}
|
|
||||||
className="bg-gradient-to-tr from-indigo-500 to-indigo-300 w-full text-white"
|
className="bg-gradient-to-tr from-indigo-500 to-indigo-300 w-full text-white"
|
||||||
|
size="md"
|
||||||
variant="shadow"
|
variant="shadow"
|
||||||
|
onClick={startSession}
|
||||||
>
|
>
|
||||||
Start session
|
Start session
|
||||||
</Button>
|
</Button>
|
||||||
</div>
|
</div>
|
||||||
) : (
|
) : (
|
||||||
<Spinner size="lg" color="default" />
|
<Spinner color="default" size="lg" />
|
||||||
)}
|
)}
|
||||||
</CardBody>
|
</CardBody>
|
||||||
<Divider />
|
<Divider />
|
||||||
<CardFooter className="flex flex-col gap-3 relative">
|
<CardFooter className="flex flex-col gap-3 relative">
|
||||||
<InteractiveAvatarTextInput
|
<Tabs
|
||||||
label="Chat"
|
aria-label="Options"
|
||||||
placeholder="Type something for the avatar to respond"
|
selectedKey={chatMode}
|
||||||
input={text}
|
onSelectionChange={(v) => {
|
||||||
onSubmit={handleSpeak}
|
handleChangeChatMode(v);
|
||||||
setInput={setText}
|
}}
|
||||||
disabled={!stream}
|
>
|
||||||
loading={isLoadingRepeat}
|
<Tab key="text_mode" title="Text mode" />
|
||||||
/>
|
<Tab key="voice_mode" title="Voice mode" />
|
||||||
{
|
</Tabs>
|
||||||
text && <Chip className='absolute right-16 top-6'>Listening</Chip>
|
{chatMode === "text_mode" ? (
|
||||||
}
|
<div className="w-full flex relative">
|
||||||
|
<InteractiveAvatarTextInput
|
||||||
|
disabled={!stream}
|
||||||
|
input={text}
|
||||||
|
label="Chat"
|
||||||
|
loading={isLoadingRepeat}
|
||||||
|
placeholder="Type something for the avatar to respond"
|
||||||
|
setInput={setText}
|
||||||
|
onSubmit={handleSpeak}
|
||||||
|
/>
|
||||||
|
{text && (
|
||||||
|
<Chip className="absolute right-16 top-3">Listening</Chip>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
) : (
|
||||||
|
<div className="w-full text-center">
|
||||||
|
<Button
|
||||||
|
isDisabled={!isUserTalking}
|
||||||
|
className="bg-gradient-to-tr from-indigo-500 to-indigo-300 text-white"
|
||||||
|
size="md"
|
||||||
|
variant="shadow"
|
||||||
|
>
|
||||||
|
{isUserTalking ? "Listening" : "Voice chat"}
|
||||||
|
</Button>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
</CardFooter>
|
</CardFooter>
|
||||||
</Card>
|
</Card>
|
||||||
<p className="font-mono text-right">
|
<p className="font-mono text-right">
|
||||||
|
|||||||
@@ -28,7 +28,7 @@ export default function NavBar() {
|
|||||||
<Link
|
<Link
|
||||||
isExternal
|
isExternal
|
||||||
color="foreground"
|
color="foreground"
|
||||||
href="https://app.heygen.com/interactive-avatar"
|
href="https://labs.heygen.com/interactive-avatar"
|
||||||
>
|
>
|
||||||
Avatars
|
Avatars
|
||||||
</Link>
|
</Link>
|
||||||
|
|||||||
@@ -10,7 +10,7 @@
|
|||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@ai-sdk/openai": "^0.0.34",
|
"@ai-sdk/openai": "^0.0.34",
|
||||||
"@heygen/streaming-avatar": "^2.0.0-beta.1",
|
"@heygen/streaming-avatar": "^2.0.8",
|
||||||
"@nextui-org/button": "2.0.34",
|
"@nextui-org/button": "2.0.34",
|
||||||
"@nextui-org/chip": "^2.0.32",
|
"@nextui-org/chip": "^2.0.32",
|
||||||
"@nextui-org/code": "2.0.29",
|
"@nextui-org/code": "2.0.29",
|
||||||
|
|||||||
Reference in New Issue
Block a user