feat: voice chat demo

This commit is contained in:
raojianb
2024-09-20 21:37:53 -07:00
parent 2454a4729d
commit 73ff374fb1

View File

@@ -1,5 +1,9 @@
import type { StartAvatarResponse } from "@heygen/streaming-avatar"; import type { StartAvatarResponse } from "@heygen/streaming-avatar";
import StreamingAvatar, {AvatarQuality, StreamingEvents} from "@heygen/streaming-avatar";
import StreamingAvatar, {
AvatarQuality,
StreamingEvents,
} from "@heygen/streaming-avatar";
import { import {
Button, Button,
Card, Card,
@@ -11,10 +15,14 @@ import {
SelectItem, SelectItem,
Spinner, Spinner,
Chip, Chip,
Tabs,
Tab,
} from "@nextui-org/react"; } from "@nextui-org/react";
import { useEffect, useRef, useState } from "react"; import { useEffect, useRef, useState } from "react";
import { usePrevious } from 'ahooks' import { useMemoizedFn, usePrevious } from "ahooks";
import InteractiveAvatarTextInput from "./InteractiveAvatarTextInput"; import InteractiveAvatarTextInput from "./InteractiveAvatarTextInput";
import { AVATARS } from "@/app/lib/constants"; import { AVATARS } from "@/app/lib/constants";
export default function InteractiveAvatar() { export default function InteractiveAvatar() {
@@ -28,6 +36,8 @@ export default function InteractiveAvatar() {
const [text, setText] = useState<string>(""); const [text, setText] = useState<string>("");
const mediaStream = useRef<HTMLVideoElement>(null); const mediaStream = useRef<HTMLVideoElement>(null);
const avatar = useRef<StreamingAvatar | null>(null); const avatar = useRef<StreamingAvatar | null>(null);
const [chatMode, setChatMode] = useState("text_mode");
const [isUserTalking, setIsUserTalking] = useState(false);
async function fetchAccessToken() { async function fetchAccessToken() {
try { try {
@@ -35,6 +45,7 @@ export default function InteractiveAvatar() {
method: "POST", method: "POST",
}); });
const token = await response.text(); const token = await response.text();
console.log("Access Token:", token); // Log the token to verify console.log("Access Token:", token); // Log the token to verify
return token; return token;
@@ -48,6 +59,7 @@ export default function InteractiveAvatar() {
async function startSession() { async function startSession() {
setIsLoadingSession(true); setIsLoadingSession(true);
const newToken = await fetchAccessToken(); const newToken = await fetchAccessToken();
avatar.current = new StreamingAvatar({ avatar.current = new StreamingAvatar({
token: newToken, token: newToken,
}); });
@@ -61,6 +73,18 @@ export default function InteractiveAvatar() {
console.log("Stream disconnected"); console.log("Stream disconnected");
endSession(); endSession();
}); });
avatar.current?.on(StreamingEvents.STREAM_READY, (event) => {
console.log(">>>>> Stream ready:", event.detail);
setStream(event.detail);
});
avatar.current?.on(StreamingEvents.USER_START, (event) => {
console.log(">>>>> User started talking:", event);
setIsUserTalking(true);
});
avatar.current?.on(StreamingEvents.USER_STOP, (event) => {
console.log(">>>>> User stopped talking:", event);
setIsUserTalking(false);
});
try { try {
const res = await avatar.current.createStartAvatar({ const res = await avatar.current.createStartAvatar({
quality: AvatarQuality.Low, quality: AvatarQuality.Low,
@@ -69,10 +93,9 @@ export default function InteractiveAvatar() {
}); });
setData(res); setData(res);
avatar.current?.on(StreamingEvents.STREAM_READY, (event) => { // default to voice mode
console.log('Stream ready:', event.detail); await avatar.current?.startVoiceChat();
setStream(event.detail); setChatMode("voice_mode");
});
} catch (error) { } catch (error) {
console.error("Error starting avatar session:", error); console.error("Error starting avatar session:", error);
} finally { } finally {
@@ -116,6 +139,19 @@ export default function InteractiveAvatar() {
}); });
setStream(undefined); setStream(undefined);
} }
/**
 * Switches the avatar session between text chat and voice chat.
 *
 * Does nothing when `v` already equals the current `chatMode`. A value of
 * "text_mode" closes the voice chat; any other value starts it. `chatMode`
 * is only updated once the SDK call has completed without throwing, so a
 * failed switch leaves the previously selected mode in place.
 *
 * @param v - The requested mode key ("text_mode" or "voice_mode").
 */
const handleChangeChatMode = useMemoizedFn(async (v) => {
  if (v === chatMode) {
    return;
  }

  try {
    if (v === "text_mode") {
      avatar.current?.closeVoiceChat();
    } else {
      await avatar.current?.startVoiceChat();
    }
  } catch (error) {
    // This handler is invoked without being awaited, so letting the error
    // propagate would surface as an unhandled promise rejection. Log it
    // (same style as the session start error handling) and keep the old mode.
    console.error("Error changing chat mode:", error);

    return;
  }

  setChatMode(v);
});
const previousText = usePrevious(text); const previousText = usePrevious(text);
useEffect(() => { useEffect(() => {
if (!previousText && text) { if (!previousText && text) {
@@ -161,18 +197,18 @@ export default function InteractiveAvatar() {
</video> </video>
<div className="flex flex-col gap-2 absolute bottom-3 right-3"> <div className="flex flex-col gap-2 absolute bottom-3 right-3">
<Button <Button
size="md"
onClick={handleInterrupt}
className="bg-gradient-to-tr from-indigo-500 to-indigo-300 text-white rounded-lg" className="bg-gradient-to-tr from-indigo-500 to-indigo-300 text-white rounded-lg"
size="md"
variant="shadow" variant="shadow"
onClick={handleInterrupt}
> >
Interrupt task Interrupt task
</Button> </Button>
<Button <Button
size="md"
onClick={endSession}
className="bg-gradient-to-tr from-indigo-500 to-indigo-300 text-white rounded-lg" className="bg-gradient-to-tr from-indigo-500 to-indigo-300 text-white rounded-lg"
size="md"
variant="shadow" variant="shadow"
onClick={endSession}
> >
End session End session
</Button> </Button>
@@ -185,17 +221,17 @@ export default function InteractiveAvatar() {
Custom Knowledge ID (optional) Custom Knowledge ID (optional)
</p> </p>
<Input <Input
placeholder="Enter a custom knowledge ID"
value={knowledgeId} value={knowledgeId}
onChange={(e) => setKnowledgeId(e.target.value)} onChange={(e) => setKnowledgeId(e.target.value)}
placeholder="Enter a custom knowledge ID"
/> />
<p className="text-sm font-medium leading-none"> <p className="text-sm font-medium leading-none">
Custom Avatar ID (optional) Custom Avatar ID (optional)
</p> </p>
<Input <Input
placeholder="Enter a custom avatar ID"
value={avatarId} value={avatarId}
onChange={(e) => setAvatarId(e.target.value)} onChange={(e) => setAvatarId(e.target.value)}
placeholder="Enter a custom avatar ID"
/> />
<Select <Select
placeholder="Or select one from these example avatars" placeholder="Or select one from these example avatars"
@@ -215,32 +251,57 @@ export default function InteractiveAvatar() {
</Select> </Select>
</div> </div>
<Button <Button
size="md"
onClick={startSession}
className="bg-gradient-to-tr from-indigo-500 to-indigo-300 w-full text-white" className="bg-gradient-to-tr from-indigo-500 to-indigo-300 w-full text-white"
size="md"
variant="shadow" variant="shadow"
onClick={startSession}
> >
Start session Start session
</Button> </Button>
</div> </div>
) : ( ) : (
<Spinner size="lg" color="default" /> <Spinner color="default" size="lg" />
)} )}
</CardBody> </CardBody>
<Divider /> <Divider />
<CardFooter className="flex flex-col gap-3 relative"> <CardFooter className="flex flex-col gap-3 relative">
<InteractiveAvatarTextInput <Tabs
label="Chat" aria-label="Options"
placeholder="Type something for the avatar to respond" selectedKey={chatMode}
input={text} onSelectionChange={(v) => {
onSubmit={handleSpeak} handleChangeChatMode(v);
setInput={setText} }}
disabled={!stream} >
loading={isLoadingRepeat} <Tab key="text_mode" title="Text mode" />
/> <Tab key="voice_mode" title="Voice mode" />
{ </Tabs>
text && <Chip className='absolute right-16 top-6'>Listening</Chip> {chatMode === "text_mode" ? (
} <div className="w-full flex relative">
<InteractiveAvatarTextInput
disabled={!stream}
input={text}
label="Chat"
loading={isLoadingRepeat}
placeholder="Type something for the avatar to respond"
setInput={setText}
onSubmit={handleSpeak}
/>
{text && (
<Chip className="absolute right-16 top-3">Listening</Chip>
)}
</div>
) : (
<div className="w-full text-center">
<Button
isDisabled={!isUserTalking}
className="bg-gradient-to-tr from-indigo-500 to-indigo-300 text-white"
size="md"
variant="shadow"
>
{isUserTalking ? "Listening" : "Voice chat"}
</Button>
</div>
)}
</CardFooter> </CardFooter>
</Card> </Card>
<p className="font-mono text-right"> <p className="font-mono text-right">