Compare commits

..

12 Commits

Author SHA1 Message Date
raojianb
21486d27c9 feat: upgrade the sdk to v2.0.8 2024-11-17 22:33:42 -08:00
Joby
431281d47c feat: task mode (#30) 2024-10-22 16:58:38 -07:00
Joby
274a307e83 chore: update sdk version (#26) 2024-09-30 18:24:59 -07:00
Joby
03ef24b031 chore: update sdk version (#24) 2024-09-27 15:39:13 -07:00
Joby
21f6c6d468 feat: support knwoledge base (#23)
* feat: support knwoledge base

* feat: support knwoledge base
2024-09-25 19:53:26 -07:00
Joby
d7a7e3174c feat: update dependencies (#20) 2024-09-23 16:14:07 -07:00
Joby
e653fa74c4 feat: add language and voice rate (#19) 2024-09-23 15:55:33 -07:00
Joby
5dd784d63e feat: add task type (#18) 2024-09-23 13:38:15 -07:00
Joby
efb98f612b feat: simplify api (#14) 2024-09-22 01:54:14 -07:00
Joby
befb6228f5 feat: voice chat demo (#13) 2024-09-20 21:38:26 -07:00
James Zow
2454a4729d Update README.md (#8) 2024-09-06 21:00:48 -07:00
Joby
935b10279b Feat/livekit (#9)
* feat: using version 2.0 skd

* feat: using version 2.0 skd

* feat: using version 2.0 skd
2024-09-06 20:59:55 -07:00
4 changed files with 155 additions and 46 deletions

View File

@@ -15,7 +15,7 @@ Feel free to play around with the existing code and please leave any feedback fo
3. Run `npm install` (assuming you have npm installed. If not, please follow these instructions: https://docs.npmjs.com/downloading-and-installing-node-js-and-npm/)
4. Enter your HeyGen Enterprise API Token or Trial Token in the `.env` file. Replace `PLACEHOLDER-API-KEY` with your API key. This will allow the Client app to generate secure Access Tokens with which to create interactive sessions.
4. Enter your HeyGen Enterprise API Token or Trial Token in the `.env` file. Replace `HEYGEN_API_KEY` with your API key. This will allow the Client app to generate secure Access Tokens with which to create interactive sessions.
You can retrieve either the API Key or Trial Token by logging in to HeyGen and navigating to this page in your settings: <https://app.heygen.com/settings?nav=API>. NOTE: Use the trial token if you don't have an enterprise API token yet.

View File

@@ -20,3 +20,34 @@ export const AVATARS = [
name: "Joshua Heygen CEO",
},
];
/**
 * Language options for a language picker.
 *
 * Every entry has the shape `{ label, value, key }`, where `label` is the
 * human-readable language name and `value` / `key` both hold the same short
 * language code. Entries are listed alphabetically by label.
 */
export const STT_LANGUAGE_LIST = Object.entries({
  Bulgarian: 'bg',
  Chinese: 'zh',
  Czech: 'cs',
  Danish: 'da',
  Dutch: 'nl',
  English: 'en',
  Finnish: 'fi',
  French: 'fr',
  German: 'de',
  Greek: 'el',
  Hindi: 'hi',
  Hungarian: 'hu',
  Indonesian: 'id',
  Italian: 'it',
  Japanese: 'ja',
  Korean: 'ko',
  Malay: 'ms',
  Norwegian: 'no',
  Polish: 'pl',
  Portuguese: 'pt',
  Romanian: 'ro',
  Russian: 'ru',
  Slovak: 'sk',
  Spanish: 'es',
  Swedish: 'sv',
  Turkish: 'tr',
  Ukrainian: 'uk',
  Vietnamese: 'vi',
  // The code is deliberately duplicated into both `value` and `key`.
}).map(([label, code]) => ({ label, value: code, key: code }));

View File

@@ -1,5 +1,9 @@
import type { StartAvatarResponse } from "@heygen/streaming-avatar";
import StreamingAvatar, {AvatarQuality, StreamingEvents} from "@heygen/streaming-avatar";
import StreamingAvatar, {
AvatarQuality,
StreamingEvents, TaskMode, TaskType, VoiceEmotion,
} from "@heygen/streaming-avatar";
import {
Button,
Card,
@@ -11,11 +15,15 @@ import {
SelectItem,
Spinner,
Chip,
Tabs,
Tab,
} from "@nextui-org/react";
import { useEffect, useRef, useState } from "react";
import { usePrevious } from 'ahooks'
import { useMemoizedFn, usePrevious } from "ahooks";
import InteractiveAvatarTextInput from "./InteractiveAvatarTextInput";
import { AVATARS } from "@/app/lib/constants";
import {AVATARS, STT_LANGUAGE_LIST} from "@/app/lib/constants";
export default function InteractiveAvatar() {
const [isLoadingSession, setIsLoadingSession] = useState(false);
@@ -24,10 +32,14 @@ export default function InteractiveAvatar() {
const [debug, setDebug] = useState<string>();
const [knowledgeId, setKnowledgeId] = useState<string>("");
const [avatarId, setAvatarId] = useState<string>("");
const [language, setLanguage] = useState<string>('en');
const [data, setData] = useState<StartAvatarResponse>();
const [text, setText] = useState<string>("");
const mediaStream = useRef<HTMLVideoElement>(null);
const avatar = useRef<StreamingAvatar | null>(null);
const [chatMode, setChatMode] = useState("text_mode");
const [isUserTalking, setIsUserTalking] = useState(false);
async function fetchAccessToken() {
try {
@@ -35,6 +47,7 @@ export default function InteractiveAvatar() {
method: "POST",
});
const token = await response.text();
console.log("Access Token:", token); // Log the token to verify
return token;
@@ -48,6 +61,7 @@ export default function InteractiveAvatar() {
async function startSession() {
setIsLoadingSession(true);
const newToken = await fetchAccessToken();
avatar.current = new StreamingAvatar({
token: newToken,
});
@@ -61,18 +75,37 @@ export default function InteractiveAvatar() {
console.log("Stream disconnected");
endSession();
});
avatar.current?.on(StreamingEvents.STREAM_READY, (event) => {
console.log(">>>>> Stream ready:", event.detail);
setStream(event.detail);
});
avatar.current?.on(StreamingEvents.USER_START, (event) => {
console.log(">>>>> User started talking:", event);
setIsUserTalking(true);
});
avatar.current?.on(StreamingEvents.USER_STOP, (event) => {
console.log(">>>>> User stopped talking:", event);
setIsUserTalking(false);
});
try {
const res = await avatar.current.createStartAvatar({
quality: AvatarQuality.Low,
avatarName: avatarId,
knowledgeId: knowledgeId,
knowledgeId: knowledgeId, // Or use a custom `knowledgeBase`.
voice: {
rate: 1.5, // 0.5 ~ 1.5
emotion: VoiceEmotion.EXCITED,
},
language: language,
disableIdleTimeout: true,
});
setData(res);
avatar.current?.on(StreamingEvents.STREAM_READY, (event) => {
console.log('Stream ready:', event.detail);
setStream(event.detail);
// default to voice mode
await avatar.current?.startVoiceChat({
useSilencePrompt: false
});
setChatMode("voice_mode");
} catch (error) {
console.error("Error starting avatar session:", error);
} finally {
@@ -86,11 +119,10 @@ export default function InteractiveAvatar() {
return;
}
await avatar.current
.speak({ text: text, sessionId: data?.session_id! })
.catch((e) => {
setDebug(e.message);
});
// speak({ text: text, task_type: TaskType.REPEAT })
await avatar.current.speak({ text: text, taskType: TaskType.REPEAT, taskMode: TaskMode.SYNC }).catch((e) => {
setDebug(e.message);
});
setIsLoadingRepeat(false);
}
async function handleInterrupt() {
@@ -100,28 +132,34 @@ export default function InteractiveAvatar() {
return;
}
await avatar.current
.interrupt({ sessionId: data?.session_id! })
.interrupt()
.catch((e) => {
setDebug(e.message);
});
}
async function endSession() {
if (!avatar.current) {
setDebug("Avatar API not initialized");
return;
}
await avatar.current.stopAvatar({
sessionId: data?.session_id!,
});
await avatar.current?.stopAvatar();
setStream(undefined);
}
/**
 * Switches the chat between "text_mode" and "voice_mode".
 *
 * Selecting the mode that is already active is a no-op. Switching to
 * "text_mode" closes the voice chat; any other value starts it. The
 * `chatMode` state is updated only after the switch succeeded.
 *
 * @param v - The newly selected mode key (e.g. "text_mode" or "voice_mode").
 */
const handleChangeChatMode = useMemoizedFn(async (v) => {
  if (v === chatMode) {
    return;
  }
  try {
    if (v === "text_mode") {
      avatar.current?.closeVoiceChat();
    } else {
      await avatar.current?.startVoiceChat();
    }
    setChatMode(v);
  } catch (e) {
    // The caller discards the returned promise, so a failure here would
    // otherwise be an unhandled rejection. Report it through the debug
    // message like the other avatar handlers, and leave `chatMode` unchanged.
    setDebug(e instanceof Error ? e.message : String(e));
  }
});
const previousText = usePrevious(text);
useEffect(() => {
if (!previousText && text) {
avatar.current?.startListening({ sessionId: data?.session_id! });
avatar.current?.startListening();
} else if (previousText && !text) {
avatar?.current?.stopListening({ sessionId: data?.session_id! });
avatar?.current?.stopListening();
}
}, [text, previousText]);
@@ -161,18 +199,18 @@ export default function InteractiveAvatar() {
</video>
<div className="flex flex-col gap-2 absolute bottom-3 right-3">
<Button
size="md"
onClick={handleInterrupt}
className="bg-gradient-to-tr from-indigo-500 to-indigo-300 text-white rounded-lg"
size="md"
variant="shadow"
onClick={handleInterrupt}
>
Interrupt task
</Button>
<Button
size="md"
onClick={endSession}
className="bg-gradient-to-tr from-indigo-500 to-indigo-300 text-white rounded-lg"
size="md"
variant="shadow"
onClick={endSession}
>
End session
</Button>
@@ -185,17 +223,17 @@ export default function InteractiveAvatar() {
Custom Knowledge ID (optional)
</p>
<Input
placeholder="Enter a custom knowledge ID"
value={knowledgeId}
onChange={(e) => setKnowledgeId(e.target.value)}
placeholder="Enter a custom knowledge ID"
/>
<p className="text-sm font-medium leading-none">
Custom Avatar ID (optional)
</p>
<Input
placeholder="Enter a custom avatar ID"
value={avatarId}
onChange={(e) => setAvatarId(e.target.value)}
placeholder="Enter a custom avatar ID"
/>
<Select
placeholder="Or select one from these example avatars"
@@ -213,34 +251,74 @@ export default function InteractiveAvatar() {
</SelectItem>
))}
</Select>
<Select
label="Select language"
placeholder="Select language"
className="max-w-xs"
selectedKeys={[language]}
onChange={(e) => {
setLanguage(e.target.value);
}}
>
{STT_LANGUAGE_LIST.map((lang) => (
<SelectItem key={lang.key}>
{lang.label}
</SelectItem>
))}
</Select>
</div>
<Button
size="md"
onClick={startSession}
className="bg-gradient-to-tr from-indigo-500 to-indigo-300 w-full text-white"
size="md"
variant="shadow"
onClick={startSession}
>
Start session
</Button>
</div>
) : (
<Spinner size="lg" color="default" />
<Spinner color="default" size="lg" />
)}
</CardBody>
<Divider />
<CardFooter className="flex flex-col gap-3 relative">
<InteractiveAvatarTextInput
label="Chat"
placeholder="Type something for the avatar to respond"
input={text}
onSubmit={handleSpeak}
setInput={setText}
disabled={!stream}
loading={isLoadingRepeat}
/>
{
text && <Chip className='absolute right-16 top-6'>Listening</Chip>
}
<Tabs
aria-label="Options"
selectedKey={chatMode}
onSelectionChange={(v) => {
handleChangeChatMode(v);
}}
>
<Tab key="text_mode" title="Text mode" />
<Tab key="voice_mode" title="Voice mode" />
</Tabs>
{chatMode === "text_mode" ? (
<div className="w-full flex relative">
<InteractiveAvatarTextInput
disabled={!stream}
input={text}
label="Chat"
loading={isLoadingRepeat}
placeholder="Type something for the avatar to respond"
setInput={setText}
onSubmit={handleSpeak}
/>
{text && (
<Chip className="absolute right-16 top-3">Listening</Chip>
)}
</div>
) : (
<div className="w-full text-center">
<Button
isDisabled={!isUserTalking}
className="bg-gradient-to-tr from-indigo-500 to-indigo-300 text-white"
size="md"
variant="shadow"
>
{isUserTalking ? "Listening" : "Voice chat"}
</Button>
</div>
)}
</CardFooter>
</Card>
<p className="font-mono text-right">

View File

@@ -10,7 +10,7 @@
},
"dependencies": {
"@ai-sdk/openai": "^0.0.34",
"@heygen/streaming-avatar": "^2.0.0-beta.1",
"@heygen/streaming-avatar": "^2.0.8",
"@nextui-org/button": "2.0.34",
"@nextui-org/chip": "^2.0.32",
"@nextui-org/code": "2.0.29",