InteractiveAvatarNextJSDemo/components/StreamingAvatar.tsx

import {
  Configuration,
  NewSessionData,
  StreamingAvatarApi,
} from "@heygen/streaming-avatar";
import {
  Button,
  Card,
  CardBody,
  CardFooter,
  Divider,
  Input,
  Spinner,
  Tooltip,
} from "@nextui-org/react";
import { Microphone, MicrophoneStage } from "@phosphor-icons/react";
import { useChat } from "ai/react";
import clsx from "clsx";
import OpenAI from "openai";
import { useEffect, useRef, useState } from "react";

import StreamingAvatarTextInput from "./StreamingAvatarTextInput";
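
// Browser-side OpenAI client, used below for Whisper speech-to-text.
// dangerouslyAllowBrowser ships NEXT_PUBLIC_OPENAI_API_KEY to every visitor;
// acceptable for a local demo, not for production.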
const openai = new OpenAI({
  apiKey: process.env.NEXT_PUBLIC_OPENAI_API_KEY,
  dangerouslyAllowBrowser: true,
});

export default function StreamingAvatar() {
  const [isLoadingSession, setIsLoadingSession] = useState(false);
  const [isLoadingRepeat, setIsLoadingRepeat] = useState(false);
  const [isLoadingChat, setIsLoadingChat] = useState(false);
  const [stream, setStream] = useState<MediaStream>();
  const [debug, setDebug] = useState<string>();
  const [avatarId, setAvatarId] = useState<string>("");
  const [voiceId, setVoiceId] = useState<string>("");
  const [data, setData] = useState<NewSessionData>();
  const [text, setText] = useState<string>("");
  const [initialized, setInitialized] = useState(false); // Track initialization
  const [recording, setRecording] = useState(false); // Track recording state
  const mediaStream = useRef<HTMLVideoElement>(null);
  const avatar = useRef<StreamingAvatarApi | null>(null);
  const mediaRecorder = useRef<MediaRecorder | null>(null);
  const audioChunks = useRef<Blob[]>([]);
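
  // Vercel AI SDK chat hook: handleSubmit posts the conversation to the app's
  // /api/chat route (the SDK's default endpoint), and onFinish hands the
  // completed ChatGPT reply to the avatar to speak.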
  const { input, setInput, handleSubmit } = useChat({
    onFinish: async (message) => {
      console.log("ChatGPT Response:", message);
      if (!initialized || !avatar.current) {
        setDebug("Avatar API not initialized");
        setIsLoadingChat(false);
        return;
      }
      // Send the ChatGPT response to the Streaming Avatar so it is spoken aloud
      await avatar.current
        .speak({
          taskRequest: { text: message.content, sessionId: data?.sessionId },
        })
        .catch((e) => {
          setDebug(e.message);
        });
      setIsLoadingChat(false);
    },
    initialMessages: [
      {
        id: "1",
        role: "system",
        content: "You are a helpful assistant.",
      },
    ],
  });
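
  // Fetch a short-lived session token from this demo's server route, which
  // presumably calls HeyGen's token endpoint with the secret API key so the
  // key never reaches the browser.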
  async function fetchAccessToken() {
    try {
      const response = await fetch("/api/get-access-token", {
        method: "POST",
      });
      const token = await response.text();
      console.log("Access Token:", token); // Log the token to verify
      return token;
    } catch (error) {
      console.error("Error fetching access token:", error);
      return "";
    }
  }
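
  // Create and start an avatar session, then expose its MediaStream so the
  // <video> element below can render it.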
  async function startSession() {
    setIsLoadingSession(true);
    await updateToken();
    if (!avatar.current) {
      setDebug("Avatar API is not initialized");
      setIsLoadingSession(false);
      return;
    }
    try {
      const res = await avatar.current.createStartAvatar(
        {
          newSessionRequest: {
            quality: "low",
            avatarName: avatarId,
            voice: { voiceId: voiceId },
          },
        },
        setDebug
      );
      setData(res);
      setStream(avatar.current.mediaStream);
    } catch (error) {
      console.error("Error starting avatar session:", error);
    } finally {
      // Always clear the loading state, even on failure, so the UI never
      // gets stuck on the spinner
      setIsLoadingSession(false);
    }
  }
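
  // Rebuild the StreamingAvatarApi client with a fresh token and register
  // callbacks for the SDK's talking-state events.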
  async function updateToken() {
    const newToken = await fetchAccessToken();
    console.log("Updating Access Token:", newToken); // Log token for debugging
    avatar.current = new StreamingAvatarApi(
      new Configuration({ accessToken: newToken })
    );

    const startTalkCallback = (e: any) => {
      console.log("Avatar started talking", e);
    };
    const stopTalkCallback = (e: any) => {
      console.log("Avatar stopped talking", e);
    };

    console.log("Adding event handlers:", avatar.current);
    avatar.current.addEventHandler("avatar_start_talking", startTalkCallback);
    avatar.current.addEventHandler("avatar_stop_talking", stopTalkCallback);

    setInitialized(true);
  }
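
  // Stop the current avatar session and drop the video stream.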
  async function endSession() {
    if (!initialized || !avatar.current) {
      setDebug("Avatar API not initialized");
      return;
    }
    await avatar.current.stopAvatar(
      { stopSessionRequest: { sessionId: data?.sessionId } },
      setDebug
    );
    setStream(undefined);
  }
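
  // Have the avatar speak the "Repeat" input verbatim.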
  async function handleSpeak() {
    if (!initialized || !avatar.current) {
      setDebug("Avatar API not initialized");
      return;
    }
    setIsLoadingRepeat(true);
    await avatar.current
      .speak({ taskRequest: { text: text, sessionId: data?.sessionId } })
      .catch((e) => {
        setDebug(e.message);
      });
    setIsLoadingRepeat(false);
  }
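
  // On mount, initialize the API client with a fresh token; on unmount,
  // attempt to close any session that is still open.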
  useEffect(() => {
    async function init() {
      const newToken = await fetchAccessToken();
      console.log("Initializing with Access Token:", newToken); // Log token for debugging
      avatar.current = new StreamingAvatarApi(
        new Configuration({ accessToken: newToken, jitterBuffer: 200 })
      );
      setInitialized(true); // Set initialized to true
    }
    init();

    return () => {
      endSession();
    };
  }, []);
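
  // Attach the avatar's MediaStream to the <video> element once both exist.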
  useEffect(() => {
    if (stream && mediaStream.current) {
      mediaStream.current.srcObject = stream;
      mediaStream.current.onloadedmetadata = () => {
        mediaStream.current!.play();
        setDebug("Playing");
      };
    }
  }, [mediaStream, stream]);
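
  // Capture microphone audio with the MediaRecorder API; when the user stops
  // recording, the buffered chunks are sent off for transcription.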
  function startRecording() {
    navigator.mediaDevices
      .getUserMedia({ audio: true })
      .then((stream) => {
        mediaRecorder.current = new MediaRecorder(stream);
        mediaRecorder.current.ondataavailable = (event) => {
          audioChunks.current.push(event.data);
        };
        mediaRecorder.current.onstop = () => {
          // Note: most browsers' MediaRecorder actually emits audio/webm or
          // audio/ogg chunks; the "audio/wav" label here is only a hint, and
          // the Whisper endpoint generally decodes the real container anyway
          const audioBlob = new Blob(audioChunks.current, {
            type: "audio/wav",
          });
          audioChunks.current = [];
          transcribeAudio(audioBlob);
        };
        mediaRecorder.current.start();
        setRecording(true);
      })
      .catch((error) => {
        console.error("Error accessing microphone:", error);
      });
  }

  function stopRecording() {
    if (mediaRecorder.current) {
      mediaRecorder.current.stop();
      setRecording(false);
    }
  }
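
  // Transcribe the recording with OpenAI's Whisper API and drop the result
  // into the chat input so the user can review it before sending.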
  async function transcribeAudio(audioBlob: Blob) {
    try {
      // Convert Blob to File
      const audioFile = new File([audioBlob], "recording.wav", {
        type: "audio/wav",
      });
      const response = await openai.audio.transcriptions.create({
        model: "whisper-1",
        file: audioFile,
      });
      const transcription = response.text;
      console.log("Transcription: ", transcription);
      setInput(transcription);
    } catch (error) {
      console.error("Error transcribing audio:", error);
    }
  }
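
  // UI: a video card with session controls, plus two inputs: "Repeat", which
  // the avatar speaks verbatim, and "Chat", which goes through ChatGPT first.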
  return (
    <div className="w-full flex flex-col gap-4">
      <Card>
        <CardBody className="h-[500px] flex flex-col justify-center items-center">
          {stream ? (
            <div className="h-[500px] w-[900px] justify-center items-center flex rounded-lg overflow-hidden">
              <video
                ref={mediaStream}
                autoPlay
                playsInline
                style={{
                  width: "100%",
                  height: "100%",
                  objectFit: "contain",
                }}
              >
                <track kind="captions" />
              </video>
              <Button
                size="md"
                onClick={endSession}
                className="bg-gradient-to-tr from-indigo-500 to-indigo-300 absolute bottom-3 right-3 text-white rounded-lg"
                variant="shadow"
              >
                End session
              </Button>
            </div>
          ) : !isLoadingSession ? (
            <div className="h-full justify-center items-center flex flex-col gap-4 w-96 self-center">
              <Input
                value={avatarId}
                onChange={(e) => setAvatarId(e.target.value)}
                placeholder="Custom Avatar ID (optional)"
              />
              <Input
                value={voiceId}
                onChange={(e) => setVoiceId(e.target.value)}
                placeholder="Custom Voice ID (optional)"
              />
              <Button
                size="md"
                onClick={startSession}
                className="bg-gradient-to-tr from-indigo-500 to-indigo-300 w-full text-white"
                variant="shadow"
              >
                Start session
              </Button>
            </div>
          ) : (
            <Spinner size="lg" color="default" />
          )}
        </CardBody>
        <Divider />
        <CardFooter className="flex flex-col gap-3">
          <StreamingAvatarTextInput
            label="Repeat"
            placeholder="Type something for the avatar to repeat"
            input={text}
            onSubmit={handleSpeak}
            setInput={setText}
            disabled={!stream}
            loading={isLoadingRepeat}
          />
          <StreamingAvatarTextInput
            label="Chat"
            placeholder="Chat with the avatar (uses ChatGPT)"
            input={input}
            onSubmit={() => {
              // Validate before setting the loading flag, so an empty
              // submission does not leave the input stuck in a loading state
              if (!input) {
                setDebug("Please enter text to send to ChatGPT");
                return;
              }
              setIsLoadingChat(true);
              handleSubmit();
            }}
            setInput={setInput}
            loading={isLoadingChat}
            endContent={
              <Tooltip
                content={!recording ? "Start recording" : "Stop recording"}
              >
                <Button
                  onClick={!recording ? startRecording : stopRecording}
                  isDisabled={!stream}
                  isIconOnly
                  className={clsx(
                    "mr-4 text-white",
                    !recording
                      ? "bg-gradient-to-tr from-indigo-500 to-indigo-300"
                      : ""
                  )}
                  size="sm"
                  variant="shadow"
                >
                  {!recording ? (
                    <Microphone size={20} />
                  ) : (
                    <>
                      <div className="absolute h-full w-full bg-gradient-to-tr from-indigo-500 to-indigo-300 animate-pulse -z-10"></div>
                      <MicrophoneStage size={20} />
                    </>
                  )}
                </Button>
              </Tooltip>
            }
            disabled={!stream}
          />
        </CardFooter>
      </Card>
      <p className="font-mono text-right">
        <span className="font-bold">Console:</span>
        <br />
        {debug}
      </p>
    </div>
  );
}