interactive avatar name change

2024-07-22 11:45:31 -07:00
parent 5d0cf3821c
commit ab85e604ef
7 changed files with 42 additions and 42 deletions
--- a/components/InteractiveAvatar.tsx
+++ b/components/InteractiveAvatar.tsx
@@ -0,0 +1,417 @@
+import { AVATARS, VOICES } from "@/app/lib/constants";
+import {
+  Configuration,
+  NewSessionData,
+  StreamingAvatarApi,
+} from "@heygen/streaming-avatar";
+import {
+  Button,
+  Card,
+  CardBody,
+  CardFooter,
+  Divider,
+  Input,
+  Select,
+  SelectItem,
+  Spinner,
+  Tooltip,
+} from "@nextui-org/react";
+import { Microphone, MicrophoneStage } from "@phosphor-icons/react";
+import { useChat } from "ai/react";
+import clsx from "clsx";
+import OpenAI from "openai";
+import { useEffect, useRef, useState } from "react";
+import InteractiveAvatarTextInput from "./InteractiveAvatarTextInput";
+
+const openai = new OpenAI({
+  apiKey: process.env.NEXT_PUBLIC_OPENAI_API_KEY,
+  dangerouslyAllowBrowser: true,
+});
+
+export default function InteractiveAvatar() {
+  const [isLoadingSession, setIsLoadingSession] = useState(false);
+  const [isLoadingRepeat, setIsLoadingRepeat] = useState(false);
+  const [isLoadingChat, setIsLoadingChat] = useState(false);
+  const [stream, setStream] = useState<MediaStream>();
+  const [debug, setDebug] = useState<string>();
+  const [avatarId, setAvatarId] = useState<string>("");
+  const [voiceId, setVoiceId] = useState<string>("");
+  const [data, setData] = useState<NewSessionData>();
+  const [text, setText] = useState<string>("");
+  const [initialized, setInitialized] = useState(false); // Track initialization
+  const [recording, setRecording] = useState(false); // Track recording state
+  const mediaStream = useRef<HTMLVideoElement>(null);
+  const avatar = useRef<StreamingAvatarApi | null>(null);
+  const mediaRecorder = useRef<MediaRecorder | null>(null);
+  const audioChunks = useRef<Blob[]>([]);
+  const { input, setInput, handleSubmit } = useChat({
+    onFinish: async (message) => {
+      console.log("ChatGPT Response:", message);
+
+      if (!initialized || !avatar.current) {
+        setDebug("Avatar API not initialized");
+        return;
+      }
+
+      //send the ChatGPT response to the Interactive Avatar
+      await avatar.current
+        .speak({
+          taskRequest: { text: message.content, sessionId: data?.sessionId },
+        })
+        .catch((e) => {
+          setDebug(e.message);
+        });
+      setIsLoadingChat(false);
+    },
+    initialMessages: [
+      {
+        id: "1",
+        role: "system",
+        content: "You are a helpful assistant.",
+      },
+    ],
+  });
+
+  async function fetchAccessToken() {
+    try {
+      const response = await fetch("/api/get-access-token", {
+        method: "POST",
+      });
+      const token = await response.text();
+      console.log("Access Token:", token); // Log the token to verify
+      return token;
+    } catch (error) {
+      console.error("Error fetching access token:", error);
+      return "";
+    }
+  }
+
+  async function startSession() {
+    setIsLoadingSession(true);
+    await updateToken();
+    if (!avatar.current) {
+      setDebug("Avatar API is not initialized");
+      return;
+    }
+    try {
+      const res = await avatar.current.createStartAvatar(
+        {
+          newSessionRequest: {
+            quality: "low",
+            avatarName: avatarId,
+            voice: { voiceId: voiceId },
+          },
+        },
+        setDebug
+      );
+      setData(res);
+      setStream(avatar.current.mediaStream);
+    } catch (error) {
+      console.error("Error starting avatar session:", error);
+      setDebug(
+        `There was an error starting the session. ${voiceId ? "This custom voice ID may not be supported." : ""}`
+      );
+    }
+    setIsLoadingSession(false);
+  }
+
+  async function updateToken() {
+    const newToken = await fetchAccessToken();
+    console.log("Updating Access Token:", newToken); // Log token for debugging
+    avatar.current = new StreamingAvatarApi(
+      new Configuration({ accessToken: newToken })
+    );
+
+    const startTalkCallback = (e: any) => {
+      console.log("Avatar started talking", e);
+    };
+
+    const stopTalkCallback = (e: any) => {
+      console.log("Avatar stopped talking", e);
+    };
+
+    console.log("Adding event handlers:", avatar.current);
+    avatar.current.addEventHandler("avatar_start_talking", startTalkCallback);
+    avatar.current.addEventHandler("avatar_stop_talking", stopTalkCallback);
+
+    setInitialized(true);
+  }
+
+  async function handleInterrupt() {
+    if (!initialized || !avatar.current) {
+      setDebug("Avatar API not initialized");
+      return;
+    }
+    await avatar.current
+      .interrupt({ interruptRequest: { sessionId: data?.sessionId } })
+      .catch((e) => {
+        setDebug(e.message);
+      });
+  }
+
+  async function endSession() {
+    if (!initialized || !avatar.current) {
+      setDebug("Avatar API not initialized");
+      return;
+    }
+    await avatar.current.stopAvatar(
+      { stopSessionRequest: { sessionId: data?.sessionId } },
+      setDebug
+    );
+    setStream(undefined);
+  }
+
+  async function handleSpeak() {
+    setIsLoadingRepeat(true);
+    if (!initialized || !avatar.current) {
+      setDebug("Avatar API not initialized");
+      return;
+    }
+    await avatar.current
+      .speak({ taskRequest: { text: text, sessionId: data?.sessionId } })
+      .catch((e) => {
+        setDebug(e.message);
+      });
+    setIsLoadingRepeat(false);
+  }
+
+  useEffect(() => {
+    async function init() {
+      const newToken = await fetchAccessToken();
+      console.log("Initializing with Access Token:", newToken); // Log token for debugging
+      avatar.current = new StreamingAvatarApi(
+        new Configuration({ accessToken: newToken, jitterBuffer: 200 })
+      );
+      setInitialized(true); // Set initialized to true
+    }
+    init();
+
+    return () => {
+      endSession();
+    };
+  }, []);
+
+  useEffect(() => {
+    if (stream && mediaStream.current) {
+      mediaStream.current.srcObject = stream;
+      mediaStream.current.onloadedmetadata = () => {
+        mediaStream.current!.play();
+        setDebug("Playing");
+      };
+    }
+  }, [mediaStream, stream]);
+
+  function startRecording() {
+    navigator.mediaDevices
+      .getUserMedia({ audio: true })
+      .then((stream) => {
+        mediaRecorder.current = new MediaRecorder(stream);
+        mediaRecorder.current.ondataavailable = (event) => {
+          audioChunks.current.push(event.data);
+        };
+        mediaRecorder.current.onstop = () => {
+          const audioBlob = new Blob(audioChunks.current, {
+            type: "audio/wav",
+          });
+          audioChunks.current = [];
+          transcribeAudio(audioBlob);
+        };
+        mediaRecorder.current.start();
+        setRecording(true);
+      })
+      .catch((error) => {
+        console.error("Error accessing microphone:", error);
+      });
+  }
+
+  function stopRecording() {
+    if (mediaRecorder.current) {
+      mediaRecorder.current.stop();
+      setRecording(false);
+    }
+  }
+
+  async function transcribeAudio(audioBlob: Blob) {
+    try {
+      // Convert Blob to File
+      const audioFile = new File([audioBlob], "recording.wav", {
+        type: "audio/wav",
+      });
+      const response = await openai.audio.transcriptions.create({
+        model: "whisper-1",
+        file: audioFile,
+      });
+      const transcription = response.text;
+      console.log("Transcription: ", transcription);
+      setInput(transcription);
+    } catch (error) {
+      console.error("Error transcribing audio:", error);
+    }
+  }
+
+  return (
+    <div className="w-full flex flex-col gap-4">
+      <Card>
+        <CardBody className="h-[500px] flex flex-col justify-center items-center">
+          {stream ? (
+            <div className="h-[500px] w-[900px] justify-center items-center flex rounded-lg overflow-hidden">
+              <video
+                ref={mediaStream}
+                autoPlay
+                playsInline
+                style={{
+                  width: "100%",
+                  height: "100%",
+                  objectFit: "contain",
+                }}
+              >
+                <track kind="captions" />
+              </video>
+              <div className="flex flex-col gap-2 absolute bottom-3 right-3">
+                <Button
+                  size="md"
+                  onClick={handleInterrupt}
+                  className="bg-gradient-to-tr from-indigo-500 to-indigo-300 text-white rounded-lg"
+                  variant="shadow"
+                >
+                  Interrupt task
+                </Button>
+                <Button
+                  size="md"
+                  onClick={endSession}
+                  className="bg-gradient-to-tr from-indigo-500 to-indigo-300  text-white rounded-lg"
+                  variant="shadow"
+                >
+                  End session
+                </Button>
+              </div>
+            </div>
+          ) : !isLoadingSession ? (
+            <div className="h-full justify-center items-center flex flex-col gap-8 w-[500px] self-center">
+              <div className="flex flex-col gap-2 w-full">
+                <p className="text-sm font-medium leading-none">
+                  Custom Avatar ID (optional)
+                </p>
+                <Input
+                  value={avatarId}
+                  onChange={(e) => setAvatarId(e.target.value)}
+                  placeholder="Enter a custom avatar ID"
+                />
+                <Select
+                  placeholder="Or select one from these example avatars"
+                  size="md"
+                  onChange={(e) => {
+                    setAvatarId(e.target.value);
+                  }}
+                >
+                  {AVATARS.map((avatar) => (
+                    <SelectItem
+                      key={avatar.avatar_id}
+                      textValue={avatar.avatar_id}
+                    >
+                      {avatar.name}
+                    </SelectItem>
+                  ))}
+                </Select>
+              </div>
+              <div className="flex flex-col gap-2 w-full">
+                <p className="text-sm font-medium leading-none">
+                  Custom Voice ID (optional)
+                </p>
+                <Input
+                  value={voiceId}
+                  onChange={(e) => setVoiceId(e.target.value)}
+                  placeholder="Enter a custom voice ID"
+                />
+                <Select
+                  placeholder="Or select one from these example voices"
+                  size="md"
+                  onChange={(e) => {
+                    setVoiceId(e.target.value);
+                  }}
+                >
+                  {VOICES.map((voice) => (
+                    <SelectItem key={voice.voice_id} textValue={voice.voice_id}>
+                      {voice.name} | {voice.language} | {voice.gender}
+                    </SelectItem>
+                  ))}
+                </Select>
+              </div>
+              <Button
+                size="md"
+                onClick={startSession}
+                className="bg-gradient-to-tr from-indigo-500 to-indigo-300 w-full text-white"
+                variant="shadow"
+              >
+                Start session
+              </Button>
+            </div>
+          ) : (
+            <Spinner size="lg" color="default" />
+          )}
+        </CardBody>
+        <Divider />
+        <CardFooter className="flex flex-col gap-3">
+          <InteractiveAvatarTextInput
+            label="Repeat"
+            placeholder="Type something for the avatar to repeat"
+            input={text}
+            onSubmit={handleSpeak}
+            setInput={setText}
+            disabled={!stream}
+            loading={isLoadingRepeat}
+          />
+          <InteractiveAvatarTextInput
+            label="Chat"
+            placeholder="Chat with the avatar (uses ChatGPT)"
+            input={input}
+            onSubmit={() => {
+              setIsLoadingChat(true);
+              if (!input) {
+                setDebug("Please enter text to send to ChatGPT");
+                return;
+              }
+              handleSubmit();
+            }}
+            setInput={setInput}
+            loading={isLoadingChat}
+            endContent={
+              <Tooltip
+                content={!recording ? "Start recording" : "Stop recording"}
+              >
+                <Button
+                  onClick={!recording ? startRecording : stopRecording}
+                  isDisabled={!stream}
+                  isIconOnly
+                  className={clsx(
+                    "mr-4 text-white",
+                    !recording
+                      ? "bg-gradient-to-tr from-indigo-500 to-indigo-300"
+                      : ""
+                  )}
+                  size="sm"
+                  variant="shadow"
+                >
+                  {!recording ? (
+                    <Microphone size={20} />
+                  ) : (
+                    <>
+                      <div className="absolute h-full w-full bg-gradient-to-tr from-indigo-500 to-indigo-300 animate-pulse -z-10"></div>
+                      <MicrophoneStage size={20} />
+                    </>
+                  )}
+                </Button>
+              </Tooltip>
+            }
+            disabled={!stream}
+          />
+        </CardFooter>
+      </Card>
+      <p className="font-mono text-right">
+        <span className="font-bold">Console:</span>
+        <br />
+        {debug}
+      </p>
+    </div>
+  );
+}