Compare commits

..

18 Commits

Author SHA1 Message Date
raojianb
dfb99e64c8 feat: update 2.0.9 2025-01-17 17:56:14 -08:00
Joby
d012ef3e3c fix: remove unused package (#49) 2024-12-23 11:12:14 -08:00
Joby
e2763a2cb4 feat: add package lock (#46) 2024-12-19 11:42:12 -08:00
eddy-heygen
ba8fbf7be4 Merge pull request #43 from HeyGen-Official/deprecate-openai-example
remove deprecated openai example
2024-12-03 14:39:57 -08:00
Eddy Kim
bcaea9916d remove deprecated openai example 2024-12-03 14:30:05 -08:00
Joby
0fa4f0385e fix: incorrect home page url in header banner (#40) 2024-11-19 10:23:59 -08:00
Joby
6fca8b4d42 feat: upgrade the sdk to v2.0.8 (#39) 2024-11-17 22:38:11 -08:00
Joby
431281d47c feat: task mode (#30) 2024-10-22 16:58:38 -07:00
Joby
274a307e83 chore: update sdk version (#26) 2024-09-30 18:24:59 -07:00
Joby
03ef24b031 chore: update sdk version (#24) 2024-09-27 15:39:13 -07:00
Joby
21f6c6d468 feat: support knowledge base (#23)
* feat: support knowledge base

* feat: support knowledge base
2024-09-25 19:53:26 -07:00
Joby
d7a7e3174c feat: update dependencies (#20) 2024-09-23 16:14:07 -07:00
Joby
e653fa74c4 feat: add language and voice rate (#19) 2024-09-23 15:55:33 -07:00
Joby
5dd784d63e feat: add task type (#18) 2024-09-23 13:38:15 -07:00
Joby
efb98f612b feat: simplify api (#14) 2024-09-22 01:54:14 -07:00
Joby
befb6228f5 feat: voice chat demo (#13) 2024-09-20 21:38:26 -07:00
James Zow
2454a4729d Update README.md (#8) 2024-09-06 21:00:48 -07:00
Joby
935b10279b Feat/livekit (#9)
* feat: using version 2.0 sdk

* feat: using version 2.0 sdk

* feat: using version 2.0 sdk
2024-09-06 20:59:55 -07:00
9 changed files with 8739 additions and 86 deletions

4
.env
View File

@@ -1,3 +1 @@
HEYGEN_API_KEY=your Heygen API key
OPENAI_API_KEY=your OpenAI API key
NEXT_PUBLIC_OPENAI_API_KEY=your OpenAI API key
HEYGEN_API_KEY=your Heygen API key

2
.npmrc
View File

@@ -1 +1 @@
package-lock=false

View File

@@ -15,7 +15,7 @@ Feel free to play around with the existing code and please leave any feedback fo
3. Run `npm install` (assuming you have npm installed. If not, please follow these instructions: https://docs.npmjs.com/downloading-and-installing-node-js-and-npm/)
4. Enter your HeyGen Enterprise API Token or Trial Token in the `.env` file. Replace `PLACEHOLDER-API-KEY` with your API key. This will allow the Client app to generate secure Access Tokens with which to create interactive sessions.
4. Enter your HeyGen Enterprise API Token or Trial Token in the `.env` file. Replace `HEYGEN_API_KEY` with your API key. This will allow the Client app to generate secure Access Tokens with which to create interactive sessions.
You can retrieve either the API Key or Trial Token by logging in to HeyGen and navigating to this page in your settings: [https://app.heygen.com/settings?nav=API]. NOTE: use the trial token if you don't have an enterprise API token yet.
@@ -41,24 +41,6 @@ After you see Monica appear on the screen, you can enter text into the input lab
If you want to see a different Avatar or try a different voice, you can close the session and enter the IDs and then 'start' the session again. Please see below for information on where to retrieve different Avatar and voice IDs that you can use.
### Connecting to OpenAI
A common use case for a Interactive Avatar is to use it as the 'face' of an LLM that users can interact with. In this demo we have included functionality to showcase this by both accepting user input via voice (using OpenAI's Whisper library) and also sending that input to an OpenAI LLM model (using their Chat Completions endpoint).
Both of these features of this demo require an OpenAI API Key. If you do not have a paid OpenAI account, you can learn more on their website: [https://openai.com/index/openai-api/]
Without an OpenAI API Key, this functionality will not work, and the Interactive Avatar will only be able to repeat text input that you provide, and not demonstrate being the 'face' of an LLM. Regardless, this demo is meant to demonstrate what kinds of apps and experiences you can build with our Interactive Avatar SDK, so you can code your own connection to a different LLM if you so choose.
To add your OpenAI API Key, copy it to the `OPENAI_API_KEY` and `NEXT_PUBLIC_OPENAI_API_KEY` variables in the `.env` file.
### How does the integration with OpenAI / ChatGPT work?
In this demo, we are calling the Chat Completions API from OpenAI in order to come up with some response to user input. You can see the relevant code in components/InteractiveAvatar.tsx.
In the initialMessages parameter, you can replace the content of the 'system' message with whatever 'knowledge base' or context that you would like the GPT-4o model to reply to the user's input with.
You can explore this API and the different parameters and models available here: [https://platform.openai.com/docs/guides/text-generation/chat-completions-api]
### Which Avatars can I use with this project?
By default, there are several Public Avatars that can be used in Interactive Avatar. (AKA Interactive Avatars.) You can find the Avatar IDs for these Public Avatars by navigating to [app.heygen.com/interactive-avatar](https://app.heygen.com/interactive-avatar) and clicking 'Select Avatar' and copying the avatar id.

View File

@@ -1,16 +0,0 @@
import { openai } from "@ai-sdk/openai";
import { streamText } from "ai";
// Allow streaming responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
const { messages } = await req.json();
const result = await streamText({
model: openai("gpt-4-turbo"),
messages,
});
return result.toAIStreamResponse();
}

View File

@@ -20,3 +20,34 @@ export const AVATARS = [
name: "Joshua Heygen CEO",
},
];
export const STT_LANGUAGE_LIST = [
{ label: 'Bulgarian', value: 'bg', key: 'bg' },
{ label: 'Chinese', value: 'zh', key: 'zh' },
{ label: 'Czech', value: 'cs', key: 'cs' },
{ label: 'Danish', value: 'da', key: 'da' },
{ label: 'Dutch', value: 'nl', key: 'nl' },
{ label: 'English', value: 'en', key: 'en' },
{ label: 'Finnish', value: 'fi', key: 'fi' },
{ label: 'French', value: 'fr', key: 'fr' },
{ label: 'German', value: 'de', key: 'de' },
{ label: 'Greek', value: 'el', key: 'el' },
{ label: 'Hindi', value: 'hi', key: 'hi' },
{ label: 'Hungarian', value: 'hu', key: 'hu' },
{ label: 'Indonesian', value: 'id', key: 'id' },
{ label: 'Italian', value: 'it', key: 'it' },
{ label: 'Japanese', value: 'ja', key: 'ja' },
{ label: 'Korean', value: 'ko', key: 'ko' },
{ label: 'Malay', value: 'ms', key: 'ms' },
{ label: 'Norwegian', value: 'no', key: 'no' },
{ label: 'Polish', value: 'pl', key: 'pl' },
{ label: 'Portuguese', value: 'pt', key: 'pt' },
{ label: 'Romanian', value: 'ro', key: 'ro' },
{ label: 'Russian', value: 'ru', key: 'ru' },
{ label: 'Slovak', value: 'sk', key: 'sk' },
{ label: 'Spanish', value: 'es', key: 'es' },
{ label: 'Swedish', value: 'sv', key: 'sv' },
{ label: 'Turkish', value: 'tr', key: 'tr' },
{ label: 'Ukrainian', value: 'uk', key: 'uk' },
{ label: 'Vietnamese', value: 'vi', key: 'vi' },
];

View File

@@ -1,5 +1,9 @@
import type { StartAvatarResponse } from "@heygen/streaming-avatar";
import StreamingAvatar, {AvatarQuality, StreamingEvents} from "@heygen/streaming-avatar";
import StreamingAvatar, {
AvatarQuality,
StreamingEvents, TaskMode, TaskType, VoiceEmotion,
} from "@heygen/streaming-avatar";
import {
Button,
Card,
@@ -11,11 +15,15 @@ import {
SelectItem,
Spinner,
Chip,
Tabs,
Tab,
} from "@nextui-org/react";
import { useEffect, useRef, useState } from "react";
import { usePrevious } from 'ahooks'
import { useMemoizedFn, usePrevious } from "ahooks";
import InteractiveAvatarTextInput from "./InteractiveAvatarTextInput";
import { AVATARS } from "@/app/lib/constants";
import {AVATARS, STT_LANGUAGE_LIST} from "@/app/lib/constants";
export default function InteractiveAvatar() {
const [isLoadingSession, setIsLoadingSession] = useState(false);
@@ -24,10 +32,14 @@ export default function InteractiveAvatar() {
const [debug, setDebug] = useState<string>();
const [knowledgeId, setKnowledgeId] = useState<string>("");
const [avatarId, setAvatarId] = useState<string>("");
const [language, setLanguage] = useState<string>('en');
const [data, setData] = useState<StartAvatarResponse>();
const [text, setText] = useState<string>("");
const mediaStream = useRef<HTMLVideoElement>(null);
const avatar = useRef<StreamingAvatar | null>(null);
const [chatMode, setChatMode] = useState("text_mode");
const [isUserTalking, setIsUserTalking] = useState(false);
async function fetchAccessToken() {
try {
@@ -35,6 +47,7 @@ export default function InteractiveAvatar() {
method: "POST",
});
const token = await response.text();
console.log("Access Token:", token); // Log the token to verify
return token;
@@ -48,6 +61,7 @@ export default function InteractiveAvatar() {
async function startSession() {
setIsLoadingSession(true);
const newToken = await fetchAccessToken();
avatar.current = new StreamingAvatar({
token: newToken,
});
@@ -61,18 +75,43 @@ export default function InteractiveAvatar() {
console.log("Stream disconnected");
endSession();
});
avatar.current?.on(StreamingEvents.STREAM_READY, (event) => {
console.log(">>>>> Stream ready:", event.detail);
setStream(event.detail);
});
avatar.current?.on(StreamingEvents.USER_START, (event) => {
console.log(">>>>> User started talking:", event);
setIsUserTalking(true);
});
avatar.current?.on(StreamingEvents.USER_STOP, (event) => {
console.log(">>>>> User stopped talking:", event);
setIsUserTalking(false);
});
try {
const res = await avatar.current.createStartAvatar({
quality: AvatarQuality.Low,
avatarName: avatarId,
knowledgeId: knowledgeId,
knowledgeId: knowledgeId, // Or use a custom `knowledgeBase`.
voice: {
rate: 1.5, // 0.5 ~ 1.5
emotion: VoiceEmotion.EXCITED,
// elevenlabsSettings: {
// stability: 1,
// similarity_boost: 1,
// style: 1,
// use_speaker_boost: false,
// },
},
language: language,
disableIdleTimeout: true,
});
setData(res);
avatar.current?.on(StreamingEvents.STREAM_READY, (event) => {
console.log('Stream ready:', event.detail);
setStream(event.detail);
// default to voice mode
await avatar.current?.startVoiceChat({
useSilencePrompt: false
});
setChatMode("voice_mode");
} catch (error) {
console.error("Error starting avatar session:", error);
} finally {
@@ -86,11 +125,10 @@ export default function InteractiveAvatar() {
return;
}
await avatar.current
.speak({ text: text, sessionId: data?.session_id! })
.catch((e) => {
setDebug(e.message);
});
// speak({ text: text, task_type: TaskType.REPEAT })
await avatar.current.speak({ text: text, taskType: TaskType.REPEAT, taskMode: TaskMode.SYNC }).catch((e) => {
setDebug(e.message);
});
setIsLoadingRepeat(false);
}
async function handleInterrupt() {
@@ -100,28 +138,34 @@ export default function InteractiveAvatar() {
return;
}
await avatar.current
.interrupt({ sessionId: data?.session_id! })
.interrupt()
.catch((e) => {
setDebug(e.message);
});
}
async function endSession() {
if (!avatar.current) {
setDebug("Avatar API not initialized");
return;
}
await avatar.current.stopAvatar({
sessionId: data?.session_id!,
});
await avatar.current?.stopAvatar();
setStream(undefined);
}
const handleChangeChatMode = useMemoizedFn(async (v) => {
if (v === chatMode) {
return;
}
if (v === "text_mode") {
avatar.current?.closeVoiceChat();
} else {
await avatar.current?.startVoiceChat();
}
setChatMode(v);
});
const previousText = usePrevious(text);
useEffect(() => {
if (!previousText && text) {
avatar.current?.startListening({ sessionId: data?.session_id! });
avatar.current?.startListening();
} else if (previousText && !text) {
avatar?.current?.stopListening({ sessionId: data?.session_id! });
avatar?.current?.stopListening();
}
}, [text, previousText]);
@@ -161,18 +205,18 @@ export default function InteractiveAvatar() {
</video>
<div className="flex flex-col gap-2 absolute bottom-3 right-3">
<Button
size="md"
onClick={handleInterrupt}
className="bg-gradient-to-tr from-indigo-500 to-indigo-300 text-white rounded-lg"
size="md"
variant="shadow"
onClick={handleInterrupt}
>
Interrupt task
</Button>
<Button
size="md"
onClick={endSession}
className="bg-gradient-to-tr from-indigo-500 to-indigo-300 text-white rounded-lg"
size="md"
variant="shadow"
onClick={endSession}
>
End session
</Button>
@@ -185,17 +229,17 @@ export default function InteractiveAvatar() {
Custom Knowledge ID (optional)
</p>
<Input
placeholder="Enter a custom knowledge ID"
value={knowledgeId}
onChange={(e) => setKnowledgeId(e.target.value)}
placeholder="Enter a custom knowledge ID"
/>
<p className="text-sm font-medium leading-none">
Custom Avatar ID (optional)
</p>
<Input
placeholder="Enter a custom avatar ID"
value={avatarId}
onChange={(e) => setAvatarId(e.target.value)}
placeholder="Enter a custom avatar ID"
/>
<Select
placeholder="Or select one from these example avatars"
@@ -213,34 +257,74 @@ export default function InteractiveAvatar() {
</SelectItem>
))}
</Select>
<Select
label="Select language"
placeholder="Select language"
className="max-w-xs"
selectedKeys={[language]}
onChange={(e) => {
setLanguage(e.target.value);
}}
>
{STT_LANGUAGE_LIST.map((lang) => (
<SelectItem key={lang.key}>
{lang.label}
</SelectItem>
))}
</Select>
</div>
<Button
size="md"
onClick={startSession}
className="bg-gradient-to-tr from-indigo-500 to-indigo-300 w-full text-white"
size="md"
variant="shadow"
onClick={startSession}
>
Start session
</Button>
</div>
) : (
<Spinner size="lg" color="default" />
<Spinner color="default" size="lg" />
)}
</CardBody>
<Divider />
<CardFooter className="flex flex-col gap-3 relative">
<InteractiveAvatarTextInput
label="Chat"
placeholder="Type something for the avatar to respond"
input={text}
onSubmit={handleSpeak}
setInput={setText}
disabled={!stream}
loading={isLoadingRepeat}
/>
{
text && <Chip className='absolute right-16 top-6'>Listening</Chip>
}
<Tabs
aria-label="Options"
selectedKey={chatMode}
onSelectionChange={(v) => {
handleChangeChatMode(v);
}}
>
<Tab key="text_mode" title="Text mode" />
<Tab key="voice_mode" title="Voice mode" />
</Tabs>
{chatMode === "text_mode" ? (
<div className="w-full flex relative">
<InteractiveAvatarTextInput
disabled={!stream}
input={text}
label="Chat"
loading={isLoadingRepeat}
placeholder="Type something for the avatar to respond"
setInput={setText}
onSubmit={handleSpeak}
/>
{text && (
<Chip className="absolute right-16 top-3">Listening</Chip>
)}
</div>
) : (
<div className="w-full text-center">
<Button
isDisabled={!isUserTalking}
className="bg-gradient-to-tr from-indigo-500 to-indigo-300 text-white"
size="md"
variant="shadow"
>
{isUserTalking ? "Listening" : "Voice chat"}
</Button>
</div>
)}
</CardFooter>
</Card>
<p className="font-mono text-right">

View File

@@ -28,7 +28,7 @@ export default function NavBar() {
<Link
isExternal
color="foreground"
href="https://app.heygen.com/interactive-avatar"
href="https://labs.heygen.com/interactive-avatar"
>
Avatars
</Link>

View File

@@ -10,7 +10,7 @@
},
"dependencies": {
"@ai-sdk/openai": "^0.0.34",
"@heygen/streaming-avatar": "^2.0.0-beta.1",
"@heygen/streaming-avatar": "^2.0.9",
"@nextui-org/button": "2.0.34",
"@nextui-org/chip": "^2.0.32",
"@nextui-org/code": "2.0.29",
@@ -32,7 +32,6 @@
"ahooks": "^3.8.1",
"ai": "^3.2.15",
"clsx": "2.1.1",
"framer-motion": "~11.1.1",
"intl-messageformat": "^10.5.0",
"next": "14.2.4",
"next-themes": "^0.2.1",

8575
pnpm-lock.yaml generated Normal file

File diff suppressed because it is too large Load Diff