Complete MVP for a conversational AI avatar

This commit is contained in:
maumruiz
2025-02-11 18:10:19 -06:00
parent 6819f35fba
commit 1319e71f3e
22 changed files with 4511 additions and 49 deletions

29
.vscode/settings.json vendored Normal file
View File

@@ -0,0 +1,29 @@
{
"editor.formatOnSave": true,
"editor.codeActionsOnSave": {
"source.fixAll.eslint": "explicit",
"source.addMissingImports": "explicit"
},
"prettier.tabWidth": 2,
"prettier.useTabs": false,
"prettier.semi": true,
"prettier.singleQuote": false,
"prettier.jsxSingleQuote": false,
"prettier.trailingComma": "es5",
"prettier.arrowParens": "always",
"prettier.printWidth": 100,
"[json]": {
"editor.defaultFormatter": "esbenp.prettier-vscode"
},
"[typescript]": {
"editor.defaultFormatter": "esbenp.prettier-vscode"
},
"[typescriptreact]": {
"editor.defaultFormatter": "esbenp.prettier-vscode"
},
"[javascriptreact]": {
"editor.defaultFormatter": "esbenp.prettier-vscode"
},
"typescript.tsdk": "node_modules/typescript/lib",
"eslint.workingDirectories": ["./webapp"],
}

3715
package-lock.json generated

File diff suppressed because it is too large

View File

@@ -1,5 +1,5 @@
{
"name": "researcher",
"name": "ai-avatar",
"version": "0.1.0",
"private": true,
"scripts": {
@@ -10,13 +10,25 @@
},
"dependencies": {
"@hookform/resolvers": "^3.10.0",
"@langchain/community": "^0.3.29",
"@langchain/core": "^0.3.39",
"@langchain/langgraph": "^0.2.45",
"@langchain/openai": "^0.4.3",
"@radix-ui/react-label": "^2.1.2",
"@radix-ui/react-slot": "^1.1.2",
"@react-three/drei": "^9.121.4",
"@react-three/fiber": "^9.0.0-rc.7",
"@types/three": "^0.173.0",
"ai": "^4.1.34",
"class-variance-authority": "^0.7.1",
"clsx": "^2.1.1",
"langchain": "^0.3.15",
"leva": "^0.10.0",
"lucide-react": "^0.475.0",
"microsoft-cognitiveservices-speech-sdk": "^1.42.0",
"next": "15.1.6",
"next-themes": "^0.4.4",
"openai": "^4.83.0",
"pino": "^9.6.0",
"pino-pretty": "^13.0.0",
"react": "^19.0.0",
@@ -25,7 +37,9 @@
"sonner": "^1.7.4",
"tailwind-merge": "^3.0.1",
"tailwindcss-animate": "^1.0.7",
"zod": "^3.24.1"
"three": "^0.173.0",
"zod": "^3.24.1",
"zustand": "^5.0.3"
},
"devDependencies": {
"@types/node": "^20",
@@ -41,5 +55,12 @@
"prettier": "^3.4.2",
"tailwindcss": "^3.4.1",
"typescript": "^5"
},
"overrides": {
"@react-three/drei": {
"@react-three/fiber": "^9.0.0-rc.7",
"react": "^19.0.0",
"react-dom": "^19.0.0"
}
}
}

BIN
public/animations.glb Normal file

Binary file not shown.

BIN
public/briefIdleAnim.fbx Normal file

Binary file not shown.

BIN
public/idleAnim.fbx Normal file

Binary file not shown.

BIN
public/mau.glb Normal file

Binary file not shown.

View File

@@ -0,0 +1,104 @@
import { PassThrough } from "stream";
import { AIMessage, HumanMessage } from "@langchain/core/messages";
import { Messages } from "@langchain/langgraph";
import * as sdk from "microsoft-cognitiveservices-speech-sdk";
import OpenAI from "openai";
import { graph } from "@/lib/graph";
import logger from "@/lib/logger";
const client = new OpenAI({
apiKey: process.env.OPENAI_API_KEY,
});
const speechConfig = sdk.SpeechConfig.fromSubscription(
process.env.SPEECH_KEY || "",
process.env.SPEECH_REGION || ""
);
speechConfig.speechSynthesisVoiceName = "en-US-BrianMultilingualNeural";
// Allow streaming responses up to 30 seconds
// export const maxDuration = 30;
// Define POST method for chat route
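// Pipeline: speech-to-text (Whisper) -> chat completion (LangGraph) -> text-to-speech with viseme events (Azure Speech)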
export async function POST(req: Request) {
const formData = await req.formData();
const audio = formData.get("audio") as File;
const messages = JSON.parse(formData.get("messages") as string);
logger.info(JSON.stringify(messages, null, 2));
//* Speech to text
const transcription = await client.audio.transcriptions.create({
file: audio,
model: "whisper-1",
});
logger.info(JSON.stringify(transcription, null, 2));
// create new message with transcription
const userMessage = {
role: "user",
content: transcription.text,
id: Date.now().toString(),
};
const updatedMessages = [...messages, userMessage];
//* Text to text
const allMessages: Messages = updatedMessages.map((message) =>
message.role === "user" ? new HumanMessage(message.content) : new AIMessage(message.content)
);
// Stream of messages
const result = await graph.invoke({ messages: allMessages });
const lastMessage = result.messages[result.messages.length - 1];
//* Text to speech (and visemes)
// Use Microsoft Speech SDK to synthesize speech and get visemes
const speechSynthesizer = new sdk.SpeechSynthesizer(speechConfig);
const visemes: [number, number][] = [];
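// Each entry is [audio offset in ms, Azure viseme ID]; the client replays them against audio.currentTime to drive the avatar's mouth morph targets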
speechSynthesizer.visemeReceived = function (s, e) {
// logger.info(
// "(Viseme), Audio offset: " + e.audioOffset / 10000 + "ms. Viseme ID: " + e.visemeId
// );
visemes.push([e.audioOffset / 10000, e.visemeId]);
};
const audioStream = await new Promise<PassThrough>((resolve, reject) => {
speechSynthesizer.speakTextAsync(
`${lastMessage.content}`,
(result) => {
const { audioData } = result;
speechSynthesizer.close();
// convert arrayBuffer to stream
const bufferStream = new PassThrough();
bufferStream.end(Buffer.from(audioData));
resolve(bufferStream);
},
(error) => {
logger.error(error);
speechSynthesizer.close();
reject(error);
}
);
});
//* Return processed response
logger.info(`Response: ${lastMessage.content}`);
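// Curly quotes and em dashes are normalized below because the reply is returned in an HTTP response header, and header values are limited to Latin-1 characters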
const safeLastMessageContent = lastMessage.content
.toString()
.replace(/[\u2018\u2019]/g, "'")
.replace(/\u2014/g, "-");
return new Response(audioStream, {
headers: {
"Content-Type": "audio/mpeg",
"Content-Disposition": `inline; filename=tts.mp3`,
Visemes: JSON.stringify(visemes),
Result: JSON.stringify({
id: lastMessage.id,
role: "assistant",
content: safeLastMessageContent,
}),
UserMessage: JSON.stringify(userMessage),
},
});
}

View File

@@ -0,0 +1,82 @@
import { PassThrough } from "stream";
import { AIMessage, HumanMessage } from "@langchain/core/messages";
import { Messages } from "@langchain/langgraph";
import { Message } from "ai";
import * as sdk from "microsoft-cognitiveservices-speech-sdk";
import { graph } from "@/lib/graph";
import logger from "@/lib/logger";
const speechConfig = sdk.SpeechConfig.fromSubscription(
process.env.SPEECH_KEY || "",
process.env.SPEECH_REGION || ""
);
speechConfig.speechSynthesisVoiceName = "en-US-BrianMultilingualNeural";
// Allow streaming responses up to 30 seconds
// export const maxDuration = 30;
// Define POST method for chat route
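// Pipeline: chat completion (LangGraph) -> text-to-speech with viseme events (Azure Speech); input arrives as JSON chat messages instead of audio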
export async function POST(req: Request) {
const {
messages,
}: {
messages: Message[];
} = await req.json();
// TODO: Filter to only include last message when using langgraph memory
const allMessages: Messages = messages.map((message) =>
message.role === "user" ? new HumanMessage(message.content) : new AIMessage(message.content)
);
// Stream of messages
const result = await graph.invoke({ messages: allMessages });
const lastMessage = result.messages[result.messages.length - 1];
// Use Microsoft Speech SDK to synthesize speech and get visemes
const speechSynthesizer = new sdk.SpeechSynthesizer(speechConfig);
const visemes: [number, number][] = [];
speechSynthesizer.visemeReceived = function (s, e) {
// logger.info(
// "(Viseme), Audio offset: " + e.audioOffset / 10000 + "ms. Viseme ID: " + e.visemeId
// );
visemes.push([e.audioOffset / 10000, e.visemeId]);
};
const audioStream = await new Promise<PassThrough>((resolve, reject) => {
speechSynthesizer.speakTextAsync(
`${lastMessage.content}`,
(result) => {
const { audioData } = result;
speechSynthesizer.close();
// convert arrayBuffer to stream
const bufferStream = new PassThrough();
bufferStream.end(Buffer.from(audioData));
resolve(bufferStream);
},
(error) => {
logger.error(error);
speechSynthesizer.close();
reject(error);
}
);
});
logger.info(`Response: ${lastMessage.content}`);
return new Response(audioStream, {
headers: {
"Content-Type": "audio/mpeg",
"Content-Disposition": `inline; filename=tts.mp3`,
Visemes: JSON.stringify(visemes),
Message: JSON.stringify({
id: lastMessage.id,
role: "assistant",
content: lastMessage.content,
}),
},
});
}

View File

@@ -1,13 +0,0 @@
import { NextResponse } from "next/server";
export async function POST(req: Request) {
const { message }: { message: string } = await req.json();
// // TODO: Filter to only include last message when using langgraph
// const allMessages: Messages = messages.map((message) =>
// message.role === "user" ? new HumanMessage(message.content) : new AIMessage(message.content)
// );
// // Stream of messages
// const result = await app.invoke({ messages: allMessages });
// const lastMessage = result.messages[result.messages.length - 1];
return NextResponse.json({ message });
}

30
src/app/api/stt/route.ts Normal file
View File

@@ -0,0 +1,30 @@
import { NextResponse } from "next/server";
import OpenAI from "openai";
import logger from "@/lib/logger";
const client = new OpenAI({
apiKey: process.env.OPENAI_API_KEY,
});
export async function POST(req: Request) {
const formData = await req.formData();
const audio = formData.get("audio") as File;
const messages = JSON.parse(formData.get("messages") as string);
try {
const transcription = await client.audio.transcriptions.create({
file: audio,
model: "whisper-1",
});
logger.info(JSON.stringify(transcription, null, 2));
logger.info(JSON.stringify(messages, null, 2));
// Return the transcription data as JSON
return NextResponse.json(transcription.text);
} catch (error) {
logger.error(error, "Error transcribing audio");
return NextResponse.json({ error: "Transcription failed." }, { status: 500 });
}
}

0
src/app/api/tts/route.ts Normal file
View File

View File

@@ -4,6 +4,18 @@
body {
font-family: Arial, Helvetica, sans-serif;
background-color: #8baaaa;
/* background-image: linear-gradient(to top, #6a85b6 0%, #bac8e0 100%); */
/* background-image: linear-gradient(15deg, #13547a 0%, #80d0c7 100%); */
/* background-image: linear-gradient(to right, #868f96 0%, #596164 100%); */
background-image: linear-gradient(45deg, #8baaaa 0%, #596164 100%);
}
html,
body,
#root {
height: 100%;
margin: unset;
}
@layer base {

View File

@@ -1,9 +1,18 @@
import AudioRecorder from "@/components/AudioRecorder";
import Experience from "@/components/Experience";
import SimpleInput from "@/components/SimpleInput";
// import SimpleInput from "@/components/SimpleInput";
export default function Home() {
return (
<div className="flex min-h-screen items-center justify-center bg-gray-100">
<SimpleInput />
</div>
<>
<Experience />
<div className="absolute left-0 top-0 flex min-h-screen w-screen items-end justify-center gap-5 p-24">
<div className="flex items-center gap-5">
<SimpleInput />
<AudioRecorder />
</div>
</div>
</>
);
}

View File

@@ -0,0 +1,95 @@
"use client";
import { Mic, Square } from "lucide-react";
import { useCallback, useEffect } from "react";
import { useConversation } from "@/lib/store";
import { useAudioRecorder } from "../hooks/useAudioRecorder";
export default function AudioRecorder() {
const { isRecording, audioBlob, startRecording, stopRecording } = useAudioRecorder();
const messages = useConversation((state) => state.messages);
const addMessage = useConversation((state) => state.addMessage);
const setMessageResult = useConversation((state) => state.setMessageResult);
const handleRecordClick = () => {
if (isRecording) {
stopRecording();
} else {
startRecording();
}
};
const fetchConversation = useCallback(
async (audioBlob: Blob) => {
const formData = new FormData();
formData.append("audio", audioBlob, "audio.ogg");
formData.append("messages", JSON.stringify(messages));
try {
const response = await fetch("/api/conversation/speech", {
method: "POST",
body: formData,
});
const result = JSON.parse(response.headers.get("result") || "{}");
const userMessage = JSON.parse(response.headers.get("usermessage") || "{}");
const audio = await response.blob();
const visemes = JSON.parse(response.headers.get("visemes") || "[]");
const audioUrl = URL.createObjectURL(audio);
const audioPlayer = new Audio(audioUrl);
console.log(userMessage);
console.log(result);
console.log(visemes);
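// Hand the audio element and viseme timeline to the store so the Avatar can lip sync during playback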
setMessageResult({
visemes,
audioPlayer,
});
audioPlayer.onended = () => {
setMessageResult(null);
};
audioPlayer.currentTime = 0;
audioPlayer.play();
addMessage(userMessage);
addMessage(result);
} catch (err) {
console.error("Error sending audio file:", err);
}
},
[messages, addMessage, setMessageResult]
);
useEffect(() => {
if (audioBlob) {
fetchConversation(audioBlob);
}
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [audioBlob]);
return (
<div>
<button
onClick={handleRecordClick}
className={`flex size-16 items-center justify-center rounded-full text-sm font-bold text-white transition-all duration-300 ease-in-out hover:scale-110 ${
isRecording
? "animate-pulse bg-red-600"
: "bg-slate-900 shadow-lg hover:bg-slate-950 hover:shadow-xl"
}`}
>
{isRecording ? <Square className="size-6" /> : <Mic className="size-8" />}
</button>
{/* <p className="mt-4 text-xl font-semibold text-white">
{isRecording ? "Recording..." : "Tap to Record"}
</p> */}
{/* {hasRecorded && audioURL && (
<div className="mt-8">
<audio src={audioURL} controls className="w-64" />
</div>
)} */}
</div>
);
}

208
src/components/Avatar.tsx Normal file
View File

@@ -0,0 +1,208 @@
/*
Auto-generated by: https://github.com/pmndrs/gltfjsx
*/
import { useAnimations, useGLTF } from "@react-three/drei";
import { useFrame } from "@react-three/fiber";
import { useControls } from "leva";
import { useEffect, useRef, useState } from "react";
import * as THREE from "three";
import { GLTF } from "three-stdlib";
import { visemeList, visemesMapping } from "@/lib/constants";
import { useConversation } from "@/lib/store";
type GLTFResult = GLTF & {
nodes: {
EyeLeft: THREE.SkinnedMesh;
EyeRight: THREE.SkinnedMesh;
Wolf3D_Head: THREE.SkinnedMesh;
Wolf3D_Teeth: THREE.SkinnedMesh;
Wolf3D_Hair: THREE.SkinnedMesh;
Wolf3D_Glasses: THREE.SkinnedMesh;
Wolf3D_Body: THREE.SkinnedMesh;
Wolf3D_Outfit_Bottom: THREE.SkinnedMesh;
Wolf3D_Outfit_Footwear: THREE.SkinnedMesh;
Wolf3D_Outfit_Top: THREE.SkinnedMesh;
Hips: THREE.Bone;
};
materials: {
Wolf3D_Eye: THREE.MeshStandardMaterial;
Wolf3D_Skin: THREE.MeshStandardMaterial;
Wolf3D_Teeth: THREE.MeshStandardMaterial;
Wolf3D_Hair: THREE.MeshStandardMaterial;
Wolf3D_Glasses: THREE.MeshStandardMaterial;
Wolf3D_Body: THREE.MeshStandardMaterial;
Wolf3D_Outfit_Bottom: THREE.MeshStandardMaterial;
Wolf3D_Outfit_Footwear: THREE.MeshStandardMaterial;
Wolf3D_Outfit_Top: THREE.MeshStandardMaterial;
};
};
export function Avatar(props: JSX.IntrinsicElements["group"]) {
const groupRef = useRef(null);
const [animation, setAnimation] = useState("Idle");
const [blink, setBlink] = useState(false);
const message = useConversation((state) => state.messageResult);
const { nodes, materials, scene } = useGLTF("/mau.glb") as GLTFResult;
const { animations } = useGLTF("/animations.glb");
const { actions } = useAnimations(animations, groupRef);
useEffect(() => {
actions[animation]?.play();
}, [animation, actions]);
useEffect(() => {
let blinkTimeout: ReturnType<typeof setTimeout>;
const nextBlink = () => {
blinkTimeout = setTimeout(
() => {
setBlink(true);
setTimeout(() => {
setBlink(false);
nextBlink();
}, 200);
},
THREE.MathUtils.randInt(1000, 5000)
);
};
nextBlink();
return () => clearTimeout(blinkTimeout);
}, []);
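// Per-frame animation: resting smile, blinking, and viseme-driven lip sync while a reply is playing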
useFrame((state, delta) => {
if (!message) {
lerpMorphTarget("mouthSmileLeft", 0.36, 0.5);
lerpMorphTarget("mouthSmileRight", 0.36, 0.5);
} else {
lerpMorphTarget("mouthSmileLeft", 0, 0.5);
lerpMorphTarget("mouthSmileRight", 0, 0.5);
}
lerpMorphTarget("eyeBlinkLeft", blink ? 1 : 0, 0.5);
lerpMorphTarget("eyeBlinkRight", blink ? 1 : 0, 0.5);
for (let i = 0; i < visemeList.length; i++) {
lerpMorphTarget(visemeList[i], 0, 0.5); // reset viseme morph targets
}
if (message && message.visemes && message.audioPlayer) {
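// Walk the viseme timeline backwards and apply the most recent viseme for the current audio position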
for (let i = message.visemes.length - 1; i >= 0; i--) {
const viseme = message.visemes[i];
if (message.audioPlayer.currentTime * 1000 >= viseme[0]) {
const visemeMapped = visemesMapping[viseme[1]];
lerpMorphTarget(visemeMapped, 1, 0.5);
break;
}
}
}
});
const lerpMorphTarget = (target: string, value: number, speed = 0.1) => {
scene.traverse((child) => {
const mesh = child as THREE.SkinnedMesh;
if (mesh.isSkinnedMesh && mesh.morphTargetDictionary && mesh.morphTargetInfluences) {
const index = mesh.morphTargetDictionary[target];
if (index === undefined || mesh.morphTargetInfluences[index] === undefined) {
return;
}
mesh.morphTargetInfluences[index] = THREE.MathUtils.lerp(
mesh.morphTargetInfluences[index],
value,
speed
);
}
});
};
const [, set] = useControls("MorphTarget", () =>
Object.assign(
{},
...Object.keys(nodes.EyeLeft.morphTargetDictionary ?? {}).map((key) => {
// if (key.startsWith("viseme")) {
return {
[key]: {
label: key,
value: 0,
min: 0,
max: 1,
onChange: (val: number) => {
lerpMorphTarget(key, val, 1);
},
},
};
// }
})
)
);
return (
<group {...props} ref={groupRef} dispose={null} position={[0, -1.6, -0.5]}>
<primitive object={nodes.Hips} />
<skinnedMesh
name="EyeLeft"
geometry={nodes.EyeLeft.geometry}
material={materials.Wolf3D_Eye}
skeleton={nodes.EyeLeft.skeleton}
morphTargetDictionary={nodes.EyeLeft.morphTargetDictionary}
morphTargetInfluences={nodes.EyeLeft.morphTargetInfluences}
/>
<skinnedMesh
name="EyeRight"
geometry={nodes.EyeRight.geometry}
material={materials.Wolf3D_Eye}
skeleton={nodes.EyeRight.skeleton}
morphTargetDictionary={nodes.EyeRight.morphTargetDictionary}
morphTargetInfluences={nodes.EyeRight.morphTargetInfluences}
/>
<skinnedMesh
name="Wolf3D_Head"
geometry={nodes.Wolf3D_Head.geometry}
material={materials.Wolf3D_Skin}
skeleton={nodes.Wolf3D_Head.skeleton}
morphTargetDictionary={nodes.Wolf3D_Head.morphTargetDictionary}
morphTargetInfluences={nodes.Wolf3D_Head.morphTargetInfluences}
/>
<skinnedMesh
name="Wolf3D_Teeth"
geometry={nodes.Wolf3D_Teeth.geometry}
material={materials.Wolf3D_Teeth}
skeleton={nodes.Wolf3D_Teeth.skeleton}
morphTargetDictionary={nodes.Wolf3D_Teeth.morphTargetDictionary}
morphTargetInfluences={nodes.Wolf3D_Teeth.morphTargetInfluences}
/>
<skinnedMesh
geometry={nodes.Wolf3D_Hair.geometry}
material={materials.Wolf3D_Hair}
skeleton={nodes.Wolf3D_Hair.skeleton}
/>
<skinnedMesh
geometry={nodes.Wolf3D_Glasses.geometry}
material={materials.Wolf3D_Glasses}
skeleton={nodes.Wolf3D_Glasses.skeleton}
/>
<skinnedMesh
geometry={nodes.Wolf3D_Body.geometry}
material={materials.Wolf3D_Body}
skeleton={nodes.Wolf3D_Body.skeleton}
/>
<skinnedMesh
geometry={nodes.Wolf3D_Outfit_Bottom.geometry}
material={materials.Wolf3D_Outfit_Bottom}
skeleton={nodes.Wolf3D_Outfit_Bottom.skeleton}
/>
<skinnedMesh
geometry={nodes.Wolf3D_Outfit_Footwear.geometry}
material={materials.Wolf3D_Outfit_Footwear}
skeleton={nodes.Wolf3D_Outfit_Footwear.skeleton}
/>
<skinnedMesh
geometry={nodes.Wolf3D_Outfit_Top.geometry}
material={materials.Wolf3D_Outfit_Top}
skeleton={nodes.Wolf3D_Outfit_Top.skeleton}
/>
</group>
);
}
useGLTF.preload("/mau.glb");
useGLTF.preload("/animations.glb");

View File

@@ -0,0 +1,23 @@
"use client";
import { Environment, Loader } from "@react-three/drei";
import { Canvas } from "@react-three/fiber";
import { Leva } from "leva";
import { Avatar } from "./Avatar";
function Experience() {
return (
<>
<Loader />
<Leva hidden />
<Canvas shadows camera={{ position: [0, 0, 1], fov: 30 }}>
<Environment preset="warehouse" />
<Avatar />
</Canvas>
</>
);
}
export default Experience;

View File

@@ -1,6 +1,7 @@
"use client";
import { zodResolver } from "@hookform/resolvers/zod";
import { SendIcon } from "lucide-react";
import { useState } from "react";
import { useForm } from "react-hook-form";
import { toast } from "sonner";
@@ -9,13 +10,25 @@ import { z } from "zod";
import { Button } from "@/components/ui/button";
import { Form, FormControl, FormField, FormItem, FormMessage } from "@/components/ui/form";
import { Input } from "@/components/ui/input";
import { useConversation } from "@/lib/store";
const formSchema = z.object({
message: z.string().min(2).max(50),
});
interface MessageType {
id: string;
role: string;
content: string;
}
export default function SimpleInput() {
const [isSubmitting, setIsSubmitting] = useState(false);
// const [messages, setMessages] = useState<MessageType[]>([]);
const messages = useConversation((state) => state.messages);
const addMessage = useConversation((state) => state.addMessage);
const setMessageResult = useConversation((state) => state.setMessageResult);
const form = useForm<z.infer<typeof formSchema>>({
resolver: zodResolver(formSchema),
defaultValues: {
@@ -25,12 +38,44 @@ export default function SimpleInput() {
async function onSubmit(values: z.infer<typeof formSchema>) {
setIsSubmitting(true);
const response = await fetch("http://localhost:3000/api/researcher", {
const userMessage = {
role: "user",
content: values.message,
id: Date.now().toString(),
};
addMessage(userMessage);
const updatedMessages: MessageType[] = [...messages, userMessage];
const response = await fetch("/api/conversation/text", {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ message: values.message }),
body: JSON.stringify({ messages: updatedMessages }),
});
const data = await response.json();
const message = JSON.parse(response.headers.get("message") || "{}");
if (Object.keys(message).length === 0) {
console.error("No message returned from server");
setIsSubmitting(false);
return;
}
console.log(message);
const audio = await response.blob();
const visemes = JSON.parse(response.headers.get("visemes") || "[]");
const audioUrl = URL.createObjectURL(audio);
const audioPlayer = new Audio(audioUrl);
setMessageResult({
visemes,
audioPlayer,
});
audioPlayer.onended = () => {
setMessageResult(null);
};
audioPlayer.currentTime = 0;
audioPlayer.play();
console.log(visemes);
addMessage(message);
toast(
<div className="flex flex-col space-y-2">
@@ -38,7 +83,7 @@ export default function SimpleInput() {
<code className="text-white">{JSON.stringify(values, null, 2)}</code>
</pre>
<pre className="mt-2 w-[320px] rounded-md bg-gray-200 p-4">
<code className="text-black">{data.message}</code>
<code className="text-black">{JSON.stringify(message, null, 2)}</code>
</pre>
</div>
);
@@ -48,10 +93,7 @@ export default function SimpleInput() {
return (
<Form {...form}>
<form
onSubmit={form.handleSubmit(onSubmit)}
className="flex w-full max-w-sm items-center space-x-2"
>
<form onSubmit={form.handleSubmit(onSubmit)} className="flex w-80 items-center space-x-2">
<FormField
control={form.control}
name="message"
@@ -59,7 +101,7 @@ export default function SimpleInput() {
<FormItem className="grow">
{/* <FormLabel>Message</FormLabel> */}
<FormControl>
<Input placeholder="message..." {...field} disabled={isSubmitting} />
<Input placeholder="Talk to me..." {...field} disabled={isSubmitting} />
</FormControl>
{/* <FormDescription>This is your public display name.</FormDescription> */}
<FormMessage />
@@ -67,7 +109,7 @@ export default function SimpleInput() {
)}
/>
<Button type="submit" disabled={isSubmitting}>
Submit
<SendIcon strokeWidth={3} />
</Button>
</form>
</Form>

View File

@@ -0,0 +1,40 @@
"use client";
import { useState, useCallback } from "react";
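// Minimal MediaRecorder wrapper: exposes start/stop controls and the finished recording as an Ogg/Opus blob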
export const useAudioRecorder = () => {
const [isRecording, setIsRecording] = useState(false);
// const [audioURL, setAudioURL] = useState<string | null>(null);
const [audioBlob, setAudioBlob] = useState<Blob | null>(null);
const [mediaRecorder, setMediaRecorder] = useState<MediaRecorder | null>(null);
const startRecording = useCallback(() => {
navigator.mediaDevices
.getUserMedia({ audio: true })
.then((stream) => {
const recorder = new MediaRecorder(stream);
setMediaRecorder(recorder);
const chunks: Blob[] = [];
recorder.ondataavailable = (e) => chunks.push(e.data);
recorder.onstop = () => {
const blob = new Blob(chunks, { type: "audio/ogg; codecs=opus" });
// setAudioURL(URL.createObjectURL(blob));
setAudioBlob(blob);
// Release the microphone so the browser's recording indicator turns off
stream.getTracks().forEach((track) => track.stop());
};
recorder.start();
setIsRecording(true);
})
.catch((err) => console.error("Error accessing microphone:", err));
}, []);
const stopRecording = useCallback(() => {
if (mediaRecorder) {
mediaRecorder.stop();
setIsRecording(false);
}
}, [mediaRecorder]);
return { isRecording, audioBlob, startRecording, stopRecording };
};

42
src/lib/constants.ts Normal file
View File

@@ -0,0 +1,42 @@
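// Maps Azure Speech viseme IDs (0-21, used as the array index) to the avatar's viseme morph target names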
export const visemesMapping = [
"viseme_sil",
"viseme_aa",
"viseme_aa",
"viseme_O",
"viseme_E",
"viseme_E",
"viseme_I",
"viseme_U",
"viseme_O",
"viseme_aa",
"viseme_O",
"viseme_I",
"viseme_DD",
"viseme_RR",
"viseme_nn",
"viseme_SS",
"viseme_U",
"viseme_TH",
"viseme_FF",
"viseme_DD",
"viseme_kk",
"viseme_PP",
];
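// All viseme morph targets on the model; reset every frame before the active viseme is applied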
export const visemeList = [
"viseme_sil",
"viseme_PP",
"viseme_FF",
"viseme_TH",
"viseme_DD",
"viseme_kk",
"viseme_CH",
"viseme_SS",
"viseme_nn",
"viseme_RR",
"viseme_aa",
"viseme_E",
"viseme_I",
"viseme_O",
"viseme_U",
];

39
src/lib/graph.ts Normal file
View File

@@ -0,0 +1,39 @@
import { ChatPromptTemplate, MessagesPlaceholder } from "@langchain/core/prompts";
import { MessagesAnnotation, StateGraph } from "@langchain/langgraph";
import { ChatOpenAI } from "@langchain/openai";
// Init llm model
const llm = new ChatOpenAI({
model: "gpt-4o-mini",
});
// Define system prompt
const prompt = ChatPromptTemplate.fromMessages([
[
"system",
`You are a conversational AI assistant that provides information about anything the user wants.
Give short, concise answers and get straight to the point.
Don't give long answers or numbered or bulleted lists. Your answer should span from a single sentence to a single paragraph.
Always be polite, and sometimes throw in a joke if it is appropriate for the response.
`,
],
new MessagesPlaceholder("messages"),
]);
// Nodes
async function chatModelNode(state: typeof MessagesAnnotation.State) {
const chain = prompt.pipe(llm);
const response = await chain.invoke({
messages: state.messages,
});
// logger.info(response);
return { messages: [response] };
}
// Define the graph
const builder = new StateGraph(MessagesAnnotation)
.addNode("agent", chatModelNode)
.addEdge("__start__", "agent")
.addEdge("agent", "__end__");
export const graph = builder.compile();
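// Single-node graph for now; the conversation API routes call graph.invoke() with the full message history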

26
src/lib/store.ts Normal file
View File

@@ -0,0 +1,26 @@
import { create } from "zustand";
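// Global conversation state: the chat transcript plus the currently playing reply (audio element + viseme timeline) that the Avatar consumes for lip sync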
interface MessageResultType {
visemes: [number, number][];
audioPlayer: HTMLAudioElement;
}
interface MessageType {
role: string;
content: string;
id: string;
}
interface ConversationState {
messageResult: MessageResultType | null;
messages: MessageType[];
setMessageResult: (message: MessageResultType | null) => void;
addMessage: (message: MessageType) => void;
}
export const useConversation = create<ConversationState>()((set) => ({
messageResult: null,
messages: [],
setMessageResult: (messageResult) => set({ messageResult }),
addMessage: (message) => set((state) => ({ messages: [...state.messages, message] })),
}));