Complete MVP for a conversational AI avatar

This commit is contained in:
maumruiz
2025-02-11 18:10:19 -06:00
parent 6819f35fba
commit 1319e71f3e
22 changed files with 4511 additions and 49 deletions

29
.vscode/settings.json vendored Normal file
View File

@@ -0,0 +1,29 @@
{
"editor.formatOnSave": true,
"editor.codeActionsOnSave": {
"source.fixAll.eslint": "explicit",
"source.addMissingImports": "explicit"
},
"prettier.tabWidth": 2,
"prettier.useTabs": false,
"prettier.semi": true,
"prettier.singleQuote": false,
"prettier.jsxSingleQuote": false,
"prettier.trailingComma": "es5",
"prettier.arrowParens": "always",
"prettier.printWidth": 100,
"[json]": {
"editor.defaultFormatter": "esbenp.prettier-vscode"
},
"[typescript]": {
"editor.defaultFormatter": "esbenp.prettier-vscode"
},
"[typescriptreact]": {
"editor.defaultFormatter": "esbenp.prettier-vscode"
},
"[javascriptreact]": {
"editor.defaultFormatter": "esbenp.prettier-vscode"
},
"typescript.tsdk": "node_modules/typescript/lib",
"eslint.workingDirectories": ["./webapp"],
}

3715
package-lock.json generated

File diff suppressed because it is too large

View File

@@ -1,5 +1,5 @@
{
"name": "researcher",
"name": "ai-avatar",
"version": "0.1.0",
"private": true,
"scripts": {
@@ -10,13 +10,25 @@
},
"dependencies": {
"@hookform/resolvers": "^3.10.0",
"@langchain/community": "^0.3.29",
"@langchain/core": "^0.3.39",
"@langchain/langgraph": "^0.2.45",
"@langchain/openai": "^0.4.3",
"@radix-ui/react-label": "^2.1.2",
"@radix-ui/react-slot": "^1.1.2",
"@react-three/drei": "^9.121.4",
"@react-three/fiber": "^9.0.0-rc.7",
"@types/three": "^0.173.0",
"ai": "^4.1.34",
"class-variance-authority": "^0.7.1",
"clsx": "^2.1.1",
"langchain": "^0.3.15",
"leva": "^0.10.0",
"lucide-react": "^0.475.0",
"microsoft-cognitiveservices-speech-sdk": "^1.42.0",
"next": "15.1.6",
"next-themes": "^0.4.4",
"openai": "^4.83.0",
"pino": "^9.6.0",
"pino-pretty": "^13.0.0",
"react": "^19.0.0",
@@ -25,7 +37,9 @@
"sonner": "^1.7.4",
"tailwind-merge": "^3.0.1",
"tailwindcss-animate": "^1.0.7",
"zod": "^3.24.1"
"three": "^0.173.0",
"zod": "^3.24.1",
"zustand": "^5.0.3"
},
"devDependencies": {
"@types/node": "^20",
@@ -41,5 +55,12 @@
"prettier": "^3.4.2",
"tailwindcss": "^3.4.1",
"typescript": "^5"
},
"overrides": {
"@react-three/drei": {
"@react-three/fiber": "^9.0.0-rc.7",
"react": "^19.0.0",
"react-dom": "^19.0.0"
}
}
}

BIN
public/animations.glb Normal file

Binary file not shown.

BIN
public/briefIdleAnim.fbx Normal file

Binary file not shown.

BIN
public/idleAnim.fbx Normal file

Binary file not shown.

BIN
public/mau.glb Normal file

Binary file not shown.

View File

@@ -0,0 +1,104 @@
import { PassThrough } from "stream";
import { AIMessage, HumanMessage } from "@langchain/core/messages";
import { Messages } from "@langchain/langgraph";
import * as sdk from "microsoft-cognitiveservices-speech-sdk";
import OpenAI from "openai";
import { graph } from "@/lib/graph";
import logger from "@/lib/logger";
const client = new OpenAI({
apiKey: process.env.OPENAI_API_KEY,
});
const speechConfig = sdk.SpeechConfig.fromSubscription(
process.env.SPEECH_KEY || "",
process.env.SPEECH_REGION || ""
);
speechConfig.speechSynthesisVoiceName = "en-US-BrianMultilingualNeural";
// Allow streaming responses up to 30 seconds
// export const maxDuration = 30;
// Define POST method for chat route
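// Pipeline: speech-to-text (Whisper) -> chat completion (LangGraph) -> text-to-speech with viseme events (Azure Speech)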
export async function POST(req: Request) {
const formData = await req.formData();
const audio = formData.get("audio") as File;
const messages = JSON.parse(formData.get("messages") as string);
logger.info(JSON.stringify(messages, null, 2));
//* Speech to text
const transcription = await client.audio.transcriptions.create({
file: audio,
model: "whisper-1",
});
logger.info(JSON.stringify(transcription, null, 2));
// create new message with transcription
const userMessage = {
role: "user",
content: transcription.text,
id: Date.now().toString(),
};
const updatedMessages = [...messages, userMessage];
//* Text to text
const allMessages: Messages = updatedMessages.map((message) =>
message.role === "user" ? new HumanMessage(message.content) : new AIMessage(message.content)
);
// Stream of messages
const result = await graph.invoke({ messages: allMessages });
const lastMessage = result.messages[result.messages.length - 1];
//* Text to speech (and visemes)
// Use Microsoft Speech SDK to synthesize speech and get visemes
const speechSynthesizer = new sdk.SpeechSynthesizer(speechConfig);
const visemes: [number, number][] = [];
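// Each entry is [audio offset in ms, Azure viseme ID]; the client replays them against audio.currentTime to drive the avatar's mouth morph targets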
speechSynthesizer.visemeReceived = function (s, e) {
// logger.info(
// "(Viseme), Audio offset: " + e.audioOffset / 10000 + "ms. Viseme ID: " + e.visemeId
// );
visemes.push([e.audioOffset / 10000, e.visemeId]);
};
const audioStream = await new Promise<PassThrough>((resolve, reject) => {
speechSynthesizer.speakTextAsync(
`${lastMessage.content}`,
(result) => {
const { audioData } = result;
speechSynthesizer.close();
// convert arrayBuffer to stream
const bufferStream = new PassThrough();
bufferStream.end(Buffer.from(audioData));
resolve(bufferStream);
},
(error) => {
logger.error(error);
speechSynthesizer.close();
reject(error);
}
);
});
//* Return processed response
logger.info(`Response: ${lastMessage.content}`);
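// Curly quotes and em dashes are normalized below because the reply is returned in an HTTP response header, and header values are limited to Latin-1 characters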
const safeLastMessageContent = lastMessage.content
.toString()
.replace(/[\u2018\u2019]/g, "'")
.replace(/\u2014/g, "-");
return new Response(audioStream, {
headers: {
"Content-Type": "audio/mpeg",
"Content-Disposition": `inline; filename=tts.mp3`,
Visemes: JSON.stringify(visemes),
Result: JSON.stringify({
id: lastMessage.id,
role: "assistant",
content: safeLastMessageContent,
}),
UserMessage: JSON.stringify(userMessage),
},
});
}

View File

@@ -0,0 +1,82 @@
import { PassThrough } from "stream";
import { AIMessage, HumanMessage } from "@langchain/core/messages";
import { Messages } from "@langchain/langgraph";
import { Message } from "ai";
import * as sdk from "microsoft-cognitiveservices-speech-sdk";
import { graph } from "@/lib/graph";
import logger from "@/lib/logger";
const speechConfig = sdk.SpeechConfig.fromSubscription(
process.env.SPEECH_KEY || "",
process.env.SPEECH_REGION || ""
);
speechConfig.speechSynthesisVoiceName = "en-US-BrianMultilingualNeural";
// Allow streaming responses up to 30 seconds
// export const maxDuration = 30;
// Define POST method for chat route
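// Pipeline: chat completion (LangGraph) -> text-to-speech with viseme events (Azure Speech); input arrives as JSON chat messages instead of audio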
export async function POST(req: Request) {
const {
messages,
}: {
messages: Message[];
} = await req.json();
// TODO: Filter to only include last message when using langgraph memory
const allMessages: Messages = messages.map((message) =>
message.role === "user" ? new HumanMessage(message.content) : new AIMessage(message.content)
);
// Stream of messages
const result = await graph.invoke({ messages: allMessages });
const lastMessage = result.messages[result.messages.length - 1];
// Use Microsoft Speech SDK to synthesize speech and get visemes
const speechSynthesizer = new sdk.SpeechSynthesizer(speechConfig);
const visemes: [number, number][] = [];
speechSynthesizer.visemeReceived = function (s, e) {
// logger.info(
// "(Viseme), Audio offset: " + e.audioOffset / 10000 + "ms. Viseme ID: " + e.visemeId
// );
visemes.push([e.audioOffset / 10000, e.visemeId]);
};
const audioStream = await new Promise<PassThrough>((resolve, reject) => {
speechSynthesizer.speakTextAsync(
`${lastMessage.content}`,
(result) => {
const { audioData } = result;
speechSynthesizer.close();
// convert arrayBuffer to stream
const bufferStream = new PassThrough();
bufferStream.end(Buffer.from(audioData));
resolve(bufferStream);
},
(error) => {
logger.error(error);
speechSynthesizer.close();
reject(error);
}
);
});
logger.info(`Response: ${lastMessage.content}`);
return new Response(audioStream, {
headers: {
"Content-Type": "audio/mpeg",
"Content-Disposition": `inline; filename=tts.mp3`,
Visemes: JSON.stringify(visemes),
Message: JSON.stringify({
id: lastMessage.id,
role: "assistant",
content: lastMessage.content,
}),
},
});
}

View File

@@ -1,13 +0,0 @@
import { NextResponse } from "next/server";
export async function POST(req: Request) {
const { message }: { message: string } = await req.json();
// // TODO: Filter to only include last message when using langgraph
// const allMessages: Messages = messages.map((message) =>
// message.role === "user" ? new HumanMessage(message.content) : new AIMessage(message.content)
// );
// // Stream of messages
// const result = await app.invoke({ messages: allMessages });
// const lastMessage = result.messages[result.messages.length - 1];
return NextResponse.json({ message });
}

30
src/app/api/stt/route.ts Normal file
View File

@@ -0,0 +1,30 @@
import { NextResponse } from "next/server";
import OpenAI from "openai";
import logger from "@/lib/logger";
const client = new OpenAI({
apiKey: process.env.OPENAI_API_KEY,
});
export async function POST(req: Request) {
const formData = await req.formData();
const audio = formData.get("audio") as File;
const messages = JSON.parse(formData.get("messages") as string);
try {
const transcription = await client.audio.transcriptions.create({
file: audio,
model: "whisper-1",
});
logger.info(JSON.stringify(transcription, null, 2));
logger.info(JSON.stringify(messages, null, 2));
// Return the transcription data as JSON
return NextResponse.json(transcription.text);
} catch (error) {
logger.error(error, "Error transcribing audio");
return NextResponse.json({ error: "Transcription failed." }, { status: 500 });
}
}

0
src/app/api/tts/route.ts Normal file
View File

View File

@@ -4,6 +4,18 @@
body {
font-family: Arial, Helvetica, sans-serif;
background-color: #8baaaa;
/* background-image: linear-gradient(to top, #6a85b6 0%, #bac8e0 100%); */
/* background-image: linear-gradient(15deg, #13547a 0%, #80d0c7 100%); */
/* background-image: linear-gradient(to right, #868f96 0%, #596164 100%); */
background-image: linear-gradient(45deg, #8baaaa 0%, #596164 100%);
}
html,
body,
#root {
height: 100%;
margin: unset;
}
@layer base {

View File

@@ -1,9 +1,18 @@
import AudioRecorder from "@/components/AudioRecorder";
import Experience from "@/components/Experience";
import SimpleInput from "@/components/SimpleInput";
// import SimpleInput from "@/components/SimpleInput";
export default function Home() {
return (
<div className="flex min-h-screen items-center justify-center bg-gray-100">
<SimpleInput />
</div>
<>
<Experience />
<div className="absolute left-0 top-0 flex min-h-screen w-screen items-end justify-center gap-5 p-24">
<div className="flex items-center gap-5">
<SimpleInput />
<AudioRecorder />
</div>
</div>
</>
);
}

View File

@@ -0,0 +1,95 @@
"use client";
import { Mic, Square } from "lucide-react";
import { useCallback, useEffect } from "react";
import { useConversation } from "@/lib/store";
import { useAudioRecorder } from "../hooks/useAudioRecorder";
export default function AudioRecorder() {
const { isRecording, audioBlob, startRecording, stopRecording } = useAudioRecorder();
const messages = useConversation((state) => state.messages);
const addMessage = useConversation((state) => state.addMessage);
const setMessageResult = useConversation((state) => state.setMessageResult);
const handleRecordClick = () => {
if (isRecording) {
stopRecording();
} else {
startRecording();
}
};
const fetchConversation = useCallback(
async (audioBlob: Blob) => {
const formData = new FormData();
formData.append("audio", audioBlob, "audio.ogg");
formData.append("messages", JSON.stringify(messages));
try {
const response = await fetch("/api/conversation/speech", {
method: "POST",
body: formData,
});
const result = JSON.parse(response.headers.get("result") || "{}");
const userMessage = JSON.parse(response.headers.get("usermessage") || "{}");
const audio = await response.blob();
const visemes = JSON.parse(response.headers.get("visemes") || "[]");
const audioUrl = URL.createObjectURL(audio);
const audioPlayer = new Audio(audioUrl);
console.log(userMessage);
console.log(result);
console.log(visemes);
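// Hand the audio element and viseme timeline to the store so the Avatar can lip sync during playback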
setMessageResult({
visemes,
audioPlayer,
});
audioPlayer.onended = () => {
setMessageResult(null);
};
audioPlayer.currentTime = 0;
audioPlayer.play();
addMessage(userMessage);
addMessage(result);
} catch (err) {
console.error("Error sending audio file:", err);
}
},
[messages, addMessage, setMessageResult]
);
useEffect(() => {
if (audioBlob) {
fetchConversation(audioBlob);
}
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [audioBlob]);
return (
<div>
<button
onClick={handleRecordClick}
className={`flex size-16 items-center justify-center rounded-full text-sm font-bold text-white transition-all duration-300 ease-in-out hover:scale-110 ${
isRecording
? "animate-pulse bg-red-600"
: "bg-slate-900 shadow-lg hover:bg-slate-950 hover:shadow-xl"
}`}
>
{isRecording ? <Square className="size-6" /> : <Mic className="size-8" />}
</button>
{/* <p className="mt-4 text-xl font-semibold text-white">
{isRecording ? "Recording..." : "Tap to Record"}
</p> */}
{/* {hasRecorded && audioURL && (
<div className="mt-8">
<audio src={audioURL} controls className="w-64" />
</div>
)} */}
</div>
);
}

208
src/components/Avatar.tsx Normal file
View File

@@ -0,0 +1,208 @@
/*
Auto-generated by: https://github.com/pmndrs/gltfjsx
*/
import { useAnimations, useGLTF } from "@react-three/drei";
import { useFrame } from "@react-three/fiber";
import { useControls } from "leva";
import { useEffect, useRef, useState } from "react";
import * as THREE from "three";
import { GLTF } from "three-stdlib";
import { visemeList, visemesMapping } from "@/lib/constants";
import { useConversation } from "@/lib/store";
type GLTFResult = GLTF & {
nodes: {
EyeLeft: THREE.SkinnedMesh;
EyeRight: THREE.SkinnedMesh;
Wolf3D_Head: THREE.SkinnedMesh;
Wolf3D_Teeth: THREE.SkinnedMesh;
Wolf3D_Hair: THREE.SkinnedMesh;
Wolf3D_Glasses: THREE.SkinnedMesh;
Wolf3D_Body: THREE.SkinnedMesh;
Wolf3D_Outfit_Bottom: THREE.SkinnedMesh;
Wolf3D_Outfit_Footwear: THREE.SkinnedMesh;
Wolf3D_Outfit_Top: THREE.SkinnedMesh;
Hips: THREE.Bone;
};
materials: {
Wolf3D_Eye: THREE.MeshStandardMaterial;
Wolf3D_Skin: THREE.MeshStandardMaterial;
Wolf3D_Teeth: THREE.MeshStandardMaterial;
Wolf3D_Hair: THREE.MeshStandardMaterial;
Wolf3D_Glasses: THREE.MeshStandardMaterial;
Wolf3D_Body: THREE.MeshStandardMaterial;
Wolf3D_Outfit_Bottom: THREE.MeshStandardMaterial;
Wolf3D_Outfit_Footwear: THREE.MeshStandardMaterial;
Wolf3D_Outfit_Top: THREE.MeshStandardMaterial;
};
};
export function Avatar(props: JSX.IntrinsicElements["group"]) {
const groupRef = useRef(null);
const [animation, setAnimation] = useState("Idle");
const [blink, setBlink] = useState(false);
const message = useConversation((state) => state.messageResult);
const { nodes, materials, scene } = useGLTF("/mau.glb") as GLTFResult;
const { animations } = useGLTF("/animations.glb");
const { actions } = useAnimations(animations, groupRef);
useEffect(() => {
actions[animation]?.play();
}, [animation, actions]);
useEffect(() => {
let blinkTimeout: ReturnType<typeof setTimeout>;
const nextBlink = () => {
blinkTimeout = setTimeout(
() => {
setBlink(true);
setTimeout(() => {
setBlink(false);
nextBlink();
}, 200);
},
THREE.MathUtils.randInt(1000, 5000)
);
};
nextBlink();
return () => clearTimeout(blinkTimeout);
}, []);
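// Per-frame animation: resting smile, blinking, and viseme-driven lip sync while a reply is playing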
useFrame((state, delta) => {
if (!message) {
lerpMorphTarget("mouthSmileLeft", 0.36, 0.5);
lerpMorphTarget("mouthSmileRight", 0.36, 0.5);
} else {
lerpMorphTarget("mouthSmileLeft", 0, 0.5);
lerpMorphTarget("mouthSmileRight", 0, 0.5);
}
lerpMorphTarget("eyeBlinkLeft", blink ? 1 : 0, 0.5);
lerpMorphTarget("eyeBlinkRight", blink ? 1 : 0, 0.5);
for (let i = 0; i < visemeList.length; i++) {
lerpMorphTarget(visemeList[i], 0, 0.5); // reset viseme morph targets
}
if (message && message.visemes && message.audioPlayer) {
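// Walk the viseme timeline backwards and apply the most recent viseme for the current audio position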
for (let i = message.visemes.length - 1; i >= 0; i--) {
const viseme = message.visemes[i];
if (message.audioPlayer.currentTime * 1000 >= viseme[0]) {
const visemeMapped = visemesMapping[viseme[1]];
lerpMorphTarget(visemeMapped, 1, 0.5);
break;
}
}
}
});
const lerpMorphTarget = (target: string, value: number, speed = 0.1) => {
scene.traverse((child) => {
const mesh = child as THREE.SkinnedMesh;
if (mesh.isSkinnedMesh && mesh.morphTargetDictionary && mesh.morphTargetInfluences) {
const index = mesh.morphTargetDictionary[target];
if (index === undefined || mesh.morphTargetInfluences[index] === undefined) {
return;
}
mesh.morphTargetInfluences[index] = THREE.MathUtils.lerp(
mesh.morphTargetInfluences[index],
value,
speed
);
}
});
};
const [, set] = useControls("MorphTarget", () =>
Object.assign(
{},
...Object.keys(nodes.EyeLeft.morphTargetDictionary ?? {}).map((key) => {
// if (key.startsWith("viseme")) {
return {
[key]: {
label: key,
value: 0,
min: 0,
max: 1,
onChange: (val: number) => {
lerpMorphTarget(key, val, 1);
},
},
};
// }
})
)
);
return (
<group {...props} ref={groupRef} dispose={null} position={[0, -1.6, -0.5]}>
<primitive object={nodes.Hips} />
<skinnedMesh
name="EyeLeft"
geometry={nodes.EyeLeft.geometry}
material={materials.Wolf3D_Eye}
skeleton={nodes.EyeLeft.skeleton}
morphTargetDictionary={nodes.EyeLeft.morphTargetDictionary}
morphTargetInfluences={nodes.EyeLeft.morphTargetInfluences}
/>
<skinnedMesh
name="EyeRight"
geometry={nodes.EyeRight.geometry}
material={materials.Wolf3D_Eye}
skeleton={nodes.EyeRight.skeleton}
morphTargetDictionary={nodes.EyeRight.morphTargetDictionary}
morphTargetInfluences={nodes.EyeRight.morphTargetInfluences}
/>
<skinnedMesh
name="Wolf3D_Head"
geometry={nodes.Wolf3D_Head.geometry}
material={materials.Wolf3D_Skin}
skeleton={nodes.Wolf3D_Head.skeleton}
morphTargetDictionary={nodes.Wolf3D_Head.morphTargetDictionary}
morphTargetInfluences={nodes.Wolf3D_Head.morphTargetInfluences}
/>
<skinnedMesh
name="Wolf3D_Teeth"
geometry={nodes.Wolf3D_Teeth.geometry}
material={materials.Wolf3D_Teeth}
skeleton={nodes.Wolf3D_Teeth.skeleton}
morphTargetDictionary={nodes.Wolf3D_Teeth.morphTargetDictionary}
morphTargetInfluences={nodes.Wolf3D_Teeth.morphTargetInfluences}
/>
<skinnedMesh
geometry={nodes.Wolf3D_Hair.geometry}
material={materials.Wolf3D_Hair}
skeleton={nodes.Wolf3D_Hair.skeleton}
/>
<skinnedMesh
geometry={nodes.Wolf3D_Glasses.geometry}
material={materials.Wolf3D_Glasses}
skeleton={nodes.Wolf3D_Glasses.skeleton}
/>
<skinnedMesh
geometry={nodes.Wolf3D_Body.geometry}
material={materials.Wolf3D_Body}
skeleton={nodes.Wolf3D_Body.skeleton}
/>
<skinnedMesh
geometry={nodes.Wolf3D_Outfit_Bottom.geometry}
material={materials.Wolf3D_Outfit_Bottom}
skeleton={nodes.Wolf3D_Outfit_Bottom.skeleton}
/>
<skinnedMesh
geometry={nodes.Wolf3D_Outfit_Footwear.geometry}
material={materials.Wolf3D_Outfit_Footwear}
skeleton={nodes.Wolf3D_Outfit_Footwear.skeleton}
/>
<skinnedMesh
geometry={nodes.Wolf3D_Outfit_Top.geometry}
material={materials.Wolf3D_Outfit_Top}
skeleton={nodes.Wolf3D_Outfit_Top.skeleton}
/>
</group>
);
}
useGLTF.preload("/mau.glb");
useGLTF.preload("/animations.glb");

View File

@@ -0,0 +1,23 @@
"use client";
import { Environment, Loader } from "@react-three/drei";
import { Canvas } from "@react-three/fiber";
import { Leva } from "leva";
import { Avatar } from "./Avatar";
function Experience() {
return (
<>
<Loader />
<Leva hidden />
<Canvas shadows camera={{ position: [0, 0, 1], fov: 30 }}>
<Environment preset="warehouse" />
<Avatar />
</Canvas>
</>
);
}
export default Experience;

View File

@@ -1,6 +1,7 @@
"use client";
import { zodResolver } from "@hookform/resolvers/zod";
import { SendIcon } from "lucide-react";
import { useState } from "react";
import { useForm } from "react-hook-form";
import { toast } from "sonner";
@@ -9,13 +10,25 @@ import { z } from "zod";
import { Button } from "@/components/ui/button";
import { Form, FormControl, FormField, FormItem, FormMessage } from "@/components/ui/form";
import { Input } from "@/components/ui/input";
import { useConversation } from "@/lib/store";
const formSchema = z.object({
message: z.string().min(2).max(50),
});
interface MessageType {
id: string;
role: string;
content: string;
}
export default function SimpleInput() {
const [isSubmitting, setIsSubmitting] = useState(false);
// const [messages, setMessages] = useState<MessageType[]>([]);
const messages = useConversation((state) => state.messages);
const addMessage = useConversation((state) => state.addMessage);
const setMessageResult = useConversation((state) => state.setMessageResult);
const form = useForm<z.infer<typeof formSchema>>({
resolver: zodResolver(formSchema),
defaultValues: {
@@ -25,12 +38,44 @@ export default function SimpleInput() {
async function onSubmit(values: z.infer<typeof formSchema>) {
setIsSubmitting(true);
const response = await fetch("http://localhost:3000/api/researcher", {
const userMessage = {
role: "user",
content: values.message,
id: Date.now().toString(),
};
addMessage(userMessage);
const updatedMessages: MessageType[] = [...messages, userMessage];
const response = await fetch("/api/conversation/text", {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ message: values.message }),
body: JSON.stringify({ messages: updatedMessages }),
});
const data = await response.json();
const message = JSON.parse(response.headers.get("message") || "{}");
if (Object.keys(message).length === 0) {
console.error("No message returned from server");
setIsSubmitting(false);
return;
}
console.log(message);
const audio = await response.blob();
const visemes = JSON.parse(response.headers.get("visemes") || "[]");
const audioUrl = URL.createObjectURL(audio);
const audioPlayer = new Audio(audioUrl);
setMessageResult({
visemes,
audioPlayer,
});
audioPlayer.onended = () => {
setMessageResult(null);
};
audioPlayer.currentTime = 0;
audioPlayer.play();
console.log(visemes);
addMessage(message);
toast(
<div className="flex flex-col space-y-2">
@@ -38,7 +83,7 @@ export default function SimpleInput() {
<code className="text-white">{JSON.stringify(values, null, 2)}</code>
</pre>
<pre className="mt-2 w-[320px] rounded-md bg-gray-200 p-4">
<code className="text-black">{data.message}</code>
<code className="text-black">{JSON.stringify(message, null, 2)}</code>
</pre>
</div>
);
@@ -48,10 +93,7 @@ export default function SimpleInput() {
return (
<Form {...form}>
<form
onSubmit={form.handleSubmit(onSubmit)}
className="flex w-full max-w-sm items-center space-x-2"
>
<form onSubmit={form.handleSubmit(onSubmit)} className="flex w-80 items-center space-x-2">
<FormField
control={form.control}
name="message"
@@ -59,7 +101,7 @@ export default function SimpleInput() {
<FormItem className="grow">
{/* <FormLabel>Message</FormLabel> */}
<FormControl>
<Input placeholder="message..." {...field} disabled={isSubmitting} />
<Input placeholder="Talk to me..." {...field} disabled={isSubmitting} />
</FormControl>
{/* <FormDescription>This is your public display name.</FormDescription> */}
<FormMessage />
@@ -67,7 +109,7 @@ export default function SimpleInput() {
)}
/>
<Button type="submit" disabled={isSubmitting}>
Submit
<SendIcon strokeWidth={3} />
</Button>
</form>
</Form>

View File

@@ -0,0 +1,40 @@
"use client";
import { useState, useCallback } from "react";
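// Minimal MediaRecorder wrapper: exposes start/stop controls and the finished recording as an Ogg/Opus blob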
export const useAudioRecorder = () => {
const [isRecording, setIsRecording] = useState(false);
// const [audioURL, setAudioURL] = useState<string | null>(null);
const [audioBlob, setAudioBlob] = useState<Blob | null>(null);
const [mediaRecorder, setMediaRecorder] = useState<MediaRecorder | null>(null);
const startRecording = useCallback(() => {
navigator.mediaDevices
.getUserMedia({ audio: true })
.then((stream) => {
const recorder = new MediaRecorder(stream);
setMediaRecorder(recorder);
const chunks: Blob[] = [];
recorder.ondataavailable = (e) => chunks.push(e.data);
recorder.onstop = () => {
const blob = new Blob(chunks, { type: "audio/ogg; codecs=opus" });
// setAudioURL(URL.createObjectURL(blob));
setAudioBlob(blob);
// Release the microphone so the browser's recording indicator turns off
stream.getTracks().forEach((track) => track.stop());
};
recorder.start();
setIsRecording(true);
})
.catch((err) => console.error("Error accessing microphone:", err));
}, []);
const stopRecording = useCallback(() => {
if (mediaRecorder) {
mediaRecorder.stop();
setIsRecording(false);
}
}, [mediaRecorder]);
return { isRecording, audioBlob, startRecording, stopRecording };
};

42
src/lib/constants.ts Normal file
View File

@@ -0,0 +1,42 @@
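// Maps Azure Speech viseme IDs (0-21, used as the array index) to the avatar's viseme morph target names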
export const visemesMapping = [
"viseme_sil",
"viseme_aa",
"viseme_aa",
"viseme_O",
"viseme_E",
"viseme_E",
"viseme_I",
"viseme_U",
"viseme_O",
"viseme_aa",
"viseme_O",
"viseme_I",
"viseme_DD",
"viseme_RR",
"viseme_nn",
"viseme_SS",
"viseme_U",
"viseme_TH",
"viseme_FF",
"viseme_DD",
"viseme_kk",
"viseme_PP",
];
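// All viseme morph targets on the model; reset every frame before the active viseme is applied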
export const visemeList = [
"viseme_sil",
"viseme_PP",
"viseme_FF",
"viseme_TH",
"viseme_DD",
"viseme_kk",
"viseme_CH",
"viseme_SS",
"viseme_nn",
"viseme_RR",
"viseme_aa",
"viseme_E",
"viseme_I",
"viseme_O",
"viseme_U",
];

39
src/lib/graph.ts Normal file
View File

@@ -0,0 +1,39 @@
import { ChatPromptTemplate, MessagesPlaceholder } from "@langchain/core/prompts";
import { MessagesAnnotation, StateGraph } from "@langchain/langgraph";
import { ChatOpenAI } from "@langchain/openai";
// Init llm model
const llm = new ChatOpenAI({
model: "gpt-4o-mini",
});
// Define system prompt
const prompt = ChatPromptTemplate.fromMessages([
[
"system",
`You are a conversational AI assistant that provides information about anything the user wants.
Give short, concise answers and get straight to the point.
Don't give long answers or numbered or bulleted lists. Your answer should span from a single sentence to a single paragraph.
Always be polite, and sometimes throw in a joke if it is appropriate for the response.
`,
],
new MessagesPlaceholder("messages"),
]);
// Nodes
async function chatModelNode(state: typeof MessagesAnnotation.State) {
const chain = prompt.pipe(llm);
const response = await chain.invoke({
messages: state.messages,
});
// logger.info(response);
return { messages: [response] };
}
// Define the graph
const builder = new StateGraph(MessagesAnnotation)
.addNode("agent", chatModelNode)
.addEdge("__start__", "agent")
.addEdge("agent", "__end__");
export const graph = builder.compile();
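// Single-node graph for now; the conversation API routes call graph.invoke() with the full message history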

26
src/lib/store.ts Normal file
View File

@@ -0,0 +1,26 @@
import { create } from "zustand";
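// Global conversation state: the chat transcript plus the currently playing reply (audio element + viseme timeline) that the Avatar consumes for lip sync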
interface MessageResultType {
visemes: [number, number][];
audioPlayer: HTMLAudioElement;
}
interface MessageType {
role: string;
content: string;
id: string;
}
interface ConversationState {
messageResult: MessageResultType | null;
messages: MessageType[];
setMessageResult: (message: MessageResultType | null) => void;
addMessage: (message: MessageType) => void;
}
export const useConversation = create<ConversationState>()((set) => ({
messageResult: null,
messages: [],
setMessageResult: (messageResult) => set({ messageResult }),
addMessage: (message) => set((state) => ({ messages: [...state.messages, message] })),
}));