Complete MVP for a conversational AI avatar
29  .vscode/settings.json  vendored  Normal file
@@ -0,0 +1,29 @@
{
  "editor.formatOnSave": true,
  "editor.codeActionsOnSave": {
    "source.fixAll.eslint": "explicit",
    "source.addMissingImports": "explicit"
  },
  "prettier.tabWidth": 2,
  "prettier.useTabs": false,
  "prettier.semi": true,
  "prettier.singleQuote": false,
  "prettier.jsxSingleQuote": false,
  "prettier.trailingComma": "es5",
  "prettier.arrowParens": "always",
  "prettier.printWidth": 100,
  "[json]": {
    "editor.defaultFormatter": "esbenp.prettier-vscode"
  },
  "[typescript]": {
    "editor.defaultFormatter": "esbenp.prettier-vscode"
  },
  "[typescriptreact]": {
    "editor.defaultFormatter": "esbenp.prettier-vscode"
  },
  "[javascriptreact]": {
    "editor.defaultFormatter": "esbenp.prettier-vscode"
  },
  "typescript.tsdk": "node_modules/typescript/lib",
  "eslint.workingDirectories": ["./webapp"],
}
3715  package-lock.json  generated
File diff suppressed because it is too large.
25  package.json
@@ -1,5 +1,5 @@
{
  "name": "researcher",
  "name": "ai-avatar",
  "version": "0.1.0",
  "private": true,
  "scripts": {
@@ -10,13 +10,25 @@
  },
  "dependencies": {
    "@hookform/resolvers": "^3.10.0",
    "@langchain/community": "^0.3.29",
    "@langchain/core": "^0.3.39",
    "@langchain/langgraph": "^0.2.45",
    "@langchain/openai": "^0.4.3",
    "@radix-ui/react-label": "^2.1.2",
    "@radix-ui/react-slot": "^1.1.2",
    "@react-three/drei": "^9.121.4",
    "@react-three/fiber": "^9.0.0-rc.7",
    "@types/three": "^0.173.0",
    "ai": "^4.1.34",
    "class-variance-authority": "^0.7.1",
    "clsx": "^2.1.1",
    "langchain": "^0.3.15",
    "leva": "^0.10.0",
    "lucide-react": "^0.475.0",
    "microsoft-cognitiveservices-speech-sdk": "^1.42.0",
    "next": "15.1.6",
    "next-themes": "^0.4.4",
    "openai": "^4.83.0",
    "pino": "^9.6.0",
    "pino-pretty": "^13.0.0",
    "react": "^19.0.0",
@@ -25,7 +37,9 @@
    "sonner": "^1.7.4",
    "tailwind-merge": "^3.0.1",
    "tailwindcss-animate": "^1.0.7",
    "zod": "^3.24.1"
    "three": "^0.173.0",
    "zod": "^3.24.1",
    "zustand": "^5.0.3"
  },
  "devDependencies": {
    "@types/node": "^20",
@@ -41,5 +55,12 @@
    "prettier": "^3.4.2",
    "tailwindcss": "^3.4.1",
    "typescript": "^5"
  },
  "overrides": {
    "@react-three/drei": {
      "@react-three/fiber": "^9.0.0-rc.7",
      "react": "^19.0.0",
      "react-dom": "^19.0.0"
    }
  }
}
BIN  public/animations.glb  Normal file  (Binary file not shown.)
BIN  public/briefIdleAnim.fbx  Normal file  (Binary file not shown.)
BIN  public/idleAnim.fbx  Normal file  (Binary file not shown.)
BIN  public/mau.glb  Normal file  (Binary file not shown.)
104  src/app/api/conversation/speech/route.ts  Normal file
@@ -0,0 +1,104 @@
// import { PassThrough } from "stream";

import { PassThrough } from "stream";

import { AIMessage, HumanMessage } from "@langchain/core/messages";
import { Messages } from "@langchain/langgraph";
import * as sdk from "microsoft-cognitiveservices-speech-sdk";
import OpenAI from "openai";

import { graph } from "@/lib/graph";
import logger from "@/lib/logger";

const client = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY,
});
const speechConfig = sdk.SpeechConfig.fromSubscription(
  process.env.SPEECH_KEY || "",
  process.env.SPEECH_REGION || ""
);
speechConfig.speechSynthesisVoiceName = "en-US-BrianMultilingualNeural";

// Allow streaming responses up to 30 seconds
// export const maxDuration = 30;

// Define POST method for chat route
export async function POST(req: Request) {
  const formData = await req.formData();
  const audio = formData.get("audio") as File;
  const messages = JSON.parse(formData.get("messages") as string);
  logger.info(JSON.stringify(messages, null, 2));

  //* Speech to text
  const transcription = await client.audio.transcriptions.create({
    file: audio,
    model: "whisper-1",
  });
  logger.info(JSON.stringify(transcription, null, 2));
  // create new message with transcription
  const userMessage = {
    role: "user",
    content: transcription.text,
    id: Date.now().toString(),
  };
  const updatedMessages = [...messages, userMessage];

  //* Text to text
  const allMessages: Messages = updatedMessages.map((message) =>
    message.role === "user" ? new HumanMessage(message.content) : new AIMessage(message.content)
  );
  // Stream of messages
  const result = await graph.invoke({ messages: allMessages });
  const lastMessage = result.messages[result.messages.length - 1];

  //* Text to speech (and visemes)
  // Use Microsoft Speech SDK to synthesize speech and get visemes
  const speechSynthesizer = new sdk.SpeechSynthesizer(speechConfig);
  const visemes: [number, number][] = [];
  speechSynthesizer.visemeReceived = function (s, e) {
    // logger.info(
    //   "(Viseme), Audio offset: " + e.audioOffset / 10000 + "ms. Viseme ID: " + e.visemeId
    // );
    visemes.push([e.audioOffset / 10000, e.visemeId]);
  };
  const audioStream = await new Promise((resolve, reject) => {
    speechSynthesizer.speakTextAsync(
      `${lastMessage.content}`,
      (result) => {
        const { audioData } = result;

        speechSynthesizer.close();

        // convert arrayBuffer to stream
        const bufferStream = new PassThrough();
        bufferStream.end(Buffer.from(audioData));
        resolve(bufferStream);
      },
      (error) => {
        logger.error(error);
        speechSynthesizer.close();
        reject(error);
      }
    );
  });

  //* Return processed response
  logger.info(`Response: ${lastMessage.content}`);
  const safeLastMessageContent = lastMessage.content
    .toString()
    .replace(/[\u2018\u2019]/g, "'")
    .replace(/\u2014/g, "-");
  return new Response(audioStream, {
    headers: {
      "Content-Type": "audio/mpeg",
      "Content-Disposition": `inline; filename=tts.mp3`,
      Visemes: JSON.stringify(visemes),
      Result: JSON.stringify({
        id: lastMessage.id,
        role: "assistant",
        content: safeLastMessageContent,
      }),
      UserMessage: JSON.stringify(userMessage),
    },
  });
}
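A minimal client-side sketch of the response contract this route exposes, assuming a browser caller: the MP3 body is read as a blob, while the transcribed user message, the assistant reply, and the viseme timeline travel in the UserMessage, Result, and Visemes response headers (the committed AudioRecorder.tsx consumes them the same way). callSpeechRoute is a hypothetical helper, not part of this commit.

async function callSpeechRoute(formData: FormData) {
  // Hedged sketch of reading the audio body plus the metadata headers.
  const response = await fetch("/api/conversation/speech", { method: "POST", body: formData });
  const visemes: [number, number][] = JSON.parse(response.headers.get("visemes") || "[]");
  const result = JSON.parse(response.headers.get("result") || "{}"); // assistant message
  const userMessage = JSON.parse(response.headers.get("usermessage") || "{}"); // transcription
  const audioUrl = URL.createObjectURL(await response.blob()); // the tts.mp3 body
  return { visemes, result, userMessage, audioUrl };
}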
82  src/app/api/conversation/text/route.ts  Normal file
@@ -0,0 +1,82 @@
// import { PassThrough } from "stream";

import { PassThrough } from "stream";

import { AIMessage, HumanMessage } from "@langchain/core/messages";
import { Messages } from "@langchain/langgraph";
import { Message } from "ai";
import * as sdk from "microsoft-cognitiveservices-speech-sdk";

import { graph } from "@/lib/graph";
import logger from "@/lib/logger";

const speechConfig = sdk.SpeechConfig.fromSubscription(
  process.env.SPEECH_KEY || "",
  process.env.SPEECH_REGION || ""
);
speechConfig.speechSynthesisVoiceName = "en-US-BrianMultilingualNeural";

// Allow streaming responses up to 30 seconds
// export const maxDuration = 30;

// Define POST method for chat route
export async function POST(req: Request) {
  const {
    messages,
  }: {
    messages: Message[];
  } = await req.json();

  // TODO: Filter to only include last message when using langgraph memory
  const allMessages: Messages = messages.map((message) =>
    message.role === "user" ? new HumanMessage(message.content) : new AIMessage(message.content)
  );

  // Stream of messages
  const result = await graph.invoke({ messages: allMessages });
  const lastMessage = result.messages[result.messages.length - 1];

  // Use Microsoft Speech SDK to synthesize speech and get visemes
  const speechSynthesizer = new sdk.SpeechSynthesizer(speechConfig);
  const visemes: [number, number][] = [];
  speechSynthesizer.visemeReceived = function (s, e) {
    // logger.info(
    //   "(Viseme), Audio offset: " + e.audioOffset / 10000 + "ms. Viseme ID: " + e.visemeId
    // );
    visemes.push([e.audioOffset / 10000, e.visemeId]);
  };
  const audioStream = await new Promise((resolve, reject) => {
    speechSynthesizer.speakTextAsync(
      `${lastMessage.content}`,
      (result) => {
        const { audioData } = result;

        speechSynthesizer.close();

        // convert arrayBuffer to stream
        const bufferStream = new PassThrough();
        bufferStream.end(Buffer.from(audioData));
        resolve(bufferStream);
      },
      (error) => {
        logger.error(error);
        speechSynthesizer.close();
        reject(error);
      }
    );
  });

  logger.info(`Response: ${lastMessage.content}`);
  return new Response(audioStream, {
    headers: {
      "Content-Type": "audio/mpeg",
      "Content-Disposition": `inline; filename=tts.mp3`,
      Visemes: JSON.stringify(visemes),
      Message: JSON.stringify({
        id: lastMessage.id,
        role: "assistant",
        content: lastMessage.content,
      }),
    },
  });
}
@@ -1,13 +0,0 @@
import { NextResponse } from "next/server";

export async function POST(req: Request) {
  const { message }: { message: string } = await req.json();
  // // TODO: Filter to only include last message when using langgraph
  // const allMessages: Messages = messages.map((message) =>
  //   message.role === "user" ? new HumanMessage(message.content) : new AIMessage(message.content)
  // );
  // // Stream of messages
  // const result = await app.invoke({ messages: allMessages });
  // const lastMessage = result.messages[result.messages.length - 1];
  return NextResponse.json({ message });
}
30  src/app/api/stt/route.ts  Normal file
@@ -0,0 +1,30 @@
import { NextResponse } from "next/server";
import OpenAI from "openai";

import logger from "@/lib/logger";

const client = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY,
});

export async function POST(req: Request) {
  const formData = await req.formData();
  const audio = formData.get("audio") as File;
  const messages = JSON.parse(formData.get("messages") as string);

  try {
    const transcription = await client.audio.transcriptions.create({
      file: audio,
      model: "whisper-1",
    });

    logger.info(JSON.stringify(transcription, null, 2));
    logger.info(JSON.stringify(messages, null, 2));

    // Return the transcription data as JSON
    return NextResponse.json(transcription.text);
  } catch (error) {
    console.error("Error transcribing audio:", error);
    return NextResponse.json({ error: "Transcription failed." }, { status: 500 });
  }
}
0  src/app/api/tts/route.ts  Normal file
@@ -4,6 +4,18 @@

body {
  font-family: Arial, Helvetica, sans-serif;
  background-color: #8baaaa;
  /* background-image: linear-gradient(to top, #6a85b6 0%, #bac8e0 100%); */
  /* background-image: linear-gradient(15deg, #13547a 0%, #80d0c7 100%); */
  /* background-image: linear-gradient(to right, #868f96 0%, #596164 100%); */
  background-image: linear-gradient(45deg, #8baaaa 0%, #596164 100%);
}

html,
body,
#root {
  height: 100%;
  margin: unset;
}

@layer base {
@@ -1,9 +1,18 @@
import AudioRecorder from "@/components/AudioRecorder";
import Experience from "@/components/Experience";
import SimpleInput from "@/components/SimpleInput";
// import SimpleInput from "@/components/SimpleInput";

export default function Home() {
  return (
    <div className="flex min-h-screen items-center justify-center bg-gray-100">
      <SimpleInput />
    </div>
    <>
      <Experience />
      <div className="absolute left-0 top-0 flex min-h-screen w-screen items-end justify-center gap-5 p-24">
        <div className="flex items-center gap-5">
          <SimpleInput />
          <AudioRecorder />
        </div>
      </div>
    </>
  );
}
95  src/components/AudioRecorder.tsx  Normal file
@@ -0,0 +1,95 @@
"use client";

import { Mic, Square } from "lucide-react";
import { useCallback, useEffect } from "react";

import { useConversation } from "@/lib/store";

import { useAudioRecorder } from "../hooks/useAudioRecorder";

export default function AudioRecorder() {
  const { isRecording, audioBlob, startRecording, stopRecording } = useAudioRecorder();
  const messages = useConversation((state) => state.messages);
  const addMessage = useConversation((state) => state.addMessage);
  const setMessageResult = useConversation((state) => state.setMessageResult);

  const handleRecordClick = () => {
    if (isRecording) {
      stopRecording();
    } else {
      startRecording();
    }
  };

  const fetchConversation = useCallback(
    async (audioBlob: Blob) => {
      const formData = new FormData();
      formData.append("audio", audioBlob, "audio.ogg");
      formData.append("messages", JSON.stringify(messages));

      try {
        const response = await fetch("/api/conversation/speech", {
          method: "POST",
          body: formData,
        });

        const result = JSON.parse((await response.headers.get("result")) || "{}");
        const userMessage = JSON.parse((await response.headers.get("usermessage")) || "{}");
        const audio = await response.blob();
        const visemes = JSON.parse((await response.headers.get("visemes")) || "[]");
        const audioUrl = URL.createObjectURL(audio);
        const audioPlayer = new Audio(audioUrl);

        console.log(userMessage);
        console.log(result);
        console.log(visemes);

        setMessageResult({
          visemes,
          audioPlayer,
        });
        audioPlayer.onended = () => {
          setMessageResult(null);
        };
        audioPlayer.currentTime = 0;
        audioPlayer.play();

        addMessage(userMessage);
        addMessage(result);
      } catch (err) {
        console.error("Error sending audio file:", err);
      }
    },
    [messages]
  );

  useEffect(() => {
    if (audioBlob) {
      fetchConversation(audioBlob);
    }
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [audioBlob]);

  return (
    <div>
      <button
        onClick={handleRecordClick}
        className={`flex size-16 items-center justify-center rounded-full text-sm font-bold text-white transition-all duration-300 ease-in-out hover:scale-110 ${
          isRecording
            ? "animate-pulse bg-red-600"
            : "bg-slate-900 shadow-lg hover:bg-slate-950 hover:shadow-xl"
        }`}
      >
        {isRecording ? <Square className="size-6" /> : <Mic className="size-8" />}
      </button>
      {/* <p className="mt-4 text-xl font-semibold text-white">
        {isRecording ? "Recording..." : "Tap to Record"}
      </p> */}
      {/* {hasRecorded && audioURL && (
        <div className="mt-8">
          <audio src={audioURL} controls className="w-64" />
        </div>
      )} */}
    </div>
  );
}
208  src/components/Avatar.tsx  Normal file
@@ -0,0 +1,208 @@
/*
  Auto-generated by: https://github.com/pmndrs/gltfjsx
*/

import { useAnimations, useGLTF } from "@react-three/drei";
import { useFrame } from "@react-three/fiber";
import { useControls } from "leva";
import { useEffect, useRef, useState } from "react";
import * as THREE from "three";
import { GLTF } from "three-stdlib";

import { visemeList, visemesMapping } from "@/lib/constants";
import { useConversation } from "@/lib/store";

type GLTFResult = GLTF & {
  nodes: {
    EyeLeft: THREE.SkinnedMesh;
    EyeRight: THREE.SkinnedMesh;
    Wolf3D_Head: THREE.SkinnedMesh;
    Wolf3D_Teeth: THREE.SkinnedMesh;
    Wolf3D_Hair: THREE.SkinnedMesh;
    Wolf3D_Glasses: THREE.SkinnedMesh;
    Wolf3D_Body: THREE.SkinnedMesh;
    Wolf3D_Outfit_Bottom: THREE.SkinnedMesh;
    Wolf3D_Outfit_Footwear: THREE.SkinnedMesh;
    Wolf3D_Outfit_Top: THREE.SkinnedMesh;
    Hips: THREE.Bone;
  };
  materials: {
    Wolf3D_Eye: THREE.MeshStandardMaterial;
    Wolf3D_Skin: THREE.MeshStandardMaterial;
    Wolf3D_Teeth: THREE.MeshStandardMaterial;
    Wolf3D_Hair: THREE.MeshStandardMaterial;
    Wolf3D_Glasses: THREE.MeshStandardMaterial;
    Wolf3D_Body: THREE.MeshStandardMaterial;
    Wolf3D_Outfit_Bottom: THREE.MeshStandardMaterial;
    Wolf3D_Outfit_Footwear: THREE.MeshStandardMaterial;
    Wolf3D_Outfit_Top: THREE.MeshStandardMaterial;
  };
};

export function Avatar(props: JSX.IntrinsicElements["group"]) {
  const groupRef = useRef(null);
  const [animation, setAnimation] = useState("Idle");
  const [blink, setBlink] = useState(false);
  const message = useConversation((state) => state.messageResult);

  const { nodes, materials, scene } = useGLTF("/mau.glb") as GLTFResult;
  const { animations } = useGLTF("/animations.glb");
  const { actions } = useAnimations(animations, groupRef);

  useEffect(() => {
    actions[animation].play();
  }, [animation, actions]);

  useEffect(() => {
    let blinkTimeout;
    const nextBlink = () => {
      blinkTimeout = setTimeout(
        () => {
          setBlink(true);
          setTimeout(() => {
            setBlink(false);
            nextBlink();
          }, 200);
        },
        THREE.MathUtils.randInt(1000, 5000)
      );
    };
    nextBlink();
    return () => clearTimeout(blinkTimeout);
  }, []);

  useFrame((state, delta) => {
    if (!message) {
      lerpMorphTarget("mouthSmileLeft", 0.36, 0.5);
      lerpMorphTarget("mouthSmileRight", 0.36, 0.5);
    } else {
      lerpMorphTarget("mouthSmileLeft", 0, 0.5);
      lerpMorphTarget("mouthSmileRight", 0, 0.5);
    }
    lerpMorphTarget("eyeBlinkLeft", blink ? 1 : 0, 0.5);
    lerpMorphTarget("eyeBlinkRight", blink ? 1 : 0, 0.5);

    for (let i = 0; i <= 21; i++) {
      lerpMorphTarget(visemeList[i], 0, 0.5); // reset morph targets
    }

    if (message && message.visemes && message.audioPlayer) {
      for (let i = message.visemes.length - 1; i >= 0; i--) {
        const viseme = message.visemes[i];
        if (message.audioPlayer.currentTime * 1000 >= viseme[0]) {
          const visemeMapped = visemesMapping[viseme[1]];
          lerpMorphTarget(visemeMapped, 1, 0.5);
          break;
        }
      }
    }
  });

  const lerpMorphTarget = (target, value, speed = 0.1) => {
    scene.traverse((child) => {
      if (child.isSkinnedMesh && child.morphTargetDictionary) {
        const index = child.morphTargetDictionary[target];
        if (index === undefined || child.morphTargetInfluences[index] === undefined) {
          return;
        }
        child.morphTargetInfluences[index] = THREE.MathUtils.lerp(
          child.morphTargetInfluences[index],
          value,
          speed
        );
      }
    });
  };

  const [, set] = useControls("MorphTarget", () =>
    Object.assign(
      {},
      ...Object.keys(nodes.EyeLeft.morphTargetDictionary).map((key) => {
        // if (key.startsWith("viseme")) {
        return {
          [key]: {
            label: key,
            value: 0,
            min: nodes.EyeLeft.morphTargetInfluences[nodes.EyeLeft.morphTargetDictionary[key]],
            max: 1,
            onChange: (val) => {
              lerpMorphTarget(key, val, 1);
            },
          },
        };
        // }
      })
    )
  );

  return (
    <group {...props} ref={groupRef} dispose={null} position={[0, -1.6, -0.5]}>
      <primitive object={nodes.Hips} />
      <skinnedMesh
        name="EyeLeft"
        geometry={nodes.EyeLeft.geometry}
        material={materials.Wolf3D_Eye}
        skeleton={nodes.EyeLeft.skeleton}
        morphTargetDictionary={nodes.EyeLeft.morphTargetDictionary}
        morphTargetInfluences={nodes.EyeLeft.morphTargetInfluences}
      />
      <skinnedMesh
        name="EyeRight"
        geometry={nodes.EyeRight.geometry}
        material={materials.Wolf3D_Eye}
        skeleton={nodes.EyeRight.skeleton}
        morphTargetDictionary={nodes.EyeRight.morphTargetDictionary}
        morphTargetInfluences={nodes.EyeRight.morphTargetInfluences}
      />
      <skinnedMesh
        name="Wolf3D_Head"
        geometry={nodes.Wolf3D_Head.geometry}
        material={materials.Wolf3D_Skin}
        skeleton={nodes.Wolf3D_Head.skeleton}
        morphTargetDictionary={nodes.Wolf3D_Head.morphTargetDictionary}
        morphTargetInfluences={nodes.Wolf3D_Head.morphTargetInfluences}
      />
      <skinnedMesh
        name="Wolf3D_Teeth"
        geometry={nodes.Wolf3D_Teeth.geometry}
        material={materials.Wolf3D_Teeth}
        skeleton={nodes.Wolf3D_Teeth.skeleton}
        morphTargetDictionary={nodes.Wolf3D_Teeth.morphTargetDictionary}
        morphTargetInfluences={nodes.Wolf3D_Teeth.morphTargetInfluences}
      />
      <skinnedMesh
        geometry={nodes.Wolf3D_Hair.geometry}
        material={materials.Wolf3D_Hair}
        skeleton={nodes.Wolf3D_Hair.skeleton}
      />
      <skinnedMesh
        geometry={nodes.Wolf3D_Glasses.geometry}
        material={materials.Wolf3D_Glasses}
        skeleton={nodes.Wolf3D_Glasses.skeleton}
      />
      <skinnedMesh
        geometry={nodes.Wolf3D_Body.geometry}
        material={materials.Wolf3D_Body}
        skeleton={nodes.Wolf3D_Body.skeleton}
      />
      <skinnedMesh
        geometry={nodes.Wolf3D_Outfit_Bottom.geometry}
        material={materials.Wolf3D_Outfit_Bottom}
        skeleton={nodes.Wolf3D_Outfit_Bottom.skeleton}
      />
      <skinnedMesh
        geometry={nodes.Wolf3D_Outfit_Footwear.geometry}
        material={materials.Wolf3D_Outfit_Footwear}
        skeleton={nodes.Wolf3D_Outfit_Footwear.skeleton}
      />
      <skinnedMesh
        geometry={nodes.Wolf3D_Outfit_Top.geometry}
        material={materials.Wolf3D_Outfit_Top}
        skeleton={nodes.Wolf3D_Outfit_Top.skeleton}
      />
    </group>
  );
}

useGLTF.preload("/mau.glb");
useGLTF.preload("/animations.glb");
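The lip-sync loop in Avatar.tsx resets every viseme morph target each frame, then scans the viseme timeline backwards and raises the first entry whose audio offset (in milliseconds) has already been reached by audioPlayer.currentTime. A hedged, isolated sketch of that lookup (activeViseme is a hypothetical name, not part of the commit):

// visemes are [audioOffsetMs, azureVisemeId] pairs in playback order.
function activeViseme(visemes: [number, number][], currentTimeMs: number): number | null {
  for (let i = visemes.length - 1; i >= 0; i--) {
    if (currentTimeMs >= visemes[i][0]) return visemes[i][1]; // latest viseme that has started
  }
  return null; // before the first viseme: mouth stays neutral
}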
23  src/components/Experience.tsx  Normal file
@@ -0,0 +1,23 @@
"use client";

import { Environment, Loader } from "@react-three/drei";
import { Canvas } from "@react-three/fiber";
import { Leva } from "leva";

import { Avatar } from "./Avatar";

function Experience() {
  return (
    <>
      <Loader />
      <Leva hidden />
      {/* <Canvas shadows camera={{ position: [0, 0, 1], fov: 30 }}> */}
      <Canvas shadows camera={{ position: [0, 0, 1], fov: 30 }}>
        <Environment preset="warehouse" />
        <Avatar />
      </Canvas>
    </>
  );
}

export default Experience;
@@ -1,6 +1,7 @@
"use client";

import { zodResolver } from "@hookform/resolvers/zod";
import { SendIcon } from "lucide-react";
import { useState } from "react";
import { useForm } from "react-hook-form";
import { toast } from "sonner";
@@ -9,13 +10,25 @@ import { z } from "zod";
import { Button } from "@/components/ui/button";
import { Form, FormControl, FormField, FormItem, FormMessage } from "@/components/ui/form";
import { Input } from "@/components/ui/input";
import { useConversation } from "@/lib/store";

const formSchema = z.object({
  message: z.string().min(2).max(50),
});

interface MessageType {
  id: string;
  role: string;
  content: string;
}

export default function SimpleInput() {
  const [isSubmitting, setIsSubmitting] = useState(false);
  // const [messages, setMessages] = useState<MessageType[]>([]);
  const messages = useConversation((state) => state.messages);
  const addMessage = useConversation((state) => state.addMessage);
  const setMessageResult = useConversation((state) => state.setMessageResult);

  const form = useForm<z.infer<typeof formSchema>>({
    resolver: zodResolver(formSchema),
    defaultValues: {
@@ -25,12 +38,44 @@ export default function SimpleInput() {

  async function onSubmit(values: z.infer<typeof formSchema>) {
    setIsSubmitting(true);
    const response = await fetch("http://localhost:3000/api/researcher", {

    const userMessage = {
      role: "user",
      content: values.message,
      id: Date.now().toString(),
    };
    addMessage(userMessage);
    const updatedMessages: MessageType[] = [...messages, userMessage];

    const response = await fetch("/api/conversation/text", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ message: values.message }),
      body: JSON.stringify({ messages: updatedMessages }),
    });
    const data = await response.json();

    const message = JSON.parse((await response.headers.get("message")) || "{}");
    if (Object.keys(message).length === 0) {
      console.error("No message returned from server");
      setIsSubmitting(false);
      return;
    }

    console.log(message);
    const audio = await response.blob();
    const visemes = JSON.parse((await response.headers.get("visemes")) || "[]");
    const audioUrl = URL.createObjectURL(audio);
    const audioPlayer = new Audio(audioUrl);
    setMessageResult({
      visemes,
      audioPlayer,
    });
    audioPlayer.onended = () => {
      setMessageResult(null);
    };
    audioPlayer.currentTime = 0;
    audioPlayer.play();
    console.log(visemes);
    addMessage(message);

    toast(
      <div className="flex flex-col space-y-2">
@@ -38,7 +83,7 @@ export default function SimpleInput() {
          <code className="text-white">{JSON.stringify(values, null, 2)}</code>
        </pre>
        <pre className="mt-2 w-[320px] rounded-md bg-gray-200 p-4">
          <code className="text-black">{data.message}</code>
          <code className="text-black">{JSON.stringify(message, null, 2)}</code>
        </pre>
      </div>
    );
@@ -48,10 +93,7 @@ export default function SimpleInput() {

  return (
    <Form {...form}>
      <form
        onSubmit={form.handleSubmit(onSubmit)}
        className="flex w-full max-w-sm items-center space-x-2"
      >
      <form onSubmit={form.handleSubmit(onSubmit)} className="flex w-80 items-center space-x-2">
        <FormField
          control={form.control}
          name="message"
@@ -59,7 +101,7 @@ export default function SimpleInput() {
            <FormItem className="grow">
              {/* <FormLabel>Message</FormLabel> */}
              <FormControl>
                <Input placeholder="message..." {...field} disabled={isSubmitting} />
                <Input placeholder="Talk to me..." {...field} disabled={isSubmitting} />
              </FormControl>
              {/* <FormDescription>This is your public display name.</FormDescription> */}
              <FormMessage />
@@ -67,7 +109,7 @@ export default function SimpleInput() {
          )}
        />
        <Button type="submit" disabled={isSubmitting}>
          Submit
          <SendIcon strokeWidth={3} />
        </Button>
      </form>
    </Form>
40  src/hooks/useAudioRecorder.ts  Normal file
@@ -0,0 +1,40 @@
"use client";

import { useState, useCallback } from "react";

export const useAudioRecorder = () => {
  const [isRecording, setIsRecording] = useState(false);
  // const [audioURL, setAudioURL] = useState<string | null>(null);
  const [audioBlob, setAudioBlob] = useState<Blob | null>(null);
  const [mediaRecorder, setMediaRecorder] = useState<MediaRecorder | null>(null);

  const startRecording = useCallback(() => {
    navigator.mediaDevices
      .getUserMedia({ audio: true })
      .then((stream) => {
        const recorder = new MediaRecorder(stream);
        setMediaRecorder(recorder);

        const chunks: Blob[] = [];
        recorder.ondataavailable = (e) => chunks.push(e.data);
        recorder.onstop = () => {
          const blob = new Blob(chunks, { type: "audio/ogg; codecs=opus" });
          // setAudioURL(URL.createObjectURL(blob));
          setAudioBlob(blob);
        };

        recorder.start();
        setIsRecording(true);
      })
      .catch((err) => console.error("Error accessing microphone:", err));
  }, []);

  const stopRecording = useCallback(() => {
    if (mediaRecorder) {
      mediaRecorder.stop();
      setIsRecording(false);
    }
  }, [mediaRecorder]);

  return { isRecording, audioBlob, startRecording, stopRecording };
};
42  src/lib/constants.ts  Normal file
@@ -0,0 +1,42 @@
export const visemesMapping = [
  "viseme_sil",
  "viseme_aa",
  "viseme_aa",
  "viseme_O",
  "viseme_E",
  "viseme_E",
  "viseme_I",
  "viseme_U",
  "viseme_O",
  "viseme_aa",
  "viseme_O",
  "viseme_I",
  "viseme_DD",
  "viseme_RR",
  "viseme_nn",
  "viseme_SS",
  "viseme_U",
  "viseme_TH",
  "viseme_FF",
  "viseme_DD",
  "viseme_kk",
  "viseme_PP",
];

export const visemeList = [
  "viseme_sil",
  "viseme_PP",
  "viseme_FF",
  "viseme_TH",
  "viseme_DD",
  "viseme_kk",
  "viseme_CH",
  "viseme_SS",
  "viseme_nn",
  "viseme_RR",
  "viseme_aa",
  "viseme_E",
  "viseme_I",
  "viseme_O",
  "viseme_U",
];
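The two tables serve different roles: visemesMapping is indexed by the Azure Speech viseme ID (0 to 21) that arrives in the Visemes header and yields the morph-target name to raise, while visemeList enumerates the distinct viseme morph targets that Avatar.tsx resets every frame. A small sketch under those assumptions (morphTargetFor is a hypothetical helper, not part of the commit):

import { visemeList, visemesMapping } from "@/lib/constants";

// Convert an Azure viseme ID into the avatar's morph-target name.
const morphTargetFor = (azureVisemeId: number): string => visemesMapping[azureVisemeId];

console.log(morphTargetFor(0)); // "viseme_sil" (silence)
console.log(morphTargetFor(21)); // "viseme_PP"
console.log(visemeList.length); // 15 distinct viseme morph targets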
39  src/lib/graph.ts  Normal file
@@ -0,0 +1,39 @@
import { ChatPromptTemplate, MessagesPlaceholder } from "@langchain/core/prompts";
import { MessagesAnnotation, StateGraph } from "@langchain/langgraph";
import { ChatOpenAI } from "@langchain/openai";

// Init llm model
const llm = new ChatOpenAI({
  model: "gpt-4o-mini",
});

// Define system prompt
const prompt = ChatPromptTemplate.fromMessages([
  [
    "system",
    `You are a conversational AI assistant that provides information about anything that the user wants.
    Provide short answers to give a concise response to the user and go directly to the point.
    Don't provide long answers or give numbered or bulleted lists. Your answer should span from a single sentence to a single paragraph.
    Always be polite and sometimes throw a joke if it is appropriate in the response.
    `,
  ],
  new MessagesPlaceholder("messages"),
]);

// Nodes
async function chatModelNode(state: typeof MessagesAnnotation.State) {
  const chain = prompt.pipe(llm);
  const response = await chain.invoke({
    messages: state.messages,
  });
  // logger.info(response);
  return { messages: [response] };
}

// Define the graph
const builder = new StateGraph(MessagesAnnotation)
  .addNode("agent", chatModelNode)
  .addEdge("__start__", "agent")
  .addEdge("agent", "__end__");

export const graph = builder.compile();
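For reference, a minimal sketch of how the compiled graph is meant to be invoked; this mirrors the call made by the conversation routes (ask is a hypothetical helper, not part of the commit):

import { HumanMessage } from "@langchain/core/messages";

import { graph } from "@/lib/graph";

async function ask(question: string): Promise<string> {
  const result = await graph.invoke({ messages: [new HumanMessage(question)] });
  // The last message in the returned state is the assistant reply.
  return result.messages[result.messages.length - 1].content.toString();
}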
26  src/lib/store.ts  Normal file
@@ -0,0 +1,26 @@
import { create } from "zustand";

interface MessageResultType {
  visemes: [number, number][];
  audioPlayer: HTMLAudioElement;
}

interface MessageType {
  role: string;
  content: string;
  id: string;
}

interface ConversationState {
  messageResult: MessageResultType | null;
  messages: MessageType[];
  setMessageResult: (message: MessageResultType | null) => void;
  addMessage: (message: MessageType) => void;
}

export const useConversation = create<ConversationState>()((set) => ({
  messageResult: null,
  messages: [],
  setMessageResult: (messageResult) => set({ messageResult }),
  addMessage: (message) => set((state) => ({ messages: [...state.messages, message] })),
}));
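A brief usage sketch of the store from a client component; the committed consumers are AudioRecorder.tsx, SimpleInput.tsx, and Avatar.tsx, while MessageLog below is a hypothetical example:

"use client";

import { useConversation } from "@/lib/store";

export default function MessageLog() {
  // Select only the slices this component needs; zustand re-renders on slice changes.
  const messages = useConversation((state) => state.messages);
  const isSpeaking = useConversation((state) => state.messageResult !== null);

  return (
    <ul>
      {messages.map((m) => (
        <li key={m.id}>
          {m.role}: {m.content} {isSpeaking ? "(speaking)" : ""}
        </li>
      ))}
    </ul>
  );
}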