import { PassThrough } from "stream";

import { AIMessage, HumanMessage } from "@langchain/core/messages";
import { Messages } from "@langchain/langgraph";
import { ElevenLabsClient } from "elevenlabs";
import OpenAI from "openai";

import { graph } from "@/lib/graph";
import logger from "@/lib/logger";
import { generateApproximateVisemes } from "@/utils/visemes";
// OpenAI client — used below only for Whisper speech-to-text transcription.
// Reads OPENAI_API_KEY from the environment; no key validation happens here,
// so a missing key surfaces as a runtime error on the first API call.
const client = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY,
});

// ElevenLabs client — used below for text-to-speech synthesis.
// Same caveat: ELEVENLABS_API_KEY is not checked until the first call.
const elevenlabs = new ElevenLabsClient({
  apiKey: process.env.ELEVENLABS_API_KEY,
});
// Configure voice settings
// Voice used for synthesis; overridable via env, with a hard-coded fallback ID.
// NOTE(review): presumably "21m00Tcm4TlvDq8ikWAM" is a stock ElevenLabs voice —
// confirm it is the intended default.
const VOICE_ID = process.env.ELEVENLABS_VOICE_ID || "21m00Tcm4TlvDq8ikWAM";

// ElevenLabs `voice_settings` payload passed to every generate() call below.
const VOICE_SETTINGS = {
  stability: 0.5,
  similarity_boost: 0.75,
  style: 0.0,
  use_speaker_boost: true,
};
// Function to estimate audio duration from text (rough approximation)
|
|
function estimateAudioDuration(text: string): number {
|
|
// Average speaking rate is about 150-160 words per minute
|
|
const wordsPerMinute = 150;
|
|
const words = text.split(/\s+/).length;
|
|
const minutes = words / wordsPerMinute;
|
|
return minutes * 60 * 1000; // Convert to milliseconds
|
|
}
|
|
|
|
// Define POST method for chat route
|
|
export async function POST(req: Request) {
|
|
try {
|
|
const formData = await req.formData();
|
|
const audio = formData.get("audio") as File;
|
|
const messages = JSON.parse(formData.get("messages") as string);
|
|
logger.info(JSON.stringify(messages, null, 2));
|
|
|
|
//* Speech to text (OpenAI Whisper)
|
|
const transcription = await client.audio.transcriptions.create({
|
|
file: audio,
|
|
model: "whisper-1",
|
|
});
|
|
logger.info(JSON.stringify(transcription, null, 2));
|
|
|
|
// Create new message with transcription
|
|
const userMessage = {
|
|
role: "user",
|
|
content: transcription.text,
|
|
id: Date.now().toString(),
|
|
};
|
|
const updatedMessages = [...messages, userMessage];
|
|
|
|
//* Text to text
|
|
const allMessages: Messages = updatedMessages.map((message) =>
|
|
message.role === "user" ? new HumanMessage(message.content) : new AIMessage(message.content)
|
|
);
|
|
|
|
// Stream of messages
|
|
const result = await graph.invoke({ messages: allMessages });
|
|
const lastMessage = result.messages[result.messages.length - 1];
|
|
const messageText = lastMessage.content.toString();
|
|
|
|
//* Text to speech with ElevenLabs
|
|
const audioStream = await elevenlabs.generate({
|
|
voice: VOICE_ID,
|
|
text: messageText,
|
|
model_id: "eleven_multilingual_v2",
|
|
voice_settings: VOICE_SETTINGS,
|
|
});
|
|
|
|
// Convert the audio stream to a PassThrough stream
|
|
const bufferStream = new PassThrough();
|
|
|
|
// ElevenLabs returns an async iterator, so we need to collect the chunks
|
|
const chunks: Uint8Array[] = [];
|
|
for await (const chunk of audioStream) {
|
|
chunks.push(chunk);
|
|
}
|
|
|
|
// Combine all chunks into a single buffer
|
|
const audioBuffer = Buffer.concat(chunks);
|
|
bufferStream.end(audioBuffer);
|
|
|
|
// Generate approximate visemes
|
|
const estimatedDuration = estimateAudioDuration(messageText);
|
|
const visemes = generateApproximateVisemes(messageText, estimatedDuration);
|
|
|
|
//* Return processed response
|
|
logger.info(`Response: ${lastMessage.content}`);
|
|
const safeLastMessageContent = messageText
|
|
.replace(/[\u2018\u2019]/g, "'")
|
|
.replace(/\u2014/g, "-");
|
|
|
|
return new Response(bufferStream, {
|
|
headers: {
|
|
"Content-Type": "audio/mpeg",
|
|
"Content-Disposition": `inline; filename=tts.mp3`,
|
|
Visemes: JSON.stringify(visemes),
|
|
Result: JSON.stringify({
|
|
id: lastMessage.id,
|
|
role: "assistant",
|
|
content: safeLastMessageContent,
|
|
}),
|
|
UserMessage: JSON.stringify(userMessage),
|
|
},
|
|
});
|
|
} catch (error) {
|
|
logger.error("Error in speech route:", error);
|
|
return new Response(JSON.stringify({ error: "Internal server error" }), {
|
|
status: 500,
|
|
headers: { "Content-Type": "application/json" },
|
|
});
|
|
}
|
|
} |