diff --git a/package-lock.json b/package-lock.json index 18c4302..bb4b4d0 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,11 +1,11 @@ { - "name": "ai-companion", + "name": "ai-avatar", "version": "0.1.0", "lockfileVersion": 3, "requires": true, "packages": { "": { - "name": "ai-companion", + "name": "ai-avatar", "version": "0.1.0", "dependencies": { "@hookform/resolvers": "^3.10.0", @@ -21,10 +21,10 @@ "ai": "^4.1.34", "class-variance-authority": "^0.7.1", "clsx": "^2.1.1", + "elevenlabs": "^1.59.0", "langchain": "^0.3.15", "leva": "^0.10.0", "lucide-react": "^0.475.0", - "microsoft-cognitiveservices-speech-sdk": "^1.42.0", "next": "15.1.6", "next-themes": "^0.4.4", "openai": "^4.83.0", @@ -2232,12 +2232,6 @@ "integrity": "sha512-7gqG38EyHgyP1S+7+xomFtL+ZNHcKv6DwNaCZmJmo1vgMugyF3TCnXVg4t1uk89mLNwnLtnY3TpOpCOyp1/xHQ==", "license": "MIT" }, - "node_modules/@types/webrtc": { - "version": "0.0.37", - "resolved": "https://registry.npmjs.org/@types/webrtc/-/webrtc-0.0.37.tgz", - "integrity": "sha512-JGAJC/ZZDhcrrmepU4sPLQLIOIAgs5oIK+Ieq90K8fdaNMhfdfqmYatJdgif1NDQtvrSlTOGJDUYHIDunuufOg==", - "license": "MIT" - }, "node_modules/@types/webxr": { "version": "0.5.21", "resolved": "https://registry.npmjs.org/@types/webxr/-/webxr-0.5.21.tgz", @@ -2546,18 +2540,6 @@ "acorn": "^6.0.0 || ^7.0.0 || ^8.0.0" } }, - "node_modules/agent-base": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-6.0.2.tgz", - "integrity": "sha512-RZNwNclF7+MS/8bDg70amg32dyeZGZxiDuQmZxKLAlQjr3jGyLx+4Kkk58UO7D2QdgFIQCovuSuZESne6RG6XQ==", - "license": "MIT", - "dependencies": { - "debug": "4" - }, - "engines": { - "node": ">= 6.0.0" - } - }, "node_modules/agentkeepalive": { "version": "4.6.0", "resolved": "https://registry.npmjs.org/agentkeepalive/-/agentkeepalive-4.6.0.tgz", @@ -2965,17 +2947,6 @@ ], "license": "MIT" }, - "node_modules/bent": { - "version": "7.3.12", - "resolved": "https://registry.npmjs.org/bent/-/bent-7.3.12.tgz", - "integrity": 
"sha512-T3yrKnVGB63zRuoco/7Ybl7BwwGZR0lceoVG5XmQyMIH9s19SV5m+a8qam4if0zQuAmOQTyPTPmsQBdAorGK3w==", - "license": "Apache-2.0", - "dependencies": { - "bytesish": "^0.4.1", - "caseless": "~0.12.0", - "is-stream": "^2.0.0" - } - }, "node_modules/bidi-js": { "version": "1.0.3", "resolved": "https://registry.npmjs.org/bidi-js/-/bidi-js-1.0.3.tgz", @@ -3087,12 +3058,6 @@ "node": ">=10.16.0" } }, - "node_modules/bytesish": { - "version": "0.4.4", - "resolved": "https://registry.npmjs.org/bytesish/-/bytesish-0.4.4.tgz", - "integrity": "sha512-i4uu6M4zuMUiyfZN4RU2+i9+peJh//pXhd9x1oSe1LBkZ3LEbCoygu8W0bXTukU1Jme2txKuotpCZRaC3FLxcQ==", - "license": "(Apache-2.0 AND MIT)" - }, "node_modules/call-bind": { "version": "1.0.8", "resolved": "https://registry.npmjs.org/call-bind/-/call-bind-1.0.8.tgz", @@ -3116,7 +3081,6 @@ "version": "1.0.1", "resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.1.tgz", "integrity": "sha512-BhYE+WDaywFg2TBWYNXAE+8B1ATnThNBqXHP5nQu0jWJdVvY2hvkpyB3qOmtmDePiS5/BDQ8wASEWGMWRG148g==", - "dev": true, "license": "MIT", "dependencies": { "es-errors": "^1.3.0", @@ -3130,7 +3094,6 @@ "version": "1.0.3", "resolved": "https://registry.npmjs.org/call-bound/-/call-bound-1.0.3.tgz", "integrity": "sha512-YTd+6wGlNlPxSuri7Y6X8tY2dmm12UMH66RpKMhiX6rsk5wXXnYgbUcOt8kiS31/AjfoTOvCsE+w8nZQLQnzHA==", - "dev": true, "license": "MIT", "dependencies": { "call-bind-apply-helpers": "^1.0.1", @@ -3203,12 +3166,6 @@ ], "license": "CC-BY-4.0" }, - "node_modules/caseless": { - "version": "0.12.0", - "resolved": "https://registry.npmjs.org/caseless/-/caseless-0.12.0.tgz", - "integrity": "sha512-4tYFyifaFfGacoiObjJegolkwSU4xQNGbVgUiNYVUxbQ2x2lUsFvY4hVgVzGiIe6WLOPqycWXA40l+PWsxthUw==", - "license": "Apache-2.0" - }, "node_modules/chalk": { "version": "4.1.2", "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz", @@ -3355,6 +3312,12 @@ "node": ">= 0.8" } }, + "node_modules/command-exists": { + "version": "1.2.9", + "resolved": 
"https://registry.npmjs.org/command-exists/-/command-exists-1.2.9.tgz", + "integrity": "sha512-LTQ/SGc+s0Xc0Fu5WaKnR0YiygZkm9eKFvyS+fRsU7/ZWFF8ykFM6Pc9aCVf1+xasOOZpO3BAVgVrKvsqKHV7w==", + "license": "MIT" + }, "node_modules/commander": { "version": "4.1.1", "resolved": "https://registry.npmjs.org/commander/-/commander-4.1.1.tgz", @@ -3670,7 +3633,6 @@ "version": "1.0.1", "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", "integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==", - "dev": true, "license": "MIT", "dependencies": { "call-bind-apply-helpers": "^1.0.1", @@ -3697,6 +3659,42 @@ "safe-buffer": "^5.0.1" } }, + "node_modules/elevenlabs": { + "version": "1.59.0", + "resolved": "https://registry.npmjs.org/elevenlabs/-/elevenlabs-1.59.0.tgz", + "integrity": "sha512-OVKOd+lxNya8h4Rn5fcjv00Asd+DGWfTT6opGrQ16sTI+1HwdLn/kYtjl8tRMhDXbNmksD/9SBRKjb9neiUuVg==", + "deprecated": "This package has moved to @elevenlabs/elevenlabs-js", + "license": "MIT", + "dependencies": { + "command-exists": "^1.2.9", + "execa": "^5.1.1", + "form-data": "^4.0.0", + "form-data-encoder": "^4.0.2", + "formdata-node": "^6.0.3", + "node-fetch": "^2.7.0", + "qs": "^6.13.1", + "readable-stream": "^4.5.2", + "url-join": "4.0.1" + } + }, + "node_modules/elevenlabs/node_modules/form-data-encoder": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/form-data-encoder/-/form-data-encoder-4.0.2.tgz", + "integrity": "sha512-KQVhvhK8ZkWzxKxOr56CPulAhH3dobtuQ4+hNQ+HekH/Wp5gSOafqRAeTphQUJAIk0GBvHZgJ2ZGRWd5kphMuw==", + "license": "MIT", + "engines": { + "node": ">= 18" + } + }, + "node_modules/elevenlabs/node_modules/formdata-node": { + "version": "6.0.3", + "resolved": "https://registry.npmjs.org/formdata-node/-/formdata-node-6.0.3.tgz", + "integrity": "sha512-8e1++BCiTzUno9v5IZ2J6bv4RU+3UKDmqWUQD0MIMVCd9AdhWkO1gw57oo1mNEX1dMq2EGI+FbWz4B92pscSQg==", + "license": "MIT", + "engines": { + "node": ">= 18" + } + 
}, "node_modules/emoji-regex": { "version": "9.2.2", "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-9.2.2.tgz", @@ -3796,7 +3794,6 @@ "version": "1.0.1", "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz", "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==", - "dev": true, "license": "MIT", "engines": { "node": ">= 0.4" @@ -3806,7 +3803,6 @@ "version": "1.3.0", "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", - "dev": true, "license": "MIT", "engines": { "node": ">= 0.4" @@ -3844,7 +3840,6 @@ "version": "1.1.1", "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz", "integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==", - "dev": true, "license": "MIT", "dependencies": { "es-errors": "^1.3.0" @@ -4592,7 +4587,6 @@ "resolved": "https://registry.npmjs.org/events/-/events-3.3.0.tgz", "integrity": "sha512-mQw+2fkQbALzQ7V0MY0IqdnXNOeTtP4r0lN9z7AAawCXgqea7bDii20AYrIBrFd/Hx0M2Ocz6S111CaFkUcb0Q==", "license": "MIT", - "peer": true, "engines": { "node": ">=0.8.x" } @@ -4606,6 +4600,35 @@ "node": ">=18.0.0" } }, + "node_modules/execa": { + "version": "5.1.1", + "resolved": "https://registry.npmjs.org/execa/-/execa-5.1.1.tgz", + "integrity": "sha512-8uSpZZocAZRBAPIEINJj3Lo9HyGitllczc27Eh5YYojjMFMn8yHMDMaUHE2Jqfq05D/wucwI4JGURyXt1vchyg==", + "license": "MIT", + "dependencies": { + "cross-spawn": "^7.0.3", + "get-stream": "^6.0.0", + "human-signals": "^2.1.0", + "is-stream": "^2.0.0", + "merge-stream": "^2.0.0", + "npm-run-path": "^4.0.1", + "onetime": "^5.1.2", + "signal-exit": "^3.0.3", + "strip-final-newline": "^2.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sindresorhus/execa?sponsor=1" + } + 
}, + "node_modules/execa/node_modules/signal-exit": { + "version": "3.0.7", + "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-3.0.7.tgz", + "integrity": "sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==", + "license": "ISC" + }, "node_modules/expr-eval": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/expr-eval/-/expr-eval-2.0.2.tgz", @@ -4990,7 +5013,6 @@ "version": "1.2.7", "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.2.7.tgz", "integrity": "sha512-VW6Pxhsrk0KAOqs3WEd0klDiF/+V7gQOpAvY1jVU/LHmaD/kQO4523aiJuikX/QAKYiW6x8Jh+RJej1almdtCA==", - "dev": true, "license": "MIT", "dependencies": { "call-bind-apply-helpers": "^1.0.1", @@ -5015,7 +5037,6 @@ "version": "1.0.1", "resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz", "integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==", - "dev": true, "license": "MIT", "dependencies": { "dunder-proto": "^1.0.1", @@ -5025,6 +5046,18 @@ "node": ">= 0.4" } }, + "node_modules/get-stream": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-6.0.1.tgz", + "integrity": "sha512-ts6Wi+2j3jQjqi70w5AlN8DFnkSwC+MqmxEzdEALB2qXZYV3X/b1CTfgPLGJNMeAWxdPfU8FO1ms3NUfaHCPYg==", + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/get-symbol-description": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/get-symbol-description/-/get-symbol-description-1.1.0.tgz", @@ -5164,7 +5197,6 @@ "version": "1.2.0", "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz", "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==", - "dev": true, "license": "MIT", "engines": { "node": ">= 0.4" @@ -5242,7 +5274,6 @@ "version": "1.1.0", "resolved": 
"https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz", "integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==", - "dev": true, "license": "MIT", "engines": { "node": ">= 0.4" @@ -5291,26 +5322,13 @@ "integrity": "sha512-uu0VXUK52JhihhnN/MVVo1lvqNNuhoxkonqgO3IpjvQiGpJBdIXMGkofjQb/j9zvV7a1SW8U9g1FslWx/1HOiQ==", "license": "Apache-2.0" }, - "node_modules/https-proxy-agent": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-4.0.0.tgz", - "integrity": "sha512-zoDhWrkR3of1l9QAL8/scJZyLu8j/gBkcwcaQOZh7Gyh/+uJQzGVETdgT30akuwkpL8HTRfssqI3BZuV18teDg==", - "license": "MIT", - "dependencies": { - "agent-base": "5", - "debug": "4" - }, + "node_modules/human-signals": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/human-signals/-/human-signals-2.1.0.tgz", + "integrity": "sha512-B4FFZ6q/T2jhhksgkbEW3HBvWIfDW85snkQgawt07S7J5QXTk6BkNV+0yAeZrM5QpMAdYlocGoljn0sJ/WQkFw==", + "license": "Apache-2.0", "engines": { - "node": ">= 6.0.0" - } - }, - "node_modules/https-proxy-agent/node_modules/agent-base": { - "version": "5.1.1", - "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-5.1.1.tgz", - "integrity": "sha512-TMeqbNl2fMW0nMjTEPOwe3J/PRFP4vqeoNuQMG0HlMrtm5QxKqdvAkZ1pRBQ/ulIyDD5Yq0nJ7YbdD8ey0TO3g==", - "license": "MIT", - "engines": { - "node": ">= 6.0.0" + "node": ">=10.17.0" } }, "node_modules/humanize-ms": { @@ -6897,12 +6915,17 @@ "version": "1.1.0", "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz", "integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==", - "dev": true, "license": "MIT", "engines": { "node": ">= 0.4" } }, + "node_modules/merge-stream": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/merge-stream/-/merge-stream-2.0.0.tgz", + "integrity": 
"sha512-abv/qOcuPfk3URPfDzmZU1LKmuw8kT+0nIHvKrKgFrwifol/doWcdA4ZqsWQ8ENrFKkd67Mfpo/LovbIUsbt3w==", + "license": "MIT" + }, "node_modules/merge-value": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/merge-value/-/merge-value-1.0.0.tgz", @@ -6955,54 +6978,6 @@ "node": ">=8.6" } }, - "node_modules/microsoft-cognitiveservices-speech-sdk": { - "version": "1.42.0", - "resolved": "https://registry.npmjs.org/microsoft-cognitiveservices-speech-sdk/-/microsoft-cognitiveservices-speech-sdk-1.42.0.tgz", - "integrity": "sha512-ERrS1rwPPCN1foOwlJv3XmKO4NtBchjW+zYPQBgv4ffRfh87DcxuISXICPDjvlAU61w/r+y6p1W0pnX3gwVZ7A==", - "license": "MIT", - "dependencies": { - "@types/webrtc": "^0.0.37", - "agent-base": "^6.0.1", - "bent": "^7.3.12", - "https-proxy-agent": "^4.0.0", - "uuid": "^9.0.0", - "ws": "^7.5.6" - } - }, - "node_modules/microsoft-cognitiveservices-speech-sdk/node_modules/uuid": { - "version": "9.0.1", - "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz", - "integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==", - "funding": [ - "https://github.com/sponsors/broofa", - "https://github.com/sponsors/ctavan" - ], - "license": "MIT", - "bin": { - "uuid": "dist/bin/uuid" - } - }, - "node_modules/microsoft-cognitiveservices-speech-sdk/node_modules/ws": { - "version": "7.5.10", - "resolved": "https://registry.npmjs.org/ws/-/ws-7.5.10.tgz", - "integrity": "sha512-+dbF1tHwZpXcbOJdVOkzLDxZP1ailvSxM6ZweXTegylPny803bFhA+vqBYw4s31NSAk4S2Qz+AKXK9a4wkdjcQ==", - "license": "MIT", - "engines": { - "node": ">=8.3.0" - }, - "peerDependencies": { - "bufferutil": "^4.0.1", - "utf-8-validate": "^5.0.2" - }, - "peerDependenciesMeta": { - "bufferutil": { - "optional": true - }, - "utf-8-validate": { - "optional": true - } - } - }, "node_modules/mime-db": { "version": "1.52.0", "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", @@ -7024,6 +6999,15 @@ "node": ">= 0.6" } }, + "node_modules/mimic-fn": 
{ + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/mimic-fn/-/mimic-fn-2.1.0.tgz", + "integrity": "sha512-OqbOk5oEQeAZ8WXWydlu9HJjz9WVdEIvamMCcXmuqUYjTknH/sqsWvhQ3vgwKFRR1HpjvNBKQ37nbJgYzGqGcg==", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, "node_modules/minimatch": { "version": "3.1.2", "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", @@ -7259,6 +7243,18 @@ "node": ">=0.10.0" } }, + "node_modules/npm-run-path": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/npm-run-path/-/npm-run-path-4.0.1.tgz", + "integrity": "sha512-S48WzZW777zhNIrn7gxOlISNAqi9ZC/uQFnRdbeIHhZhCA6UqpkOT8T1G7BvfdgP4Er8gF4sUbaS0i7QvIfCWw==", + "license": "MIT", + "dependencies": { + "path-key": "^3.0.0" + }, + "engines": { + "node": ">=8" + } + }, "node_modules/object-assign": { "version": "4.1.1", "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", @@ -7281,7 +7277,6 @@ "version": "1.13.4", "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.4.tgz", "integrity": "sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew==", - "dev": true, "license": "MIT", "engines": { "node": ">= 0.4" @@ -7407,6 +7402,21 @@ "wrappy": "1" } }, + "node_modules/onetime": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/onetime/-/onetime-5.1.2.tgz", + "integrity": "sha512-kbpaSSGJTWdAY5KPVeMOKXSrPtr8C8C7wodJbcsd51jRnmD+GZu8Y0VoU6Dm5Z4vWr0Ig/1NKuWRKf7j5aaYSg==", + "license": "MIT", + "dependencies": { + "mimic-fn": "^2.1.0" + }, + "engines": { + "node": ">=6" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/openai": { "version": "4.83.0", "resolved": "https://registry.npmjs.org/openai/-/openai-4.83.0.tgz", @@ -7994,7 +8004,6 @@ "resolved": "https://registry.npmjs.org/process/-/process-0.11.10.tgz", "integrity": 
"sha512-cdGef/drWFoydD1JsMzuFf8100nZl+GT+yacc2bEced5f9Rjk4z+WtFUTBu9PhOi9j/jfmBPu0mMEY4wIdAF8A==", "license": "MIT", - "peer": true, "engines": { "node": ">= 0.6.0" } @@ -8075,6 +8084,21 @@ "node": ">=6" } }, + "node_modules/qs": { + "version": "6.14.0", + "resolved": "https://registry.npmjs.org/qs/-/qs-6.14.0.tgz", + "integrity": "sha512-YWWTjgABSKcvs/nWBi9PycY/JiPJqOD4JA6o9Sej2AtvSGarXxKC3OQSk4pAarbdQlKAh5D4FCQkJNkW+GAn3w==", + "license": "BSD-3-Clause", + "dependencies": { + "side-channel": "^1.1.0" + }, + "engines": { + "node": ">=0.6" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, "node_modules/querystringify": { "version": "2.2.0", "resolved": "https://registry.npmjs.org/querystringify/-/querystringify-2.2.0.tgz", @@ -8234,7 +8258,6 @@ "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-4.7.0.tgz", "integrity": "sha512-oIGGmcpTLwPga8Bn6/Z75SVaH1z5dUut2ibSyAMVhmUggWpmDn2dapB0n7f8nwaSiRtepAsfJyfXIO5DCVAODg==", "license": "MIT", - "peer": true, "dependencies": { "abort-controller": "^3.0.0", "buffer": "^6.0.3", @@ -8523,8 +8546,7 @@ "url": "https://feross.org/support" } ], - "license": "MIT", - "peer": true + "license": "MIT" }, "node_modules/safe-push-apply": { "version": "1.0.0", @@ -8732,7 +8754,6 @@ "version": "1.1.0", "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.1.0.tgz", "integrity": "sha512-ZX99e6tRweoUXqR+VBrslhda51Nh5MTQwou5tnUDgbtyM0dBgmhEDtWGP/xbKn6hqfPRHujUNwz5fy/wbbhnpw==", - "dev": true, "license": "MIT", "dependencies": { "es-errors": "^1.3.0", @@ -8752,7 +8773,6 @@ "version": "1.0.0", "resolved": "https://registry.npmjs.org/side-channel-list/-/side-channel-list-1.0.0.tgz", "integrity": "sha512-FCLHtRD/gnpCiCHEiJLOwdmFP+wzCmDEkc9y7NsYxeF4u7Btsn1ZuwgwJGxImImHicJArLP4R0yX4c2KCrMrTA==", - "dev": true, "license": "MIT", "dependencies": { "es-errors": "^1.3.0", @@ -8769,7 +8789,6 @@ "version": "1.0.1", "resolved": 
"https://registry.npmjs.org/side-channel-map/-/side-channel-map-1.0.1.tgz", "integrity": "sha512-VCjCNfgMsby3tTdo02nbjtM/ewra6jPHmpThenkTYh8pG9ucZ/1P8So4u4FGBek/BjpOVsDCMoLA/iuBKIFXRA==", - "dev": true, "license": "MIT", "dependencies": { "call-bound": "^1.0.2", @@ -8788,7 +8807,6 @@ "version": "1.0.2", "resolved": "https://registry.npmjs.org/side-channel-weakmap/-/side-channel-weakmap-1.0.2.tgz", "integrity": "sha512-WPS/HvHQTYnHisLo9McqBHOJk2FkHO/tlpvldyrnem4aeQp4hai3gythswg6p01oSoTl58rcpiFAjF2br2Ak2A==", - "dev": true, "license": "MIT", "dependencies": { "call-bound": "^1.0.2", @@ -8940,7 +8958,6 @@ "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz", "integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==", "license": "MIT", - "peer": true, "dependencies": { "safe-buffer": "~5.2.0" } @@ -9164,6 +9181,15 @@ "node": ">=4" } }, + "node_modules/strip-final-newline": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/strip-final-newline/-/strip-final-newline-2.0.0.tgz", + "integrity": "sha512-BrpvfNAE3dcvq7ll3xVumzjKjZQ5tI1sEUIKr3Uoks0XUl45St3FlatVqef9prk4jRDzhW6WZg+3bk93y6pLjA==", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, "node_modules/strip-json-comments": { "version": "3.1.1", "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-3.1.1.tgz", @@ -9787,6 +9813,12 @@ "punycode": "^2.1.0" } }, + "node_modules/url-join": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/url-join/-/url-join-4.0.1.tgz", + "integrity": "sha512-jk1+QP6ZJqyOiuEI9AEWQfju/nB2Pw466kbA0LEZljHwKeMgd9WrAEgEGxjPDD2+TNbbb37rTyhEfrCXfuKXnA==", + "license": "MIT" + }, "node_modules/url-parse": { "version": "1.5.10", "resolved": "https://registry.npmjs.org/url-parse/-/url-parse-1.5.10.tgz", diff --git a/package.json b/package.json index 5c47d63..ad149af 100644 --- a/package.json +++ b/package.json @@ -22,10 +22,10 @@ "ai": "^4.1.34", 
"class-variance-authority": "^0.7.1", "clsx": "^2.1.1", + "elevenlabs": "^1.59.0", "langchain": "^0.3.15", "leva": "^0.10.0", "lucide-react": "^0.475.0", - "microsoft-cognitiveservices-speech-sdk": "^1.42.0", "next": "15.1.6", "next-themes": "^0.4.4", "openai": "^4.83.0", diff --git a/src/app/api/conversation/speech/route.ts b/src/app/api/conversation/speech/route.ts index 7ae5102..6d91676 100644 --- a/src/app/api/conversation/speech/route.ts +++ b/src/app/api/conversation/speech/route.ts @@ -1,104 +1,122 @@ -// import { PassThrough } from "stream"; - import { PassThrough } from "stream"; import { AIMessage, HumanMessage } from "@langchain/core/messages"; import { Messages } from "@langchain/langgraph"; -import * as sdk from "microsoft-cognitiveservices-speech-sdk"; +import { ElevenLabsClient } from "elevenlabs"; import OpenAI from "openai"; import { graph } from "@/lib/graph"; import logger from "@/lib/logger"; +import { generateApproximateVisemes } from "@/utils/visemes"; const client = new OpenAI({ apiKey: process.env.OPENAI_API_KEY, }); -const speechConfig = sdk.SpeechConfig.fromSubscription( - process.env.SPEECH_KEY || "", - process.env.SPEECH_REGION || "" -); -speechConfig.speechSynthesisVoiceName = "en-US-BrianMultilingualNeural"; -// Allow streaming responses up to 30 seconds -// export const maxDuration = 30; +const elevenlabs = new ElevenLabsClient({ + apiKey: process.env.ELEVENLABS_API_KEY, +}); + +// Configure voice settings +const VOICE_ID = process.env.ELEVENLABS_VOICE_ID || "21m00Tcm4TlvDq8ikWAM"; +const VOICE_SETTINGS = { + stability: 0.5, + similarity_boost: 0.75, + style: 0.0, + use_speaker_boost: true, +}; + +// Function to estimate audio duration from text (rough approximation) +function estimateAudioDuration(text: string): number { + // Average speaking rate is about 150-160 words per minute + const wordsPerMinute = 150; + const words = text.split(/\s+/).length; + const minutes = words / wordsPerMinute; + return minutes * 60 * 1000; // Convert 
to milliseconds +} // Define POST method for chat route export async function POST(req: Request) { - const formData = await req.formData(); - const audio = formData.get("audio") as File; - const messages = JSON.parse(formData.get("messages") as string); - logger.info(JSON.stringify(messages, null, 2)); + try { + const formData = await req.formData(); + const audio = formData.get("audio") as File; + const messages = JSON.parse(formData.get("messages") as string); + logger.info(JSON.stringify(messages, null, 2)); - //* Speech to text - const transcription = await client.audio.transcriptions.create({ - file: audio, - model: "whisper-1", - }); - logger.info(JSON.stringify(transcription, null, 2)); - // create new message with transcription - const userMessage = { - role: "user", - content: transcription.text, - id: Date.now().toString(), - }; - const updatedMessages = [...messages, userMessage]; + //* Speech to text (OpenAI Whisper) + const transcription = await client.audio.transcriptions.create({ + file: audio, + model: "whisper-1", + }); + logger.info(JSON.stringify(transcription, null, 2)); + + // Create new message with transcription + const userMessage = { + role: "user", + content: transcription.text, + id: Date.now().toString(), + }; + const updatedMessages = [...messages, userMessage]; - //* Text to text - const allMessages: Messages = updatedMessages.map((message) => - message.role === "user" ? 
new HumanMessage(message.content) : new AIMessage(message.content) - ); - // Stream of messages - const result = await graph.invoke({ messages: allMessages }); - const lastMessage = result.messages[result.messages.length - 1]; - - //* Text to speech (and visemes) - // Use Microsoft Speech SDK to synthesize speech and get visemes - const speechSynthesizer = new sdk.SpeechSynthesizer(speechConfig); - const visemes: [number, number][] = []; - speechSynthesizer.visemeReceived = function (s, e) { - // logger.info( - // "(Viseme), Audio offset: " + e.audioOffset / 10000 + "ms. Viseme ID: " + e.visemeId - // ); - visemes.push([e.audioOffset / 10000, e.visemeId]); - }; - const audioStream = await new Promise((resolve, reject) => { - speechSynthesizer.speakTextAsync( - `${lastMessage.content}`, - (result) => { - const { audioData } = result; - - speechSynthesizer.close(); - - // convert arrayBuffer to stream - const bufferStream = new PassThrough(); - bufferStream.end(Buffer.from(audioData)); - resolve(bufferStream); - }, - (error) => { - logger.error(error); - speechSynthesizer.close(); - reject(error); - } + //* Text to text + const allMessages: Messages = updatedMessages.map((message) => + message.role === "user" ? 
new HumanMessage(message.content) : new AIMessage(message.content) ); - }); + + // Stream of messages + const result = await graph.invoke({ messages: allMessages }); + const lastMessage = result.messages[result.messages.length - 1]; + const messageText = lastMessage.content.toString(); - //* Return processed response - logger.info(`Response: ${lastMessage.content}`); - const safeLastMessageContent = lastMessage.content - .toString() - .replace(/[\u2018\u2019]/g, "'") - .replace(/\u2014/g, "-"); - return new Response(audioStream, { - headers: { - "Content-Type": "audio/mpeg", - "Content-Disposition": `inline; filename=tts.mp3`, - Visemes: JSON.stringify(visemes), - Result: JSON.stringify({ - id: lastMessage.id, - role: "assistant", - content: safeLastMessageContent, - }), - UserMessage: JSON.stringify(userMessage), - }, - }); -} + //* Text to speech with ElevenLabs + const audioStream = await elevenlabs.generate({ + voice: VOICE_ID, + text: messageText, + model_id: "eleven_multilingual_v2", + voice_settings: VOICE_SETTINGS, + }); + + // Convert the audio stream to a PassThrough stream + const bufferStream = new PassThrough(); + + // ElevenLabs returns an async iterator, so we need to collect the chunks + const chunks: Uint8Array[] = []; + for await (const chunk of audioStream) { + chunks.push(chunk); + } + + // Combine all chunks into a single buffer + const audioBuffer = Buffer.concat(chunks); + bufferStream.end(audioBuffer); + + // Generate approximate visemes + const estimatedDuration = estimateAudioDuration(messageText); + const visemes = generateApproximateVisemes(messageText, estimatedDuration); + + //* Return processed response + logger.info(`Response: ${lastMessage.content}`); + const safeLastMessageContent = messageText + .replace(/[\u2018\u2019]/g, "'") + .replace(/\u2014/g, "-"); + + return new Response(bufferStream, { + headers: { + "Content-Type": "audio/mpeg", + "Content-Disposition": `inline; filename=tts.mp3`, + Visemes: JSON.stringify(visemes), + 
Result: JSON.stringify({ + id: lastMessage.id, + role: "assistant", + content: safeLastMessageContent, + }), + UserMessage: JSON.stringify(userMessage), + }, + }); + } catch (error) { + logger.error("Error in speech route:", error); + return new Response(JSON.stringify({ error: "Internal server error" }), { + status: 500, + headers: { "Content-Type": "application/json" }, + }); + } +} \ No newline at end of file diff --git a/src/app/api/conversation/text/route.ts b/src/app/api/conversation/text/route.ts index c489d06..65dca99 100644 --- a/src/app/api/conversation/text/route.ts +++ b/src/app/api/conversation/text/route.ts @@ -1,82 +1,148 @@ -// import { PassThrough } from "stream"; - import { PassThrough } from "stream"; import { AIMessage, HumanMessage } from "@langchain/core/messages"; import { Messages } from "@langchain/langgraph"; import { Message } from "ai"; -import * as sdk from "microsoft-cognitiveservices-speech-sdk"; +import { ElevenLabsClient } from "elevenlabs"; import { graph } from "@/lib/graph"; import logger from "@/lib/logger"; -const speechConfig = sdk.SpeechConfig.fromSubscription( - process.env.SPEECH_KEY || "", - process.env.SPEECH_REGION || "" -); -speechConfig.speechSynthesisVoiceName = "en-US-BrianMultilingualNeural"; +const elevenlabs = new ElevenLabsClient({ + apiKey: process.env.ELEVENLABS_API_KEY, +}); -// Allow streaming responses up to 30 seconds -// export const maxDuration = 30; +// Configure voice settings +const VOICE_ID = process.env.ELEVENLABS_VOICE_ID || "21m00Tcm4TlvDq8ikWAM"; // Default to Rachel voice +const VOICE_SETTINGS = { + stability: 0.5, + similarity_boost: 0.75, + style: 0.0, + use_speaker_boost: true, +}; + +// Function to estimate audio duration from text (rough approximation) +function estimateAudioDuration(text: string): number { + // Average speaking rate is about 150-160 words per minute + const wordsPerMinute = 150; + const words = text.split(/\s+/).length; + const minutes = words / wordsPerMinute; + return 
minutes * 60 * 1000; // Convert to milliseconds +} + +// Simple viseme generation (basic approximation) +function generateApproximateVisemes(text: string, audioDurationMs: number): [number, number][] { + // Basic phoneme to viseme mapping + const phonemeToViseme: { [key: string]: number } = { + 'sil': 0, 'p': 1, 'b': 1, 'm': 1, 'f': 2, 'v': 2, + 'th': 3, 't': 4, 'd': 4, 'n': 4, 'l': 4, 's': 5, + 'z': 5, 'sh': 6, 'k': 7, 'g': 7, 'r': 8, 'aa': 9, + 'ih': 10, 'uw': 11, 'ah': 12, 'ae': 13, 'eh': 14, + 'ow': 15, 'ay': 16, 'er': 17, 'w': 18 + }; + + const words = text.toLowerCase().replace(/[^\w\s]/g, '').split(/\s+/); + const phonemes: string[] = []; + + for (const word of words) { + for (let i = 0; i < word.length; i++) { + const char = word[i]; + if ('aeiou'.includes(char)) { + phonemes.push('ah'); + } else if ('pbm'.includes(char)) { + phonemes.push('p'); + } else if ('fv'.includes(char)) { + phonemes.push('f'); + } else if ('tdnl'.includes(char)) { + phonemes.push('t'); + } else if ('sz'.includes(char)) { + phonemes.push('s'); + } else if ('kg'.includes(char)) { + phonemes.push('k'); + } else if (char === 'r') { + phonemes.push('r'); + } else { + phonemes.push('t'); + } + } + phonemes.push('sil'); + } + + const visemes: [number, number][] = []; + const timePerPhoneme = audioDurationMs / phonemes.length; + + phonemes.forEach((phoneme, index) => { + const visemeId = phonemeToViseme[phoneme] || 0; + const timestamp = index * timePerPhoneme; + visemes.push([timestamp, visemeId]); + }); + + return visemes; +} // Define POST method for chat route export async function POST(req: Request) { - const { - messages, - }: { - messages: Message[]; - } = await req.json(); + try { + const { + messages, + }: { + messages: Message[]; + } = await req.json(); - // TODO: Filter to only include last message when using langgraph memory - const allMessages: Messages = messages.map((message) => - message.role === "user" ? 
new HumanMessage(message.content) : new AIMessage(message.content) - ); - - // Stream of messages - const result = await graph.invoke({ messages: allMessages }); - const lastMessage = result.messages[result.messages.length - 1]; - - // Use Microsoft Speech SDK to synthesize speech and get visemes - const speechSynthesizer = new sdk.SpeechSynthesizer(speechConfig); - const visemes: [number, number][] = []; - speechSynthesizer.visemeReceived = function (s, e) { - // logger.info( - // "(Viseme), Audio offset: " + e.audioOffset / 10000 + "ms. Viseme ID: " + e.visemeId - // ); - visemes.push([e.audioOffset / 10000, e.visemeId]); - }; - const audioStream = await new Promise((resolve, reject) => { - speechSynthesizer.speakTextAsync( - `${lastMessage.content}`, - (result) => { - const { audioData } = result; - - speechSynthesizer.close(); - - // convert arrayBuffer to stream - const bufferStream = new PassThrough(); - bufferStream.end(Buffer.from(audioData)); - resolve(bufferStream); - }, - (error) => { - logger.error(error); - speechSynthesizer.close(); - reject(error); - } + // TODO: Filter to only include last message when using langgraph memory + const allMessages: Messages = messages.map((message) => + message.role === "user" ? 
new HumanMessage(message.content) : new AIMessage(message.content) ); - }); - logger.info(`Response: ${lastMessage.content}`); - return new Response(audioStream, { - headers: { - "Content-Type": "audio/mpeg", - "Content-Disposition": `inline; filename=tts.mp3`, - Visemes: JSON.stringify(visemes), - Message: JSON.stringify({ - id: lastMessage.id, - role: "assistant", - content: lastMessage.content, - }), - }, - }); -} + // Stream of messages + const result = await graph.invoke({ messages: allMessages }); + const lastMessage = result.messages[result.messages.length - 1]; + const messageText = lastMessage.content.toString(); + + // Use ElevenLabs to synthesize speech + const audioStream = await elevenlabs.generate({ + voice: VOICE_ID, + text: messageText, + model_id: "eleven_multilingual_v2", + voice_settings: VOICE_SETTINGS, + }); + + // Convert the audio stream to a PassThrough stream + const bufferStream = new PassThrough(); + + // ElevenLabs returns an async iterator, so we need to collect the chunks + const chunks: Uint8Array[] = []; + for await (const chunk of audioStream) { + chunks.push(chunk); + } + + // Combine all chunks into a single buffer + const audioBuffer = Buffer.concat(chunks); + bufferStream.end(audioBuffer); + + // Generate approximate visemes + const estimatedDuration = estimateAudioDuration(messageText); + const visemes = generateApproximateVisemes(messageText, estimatedDuration); + + logger.info(`Response: ${lastMessage.content}`); + + return new Response(bufferStream, { + headers: { + "Content-Type": "audio/mpeg", + "Content-Disposition": `inline; filename=tts.mp3`, + Visemes: JSON.stringify(visemes), + Message: JSON.stringify({ + id: lastMessage.id, + role: "assistant", + content: lastMessage.content, + }), + }, + }); + } catch (error) { + logger.error("Error in text route:", error); + return new Response(JSON.stringify({ error: "Internal server error" }), { + status: 500, + headers: { "Content-Type": "application/json" }, + }); + } +} \ No 
// utils/visemes.ts
// Simple phoneme to viseme mapping based on text analysis.
// This is a basic implementation - you might want to use a more sophisticated library.

/** Lookup table from a phoneme label (e.g. 'th', 'ah', 'sil') to a numeric viseme ID. */
interface VisemeMapping {
  [key: string]: number;
}

// Microsoft viseme IDs mapping
const PHONEME_TO_VISEME: VisemeMapping = {
  // Silence
  sil: 0,
  // Consonants
  p: 1, b: 1, m: 1,
  f: 2, v: 2,
  th: 3, dh: 3,
  t: 4, d: 4, n: 4, l: 4,
  s: 5, z: 5,
  sh: 6, zh: 6, ch: 6, jh: 6,
  k: 7, g: 7, ng: 7,
  r: 8,
  // Vowels
  aa: 9, ao: 9,
  ih: 10, iy: 10,
  uw: 11, uh: 11,
  ah: 12, ax: 12,
  ae: 13,
  eh: 14, ey: 14,
  ow: 15, oy: 15,
  ay: 16, aw: 16,
  er: 17,
  w: 18, y: 18,
};

/**
 * Very rough text-to-phoneme approximation.
 *
 * The text is lower-cased, stripped of everything that is not a word character
 * or whitespace, and split on whitespace. Each remaining character is mapped to
 * a coarse phoneme label, and a 'sil' (silence) label is appended after every
 * word. For production use, consider a real phonemizer such as espeak-ng.
 *
 * @param text - Arbitrary text to approximate.
 * @returns Phoneme labels in reading order; always contains at least one entry
 *   because `split` yields at least one (possibly empty) word.
 */
function textToPhonemes(text: string): string[] {
  const words = text.toLowerCase().replace(/[^\w\s]/g, '').split(/\s+/);
  const phonemes: string[] = [];

  for (const word of words) {
    for (let i = 0; i < word.length; i++) {
      // The digraph must be tested against two characters; comparing the single
      // current character with 'th' can never match.
      if (word.startsWith('th', i)) {
        phonemes.push('th');
        i++; // Skip the 'h' that was consumed as part of the digraph
        continue;
      }

      const char = word.charAt(i);
      if ('aeiou'.includes(char)) {
        phonemes.push('ah'); // Generic vowel
      } else if (char === 'r') {
        phonemes.push('r');
      } else if (char === 'l') {
        phonemes.push('l');
      } else if ('pbm'.includes(char)) {
        phonemes.push('p');
      } else if ('fv'.includes(char)) {
        phonemes.push('f');
      } else if ('tdn'.includes(char)) {
        phonemes.push('t');
      } else if ('sz'.includes(char)) {
        phonemes.push('s');
      } else if ('kg'.includes(char)) {
        phonemes.push('k');
      } else {
        phonemes.push('t'); // Default consonant
      }
    }
    phonemes.push('sil'); // Silence between words
  }

  return phonemes;
}

/**
 * Builds an approximate viseme timeline for a piece of spoken text.
 *
 * Phonemes derived from `text` are spread evenly over `audioDurationMs`; each
 * entry is a `[timestampMs, visemeId]` pair. Phoneme labels missing from the
 * mapping fall back to viseme 0 (silence).
 *
 * @param text - The text that was synthesized.
 * @param audioDurationMs - Duration of the audio, in milliseconds.
 * @returns One `[timestampMs, visemeId]` tuple per approximated phoneme.
 */
export function generateApproximateVisemes(text: string, audioDurationMs: number): [number, number][] {
  const phonemes = textToPhonemes(text);

  // Distribute phonemes evenly across the audio duration.
  // textToPhonemes never returns an empty array, so this cannot divide by zero.
  const timePerPhoneme = audioDurationMs / phonemes.length;

  return phonemes.map((phoneme, index): [number, number] => [
    index * timePerPhoneme,
    PHONEME_TO_VISEME[phoneme] ?? 0,
  ]);
}

/**
 * Returns a rough duration estimate, in milliseconds, for an audio buffer.
 *
 * No decoding is performed: the estimate is simply `byteLength / 16000` seconds.
 * An accurate value would require probing the audio (e.g. with an ffprobe-based
 * library).
 *
 * @param audioBuffer - Encoded audio bytes.
 * @returns A promise resolving to the estimated duration in milliseconds.
 */
export function getAudioDuration(audioBuffer: Buffer): Promise<number> {
  return Promise.resolve((audioBuffer.length / 16000) * 1000); // Rough estimate
}