mirror of
https://github.com/langgenius/dify.git
synced 2026-01-09 07:44:12 +00:00
Compare commits
193 Commits
feat/class
...
fix/e-admi
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
85b4386a02 | ||
|
|
1c2e8e1ce7 | ||
|
|
33d2c9d2ca | ||
|
|
4fa3d78ed8 | ||
|
|
849994d35e | ||
|
|
2fce4a338c | ||
|
|
5f7f851b17 | ||
|
|
559ab46ee1 | ||
|
|
df98223c8c | ||
|
|
144f9507f8 | ||
|
|
2e097a1ac0 | ||
|
|
9f7d8a981f | ||
|
|
c4729f8c20 | ||
|
|
5cb1cf9eca | ||
|
|
40b31bafd5 | ||
|
|
d38a2c95fb | ||
|
|
7d18e2a0ef | ||
|
|
024f242251 | ||
|
|
de14a55bde | ||
|
|
cbb1d722a5 | ||
|
|
1769ce16f3 | ||
|
|
170139bb0f | ||
|
|
ede0deb447 | ||
|
|
d40f2e7d94 | ||
|
|
70ebfc064b | ||
|
|
d6c252d77e | ||
|
|
fc3d3e0565 | ||
|
|
b786bbdab5 | ||
|
|
f45321dd27 | ||
|
|
746d4d8ead | ||
|
|
7c31e3b6ba | ||
|
|
7c1116f139 | ||
|
|
b82cc1c2e8 | ||
|
|
fee51ba994 | ||
|
|
2259dfdc58 | ||
|
|
3761944a3f | ||
|
|
a239e756b0 | ||
|
|
ac54dd89f4 | ||
|
|
5310ed4b54 | ||
|
|
bfdce78ca5 | ||
|
|
00c2258352 | ||
|
|
09f8da1429 | ||
|
|
9f07584a00 | ||
|
|
a1b3d41712 | ||
|
|
fcc274d679 | ||
|
|
14f378bbc6 | ||
|
|
669fb6be0f | ||
|
|
724ffe55c9 | ||
|
|
bfa5828259 | ||
|
|
455d14296f | ||
|
|
d1a25e54e5 | ||
|
|
9462ed7bbf | ||
|
|
c6e63ac816 | ||
|
|
a27db51b83 | ||
|
|
e52a9fbfb7 | ||
|
|
2af1dd6de3 | ||
|
|
b26e20fe34 | ||
|
|
161ff432f1 | ||
|
|
509733fbf0 | ||
|
|
99a9def623 | ||
|
|
7770a45253 | ||
|
|
bafdbade52 | ||
|
|
fa76590c24 | ||
|
|
d5b75470e4 | ||
|
|
5f87bdbe3a | ||
|
|
cb13b53ccd | ||
|
|
a1dc3cfdec | ||
|
|
7a4ec9cf23 | ||
|
|
4785c061a9 | ||
|
|
4105c8ff70 | ||
|
|
b922c8c215 | ||
|
|
cbea30e65f | ||
|
|
e9a207b38e | ||
|
|
5e50570739 | ||
|
|
46d43e6758 | ||
|
|
fe1846c437 | ||
|
|
1045f6db7a | ||
|
|
50d36612f0 | ||
|
|
e38631db8a | ||
|
|
7f63cd52a2 | ||
|
|
8e75eb5c63 | ||
|
|
970508fcb6 | ||
|
|
5b357fdbf0 | ||
|
|
9283a5414f | ||
|
|
8923e64b8d | ||
|
|
2a2a0e9be9 | ||
|
|
061a765b7d | ||
|
|
acd7fead87 | ||
|
|
64e9d96d84 | ||
|
|
d27de3818c | ||
|
|
bbb080d5b2 | ||
|
|
8c025abb3b | ||
|
|
c01d8a70f3 | ||
|
|
98606ca558 | ||
|
|
adf3e18ebd | ||
|
|
1ca15989e0 | ||
|
|
8b5a3a9424 | ||
|
|
42ddcf1edd | ||
|
|
21561df10f | ||
|
|
4327ec8c4c | ||
|
|
bbc5ec8301 | ||
|
|
4a51a72c1d | ||
|
|
4b6adffa8e | ||
|
|
c7fd73d330 | ||
|
|
8a709e445a | ||
|
|
f02b77b99f | ||
|
|
abc625bcce | ||
|
|
b6bc1f8bc4 | ||
|
|
b8f9037cd3 | ||
|
|
02606ba3c7 | ||
|
|
79311d3fb5 | ||
|
|
31086a1fbf | ||
|
|
6ae5d052e5 | ||
|
|
c794ecf101 | ||
|
|
d887aae012 | ||
|
|
1b1e96eff7 | ||
|
|
eecd091063 | ||
|
|
d38f2cb380 | ||
|
|
56aaee5558 | ||
|
|
d72b4752c9 | ||
|
|
ea769c6483 | ||
|
|
ec194fa3d4 | ||
|
|
b877039859 | ||
|
|
54634f26d2 | ||
|
|
3bef91a2cd | ||
|
|
7da45ba589 | ||
|
|
e0232c67cc | ||
|
|
1dc4a229d4 | ||
|
|
0e0bada1f3 | ||
|
|
5366a814f9 | ||
|
|
f1240a22db | ||
|
|
66f35c2b7e | ||
|
|
766ee48531 | ||
|
|
083045f45c | ||
|
|
fe237802c9 | ||
|
|
00b923651f | ||
|
|
24fce3cc64 | ||
|
|
8ba969f67d | ||
|
|
6844d59371 | ||
|
|
fe5529db85 | ||
|
|
d89034d913 | ||
|
|
360fbeb108 | ||
|
|
e7c2fa1cfa | ||
|
|
735f09d977 | ||
|
|
f83a5e3e49 | ||
|
|
01a8d4efcc | ||
|
|
fdb1e649d4 | ||
|
|
0856792a57 | ||
|
|
0e33a3aa5f | ||
|
|
d3895bcd6b | ||
|
|
eeb390650b | ||
|
|
ca19bd31d4 | ||
|
|
413dfd5628 | ||
|
|
f9515901cc | ||
|
|
3f42fabff8 | ||
|
|
1caa578771 | ||
|
|
b7c11c1818 | ||
|
|
3eb3db0663 | ||
|
|
be46f32056 | ||
|
|
6e5c915f96 | ||
|
|
04d13a8116 | ||
|
|
e638ede3f2 | ||
|
|
2348abe4bf | ||
|
|
f7e7a399d9 | ||
|
|
ba91f34636 | ||
|
|
16865d43a8 | ||
|
|
0d13aee15c | ||
|
|
49b4144ffd | ||
|
|
186e2d972e | ||
|
|
40dd63ecef | ||
|
|
6d66d6da15 | ||
|
|
03ec3513f3 | ||
|
|
87763fc234 | ||
|
|
f6c44cae2e | ||
|
|
da2ee04fce | ||
|
|
7673c36af3 | ||
|
|
9457b2af2f | ||
|
|
7203991032 | ||
|
|
5a685f7156 | ||
|
|
a6a25030ad | ||
|
|
00458a31d5 | ||
|
|
c6ddf6d6cc | ||
|
|
34b21b3065 | ||
|
|
8fbb355cd2 | ||
|
|
e8b3b7e578 | ||
|
|
59ca44f493 | ||
|
|
9e1457c2c3 | ||
|
|
fac83e14bc | ||
|
|
a97cec57e4 | ||
|
|
38c10b47d3 | ||
|
|
1a2523fd15 | ||
|
|
03243cb422 | ||
|
|
2ad7ee0344 |
2
.github/workflows/build-push.yml
vendored
2
.github/workflows/build-push.yml
vendored
@@ -5,6 +5,8 @@ on:
|
||||
branches:
|
||||
- "main"
|
||||
- "deploy/dev"
|
||||
- "deploy/enterprise"
|
||||
- "e-260"
|
||||
release:
|
||||
types: [published]
|
||||
|
||||
|
||||
29
.github/workflows/deploy-enterprise.yml
vendored
Normal file
29
.github/workflows/deploy-enterprise.yml
vendored
Normal file
@@ -0,0 +1,29 @@
|
||||
name: Deploy Enterprise
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
on:
|
||||
workflow_run:
|
||||
workflows: ["Build and Push API & Web"]
|
||||
branches:
|
||||
- "deploy/enterprise"
|
||||
types:
|
||||
- completed
|
||||
|
||||
jobs:
|
||||
deploy:
|
||||
runs-on: ubuntu-latest
|
||||
if: |
|
||||
github.event.workflow_run.conclusion == 'success' &&
|
||||
github.event.workflow_run.head_branch == 'deploy/enterprise'
|
||||
|
||||
steps:
|
||||
- name: Deploy to server
|
||||
uses: appleboy/ssh-action@v0.1.8
|
||||
with:
|
||||
host: ${{ secrets.ENTERPRISE_SSH_HOST }}
|
||||
username: ${{ secrets.ENTERPRISE_SSH_USER }}
|
||||
password: ${{ secrets.ENTERPRISE_SSH_PASSWORD }}
|
||||
script: |
|
||||
${{ vars.ENTERPRISE_SSH_SCRIPT || secrets.ENTERPRISE_SSH_SCRIPT }}
|
||||
47
.github/workflows/docker-build.yml
vendored
Normal file
47
.github/workflows/docker-build.yml
vendored
Normal file
@@ -0,0 +1,47 @@
|
||||
name: Build docker image
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
- "main"
|
||||
paths:
|
||||
- api/Dockerfile
|
||||
- web/Dockerfile
|
||||
|
||||
concurrency:
|
||||
group: docker-build-${{ github.head_ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
build-docker:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
- service_name: "api-amd64"
|
||||
platform: linux/amd64
|
||||
context: "api"
|
||||
- service_name: "api-arm64"
|
||||
platform: linux/arm64
|
||||
context: "api"
|
||||
- service_name: "web-amd64"
|
||||
platform: linux/amd64
|
||||
context: "web"
|
||||
- service_name: "web-arm64"
|
||||
platform: linux/arm64
|
||||
context: "web"
|
||||
steps:
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Build Docker Image
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
push: false
|
||||
context: "{{defaultContext}}:${{ matrix.context }}"
|
||||
platforms: ${{ matrix.platform }}
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
3
.markdownlint.json
Normal file
3
.markdownlint.json
Normal file
@@ -0,0 +1,3 @@
|
||||
{
|
||||
"MD024": false
|
||||
}
|
||||
32
CHANGELOG.md
Normal file
32
CHANGELOG.md
Normal file
@@ -0,0 +1,32 @@
|
||||
# Changelog
|
||||
|
||||
All notable changes to Dify will be documented in this file.
|
||||
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [0.15.7] - 2025-04-27
|
||||
|
||||
### Added
|
||||
|
||||
- Added support for GPT-4.1 in model providers (#18912)
|
||||
- Added support for Amazon Bedrock DeepSeek-R1 model (#18908)
|
||||
- Added support for Amazon Bedrock Claude Sonnet 3.7 model (#18788)
|
||||
- Refined version compatibility logic in app DSL service
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed issue with creating apps from template categories (#18807, #18868)
|
||||
- Fixed DSL version check when creating apps from explore templates (#18872, #18878)
|
||||
|
||||
## [0.15.6] - 2025-04-22
|
||||
|
||||
### Security
|
||||
|
||||
- Fixed clickjacking vulnerability (#18552)
|
||||
- Fixed reset password security issue (#18366)
|
||||
- Updated reset password token when email code verification succeeds (#18362)
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed Vertex AI Gemini 2.0 Flash 001 schema (#18405)
|
||||
@@ -25,6 +25,9 @@
|
||||
<a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
|
||||
<img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
|
||||
alt="follow on X(Twitter)"></a>
|
||||
<a href="https://www.linkedin.com/company/langgenius/" target="_blank">
|
||||
<img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
|
||||
alt="follow on LinkedIn"></a>
|
||||
<a href="https://hub.docker.com/u/langgenius" target="_blank">
|
||||
<img alt="Docker Pulls" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
|
||||
<a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">
|
||||
|
||||
@@ -21,6 +21,9 @@
|
||||
<a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
|
||||
<img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
|
||||
alt="follow on X(Twitter)"></a>
|
||||
<a href="https://www.linkedin.com/company/langgenius/" target="_blank">
|
||||
<img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
|
||||
alt="follow on LinkedIn"></a>
|
||||
<a href="https://hub.docker.com/u/langgenius" target="_blank">
|
||||
<img alt="Docker Pulls" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
|
||||
<a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">
|
||||
|
||||
@@ -21,6 +21,9 @@
|
||||
<a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
|
||||
<img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
|
||||
alt="follow on X(Twitter)"></a>
|
||||
<a href="https://www.linkedin.com/company/langgenius/" target="_blank">
|
||||
<img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
|
||||
alt="follow on LinkedIn"></a>
|
||||
<a href="https://hub.docker.com/u/langgenius" target="_blank">
|
||||
<img alt="Docker Pulls" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
|
||||
<a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">
|
||||
|
||||
@@ -21,6 +21,9 @@
|
||||
<a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
|
||||
<img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
|
||||
alt="seguir en X(Twitter)"></a>
|
||||
<a href="https://www.linkedin.com/company/langgenius/" target="_blank">
|
||||
<img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
|
||||
alt="seguir en LinkedIn"></a>
|
||||
<a href="https://hub.docker.com/u/langgenius" target="_blank">
|
||||
<img alt="Descargas de Docker" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
|
||||
<a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">
|
||||
|
||||
@@ -21,6 +21,9 @@
|
||||
<a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
|
||||
<img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
|
||||
alt="suivre sur X(Twitter)"></a>
|
||||
<a href="https://www.linkedin.com/company/langgenius/" target="_blank">
|
||||
<img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
|
||||
alt="suivre sur LinkedIn"></a>
|
||||
<a href="https://hub.docker.com/u/langgenius" target="_blank">
|
||||
<img alt="Tirages Docker" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
|
||||
<a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">
|
||||
|
||||
@@ -21,6 +21,9 @@
|
||||
<a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
|
||||
<img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
|
||||
alt="X(Twitter)でフォロー"></a>
|
||||
<a href="https://www.linkedin.com/company/langgenius/" target="_blank">
|
||||
<img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
|
||||
alt="LinkedInでフォロー"></a>
|
||||
<a href="https://hub.docker.com/u/langgenius" target="_blank">
|
||||
<img alt="Docker Pulls" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
|
||||
<a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">
|
||||
|
||||
@@ -21,6 +21,9 @@
|
||||
<a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
|
||||
<img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
|
||||
alt="follow on X(Twitter)"></a>
|
||||
<a href="https://www.linkedin.com/company/langgenius/" target="_blank">
|
||||
<img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
|
||||
alt="follow on LinkedIn"></a>
|
||||
<a href="https://hub.docker.com/u/langgenius" target="_blank">
|
||||
<img alt="Docker Pulls" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
|
||||
<a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">
|
||||
|
||||
@@ -21,6 +21,9 @@
|
||||
<a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
|
||||
<img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
|
||||
alt="follow on X(Twitter)"></a>
|
||||
<a href="https://www.linkedin.com/company/langgenius/" target="_blank">
|
||||
<img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
|
||||
alt="follow on LinkedIn"></a>
|
||||
<a href="https://hub.docker.com/u/langgenius" target="_blank">
|
||||
<img alt="Docker Pulls" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
|
||||
<a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">
|
||||
|
||||
@@ -25,6 +25,9 @@
|
||||
<a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
|
||||
<img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
|
||||
alt="follow on X(Twitter)"></a>
|
||||
<a href="https://www.linkedin.com/company/langgenius/" target="_blank">
|
||||
<img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
|
||||
alt="follow on LinkedIn"></a>
|
||||
<a href="https://hub.docker.com/u/langgenius" target="_blank">
|
||||
<img alt="Docker Pulls" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
|
||||
<a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">
|
||||
|
||||
@@ -22,6 +22,9 @@
|
||||
<a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
|
||||
<img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
|
||||
alt="follow on X(Twitter)"></a>
|
||||
<a href="https://www.linkedin.com/company/langgenius/" target="_blank">
|
||||
<img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
|
||||
alt="follow on LinkedIn"></a>
|
||||
<a href="https://hub.docker.com/u/langgenius" target="_blank">
|
||||
<img alt="Docker Pulls" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
|
||||
<a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">
|
||||
|
||||
@@ -21,6 +21,9 @@
|
||||
<a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
|
||||
<img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
|
||||
alt="X(Twitter)'da takip et"></a>
|
||||
<a href="https://www.linkedin.com/company/langgenius/" target="_blank">
|
||||
<img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
|
||||
alt="LinkedIn'da takip et"></a>
|
||||
<a href="https://hub.docker.com/u/langgenius" target="_blank">
|
||||
<img alt="Docker Çekmeleri" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
|
||||
<a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">
|
||||
@@ -62,8 +65,6 @@ Görsel bir arayüz üzerinde güçlü AI iş akışları oluşturun ve test edi
|
||||

|
||||
|
||||
|
||||
Özür dilerim, haklısınız. Daha anlamlı ve akıcı bir çeviri yapmaya çalışayım. İşte güncellenmiş çeviri:
|
||||
|
||||
**3. Prompt IDE**:
|
||||
Komut istemlerini oluşturmak, model performansını karşılaştırmak ve sohbet tabanlı uygulamalara metin-konuşma gibi ek özellikler eklemek için kullanıcı dostu bir arayüz.
|
||||
|
||||
@@ -150,8 +151,6 @@ Görsel bir arayüz üzerinde güçlü AI iş akışları oluşturun ve test edi
|
||||
## Dify'ı Kullanma
|
||||
|
||||
- **Cloud </br>**
|
||||
İşte verdiğiniz metnin Türkçe çevirisi, kod bloğu içinde:
|
||||
-
|
||||
Herkesin sıfır kurulumla denemesi için bir [Dify Cloud](https://dify.ai) hizmeti sunuyoruz. Bu hizmet, kendi kendine dağıtılan versiyonun tüm yeteneklerini sağlar ve sandbox planında 200 ücretsiz GPT-4 çağrısı içerir.
|
||||
|
||||
- **Dify Topluluk Sürümünü Kendi Sunucunuzda Barındırma</br>**
|
||||
@@ -177,8 +176,6 @@ GitHub'da Dify'a yıldız verin ve yeni sürümlerden anında haberdar olun.
|
||||
>- RAM >= 4GB
|
||||
|
||||
</br>
|
||||
İşte verdiğiniz metnin Türkçe çevirisi, kod bloğu içinde:
|
||||
|
||||
Dify sunucusunu başlatmanın en kolay yolu, [docker-compose.yml](docker/docker-compose.yaml) dosyamızı çalıştırmaktır. Kurulum komutunu çalıştırmadan önce, makinenizde [Docker](https://docs.docker.com/get-docker/) ve [Docker Compose](https://docs.docker.com/compose/install/)'un kurulu olduğundan emin olun:
|
||||
|
||||
```bash
|
||||
|
||||
@@ -21,6 +21,9 @@
|
||||
<a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
|
||||
<img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
|
||||
alt="theo dõi trên X(Twitter)"></a>
|
||||
<a href="https://www.linkedin.com/company/langgenius/" target="_blank">
|
||||
<img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
|
||||
alt="theo dõi trên LinkedIn"></a>
|
||||
<a href="https://hub.docker.com/u/langgenius" target="_blank">
|
||||
<img alt="Docker Pulls" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
|
||||
<a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">
|
||||
|
||||
@@ -430,4 +430,7 @@ CREATE_TIDB_SERVICE_JOB_ENABLED=false
|
||||
# Maximum number of submitted thread count in a ThreadPool for parallel node execution
|
||||
MAX_SUBMIT_COUNT=100
|
||||
# Lockout duration in seconds
|
||||
LOGIN_LOCKOUT_DURATION=86400
|
||||
LOGIN_LOCKOUT_DURATION=86400
|
||||
|
||||
# Prevent Clickjacking
|
||||
ALLOW_EMBED=false
|
||||
@@ -48,18 +48,18 @@ ENV TZ=UTC
|
||||
|
||||
WORKDIR /app/api
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y --no-install-recommends curl nodejs libgmp-dev libmpfr-dev libmpc-dev \
|
||||
# if you located in China, you can use aliyun mirror to speed up
|
||||
# && echo "deb http://mirrors.aliyun.com/debian testing main" > /etc/apt/sources.list \
|
||||
&& echo "deb http://deb.debian.org/debian bookworm main" > /etc/apt/sources.list \
|
||||
&& apt-get update \
|
||||
# For Security
|
||||
&& apt-get install -y --no-install-recommends expat libldap-2.5-0 perl libsqlite3-0 zlib1g \
|
||||
# install a chinese font to support the use of tools like matplotlib
|
||||
&& apt-get install -y fonts-noto-cjk \
|
||||
# install libmagic to support the use of python-magic guess MIMETYPE
|
||||
&& apt-get install -y libmagic1 \
|
||||
RUN \
|
||||
apt-get update \
|
||||
# Install dependencies
|
||||
&& apt-get install -y --no-install-recommends \
|
||||
# basic environment
|
||||
curl nodejs libgmp-dev libmpfr-dev libmpc-dev \
|
||||
# For Security
|
||||
expat libldap-2.5-0 perl libsqlite3-0 zlib1g \
|
||||
# install a chinese font to support the use of tools like matplotlib
|
||||
fonts-noto-cjk \
|
||||
# install libmagic to support the use of python-magic guess MIMETYPE
|
||||
libmagic1 \
|
||||
&& apt-get autoremove -y \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
@@ -78,7 +78,6 @@ COPY . /app/api/
|
||||
COPY docker/entrypoint.sh /entrypoint.sh
|
||||
RUN chmod +x /entrypoint.sh
|
||||
|
||||
|
||||
ARG COMMIT_SHA
|
||||
ENV COMMIT_SHA=${COMMIT_SHA}
|
||||
|
||||
|
||||
@@ -1,9 +1,40 @@
|
||||
from typing import Optional
|
||||
|
||||
from pydantic import Field, NonNegativeInt
|
||||
from pydantic import Field, NonNegativeInt, computed_field
|
||||
from pydantic_settings import BaseSettings
|
||||
|
||||
|
||||
class HostedCreditConfig(BaseSettings):
|
||||
HOSTED_MODEL_CREDIT_CONFIG: str = Field(
|
||||
description="Model credit configuration in format 'model:credits,model:credits', e.g., 'gpt-4:20,gpt-4o:10'",
|
||||
default="",
|
||||
)
|
||||
|
||||
def get_model_credits(self, model_name: str) -> int:
|
||||
"""
|
||||
Get credit value for a specific model name.
|
||||
Returns 1 if model is not found in configuration (default credit).
|
||||
|
||||
:param model_name: The name of the model to search for
|
||||
:return: The credit value for the model
|
||||
"""
|
||||
if not self.HOSTED_MODEL_CREDIT_CONFIG:
|
||||
return 1
|
||||
|
||||
try:
|
||||
credit_map = dict(
|
||||
item.strip().split(":", 1) for item in self.HOSTED_MODEL_CREDIT_CONFIG.split(",") if ":" in item
|
||||
)
|
||||
|
||||
# Search for matching model pattern
|
||||
for pattern, credit in credit_map.items():
|
||||
if pattern.strip() == model_name:
|
||||
return int(credit)
|
||||
return 1 # Default quota if no match found
|
||||
except (ValueError, AttributeError):
|
||||
return 1 # Return default quota if parsing fails
|
||||
|
||||
|
||||
class HostedOpenAiConfig(BaseSettings):
|
||||
"""
|
||||
Configuration for hosted OpenAI service
|
||||
@@ -202,5 +233,7 @@ class HostedServiceConfig(
|
||||
HostedZhipuAIConfig,
|
||||
# moderation
|
||||
HostedModerationConfig,
|
||||
# credit config
|
||||
HostedCreditConfig,
|
||||
):
|
||||
pass
|
||||
|
||||
@@ -9,7 +9,7 @@ class PackagingInfo(BaseSettings):
|
||||
|
||||
CURRENT_VERSION: str = Field(
|
||||
description="Dify version",
|
||||
default="0.15.2",
|
||||
default="0.15.7",
|
||||
)
|
||||
|
||||
COMMIT_SHA: str = Field(
|
||||
|
||||
@@ -2,30 +2,28 @@ import uuid
|
||||
from typing import cast
|
||||
|
||||
from flask_login import current_user # type: ignore
|
||||
from flask_restful import Resource, inputs, marshal, marshal_with, reqparse # type: ignore
|
||||
from flask_restful import (Resource, inputs, marshal, # type: ignore
|
||||
marshal_with, reqparse)
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session
|
||||
from werkzeug.exceptions import BadRequest, Forbidden, abort
|
||||
|
||||
from controllers.console import api
|
||||
from controllers.console.app.wraps import get_app_model
|
||||
from controllers.console.wraps import (
|
||||
account_initialization_required,
|
||||
cloud_edition_billing_resource_check,
|
||||
enterprise_license_required,
|
||||
setup_required,
|
||||
)
|
||||
from controllers.console.wraps import (account_initialization_required,
|
||||
cloud_edition_billing_resource_check,
|
||||
enterprise_license_required,
|
||||
setup_required)
|
||||
from core.ops.ops_trace_manager import OpsTraceManager
|
||||
from extensions.ext_database import db
|
||||
from fields.app_fields import (
|
||||
app_detail_fields,
|
||||
app_detail_fields_with_site,
|
||||
app_pagination_fields,
|
||||
)
|
||||
from fields.app_fields import (app_detail_fields, app_detail_fields_with_site,
|
||||
app_pagination_fields)
|
||||
from libs.login import login_required
|
||||
from models import Account, App
|
||||
from services.app_dsl_service import AppDslService, ImportMode
|
||||
from services.app_service import AppService
|
||||
from services.enterprise.enterprise_service import EnterpriseService
|
||||
from services.feature_service import FeatureService
|
||||
|
||||
ALLOW_CREATE_APP_MODES = ["chat", "agent-chat", "advanced-chat", "workflow", "completion"]
|
||||
|
||||
@@ -67,7 +65,17 @@ class AppListApi(Resource):
|
||||
if not app_pagination:
|
||||
return {"data": [], "total": 0, "page": 1, "limit": 20, "has_more": False}
|
||||
|
||||
return marshal(app_pagination, app_pagination_fields)
|
||||
if FeatureService.get_system_features().webapp_auth.enabled:
|
||||
app_ids = [str(app.id) for app in app_pagination.items]
|
||||
res = EnterpriseService.WebAppAuth.batch_get_app_access_mode_by_id(app_ids=app_ids)
|
||||
if len(res) != len(app_ids):
|
||||
raise BadRequest("Invalid app id in webapp auth")
|
||||
|
||||
for app in app_pagination.items:
|
||||
if str(app.id) in res:
|
||||
app.access_mode = res[str(app.id)].access_mode
|
||||
|
||||
return marshal(app_pagination, app_pagination_fields), 200
|
||||
|
||||
@setup_required
|
||||
@login_required
|
||||
@@ -111,6 +119,10 @@ class AppApi(Resource):
|
||||
|
||||
app_model = app_service.get_app(app_model)
|
||||
|
||||
if FeatureService.get_system_features().webapp_auth.enabled:
|
||||
app_setting = EnterpriseService.WebAppAuth.get_app_access_mode_by_id(app_id=str(app_model.id))
|
||||
app_model.access_mode = app_setting.access_mode
|
||||
|
||||
return app_model
|
||||
|
||||
@setup_required
|
||||
|
||||
@@ -8,7 +8,7 @@ from constants.languages import languages
|
||||
from controllers.console import api
|
||||
from controllers.console.auth.error import EmailCodeError, InvalidEmailError, InvalidTokenError, PasswordMismatchError
|
||||
from controllers.console.error import AccountInFreezeError, AccountNotFound, EmailSendIpLimitError
|
||||
from controllers.console.wraps import setup_required
|
||||
from controllers.console.wraps import email_password_login_enabled, setup_required
|
||||
from events.tenant_event import tenant_was_created
|
||||
from extensions.ext_database import db
|
||||
from libs.helper import email, extract_remote_ip
|
||||
@@ -16,12 +16,13 @@ from libs.password import hash_password, valid_password
|
||||
from models.account import Account
|
||||
from services.account_service import AccountService, TenantService
|
||||
from services.errors.account import AccountRegisterError
|
||||
from services.errors.workspace import WorkSpaceNotAllowedCreateError
|
||||
from services.errors.workspace import WorkSpaceNotAllowedCreateError, WorkspacesLimitExceededError
|
||||
from services.feature_service import FeatureService
|
||||
|
||||
|
||||
class ForgotPasswordSendEmailApi(Resource):
|
||||
@setup_required
|
||||
@email_password_login_enabled
|
||||
def post(self):
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument("email", type=email, required=True, location="json")
|
||||
@@ -53,6 +54,7 @@ class ForgotPasswordSendEmailApi(Resource):
|
||||
|
||||
class ForgotPasswordCheckApi(Resource):
|
||||
@setup_required
|
||||
@email_password_login_enabled
|
||||
def post(self):
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument("email", type=str, required=True, location="json")
|
||||
@@ -72,11 +74,20 @@ class ForgotPasswordCheckApi(Resource):
|
||||
if args["code"] != token_data.get("code"):
|
||||
raise EmailCodeError()
|
||||
|
||||
return {"is_valid": True, "email": token_data.get("email")}
|
||||
# Verified, revoke the first token
|
||||
AccountService.revoke_reset_password_token(args["token"])
|
||||
|
||||
# Refresh token data by generating a new token
|
||||
_, new_token = AccountService.generate_reset_password_token(
|
||||
user_email, code=args["code"], additional_data={"phase": "reset"}
|
||||
)
|
||||
|
||||
return {"is_valid": True, "email": token_data.get("email"), "token": new_token}
|
||||
|
||||
|
||||
class ForgotPasswordResetApi(Resource):
|
||||
@setup_required
|
||||
@email_password_login_enabled
|
||||
def post(self):
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument("token", type=str, required=True, nullable=False, location="json")
|
||||
@@ -95,6 +106,9 @@ class ForgotPasswordResetApi(Resource):
|
||||
|
||||
if reset_data is None:
|
||||
raise InvalidTokenError()
|
||||
# Must use token in reset phase
|
||||
if reset_data.get("phase", "") != "reset":
|
||||
raise InvalidTokenError()
|
||||
|
||||
AccountService.revoke_reset_password_token(token)
|
||||
|
||||
@@ -127,6 +141,8 @@ class ForgotPasswordResetApi(Resource):
|
||||
pass
|
||||
except AccountRegisterError as are:
|
||||
raise AccountInFreezeError()
|
||||
except WorkspacesLimitExceededError:
|
||||
pass
|
||||
|
||||
return {"result": "success"}
|
||||
|
||||
|
||||
@@ -21,8 +21,9 @@ from controllers.console.error import (
|
||||
AccountNotFound,
|
||||
EmailSendIpLimitError,
|
||||
NotAllowedCreateWorkspace,
|
||||
WorkspacesLimitExceeded,
|
||||
)
|
||||
from controllers.console.wraps import setup_required
|
||||
from controllers.console.wraps import email_password_login_enabled, setup_required
|
||||
from events.tenant_event import tenant_was_created
|
||||
from libs.helper import email, extract_remote_ip
|
||||
from libs.password import valid_password
|
||||
@@ -30,7 +31,7 @@ from models.account import Account
|
||||
from services.account_service import AccountService, RegisterService, TenantService
|
||||
from services.billing_service import BillingService
|
||||
from services.errors.account import AccountRegisterError
|
||||
from services.errors.workspace import WorkSpaceNotAllowedCreateError
|
||||
from services.errors.workspace import WorkSpaceNotAllowedCreateError, WorkspacesLimitExceededError
|
||||
from services.feature_service import FeatureService
|
||||
|
||||
|
||||
@@ -38,6 +39,7 @@ class LoginApi(Resource):
|
||||
"""Resource for user login."""
|
||||
|
||||
@setup_required
|
||||
@email_password_login_enabled
|
||||
def post(self):
|
||||
"""Authenticate user and login."""
|
||||
parser = reqparse.RequestParser()
|
||||
@@ -87,10 +89,15 @@ class LoginApi(Resource):
|
||||
# SELF_HOSTED only have one workspace
|
||||
tenants = TenantService.get_join_tenants(account)
|
||||
if len(tenants) == 0:
|
||||
return {
|
||||
"result": "fail",
|
||||
"data": "workspace not found, please contact system admin to invite you to join in a workspace",
|
||||
}
|
||||
system_features = FeatureService.get_system_features()
|
||||
|
||||
if system_features.is_allow_create_workspace and not system_features.license.workspaces.is_available():
|
||||
raise WorkspacesLimitExceeded()
|
||||
else:
|
||||
return {
|
||||
"result": "fail",
|
||||
"data": "workspace not found, please contact system admin to invite you to join in a workspace",
|
||||
}
|
||||
|
||||
token_pair = AccountService.login(account=account, ip_address=extract_remote_ip(request))
|
||||
AccountService.reset_login_error_rate_limit(args["email"])
|
||||
@@ -110,6 +117,7 @@ class LogoutApi(Resource):
|
||||
|
||||
class ResetPasswordSendEmailApi(Resource):
|
||||
@setup_required
|
||||
@email_password_login_enabled
|
||||
def post(self):
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument("email", type=email, required=True, location="json")
|
||||
@@ -196,6 +204,9 @@ class EmailCodeLoginApi(Resource):
|
||||
if account:
|
||||
tenant = TenantService.get_join_tenants(account)
|
||||
if not tenant:
|
||||
workspaces = FeatureService.get_system_features().license.workspaces
|
||||
if not workspaces.is_available():
|
||||
raise WorkspacesLimitExceeded()
|
||||
if not FeatureService.get_system_features().is_allow_create_workspace:
|
||||
raise NotAllowedCreateWorkspace()
|
||||
else:
|
||||
@@ -213,6 +224,8 @@ class EmailCodeLoginApi(Resource):
|
||||
return NotAllowedCreateWorkspace()
|
||||
except AccountRegisterError as are:
|
||||
raise AccountInFreezeError()
|
||||
except WorkspacesLimitExceededError:
|
||||
raise WorkspacesLimitExceeded()
|
||||
token_pair = AccountService.login(account, ip_address=extract_remote_ip(request))
|
||||
AccountService.reset_login_error_rate_limit(args["email"])
|
||||
return {"result": "success", "data": token_pair.model_dump()}
|
||||
|
||||
@@ -46,6 +46,18 @@ class NotAllowedCreateWorkspace(BaseHTTPException):
|
||||
code = 400
|
||||
|
||||
|
||||
class WorkspaceMembersLimitExceeded(BaseHTTPException):
|
||||
error_code = "limit_exceeded"
|
||||
description = "Unable to add member because the maximum workspace's member limit was exceeded"
|
||||
code = 400
|
||||
|
||||
|
||||
class WorkspacesLimitExceeded(BaseHTTPException):
|
||||
error_code = "limit_exceeded"
|
||||
description = "Unable to create workspace because the maximum workspace limit was exceeded"
|
||||
code = 400
|
||||
|
||||
|
||||
class AccountBannedError(BaseHTTPException):
|
||||
error_code = "account_banned"
|
||||
description = "Account is banned."
|
||||
|
||||
@@ -23,3 +23,9 @@ class AppSuggestedQuestionsAfterAnswerDisabledError(BaseHTTPException):
|
||||
error_code = "app_suggested_questions_after_answer_disabled"
|
||||
description = "Function Suggested questions after answer disabled."
|
||||
code = 403
|
||||
|
||||
|
||||
class AppAccessDeniedError(BaseHTTPException):
|
||||
error_code = "access_denied"
|
||||
description = "App access denied."
|
||||
code = 403
|
||||
|
||||
@@ -1,20 +1,26 @@
|
||||
import logging
|
||||
from datetime import UTC, datetime
|
||||
from typing import Any
|
||||
|
||||
from flask import request
|
||||
from flask_login import current_user # type: ignore
|
||||
from flask_restful import Resource, inputs, marshal_with, reqparse # type: ignore
|
||||
from flask_restful import (Resource, inputs, marshal_with, # type: ignore
|
||||
reqparse)
|
||||
from sqlalchemy import and_
|
||||
from werkzeug.exceptions import BadRequest, Forbidden, NotFound
|
||||
|
||||
from controllers.console import api
|
||||
from controllers.console.explore.wraps import InstalledAppResource
|
||||
from controllers.console.wraps import account_initialization_required, cloud_edition_billing_resource_check
|
||||
from controllers.console.wraps import (account_initialization_required,
|
||||
cloud_edition_billing_resource_check)
|
||||
from extensions.ext_database import db
|
||||
from fields.installed_app_fields import installed_app_list_fields
|
||||
from libs.login import login_required
|
||||
from models import App, InstalledApp, RecommendedApp
|
||||
from services.account_service import TenantService
|
||||
from services.app_service import AppService
|
||||
from services.enterprise.enterprise_service import EnterpriseService
|
||||
from services.feature_service import FeatureService
|
||||
|
||||
|
||||
class InstalledAppsListApi(Resource):
|
||||
@@ -48,6 +54,23 @@ class InstalledAppsListApi(Resource):
|
||||
for installed_app in installed_apps
|
||||
if installed_app.app is not None
|
||||
]
|
||||
|
||||
# filter out apps that user doesn't have access to
|
||||
if FeatureService.get_system_features().webapp_auth.enabled:
|
||||
user_id = current_user.id
|
||||
res = []
|
||||
for installed_app in installed_app_list:
|
||||
app_code = AppService.get_app_code_by_id(str(installed_app["app"].id))
|
||||
if EnterpriseService.WebAppAuth.is_user_allowed_to_access_webapp(
|
||||
user_id=user_id,
|
||||
app_code=app_code,
|
||||
):
|
||||
res.append(installed_app)
|
||||
installed_app_list = res
|
||||
logging.info(
|
||||
f"installed_app_list: {installed_app_list}, user_id: {user_id}"
|
||||
)
|
||||
|
||||
installed_app_list.sort(
|
||||
key=lambda app: (
|
||||
-app["is_pinned"],
|
||||
|
||||
@@ -4,10 +4,14 @@ from flask_login import current_user # type: ignore
|
||||
from flask_restful import Resource # type: ignore
|
||||
from werkzeug.exceptions import NotFound
|
||||
|
||||
from controllers.console.explore.error import AppAccessDeniedError
|
||||
from controllers.console.wraps import account_initialization_required
|
||||
from extensions.ext_database import db
|
||||
from libs.login import login_required
|
||||
from models import InstalledApp
|
||||
from services.app_service import AppService
|
||||
from services.enterprise.enterprise_service import EnterpriseService
|
||||
from services.feature_service import FeatureService
|
||||
|
||||
|
||||
def installed_app_required(view=None):
|
||||
@@ -48,6 +52,30 @@ def installed_app_required(view=None):
|
||||
return decorator
|
||||
|
||||
|
||||
def user_allowed_to_access_app(view=None):
|
||||
def decorator(view):
|
||||
@wraps(view)
|
||||
def decorated(installed_app: InstalledApp, *args, **kwargs):
|
||||
feature = FeatureService.get_system_features()
|
||||
if feature.webapp_auth.enabled:
|
||||
app_id = installed_app.app_id
|
||||
app_code = AppService.get_app_code_by_id(app_id)
|
||||
res = EnterpriseService.WebAppAuth.is_user_allowed_to_access_webapp(
|
||||
user_id=str(current_user.id),
|
||||
app_code=app_code,
|
||||
)
|
||||
if not res:
|
||||
raise AppAccessDeniedError()
|
||||
|
||||
return view(installed_app, *args, **kwargs)
|
||||
|
||||
return decorated
|
||||
if view:
|
||||
return decorator(view)
|
||||
return decorator
|
||||
|
||||
|
||||
class InstalledAppResource(Resource):
|
||||
# must be reversed if there are multiple decorators
|
||||
method_decorators = [installed_app_required, account_initialization_required, login_required]
|
||||
|
||||
method_decorators = [user_allowed_to_access_app, installed_app_required, account_initialization_required, login_required]
|
||||
|
||||
@@ -6,6 +6,7 @@ from flask_restful import Resource, abort, marshal_with, reqparse # type: ignor
|
||||
import services
|
||||
from configs import dify_config
|
||||
from controllers.console import api
|
||||
from controllers.console.error import WorkspaceMembersLimitExceeded
|
||||
from controllers.console.wraps import (
|
||||
account_initialization_required,
|
||||
cloud_edition_billing_resource_check,
|
||||
@@ -17,6 +18,7 @@ from libs.login import login_required
|
||||
from models.account import Account, TenantAccountRole
|
||||
from services.account_service import RegisterService, TenantService
|
||||
from services.errors.account import AccountAlreadyInTenantError
|
||||
from services.feature_service import FeatureService
|
||||
|
||||
|
||||
class MemberListApi(Resource):
|
||||
@@ -54,6 +56,12 @@ class MemberInviteEmailApi(Resource):
|
||||
inviter = current_user
|
||||
invitation_results = []
|
||||
console_web_url = dify_config.CONSOLE_WEB_URL
|
||||
|
||||
workspace_members = FeatureService.get_features(tenant_id=inviter.current_tenant.id).workspace_members
|
||||
|
||||
if not workspace_members.is_available(len(invitee_emails)):
|
||||
raise WorkspaceMembersLimitExceeded()
|
||||
|
||||
for invitee_email in invitee_emails:
|
||||
try:
|
||||
token = RegisterService.invite_new_member(
|
||||
|
||||
@@ -39,6 +39,17 @@ def only_edition_cloud(view):
|
||||
return decorated
|
||||
|
||||
|
||||
def only_enterprise_edition(view):
|
||||
@wraps(view)
|
||||
def decorated(*args, **kwargs):
|
||||
if not dify_config.ENTERPRISE_ENABLED:
|
||||
abort(404)
|
||||
|
||||
return view(*args, **kwargs)
|
||||
|
||||
return decorated
|
||||
|
||||
|
||||
def only_edition_self_hosted(view):
|
||||
@wraps(view)
|
||||
def decorated(*args, **kwargs):
|
||||
@@ -154,3 +165,16 @@ def enterprise_license_required(view):
|
||||
return view(*args, **kwargs)
|
||||
|
||||
return decorated
|
||||
|
||||
|
||||
def email_password_login_enabled(view):
|
||||
@wraps(view)
|
||||
def decorated(*args, **kwargs):
|
||||
features = FeatureService.get_system_features()
|
||||
if features.enable_email_password_login:
|
||||
return view(*args, **kwargs)
|
||||
|
||||
# otherwise, return 403
|
||||
abort(403)
|
||||
|
||||
return decorated
|
||||
|
||||
@@ -5,4 +5,5 @@ from libs.external_api import ExternalApi
|
||||
bp = Blueprint("inner_api", __name__, url_prefix="/inner/api")
|
||||
api = ExternalApi(bp)
|
||||
|
||||
from . import mail
|
||||
from .workspace import workspace
|
||||
|
||||
27
api/controllers/inner_api/mail.py
Normal file
27
api/controllers/inner_api/mail.py
Normal file
@@ -0,0 +1,27 @@
|
||||
from flask_restful import (
|
||||
Resource, # type: ignore
|
||||
reqparse,
|
||||
)
|
||||
|
||||
from controllers.console.wraps import setup_required
|
||||
from controllers.inner_api import api
|
||||
from controllers.inner_api.wraps import inner_api_only
|
||||
from services.enterprise.mail_service import DifyMail, EnterpriseMailService
|
||||
|
||||
|
||||
class EnterpriseMail(Resource):
|
||||
@setup_required
|
||||
@inner_api_only
|
||||
def post(self):
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument("to", type=str, action="append", required=True)
|
||||
parser.add_argument("subject", type=str, required=True)
|
||||
parser.add_argument("body", type=str, required=True)
|
||||
parser.add_argument("substitutions", type=dict, required=False)
|
||||
args = parser.parse_args()
|
||||
|
||||
EnterpriseMailService.send_mail(DifyMail(**args))
|
||||
return {"message": "success"}, 200
|
||||
|
||||
|
||||
api.add_resource(EnterpriseMail, "/enterprise/mail")
|
||||
@@ -1,12 +1,16 @@
|
||||
from flask_restful import marshal_with # type: ignore
|
||||
|
||||
from flask import request
|
||||
from flask_restful import Resource, marshal_with, reqparse # type: ignore
|
||||
|
||||
from controllers.common import fields
|
||||
from controllers.common import helpers as controller_helpers
|
||||
from controllers.web import api
|
||||
from controllers.web.error import AppUnavailableError
|
||||
from controllers.web.wraps import WebApiResource
|
||||
from libs.passport import PassportService
|
||||
from models.model import App, AppMode
|
||||
from services.app_service import AppService
|
||||
from services.enterprise.enterprise_service import EnterpriseService
|
||||
|
||||
|
||||
class AppParameterApi(WebApiResource):
|
||||
@@ -42,5 +46,51 @@ class AppMeta(WebApiResource):
|
||||
return AppService().get_app_meta(app_model)
|
||||
|
||||
|
||||
class AppAccessMode(Resource):
|
||||
def get(self):
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument("appId", type=str, required=True, location="args")
|
||||
args = parser.parse_args()
|
||||
|
||||
app_id = args["appId"]
|
||||
res = EnterpriseService.WebAppAuth.get_app_access_mode_by_id(app_id)
|
||||
|
||||
return {"accessMode": res.access_mode}
|
||||
|
||||
|
||||
class AppWebAuthPermission(Resource):
|
||||
def get(self):
|
||||
user_id = "visitor"
|
||||
try:
|
||||
auth_header = request.headers.get("Authorization")
|
||||
if auth_header is None:
|
||||
raise
|
||||
if " " not in auth_header:
|
||||
raise
|
||||
|
||||
auth_scheme, tk = auth_header.split(None, 1)
|
||||
auth_scheme = auth_scheme.lower()
|
||||
if auth_scheme != "bearer":
|
||||
raise
|
||||
|
||||
decoded = PassportService().verify(tk)
|
||||
user_id = decoded.get("user_id", "visitor")
|
||||
except Exception as e:
|
||||
pass
|
||||
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument("appId", type=str, required=True, location="args")
|
||||
args = parser.parse_args()
|
||||
|
||||
app_id = args["appId"]
|
||||
app_code = AppService.get_app_code_by_id(app_id)
|
||||
|
||||
res = EnterpriseService.WebAppAuth.is_user_allowed_to_access_webapp(str(user_id), app_code)
|
||||
return {"result": res}
|
||||
|
||||
|
||||
api.add_resource(AppParameterApi, "/parameters")
|
||||
api.add_resource(AppMeta, "/meta")
|
||||
# webapp auth apis
|
||||
api.add_resource(AppAccessMode, "/webapp/access-mode")
|
||||
api.add_resource(AppWebAuthPermission, "/webapp/permission")
|
||||
|
||||
@@ -121,9 +121,15 @@ class UnsupportedFileTypeError(BaseHTTPException):
|
||||
code = 415
|
||||
|
||||
|
||||
class WebSSOAuthRequiredError(BaseHTTPException):
|
||||
class WebAppAuthRequiredError(BaseHTTPException):
|
||||
error_code = "web_sso_auth_required"
|
||||
description = "Web SSO authentication required."
|
||||
description = "Web app authentication required."
|
||||
code = 401
|
||||
|
||||
|
||||
class WebAppAuthAccessDeniedError(BaseHTTPException):
|
||||
error_code = "web_app_access_denied"
|
||||
description = "You do not have permission to access this web app."
|
||||
code = 401
|
||||
|
||||
|
||||
|
||||
121
api/controllers/web/login.py
Normal file
121
api/controllers/web/login.py
Normal file
@@ -0,0 +1,121 @@
|
||||
from flask import request
|
||||
from flask_restful import Resource, reqparse
|
||||
from jwt import InvalidTokenError # type: ignore
|
||||
from web import api
|
||||
from werkzeug.exceptions import BadRequest
|
||||
|
||||
import services
|
||||
from controllers.console.auth.error import EmailCodeError, EmailOrPasswordMismatchError, InvalidEmailError
|
||||
from controllers.console.error import AccountBannedError, AccountNotFound
|
||||
from controllers.console.wraps import setup_required
|
||||
from libs.helper import email
|
||||
from libs.password import valid_password
|
||||
from services.account_service import AccountService
|
||||
from services.webapp_auth_service import WebAppAuthService
|
||||
|
||||
|
||||
class LoginApi(Resource):
|
||||
"""Resource for web app email/password login."""
|
||||
|
||||
def post(self):
|
||||
"""Authenticate user and login."""
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument("email", type=email, required=True, location="json")
|
||||
parser.add_argument("password", type=valid_password, required=True, location="json")
|
||||
args = parser.parse_args()
|
||||
|
||||
app_code = request.headers.get("X-App-Code")
|
||||
if app_code is None:
|
||||
raise BadRequest("X-App-Code header is missing.")
|
||||
|
||||
try:
|
||||
account = WebAppAuthService.authenticate(args["email"], args["password"])
|
||||
except services.errors.account.AccountLoginError:
|
||||
raise AccountBannedError()
|
||||
except services.errors.account.AccountPasswordError:
|
||||
raise EmailOrPasswordMismatchError()
|
||||
except services.errors.account.AccountNotFoundError:
|
||||
raise AccountNotFound()
|
||||
|
||||
WebAppAuthService._validate_user_accessibility(account=account, app_code=app_code)
|
||||
|
||||
end_user = WebAppAuthService.create_end_user(email=args["email"], app_code=app_code)
|
||||
|
||||
token = WebAppAuthService.login(account=account, app_code=app_code, end_user_id=end_user.id)
|
||||
return {"result": "success", "token": token}
|
||||
|
||||
|
||||
# class LogoutApi(Resource):
|
||||
# @setup_required
|
||||
# def get(self):
|
||||
# account = cast(Account, flask_login.current_user)
|
||||
# if isinstance(account, flask_login.AnonymousUserMixin):
|
||||
# return {"result": "success"}
|
||||
# flask_login.logout_user()
|
||||
# return {"result": "success"}
|
||||
|
||||
|
||||
class EmailCodeLoginSendEmailApi(Resource):
|
||||
@setup_required
|
||||
def post(self):
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument("email", type=email, required=True, location="json")
|
||||
parser.add_argument("language", type=str, required=False, location="json")
|
||||
args = parser.parse_args()
|
||||
|
||||
if args["language"] is not None and args["language"] == "zh-Hans":
|
||||
language = "zh-Hans"
|
||||
else:
|
||||
language = "en-US"
|
||||
|
||||
account = WebAppAuthService.get_user_through_email(args["email"])
|
||||
if account is None:
|
||||
raise AccountNotFound()
|
||||
else:
|
||||
token = WebAppAuthService.send_email_code_login_email(account=account, language=language)
|
||||
|
||||
return {"result": "success", "data": token}
|
||||
|
||||
|
||||
class EmailCodeLoginApi(Resource):
|
||||
@setup_required
|
||||
def post(self):
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument("email", type=str, required=True, location="json")
|
||||
parser.add_argument("code", type=str, required=True, location="json")
|
||||
parser.add_argument("token", type=str, required=True, location="json")
|
||||
args = parser.parse_args()
|
||||
|
||||
user_email = args["email"]
|
||||
app_code = request.headers.get("X-App-Code")
|
||||
if app_code is None:
|
||||
raise BadRequest("X-App-Code header is missing.")
|
||||
|
||||
token_data = WebAppAuthService.get_email_code_login_data(args["token"])
|
||||
if token_data is None:
|
||||
raise InvalidTokenError()
|
||||
|
||||
if token_data["email"] != args["email"]:
|
||||
raise InvalidEmailError()
|
||||
|
||||
if token_data["code"] != args["code"]:
|
||||
raise EmailCodeError()
|
||||
|
||||
WebAppAuthService.revoke_email_code_login_token(args["token"])
|
||||
account = WebAppAuthService.get_user_through_email(user_email)
|
||||
if not account:
|
||||
raise AccountNotFound()
|
||||
|
||||
WebAppAuthService._validate_user_accessibility(account=account, app_code=app_code)
|
||||
|
||||
end_user = WebAppAuthService.create_end_user(email=user_email, app_code=app_code)
|
||||
|
||||
token = WebAppAuthService.login(account=account, app_code=app_code, end_user_id=end_user.id)
|
||||
AccountService.reset_login_error_rate_limit(args["email"])
|
||||
return {"result": "success", "token": token}
|
||||
|
||||
|
||||
api.add_resource(LoginApi, "/login")
|
||||
# api.add_resource(LogoutApi, "/logout")
|
||||
api.add_resource(EmailCodeLoginSendEmailApi, "/email-code-login")
|
||||
api.add_resource(EmailCodeLoginApi, "/email-code-login/validity")
|
||||
@@ -5,7 +5,7 @@ from flask_restful import Resource # type: ignore
|
||||
from werkzeug.exceptions import NotFound, Unauthorized
|
||||
|
||||
from controllers.web import api
|
||||
from controllers.web.error import WebSSOAuthRequiredError
|
||||
from controllers.web.error import WebAppAuthRequiredError
|
||||
from extensions.ext_database import db
|
||||
from libs.passport import PassportService
|
||||
from models.model import App, EndUser, Site
|
||||
@@ -22,10 +22,10 @@ class PassportResource(Resource):
|
||||
if app_code is None:
|
||||
raise Unauthorized("X-App-Code header is missing.")
|
||||
|
||||
if system_features.sso_enforced_for_web:
|
||||
app_web_sso_enabled = EnterpriseService.get_app_web_sso_enabled(app_code).get("enabled", False)
|
||||
if app_web_sso_enabled:
|
||||
raise WebSSOAuthRequiredError()
|
||||
if system_features.webapp_auth.enabled:
|
||||
app_settings = EnterpriseService.WebAppAuth.get_app_access_mode_by_code(app_code=app_code)
|
||||
if not app_settings or not app_settings.access_mode == "public":
|
||||
raise WebAppAuthRequiredError()
|
||||
|
||||
# get site from db and check if it is normal
|
||||
site = db.session.query(Site).filter(Site.code == app_code, Site.status == "normal").first()
|
||||
|
||||
@@ -4,7 +4,7 @@ from flask import request
|
||||
from flask_restful import Resource # type: ignore
|
||||
from werkzeug.exceptions import BadRequest, NotFound, Unauthorized
|
||||
|
||||
from controllers.web.error import WebSSOAuthRequiredError
|
||||
from controllers.web.error import WebAppAuthAccessDeniedError, WebAppAuthRequiredError
|
||||
from extensions.ext_database import db
|
||||
from libs.passport import PassportService
|
||||
from models.model import App, EndUser, Site
|
||||
@@ -57,35 +57,53 @@ def decode_jwt_token():
|
||||
if not end_user:
|
||||
raise NotFound()
|
||||
|
||||
_validate_web_sso_token(decoded, system_features, app_code)
|
||||
# for enterprise webapp auth
|
||||
app_web_auth_enabled = False
|
||||
if system_features.webapp_auth.enabled:
|
||||
app_web_auth_enabled = (
|
||||
EnterpriseService.WebAppAuth.get_app_access_mode_by_code(app_code=app_code).access_mode != "public"
|
||||
)
|
||||
|
||||
_validate_webapp_token(decoded, app_web_auth_enabled, system_features.webapp_auth.enabled)
|
||||
_validate_user_accessibility(decoded, app_code, app_web_auth_enabled, system_features.webapp_auth.enabled)
|
||||
|
||||
return app_model, end_user
|
||||
except Unauthorized as e:
|
||||
if system_features.sso_enforced_for_web:
|
||||
app_web_sso_enabled = EnterpriseService.get_app_web_sso_enabled(app_code).get("enabled", False)
|
||||
if app_web_sso_enabled:
|
||||
raise WebSSOAuthRequiredError()
|
||||
if system_features.webapp_auth.enabled:
|
||||
app_web_auth_enabled = (
|
||||
EnterpriseService.WebAppAuth.get_app_access_mode_by_code(app_code=app_code).access_mode != "public"
|
||||
)
|
||||
if app_web_auth_enabled:
|
||||
raise WebAppAuthRequiredError()
|
||||
|
||||
raise Unauthorized(e.description)
|
||||
|
||||
|
||||
def _validate_web_sso_token(decoded, system_features, app_code):
|
||||
app_web_sso_enabled = False
|
||||
|
||||
# Check if SSO is enforced for web, and if the token source is not SSO, raise an error and redirect to SSO login
|
||||
if system_features.sso_enforced_for_web:
|
||||
app_web_sso_enabled = EnterpriseService.get_app_web_sso_enabled(app_code).get("enabled", False)
|
||||
if app_web_sso_enabled:
|
||||
source = decoded.get("token_source")
|
||||
if not source or source != "sso":
|
||||
raise WebSSOAuthRequiredError()
|
||||
|
||||
# Check if SSO is not enforced for web, and if the token source is SSO,
|
||||
# raise an error and redirect to normal passport login
|
||||
if not system_features.sso_enforced_for_web or not app_web_sso_enabled:
|
||||
def _validate_webapp_token(decoded, app_web_auth_enabled: bool, system_webapp_auth_enabled: bool):
|
||||
# Check if authentication is enforced for web app, and if the token source is not webapp,
|
||||
# raise an error and redirect to login
|
||||
if system_webapp_auth_enabled and app_web_auth_enabled:
|
||||
source = decoded.get("token_source")
|
||||
if source and source == "sso":
|
||||
raise Unauthorized("sso token expired.")
|
||||
if not source or source != "webapp":
|
||||
raise WebAppAuthRequiredError()
|
||||
|
||||
# Check if authentication is not enforced for web, and if the token source is webapp,
|
||||
# raise an error and redirect to normal passport login
|
||||
if not system_webapp_auth_enabled or not app_web_auth_enabled:
|
||||
source = decoded.get("token_source")
|
||||
if source and source == "webapp":
|
||||
raise Unauthorized("webapp token expired.")
|
||||
|
||||
|
||||
def _validate_user_accessibility(decoded, app_code, app_web_auth_enabled: bool, system_webapp_auth_enabled: bool):
|
||||
if system_webapp_auth_enabled and app_web_auth_enabled:
|
||||
# Check if the user is allowed to access the web app
|
||||
user_id = decoded.get("user_id")
|
||||
if not user_id:
|
||||
raise WebAppAuthRequiredError()
|
||||
|
||||
if not EnterpriseService.WebAppAuth.is_user_allowed_to_access_webapp(user_id, app_code=app_code):
|
||||
raise WebAppAuthAccessDeniedError()
|
||||
|
||||
|
||||
class WebApiResource(Resource):
|
||||
|
||||
@@ -104,7 +104,6 @@ class CotAgentRunner(BaseAgentRunner, ABC):
|
||||
|
||||
# recalc llm max tokens
|
||||
prompt_messages = self._organize_prompt_messages()
|
||||
self.recalc_llm_max_tokens(self.model_config, prompt_messages)
|
||||
# invoke model
|
||||
chunks = model_instance.invoke_llm(
|
||||
prompt_messages=prompt_messages,
|
||||
|
||||
@@ -84,7 +84,6 @@ class FunctionCallAgentRunner(BaseAgentRunner):
|
||||
|
||||
# recalc llm max tokens
|
||||
prompt_messages = self._organize_prompt_messages()
|
||||
self.recalc_llm_max_tokens(self.model_config, prompt_messages)
|
||||
# invoke model
|
||||
chunks: Union[Generator[LLMResultChunk, None, None], LLMResult] = model_instance.invoke_llm(
|
||||
prompt_messages=prompt_messages,
|
||||
|
||||
@@ -55,20 +55,6 @@ class AgentChatAppRunner(AppRunner):
|
||||
query = application_generate_entity.query
|
||||
files = application_generate_entity.files
|
||||
|
||||
# Pre-calculate the number of tokens of the prompt messages,
|
||||
# and return the rest number of tokens by model context token size limit and max token size limit.
|
||||
# If the rest number of tokens is not enough, raise exception.
|
||||
# Include: prompt template, inputs, query(optional), files(optional)
|
||||
# Not Include: memory, external data, dataset context
|
||||
self.get_pre_calculate_rest_tokens(
|
||||
app_record=app_record,
|
||||
model_config=application_generate_entity.model_conf,
|
||||
prompt_template_entity=app_config.prompt_template,
|
||||
inputs=inputs,
|
||||
files=files,
|
||||
query=query,
|
||||
)
|
||||
|
||||
memory = None
|
||||
if application_generate_entity.conversation_id:
|
||||
# get memory of conversation (read-only)
|
||||
|
||||
@@ -15,10 +15,8 @@ from core.app.features.annotation_reply.annotation_reply import AnnotationReplyF
|
||||
from core.app.features.hosting_moderation.hosting_moderation import HostingModerationFeature
|
||||
from core.external_data_tool.external_data_fetch import ExternalDataFetch
|
||||
from core.memory.token_buffer_memory import TokenBufferMemory
|
||||
from core.model_manager import ModelInstance
|
||||
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
|
||||
from core.model_runtime.entities.message_entities import AssistantPromptMessage, PromptMessage
|
||||
from core.model_runtime.entities.model_entities import ModelPropertyKey
|
||||
from core.model_runtime.errors.invoke import InvokeBadRequestError
|
||||
from core.moderation.input_moderation import InputModeration
|
||||
from core.prompt.advanced_prompt_transform import AdvancedPromptTransform
|
||||
@@ -31,106 +29,6 @@ if TYPE_CHECKING:
|
||||
|
||||
|
||||
class AppRunner:
|
||||
def get_pre_calculate_rest_tokens(
|
||||
self,
|
||||
app_record: App,
|
||||
model_config: ModelConfigWithCredentialsEntity,
|
||||
prompt_template_entity: PromptTemplateEntity,
|
||||
inputs: Mapping[str, str],
|
||||
files: Sequence["File"],
|
||||
query: Optional[str] = None,
|
||||
) -> int:
|
||||
"""
|
||||
Get pre calculate rest tokens
|
||||
:param app_record: app record
|
||||
:param model_config: model config entity
|
||||
:param prompt_template_entity: prompt template entity
|
||||
:param inputs: inputs
|
||||
:param files: files
|
||||
:param query: query
|
||||
:return:
|
||||
"""
|
||||
# Invoke model
|
||||
model_instance = ModelInstance(
|
||||
provider_model_bundle=model_config.provider_model_bundle, model=model_config.model
|
||||
)
|
||||
|
||||
model_context_tokens = model_config.model_schema.model_properties.get(ModelPropertyKey.CONTEXT_SIZE)
|
||||
|
||||
max_tokens = 0
|
||||
for parameter_rule in model_config.model_schema.parameter_rules:
|
||||
if parameter_rule.name == "max_tokens" or (
|
||||
parameter_rule.use_template and parameter_rule.use_template == "max_tokens"
|
||||
):
|
||||
max_tokens = (
|
||||
model_config.parameters.get(parameter_rule.name)
|
||||
or model_config.parameters.get(parameter_rule.use_template or "")
|
||||
) or 0
|
||||
|
||||
if model_context_tokens is None:
|
||||
return -1
|
||||
|
||||
if max_tokens is None:
|
||||
max_tokens = 0
|
||||
|
||||
# get prompt messages without memory and context
|
||||
prompt_messages, stop = self.organize_prompt_messages(
|
||||
app_record=app_record,
|
||||
model_config=model_config,
|
||||
prompt_template_entity=prompt_template_entity,
|
||||
inputs=inputs,
|
||||
files=files,
|
||||
query=query,
|
||||
)
|
||||
|
||||
prompt_tokens = model_instance.get_llm_num_tokens(prompt_messages)
|
||||
|
||||
rest_tokens: int = model_context_tokens - max_tokens - prompt_tokens
|
||||
if rest_tokens < 0:
|
||||
raise InvokeBadRequestError(
|
||||
"Query or prefix prompt is too long, you can reduce the prefix prompt, "
|
||||
"or shrink the max token, or switch to a llm with a larger token limit size."
|
||||
)
|
||||
|
||||
return rest_tokens
|
||||
|
||||
def recalc_llm_max_tokens(
|
||||
self, model_config: ModelConfigWithCredentialsEntity, prompt_messages: list[PromptMessage]
|
||||
):
|
||||
# recalc max_tokens if sum(prompt_token + max_tokens) over model token limit
|
||||
model_instance = ModelInstance(
|
||||
provider_model_bundle=model_config.provider_model_bundle, model=model_config.model
|
||||
)
|
||||
|
||||
model_context_tokens = model_config.model_schema.model_properties.get(ModelPropertyKey.CONTEXT_SIZE)
|
||||
|
||||
max_tokens = 0
|
||||
for parameter_rule in model_config.model_schema.parameter_rules:
|
||||
if parameter_rule.name == "max_tokens" or (
|
||||
parameter_rule.use_template and parameter_rule.use_template == "max_tokens"
|
||||
):
|
||||
max_tokens = (
|
||||
model_config.parameters.get(parameter_rule.name)
|
||||
or model_config.parameters.get(parameter_rule.use_template or "")
|
||||
) or 0
|
||||
|
||||
if model_context_tokens is None:
|
||||
return -1
|
||||
|
||||
if max_tokens is None:
|
||||
max_tokens = 0
|
||||
|
||||
prompt_tokens = model_instance.get_llm_num_tokens(prompt_messages)
|
||||
|
||||
if prompt_tokens + max_tokens > model_context_tokens:
|
||||
max_tokens = max(model_context_tokens - prompt_tokens, 16)
|
||||
|
||||
for parameter_rule in model_config.model_schema.parameter_rules:
|
||||
if parameter_rule.name == "max_tokens" or (
|
||||
parameter_rule.use_template and parameter_rule.use_template == "max_tokens"
|
||||
):
|
||||
model_config.parameters[parameter_rule.name] = max_tokens
|
||||
|
||||
def organize_prompt_messages(
|
||||
self,
|
||||
app_record: App,
|
||||
|
||||
@@ -50,20 +50,6 @@ class ChatAppRunner(AppRunner):
|
||||
query = application_generate_entity.query
|
||||
files = application_generate_entity.files
|
||||
|
||||
# Pre-calculate the number of tokens of the prompt messages,
|
||||
# and return the rest number of tokens by model context token size limit and max token size limit.
|
||||
# If the rest number of tokens is not enough, raise exception.
|
||||
# Include: prompt template, inputs, query(optional), files(optional)
|
||||
# Not Include: memory, external data, dataset context
|
||||
self.get_pre_calculate_rest_tokens(
|
||||
app_record=app_record,
|
||||
model_config=application_generate_entity.model_conf,
|
||||
prompt_template_entity=app_config.prompt_template,
|
||||
inputs=inputs,
|
||||
files=files,
|
||||
query=query,
|
||||
)
|
||||
|
||||
memory = None
|
||||
if application_generate_entity.conversation_id:
|
||||
# get memory of conversation (read-only)
|
||||
@@ -194,9 +180,6 @@ class ChatAppRunner(AppRunner):
|
||||
if hosting_moderation_result:
|
||||
return
|
||||
|
||||
# Re-calculate the max tokens if sum(prompt_token + max_tokens) over model token limit
|
||||
self.recalc_llm_max_tokens(model_config=application_generate_entity.model_conf, prompt_messages=prompt_messages)
|
||||
|
||||
# Invoke model
|
||||
model_instance = ModelInstance(
|
||||
provider_model_bundle=application_generate_entity.model_conf.provider_model_bundle,
|
||||
|
||||
@@ -43,20 +43,6 @@ class CompletionAppRunner(AppRunner):
|
||||
query = application_generate_entity.query
|
||||
files = application_generate_entity.files
|
||||
|
||||
# Pre-calculate the number of tokens of the prompt messages,
|
||||
# and return the rest number of tokens by model context token size limit and max token size limit.
|
||||
# If the rest number of tokens is not enough, raise exception.
|
||||
# Include: prompt template, inputs, query(optional), files(optional)
|
||||
# Not Include: memory, external data, dataset context
|
||||
self.get_pre_calculate_rest_tokens(
|
||||
app_record=app_record,
|
||||
model_config=application_generate_entity.model_conf,
|
||||
prompt_template_entity=app_config.prompt_template,
|
||||
inputs=inputs,
|
||||
files=files,
|
||||
query=query,
|
||||
)
|
||||
|
||||
# organize all inputs and template to prompt messages
|
||||
# Include: prompt template, inputs, query(optional), files(optional)
|
||||
prompt_messages, stop = self.organize_prompt_messages(
|
||||
@@ -152,9 +138,6 @@ class CompletionAppRunner(AppRunner):
|
||||
if hosting_moderation_result:
|
||||
return
|
||||
|
||||
# Re-calculate the max tokens if sum(prompt_token + max_tokens) over model token limit
|
||||
self.recalc_llm_max_tokens(model_config=application_generate_entity.model_conf, prompt_messages=prompt_messages)
|
||||
|
||||
# Invoke model
|
||||
model_instance = ModelInstance(
|
||||
provider_model_bundle=application_generate_entity.model_conf.provider_model_bundle,
|
||||
|
||||
@@ -11,15 +11,6 @@ from configs import dify_config
|
||||
|
||||
SSRF_DEFAULT_MAX_RETRIES = dify_config.SSRF_DEFAULT_MAX_RETRIES
|
||||
|
||||
proxy_mounts = (
|
||||
{
|
||||
"http://": httpx.HTTPTransport(proxy=dify_config.SSRF_PROXY_HTTP_URL),
|
||||
"https://": httpx.HTTPTransport(proxy=dify_config.SSRF_PROXY_HTTPS_URL),
|
||||
}
|
||||
if dify_config.SSRF_PROXY_HTTP_URL and dify_config.SSRF_PROXY_HTTPS_URL
|
||||
else None
|
||||
)
|
||||
|
||||
BACKOFF_FACTOR = 0.5
|
||||
STATUS_FORCELIST = [429, 500, 502, 503, 504]
|
||||
|
||||
@@ -51,7 +42,11 @@ def make_request(method, url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
|
||||
if dify_config.SSRF_PROXY_ALL_URL:
|
||||
with httpx.Client(proxy=dify_config.SSRF_PROXY_ALL_URL) as client:
|
||||
response = client.request(method=method, url=url, **kwargs)
|
||||
elif proxy_mounts:
|
||||
elif dify_config.SSRF_PROXY_HTTP_URL and dify_config.SSRF_PROXY_HTTPS_URL:
|
||||
proxy_mounts = {
|
||||
"http://": httpx.HTTPTransport(proxy=dify_config.SSRF_PROXY_HTTP_URL),
|
||||
"https://": httpx.HTTPTransport(proxy=dify_config.SSRF_PROXY_HTTPS_URL),
|
||||
}
|
||||
with httpx.Client(mounts=proxy_mounts) as client:
|
||||
response = client.request(method=method, url=url, **kwargs)
|
||||
else:
|
||||
|
||||
@@ -26,7 +26,7 @@ class TokenBufferMemory:
|
||||
self.model_instance = model_instance
|
||||
|
||||
def get_history_prompt_messages(
|
||||
self, max_token_limit: int = 2000, message_limit: Optional[int] = None
|
||||
self, max_token_limit: int = 100000, message_limit: Optional[int] = None
|
||||
) -> Sequence[PromptMessage]:
|
||||
"""
|
||||
Get history prompt messages.
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from .llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
|
||||
from .llm_entities import LLMMode, LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
|
||||
from .message_entities import (
|
||||
AssistantPromptMessage,
|
||||
AudioPromptMessageContent,
|
||||
@@ -23,6 +23,7 @@ __all__ = [
|
||||
"AudioPromptMessageContent",
|
||||
"DocumentPromptMessageContent",
|
||||
"ImagePromptMessageContent",
|
||||
"LLMMode",
|
||||
"LLMResult",
|
||||
"LLMResultChunk",
|
||||
"LLMResultChunkDelta",
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from decimal import Decimal
|
||||
from enum import Enum
|
||||
from enum import StrEnum
|
||||
from typing import Optional
|
||||
|
||||
from pydantic import BaseModel
|
||||
@@ -8,7 +8,7 @@ from core.model_runtime.entities.message_entities import AssistantPromptMessage,
|
||||
from core.model_runtime.entities.model_entities import ModelUsage, PriceInfo
|
||||
|
||||
|
||||
class LLMMode(Enum):
|
||||
class LLMMode(StrEnum):
|
||||
"""
|
||||
Enum class for large language model mode.
|
||||
"""
|
||||
|
||||
@@ -30,6 +30,11 @@ from core.model_runtime.model_providers.__base.ai_model import AIModel
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
HTML_THINKING_TAG = (
|
||||
'<details style="color:gray;background-color: #f8f8f8;padding: 8px;border-radius: 4px;" open> '
|
||||
"<summary> Thinking... </summary>"
|
||||
)
|
||||
|
||||
|
||||
class LargeLanguageModel(AIModel):
|
||||
"""
|
||||
@@ -400,6 +405,40 @@ if you are not sure about the structure.
|
||||
),
|
||||
)
|
||||
|
||||
def _wrap_thinking_by_reasoning_content(self, delta: dict, is_reasoning: bool) -> tuple[str, bool]:
|
||||
"""
|
||||
If the reasoning response is from delta.get("reasoning_content"), we wrap
|
||||
it with HTML details tag.
|
||||
|
||||
:param delta: delta dictionary from LLM streaming response
|
||||
:param is_reasoning: is reasoning
|
||||
:return: tuple of (processed_content, is_reasoning)
|
||||
"""
|
||||
|
||||
content = delta.get("content") or ""
|
||||
reasoning_content = delta.get("reasoning_content")
|
||||
|
||||
if reasoning_content:
|
||||
if not is_reasoning:
|
||||
content = HTML_THINKING_TAG + reasoning_content
|
||||
is_reasoning = True
|
||||
else:
|
||||
content = reasoning_content
|
||||
elif is_reasoning:
|
||||
content = "</details>" + content
|
||||
is_reasoning = False
|
||||
return content, is_reasoning
|
||||
|
||||
def _wrap_thinking_by_tag(self, content: str) -> str:
|
||||
"""
|
||||
if the reasoning response is a <think>...</think> block from delta.get("content"),
|
||||
we replace <think> to <detail>.
|
||||
|
||||
:param content: delta.get("content")
|
||||
:return: processed_content
|
||||
"""
|
||||
return content.replace("<think>", HTML_THINKING_TAG).replace("</think>", "</details>")
|
||||
|
||||
def _invoke_result_generator(
|
||||
self,
|
||||
model: str,
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
- openai
|
||||
- deepseek
|
||||
- anthropic
|
||||
- azure_openai
|
||||
- google
|
||||
@@ -32,7 +33,6 @@
|
||||
- localai
|
||||
- volcengine_maas
|
||||
- openai_api_compatible
|
||||
- deepseek
|
||||
- hunyuan
|
||||
- siliconflow
|
||||
- perfxcloud
|
||||
|
||||
@@ -51,6 +51,40 @@ model_credential_schema:
|
||||
show_on:
|
||||
- variable: __model_type
|
||||
value: llm
|
||||
- variable: mode
|
||||
show_on:
|
||||
- variable: __model_type
|
||||
value: llm
|
||||
label:
|
||||
en_US: Completion mode
|
||||
type: select
|
||||
required: false
|
||||
default: chat
|
||||
placeholder:
|
||||
zh_Hans: 选择对话类型
|
||||
en_US: Select completion mode
|
||||
options:
|
||||
- value: completion
|
||||
label:
|
||||
en_US: Completion
|
||||
zh_Hans: 补全
|
||||
- value: chat
|
||||
label:
|
||||
en_US: Chat
|
||||
zh_Hans: 对话
|
||||
- variable: context_size
|
||||
label:
|
||||
zh_Hans: 模型上下文长度
|
||||
en_US: Model context size
|
||||
required: true
|
||||
show_on:
|
||||
- variable: __model_type
|
||||
value: llm
|
||||
type: text-input
|
||||
default: "4096"
|
||||
placeholder:
|
||||
zh_Hans: 在此输入您的模型上下文长度
|
||||
en_US: Enter your Model context size
|
||||
- variable: jwt_token
|
||||
required: true
|
||||
label:
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
import logging
|
||||
from collections.abc import Generator
|
||||
from collections.abc import Generator, Sequence
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
from azure.ai.inference import ChatCompletionsClient
|
||||
from azure.ai.inference.models import StreamingChatCompletionsUpdate
|
||||
from azure.ai.inference.models import StreamingChatCompletionsUpdate, SystemMessage, UserMessage
|
||||
from azure.core.credentials import AzureKeyCredential
|
||||
from azure.core.exceptions import (
|
||||
ClientAuthenticationError,
|
||||
@@ -20,7 +20,7 @@ from azure.core.exceptions import (
|
||||
)
|
||||
|
||||
from core.model_runtime.callbacks.base_callback import Callback
|
||||
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
|
||||
from core.model_runtime.entities.llm_entities import LLMMode, LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
|
||||
from core.model_runtime.entities.message_entities import (
|
||||
AssistantPromptMessage,
|
||||
PromptMessage,
|
||||
@@ -30,6 +30,7 @@ from core.model_runtime.entities.model_entities import (
|
||||
AIModelEntity,
|
||||
FetchFrom,
|
||||
I18nObject,
|
||||
ModelPropertyKey,
|
||||
ModelType,
|
||||
ParameterRule,
|
||||
ParameterType,
|
||||
@@ -60,10 +61,10 @@ class AzureAIStudioLargeLanguageModel(LargeLanguageModel):
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
prompt_messages: list[PromptMessage],
|
||||
prompt_messages: Sequence[PromptMessage],
|
||||
model_parameters: dict,
|
||||
tools: Optional[list[PromptMessageTool]] = None,
|
||||
stop: Optional[list[str]] = None,
|
||||
tools: Optional[Sequence[PromptMessageTool]] = None,
|
||||
stop: Optional[Sequence[str]] = None,
|
||||
stream: bool = True,
|
||||
user: Optional[str] = None,
|
||||
) -> Union[LLMResult, Generator]:
|
||||
@@ -82,8 +83,8 @@ class AzureAIStudioLargeLanguageModel(LargeLanguageModel):
|
||||
"""
|
||||
|
||||
if not self.client:
|
||||
endpoint = credentials.get("endpoint")
|
||||
api_key = credentials.get("api_key")
|
||||
endpoint = str(credentials.get("endpoint"))
|
||||
api_key = str(credentials.get("api_key"))
|
||||
self.client = ChatCompletionsClient(endpoint=endpoint, credential=AzureKeyCredential(api_key))
|
||||
|
||||
messages = [{"role": msg.role.value, "content": msg.content} for msg in prompt_messages]
|
||||
@@ -94,6 +95,7 @@ class AzureAIStudioLargeLanguageModel(LargeLanguageModel):
|
||||
"temperature": model_parameters.get("temperature", 0),
|
||||
"top_p": model_parameters.get("top_p", 1),
|
||||
"stream": stream,
|
||||
"model": model,
|
||||
}
|
||||
|
||||
if stop:
|
||||
@@ -255,10 +257,16 @@ class AzureAIStudioLargeLanguageModel(LargeLanguageModel):
|
||||
:return:
|
||||
"""
|
||||
try:
|
||||
endpoint = credentials.get("endpoint")
|
||||
api_key = credentials.get("api_key")
|
||||
endpoint = str(credentials.get("endpoint"))
|
||||
api_key = str(credentials.get("api_key"))
|
||||
client = ChatCompletionsClient(endpoint=endpoint, credential=AzureKeyCredential(api_key))
|
||||
client.get_model_info()
|
||||
client.complete(
|
||||
messages=[
|
||||
SystemMessage(content="I say 'ping', you say 'pong'"),
|
||||
UserMessage(content="ping"),
|
||||
],
|
||||
model=model,
|
||||
)
|
||||
except Exception as ex:
|
||||
raise CredentialsValidateFailedError(str(ex))
|
||||
|
||||
@@ -327,7 +335,10 @@ class AzureAIStudioLargeLanguageModel(LargeLanguageModel):
|
||||
fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
|
||||
model_type=ModelType.LLM,
|
||||
features=[],
|
||||
model_properties={},
|
||||
model_properties={
|
||||
ModelPropertyKey.CONTEXT_SIZE: int(credentials.get("context_size", "4096")),
|
||||
ModelPropertyKey.MODE: credentials.get("mode", LLMMode.CHAT),
|
||||
},
|
||||
parameter_rules=rules,
|
||||
)
|
||||
|
||||
|
||||
@@ -138,6 +138,18 @@ model_credential_schema:
|
||||
show_on:
|
||||
- variable: __model_type
|
||||
value: llm
|
||||
- label:
|
||||
en_US: o3-mini
|
||||
value: o3-mini
|
||||
show_on:
|
||||
- variable: __model_type
|
||||
value: llm
|
||||
- label:
|
||||
en_US: o3-mini-2025-01-31
|
||||
value: o3-mini-2025-01-31
|
||||
show_on:
|
||||
- variable: __model_type
|
||||
value: llm
|
||||
- label:
|
||||
en_US: o1-preview
|
||||
value: o1-preview
|
||||
|
||||
@@ -123,6 +123,15 @@ provider_credential_schema:
|
||||
en_US: AWS GovCloud (US-West)
|
||||
zh_Hans: AWS GovCloud (US-West)
|
||||
ja_JP: AWS GovCloud (米国西部)
|
||||
- variable: bedrock_endpoint_url
|
||||
label:
|
||||
zh_Hans: Bedrock Endpoint URL
|
||||
en_US: Bedrock Endpoint URL
|
||||
type: text-input
|
||||
required: false
|
||||
placeholder:
|
||||
zh_Hans: 在此输入您的 Bedrock Endpoint URL, 如:https://123456.cloudfront.net
|
||||
en_US: Enter your Bedrock Endpoint URL, e.g. https://123456.cloudfront.net
|
||||
- variable: model_for_validation
|
||||
required: false
|
||||
label:
|
||||
|
||||
@@ -13,6 +13,7 @@ def get_bedrock_client(service_name: str, credentials: Mapping[str, str]):
|
||||
client_config = Config(region_name=region_name)
|
||||
aws_access_key_id = credentials.get("aws_access_key_id")
|
||||
aws_secret_access_key = credentials.get("aws_secret_access_key")
|
||||
bedrock_endpoint_url = credentials.get("bedrock_endpoint_url")
|
||||
|
||||
if aws_access_key_id and aws_secret_access_key:
|
||||
# use aksk to call bedrock
|
||||
@@ -21,6 +22,7 @@ def get_bedrock_client(service_name: str, credentials: Mapping[str, str]):
|
||||
config=client_config,
|
||||
aws_access_key_id=aws_access_key_id,
|
||||
aws_secret_access_key=aws_secret_access_key,
|
||||
**({"endpoint_url": bedrock_endpoint_url} if bedrock_endpoint_url else {}),
|
||||
)
|
||||
else:
|
||||
# use iam without aksk to call
|
||||
|
||||
@@ -0,0 +1,115 @@
|
||||
model: us.anthropic.claude-3-7-sonnet-20250219-v1:0
|
||||
label:
|
||||
en_US: Claude 3.7 Sonnet(US.Cross Region Inference)
|
||||
icon: icon_s_en.svg
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 200000
|
||||
# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
|
||||
parameter_rules:
|
||||
- name: enable_cache
|
||||
label:
|
||||
zh_Hans: 启用提示缓存
|
||||
en_US: Enable Prompt Cache
|
||||
type: boolean
|
||||
required: false
|
||||
default: true
|
||||
help:
|
||||
zh_Hans: 启用提示缓存可以提高性能并降低成本。Claude 3.7 Sonnet支持在system、messages和tools字段中使用缓存检查点。
|
||||
en_US: Enable prompt caching to improve performance and reduce costs. Claude 3.7 Sonnet supports cache checkpoints in system, messages, and tools fields.
|
||||
- name: reasoning_type
|
||||
label:
|
||||
zh_Hans: 推理配置
|
||||
en_US: Reasoning Type
|
||||
type: boolean
|
||||
required: false
|
||||
default: false
|
||||
placeholder:
|
||||
zh_Hans: 设置推理配置
|
||||
en_US: Set reasoning configuration
|
||||
help:
|
||||
zh_Hans: 控制模型的推理能力。启用时,temperature将固定为1且top_p将被禁用。
|
||||
en_US: Controls the model's reasoning capability. When enabled, temperature will be fixed to 1 and top_p will be disabled.
|
||||
- name: reasoning_budget
|
||||
show_on:
|
||||
- variable: reasoning_type
|
||||
value: true
|
||||
label:
|
||||
zh_Hans: 推理预算
|
||||
en_US: Reasoning Budget
|
||||
type: int
|
||||
default: 1024
|
||||
min: 0
|
||||
max: 128000
|
||||
help:
|
||||
zh_Hans: 推理的预算限制(最小1024),必须小于max_tokens。仅在推理类型为enabled时可用。
|
||||
en_US: Budget limit for reasoning (minimum 1024), must be less than max_tokens. Only available when reasoning type is enabled.
|
||||
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
label:
|
||||
zh_Hans: 最大token数
|
||||
en_US: Max Tokens
|
||||
type: int
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 128000
|
||||
help:
|
||||
zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
|
||||
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
required: false
|
||||
label:
|
||||
zh_Hans: 模型温度
|
||||
en_US: Model Temperature
|
||||
type: float
|
||||
default: 1
|
||||
min: 0.0
|
||||
max: 1.0
|
||||
help:
|
||||
zh_Hans: 生成内容的随机性。当推理功能启用时,该值将被固定为1。
|
||||
en_US: The amount of randomness injected into the response. When reasoning is enabled, this value will be fixed to 1.
|
||||
- name: top_p
|
||||
show_on:
|
||||
- variable: reasoning_type
|
||||
value: disabled
|
||||
use_template: top_p
|
||||
label:
|
||||
zh_Hans: Top P
|
||||
en_US: Top P
|
||||
required: false
|
||||
type: float
|
||||
default: 0.999
|
||||
min: 0.000
|
||||
max: 1.000
|
||||
help:
|
||||
zh_Hans: 在核采样中的概率阈值。当推理功能启用时,该参数将被禁用。
|
||||
en_US: The probability threshold in nucleus sampling. When reasoning is enabled, this parameter will be disabled.
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
required: false
|
||||
type: int
|
||||
default: 0
|
||||
min: 0
|
||||
# tip docs from aws has error, max value is 500
|
||||
max: 500
|
||||
help:
|
||||
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
|
||||
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
pricing:
|
||||
input: '0.003'
|
||||
output: '0.015'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
@@ -58,6 +58,7 @@ class BedrockLargeLanguageModel(LargeLanguageModel):
|
||||
# TODO There is invoke issue: context limit on Cohere Model, will add them after fixed.
|
||||
CONVERSE_API_ENABLED_MODEL_INFO = [
|
||||
{"prefix": "anthropic.claude-v2", "support_system_prompts": True, "support_tool_use": False},
|
||||
{"prefix": "us.deepseek", "support_system_prompts": True, "support_tool_use": False},
|
||||
{"prefix": "anthropic.claude-v1", "support_system_prompts": True, "support_tool_use": False},
|
||||
{"prefix": "us.anthropic.claude-3", "support_system_prompts": True, "support_tool_use": True},
|
||||
{"prefix": "eu.anthropic.claude-3", "support_system_prompts": True, "support_tool_use": True},
|
||||
|
||||
@@ -0,0 +1,63 @@
|
||||
model: us.deepseek.r1-v1:0
|
||||
label:
|
||||
en_US: DeepSeek-R1(US.Cross Region Inference)
|
||||
icon: icon_s_en.svg
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 32768
|
||||
parameter_rules:
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
label:
|
||||
zh_Hans: 最大token数
|
||||
en_US: Max Tokens
|
||||
type: int
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 128000
|
||||
help:
|
||||
zh_Hans: 停止前生成的最大令牌数。
|
||||
en_US: The maximum number of tokens to generate before stopping.
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
required: false
|
||||
label:
|
||||
zh_Hans: 模型温度
|
||||
en_US: Model Temperature
|
||||
type: float
|
||||
default: 1
|
||||
min: 0.0
|
||||
max: 1.0
|
||||
help:
|
||||
zh_Hans: 生成内容的随机性。当推理功能启用时,该值将被固定为1。
|
||||
en_US: The amount of randomness injected into the response. When reasoning is enabled, this value will be fixed to 1.
|
||||
- name: top_p
|
||||
show_on:
|
||||
- variable: reasoning_type
|
||||
value: disabled
|
||||
use_template: top_p
|
||||
label:
|
||||
zh_Hans: Top P
|
||||
en_US: Top P
|
||||
required: false
|
||||
type: float
|
||||
default: 0.999
|
||||
min: 0.000
|
||||
max: 1.000
|
||||
help:
|
||||
zh_Hans: 在核采样中的概率阈值。当推理功能启用时,该参数将被禁用。
|
||||
en_US: The probability threshold in nucleus sampling. When reasoning is enabled, this parameter will be disabled.
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
pricing:
|
||||
input: '0.001'
|
||||
output: '0.005'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
@@ -1,13 +1,10 @@
|
||||
import json
|
||||
from collections.abc import Generator
|
||||
from typing import Optional, Union
|
||||
|
||||
import requests
|
||||
from yarl import URL
|
||||
|
||||
from core.model_runtime.entities.llm_entities import LLMMode, LLMResult, LLMResultChunk, LLMResultChunkDelta
|
||||
from core.model_runtime.entities.llm_entities import LLMMode, LLMResult
|
||||
from core.model_runtime.entities.message_entities import (
|
||||
AssistantPromptMessage,
|
||||
PromptMessage,
|
||||
PromptMessageTool,
|
||||
)
|
||||
@@ -39,208 +36,3 @@ class DeepseekLargeLanguageModel(OAIAPICompatLargeLanguageModel):
|
||||
credentials["mode"] = LLMMode.CHAT.value
|
||||
credentials["function_calling_type"] = "tool_call"
|
||||
credentials["stream_function_calling"] = "support"
|
||||
|
||||
def _handle_generate_stream_response(
|
||||
self, model: str, credentials: dict, response: requests.Response, prompt_messages: list[PromptMessage]
|
||||
) -> Generator:
|
||||
"""
|
||||
Handle llm stream response
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param response: streamed response
|
||||
:param prompt_messages: prompt messages
|
||||
:return: llm response chunk generator
|
||||
"""
|
||||
full_assistant_content = ""
|
||||
chunk_index = 0
|
||||
is_reasoning_started = False # Add flag to track reasoning state
|
||||
|
||||
def create_final_llm_result_chunk(
|
||||
id: Optional[str], index: int, message: AssistantPromptMessage, finish_reason: str, usage: dict
|
||||
) -> LLMResultChunk:
|
||||
# calculate num tokens
|
||||
prompt_tokens = usage and usage.get("prompt_tokens")
|
||||
if prompt_tokens is None:
|
||||
prompt_tokens = self._num_tokens_from_string(model, prompt_messages[0].content)
|
||||
completion_tokens = usage and usage.get("completion_tokens")
|
||||
if completion_tokens is None:
|
||||
completion_tokens = self._num_tokens_from_string(model, full_assistant_content)
|
||||
|
||||
# transform usage
|
||||
usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
|
||||
|
||||
return LLMResultChunk(
|
||||
id=id,
|
||||
model=model,
|
||||
prompt_messages=prompt_messages,
|
||||
delta=LLMResultChunkDelta(index=index, message=message, finish_reason=finish_reason, usage=usage),
|
||||
)
|
||||
|
||||
# delimiter for stream response, need unicode_escape
|
||||
import codecs
|
||||
|
||||
delimiter = credentials.get("stream_mode_delimiter", "\n\n")
|
||||
delimiter = codecs.decode(delimiter, "unicode_escape")
|
||||
|
||||
tools_calls: list[AssistantPromptMessage.ToolCall] = []
|
||||
|
||||
def increase_tool_call(new_tool_calls: list[AssistantPromptMessage.ToolCall]):
|
||||
def get_tool_call(tool_call_id: str):
|
||||
if not tool_call_id:
|
||||
return tools_calls[-1]
|
||||
|
||||
tool_call = next((tool_call for tool_call in tools_calls if tool_call.id == tool_call_id), None)
|
||||
if tool_call is None:
|
||||
tool_call = AssistantPromptMessage.ToolCall(
|
||||
id=tool_call_id,
|
||||
type="function",
|
||||
function=AssistantPromptMessage.ToolCall.ToolCallFunction(name="", arguments=""),
|
||||
)
|
||||
tools_calls.append(tool_call)
|
||||
|
||||
return tool_call
|
||||
|
||||
for new_tool_call in new_tool_calls:
|
||||
# get tool call
|
||||
tool_call = get_tool_call(new_tool_call.function.name)
|
||||
# update tool call
|
||||
if new_tool_call.id:
|
||||
tool_call.id = new_tool_call.id
|
||||
if new_tool_call.type:
|
||||
tool_call.type = new_tool_call.type
|
||||
if new_tool_call.function.name:
|
||||
tool_call.function.name = new_tool_call.function.name
|
||||
if new_tool_call.function.arguments:
|
||||
tool_call.function.arguments += new_tool_call.function.arguments
|
||||
|
||||
finish_reason = None # The default value of finish_reason is None
|
||||
message_id, usage = None, None
|
||||
for chunk in response.iter_lines(decode_unicode=True, delimiter=delimiter):
|
||||
chunk = chunk.strip()
|
||||
if chunk:
|
||||
# ignore sse comments
|
||||
if chunk.startswith(":"):
|
||||
continue
|
||||
decoded_chunk = chunk.strip().removeprefix("data:").lstrip()
|
||||
if decoded_chunk == "[DONE]": # Some provider returns "data: [DONE]"
|
||||
continue
|
||||
|
||||
try:
|
||||
chunk_json: dict = json.loads(decoded_chunk)
|
||||
# stream ended
|
||||
except json.JSONDecodeError as e:
|
||||
yield create_final_llm_result_chunk(
|
||||
id=message_id,
|
||||
index=chunk_index + 1,
|
||||
message=AssistantPromptMessage(content=""),
|
||||
finish_reason="Non-JSON encountered.",
|
||||
usage=usage,
|
||||
)
|
||||
break
|
||||
# handle the error here. for issue #11629
|
||||
if chunk_json.get("error") and chunk_json.get("choices") is None:
|
||||
raise ValueError(chunk_json.get("error"))
|
||||
|
||||
if chunk_json:
|
||||
if u := chunk_json.get("usage"):
|
||||
usage = u
|
||||
if not chunk_json or len(chunk_json["choices"]) == 0:
|
||||
continue
|
||||
|
||||
choice = chunk_json["choices"][0]
|
||||
finish_reason = chunk_json["choices"][0].get("finish_reason")
|
||||
message_id = chunk_json.get("id")
|
||||
chunk_index += 1
|
||||
|
||||
if "delta" in choice:
|
||||
delta = choice["delta"]
|
||||
is_reasoning = delta.get("reasoning_content")
|
||||
delta_content = delta.get("content") or delta.get("reasoning_content")
|
||||
|
||||
assistant_message_tool_calls = None
|
||||
|
||||
if "tool_calls" in delta and credentials.get("function_calling_type", "no_call") == "tool_call":
|
||||
assistant_message_tool_calls = delta.get("tool_calls", None)
|
||||
elif (
|
||||
"function_call" in delta
|
||||
and credentials.get("function_calling_type", "no_call") == "function_call"
|
||||
):
|
||||
assistant_message_tool_calls = [
|
||||
{"id": "tool_call_id", "type": "function", "function": delta.get("function_call", {})}
|
||||
]
|
||||
|
||||
# assistant_message_function_call = delta.delta.function_call
|
||||
|
||||
# extract tool calls from response
|
||||
if assistant_message_tool_calls:
|
||||
tool_calls = self._extract_response_tool_calls(assistant_message_tool_calls)
|
||||
increase_tool_call(tool_calls)
|
||||
|
||||
if delta_content is None or delta_content == "":
|
||||
continue
|
||||
|
||||
# Add markdown quote markers for reasoning content
|
||||
if is_reasoning:
|
||||
if not is_reasoning_started:
|
||||
delta_content = "> 💭 " + delta_content
|
||||
is_reasoning_started = True
|
||||
elif "\n\n" in delta_content:
|
||||
delta_content = delta_content.replace("\n\n", "\n> ")
|
||||
elif "\n" in delta_content:
|
||||
delta_content = delta_content.replace("\n", "\n> ")
|
||||
elif is_reasoning_started:
|
||||
# If we were in reasoning mode but now getting regular content,
|
||||
# add \n\n to close the reasoning block
|
||||
delta_content = "\n\n" + delta_content
|
||||
is_reasoning_started = False
|
||||
|
||||
# transform assistant message to prompt message
|
||||
assistant_prompt_message = AssistantPromptMessage(
|
||||
content=delta_content,
|
||||
)
|
||||
|
||||
# reset tool calls
|
||||
tool_calls = []
|
||||
full_assistant_content += delta_content
|
||||
elif "text" in choice:
|
||||
choice_text = choice.get("text", "")
|
||||
if choice_text == "":
|
||||
continue
|
||||
|
||||
# transform assistant message to prompt message
|
||||
assistant_prompt_message = AssistantPromptMessage(content=choice_text)
|
||||
full_assistant_content += choice_text
|
||||
else:
|
||||
continue
|
||||
|
||||
yield LLMResultChunk(
|
||||
id=message_id,
|
||||
model=model,
|
||||
prompt_messages=prompt_messages,
|
||||
delta=LLMResultChunkDelta(
|
||||
index=chunk_index,
|
||||
message=assistant_prompt_message,
|
||||
),
|
||||
)
|
||||
|
||||
chunk_index += 1
|
||||
|
||||
if tools_calls:
|
||||
yield LLMResultChunk(
|
||||
id=message_id,
|
||||
model=model,
|
||||
prompt_messages=prompt_messages,
|
||||
delta=LLMResultChunkDelta(
|
||||
index=chunk_index,
|
||||
message=AssistantPromptMessage(tool_calls=tools_calls, content=""),
|
||||
),
|
||||
)
|
||||
|
||||
yield create_final_llm_result_chunk(
|
||||
id=message_id,
|
||||
index=chunk_index,
|
||||
message=AssistantPromptMessage(content=""),
|
||||
finish_reason=finish_reason,
|
||||
usage=usage,
|
||||
)
|
||||
|
||||
@@ -19,8 +19,8 @@ class GoogleProvider(ModelProvider):
|
||||
try:
|
||||
model_instance = self.get_model_instance(ModelType.LLM)
|
||||
|
||||
# Use `gemini-pro` model for validate,
|
||||
model_instance.validate_credentials(model="gemini-pro", credentials=credentials)
|
||||
# Use `gemini-2.0-flash` model for validate,
|
||||
model_instance.validate_credentials(model="gemini-2.0-flash", credentials=credentials)
|
||||
except CredentialsValidateFailedError as ex:
|
||||
raise ex
|
||||
except Exception as ex:
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
- gemini-2.0-flash-001
|
||||
- gemini-2.0-flash-exp
|
||||
- gemini-2.0-pro-exp-02-05
|
||||
- gemini-2.0-flash-thinking-exp-1219
|
||||
- gemini-2.0-flash-thinking-exp-01-21
|
||||
- gemini-1.5-pro
|
||||
@@ -17,5 +19,3 @@
|
||||
- gemini-exp-1206
|
||||
- gemini-exp-1121
|
||||
- gemini-exp-1114
|
||||
- gemini-pro
|
||||
- gemini-pro-vision
|
||||
|
||||
@@ -0,0 +1,41 @@
|
||||
model: gemini-2.0-flash-001
|
||||
label:
|
||||
en_US: Gemini 2.0 Flash 001
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
- document
|
||||
- video
|
||||
- audio
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 1048576
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_output_tokens
|
||||
use_template: max_tokens
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: json_schema
|
||||
use_template: json_schema
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,41 @@
|
||||
model: gemini-2.0-pro-exp-02-05
|
||||
label:
|
||||
en_US: Gemini 2.0 pro exp 02-05
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
- document
|
||||
- video
|
||||
- audio
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 1048576
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_output_tokens
|
||||
use_template: max_tokens
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: json_schema
|
||||
use_template: json_schema
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@@ -1,3 +1,4 @@
|
||||
- deepseek-r1-distill-llama-70b
|
||||
- llama-3.1-405b-reasoning
|
||||
- llama-3.3-70b-versatile
|
||||
- llama-3.1-70b-versatile
|
||||
|
||||
@@ -0,0 +1,36 @@
|
||||
model: deepseek-r1-distill-llama-70b
|
||||
label:
|
||||
en_US: DeepSeek R1 Distill Llama 70b
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 128000
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 512
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: response_format
|
||||
label:
|
||||
zh_Hans: 回复格式
|
||||
en_US: Response Format
|
||||
type: string
|
||||
help:
|
||||
zh_Hans: 指定模型必须输出的格式
|
||||
en_US: specifying the format that the model must output
|
||||
required: false
|
||||
options:
|
||||
- text
|
||||
- json_object
|
||||
pricing:
|
||||
input: '3.00'
|
||||
output: '3.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@@ -1,3 +1,4 @@
|
||||
- deepseek-ai/deepseek-r1
|
||||
- google/gemma-7b
|
||||
- google/codegemma-7b
|
||||
- google/recurrentgemma-2b
|
||||
|
||||
@@ -0,0 +1,35 @@
|
||||
model: deepseek-ai/deepseek-r1
|
||||
label:
|
||||
en_US: deepseek-ai/deepseek-r1
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 128000
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
min: 0
|
||||
max: 1
|
||||
default: 0.5
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
min: 0
|
||||
max: 1
|
||||
default: 1
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
min: 1
|
||||
max: 1024
|
||||
default: 1024
|
||||
- name: frequency_penalty
|
||||
use_template: frequency_penalty
|
||||
min: -2
|
||||
max: 2
|
||||
default: 0
|
||||
- name: presence_penalty
|
||||
use_template: presence_penalty
|
||||
min: -2
|
||||
max: 2
|
||||
default: 0
|
||||
@@ -83,7 +83,7 @@ class NVIDIALargeLanguageModel(OAIAPICompatLargeLanguageModel):
|
||||
def _add_custom_parameters(self, credentials: dict, model: str) -> None:
|
||||
credentials["mode"] = "chat"
|
||||
|
||||
if self.MODEL_SUFFIX_MAP[model]:
|
||||
if self.MODEL_SUFFIX_MAP.get(model):
|
||||
credentials["server_url"] = f"https://ai.api.nvidia.com/v1/{self.MODEL_SUFFIX_MAP[model]}"
|
||||
credentials.pop("endpoint_url")
|
||||
else:
|
||||
|
||||
@@ -0,0 +1,52 @@
|
||||
model: cohere.command-r-08-2024
|
||||
label:
|
||||
en_US: cohere.command-r-08-2024 v1.7
|
||||
model_type: llm
|
||||
features:
|
||||
- multi-tool-call
|
||||
- agent-thought
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 128000
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
default: 1
|
||||
max: 1.0
|
||||
- name: topP
|
||||
use_template: top_p
|
||||
default: 0.75
|
||||
min: 0
|
||||
max: 1
|
||||
- name: topK
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
default: 0
|
||||
min: 0
|
||||
max: 500
|
||||
- name: presencePenalty
|
||||
use_template: presence_penalty
|
||||
min: 0
|
||||
max: 1
|
||||
default: 0
|
||||
- name: frequencyPenalty
|
||||
use_template: frequency_penalty
|
||||
min: 0
|
||||
max: 1
|
||||
default: 0
|
||||
- name: maxTokens
|
||||
use_template: max_tokens
|
||||
default: 600
|
||||
max: 4000
|
||||
pricing:
|
||||
input: '0.0009'
|
||||
output: '0.0009'
|
||||
unit: '0.0001'
|
||||
currency: USD
|
||||
@@ -50,3 +50,4 @@ pricing:
|
||||
output: '0.004'
|
||||
unit: '0.0001'
|
||||
currency: USD
|
||||
deprecated: true
|
||||
|
||||
@@ -0,0 +1,52 @@
|
||||
model: cohere.command-r-plus-08-2024
|
||||
label:
|
||||
en_US: cohere.command-r-plus-08-2024 v1.6
|
||||
model_type: llm
|
||||
features:
|
||||
- multi-tool-call
|
||||
- agent-thought
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 128000
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
default: 1
|
||||
max: 1.0
|
||||
- name: topP
|
||||
use_template: top_p
|
||||
default: 0.75
|
||||
min: 0
|
||||
max: 1
|
||||
- name: topK
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
default: 0
|
||||
min: 0
|
||||
max: 500
|
||||
- name: presencePenalty
|
||||
use_template: presence_penalty
|
||||
min: 0
|
||||
max: 1
|
||||
default: 0
|
||||
- name: frequencyPenalty
|
||||
use_template: frequency_penalty
|
||||
min: 0
|
||||
max: 1
|
||||
default: 0
|
||||
- name: maxTokens
|
||||
use_template: max_tokens
|
||||
default: 600
|
||||
max: 4000
|
||||
pricing:
|
||||
input: '0.0156'
|
||||
output: '0.0156'
|
||||
unit: '0.0001'
|
||||
currency: USD
|
||||
@@ -50,3 +50,4 @@ pricing:
|
||||
output: '0.0219'
|
||||
unit: '0.0001'
|
||||
currency: USD
|
||||
deprecated: true
|
||||
|
||||
@@ -33,7 +33,7 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
request_template = {
|
||||
"compartmentId": "",
|
||||
"servingMode": {"modelId": "cohere.command-r-plus", "servingType": "ON_DEMAND"},
|
||||
"servingMode": {"modelId": "cohere.command-r-plus-08-2024", "servingType": "ON_DEMAND"},
|
||||
"chatRequest": {
|
||||
"apiFormat": "COHERE",
|
||||
# "preambleOverride": "You are a helpful assistant.",
|
||||
@@ -60,19 +60,19 @@ oci_config_template = {
|
||||
class OCILargeLanguageModel(LargeLanguageModel):
|
||||
# https://docs.oracle.com/en-us/iaas/Content/generative-ai/pretrained-models.htm
|
||||
_supported_models = {
|
||||
"meta.llama-3-70b-instruct": {
|
||||
"meta.llama-3.1-70b-instruct": {
|
||||
"system": True,
|
||||
"multimodal": False,
|
||||
"tool_call": False,
|
||||
"stream_tool_call": False,
|
||||
},
|
||||
"cohere.command-r-16k": {
|
||||
"cohere.command-r-08-2024": {
|
||||
"system": True,
|
||||
"multimodal": False,
|
||||
"tool_call": True,
|
||||
"stream_tool_call": False,
|
||||
},
|
||||
"cohere.command-r-plus": {
|
||||
"cohere.command-r-plus-08-2024": {
|
||||
"system": True,
|
||||
"multimodal": False,
|
||||
"tool_call": True,
|
||||
|
||||
@@ -49,3 +49,4 @@ pricing:
|
||||
output: '0.015'
|
||||
unit: '0.0001'
|
||||
currency: USD
|
||||
deprecated: true
|
||||
|
||||
@@ -0,0 +1,51 @@
|
||||
model: meta.llama-3.1-70b-instruct
|
||||
label:
|
||||
zh_Hans: meta.llama-3.1-70b-instruct
|
||||
en_US: meta.llama-3.1-70b-instruct
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 131072
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
default: 1
|
||||
max: 2.0
|
||||
- name: topP
|
||||
use_template: top_p
|
||||
default: 0.75
|
||||
min: 0
|
||||
max: 1
|
||||
- name: topK
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
default: 0
|
||||
min: 0
|
||||
max: 500
|
||||
- name: presencePenalty
|
||||
use_template: presence_penalty
|
||||
min: -2
|
||||
max: 2
|
||||
default: 0
|
||||
- name: frequencyPenalty
|
||||
use_template: frequency_penalty
|
||||
min: -2
|
||||
max: 2
|
||||
default: 0
|
||||
- name: maxTokens
|
||||
use_template: max_tokens
|
||||
default: 600
|
||||
max: 4000
|
||||
pricing:
|
||||
input: '0.0075'
|
||||
output: '0.0075'
|
||||
unit: '0.0001'
|
||||
currency: USD
|
||||
@@ -19,8 +19,8 @@ class OCIGENAIProvider(ModelProvider):
|
||||
try:
|
||||
model_instance = self.get_model_instance(ModelType.LLM)
|
||||
|
||||
# Use `cohere.command-r-plus` model for validate,
|
||||
model_instance.validate_credentials(model="cohere.command-r-plus", credentials=credentials)
|
||||
# Use `cohere.command-r-plus-08-2024` model for validate,
|
||||
model_instance.validate_credentials(model="cohere.command-r-plus-08-2024", credentials=credentials)
|
||||
except CredentialsValidateFailedError as ex:
|
||||
raise ex
|
||||
except Exception as ex:
|
||||
|
||||
@@ -367,6 +367,7 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
|
||||
|
||||
# transform assistant message to prompt message
|
||||
text = chunk_json["response"]
|
||||
text = self._wrap_thinking_by_tag(text)
|
||||
|
||||
assistant_prompt_message = AssistantPromptMessage(content=text)
|
||||
|
||||
|
||||
@@ -2,6 +2,8 @@
|
||||
- o1-2024-12-17
|
||||
- o1-mini
|
||||
- o1-mini-2024-09-12
|
||||
- o3-mini
|
||||
- o3-mini-2025-01-31
|
||||
- gpt-4
|
||||
- gpt-4o
|
||||
- gpt-4o-2024-05-13
|
||||
|
||||
@@ -619,9 +619,9 @@ class OpenAILargeLanguageModel(_CommonOpenAI, LargeLanguageModel):
|
||||
# clear illegal prompt messages
|
||||
prompt_messages = self._clear_illegal_prompt_messages(model, prompt_messages)
|
||||
|
||||
# o1 compatibility
|
||||
# o1, o3 compatibility
|
||||
block_as_stream = False
|
||||
if model.startswith("o1"):
|
||||
if model.startswith(("o1", "o3")):
|
||||
if "max_tokens" in model_parameters:
|
||||
model_parameters["max_completion_tokens"] = model_parameters["max_tokens"]
|
||||
del model_parameters["max_tokens"]
|
||||
@@ -941,7 +941,7 @@ class OpenAILargeLanguageModel(_CommonOpenAI, LargeLanguageModel):
|
||||
]
|
||||
)
|
||||
|
||||
if model.startswith("o1"):
|
||||
if model.startswith(("o1", "o3")):
|
||||
system_message_count = len([m for m in prompt_messages if isinstance(m, SystemPromptMessage)])
|
||||
if system_message_count > 0:
|
||||
new_prompt_messages = []
|
||||
@@ -1053,11 +1053,11 @@ class OpenAILargeLanguageModel(_CommonOpenAI, LargeLanguageModel):
|
||||
model = model.split(":")[1]
|
||||
|
||||
# Currently, we can use gpt4o to calculate chatgpt-4o-latest's token.
|
||||
if model == "chatgpt-4o-latest" or model.startswith("o1"):
|
||||
if model == "chatgpt-4o-latest" or model.startswith(("o1", "o3")):
|
||||
model = "gpt-4o"
|
||||
|
||||
try:
|
||||
encoding = tiktoken.encoding_for_model(model)
|
||||
encoding = tiktoken.get_encoding(model)
|
||||
except KeyError:
|
||||
logger.warning("Warning: model not found. Using cl100k_base encoding.")
|
||||
model = "cl100k_base"
|
||||
@@ -1068,7 +1068,7 @@ class OpenAILargeLanguageModel(_CommonOpenAI, LargeLanguageModel):
|
||||
tokens_per_message = 4
|
||||
# if there's a name, the role is omitted
|
||||
tokens_per_name = -1
|
||||
elif model.startswith("gpt-3.5-turbo") or model.startswith("gpt-4") or model.startswith("o1"):
|
||||
elif model.startswith("gpt-3.5-turbo") or model.startswith("gpt-4") or model.startswith(("o1", "o3")):
|
||||
tokens_per_message = 3
|
||||
tokens_per_name = 1
|
||||
else:
|
||||
|
||||
@@ -16,6 +16,19 @@ parameter_rules:
|
||||
default: 50000
|
||||
min: 1
|
||||
max: 50000
|
||||
- name: reasoning_effort
|
||||
label:
|
||||
zh_Hans: 推理工作
|
||||
en_US: reasoning_effort
|
||||
type: string
|
||||
help:
|
||||
zh_Hans: 限制推理模型的推理工作
|
||||
en_US: constrains effort on reasoning for reasoning models
|
||||
required: false
|
||||
options:
|
||||
- low
|
||||
- medium
|
||||
- high
|
||||
- name: response_format
|
||||
label:
|
||||
zh_Hans: 回复格式
|
||||
|
||||
@@ -17,6 +17,19 @@ parameter_rules:
|
||||
default: 50000
|
||||
min: 1
|
||||
max: 50000
|
||||
- name: reasoning_effort
|
||||
label:
|
||||
zh_Hans: 推理工作
|
||||
en_US: reasoning_effort
|
||||
type: string
|
||||
help:
|
||||
zh_Hans: 限制推理模型的推理工作
|
||||
en_US: constrains effort on reasoning for reasoning models
|
||||
required: false
|
||||
options:
|
||||
- low
|
||||
- medium
|
||||
- high
|
||||
- name: response_format
|
||||
label:
|
||||
zh_Hans: 回复格式
|
||||
|
||||
@@ -0,0 +1,46 @@
|
||||
model: o3-mini-2025-01-31
|
||||
label:
|
||||
zh_Hans: o3-mini-2025-01-31
|
||||
en_US: o3-mini-2025-01-31
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 200000
|
||||
parameter_rules:
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 100000
|
||||
min: 1
|
||||
max: 100000
|
||||
- name: reasoning_effort
|
||||
label:
|
||||
zh_Hans: 推理工作
|
||||
en_US: reasoning_effort
|
||||
type: string
|
||||
help:
|
||||
zh_Hans: 限制推理模型的推理工作
|
||||
en_US: constrains effort on reasoning for reasoning models
|
||||
required: false
|
||||
options:
|
||||
- low
|
||||
- medium
|
||||
- high
|
||||
- name: response_format
|
||||
label:
|
||||
zh_Hans: 回复格式
|
||||
en_US: response_format
|
||||
type: string
|
||||
help:
|
||||
zh_Hans: 指定模型必须输出的格式
|
||||
en_US: specifying the format that the model must output
|
||||
required: false
|
||||
options:
|
||||
- text
|
||||
- json_object
|
||||
pricing:
|
||||
input: '1.10'
|
||||
output: '4.40'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,46 @@
|
||||
model: o3-mini
|
||||
label:
|
||||
zh_Hans: o3-mini
|
||||
en_US: o3-mini
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 200000
|
||||
parameter_rules:
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 100000
|
||||
min: 1
|
||||
max: 100000
|
||||
- name: reasoning_effort
|
||||
label:
|
||||
zh_Hans: 推理工作
|
||||
en_US: reasoning_effort
|
||||
type: string
|
||||
help:
|
||||
zh_Hans: 限制推理模型的推理工作
|
||||
en_US: constrains effort on reasoning for reasoning models
|
||||
required: false
|
||||
options:
|
||||
- low
|
||||
- medium
|
||||
- high
|
||||
- name: response_format
|
||||
label:
|
||||
zh_Hans: 回复格式
|
||||
en_US: response_format
|
||||
type: string
|
||||
help:
|
||||
zh_Hans: 指定模型必须输出的格式
|
||||
en_US: specifying the format that the model must output
|
||||
required: false
|
||||
options:
|
||||
- text
|
||||
- json_object
|
||||
pricing:
|
||||
input: '1.10'
|
||||
output: '4.40'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@@ -1,5 +1,5 @@
|
||||
import codecs
|
||||
import json
|
||||
import logging
|
||||
from collections.abc import Generator
|
||||
from decimal import Decimal
|
||||
from typing import Optional, Union, cast
|
||||
@@ -38,8 +38,6 @@ from core.model_runtime.model_providers.__base.large_language_model import Large
|
||||
from core.model_runtime.model_providers.openai_api_compatible._common import _CommonOaiApiCompat
|
||||
from core.model_runtime.utils import helper
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
|
||||
"""
|
||||
@@ -99,7 +97,7 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
|
||||
:param tools: tools for tool calling
|
||||
:return:
|
||||
"""
|
||||
return self._num_tokens_from_messages(model, prompt_messages, tools, credentials)
|
||||
return self._num_tokens_from_messages(prompt_messages, tools, credentials)
|
||||
|
||||
def validate_credentials(self, model: str, credentials: dict) -> None:
|
||||
"""
|
||||
@@ -398,6 +396,73 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
|
||||
|
||||
return self._handle_generate_response(model, credentials, response, prompt_messages)
|
||||
|
||||
def _create_final_llm_result_chunk(
|
||||
self,
|
||||
index: int,
|
||||
message: AssistantPromptMessage,
|
||||
finish_reason: str,
|
||||
usage: dict,
|
||||
model: str,
|
||||
prompt_messages: list[PromptMessage],
|
||||
credentials: dict,
|
||||
full_content: str,
|
||||
) -> LLMResultChunk:
|
||||
# calculate num tokens
|
||||
prompt_tokens = usage and usage.get("prompt_tokens")
|
||||
if prompt_tokens is None:
|
||||
prompt_tokens = self._num_tokens_from_string(text=prompt_messages[0].content)
|
||||
completion_tokens = usage and usage.get("completion_tokens")
|
||||
if completion_tokens is None:
|
||||
completion_tokens = self._num_tokens_from_string(text=full_content)
|
||||
|
||||
# transform usage
|
||||
usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
|
||||
|
||||
return LLMResultChunk(
|
||||
model=model,
|
||||
prompt_messages=prompt_messages,
|
||||
delta=LLMResultChunkDelta(index=index, message=message, finish_reason=finish_reason, usage=usage),
|
||||
)
|
||||
|
||||
def _get_tool_call(self, tool_call_id: str, tools_calls: list[AssistantPromptMessage.ToolCall]):
|
||||
"""
|
||||
Get or create a tool call by ID
|
||||
|
||||
:param tool_call_id: tool call ID
|
||||
:param tools_calls: list of existing tool calls
|
||||
:return: existing or new tool call, updated tools_calls
|
||||
"""
|
||||
if not tool_call_id:
|
||||
return tools_calls[-1], tools_calls
|
||||
|
||||
tool_call = next((tool_call for tool_call in tools_calls if tool_call.id == tool_call_id), None)
|
||||
if tool_call is None:
|
||||
tool_call = AssistantPromptMessage.ToolCall(
|
||||
id=tool_call_id,
|
||||
type="function",
|
||||
function=AssistantPromptMessage.ToolCall.ToolCallFunction(name="", arguments=""),
|
||||
)
|
||||
tools_calls.append(tool_call)
|
||||
|
||||
return tool_call, tools_calls
|
||||
|
||||
def _increase_tool_call(
|
||||
self, new_tool_calls: list[AssistantPromptMessage.ToolCall], tools_calls: list[AssistantPromptMessage.ToolCall]
|
||||
) -> list[AssistantPromptMessage.ToolCall]:
|
||||
for new_tool_call in new_tool_calls:
|
||||
# get tool call
|
||||
tool_call, tools_calls = self._get_tool_call(new_tool_call.function.name, tools_calls)
|
||||
# update tool call
|
||||
if new_tool_call.id:
|
||||
tool_call.id = new_tool_call.id
|
||||
if new_tool_call.type:
|
||||
tool_call.type = new_tool_call.type
|
||||
if new_tool_call.function.name:
|
||||
tool_call.function.name = new_tool_call.function.name
|
||||
if new_tool_call.function.arguments:
|
||||
tool_call.function.arguments += new_tool_call.function.arguments
|
||||
return tools_calls
|
||||
|
||||
def _handle_generate_stream_response(
|
||||
self, model: str, credentials: dict, response: requests.Response, prompt_messages: list[PromptMessage]
|
||||
) -> Generator:
|
||||
@@ -410,69 +475,15 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
|
||||
:param prompt_messages: prompt messages
|
||||
:return: llm response chunk generator
|
||||
"""
|
||||
full_assistant_content = ""
|
||||
chunk_index = 0
|
||||
|
||||
def create_final_llm_result_chunk(
|
||||
id: Optional[str], index: int, message: AssistantPromptMessage, finish_reason: str, usage: dict
|
||||
) -> LLMResultChunk:
|
||||
# calculate num tokens
|
||||
prompt_tokens = usage and usage.get("prompt_tokens")
|
||||
if prompt_tokens is None:
|
||||
prompt_tokens = self._num_tokens_from_string(model, prompt_messages[0].content)
|
||||
completion_tokens = usage and usage.get("completion_tokens")
|
||||
if completion_tokens is None:
|
||||
completion_tokens = self._num_tokens_from_string(model, full_assistant_content)
|
||||
|
||||
# transform usage
|
||||
usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
|
||||
|
||||
return LLMResultChunk(
|
||||
id=id,
|
||||
model=model,
|
||||
prompt_messages=prompt_messages,
|
||||
delta=LLMResultChunkDelta(index=index, message=message, finish_reason=finish_reason, usage=usage),
|
||||
)
|
||||
|
||||
full_assistant_content = ""
|
||||
tools_calls: list[AssistantPromptMessage.ToolCall] = []
|
||||
finish_reason = None
|
||||
usage = None
|
||||
is_reasoning_started = False
|
||||
# delimiter for stream response, need unicode_escape
|
||||
import codecs
|
||||
|
||||
delimiter = credentials.get("stream_mode_delimiter", "\n\n")
|
||||
delimiter = codecs.decode(delimiter, "unicode_escape")
|
||||
|
||||
tools_calls: list[AssistantPromptMessage.ToolCall] = []
|
||||
|
||||
def increase_tool_call(new_tool_calls: list[AssistantPromptMessage.ToolCall]):
|
||||
def get_tool_call(tool_call_id: str):
|
||||
if not tool_call_id:
|
||||
return tools_calls[-1]
|
||||
|
||||
tool_call = next((tool_call for tool_call in tools_calls if tool_call.id == tool_call_id), None)
|
||||
if tool_call is None:
|
||||
tool_call = AssistantPromptMessage.ToolCall(
|
||||
id=tool_call_id,
|
||||
type="function",
|
||||
function=AssistantPromptMessage.ToolCall.ToolCallFunction(name="", arguments=""),
|
||||
)
|
||||
tools_calls.append(tool_call)
|
||||
|
||||
return tool_call
|
||||
|
||||
for new_tool_call in new_tool_calls:
|
||||
# get tool call
|
||||
tool_call = get_tool_call(new_tool_call.function.name)
|
||||
# update tool call
|
||||
if new_tool_call.id:
|
||||
tool_call.id = new_tool_call.id
|
||||
if new_tool_call.type:
|
||||
tool_call.type = new_tool_call.type
|
||||
if new_tool_call.function.name:
|
||||
tool_call.function.name = new_tool_call.function.name
|
||||
if new_tool_call.function.arguments:
|
||||
tool_call.function.arguments += new_tool_call.function.arguments
|
||||
|
||||
finish_reason = None # The default value of finish_reason is None
|
||||
message_id, usage = None, None
|
||||
for chunk in response.iter_lines(decode_unicode=True, delimiter=delimiter):
|
||||
chunk = chunk.strip()
|
||||
if chunk:
|
||||
@@ -487,12 +498,15 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
|
||||
chunk_json: dict = json.loads(decoded_chunk)
|
||||
# stream ended
|
||||
except json.JSONDecodeError as e:
|
||||
yield create_final_llm_result_chunk(
|
||||
id=message_id,
|
||||
yield self._create_final_llm_result_chunk(
|
||||
index=chunk_index + 1,
|
||||
message=AssistantPromptMessage(content=""),
|
||||
finish_reason="Non-JSON encountered.",
|
||||
usage=usage,
|
||||
model=model,
|
||||
credentials=credentials,
|
||||
prompt_messages=prompt_messages,
|
||||
full_content=full_assistant_content,
|
||||
)
|
||||
break
|
||||
# handle the error here. for issue #11629
|
||||
@@ -507,12 +521,14 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
|
||||
|
||||
choice = chunk_json["choices"][0]
|
||||
finish_reason = chunk_json["choices"][0].get("finish_reason")
|
||||
message_id = chunk_json.get("id")
|
||||
chunk_index += 1
|
||||
|
||||
if "delta" in choice:
|
||||
delta = choice["delta"]
|
||||
delta_content = delta.get("content")
|
||||
delta_content, is_reasoning_started = self._wrap_thinking_by_reasoning_content(
|
||||
delta, is_reasoning_started
|
||||
)
|
||||
delta_content = self._wrap_thinking_by_tag(delta_content)
|
||||
|
||||
assistant_message_tool_calls = None
|
||||
|
||||
@@ -526,12 +542,10 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
|
||||
{"id": "tool_call_id", "type": "function", "function": delta.get("function_call", {})}
|
||||
]
|
||||
|
||||
# assistant_message_function_call = delta.delta.function_call
|
||||
|
||||
# extract tool calls from response
|
||||
if assistant_message_tool_calls:
|
||||
tool_calls = self._extract_response_tool_calls(assistant_message_tool_calls)
|
||||
increase_tool_call(tool_calls)
|
||||
tools_calls = self._increase_tool_call(tool_calls, tools_calls)
|
||||
|
||||
if delta_content is None or delta_content == "":
|
||||
continue
|
||||
@@ -556,7 +570,6 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
|
||||
continue
|
||||
|
||||
yield LLMResultChunk(
|
||||
id=message_id,
|
||||
model=model,
|
||||
prompt_messages=prompt_messages,
|
||||
delta=LLMResultChunkDelta(
|
||||
@@ -569,7 +582,6 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
|
||||
|
||||
if tools_calls:
|
||||
yield LLMResultChunk(
|
||||
id=message_id,
|
||||
model=model,
|
||||
prompt_messages=prompt_messages,
|
||||
delta=LLMResultChunkDelta(
|
||||
@@ -578,12 +590,15 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
|
||||
),
|
||||
)
|
||||
|
||||
yield create_final_llm_result_chunk(
|
||||
id=message_id,
|
||||
yield self._create_final_llm_result_chunk(
|
||||
index=chunk_index,
|
||||
message=AssistantPromptMessage(content=""),
|
||||
finish_reason=finish_reason,
|
||||
usage=usage,
|
||||
model=model,
|
||||
credentials=credentials,
|
||||
prompt_messages=prompt_messages,
|
||||
full_content=full_assistant_content,
|
||||
)
|
||||
|
||||
def _handle_generate_response(
|
||||
@@ -697,12 +712,11 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
|
||||
return message_dict
|
||||
|
||||
def _num_tokens_from_string(
|
||||
self, model: str, text: Union[str, list[PromptMessageContent]], tools: Optional[list[PromptMessageTool]] = None
|
||||
self, text: Union[str, list[PromptMessageContent]], tools: Optional[list[PromptMessageTool]] = None
|
||||
) -> int:
|
||||
"""
|
||||
Approximate num tokens for model with gpt2 tokenizer.
|
||||
|
||||
:param model: model name
|
||||
:param text: prompt text
|
||||
:param tools: tools for tool calling
|
||||
:return: number of tokens
|
||||
@@ -725,7 +739,6 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
|
||||
|
||||
def _num_tokens_from_messages(
|
||||
self,
|
||||
model: str,
|
||||
messages: list[PromptMessage],
|
||||
tools: Optional[list[PromptMessageTool]] = None,
|
||||
credentials: Optional[dict] = None,
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
- openai/o1-preview
|
||||
- openai/o1-mini
|
||||
- openai/o3-mini
|
||||
- openai/o3-mini-2025-01-31
|
||||
- openai/gpt-4o
|
||||
- openai/gpt-4o-mini
|
||||
- openai/gpt-4
|
||||
@@ -28,5 +30,6 @@
|
||||
- mistralai/mistral-7b-instruct
|
||||
- qwen/qwen-2.5-72b-instruct
|
||||
- qwen/qwen-2-72b-instruct
|
||||
- deepseek/deepseek-r1
|
||||
- deepseek/deepseek-chat
|
||||
- deepseek/deepseek-coder
|
||||
|
||||
@@ -53,7 +53,7 @@ parameter_rules:
|
||||
zh_Hans: 介于 -2.0 和 2.0 之间的数字。如果该值为正,那么新 token 会根据其在已有文本中的出现频率受到相应的惩罚,降低模型重复相同内容的可能性。
|
||||
en_US: A number between -2.0 and 2.0. If the value is positive, new tokens are penalized based on their frequency of occurrence in existing text, reducing the likelihood that the model will repeat the same content.
|
||||
pricing:
|
||||
input: "0.14"
|
||||
output: "0.28"
|
||||
input: "0.49"
|
||||
output: "0.89"
|
||||
unit: "0.000001"
|
||||
currency: USD
|
||||
|
||||
@@ -0,0 +1,59 @@
|
||||
model: deepseek/deepseek-r1
|
||||
label:
|
||||
en_US: deepseek-r1
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 163840
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
type: float
|
||||
default: 1
|
||||
min: 0.0
|
||||
max: 2.0
|
||||
help:
|
||||
zh_Hans: 控制生成结果的多样性和随机性。数值越小,越严谨;数值越大,越发散。
|
||||
en_US: Control the diversity and randomness of generated results. The smaller the value, the more rigorous it is; the larger the value, the more divergent it is.
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
type: int
|
||||
default: 4096
|
||||
min: 1
|
||||
max: 4096
|
||||
help:
|
||||
zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
|
||||
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
type: float
|
||||
default: 1
|
||||
min: 0.01
|
||||
max: 1.00
|
||||
help:
|
||||
zh_Hans: 控制生成结果的随机性。数值越小,随机性越弱;数值越大,随机性越强。一般而言,top_p 和 temperature 两个参数选择一个进行调整即可。
|
||||
en_US: Control the randomness of generated results. The smaller the value, the weaker the randomness; the larger the value, the stronger the randomness. Generally speaking, you can adjust one of the two parameters top_p and temperature.
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: frequency_penalty
|
||||
use_template: frequency_penalty
|
||||
default: 0
|
||||
min: -2.0
|
||||
max: 2.0
|
||||
help:
|
||||
zh_Hans: 介于 -2.0 和 2.0 之间的数字。如果该值为正,那么新 token 会根据其在已有文本中的出现频率受到相应的惩罚,降低模型重复相同内容的可能性。
|
||||
en_US: A number between -2.0 and 2.0. If the value is positive, new tokens are penalized based on their frequency of occurrence in existing text, reducing the likelihood that the model will repeat the same content.
|
||||
pricing:
|
||||
input: "3"
|
||||
output: "8"
|
||||
unit: "0.000001"
|
||||
currency: USD
|
||||
@@ -0,0 +1,49 @@
|
||||
model: openai/o3-mini-2025-01-31
|
||||
label:
|
||||
en_US: o3-mini-2025-01-31
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 200000
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: presence_penalty
|
||||
use_template: presence_penalty
|
||||
- name: frequency_penalty
|
||||
use_template: frequency_penalty
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 512
|
||||
min: 1
|
||||
max: 100000
|
||||
- name: response_format
|
||||
label:
|
||||
zh_Hans: 回复格式
|
||||
en_US: response_format
|
||||
type: string
|
||||
help:
|
||||
zh_Hans: 指定模型必须输出的格式
|
||||
en_US: specifying the format that the model must output
|
||||
required: false
|
||||
options:
|
||||
- text
|
||||
- json_object
|
||||
pricing:
|
||||
input: "1.10"
|
||||
output: "4.40"
|
||||
unit: "0.000001"
|
||||
currency: USD
|
||||
@@ -0,0 +1,49 @@
|
||||
model: openai/o3-mini
|
||||
label:
|
||||
en_US: o3-mini
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 200000
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: presence_penalty
|
||||
use_template: presence_penalty
|
||||
- name: frequency_penalty
|
||||
use_template: frequency_penalty
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 512
|
||||
min: 1
|
||||
max: 100000
|
||||
- name: response_format
|
||||
label:
|
||||
zh_Hans: 回复格式
|
||||
en_US: response_format
|
||||
type: string
|
||||
help:
|
||||
zh_Hans: 指定模型必须输出的格式
|
||||
en_US: specifying the format that the model must output
|
||||
required: false
|
||||
options:
|
||||
- text
|
||||
- json_object
|
||||
pricing:
|
||||
input: "1.10"
|
||||
output: "4.40"
|
||||
unit: "0.000001"
|
||||
currency: USD
|
||||
@@ -12,7 +12,11 @@
|
||||
- Pro/Qwen/Qwen2-VL-7B-Instruct
|
||||
- OpenGVLab/InternVL2-26B
|
||||
- Pro/OpenGVLab/InternVL2-8B
|
||||
- deepseek-ai/DeepSeek-R1
|
||||
- deepseek-ai/DeepSeek-V2-Chat
|
||||
- deepseek-ai/DeepSeek-V2.5
|
||||
- deepseek-ai/DeepSeek-V3
|
||||
- deepseek-ai/DeepSeek-Coder-V2-Instruct
|
||||
- THUDM/glm-4-9b-chat
|
||||
- 01-ai/Yi-1.5-34B-Chat-16K
|
||||
- 01-ai/Yi-1.5-9B-Chat-16K
|
||||
@@ -25,3 +29,4 @@
|
||||
- meta-llama/Meta-Llama-3.1-8B-Instruct
|
||||
- google/gemma-2-27b-it
|
||||
- google/gemma-2-9b-it
|
||||
- Tencent/Hunyuan-A52B-Instruct
|
||||
|
||||
@@ -0,0 +1,21 @@
|
||||
model: deepseek-ai/DeepSeek-R1
|
||||
label:
|
||||
zh_Hans: deepseek-ai/DeepSeek-R1
|
||||
en_US: deepseek-ai/DeepSeek-R1
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 64000
|
||||
parameter_rules:
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
min: 1
|
||||
max: 8192
|
||||
default: 4096
|
||||
pricing:
|
||||
input: "4"
|
||||
output: "16"
|
||||
unit: "0.000001"
|
||||
currency: RMB
|
||||
@@ -0,0 +1,53 @@
|
||||
model: deepseek-ai/DeepSeek-V3
|
||||
label:
|
||||
en_US: deepseek-ai/DeepSeek-V3
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 64000
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
type: int
|
||||
default: 512
|
||||
min: 1
|
||||
max: 4096
|
||||
help:
|
||||
zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
|
||||
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: frequency_penalty
|
||||
use_template: frequency_penalty
|
||||
- name: response_format
|
||||
label:
|
||||
zh_Hans: 回复格式
|
||||
en_US: Response Format
|
||||
type: string
|
||||
help:
|
||||
zh_Hans: 指定模型必须输出的格式
|
||||
en_US: specifying the format that the model must output
|
||||
required: false
|
||||
options:
|
||||
- text
|
||||
- json_object
|
||||
pricing:
|
||||
input: "1"
|
||||
output: "2"
|
||||
unit: "0.000001"
|
||||
currency: RMB
|
||||
@@ -1,13 +1,9 @@
|
||||
import json
|
||||
from collections.abc import Generator
|
||||
from typing import Optional, Union
|
||||
|
||||
import requests
|
||||
|
||||
from core.model_runtime.entities.common_entities import I18nObject
|
||||
from core.model_runtime.entities.llm_entities import LLMMode, LLMResult, LLMResultChunk, LLMResultChunkDelta
|
||||
from core.model_runtime.entities.llm_entities import LLMMode, LLMResult
|
||||
from core.model_runtime.entities.message_entities import (
|
||||
AssistantPromptMessage,
|
||||
PromptMessage,
|
||||
PromptMessageTool,
|
||||
)
|
||||
@@ -96,208 +92,3 @@ class SiliconflowLargeLanguageModel(OAIAPICompatLargeLanguageModel):
|
||||
),
|
||||
],
|
||||
)
|
||||
|
||||
def _handle_generate_stream_response(
|
||||
self, model: str, credentials: dict, response: requests.Response, prompt_messages: list[PromptMessage]
|
||||
) -> Generator:
|
||||
"""
|
||||
Handle llm stream response
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param response: streamed response
|
||||
:param prompt_messages: prompt messages
|
||||
:return: llm response chunk generator
|
||||
"""
|
||||
full_assistant_content = ""
|
||||
chunk_index = 0
|
||||
is_reasoning_started = False # Add flag to track reasoning state
|
||||
|
||||
def create_final_llm_result_chunk(
|
||||
id: Optional[str], index: int, message: AssistantPromptMessage, finish_reason: str, usage: dict
|
||||
) -> LLMResultChunk:
|
||||
# calculate num tokens
|
||||
prompt_tokens = usage and usage.get("prompt_tokens")
|
||||
if prompt_tokens is None:
|
||||
prompt_tokens = self._num_tokens_from_string(model, prompt_messages[0].content)
|
||||
completion_tokens = usage and usage.get("completion_tokens")
|
||||
if completion_tokens is None:
|
||||
completion_tokens = self._num_tokens_from_string(model, full_assistant_content)
|
||||
|
||||
# transform usage
|
||||
usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
|
||||
|
||||
return LLMResultChunk(
|
||||
id=id,
|
||||
model=model,
|
||||
prompt_messages=prompt_messages,
|
||||
delta=LLMResultChunkDelta(index=index, message=message, finish_reason=finish_reason, usage=usage),
|
||||
)
|
||||
|
||||
# delimiter for stream response, need unicode_escape
|
||||
import codecs
|
||||
|
||||
delimiter = credentials.get("stream_mode_delimiter", "\n\n")
|
||||
delimiter = codecs.decode(delimiter, "unicode_escape")
|
||||
|
||||
tools_calls: list[AssistantPromptMessage.ToolCall] = []
|
||||
|
||||
def increase_tool_call(new_tool_calls: list[AssistantPromptMessage.ToolCall]):
|
||||
def get_tool_call(tool_call_id: str):
|
||||
if not tool_call_id:
|
||||
return tools_calls[-1]
|
||||
|
||||
tool_call = next((tool_call for tool_call in tools_calls if tool_call.id == tool_call_id), None)
|
||||
if tool_call is None:
|
||||
tool_call = AssistantPromptMessage.ToolCall(
|
||||
id=tool_call_id,
|
||||
type="function",
|
||||
function=AssistantPromptMessage.ToolCall.ToolCallFunction(name="", arguments=""),
|
||||
)
|
||||
tools_calls.append(tool_call)
|
||||
|
||||
return tool_call
|
||||
|
||||
for new_tool_call in new_tool_calls:
|
||||
# get tool call
|
||||
tool_call = get_tool_call(new_tool_call.function.name)
|
||||
# update tool call
|
||||
if new_tool_call.id:
|
||||
tool_call.id = new_tool_call.id
|
||||
if new_tool_call.type:
|
||||
tool_call.type = new_tool_call.type
|
||||
if new_tool_call.function.name:
|
||||
tool_call.function.name = new_tool_call.function.name
|
||||
if new_tool_call.function.arguments:
|
||||
tool_call.function.arguments += new_tool_call.function.arguments
|
||||
|
||||
finish_reason = None # The default value of finish_reason is None
|
||||
message_id, usage = None, None
|
||||
for chunk in response.iter_lines(decode_unicode=True, delimiter=delimiter):
|
||||
chunk = chunk.strip()
|
||||
if chunk:
|
||||
# ignore sse comments
|
||||
if chunk.startswith(":"):
|
||||
continue
|
||||
decoded_chunk = chunk.strip().removeprefix("data:").lstrip()
|
||||
if decoded_chunk == "[DONE]": # Some provider returns "data: [DONE]"
|
||||
continue
|
||||
|
||||
try:
|
||||
chunk_json: dict = json.loads(decoded_chunk)
|
||||
# stream ended
|
||||
except json.JSONDecodeError as e:
|
||||
yield create_final_llm_result_chunk(
|
||||
id=message_id,
|
||||
index=chunk_index + 1,
|
||||
message=AssistantPromptMessage(content=""),
|
||||
finish_reason="Non-JSON encountered.",
|
||||
usage=usage,
|
||||
)
|
||||
break
|
||||
# handle the error here. for issue #11629
|
||||
if chunk_json.get("error") and chunk_json.get("choices") is None:
|
||||
raise ValueError(chunk_json.get("error"))
|
||||
|
||||
if chunk_json:
|
||||
if u := chunk_json.get("usage"):
|
||||
usage = u
|
||||
if not chunk_json or len(chunk_json["choices"]) == 0:
|
||||
continue
|
||||
|
||||
choice = chunk_json["choices"][0]
|
||||
finish_reason = chunk_json["choices"][0].get("finish_reason")
|
||||
message_id = chunk_json.get("id")
|
||||
chunk_index += 1
|
||||
|
||||
if "delta" in choice:
|
||||
delta = choice["delta"]
|
||||
delta_content = delta.get("content")
|
||||
|
||||
assistant_message_tool_calls = None
|
||||
|
||||
if "tool_calls" in delta and credentials.get("function_calling_type", "no_call") == "tool_call":
|
||||
assistant_message_tool_calls = delta.get("tool_calls", None)
|
||||
elif (
|
||||
"function_call" in delta
|
||||
and credentials.get("function_calling_type", "no_call") == "function_call"
|
||||
):
|
||||
assistant_message_tool_calls = [
|
||||
{"id": "tool_call_id", "type": "function", "function": delta.get("function_call", {})}
|
||||
]
|
||||
|
||||
# assistant_message_function_call = delta.delta.function_call
|
||||
|
||||
# extract tool calls from response
|
||||
if assistant_message_tool_calls:
|
||||
tool_calls = self._extract_response_tool_calls(assistant_message_tool_calls)
|
||||
increase_tool_call(tool_calls)
|
||||
|
||||
if delta_content is None or delta_content == "":
|
||||
continue
|
||||
|
||||
# Check for think tags
|
||||
if "<think>" in delta_content:
|
||||
is_reasoning_started = True
|
||||
# Remove <think> tag and add markdown quote
|
||||
delta_content = "> 💭 " + delta_content.replace("<think>", "")
|
||||
elif "</think>" in delta_content:
|
||||
# Remove </think> tag and add newlines to end quote block
|
||||
delta_content = delta_content.replace("</think>", "") + "\n\n"
|
||||
is_reasoning_started = False
|
||||
elif is_reasoning_started:
|
||||
# Add quote markers for content within thinking block
|
||||
if "\n\n" in delta_content:
|
||||
delta_content = delta_content.replace("\n\n", "\n> ")
|
||||
elif "\n" in delta_content:
|
||||
delta_content = delta_content.replace("\n", "\n> ")
|
||||
|
||||
# transform assistant message to prompt message
|
||||
assistant_prompt_message = AssistantPromptMessage(
|
||||
content=delta_content,
|
||||
)
|
||||
|
||||
# reset tool calls
|
||||
tool_calls = []
|
||||
full_assistant_content += delta_content
|
||||
elif "text" in choice:
|
||||
choice_text = choice.get("text", "")
|
||||
if choice_text == "":
|
||||
continue
|
||||
|
||||
# transform assistant message to prompt message
|
||||
assistant_prompt_message = AssistantPromptMessage(content=choice_text)
|
||||
full_assistant_content += choice_text
|
||||
else:
|
||||
continue
|
||||
|
||||
yield LLMResultChunk(
|
||||
id=message_id,
|
||||
model=model,
|
||||
prompt_messages=prompt_messages,
|
||||
delta=LLMResultChunkDelta(
|
||||
index=chunk_index,
|
||||
message=assistant_prompt_message,
|
||||
),
|
||||
)
|
||||
|
||||
chunk_index += 1
|
||||
|
||||
if tools_calls:
|
||||
yield LLMResultChunk(
|
||||
id=message_id,
|
||||
model=model,
|
||||
prompt_messages=prompt_messages,
|
||||
delta=LLMResultChunkDelta(
|
||||
index=chunk_index,
|
||||
message=AssistantPromptMessage(tool_calls=tools_calls, content=""),
|
||||
),
|
||||
)
|
||||
|
||||
yield create_final_llm_result_chunk(
|
||||
id=message_id,
|
||||
index=chunk_index,
|
||||
message=AssistantPromptMessage(content=""),
|
||||
finish_reason=finish_reason,
|
||||
usage=usage,
|
||||
)
|
||||
|
||||
@@ -0,0 +1,37 @@
|
||||
model: gemini-2.0-flash-001
|
||||
label:
|
||||
en_US: Gemini 2.0 Flash 001
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 1048576
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: presence_penalty
|
||||
use_template: presence_penalty
|
||||
- name: frequency_penalty
|
||||
use_template: frequency_penalty
|
||||
- name: max_output_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,41 @@
|
||||
model: gemini-2.0-flash-lite-preview-02-05
|
||||
label:
|
||||
en_US: Gemini 2.0 Flash Lite Preview 0205
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
- document
|
||||
- video
|
||||
- audio
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 1048576
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_output_tokens
|
||||
use_template: max_tokens
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: json_schema
|
||||
use_template: json_schema
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,39 @@
|
||||
model: gemini-2.0-flash-thinking-exp-01-21
|
||||
label:
|
||||
en_US: Gemini 2.0 Flash Thinking Exp 0121
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- document
|
||||
- video
|
||||
- audio
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 32767
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_output_tokens
|
||||
use_template: max_tokens
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: json_schema
|
||||
use_template: json_schema
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,39 @@
|
||||
model: gemini-2.0-flash-thinking-exp-1219
|
||||
label:
|
||||
en_US: Gemini 2.0 Flash Thinking Exp 1219
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- document
|
||||
- video
|
||||
- audio
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 32767
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_output_tokens
|
||||
use_template: max_tokens
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: json_schema
|
||||
use_template: json_schema
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,37 @@
|
||||
model: gemini-2.0-pro-exp-02-05
|
||||
label:
|
||||
en_US: Gemini 2.0 Pro Exp 0205
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- document
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 2000000
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: presence_penalty
|
||||
use_template: presence_penalty
|
||||
- name: frequency_penalty
|
||||
use_template: frequency_penalty
|
||||
- name: max_output_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@@ -1,14 +1,18 @@
|
||||
model: gemini-pro
|
||||
model: gemini-exp-1114
|
||||
label:
|
||||
en_US: Gemini Pro
|
||||
en_US: Gemini exp 1114
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
- document
|
||||
- video
|
||||
- audio
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 30720
|
||||
context_size: 32767
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
@@ -23,17 +27,15 @@ parameter_rules:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_tokens_to_sample
|
||||
- name: max_output_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 2048
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 2048
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
max: 8192
|
||||
- name: json_schema
|
||||
use_template: json_schema
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
deprecated: true
|
||||
@@ -1,12 +1,18 @@
|
||||
model: gemini-pro-vision
|
||||
model: gemini-exp-1121
|
||||
label:
|
||||
en_US: Gemini Pro Vision
|
||||
en_US: Gemini exp 1121
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
- document
|
||||
- video
|
||||
- audio
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 12288
|
||||
context_size: 32767
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
@@ -21,15 +27,15 @@ parameter_rules:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_tokens_to_sample
|
||||
- name: max_output_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 4096
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 4096
|
||||
max: 8192
|
||||
- name: json_schema
|
||||
use_template: json_schema
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
deprecated: true
|
||||
@@ -0,0 +1,41 @@
|
||||
model: gemini-exp-1206
|
||||
label:
|
||||
en_US: Gemini exp 1206
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
- document
|
||||
- video
|
||||
- audio
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 2097152
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_output_tokens
|
||||
use_template: max_tokens
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: json_schema
|
||||
use_template: json_schema
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user