Compare commits

..

50 Commits

Author SHA1 Message Date
jyong
295bbc4a8b upgrade Unstructured version 2025-02-13 15:08:34 +08:00
Yingchun Lai
a3d3e30e3a fix: fix tongyi models blocking mode with incremental_output=stream (#13620) 2025-02-13 10:24:05 +08:00
AugNSo
2b86465d4c fix document extractor node incorrectly processing doc and ppt files (#12902) 2025-02-12 18:04:28 +08:00
kimjion
6529240da6 fix: no longer using old app detail cover when switch pathname (#13585) 2025-02-12 15:02:11 +08:00
Bowen Liang
0751ad1eeb feat(vdb): add HNSW vector index for TiDB vector store with TiFlash (#12043) 2025-02-12 13:53:51 +08:00
Riddhimaan-Senapati
786550bdc9 fix: changed topics/keywords to topic/keywords (#13544) 2025-02-12 09:15:15 +08:00
jiangbo721
bde756a1ab chore:Remove useless brackets and format code (#13479)
Co-authored-by: 刘江波 <jiangbo721@163.com>
2025-02-11 22:05:29 +08:00
Wu Jiayang
423fb2d7bc Ensure the 'inputs' field in /chat-messages takes effect every time (#7955)
Co-authored-by: Your Name <you@example.com>
Co-authored-by: -LAN- <laipz8200@outlook.com>
2025-02-11 18:44:56 +08:00
Novice
f96b4f287a fix: iteration node log time error (#13511) 2025-02-11 16:35:21 +08:00
Novice
c00e7d3f65 fix: retry log running error (#13472)
Co-authored-by: Novice Lee <novicelee@NoviPro.local>
2025-02-11 15:48:55 +08:00
yihong
1f38d4846b fix: issue #13483 and #13434 (#13518)
Signed-off-by: yihong0618 <zouzou0208@gmail.com>
2025-02-11 12:45:49 +08:00
liuzhenghua
47a64610ca Fix the issue of repeated escaping of quotes in hit test (#13477) 2025-02-11 09:58:31 +08:00
Riddhimaan-Senapati
f0a845f0f9 fix: removed LLM output from the main README (#13504) 2025-02-11 09:09:07 +08:00
Abdullah AlOsaimi
abec23118d feat: add support for X-Forwarded-Port in ProxyFix middleware (#13102) 2025-02-10 22:28:29 +08:00
Xin Zhang
0957119550 fix: update UTC time format for consistency (#13471) 2025-02-10 19:37:50 +08:00
github-actions[bot]
f48fa3e4e8 chore: translate i18n files (#13452)
Co-authored-by: douxc <7553076+douxc@users.noreply.github.com>
2025-02-10 14:14:15 +08:00
非法操作
5ffc58d6ca feat: improve think content display (#13431) 2025-02-10 14:08:17 +08:00
NFish
7d958635f0 Fix/add trial expire tip time (#13464) 2025-02-10 12:53:59 +08:00
Wu Tianwei
33990426c1 fix: add ids in FetchDatasetsParams (#13459) 2025-02-10 12:28:36 +08:00
yihong
9f3fc7ebf8 ci: make ci safe using zizmor (#13397)
Signed-off-by: yihong0618 <zouzou0208@gmail.com>
2025-02-10 12:26:08 +08:00
ybalbert001
c8357da13b [Fix] Sagemaker LLM Provider can't adjust context size, it'a always 2… (#13462)
Co-authored-by: Yuanbo Li <ybalbert@amazon.com>
2025-02-10 12:25:04 +08:00
NFish
2290f14fb1 feat: add tooltip if user's anthropic trial quota still available (#13418) 2025-02-10 10:44:20 +08:00
Fei He
7796984444 Fix: Removed model params except max_token for deepseek r1 in volcengine (#13446) 2025-02-10 10:26:26 +08:00
Fei He
75113c26c6 Feat : add deepseek support for tongyi (#13445) 2025-02-10 10:26:03 +08:00
xhe
939a9ecd21 chore: use the wrap thinking api for volcengine (#13432)
Signed-off-by: xhe <xw897002528@gmail.com>
2025-02-10 10:25:07 +08:00
Summer-Gu
f307c7cd88 feat: Docker adds SSRF-related timeout settings (#13395) 2025-02-10 10:21:31 +08:00
Riddhimaan-Senapati
33ecceb90c Feat: add comparison table to main readme (#13435) 2025-02-10 10:13:46 +08:00
NFish
e0d1cab079 fix: add missed background color to iteration node (#13448) 2025-02-10 10:04:56 +08:00
Riddhimaan-Senapati
811d72a727 feat: added a _position.yaml for vertex ai provider (#13367) 2025-02-09 10:29:07 +08:00
Yi Xiao
c3c575c2e1 Fix: model selector UI hover issue (#13396) 2025-02-09 10:24:57 +08:00
海狸大師
c189629eca Fix(i18n): Refine zh-Hant workflow translations (#13421) 2025-02-09 10:24:45 +08:00
Naoki Takashima
37117c22d4 feat(model): support Gemini 2.0 Flash Lite Preview model (02-05) in Google's model provider (#13399) 2025-02-09 10:22:33 +08:00
Riddhimaan-Senapati
b05e9d2ab4 feat: update backend documentation (#13374) 2025-02-08 20:36:33 +08:00
aplio
0451333990 fix(settings): add notClearable prop to language selection (#13406) 2025-02-08 20:36:23 +08:00
MuYu
ab2e6c19a4 Fixes #13415 reset model-provider-page form value use schema.default (#13416) 2025-02-08 20:34:52 +08:00
aplio
f7959bc887 fix(chatbot): update button class to include text color for better visibility (#13411) 2025-02-08 20:34:37 +08:00
aplio
45874c699d Nitpick/fix typos in document (#13413) 2025-02-08 20:33:45 +08:00
Junjie.M
286cdc41ab reasoning model unified think tag is <think></think> (#13392)
Co-authored-by: crazywoola <427733928@qq.com>
2025-02-08 16:19:41 +08:00
Hash Brown
78708eb5d5 fix: merge conflict between #11301 and #11885 (#13391) 2025-02-08 14:38:10 +08:00
胡春东
cf36745770 fix(workflow_tool): enable File parameter support after workflow is published as a tool (#13175) 2025-02-08 12:30:00 +08:00
depy
6622c7f98d fix: Fix HTTP request node non 443 port SSL site inaccessible (#13376) 2025-02-08 12:00:45 +08:00
Hash Brown
3112b74527 fix: build failed due to getPrevChatList no longer exists (#13383) 2025-02-08 11:59:02 +08:00
Katy Tao
b3ae6b634f feat: add pan and zoom support for MiniMap (#13382) 2025-02-08 11:57:41 +08:00
Xin Zhang
982bca5d40 fix: add rate limiting to prevent brute force on password reset (#13292) 2025-02-08 10:28:31 +08:00
Kalo Chin
c8dcde6cd0 fix: Gemini 2.0 Flash 001 model yaml file naming (#13372) 2025-02-08 09:12:42 +08:00
Riddhimaan-Senapati
8f9db61688 feat: added new silicon flow models (#13369) 2025-02-08 09:12:22 +08:00
github-actions[bot]
ebdbaf34e6 chore: translate i18n files (#13349)
Co-authored-by: JzoNgKVO <27049666+JzoNgKVO@users.noreply.github.com>
2025-02-07 22:41:25 +08:00
zhu-an
a081b1e79e fix: add compatibility config for third-party S3-compatible providers (#13354)
Co-authored-by: zhaoqingyu.1075 <zhaoqingyu.1075@bytedance.com>
2025-02-07 22:35:24 +08:00
Steven sun
38c31e64db add enable_search parameter to qwen_max, plus, turbo (#13335)
Co-authored-by: steven <sunzwj@digitalchina.com>
2025-02-07 22:16:26 +08:00
Yi Xiao
ae6f67420c Chore: update app detail panel (#13337) 2025-02-07 18:56:43 +08:00
411 changed files with 11140 additions and 15909 deletions

View File

@@ -26,6 +26,9 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Setup Poetry and Python ${{ matrix.python-version }}
uses: ./.github/actions/setup-poetry

View File

@@ -5,8 +5,6 @@ on:
branches:
- "main"
- "deploy/dev"
- "deploy/enterprise"
- "e-260"
release:
types: [published]
@@ -81,10 +79,12 @@ jobs:
cache-to: type=gha,mode=max,scope=${{ matrix.service_name }}
- name: Export digest
env:
DIGEST: ${{ steps.build.outputs.digest }}
run: |
mkdir -p /tmp/digests
digest="${{ steps.build.outputs.digest }}"
touch "/tmp/digests/${digest#sha256:}"
sanitized_digest=${DIGEST#sha256:}
touch "/tmp/digests/${sanitized_digest}"
- name: Upload digest
uses: actions/upload-artifact@v4
@@ -134,10 +134,15 @@ jobs:
- name: Create manifest list and push
working-directory: /tmp/digests
env:
IMAGE_NAME: ${{ env[matrix.image_name_env] }}
run: |
docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
$(printf '${{ env[matrix.image_name_env] }}@sha256:%s ' *)
$(printf "$IMAGE_NAME@sha256:%s " *)
- name: Inspect image
env:
IMAGE_NAME: ${{ env[matrix.image_name_env] }}
IMAGE_VERSION: ${{ steps.meta.outputs.version }}
run: |
docker buildx imagetools inspect ${{ env[matrix.image_name_env] }}:${{ steps.meta.outputs.version }}
docker buildx imagetools inspect "$IMAGE_NAME:$IMAGE_VERSION"

View File

@@ -19,6 +19,9 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Setup Poetry and Python
uses: ./.github/actions/setup-poetry

View File

@@ -1,29 +0,0 @@
name: Deploy Enterprise
permissions:
contents: read
on:
workflow_run:
workflows: ["Build and Push API & Web"]
branches:
- "deploy/enterprise"
types:
- completed
jobs:
deploy:
runs-on: ubuntu-latest
if: |
github.event.workflow_run.conclusion == 'success' &&
github.event.workflow_run.head_branch == 'deploy/enterprise'
steps:
- name: Deploy to server
uses: appleboy/ssh-action@v0.1.8
with:
host: ${{ secrets.ENTERPRISE_SSH_HOST }}
username: ${{ secrets.ENTERPRISE_SSH_USER }}
password: ${{ secrets.ENTERPRISE_SSH_PASSWORD }}
script: |
${{ vars.ENTERPRISE_SSH_SCRIPT || secrets.ENTERPRISE_SSH_SCRIPT }}

View File

@@ -9,6 +9,6 @@ yq eval '.services["pgvecto-rs"].ports += ["5431:5432"]' -i docker/docker-compos
yq eval '.services["elasticsearch"].ports += ["9200:9200"]' -i docker/docker-compose.yaml
yq eval '.services.couchbase-server.ports += ["8091-8096:8091-8096"]' -i docker/docker-compose.yaml
yq eval '.services.couchbase-server.ports += ["11210:11210"]' -i docker/docker-compose.yaml
yq eval '.services.tidb.ports += ["4000:4000"]' -i docker/docker-compose.yaml
yq eval '.services.tidb.ports += ["4000:4000"]' -i docker/tidb/docker-compose.yaml
echo "Ports exposed for sandbox, weaviate, tidb, qdrant, chroma, milvus, pgvector, pgvecto-rs, elasticsearch, couchbase"

View File

@@ -17,6 +17,9 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Check changed files
id: changed-files
@@ -59,6 +62,9 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Check changed files
id: changed-files
@@ -89,6 +95,9 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Check changed files
id: changed-files
@@ -117,6 +126,9 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Check changed files
id: changed-files

View File

@@ -26,6 +26,9 @@ jobs:
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Use Node.js ${{ matrix.node-version }}
uses: actions/setup-node@v4

View File

@@ -16,6 +16,7 @@ jobs:
- uses: actions/checkout@v4
with:
fetch-depth: 2 # last 2 commits
persist-credentials: false
- name: Check for file changes in i18n/en-US
id: check_files

View File

@@ -28,6 +28,9 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Setup Poetry and Python ${{ matrix.python-version }}
uses: ./.github/actions/setup-poetry
@@ -51,7 +54,15 @@ jobs:
- name: Expose Service Ports
run: sh .github/workflows/expose_service_ports.sh
- name: Set up Vector Stores (TiDB, Weaviate, Qdrant, PGVector, Milvus, PgVecto-RS, Chroma, MyScale, ElasticSearch, Couchbase)
- name: Set up Vector Store (TiDB)
uses: hoverkraft-tech/compose-action@v2.0.2
with:
compose-file: docker/tidb/docker-compose.yaml
services: |
tidb
tiflash
- name: Set up Vector Stores (Weaviate, Qdrant, PGVector, Milvus, PgVecto-RS, Chroma, MyScale, ElasticSearch, Couchbase)
uses: hoverkraft-tech/compose-action@v2.0.2
with:
compose-file: |
@@ -67,7 +78,9 @@ jobs:
pgvector
chroma
elasticsearch
tidb
- name: Check TiDB Ready
run: poetry run -P api python api/tests/integration_tests/vdb/tidb_vector/check_tiflash_ready.py
- name: Test Vector Stores
run: poetry run -P api bash dev/pytest/pytest_vdb.sh
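The job above now brings TiDB and TiFlash up from docker/tidb/docker-compose.yaml and waits for TiFlash before the vector-store tests run. The sketch below is a minimal stand-in for such a readiness gate, assuming a pymysql client and TiDB's information_schema.cluster_info table; it is not the repository's actual check_tiflash_ready.py.

```python
# Minimal readiness-check sketch (assumes pymysql and TiDB's information_schema.cluster_info);
# not the repository's actual check_tiflash_ready.py.
import sys
import time

import pymysql


def wait_for_tiflash(host: str = "127.0.0.1", port: int = 4000, timeout: int = 300) -> bool:
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            conn = pymysql.connect(host=host, port=port, user="root", password="")
            with conn.cursor() as cursor:
                cursor.execute(
                    "SELECT COUNT(*) FROM information_schema.cluster_info WHERE type = 'tiflash'"
                )
                (tiflash_nodes,) = cursor.fetchone()
            conn.close()
            if tiflash_nodes > 0:
                return True
        except Exception:
            pass  # TiDB may not be accepting connections yet
        time.sleep(5)
    return False


if __name__ == "__main__":
    sys.exit(0 if wait_for_tiflash() else 1)
```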

View File

@@ -22,6 +22,9 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Check changed files
id: changed-files

.gitignore (vendored): 1 change
View File

@@ -163,6 +163,7 @@ docker/volumes/db/data/*
docker/volumes/redis/data/*
docker/volumes/weaviate/*
docker/volumes/qdrant/*
docker/tidb/volumes/*
docker/volumes/etcd/*
docker/volumes/minio/*
docker/volumes/milvus/*

View File

@@ -1,4 +0,0 @@
{
"MD024": false,
"MD013": false
}

View File

@@ -1,45 +0,0 @@
# Changelog
All notable changes to Dify will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [0.15.8] - 2025-05-30
### Added
- Added gunicorn keepalive setting (#19537)
### Fixed
- Fixed database configuration to allow DB_EXTRAS to set search_path via options (#16a4f77)
- Fixed frontend third-party package security issues (#19655)
- Updated dependencies: huggingface-hub (~0.16.4 to ~0.31.0), transformers (~4.35.0 to ~4.39.0), and resend (~0.7.0 to ~2.9.0) (#19563)
- Downgrade boto3 from 1.36 to 1.35 (#19736)
## [0.15.7] - 2025-04-27
### Added
- Added support for GPT-4.1 in model providers (#18912)
- Added support for Amazon Bedrock DeepSeek-R1 model (#18908)
- Added support for Amazon Bedrock Claude Sonnet 3.7 model (#18788)
- Refined version compatibility logic in app DSL service
### Fixed
- Fixed issue with creating apps from template categories (#18807, #18868)
- Fixed DSL version check when creating apps from explore templates (#18872, #18878)
## [0.15.6] - 2025-04-22
### Security
- Fixed clickjacking vulnerability (#18552)
- Fixed reset password security issue (#18366)
- Updated reset password token when email code verification succeeds (#18362)
### Fixed
- Fixed Vertex AI Gemini 2.0 Flash 001 schema (#18405)

View File

@@ -108,6 +108,72 @@ Please refer to our [FAQ](https://docs.dify.ai/getting-started/install-self-host
**7. Backend-as-a-Service**:
All of Dify's offerings come with corresponding APIs, so you could effortlessly integrate Dify into your own business logic.
## Feature Comparison
<table style="width: 100%;">
<tr>
<th align="center">Feature</th>
<th align="center">Dify.AI</th>
<th align="center">LangChain</th>
<th align="center">Flowise</th>
<th align="center">OpenAI Assistants API</th>
</tr>
<tr>
<td align="center">Programming Approach</td>
<td align="center">API + App-oriented</td>
<td align="center">Python Code</td>
<td align="center">App-oriented</td>
<td align="center">API-oriented</td>
</tr>
<tr>
<td align="center">Supported LLMs</td>
<td align="center">Rich Variety</td>
<td align="center">Rich Variety</td>
<td align="center">Rich Variety</td>
<td align="center">OpenAI-only</td>
</tr>
<tr>
<td align="center">RAG Engine</td>
<td align="center">✅</td>
<td align="center">✅</td>
<td align="center">✅</td>
<td align="center">✅</td>
</tr>
<tr>
<td align="center">Agent</td>
<td align="center">✅</td>
<td align="center">✅</td>
<td align="center">❌</td>
<td align="center">✅</td>
</tr>
<tr>
<td align="center">Workflow</td>
<td align="center">✅</td>
<td align="center">❌</td>
<td align="center">✅</td>
<td align="center">❌</td>
</tr>
<tr>
<td align="center">Observability</td>
<td align="center">✅</td>
<td align="center">✅</td>
<td align="center">❌</td>
<td align="center">❌</td>
</tr>
<tr>
<td align="center">Enterprise Feature (SSO/Access control)</td>
<td align="center">✅</td>
<td align="center">❌</td>
<td align="center">❌</td>
<td align="center">❌</td>
</tr>
<tr>
<td align="center">Local Deployment</td>
<td align="center">✅</td>
<td align="center">✅</td>
<td align="center">✅</td>
<td align="center">❌</td>
</tr>
</table>
## Using Dify

View File

@@ -87,9 +87,7 @@ Dify is an open-source LLM app development platform. Its intuitive interface com
## Feature Comparison
<table style="width: 100%;">
<tr
>
<tr>
<th align="center">Feature</th>
<th align="center">Dify.AI</th>
<th align="center">LangChain</th>

View File

@@ -106,6 +106,73 @@ Prosimo, glejte naša pogosta vprašanja [FAQ](https://docs.dify.ai/getting-star
**7. Backend-as-a-Service**:
AVse ponudbe Difyja so opremljene z ustreznimi API-ji, tako da lahko Dify brez težav integrirate v svojo poslovno logiko.
## Primerjava Funkcij
<table style="width: 100%;">
<tr>
<th align="center">Funkcija</th>
<th align="center">Dify.AI</th>
<th align="center">LangChain</th>
<th align="center">Flowise</th>
<th align="center">OpenAI Assistants API</th>
</tr>
<tr>
<td align="center">Programski pristop</td>
<td align="center">API + usmerjeno v aplikacije</td>
<td align="center">Python koda</td>
<td align="center">Usmerjeno v aplikacije</td>
<td align="center">Usmerjeno v API</td>
</tr>
<tr>
<td align="center">Podprti LLM-ji</td>
<td align="center">Bogata izbira</td>
<td align="center">Bogata izbira</td>
<td align="center">Bogata izbira</td>
<td align="center">Samo OpenAI</td>
</tr>
<tr>
<td align="center">RAG pogon</td>
<td align="center">✅</td>
<td align="center">✅</td>
<td align="center">✅</td>
<td align="center">✅</td>
</tr>
<tr>
<td align="center">Agent</td>
<td align="center">✅</td>
<td align="center">✅</td>
<td align="center">❌</td>
<td align="center">✅</td>
</tr>
<tr>
<td align="center">Potek dela</td>
<td align="center">✅</td>
<td align="center">❌</td>
<td align="center">✅</td>
<td align="center">❌</td>
</tr>
<tr>
<td align="center">Spremljanje</td>
<td align="center">✅</td>
<td align="center">✅</td>
<td align="center">❌</td>
<td align="center">❌</td>
</tr>
<tr>
<td align="center">Funkcija za podjetja (SSO/nadzor dostopa)</td>
<td align="center">✅</td>
<td align="center">❌</td>
<td align="center">❌</td>
<td align="center">❌</td>
</tr>
<tr>
<td align="center">Lokalna namestitev</td>
<td align="center">✅</td>
<td align="center">✅</td>
<td align="center">✅</td>
<td align="center">❌</td>
</tr>
</table>
## Uporaba Dify
@@ -187,4 +254,4 @@ Zaradi zaščite vaše zasebnosti se izogibajte objavljanju varnostnih vprašanj
## Licenca
To skladišče je na voljo pod [odprtokodno licenco Dify](LICENSE) , ki je v bistvu Apache 2.0 z nekaj dodatnimi omejitvami.
To skladišče je na voljo pod [odprtokodno licenco Dify](LICENSE) , ki je v bistvu Apache 2.0 z nekaj dodatnimi omejitvami.

View File

@@ -430,7 +430,4 @@ CREATE_TIDB_SERVICE_JOB_ENABLED=false
# Maximum number of submitted thread count in a ThreadPool for parallel node execution
MAX_SUBMIT_COUNT=100
# Lockout duration in seconds
LOGIN_LOCKOUT_DURATION=86400
# Prevent Clickjacking
ALLOW_EMBED=false
LOGIN_LOCKOUT_DURATION=86400

View File

@@ -37,7 +37,13 @@
4. Create environment.
Dify API service uses [Poetry](https://python-poetry.org/docs/) to manage dependencies. You can execute `poetry shell` to activate the environment.
Dify API service uses [Poetry](https://python-poetry.org/docs/) to manage dependencies. First, you need to add the poetry shell plugin, if you don't have it already, in order to run in a virtual environment. [Note: Poetry shell is no longer a native command so you need to install the poetry plugin beforehand]
```bash
poetry self add poetry-plugin-shell
```
Then, You can execute `poetry shell` to activate the environment.
5. Install dependencies

View File

@@ -315,8 +315,8 @@ class HttpConfig(BaseSettings):
)
RESPECT_XFORWARD_HEADERS_ENABLED: bool = Field(
description="Enable or disable the X-Forwarded-For Proxy Fix middleware from Werkzeug"
" to respect X-* headers to redirect clients",
description="Enable handling of X-Forwarded-For, X-Forwarded-Proto, and X-Forwarded-Port headers"
" when the app is behind a single trusted reverse proxy.",
default=False,
)
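The reworded description above corresponds to enabling Werkzeug's ProxyFix middleware so that X-Forwarded-For, X-Forwarded-Proto, and X-Forwarded-Port are honored when the app sits behind a single trusted reverse proxy (see #13102 in the commit list). A minimal sketch of that wiring, assuming a plain Flask app rather than Dify's actual app factory:

```python
# Minimal sketch, assuming a plain Flask app; Dify's real app factory wires this elsewhere.
from flask import Flask
from werkzeug.middleware.proxy_fix import ProxyFix

app = Flask(__name__)

RESPECT_XFORWARD_HEADERS_ENABLED = True  # the config flag described in the hunk above

if RESPECT_XFORWARD_HEADERS_ENABLED:
    # Trust exactly one proxy hop for each forwarded header, including X-Forwarded-Port.
    app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1, x_proto=1, x_host=1, x_port=1)
```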
@@ -498,6 +498,11 @@ class AuthConfig(BaseSettings):
default=86400,
)
FORGOT_PASSWORD_LOCKOUT_DURATION: PositiveInt = Field(
description="Time (in seconds) a user must wait before retrying password reset after exceeding the rate limit.",
default=86400,
)
class ModerationConfig(BaseSettings):
"""

View File

@@ -1,5 +1,5 @@
from typing import Any, Literal, Optional
from urllib.parse import parse_qsl, quote_plus
from urllib.parse import quote_plus
from pydantic import Field, NonNegativeInt, PositiveFloat, PositiveInt, computed_field
from pydantic_settings import BaseSettings
@@ -166,28 +166,14 @@ class DatabaseConfig(BaseSettings):
default=False,
)
@computed_field # type: ignore[misc]
@property
@computed_field
def SQLALCHEMY_ENGINE_OPTIONS(self) -> dict[str, Any]:
# Parse DB_EXTRAS for 'options'
db_extras_dict = dict(parse_qsl(self.DB_EXTRAS))
options = db_extras_dict.get("options", "")
# Always include timezone
timezone_opt = "-c timezone=UTC"
if options:
# Merge user options and timezone
merged_options = f"{options} {timezone_opt}"
else:
merged_options = timezone_opt
connect_args = {"options": merged_options}
return {
"pool_size": self.SQLALCHEMY_POOL_SIZE,
"max_overflow": self.SQLALCHEMY_MAX_OVERFLOW,
"pool_recycle": self.SQLALCHEMY_POOL_RECYCLE,
"pool_pre_ping": self.SQLALCHEMY_POOL_PRE_PING,
"connect_args": connect_args,
"connect_args": {"options": "-c timezone=UTC"},
}
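For context, the fuller variant in this hunk (the side that still imports parse_qsl) merges any `options` passed through DB_EXTRAS with the mandatory UTC timezone option. The standalone sketch below reproduces that merge outside the config class so the behavior is easy to see:

```python
# Standalone illustration of the DB_EXTRAS 'options' merging shown above; not the config class itself.
from urllib.parse import parse_qsl


def build_connect_args(db_extras: str) -> dict:
    db_extras_dict = dict(parse_qsl(db_extras))
    options = db_extras_dict.get("options", "")
    timezone_opt = "-c timezone=UTC"
    merged_options = f"{options} {timezone_opt}" if options else timezone_opt
    return {"options": merged_options}


# e.g. allowing DB_EXTRAS to set search_path, as the CHANGELOG entry above mentions
print(build_connect_args("options=-c search_path=myschema"))
# -> {'options': '-c search_path=myschema -c timezone=UTC'}
```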

View File

@@ -9,7 +9,7 @@ class PackagingInfo(BaseSettings):
CURRENT_VERSION: str = Field(
description="Dify version",
default="0.15.8",
default="0.15.3",
)
COMMIT_SHA: str = Field(

View File

@@ -15,7 +15,7 @@ AUDIO_EXTENSIONS.extend([ext.upper() for ext in AUDIO_EXTENSIONS])
if dify_config.ETL_TYPE == "Unstructured":
DOCUMENT_EXTENSIONS = ["txt", "markdown", "md", "mdx", "pdf", "html", "htm", "xlsx", "xls"]
DOCUMENT_EXTENSIONS.extend(("docx", "csv", "eml", "msg", "pptx", "xml", "epub"))
DOCUMENT_EXTENSIONS.extend(("doc", "docx", "csv", "eml", "msg", "pptx", "xml", "epub"))
if dify_config.UNSTRUCTURED_API_URL:
DOCUMENT_EXTENSIONS.append("ppt")
DOCUMENT_EXTENSIONS.extend([ext.upper() for ext in DOCUMENT_EXTENSIONS])

View File

@@ -2,28 +2,30 @@ import uuid
from typing import cast
from flask_login import current_user # type: ignore
from flask_restful import (Resource, inputs, marshal, # type: ignore
marshal_with, reqparse)
from flask_restful import Resource, inputs, marshal, marshal_with, reqparse # type: ignore
from sqlalchemy import select
from sqlalchemy.orm import Session
from werkzeug.exceptions import BadRequest, Forbidden, abort
from controllers.console import api
from controllers.console.app.wraps import get_app_model
from controllers.console.wraps import (account_initialization_required,
cloud_edition_billing_resource_check,
enterprise_license_required,
setup_required)
from controllers.console.wraps import (
account_initialization_required,
cloud_edition_billing_resource_check,
enterprise_license_required,
setup_required,
)
from core.ops.ops_trace_manager import OpsTraceManager
from extensions.ext_database import db
from fields.app_fields import (app_detail_fields, app_detail_fields_with_site,
app_pagination_fields)
from fields.app_fields import (
app_detail_fields,
app_detail_fields_with_site,
app_pagination_fields,
)
from libs.login import login_required
from models import Account, App
from services.app_dsl_service import AppDslService, ImportMode
from services.app_service import AppService
from services.enterprise.enterprise_service import EnterpriseService
from services.feature_service import FeatureService
ALLOW_CREATE_APP_MODES = ["chat", "agent-chat", "advanced-chat", "workflow", "completion"]
@@ -65,17 +67,7 @@ class AppListApi(Resource):
if not app_pagination:
return {"data": [], "total": 0, "page": 1, "limit": 20, "has_more": False}
if FeatureService.get_system_features().webapp_auth.enabled:
app_ids = [str(app.id) for app in app_pagination.items]
res = EnterpriseService.WebAppAuth.batch_get_app_access_mode_by_id(app_ids=app_ids)
if len(res) != len(app_ids):
raise BadRequest("Invalid app id in webapp auth")
for app in app_pagination.items:
if str(app.id) in res:
app.access_mode = res[str(app.id)].access_mode
return marshal(app_pagination, app_pagination_fields), 200
return marshal(app_pagination, app_pagination_fields)
@setup_required
@login_required
@@ -119,10 +111,6 @@ class AppApi(Resource):
app_model = app_service.get_app(app_model)
if FeatureService.get_system_features().webapp_auth.enabled:
app_setting = EnterpriseService.WebAppAuth.get_app_access_mode_by_id(app_id=str(app_model.id))
app_model.access_mode = app_setting.access_mode
return app_model
@setup_required

View File

@@ -6,13 +6,15 @@ from flask_restful import Resource, reqparse # type: ignore
from constants.languages import languages
from controllers.console import api
from controllers.console.auth.error import (EmailCodeError, InvalidEmailError,
InvalidTokenError,
PasswordMismatchError)
from controllers.console.error import (AccountInFreezeError, AccountNotFound,
EmailSendIpLimitError)
from controllers.console.wraps import (email_password_login_enabled,
setup_required)
from controllers.console.auth.error import (
EmailCodeError,
EmailPasswordResetLimitError,
InvalidEmailError,
InvalidTokenError,
PasswordMismatchError,
)
from controllers.console.error import AccountInFreezeError, AccountNotFound, EmailSendIpLimitError
from controllers.console.wraps import setup_required
from events.tenant_event import tenant_was_created
from extensions.ext_database import db
from libs.helper import email, extract_remote_ip
@@ -20,14 +22,12 @@ from libs.password import hash_password, valid_password
from models.account import Account
from services.account_service import AccountService, TenantService
from services.errors.account import AccountRegisterError
from services.errors.workspace import (WorkSpaceNotAllowedCreateError,
WorkspacesLimitExceededError)
from services.errors.workspace import WorkSpaceNotAllowedCreateError
from services.feature_service import FeatureService
class ForgotPasswordSendEmailApi(Resource):
@setup_required
@email_password_login_enabled
def post(self):
parser = reqparse.RequestParser()
parser.add_argument("email", type=email, required=True, location="json")
@@ -59,7 +59,6 @@ class ForgotPasswordSendEmailApi(Resource):
class ForgotPasswordCheckApi(Resource):
@setup_required
@email_password_login_enabled
def post(self):
parser = reqparse.RequestParser()
parser.add_argument("email", type=str, required=True, location="json")
@@ -69,6 +68,10 @@ class ForgotPasswordCheckApi(Resource):
user_email = args["email"]
is_forgot_password_error_rate_limit = AccountService.is_forgot_password_error_rate_limit(args["email"])
if is_forgot_password_error_rate_limit:
raise EmailPasswordResetLimitError()
token_data = AccountService.get_reset_password_data(args["token"])
if token_data is None:
raise InvalidTokenError()
@@ -77,22 +80,15 @@ class ForgotPasswordCheckApi(Resource):
raise InvalidEmailError()
if args["code"] != token_data.get("code"):
AccountService.add_forgot_password_error_rate_limit(args["email"])
raise EmailCodeError()
# Verified, revoke the first token
AccountService.revoke_reset_password_token(args["token"])
# Refresh token data by generating a new token
_, new_token = AccountService.generate_reset_password_token(
user_email, code=args["code"], additional_data={"phase": "reset"}
)
return {"is_valid": True, "email": token_data.get("email"), "token": new_token}
AccountService.reset_forgot_password_error_rate_limit(args["email"])
return {"is_valid": True, "email": token_data.get("email")}
class ForgotPasswordResetApi(Resource):
@setup_required
@email_password_login_enabled
def post(self):
parser = reqparse.RequestParser()
parser.add_argument("token", type=str, required=True, nullable=False, location="json")
@@ -111,9 +107,6 @@ class ForgotPasswordResetApi(Resource):
if reset_data is None:
raise InvalidTokenError()
# Must use token in reset phase
if reset_data.get("phase", "") != "reset":
raise InvalidTokenError()
AccountService.revoke_reset_password_token(token)
@@ -146,8 +139,6 @@ class ForgotPasswordResetApi(Resource):
pass
except AccountRegisterError as are:
raise AccountInFreezeError()
except WorkspacesLimitExceededError:
pass
return {"result": "success"}
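The hunks above gate the reset-password flow behind AccountService.is_forgot_password_error_rate_limit and its add_/reset_ counterparts, raising EmailPasswordResetLimitError once too many wrong codes have been submitted (see #13292 in the commit list). The actual AccountService implementation is not shown in this compare; the sketch below is only a hypothetical Redis-counter version of such helpers:

```python
# Hypothetical sketch of counter-based lockout helpers; the real AccountService logic may differ.
import redis

redis_client = redis.Redis()
MAX_ATTEMPTS = 5         # assumed threshold
LOCKOUT_SECONDS = 86400  # mirrors the FORGOT_PASSWORD_LOCKOUT_DURATION default above


def _key(email: str) -> str:
    return f"forgot_password_error_rate_limit:{email}"


def add_forgot_password_error_rate_limit(email: str) -> None:
    # Count one failed code attempt and (re)start the lockout window.
    redis_client.incr(_key(email))
    redis_client.expire(_key(email), LOCKOUT_SECONDS)


def is_forgot_password_error_rate_limit(email: str) -> bool:
    count = redis_client.get(_key(email))
    return count is not None and int(count) >= MAX_ATTEMPTS


def reset_forgot_password_error_rate_limit(email: str) -> None:
    redis_client.delete(_key(email))
```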

View File

@@ -21,9 +21,8 @@ from controllers.console.error import (
AccountNotFound,
EmailSendIpLimitError,
NotAllowedCreateWorkspace,
WorkspacesLimitExceeded,
)
from controllers.console.wraps import email_password_login_enabled, setup_required
from controllers.console.wraps import setup_required
from events.tenant_event import tenant_was_created
from libs.helper import email, extract_remote_ip
from libs.password import valid_password
@@ -31,7 +30,7 @@ from models.account import Account
from services.account_service import AccountService, RegisterService, TenantService
from services.billing_service import BillingService
from services.errors.account import AccountRegisterError
from services.errors.workspace import WorkSpaceNotAllowedCreateError, WorkspacesLimitExceededError
from services.errors.workspace import WorkSpaceNotAllowedCreateError
from services.feature_service import FeatureService
@@ -39,7 +38,6 @@ class LoginApi(Resource):
"""Resource for user login."""
@setup_required
@email_password_login_enabled
def post(self):
"""Authenticate user and login."""
parser = reqparse.RequestParser()
@@ -89,15 +87,10 @@ class LoginApi(Resource):
# SELF_HOSTED only have one workspace
tenants = TenantService.get_join_tenants(account)
if len(tenants) == 0:
system_features = FeatureService.get_system_features()
if system_features.is_allow_create_workspace and not system_features.license.workspaces.is_available():
raise WorkspacesLimitExceeded()
else:
return {
"result": "fail",
"data": "workspace not found, please contact system admin to invite you to join in a workspace",
}
return {
"result": "fail",
"data": "workspace not found, please contact system admin to invite you to join in a workspace",
}
token_pair = AccountService.login(account=account, ip_address=extract_remote_ip(request))
AccountService.reset_login_error_rate_limit(args["email"])
@@ -117,7 +110,6 @@ class LogoutApi(Resource):
class ResetPasswordSendEmailApi(Resource):
@setup_required
@email_password_login_enabled
def post(self):
parser = reqparse.RequestParser()
parser.add_argument("email", type=email, required=True, location="json")
@@ -204,9 +196,6 @@ class EmailCodeLoginApi(Resource):
if account:
tenant = TenantService.get_join_tenants(account)
if not tenant:
workspaces = FeatureService.get_system_features().license.workspaces
if not workspaces.is_available():
raise WorkspacesLimitExceeded()
if not FeatureService.get_system_features().is_allow_create_workspace:
raise NotAllowedCreateWorkspace()
else:
@@ -224,8 +213,6 @@ class EmailCodeLoginApi(Resource):
return NotAllowedCreateWorkspace()
except AccountRegisterError as are:
raise AccountInFreezeError()
except WorkspacesLimitExceededError:
raise WorkspacesLimitExceeded()
token_pair = AccountService.login(account, ip_address=extract_remote_ip(request))
AccountService.reset_login_error_rate_limit(args["email"])
return {"result": "success", "data": token_pair.model_dump()}

View File

@@ -46,18 +46,6 @@ class NotAllowedCreateWorkspace(BaseHTTPException):
code = 400
class WorkspaceMembersLimitExceeded(BaseHTTPException):
error_code = "limit_exceeded"
description = "Unable to add member because the maximum workspace's member limit was exceeded"
code = 400
class WorkspacesLimitExceeded(BaseHTTPException):
error_code = "limit_exceeded"
description = "Unable to create workspace because the maximum workspace limit was exceeded"
code = 400
class AccountBannedError(BaseHTTPException):
error_code = "account_banned"
description = "Account is banned."

View File

@@ -23,9 +23,3 @@ class AppSuggestedQuestionsAfterAnswerDisabledError(BaseHTTPException):
error_code = "app_suggested_questions_after_answer_disabled"
description = "Function Suggested questions after answer disabled."
code = 403
class AppAccessDeniedError(BaseHTTPException):
error_code = "access_denied"
description = "App access denied."
code = 403

View File

@@ -1,26 +1,20 @@
import logging
from datetime import UTC, datetime
from typing import Any
from flask import request
from flask_login import current_user # type: ignore
from flask_restful import (Resource, inputs, marshal_with, # type: ignore
reqparse)
from flask_restful import Resource, inputs, marshal_with, reqparse # type: ignore
from sqlalchemy import and_
from werkzeug.exceptions import BadRequest, Forbidden, NotFound
from controllers.console import api
from controllers.console.explore.wraps import InstalledAppResource
from controllers.console.wraps import (account_initialization_required,
cloud_edition_billing_resource_check)
from controllers.console.wraps import account_initialization_required, cloud_edition_billing_resource_check
from extensions.ext_database import db
from fields.installed_app_fields import installed_app_list_fields
from libs.login import login_required
from models import App, InstalledApp, RecommendedApp
from services.account_service import TenantService
from services.app_service import AppService
from services.enterprise.enterprise_service import EnterpriseService
from services.feature_service import FeatureService
class InstalledAppsListApi(Resource):
@@ -54,30 +48,6 @@ class InstalledAppsListApi(Resource):
for installed_app in installed_apps
if installed_app.app is not None
]
# filter out apps that user doesn't have access to
if FeatureService.get_system_features().webapp_auth.enabled:
user_id = current_user.id
res = []
app_ids = [installed_app["app"].id for installed_app in installed_app_list]
webapp_settings = EnterpriseService.WebAppAuth.batch_get_app_access_mode_by_id(app_ids)
for installed_app in installed_app_list:
webapp_setting = webapp_settings.get(installed_app["app"].id)
if not webapp_setting:
continue
if webapp_setting.access_mode == "sso_verified":
continue
app_code = AppService.get_app_code_by_id(str(installed_app["app"].id))
if EnterpriseService.WebAppAuth.is_user_allowed_to_access_webapp(
user_id=user_id,
app_code=app_code,
):
res.append(installed_app)
installed_app_list = res
logging.info(
f"installed_app_list: {installed_app_list}, user_id: {user_id}"
)
installed_app_list.sort(
key=lambda app: (
-app["is_pinned"],

View File

@@ -4,14 +4,10 @@ from flask_login import current_user # type: ignore
from flask_restful import Resource # type: ignore
from werkzeug.exceptions import NotFound
from controllers.console.explore.error import AppAccessDeniedError
from controllers.console.wraps import account_initialization_required
from extensions.ext_database import db
from libs.login import login_required
from models import InstalledApp
from services.app_service import AppService
from services.enterprise.enterprise_service import EnterpriseService
from services.feature_service import FeatureService
def installed_app_required(view=None):
@@ -52,30 +48,6 @@ def installed_app_required(view=None):
return decorator
def user_allowed_to_access_app(view=None):
def decorator(view):
@wraps(view)
def decorated(installed_app: InstalledApp, *args, **kwargs):
feature = FeatureService.get_system_features()
if feature.webapp_auth.enabled:
app_id = installed_app.app_id
app_code = AppService.get_app_code_by_id(app_id)
res = EnterpriseService.WebAppAuth.is_user_allowed_to_access_webapp(
user_id=str(current_user.id),
app_code=app_code,
)
if not res:
raise AppAccessDeniedError()
return view(installed_app, *args, **kwargs)
return decorated
if view:
return decorator(view)
return decorator
class InstalledAppResource(Resource):
# must be reversed if there are multiple decorators
method_decorators = [user_allowed_to_access_app, installed_app_required, account_initialization_required, login_required]
method_decorators = [installed_app_required, account_initialization_required, login_required]

View File

@@ -6,7 +6,6 @@ from flask_restful import Resource, abort, marshal_with, reqparse # type: ignor
import services
from configs import dify_config
from controllers.console import api
from controllers.console.error import WorkspaceMembersLimitExceeded
from controllers.console.wraps import (
account_initialization_required,
cloud_edition_billing_resource_check,
@@ -18,7 +17,6 @@ from libs.login import login_required
from models.account import Account, TenantAccountRole
from services.account_service import RegisterService, TenantService
from services.errors.account import AccountAlreadyInTenantError
from services.feature_service import FeatureService
class MemberListApi(Resource):
@@ -56,12 +54,6 @@ class MemberInviteEmailApi(Resource):
inviter = current_user
invitation_results = []
console_web_url = dify_config.CONSOLE_WEB_URL
workspace_members = FeatureService.get_features(tenant_id=inviter.current_tenant.id).workspace_members
if not workspace_members.is_available(len(invitee_emails)):
raise WorkspaceMembersLimitExceeded()
for invitee_email in invitee_emails:
try:
token = RegisterService.invite_new_member(
@@ -79,6 +71,7 @@ class MemberInviteEmailApi(Resource):
invitation_results.append(
{"status": "success", "email": invitee_email, "url": f"{console_web_url}/signin"}
)
break
except Exception as e:
invitation_results.append({"status": "failed", "email": invitee_email, "message": str(e)})

View File

@@ -11,8 +11,7 @@ from models.model import DifySetup
from services.feature_service import FeatureService, LicenseStatus
from services.operation_service import OperationService
from .error import (NotInitValidateError, NotSetupError,
UnauthorizedAndForceLogout)
from .error import NotInitValidateError, NotSetupError, UnauthorizedAndForceLogout
def account_initialization_required(view):
@@ -40,28 +39,6 @@ def only_edition_cloud(view):
return decorated
def only_edition_enterprise(view):
@wraps(view)
def decorated(*args, **kwargs):
if not dify_config.ENTERPRISE_ENABLED:
abort(404)
return view(*args, **kwargs)
return decorated
def only_edition_self_hosted(view):
@wraps(view)
def decorated(*args, **kwargs):
if not dify_config.ENTERPRISE_ENABLED:
abort(404)
return view(*args, **kwargs)
return decorated
def only_edition_self_hosted(view):
@wraps(view)
def decorated(*args, **kwargs):
@@ -177,16 +154,3 @@ def enterprise_license_required(view):
return view(*args, **kwargs)
return decorated
def email_password_login_enabled(view):
@wraps(view)
def decorated(*args, **kwargs):
features = FeatureService.get_system_features()
if features.enable_email_password_login:
return view(*args, **kwargs)
# otherwise, return 403
abort(403)
return decorated

View File

@@ -5,5 +5,4 @@ from libs.external_api import ExternalApi
bp = Blueprint("inner_api", __name__, url_prefix="/inner/api")
api = ExternalApi(bp)
from . import mail
from .workspace import workspace

View File

@@ -1,27 +0,0 @@
from flask_restful import (
Resource, # type: ignore
reqparse,
)
from controllers.console.wraps import setup_required
from controllers.inner_api import api
from controllers.inner_api.wraps import inner_api_only
from services.enterprise.mail_service import DifyMail, EnterpriseMailService
class EnterpriseMail(Resource):
@setup_required
@inner_api_only
def post(self):
parser = reqparse.RequestParser()
parser.add_argument("to", type=str, action="append", required=True)
parser.add_argument("subject", type=str, required=True)
parser.add_argument("body", type=str, required=True)
parser.add_argument("substitutions", type=dict, required=False)
args = parser.parse_args()
EnterpriseMailService.send_mail(DifyMail(**args))
return {"message": "success"}, 200
api.add_resource(EnterpriseMail, "/enterprise/mail")

View File

@@ -50,8 +50,8 @@ class EnterpriseWorkspaceNoOwnerEmail(Resource):
"plan": tenant.plan,
"status": tenant.status,
"custom_config": json.loads(tenant.custom_config) if tenant.custom_config else {},
"created_at": tenant.created_at.isoformat() if tenant.created_at else None,
"updated_at": tenant.updated_at.isoformat() if tenant.updated_at else None,
"created_at": tenant.created_at.isoformat() + "Z" if tenant.created_at else None,
"updated_at": tenant.updated_at.isoformat() + "Z" if tenant.updated_at else None,
}
return {
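The paired created_at/updated_at lines above differ only in the trailing "Z": a naive datetime's isoformat() output carries no timezone marker, so appending "Z" makes the UTC intent explicit to API consumers (see #13471 in the commit list). A standalone illustration, not Dify code:

```python
# Standalone illustration of why the "Z" suffix is appended; not Dify code.
from datetime import UTC, datetime

naive = datetime(2025, 2, 10, 11, 37, 50)               # stored without tzinfo
aware = datetime(2025, 2, 10, 11, 37, 50, tzinfo=UTC)

print(naive.isoformat())         # 2025-02-10T11:37:50        (no timezone marker)
print(naive.isoformat() + "Z")   # 2025-02-10T11:37:50Z       (explicitly UTC)
print(aware.isoformat())         # 2025-02-10T11:37:50+00:00  (equivalent, different notation)
```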

View File

@@ -15,17 +15,4 @@ api.add_resource(FileApi, "/files/upload")
api.add_resource(RemoteFileInfoApi, "/remote-files/<path:url>")
api.add_resource(RemoteFileUploadApi, "/remote-files/upload")
from . import (
app,
audio,
completion,
conversation,
feature,
forgot_password,
login,
message,
passport,
saved_message,
site,
workflow,
)
from . import app, audio, completion, conversation, feature, message, passport, saved_message, site, workflow

View File

@@ -1,17 +1,12 @@
from flask_restful import marshal_with # type: ignore
from controllers.common import fields
from controllers.common import helpers as controller_helpers
from controllers.web import api
from controllers.web.error import AppUnavailableError
from controllers.web.wraps import WebApiResource
from flask import request
from flask_restful import Resource, marshal_with, reqparse # type: ignore
from libs.passport import PassportService
from models.model import App, AppMode
from services.app_service import AppService
from services.enterprise.enterprise_service import EnterpriseService
from services.feature_service import FeatureService
from services.webapp_auth_service import WebAppAuthService
class AppParameterApi(WebApiResource):
@@ -47,65 +42,5 @@ class AppMeta(WebApiResource):
return AppService().get_app_meta(app_model)
class AppAccessMode(Resource):
def get(self):
parser = reqparse.RequestParser()
parser.add_argument("appId", type=str, required=False, location="args")
parser.add_argument("appCode", type=str, required=False, location="args")
args = parser.parse_args()
features = FeatureService.get_system_features()
if not features.webapp_auth.enabled:
return {"accessMode": "public"}
app_id = args.get("appId")
if args.get("appCode"):
app_code = args["appCode"]
app_id = AppService.get_app_id_by_code(app_code)
if not app_id:
raise ValueError("appId or appCode must be provided")
res = EnterpriseService.WebAppAuth.get_app_access_mode_by_id(app_id)
return {"accessMode": res.access_mode}
class AppWebAuthPermission(Resource):
def get(self):
user_id = "visitor"
try:
auth_header = request.headers.get("Authorization")
if auth_header is None:
raise
if " " not in auth_header:
raise
auth_scheme, tk = auth_header.split(None, 1)
auth_scheme = auth_scheme.lower()
if auth_scheme != "bearer":
raise
decoded = PassportService().verify(tk)
user_id = decoded.get("user_id", "visitor")
except Exception as e:
pass
parser = reqparse.RequestParser()
parser.add_argument("appId", type=str, required=True, location="args")
args = parser.parse_args()
app_id = args["appId"]
app_code = AppService.get_app_code_by_id(app_id)
res = True
if WebAppAuthService.is_app_require_permission_check(app_id=app_id):
res = EnterpriseService.WebAppAuth.is_user_allowed_to_access_webapp(str(user_id), app_code)
return {"result": res}
api.add_resource(AppParameterApi, "/parameters")
api.add_resource(AppMeta, "/meta")
# webapp auth apis
api.add_resource(AppAccessMode, "/webapp/access-mode")
api.add_resource(AppWebAuthPermission, "/webapp/permission")

View File

@@ -7,12 +7,6 @@ class AppUnavailableError(BaseHTTPException):
code = 400
class AppNotPublishedError(BaseHTTPException):
error_code = "app_not_published"
description = "App not published, please check your app configurations."
code = 400
class NotCompletionAppError(BaseHTTPException):
error_code = "not_completion_app"
description = "Please check if your Completion app mode matches the right API route."
@@ -127,15 +121,9 @@ class UnsupportedFileTypeError(BaseHTTPException):
code = 415
class WebAppAuthRequiredError(BaseHTTPException):
class WebSSOAuthRequiredError(BaseHTTPException):
error_code = "web_sso_auth_required"
description = "Web app authentication required."
code = 401
class WebAppAuthAccessDeniedError(BaseHTTPException):
error_code = "web_app_access_denied"
description = "You do not have permission to access this web app."
description = "Web SSO authentication required."
code = 401

View File

@@ -1,147 +0,0 @@
import base64
import secrets
from flask import request
from flask_restful import Resource, reqparse
from sqlalchemy import select
from sqlalchemy.orm import Session
from controllers.console.auth.error import (
EmailCodeError,
EmailPasswordResetLimitError,
InvalidEmailError,
InvalidTokenError,
PasswordMismatchError,
)
from controllers.console.error import AccountNotFound, EmailSendIpLimitError
from controllers.console.wraps import email_password_login_enabled, only_edition_enterprise, setup_required
from controllers.web import api
from extensions.ext_database import db
from libs.helper import email, extract_remote_ip
from libs.password import hash_password, valid_password
from models.account import Account
from services.account_service import AccountService
class ForgotPasswordSendEmailApi(Resource):
@only_edition_enterprise
@setup_required
@email_password_login_enabled
def post(self):
parser = reqparse.RequestParser()
parser.add_argument("email", type=email, required=True, location="json")
parser.add_argument("language", type=str, required=False, location="json")
args = parser.parse_args()
ip_address = extract_remote_ip(request)
if AccountService.is_email_send_ip_limit(ip_address):
raise EmailSendIpLimitError()
if args["language"] is not None and args["language"] == "zh-Hans":
language = "zh-Hans"
else:
language = "en-US"
with Session(db.engine) as session:
account = session.execute(select(Account).filter_by(email=args["email"])).scalar_one_or_none()
token = None
if account is None:
raise AccountNotFound()
else:
token = AccountService.send_reset_password_email(account=account, email=args["email"], language=language)
return {"result": "success", "data": token}
class ForgotPasswordCheckApi(Resource):
@only_edition_enterprise
@setup_required
@email_password_login_enabled
def post(self):
parser = reqparse.RequestParser()
parser.add_argument("email", type=str, required=True, location="json")
parser.add_argument("code", type=str, required=True, location="json")
parser.add_argument("token", type=str, required=True, nullable=False, location="json")
args = parser.parse_args()
user_email = args["email"]
is_forgot_password_error_rate_limit = AccountService.is_forgot_password_error_rate_limit(args["email"])
if is_forgot_password_error_rate_limit:
raise EmailPasswordResetLimitError()
token_data = AccountService.get_reset_password_data(args["token"])
if token_data is None:
raise InvalidTokenError()
if user_email != token_data.get("email"):
raise InvalidEmailError()
if args["code"] != token_data.get("code"):
AccountService.add_forgot_password_error_rate_limit(args["email"])
raise EmailCodeError()
# Verified, revoke the first token
AccountService.revoke_reset_password_token(args["token"])
# Refresh token data by generating a new token
_, new_token = AccountService.generate_reset_password_token(
user_email, code=args["code"], additional_data={"phase": "reset"}
)
AccountService.reset_forgot_password_error_rate_limit(args["email"])
return {"is_valid": True, "email": token_data.get("email"), "token": new_token}
class ForgotPasswordResetApi(Resource):
@only_edition_enterprise
@setup_required
@email_password_login_enabled
def post(self):
parser = reqparse.RequestParser()
parser.add_argument("token", type=str, required=True, nullable=False, location="json")
parser.add_argument("new_password", type=valid_password, required=True, nullable=False, location="json")
parser.add_argument("password_confirm", type=valid_password, required=True, nullable=False, location="json")
args = parser.parse_args()
# Validate passwords match
if args["new_password"] != args["password_confirm"]:
raise PasswordMismatchError()
# Validate token and get reset data
reset_data = AccountService.get_reset_password_data(args["token"])
if not reset_data:
raise InvalidTokenError()
# Must use token in reset phase
if reset_data.get("phase", "") != "reset":
raise InvalidTokenError()
# Revoke token to prevent reuse
AccountService.revoke_reset_password_token(args["token"])
# Generate secure salt and hash password
salt = secrets.token_bytes(16)
password_hashed = hash_password(args["new_password"], salt)
email = reset_data.get("email", "")
with Session(db.engine) as session:
account = session.execute(select(Account).filter_by(email=email)).scalar_one_or_none()
if account:
self._update_existing_account(account, password_hashed, salt, session)
else:
raise AccountNotFound()
return {"result": "success"}
def _update_existing_account(self, account, password_hashed, salt, session):
# Update existing account credentials
account.password = base64.b64encode(password_hashed).decode()
account.password_salt = base64.b64encode(salt).decode()
session.commit()
api.add_resource(ForgotPasswordSendEmailApi, "/forgot-password")
api.add_resource(ForgotPasswordCheckApi, "/forgot-password/validity")
api.add_resource(ForgotPasswordResetApi, "/forgot-password/resets")

View File

@@ -1,109 +0,0 @@
import services
from controllers.console.auth.error import (EmailCodeError,
EmailOrPasswordMismatchError,
InvalidEmailError)
from controllers.console.error import AccountBannedError, AccountNotFound
from controllers.console.wraps import only_edition_enterprise, setup_required
from controllers.web import api
from flask_restful import Resource, reqparse
from jwt import InvalidTokenError # type: ignore
from libs.helper import email
from libs.password import valid_password
from services.account_service import AccountService
from services.webapp_auth_service import WebAppAuthService
class LoginApi(Resource):
"""Resource for web app email/password login."""
@setup_required
@only_edition_enterprise
def post(self):
"""Authenticate user and login."""
parser = reqparse.RequestParser()
parser.add_argument("email", type=email, required=True, location="json")
parser.add_argument("password", type=valid_password, required=True, location="json")
args = parser.parse_args()
try:
account = WebAppAuthService.authenticate(args["email"], args["password"])
except services.errors.account.AccountLoginError:
raise AccountBannedError()
except services.errors.account.AccountPasswordError:
raise EmailOrPasswordMismatchError()
except services.errors.account.AccountNotFoundError:
raise AccountNotFound()
token = WebAppAuthService.login(account=account)
return {"result": "success", "data": {"access_token": token}}
# class LogoutApi(Resource):
# @setup_required
# def get(self):
# account = cast(Account, flask_login.current_user)
# if isinstance(account, flask_login.AnonymousUserMixin):
# return {"result": "success"}
# flask_login.logout_user()
# return {"result": "success"}
class EmailCodeLoginSendEmailApi(Resource):
@setup_required
@only_edition_enterprise
def post(self):
parser = reqparse.RequestParser()
parser.add_argument("email", type=email, required=True, location="json")
parser.add_argument("language", type=str, required=False, location="json")
args = parser.parse_args()
if args["language"] is not None and args["language"] == "zh-Hans":
language = "zh-Hans"
else:
language = "en-US"
account = WebAppAuthService.get_user_through_email(args["email"])
if account is None:
raise AccountNotFound()
else:
token = WebAppAuthService.send_email_code_login_email(account=account, language=language)
return {"result": "success", "data": token}
class EmailCodeLoginApi(Resource):
@setup_required
@only_edition_enterprise
def post(self):
parser = reqparse.RequestParser()
parser.add_argument("email", type=str, required=True, location="json")
parser.add_argument("code", type=str, required=True, location="json")
parser.add_argument("token", type=str, required=True, location="json")
args = parser.parse_args()
user_email = args["email"]
token_data = WebAppAuthService.get_email_code_login_data(args["token"])
if token_data is None:
raise InvalidTokenError()
if token_data["email"] != args["email"]:
raise InvalidEmailError()
if token_data["code"] != args["code"]:
raise EmailCodeError()
WebAppAuthService.revoke_email_code_login_token(args["token"])
account = WebAppAuthService.get_user_through_email(user_email)
if not account:
raise AccountNotFound()
token = WebAppAuthService.login(account=account)
AccountService.reset_login_error_rate_limit(args["email"])
return {"result": "success", "data": {"access_token": token}}
api.add_resource(LoginApi, "/login")
# api.add_resource(LogoutApi, "/logout")
api.add_resource(EmailCodeLoginSendEmailApi, "/email-code-login")
api.add_resource(EmailCodeLoginApi, "/email-code-login/validity")

View File

@@ -1,18 +1,16 @@
import uuid
from datetime import UTC, datetime, timedelta
from configs import dify_config
from controllers.web import api
from controllers.web.error import WebAppAuthRequiredError
from extensions.ext_database import db
from flask import request
from flask_restful import Resource
from flask_restful import Resource # type: ignore
from werkzeug.exceptions import NotFound, Unauthorized
from controllers.web import api
from controllers.web.error import WebSSOAuthRequiredError
from extensions.ext_database import db
from libs.passport import PassportService
from models.model import App, EndUser, Site
from services.enterprise.enterprise_service import EnterpriseService
from services.feature_service import FeatureService
from services.webapp_auth_service import WebAppAuthService, WebAppAuthType
from werkzeug.exceptions import NotFound, Unauthorized
class PassportResource(Resource):
@@ -21,23 +19,13 @@ class PassportResource(Resource):
def get(self):
system_features = FeatureService.get_system_features()
app_code = request.headers.get("X-App-Code")
web_app_access_token = request.args.get("web_app_access_token")
if app_code is None:
raise Unauthorized("X-App-Code header is missing.")
# exchange token for enterprise logined web user
enterprise_user_decoded = decode_enterprise_webapp_user_id(web_app_access_token)
if enterprise_user_decoded:
# a web user has already logged in, exchange a token for this app without redirecting to the login page
return exchange_token_for_existing_web_user(
app_code=app_code, enterprise_user_decoded=enterprise_user_decoded
)
if system_features.webapp_auth.enabled:
app_settings = EnterpriseService.WebAppAuth.get_app_access_mode_by_code(app_code=app_code)
if not app_settings or not app_settings.access_mode == "public":
raise WebAppAuthRequiredError()
if system_features.sso_enforced_for_web:
app_web_sso_enabled = EnterpriseService.get_app_web_sso_enabled(app_code).get("enabled", False)
if app_web_sso_enabled:
raise WebSSOAuthRequiredError()
# get site from db and check if it is normal
site = db.session.query(Site).filter(Site.code == app_code, Site.status == "normal").first()
@@ -77,128 +65,6 @@ class PassportResource(Resource):
api.add_resource(PassportResource, "/passport")
def decode_enterprise_webapp_user_id(jwt_token: str | None):
"""
Decode the enterprise user session from the Authorization header.
"""
if not jwt_token:
return None
decoded = PassportService().verify(jwt_token)
source = decoded.get("token_source")
if not source or source != "webapp_login_token":
raise Unauthorized("Invalid token source. Expected 'webapp_login_token'.")
return decoded
def exchange_token_for_existing_web_user(app_code: str, enterprise_user_decoded: dict):
"""
Exchange a token for an existing web user session.
"""
user_id = enterprise_user_decoded.get("user_id")
end_user_id = enterprise_user_decoded.get("end_user_id")
session_id = enterprise_user_decoded.get("session_id")
user_auth_type = enterprise_user_decoded.get("auth_type")
if not user_auth_type:
raise Unauthorized("Missing auth_type in the token.")
site = db.session.query(Site).filter(Site.code == app_code, Site.status == "normal").first()
if not site:
raise NotFound()
app_model = db.session.query(App).filter(App.id == site.app_id).first()
if not app_model or app_model.status != "normal" or not app_model.enable_site:
raise NotFound()
app_auth_type = WebAppAuthService.get_app_auth_type(app_code=app_code)
if app_auth_type == WebAppAuthType.PUBLIC:
return _exchange_for_public_app_token(app_model, site, enterprise_user_decoded)
elif app_auth_type == WebAppAuthType.EXTERNAL and user_auth_type != "external":
raise WebAppAuthRequiredError("Please login as external user.")
elif app_auth_type == WebAppAuthType.INTERNAL and user_auth_type != "internal":
raise WebAppAuthRequiredError("Please login as internal user.")
end_user = None
if end_user_id:
end_user = db.session.query(EndUser).filter(EndUser.id == end_user_id).first()
if session_id:
end_user = (
db.session.query(EndUser)
.filter(
EndUser.session_id == session_id,
EndUser.tenant_id == app_model.tenant_id,
EndUser.app_id == app_model.id,
)
.first()
)
if not end_user:
if not session_id:
raise NotFound("Missing session_id for existing web user.")
end_user = EndUser(
tenant_id=app_model.tenant_id,
app_id=app_model.id,
type="browser",
is_anonymous=True,
session_id=session_id,
)
db.session.add(end_user)
db.session.commit()
exp_dt = datetime.now(UTC) + timedelta(hours=dify_config.ACCESS_TOKEN_EXPIRE_MINUTES * 24)
exp = int(exp_dt.timestamp())
payload = {
"iss": site.id,
"sub": "Web API Passport",
"app_id": site.app_id,
"app_code": site.code,
"user_id": user_id,
"end_user_id": end_user.id,
"auth_type": user_auth_type,
"granted_at": int(datetime.now(UTC).timestamp()),
"token_source": "webapp",
"exp": exp,
}
token: str = PassportService().issue(payload)
return {
"access_token": token,
}
def _exchange_for_public_app_token(app_model, site, token_decoded):
user_id = token_decoded.get("user_id")
end_user = None
if user_id:
end_user = db.session.query(EndUser).filter(
EndUser.app_id == app_model.id, EndUser.session_id == user_id
).first()
if not end_user:
end_user = EndUser(
tenant_id=app_model.tenant_id,
app_id=app_model.id,
type="browser",
is_anonymous=True,
session_id=generate_session_id(),
)
db.session.add(end_user)
db.session.commit()
payload = {
"iss": site.app_id,
"sub": "Web API Passport",
"app_id": site.app_id,
"app_code": site.code,
"end_user_id": end_user.id,
}
tk = PassportService().issue(payload)
return {
"access_token": tk,
}
def generate_session_id():
"""
Generate a unique session ID.

View File
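
The passport exchange above builds a JWT payload (iss, sub, app_code, user_id, end_user_id, auth_type, granted_at, token_source, exp) and hands it to PassportService().issue(), while decode_enterprise_webapp_user_id() verifies the incoming token and checks its token_source claim. A minimal sketch of an equivalent issue/verify roundtrip using PyJWT directly; the library choice, the HS256 secret, and the one-day lifetime are assumptions, the real code goes through PassportService and dify_config:

# Illustrative sketch only, not part of the diff above.
from datetime import UTC, datetime, timedelta

import jwt  # PyJWT (assumption; Dify wraps signing behind PassportService)

SECRET = "replace-with-your-secret-key"  # assumption: HS256 shared secret

def issue_webapp_passport(site_id, app_id, app_code, user_id, end_user_id, auth_type):
    now = datetime.now(UTC)
    payload = {
        "iss": site_id,
        "sub": "Web API Passport",
        "app_id": app_id,
        "app_code": app_code,
        "user_id": user_id,
        "end_user_id": end_user_id,
        "auth_type": auth_type,
        "granted_at": int(now.timestamp()),
        "token_source": "webapp",
        "exp": int((now + timedelta(days=1)).timestamp()),  # assumption: 1-day lifetime
    }
    return jwt.encode(payload, SECRET, algorithm="HS256")

def verify_webapp_passport(token):
    # jwt.decode() enforces the exp claim and raises ExpiredSignatureError when stale.
    decoded = jwt.decode(token, SECRET, algorithms=["HS256"])
    if decoded.get("token_source") != "webapp":
        raise ValueError("unexpected token_source")
    return decoded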

@@ -1,19 +1,15 @@
from datetime import UTC, datetime
from functools import wraps
from controllers.web.error import (AppNotPublishedError,
WebAppAuthAccessDeniedError,
WebAppAuthRequiredError)
from extensions.ext_database import db
from flask import request
from flask_restful import Resource # type: ignore
from werkzeug.exceptions import BadRequest, NotFound, Unauthorized
from controllers.web.error import WebSSOAuthRequiredError
from extensions.ext_database import db
from libs.passport import PassportService
from models.model import App, EndUser, Site
from services.enterprise.enterprise_service import (EnterpriseService,
WebAppSettings)
from services.enterprise.enterprise_service import EnterpriseService
from services.feature_service import FeatureService
from services.webapp_auth_service import WebAppAuthService
from werkzeug.exceptions import BadRequest, NotFound, Unauthorized
def validate_jwt_token(view=None):
@@ -49,99 +45,47 @@ def decode_jwt_token():
raise Unauthorized("Invalid Authorization header format. Expected 'Bearer <api-key>' format.")
decoded = PassportService().verify(tk)
app_code = decoded.get("app_code")
app_id = decoded.get("app_id")
app_model = db.session.query(App).filter(App.id == app_id).first()
app_model = db.session.query(App).filter(App.id == decoded["app_id"]).first()
site = db.session.query(Site).filter(Site.code == app_code).first()
if not app_model:
raise NotFound()
if not app_code or not site:
raise BadRequest("Site URL is no longer valid.")
if app_model.enable_site is False or app_model.status != "normal":
raise AppNotPublishedError()
end_user_id = decoded.get("end_user_id")
end_user = db.session.query(EndUser).filter(EndUser.id == end_user_id).first()
if app_model.enable_site is False:
raise BadRequest("Site is disabled.")
end_user = db.session.query(EndUser).filter(EndUser.id == decoded["end_user_id"]).first()
if not end_user:
raise NotFound()
# for enterprise webapp auth
app_web_auth_enabled = False
webapp_settings = None
if system_features.webapp_auth.enabled:
webapp_settings = EnterpriseService.WebAppAuth.get_app_access_mode_by_code(app_code=app_code)
if not webapp_settings:
raise NotFound("Web app settings not found.")
app_web_auth_enabled = webapp_settings.access_mode != "public"
_validate_webapp_token(decoded, app_web_auth_enabled, system_features.webapp_auth.enabled)
_validate_user_accessibility(
decoded, app_code, app_web_auth_enabled, system_features.webapp_auth.enabled, webapp_settings
)
_validate_web_sso_token(decoded, system_features, app_code)
return app_model, end_user
except Unauthorized as e:
if system_features.webapp_auth.enabled:
if not app_code:
raise Unauthorized("Please re-login to access the web app.")
app_web_auth_enabled = (
EnterpriseService.WebAppAuth.get_app_access_mode_by_code(app_code=app_code).access_mode != "public"
)
if app_web_auth_enabled:
raise WebAppAuthRequiredError()
if system_features.sso_enforced_for_web:
app_web_sso_enabled = EnterpriseService.get_app_web_sso_enabled(app_code).get("enabled", False)
if app_web_sso_enabled:
raise WebSSOAuthRequiredError()
raise Unauthorized(e.description)
def _validate_webapp_token(decoded, app_web_auth_enabled: bool, system_webapp_auth_enabled: bool):
# Check if authentication is enforced for web app, and if the token source is not webapp,
# raise an error and redirect to login
if system_webapp_auth_enabled and app_web_auth_enabled:
source = decoded.get("token_source")
if not source or source != "webapp":
raise WebAppAuthRequiredError()
def _validate_web_sso_token(decoded, system_features, app_code):
app_web_sso_enabled = False
# Check if authentication is not enforced for web, and if the token source is webapp,
# Check if SSO is enforced for web, and if the token source is not SSO, raise an error and redirect to SSO login
if system_features.sso_enforced_for_web:
app_web_sso_enabled = EnterpriseService.get_app_web_sso_enabled(app_code).get("enabled", False)
if app_web_sso_enabled:
source = decoded.get("token_source")
if not source or source != "sso":
raise WebSSOAuthRequiredError()
# Check if SSO is not enforced for web, and if the token source is SSO,
# raise an error and redirect to normal passport login
if not system_webapp_auth_enabled or not app_web_auth_enabled:
if not system_features.sso_enforced_for_web or not app_web_sso_enabled:
source = decoded.get("token_source")
if source and source == "webapp":
raise Unauthorized("webapp token expired.")
def _validate_user_accessibility(
decoded,
app_code,
app_web_auth_enabled: bool,
system_webapp_auth_enabled: bool,
webapp_settings: WebAppSettings | None,
):
if system_webapp_auth_enabled and app_web_auth_enabled:
# Check if the user is allowed to access the web app
user_id = decoded.get("user_id")
if not user_id:
raise WebAppAuthRequiredError()
if not webapp_settings:
raise WebAppAuthRequiredError("Web app settings not found.")
if WebAppAuthService.is_app_require_permission_check(access_mode=webapp_settings.access_mode):
if not EnterpriseService.WebAppAuth.is_user_allowed_to_access_webapp(user_id, app_code=app_code):
raise WebAppAuthAccessDeniedError()
auth_type = decoded.get("auth_type")
granted_at = decoded.get("granted_at")
if not auth_type:
raise WebAppAuthAccessDeniedError("Missing auth_type in the token.")
if not granted_at:
raise WebAppAuthAccessDeniedError("Missing granted_at in the token.")
# check if sso has been updated
if auth_type == "external":
last_update_time = EnterpriseService.get_app_sso_settings_last_update_time()
if granted_at and datetime.fromtimestamp(granted_at, tz=UTC) < last_update_time:
raise WebAppAuthAccessDeniedError("SSO settings have been updated. Please re-login.")
elif auth_type == "internal":
last_update_time = EnterpriseService.get_workspace_sso_settings_last_update_time()
if granted_at and datetime.fromtimestamp(granted_at, tz=UTC) < last_update_time:
raise WebAppAuthAccessDeniedError("SSO settings have been updated. Please re-login.")
if source and source == "sso":
raise Unauthorized("sso token expired.")
class WebApiResource(Resource):

View File
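
The helpers above gate requests on two flags (system-level webapp auth enabled, app access mode not public) plus the token_source claim. A reduced sketch of that gate; the exception type is a stand-in for WebAppAuthRequiredError and Unauthorized, and only the branching shown in the diff is mirrored:

# Illustrative sketch only, not part of the diff above.
def check_token_source(decoded: dict, system_webapp_auth_enabled: bool,
                       app_web_auth_enabled: bool) -> None:
    source = decoded.get("token_source")
    if system_webapp_auth_enabled and app_web_auth_enabled:
        # Auth is enforced: only tokens minted by the webapp login flow may pass.
        if source != "webapp":
            raise PermissionError("webapp login required")   # stands in for WebAppAuthRequiredError
    elif source == "webapp":
        # Auth is not enforced but the client still holds a webapp token:
        # treat it as expired so the client falls back to the plain passport flow.
        raise PermissionError("webapp token expired")         # stands in for Unauthorized

check_token_source({"token_source": "webapp"}, True, True)   # passes
# check_token_source({}, True, True) would raise.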

@@ -104,6 +104,7 @@ class CotAgentRunner(BaseAgentRunner, ABC):
# recalc llm max tokens
prompt_messages = self._organize_prompt_messages()
self.recalc_llm_max_tokens(self.model_config, prompt_messages)
# invoke model
chunks = model_instance.invoke_llm(
prompt_messages=prompt_messages,

View File

@@ -84,6 +84,7 @@ class FunctionCallAgentRunner(BaseAgentRunner):
# recalc llm max tokens
prompt_messages = self._organize_prompt_messages()
self.recalc_llm_max_tokens(self.model_config, prompt_messages)
# invoke model
chunks: Union[Generator[LLMResultChunk, None, None], LLMResult] = model_instance.invoke_llm(
prompt_messages=prompt_messages,

View File

@@ -140,9 +140,7 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator):
app_config=app_config,
file_upload_config=file_extra_config,
conversation_id=conversation.id if conversation else None,
inputs=conversation.inputs
if conversation
else self._prepare_user_inputs(
inputs=self._prepare_user_inputs(
user_inputs=inputs, variables=app_config.variables, tenant_id=app_model.tenant_id
),
query=query,

View File

@@ -148,9 +148,7 @@ class AgentChatAppGenerator(MessageBasedAppGenerator):
model_conf=ModelConfigConverter.convert(app_config),
file_upload_config=file_extra_config,
conversation_id=conversation.id if conversation else None,
inputs=conversation.inputs
if conversation
else self._prepare_user_inputs(
inputs=self._prepare_user_inputs(
user_inputs=inputs, variables=app_config.variables, tenant_id=app_model.tenant_id
),
query=query,

View File
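
Both generator hunks above replace `inputs=conversation.inputs if conversation else self._prepare_user_inputs(...)` with an unconditional `inputs=self._prepare_user_inputs(...)`, so the inputs sent with each /chat-messages request are normalized on every turn instead of being frozen to the first turn's values. A minimal sketch of the behavioral difference; the names and values are illustrative:

# Illustrative sketch only, not part of the diff above.
stored_conversation_inputs = {"tone": "formal"}   # captured when the conversation was created
request_inputs = {"tone": "casual"}               # sent with the current request
conversation_exists = True

def prepare_user_inputs(user_inputs):
    # stand-in for _prepare_user_inputs(); the real helper also validates variables
    return dict(user_inputs)

old_inputs = stored_conversation_inputs if conversation_exists else prepare_user_inputs(request_inputs)
new_inputs = prepare_user_inputs(request_inputs)

assert old_inputs == {"tone": "formal"}   # later requests were ignored
assert new_inputs == {"tone": "casual"}   # later requests now take effect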

@@ -55,6 +55,20 @@ class AgentChatAppRunner(AppRunner):
query = application_generate_entity.query
files = application_generate_entity.files
# Pre-calculate the number of tokens of the prompt messages,
# and return the rest number of tokens by model context token size limit and max token size limit.
# If the rest number of tokens is not enough, raise exception.
# Include: prompt template, inputs, query(optional), files(optional)
# Not Include: memory, external data, dataset context
self.get_pre_calculate_rest_tokens(
app_record=app_record,
model_config=application_generate_entity.model_conf,
prompt_template_entity=app_config.prompt_template,
inputs=inputs,
files=files,
query=query,
)
memory = None
if application_generate_entity.conversation_id:
# get memory of conversation (read-only)

View File

@@ -15,8 +15,10 @@ from core.app.features.annotation_reply.annotation_reply import AnnotationReplyF
from core.app.features.hosting_moderation.hosting_moderation import HostingModerationFeature
from core.external_data_tool.external_data_fetch import ExternalDataFetch
from core.memory.token_buffer_memory import TokenBufferMemory
from core.model_manager import ModelInstance
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
from core.model_runtime.entities.message_entities import AssistantPromptMessage, PromptMessage
from core.model_runtime.entities.model_entities import ModelPropertyKey
from core.model_runtime.errors.invoke import InvokeBadRequestError
from core.moderation.input_moderation import InputModeration
from core.prompt.advanced_prompt_transform import AdvancedPromptTransform
@@ -29,6 +31,106 @@ if TYPE_CHECKING:
class AppRunner:
def get_pre_calculate_rest_tokens(
self,
app_record: App,
model_config: ModelConfigWithCredentialsEntity,
prompt_template_entity: PromptTemplateEntity,
inputs: Mapping[str, str],
files: Sequence["File"],
query: Optional[str] = None,
) -> int:
"""
Get pre calculate rest tokens
:param app_record: app record
:param model_config: model config entity
:param prompt_template_entity: prompt template entity
:param inputs: inputs
:param files: files
:param query: query
:return:
"""
# Invoke model
model_instance = ModelInstance(
provider_model_bundle=model_config.provider_model_bundle, model=model_config.model
)
model_context_tokens = model_config.model_schema.model_properties.get(ModelPropertyKey.CONTEXT_SIZE)
max_tokens = 0
for parameter_rule in model_config.model_schema.parameter_rules:
if parameter_rule.name == "max_tokens" or (
parameter_rule.use_template and parameter_rule.use_template == "max_tokens"
):
max_tokens = (
model_config.parameters.get(parameter_rule.name)
or model_config.parameters.get(parameter_rule.use_template or "")
) or 0
if model_context_tokens is None:
return -1
if max_tokens is None:
max_tokens = 0
# get prompt messages without memory and context
prompt_messages, stop = self.organize_prompt_messages(
app_record=app_record,
model_config=model_config,
prompt_template_entity=prompt_template_entity,
inputs=inputs,
files=files,
query=query,
)
prompt_tokens = model_instance.get_llm_num_tokens(prompt_messages)
rest_tokens: int = model_context_tokens - max_tokens - prompt_tokens
if rest_tokens < 0:
raise InvokeBadRequestError(
"Query or prefix prompt is too long, you can reduce the prefix prompt, "
"or shrink the max token, or switch to a llm with a larger token limit size."
)
return rest_tokens
def recalc_llm_max_tokens(
self, model_config: ModelConfigWithCredentialsEntity, prompt_messages: list[PromptMessage]
):
# recalc max_tokens if sum(prompt_token + max_tokens) over model token limit
model_instance = ModelInstance(
provider_model_bundle=model_config.provider_model_bundle, model=model_config.model
)
model_context_tokens = model_config.model_schema.model_properties.get(ModelPropertyKey.CONTEXT_SIZE)
max_tokens = 0
for parameter_rule in model_config.model_schema.parameter_rules:
if parameter_rule.name == "max_tokens" or (
parameter_rule.use_template and parameter_rule.use_template == "max_tokens"
):
max_tokens = (
model_config.parameters.get(parameter_rule.name)
or model_config.parameters.get(parameter_rule.use_template or "")
) or 0
if model_context_tokens is None:
return -1
if max_tokens is None:
max_tokens = 0
prompt_tokens = model_instance.get_llm_num_tokens(prompt_messages)
if prompt_tokens + max_tokens > model_context_tokens:
max_tokens = max(model_context_tokens - prompt_tokens, 16)
for parameter_rule in model_config.model_schema.parameter_rules:
if parameter_rule.name == "max_tokens" or (
parameter_rule.use_template and parameter_rule.use_template == "max_tokens"
):
model_config.parameters[parameter_rule.name] = max_tokens
def organize_prompt_messages(
self,
app_record: App,

View File
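
The AppRunner methods in the hunk above implement two related token budgets: get_pre_calculate_rest_tokens() rejects a request when context_size - max_tokens - prompt_tokens goes negative, and recalc_llm_max_tokens() later clamps max_tokens so prompt_tokens + max_tokens still fits the context window, with a floor of 16. A worked sketch of the arithmetic; all numbers are made up:

# Illustrative arithmetic only, not part of the diff above.
model_context_tokens = 8192    # model context window
configured_max_tokens = 2048   # max_tokens from the model parameters
prompt_tokens = 6500           # counted from the organized prompt messages

# Pre-check done by get_pre_calculate_rest_tokens():
rest_tokens = model_context_tokens - configured_max_tokens - prompt_tokens
if rest_tokens < 0:            # 8192 - 2048 - 6500 = -356 -> request rejected
    print("Query or prefix prompt is too long for this max_tokens / context size")

# Clamp done by recalc_llm_max_tokens() just before invoking the model:
if prompt_tokens + configured_max_tokens > model_context_tokens:
    configured_max_tokens = max(model_context_tokens - prompt_tokens, 16)
print(configured_max_tokens)   # 8192 - 6500 = 1692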

@@ -141,9 +141,7 @@ class ChatAppGenerator(MessageBasedAppGenerator):
model_conf=ModelConfigConverter.convert(app_config),
file_upload_config=file_extra_config,
conversation_id=conversation.id if conversation else None,
inputs=conversation.inputs
if conversation
else self._prepare_user_inputs(
inputs=self._prepare_user_inputs(
user_inputs=inputs, variables=app_config.variables, tenant_id=app_model.tenant_id
),
query=query,

View File

@@ -50,6 +50,20 @@ class ChatAppRunner(AppRunner):
query = application_generate_entity.query
files = application_generate_entity.files
# Pre-calculate the number of tokens of the prompt messages,
# and return the rest number of tokens by model context token size limit and max token size limit.
# If the rest number of tokens is not enough, raise exception.
# Include: prompt template, inputs, query(optional), files(optional)
# Not Include: memory, external data, dataset context
self.get_pre_calculate_rest_tokens(
app_record=app_record,
model_config=application_generate_entity.model_conf,
prompt_template_entity=app_config.prompt_template,
inputs=inputs,
files=files,
query=query,
)
memory = None
if application_generate_entity.conversation_id:
# get memory of conversation (read-only)
@@ -180,6 +194,9 @@ class ChatAppRunner(AppRunner):
if hosting_moderation_result:
return
# Re-calculate the max tokens if sum(prompt_token + max_tokens) over model token limit
self.recalc_llm_max_tokens(model_config=application_generate_entity.model_conf, prompt_messages=prompt_messages)
# Invoke model
model_instance = ModelInstance(
provider_model_bundle=application_generate_entity.model_conf.provider_model_bundle,

View File

@@ -43,6 +43,20 @@ class CompletionAppRunner(AppRunner):
query = application_generate_entity.query
files = application_generate_entity.files
# Pre-calculate the number of tokens of the prompt messages,
# and return the rest number of tokens by model context token size limit and max token size limit.
# If the rest number of tokens is not enough, raise exception.
# Include: prompt template, inputs, query(optional), files(optional)
# Not Include: memory, external data, dataset context
self.get_pre_calculate_rest_tokens(
app_record=app_record,
model_config=application_generate_entity.model_conf,
prompt_template_entity=app_config.prompt_template,
inputs=inputs,
files=files,
query=query,
)
# organize all inputs and template to prompt messages
# Include: prompt template, inputs, query(optional), files(optional)
prompt_messages, stop = self.organize_prompt_messages(
@@ -138,6 +152,9 @@ class CompletionAppRunner(AppRunner):
if hosting_moderation_result:
return
# Re-calculate the max tokens if sum(prompt_token + max_tokens) over model token limit
self.recalc_llm_max_tokens(model_config=application_generate_entity.model_conf, prompt_messages=prompt_messages)
# Invoke model
model_instance = ModelInstance(
provider_model_bundle=application_generate_entity.model_conf.provider_model_bundle,

View File

@@ -842,4 +842,4 @@ class WorkflowCycleManage:
if node_execution_id not in self._workflow_node_executions:
raise ValueError(f"Workflow node execution not found: {node_execution_id}")
cached_workflow_node_execution = self._workflow_node_executions[node_execution_id]
return cached_workflow_node_execution
return session.merge(cached_workflow_node_execution)

View File
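
The session.merge() variant of the return above matters because the cached workflow node execution may have been loaded in an earlier, now-closed session; merge() copies its state onto an instance attached to the current session, so later attribute access and flushes do not hit DetachedInstanceError. A minimal SQLAlchemy sketch of the pattern; the model and engine here are illustrative:

# Illustrative sketch only, not part of the diff above.
from sqlalchemy import create_engine
from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column

class Base(DeclarativeBase):
    pass

class NodeExecution(Base):
    __tablename__ = "node_execution"
    id: Mapped[int] = mapped_column(primary_key=True)
    status: Mapped[str] = mapped_column()

engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine, expire_on_commit=False) as s1:
    cached = NodeExecution(id=1, status="running")   # kept in a cache, like the dict above
    s1.add(cached)
    s1.commit()
# s1 is closed here; `cached` is detached from any session.

with Session(engine) as s2:
    attached = s2.merge(cached)    # what the merged return statement does
    attached.status = "succeeded"
    s2.commit()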

@@ -26,7 +26,7 @@ class TokenBufferMemory:
self.model_instance = model_instance
def get_history_prompt_messages(
self, max_token_limit: int = 100000, message_limit: Optional[int] = None
self, max_token_limit: int = 2000, message_limit: Optional[int] = None
) -> Sequence[PromptMessage]:
"""
Get history prompt messages.

View File

@@ -30,11 +30,6 @@ from core.model_runtime.model_providers.__base.ai_model import AIModel
logger = logging.getLogger(__name__)
HTML_THINKING_TAG = (
'<details style="color:gray;background-color: #f8f8f8;padding: 8px;border-radius: 4px;" open> '
"<summary> Thinking... </summary>"
)
class LargeLanguageModel(AIModel):
"""
@@ -408,7 +403,7 @@ if you are not sure about the structure.
def _wrap_thinking_by_reasoning_content(self, delta: dict, is_reasoning: bool) -> tuple[str, bool]:
"""
If the reasoning response is from delta.get("reasoning_content"), we wrap
it with HTML details tag.
it with HTML think tag.
:param delta: delta dictionary from LLM streaming response
:param is_reasoning: is reasoning
@@ -420,25 +415,17 @@ if you are not sure about the structure.
if reasoning_content:
if not is_reasoning:
content = HTML_THINKING_TAG + reasoning_content
content = "<think>\n" + reasoning_content
is_reasoning = True
else:
content = reasoning_content
elif is_reasoning:
content = "</details>" + content
elif is_reasoning and content:
# do not end reasoning when content is empty
# there may be more reasoning_content later that follows previous reasoning closely
content = "\n</think>" + content
is_reasoning = False
return content, is_reasoning
def _wrap_thinking_by_tag(self, content: str) -> str:
"""
if the reasoning response is a <think>...</think> block from delta.get("content"),
we replace <think> to <detail>.
:param content: delta.get("content")
:return: processed_content
"""
return content.replace("<think>", HTML_THINKING_TAG).replace("</think>", "</details>")
def _invoke_result_generator(
self,
model: str,

View File
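
With the HTML <details> wrapper dropped, _wrap_thinking_by_reasoning_content() emits plain <think> ... </think> markers around streamed reasoning_content and only closes the block once a non-empty regular content delta arrives. A small sketch of that state machine run over a made-up delta stream:

# Illustrative sketch only, not part of the diff above.
def wrap(delta: dict, is_reasoning: bool) -> tuple[str, bool]:
    content = delta.get("content") or ""
    reasoning = delta.get("reasoning_content")
    if reasoning:
        # open the think block on the first reasoning delta, then pass reasoning through
        return ("<think>\n" + reasoning, True) if not is_reasoning else (reasoning, True)
    if is_reasoning and content:
        # close only on non-empty content; empty deltas keep the block open
        return "\n</think>" + content, False
    return content, is_reasoning

deltas = [
    {"reasoning_content": "Let me check the docs."},
    {"reasoning_content": " The flag defaults to false."},
    {"content": ""},                       # keeps the think block open
    {"content": "Enable it explicitly."},  # closes the think block
]
out, reasoning = "", False
for d in deltas:
    piece, reasoning = wrap(d, reasoning)
    out += piece
# out == "<think>\nLet me check the docs. The flag defaults to false.\n</think>Enable it explicitly."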

@@ -1,115 +0,0 @@
model: us.anthropic.claude-3-7-sonnet-20250219-v1:0
label:
en_US: Claude 3.7 Sonnet(US.Cross Region Inference)
icon: icon_s_en.svg
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 200000
# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
parameter_rules:
- name: enable_cache
label:
zh_Hans: 启用提示缓存
en_US: Enable Prompt Cache
type: boolean
required: false
default: true
help:
zh_Hans: 启用提示缓存可以提高性能并降低成本。Claude 3.7 Sonnet支持在system、messages和tools字段中使用缓存检查点。
en_US: Enable prompt caching to improve performance and reduce costs. Claude 3.7 Sonnet supports cache checkpoints in system, messages, and tools fields.
- name: reasoning_type
label:
zh_Hans: 推理配置
en_US: Reasoning Type
type: boolean
required: false
default: false
placeholder:
zh_Hans: 设置推理配置
en_US: Set reasoning configuration
help:
zh_Hans: 控制模型的推理能力。启用时temperature将固定为1且top_p将被禁用。
en_US: Controls the model's reasoning capability. When enabled, temperature will be fixed to 1 and top_p will be disabled.
- name: reasoning_budget
show_on:
- variable: reasoning_type
value: true
label:
zh_Hans: 推理预算
en_US: Reasoning Budget
type: int
default: 1024
min: 0
max: 128000
help:
zh_Hans: 推理的预算限制最小1024必须小于max_tokens。仅在推理类型为enabled时可用。
en_US: Budget limit for reasoning (minimum 1024), must be less than max_tokens. Only available when reasoning type is enabled.
- name: max_tokens
use_template: max_tokens
required: true
label:
zh_Hans: 最大token数
en_US: Max Tokens
type: int
default: 8192
min: 1
max: 128000
help:
zh_Hans: 停止前生成的最大令牌数。请注意Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
- name: temperature
use_template: temperature
required: false
label:
zh_Hans: 模型温度
en_US: Model Temperature
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。当推理功能启用时该值将被固定为1。
en_US: The amount of randomness injected into the response. When reasoning is enabled, this value will be fixed to 1.
- name: top_p
show_on:
- variable: reasoning_type
value: disabled
use_template: top_p
label:
zh_Hans: Top P
en_US: Top P
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中的概率阈值。当推理功能启用时,该参数将被禁用。
en_US: The probability threshold in nucleus sampling. When reasoning is enabled, this parameter will be disabled.
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
required: false
type: int
default: 0
min: 0
# tip docs from aws has error, max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
- name: response_format
use_template: response_format
pricing:
input: '0.003'
output: '0.015'
unit: '0.001'
currency: USD

View File

@@ -58,7 +58,6 @@ class BedrockLargeLanguageModel(LargeLanguageModel):
# TODO There is invoke issue: context limit on Cohere Model, will add them after fixed.
CONVERSE_API_ENABLED_MODEL_INFO = [
{"prefix": "anthropic.claude-v2", "support_system_prompts": True, "support_tool_use": False},
{"prefix": "us.deepseek", "support_system_prompts": True, "support_tool_use": False},
{"prefix": "anthropic.claude-v1", "support_system_prompts": True, "support_tool_use": False},
{"prefix": "us.anthropic.claude-3", "support_system_prompts": True, "support_tool_use": True},
{"prefix": "eu.anthropic.claude-3", "support_system_prompts": True, "support_tool_use": True},

View File
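
CONVERSE_API_ENABLED_MODEL_INFO appears to be consulted by prefix match against the requested model id (hence the prefix keys), so dropping the us.deepseek entry takes those model ids off the Converse code path. A reduced sketch of such a prefix lookup; the lookup function is an assumption, not the provider's actual method:

# Illustrative sketch only, not part of the diff above.
CONVERSE_API_ENABLED_MODEL_INFO = [
    {"prefix": "anthropic.claude-v2", "support_system_prompts": True, "support_tool_use": False},
    {"prefix": "us.anthropic.claude-3", "support_system_prompts": True, "support_tool_use": True},
]

def converse_info(model_id: str):
    return next((m for m in CONVERSE_API_ENABLED_MODEL_INFO
                 if model_id.startswith(m["prefix"])), None)

print(converse_info("us.anthropic.claude-3-7-sonnet-20250219-v1:0"))  # matches the claude-3 entry
print(converse_info("us.deepseek.r1-v1:0"))                           # None once the entry is gone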

@@ -1,63 +0,0 @@
model: us.deepseek.r1-v1:0
label:
en_US: DeepSeek-R1(US.Cross Region Inference)
icon: icon_s_en.svg
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 32768
parameter_rules:
- name: max_tokens
use_template: max_tokens
required: true
label:
zh_Hans: 最大token数
en_US: Max Tokens
type: int
default: 8192
min: 1
max: 128000
help:
zh_Hans: 停止前生成的最大令牌数。
en_US: The maximum number of tokens to generate before stopping.
- name: temperature
use_template: temperature
required: false
label:
zh_Hans: 模型温度
en_US: Model Temperature
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。当推理功能启用时该值将被固定为1。
en_US: The amount of randomness injected into the response. When reasoning is enabled, this value will be fixed to 1.
- name: top_p
show_on:
- variable: reasoning_type
value: disabled
use_template: top_p
label:
zh_Hans: Top P
en_US: Top P
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中的概率阈值。当推理功能启用时,该参数将被禁用。
en_US: The probability threshold in nucleus sampling. When reasoning is enabled, this parameter will be disabled.
- name: response_format
use_template: response_format
pricing:
input: '0.001'
output: '0.005'
unit: '0.001'
currency: USD

View File

@@ -19,8 +19,8 @@ class GoogleProvider(ModelProvider):
try:
model_instance = self.get_model_instance(ModelType.LLM)
# Use `gemini-2.0-flash` model for validate,
model_instance.validate_credentials(model="gemini-2.0-flash", credentials=credentials)
# Use `gemini-pro` model for validate,
model_instance.validate_credentials(model="gemini-pro", credentials=credentials)
except CredentialsValidateFailedError as ex:
raise ex
except Exception as ex:

View File

@@ -1,5 +1,6 @@
- gemini-2.0-flash-001
- gemini-2.0-flash-exp
- gemini-2.0-flash-lite-preview-02-05
- gemini-2.0-pro-exp-02-05
- gemini-2.0-flash-thinking-exp-1219
- gemini-2.0-flash-thinking-exp-01-21
@@ -19,3 +20,5 @@
- gemini-exp-1206
- gemini-exp-1121
- gemini-exp-1114
- gemini-pro
- gemini-pro-vision

View File

@@ -0,0 +1,41 @@
model: gemini-2.0-flash-lite-preview-02-05
label:
en_US: Gemini 2.0 Flash Lite Preview 0205
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 1048576
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@@ -0,0 +1,35 @@
model: gemini-pro-vision
label:
en_US: Gemini Pro Vision
model_type: llm
features:
- vision
model_properties:
mode: chat
context_size: 12288
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens_to_sample
use_template: max_tokens
required: true
default: 4096
min: 1
max: 4096
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD
deprecated: true

View File

@@ -0,0 +1,39 @@
model: gemini-pro
label:
en_US: Gemini Pro
model_type: llm
features:
- agent-thought
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 30720
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens_to_sample
use_template: max_tokens
required: true
default: 2048
min: 1
max: 2048
- name: response_format
use_template: response_format
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD
deprecated: true

View File

@@ -367,7 +367,6 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
# transform assistant message to prompt message
text = chunk_json["response"]
text = self._wrap_thinking_by_tag(text)
assistant_prompt_message = AssistantPromptMessage(content=text)

View File

@@ -1,4 +1,3 @@
- gpt-4.1
- o1
- o1-2024-12-17
- o1-mini

View File

@@ -1049,9 +1049,6 @@ class OpenAILargeLanguageModel(_CommonOpenAI, LargeLanguageModel):
"""Calculate num tokens for gpt-3.5-turbo and gpt-4 with tiktoken package.
Official documentation: https://github.com/openai/openai-cookbook/blob/main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb"""
if not messages and not tools:
return 0
if model.startswith("ft:"):
model = model.split(":")[1]
@@ -1060,18 +1057,18 @@ class OpenAILargeLanguageModel(_CommonOpenAI, LargeLanguageModel):
model = "gpt-4o"
try:
encoding = tiktoken.get_encoding(model)
except (KeyError, ValueError) as e:
encoding = tiktoken.encoding_for_model(model)
except KeyError:
logger.warning("Warning: model not found. Using cl100k_base encoding.")
encoding_name = "cl100k_base"
encoding = tiktoken.get_encoding(encoding_name)
model = "cl100k_base"
encoding = tiktoken.get_encoding(model)
if model.startswith("gpt-3.5-turbo-0301"):
# every message follows <im_start>{role/name}\n{content}<im_end>\n
tokens_per_message = 4
# if there's a name, the role is omitted
tokens_per_name = -1
elif model.startswith("gpt-3.5-turbo") or model.startswith("gpt-4") or model.startswith(("o1", "o3", "o4")):
elif model.startswith("gpt-3.5-turbo") or model.startswith("gpt-4") or model.startswith(("o1", "o3")):
tokens_per_message = 3
tokens_per_name = 1
else:

View File
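
The hunk above toggles between two tiktoken lookup strategies; the widely used pattern is to resolve the encoding for a known model name and fall back to cl100k_base when the name is unknown. A small sketch of that pattern; it is not a claim about which side of the diff is current:

# Illustrative sketch only, not part of the diff above.
import tiktoken

def encoding_for(model: str):
    try:
        # maps a known model name (e.g. "gpt-4o") to its tokenizer
        return tiktoken.encoding_for_model(model)
    except KeyError:
        # unknown or fine-tuned model names fall back to the cl100k_base tokenizer
        return tiktoken.get_encoding("cl100k_base")

enc = encoding_for("gpt-4o")
print(len(enc.encode("hello world")))   # token count for a short string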

@@ -528,7 +528,6 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
delta_content, is_reasoning_started = self._wrap_thinking_by_reasoning_content(
delta, is_reasoning_started
)
delta_content = self._wrap_thinking_by_tag(delta_content)
assistant_message_tool_calls = None
@@ -808,34 +807,37 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
# calculate num tokens for function object
num_tokens += self._get_num_tokens_by_gpt2("name")
num_tokens += self._get_num_tokens_by_gpt2(tool.name)
if hasattr(tool, "name"):
num_tokens += self._get_num_tokens_by_gpt2(tool.name)
num_tokens += self._get_num_tokens_by_gpt2("description")
num_tokens += self._get_num_tokens_by_gpt2(tool.description)
parameters = tool.parameters
num_tokens += self._get_num_tokens_by_gpt2("parameters")
if "title" in parameters:
num_tokens += self._get_num_tokens_by_gpt2("title")
num_tokens += self._get_num_tokens_by_gpt2(parameters.get("title"))
num_tokens += self._get_num_tokens_by_gpt2("type")
num_tokens += self._get_num_tokens_by_gpt2(parameters.get("type"))
if "properties" in parameters:
num_tokens += self._get_num_tokens_by_gpt2("properties")
for key, value in parameters.get("properties").items():
num_tokens += self._get_num_tokens_by_gpt2(key)
for field_key, field_value in value.items():
num_tokens += self._get_num_tokens_by_gpt2(field_key)
if field_key == "enum":
for enum_field in field_value:
num_tokens += 3
num_tokens += self._get_num_tokens_by_gpt2(enum_field)
else:
if hasattr(tool, "description"):
num_tokens += self._get_num_tokens_by_gpt2(tool.description)
if hasattr(tool, "parameters"):
parameters = tool.parameters
num_tokens += self._get_num_tokens_by_gpt2("parameters")
if "title" in parameters:
num_tokens += self._get_num_tokens_by_gpt2("title")
num_tokens += self._get_num_tokens_by_gpt2(parameters.get("title"))
num_tokens += self._get_num_tokens_by_gpt2("type")
num_tokens += self._get_num_tokens_by_gpt2(parameters.get("type"))
if "properties" in parameters:
num_tokens += self._get_num_tokens_by_gpt2("properties")
for key, value in parameters.get("properties", {}).items():
num_tokens += self._get_num_tokens_by_gpt2(key)
for field_key, field_value in value.items():
num_tokens += self._get_num_tokens_by_gpt2(field_key)
num_tokens += self._get_num_tokens_by_gpt2(str(field_value))
if "required" in parameters:
num_tokens += self._get_num_tokens_by_gpt2("required")
for required_field in parameters["required"]:
num_tokens += 3
num_tokens += self._get_num_tokens_by_gpt2(required_field)
if field_key == "enum":
for enum_field in field_value:
num_tokens += 3
num_tokens += self._get_num_tokens_by_gpt2(enum_field)
else:
num_tokens += self._get_num_tokens_by_gpt2(field_key)
num_tokens += self._get_num_tokens_by_gpt2(str(field_value))
if "required" in parameters:
num_tokens += self._get_num_tokens_by_gpt2("required")
for required_field in parameters["required"]:
num_tokens += 3
num_tokens += self._get_num_tokens_by_gpt2(required_field)
return num_tokens

View File
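
The token-counting hunk above wraps every tool attribute access in hasattr() before adding its GPT-2 token count, so tool definitions that omit a description or parameters no longer raise AttributeError. A reduced sketch of the same defensive walk; the whitespace tokenizer stands in for _get_num_tokens_by_gpt2():

# Illustrative sketch only, not part of the diff above.
from types import SimpleNamespace

def count_tokens(text: str) -> int:
    return len(text.split())   # stand-in for _get_num_tokens_by_gpt2()

def num_tokens_for_tool(tool) -> int:
    total = count_tokens("name")
    if hasattr(tool, "name"):
        total += count_tokens(tool.name)
    if hasattr(tool, "description"):
        total += count_tokens("description") + count_tokens(tool.description)
    if hasattr(tool, "parameters"):
        total += count_tokens("parameters")
        for key, value in tool.parameters.get("properties", {}).items():
            total += count_tokens(key)
            for field_key, field_value in value.items():
                total += count_tokens(field_key) + count_tokens(str(field_value))
    return total

bare_tool = SimpleNamespace(name="search")   # no description, no parameters
print(num_tokens_for_tool(bare_tool))        # counts what exists instead of raising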

@@ -430,7 +430,7 @@ class SageMakerLargeLanguageModel(LargeLanguageModel):
type=ParameterType.INT,
use_template="max_tokens",
min=1,
max=credentials.get("context_length", 2048),
max=int(credentials.get("context_length", 2048)),
default=512,
label=I18nObject(zh_Hans="最大生成长度", en_US="Max Tokens"),
),
@@ -448,7 +448,7 @@ class SageMakerLargeLanguageModel(LargeLanguageModel):
if support_vision:
features.append(ModelFeature.VISION)
context_length = credentials.get("context_length", 2048)
context_length = int(credentials.get("context_length", 2048))
entity = AIModelEntity(
model=model,

View File

@@ -59,6 +59,19 @@ model_credential_schema:
placeholder:
zh_Hans: 请输出你的Sagemaker推理端点
en_US: Enter your Sagemaker Inference endpoint
- variable: context_length
show_on:
- variable: __model_type
value: llm
label:
zh_Hans: 模型上下文长度
en_US: Model context size
type: text-input
default: '4096'
required: true
placeholder:
zh_Hans: 在此输入您的模型上下文长度
en_US: Enter your Model context size
- variable: audio_s3_cache_bucket
show_on:
- variable: __model_type

View File
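
The context_length credential added above is declared as a text-input, so its value arrives as a string; that is why the preceding SageMaker hunk casts it with int(credentials.get("context_length", 2048)) before using it as the max bound of the max_tokens rule. A tiny sketch of the failure mode being avoided; the values are made up:

# Illustrative sketch only, not part of the diff above.
credentials = {"context_length": "4096"}   # text-input credentials arrive as strings

raw = credentials.get("context_length", 2048)        # "4096" (str)
safe = int(credentials.get("context_length", 2048))  # 4096 (int)

print(type(raw).__name__, type(safe).__name__)       # str int
# A string bound breaks numeric comparisons on the parameter rule,
# e.g. 512 > "4096" raises TypeError in Python 3.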

@@ -17,6 +17,13 @@
- deepseek-ai/DeepSeek-V2.5
- deepseek-ai/DeepSeek-V3
- deepseek-ai/DeepSeek-Coder-V2-Instruct
- deepseek-ai/DeepSeek-R1-Distill-Llama-8B
- deepseek-ai/DeepSeek-R1-Distill-Llama-70B
- deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
- deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
- deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
- deepseek-ai/DeepSeek-R1-Distill-Qwen-32B
- deepseek-ai/Janus-Pro-7B
- THUDM/glm-4-9b-chat
- 01-ai/Yi-1.5-34B-Chat-16K
- 01-ai/Yi-1.5-9B-Chat-16K

View File

@@ -0,0 +1,21 @@
model: deepseek-ai/DeepSeek-R1-Distill-Llama-70B
label:
zh_Hans: deepseek-ai/DeepSeek-R1-Distill-Llama-70B
en_US: deepseek-ai/DeepSeek-R1-Distill-Llama-70B
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 32000
parameter_rules:
- name: max_tokens
use_template: max_tokens
min: 1
max: 8192
default: 4096
pricing:
input: "0.00"
output: "4.3"
unit: "0.000001"
currency: RMB

View File

@@ -0,0 +1,21 @@
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
label:
zh_Hans: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
en_US: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 32000
parameter_rules:
- name: max_tokens
use_template: max_tokens
min: 1
max: 8192
default: 4096
pricing:
input: "0.00"
output: "0.00"
unit: "0.000001"
currency: RMB

View File

@@ -0,0 +1,21 @@
model: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
label:
zh_Hans: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
en_US: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 32000
parameter_rules:
- name: max_tokens
use_template: max_tokens
min: 1
max: 8192
default: 4096
pricing:
input: "0.00"
output: "1.26"
unit: "0.000001"
currency: RMB

View File

@@ -0,0 +1,21 @@
model: deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
label:
zh_Hans: deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
en_US: deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 32000
parameter_rules:
- name: max_tokens
use_template: max_tokens
min: 1
max: 8192
default: 4096
pricing:
input: "0.00"
output: "0.70"
unit: "0.000001"
currency: RMB

View File

@@ -0,0 +1,21 @@
model: deepseek-ai/DeepSeek-R1-Distill-Qwen-32B
label:
zh_Hans: deepseek-ai/DeepSeek-R1-Distill-Qwen-32B
en_US: deepseek-ai/DeepSeek-R1-Distill-Qwen-32B
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 32000
parameter_rules:
- name: max_tokens
use_template: max_tokens
min: 1
max: 8192
default: 4096
pricing:
input: "0.00"
output: "1.26"
unit: "0.000001"
currency: RMB

View File

@@ -0,0 +1,21 @@
model: deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
label:
zh_Hans: deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
en_US: deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 32000
parameter_rules:
- name: max_tokens
use_template: max_tokens
min: 1
max: 8192
default: 4096
pricing:
input: "0.00"
output: "0.00"
unit: "0.000001"
currency: RMB

View File

@@ -0,0 +1,22 @@
model: deepseek-ai/Janus-Pro-7B
label:
zh_Hans: deepseek-ai/Janus-Pro-7B
en_US: deepseek-ai/Janus-Pro-7B
model_type: llm
features:
- agent-thought
- vision
model_properties:
mode: chat
context_size: 32000
parameter_rules:
- name: max_tokens
use_template: max_tokens
min: 1
max: 8192
default: 4096
pricing:
input: "0.00"
output: "0.00"
unit: "0.000001"
currency: RMB

View File

@@ -1,3 +1,7 @@
- deepseek-r1
- deepseek-r1-distill-qwen-14b
- deepseek-r1-distill-qwen-32b
- deepseek-v3
- qwen-vl-max-0809
- qwen-vl-max-0201
- qwen-vl-max

View File

@@ -0,0 +1,21 @@
model: deepseek-r1-distill-qwen-14b
label:
zh_Hans: DeepSeek-R1-Distill-Qwen-14B
en_US: DeepSeek-R1-Distill-Qwen-14B
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 32000
parameter_rules:
- name: max_tokens
use_template: max_tokens
min: 1
max: 8192
default: 4096
pricing:
input: "0.001"
output: "0.003"
unit: "0.001"
currency: RMB

View File

@@ -0,0 +1,21 @@
model: deepseek-r1-distill-qwen-32b
label:
zh_Hans: DeepSeek-R1-Distill-Qwen-32B
en_US: DeepSeek-R1-Distill-Qwen-32B
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 32000
parameter_rules:
- name: max_tokens
use_template: max_tokens
min: 1
max: 8192
default: 4096
pricing:
input: "0.002"
output: "0.006"
unit: "0.001"
currency: RMB

View File

@@ -0,0 +1,21 @@
model: deepseek-r1
label:
zh_Hans: DeepSeek-R1
en_US: DeepSeek-R1
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 64000
parameter_rules:
- name: max_tokens
use_template: max_tokens
min: 1
max: 8192
default: 4096
pricing:
input: "0.004"
output: "0.016"
unit: '0.001'
currency: RMB

View File

@@ -1,43 +1,38 @@
model: gpt-4.1
model: deepseek-v3
label:
zh_Hans: gpt-4.1
en_US: gpt-4.1
zh_Hans: DeepSeek-V3
en_US: DeepSeek-V3
model_type: llm
features:
- multi-tool-call
- agent-thought
- stream-tool-call
- vision
model_properties:
mode: chat
context_size: 1047576
context_size: 64000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: presence_penalty
use_template: presence_penalty
- name: frequency_penalty
use_template: frequency_penalty
- name: max_tokens
use_template: max_tokens
type: int
default: 512
min: 1
max: 32768
- name: reasoning_effort
label:
zh_Hans: 推理工作
en_US: Reasoning Effort
type: string
max: 4096
help:
zh_Hans: 限制推理模型的推理工作
en_US: Constrains effort on reasoning for reasoning models
zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
options:
- low
- medium
- high
- name: frequency_penalty
use_template: frequency_penalty
- name: response_format
label:
zh_Hans: 回复格式
@@ -50,11 +45,8 @@ parameter_rules:
options:
- text
- json_object
- json_schema
- name: json_schema
use_template: json_schema
pricing:
input: '2.00'
output: '8.00'
unit: '0.000001'
currency: USD
input: "0.002"
output: "0.008"
unit: "0.001"
currency: RMB

View File

@@ -197,8 +197,7 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
else:
# nothing different between chat model and completion model in tongyi
params["messages"] = self._convert_prompt_messages_to_tongyi_messages(prompt_messages)
response = Generation.call(**params, result_format="message", stream=stream)
response = Generation.call(**params, result_format="message", stream=stream, incremental_output=stream)
if stream:
return self._handle_generate_stream_response(model, credentials, response, prompt_messages)
@@ -258,6 +257,9 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
"""
full_text = ""
tool_calls = []
is_reasoning_started = False
# for index, response in enumerate(responses):
index = 0
for index, response in enumerate(responses):
if response.status_code not in {200, HTTPStatus.OK}:
raise ServiceUnavailableError(
@@ -311,7 +313,11 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
),
)
else:
resp_content = response.output.choices[0].message.content
message = response.output.choices[0].message
resp_content, is_reasoning_started = self._wrap_thinking_by_reasoning_content(
message, is_reasoning_started
)
if not resp_content:
if "tool_calls" in response.output.choices[0].message:
tool_calls = response.output.choices[0].message["tool_calls"]

View File
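
Passing incremental_output=stream lines DashScope's streaming mode up with the aggregation in _handle_generate_stream_response(): with incremental output each chunk carries only the newly generated text, while the default replays the full text so far, which duplicates content if the handler concatenates chunks as-is. A hedged sketch of the difference; the chunk payloads are made up:

# Illustrative sketch only, not part of the diff above.
incremental_chunks = ["Hel", "lo", " world"]          # incremental_output=True
cumulative_chunks = ["Hel", "Hello", "Hello world"]   # incremental_output=False (default)

print("".join(incremental_chunks))   # "Hello world"          -> correct when concatenating
print("".join(cumulative_chunks))    # "HelHelloHello world"  -> duplicated when concatenating
# Hence Generation.call(..., stream=stream, incremental_output=stream) above.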

@@ -69,6 +69,15 @@ parameter_rules:
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- name: enable_search
type: boolean
default: false
label:
zh_Hans: 联网搜索
en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:

View File
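
The enable_search rule added to the qwen YAML files above surfaces as a boolean model parameter and travels with the rest of the parameters at invoke time. A hedged sketch of that plumbing; the parameter handling is simplified and the model name is only an example:

# Illustrative sketch only, not part of the diff above.
model_parameters = {"temperature": 0.7, "enable_search": True}   # as chosen in the app UI

params = {"model": "qwen-max", **model_parameters}
# Generation.call(**params, result_format="message", stream=True, incremental_output=True)
# would then let the model consult web search results when it judges them useful.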

@@ -69,6 +69,15 @@ parameter_rules:
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- name: enable_search
type: boolean
default: false
label:
zh_Hans: 联网搜索
en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:

View File

@@ -69,6 +69,15 @@ parameter_rules:
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- name: enable_search
type: boolean
default: false
label:
zh_Hans: 联网搜索
en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:

View File

@@ -69,6 +69,15 @@ parameter_rules:
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- name: enable_search
type: boolean
default: false
label:
zh_Hans: 联网搜索
en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:

View File

@@ -68,6 +68,15 @@ parameter_rules:
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- name: enable_search
type: boolean
default: false
label:
zh_Hans: 联网搜索
en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:

View File

@@ -69,6 +69,15 @@ parameter_rules:
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- name: enable_search
type: boolean
default: false
label:
zh_Hans: 联网搜索
en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:

View File

@@ -69,6 +69,15 @@ parameter_rules:
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- name: enable_search
type: boolean
default: false
label:
zh_Hans: 联网搜索
en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:

View File

@@ -67,6 +67,15 @@ parameter_rules:
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- name: enable_search
type: boolean
default: false
label:
zh_Hans: 联网搜索
en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:

View File

@@ -67,6 +67,15 @@ parameter_rules:
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- name: enable_search
type: boolean
default: false
label:
zh_Hans: 联网搜索
en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:

View File

@@ -67,6 +67,15 @@ parameter_rules:
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- name: enable_search
type: boolean
default: false
label:
zh_Hans: 联网搜索
en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:

View File

@@ -67,6 +67,15 @@ parameter_rules:
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- name: enable_search
type: boolean
default: false
label:
zh_Hans: 联网搜索
en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:

View File

@@ -67,6 +67,15 @@ parameter_rules:
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- name: enable_search
type: boolean
default: false
label:
zh_Hans: 联网搜索
en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:

View File

@@ -69,6 +69,15 @@ parameter_rules:
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- name: enable_search
type: boolean
default: false
label:
zh_Hans: 联网搜索
en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:

View File

@@ -67,6 +67,15 @@ parameter_rules:
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- name: enable_search
type: boolean
default: false
label:
zh_Hans: 联网搜索
en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:

View File

@@ -68,6 +68,15 @@ parameter_rules:
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- name: enable_search
type: boolean
default: false
label:
zh_Hans: 联网搜索
en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:

View File

@@ -67,6 +67,15 @@ parameter_rules:
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- name: enable_search
type: boolean
default: false
label:
zh_Hans: 联网搜索
en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:

View File

@@ -67,6 +67,15 @@ parameter_rules:
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- name: enable_search
type: boolean
default: false
label:
zh_Hans: 联网搜索
en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:

View File

@@ -69,6 +69,15 @@ parameter_rules:
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- name: enable_search
type: boolean
default: false
label:
zh_Hans: 联网搜索
en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:

Some files were not shown because too many files have changed in this diff.