Revert "add clean 7 days datasets (#9424 )"

This reverts commit bd678f9ca1.
2026-01-08 07:14:14 +00:00 · 2024-10-16 22:45:15 +08:00
346 changed files with 15250 additions and 25670 deletions
--- a/.devcontainer/post_create_command.sh
+++ b/.devcontainer/post_create_command.sh
@@ -1,12 +1,11 @@
 #!/bin/bash

-npm add -g pnpm@9.12.2
-cd web && pnpm install
+cd web && npm install
 pipx install poetry

 echo 'alias start-api="cd /workspaces/dify/api && poetry run python -m flask run --host 0.0.0.0 --port=5001 --debug"' >> ~/.bashrc
 echo 'alias start-worker="cd /workspaces/dify/api && poetry run python -m celery -A app.celery worker -P gevent -c 1 --loglevel INFO -Q dataset,generation,mail,ops_trace,app_deletion"' >> ~/.bashrc
-echo 'alias start-web="cd /workspaces/dify/web && pnpm dev"' >> ~/.bashrc
+echo 'alias start-web="cd /workspaces/dify/web && npm run dev"' >> ~/.bashrc
 echo 'alias start-containers="cd /workspaces/dify/docker && docker-compose -f docker-compose.middleware.yaml -p dify up -d"' >> ~/.bashrc

-source /home/vscode/.bashrc
+source /home/vscode/.bashrc
--- a/.github/workflows/translate-i18n-base-on-english.yml
+++ b/.github/workflows/translate-i18n-base-on-english.yml
@@ -42,7 +42,7 @@ jobs:

      - name: Run npm script
        if: env.FILES_CHANGED == 'true'
-        run: pnpm run auto-gen-i18n
+        run: npm run auto-gen-i18n

      - name: Create Pull Request
        if: env.FILES_CHANGED == 'true'
--- a/.gitignore
+++ b/.gitignore
@@ -175,8 +175,6 @@ docker/volumes/pgvector/data/*
 docker/volumes/pgvecto_rs/data/*

 docker/nginx/conf.d/default.conf
-docker/nginx/ssl/*
-!docker/nginx/ssl/.gitkeep
 docker/middleware.env

 sdks/python-client/build
@@ -189,7 +187,4 @@ pyrightconfig.json
 api/.vscode

 .idea/
-.vscode
-
-# pnpm
-/.pnpm-store
+.vscode
--- a/5
+++ b/5
@@ -6,9 +6,8 @@ Dify is licensed under the Apache License 2.0, with the following additional con

 a. Multi-tenant service: Unless explicitly authorized by Dify in writing, you may not use the Dify source code to operate a multi-tenant environment. 
    - Tenant Definition: Within the context of Dify, one tenant corresponds to one workspace. The workspace provides a separated area for each tenant's data and configurations.
-    
-b. LOGO and copyright information: In the process of using Dify's frontend, you may not remove or modify the LOGO or copyright information in the Dify console or applications. This restriction is inapplicable to uses of Dify that do not involve its frontend.
-    - Frontend Definition: For the purposes of this license, the "frontend" of Dify includes all components located in the `web/` directory when running Dify from the raw source code, or the "web" image when running Dify with Docker.
+
+b. LOGO and copyright information: In the process of using Dify's frontend components, you may not remove or modify the LOGO or copyright information in the Dify console or applications. This restriction is inapplicable to uses of Dify that do not involve its frontend components.

 Please contact business@dify.ai by email to inquire about licensing matters.

--- a/api/.env.example
+++ b/api/.env.example
@@ -42,7 +42,7 @@ DB_DATABASE=dify

 # Storage configuration
 # use for store upload files, private keys...
-# storage type: local, s3, aliyun-oss, azure-blob, baidu-obs, google-storage, huawei-obs, oci-storage, tencent-cos, volcengine-tos, supabase
+# storage type: local, s3, azure-blob, google-storage, tencent-cos, huawei-obs, volcengine-tos, baidu-obs, supabase
 STORAGE_TYPE=local
 STORAGE_LOCAL_PATH=storage
 S3_USE_AWS_MANAGED_IAM=false
--- a/api/app.py
+++ b/api/app.py
@@ -10,20 +10,44 @@ if os.environ.get("DEBUG", "false").lower() != "true":
    grpc.experimental.gevent.init_gevent()

 import json
+import logging
+import sys
 import threading
 import time
 import warnings
+from logging.handlers import RotatingFileHandler

-from flask import Response
+from flask import Flask, Response, request
+from flask_cors import CORS
+from werkzeug.exceptions import Unauthorized

-from app_factory import create_app
+import contexts
+from commands import register_commands
+from configs import dify_config

 # DO NOT REMOVE BELOW
 from events import event_handlers  # noqa: F401
+from extensions import (
+    ext_celery,
+    ext_code_based_extension,
+    ext_compress,
+    ext_database,
+    ext_hosting_provider,
+    ext_login,
+    ext_mail,
+    ext_migrate,
+    ext_proxy_fix,
+    ext_redis,
+    ext_sentry,
+    ext_storage,
+)
 from extensions.ext_database import db
+from extensions.ext_login import login_manager
+from libs.passport import PassportService

 # TODO: Find a way to avoid importing models here
 from models import account, dataset, model, source, task, tool, tools, web  # noqa: F401
+from services.account_service import AccountService

 # DO NOT REMOVE ABOVE

@@ -36,12 +60,188 @@ if hasattr(time, "tzset"):
    time.tzset()


+class DifyApp(Flask):
+    pass
+
+
 # -------------
 # Configuration
 # -------------
+
+
 config_type = os.getenv("EDITION", default="SELF_HOSTED")  # ce edition first


+# ----------------------------
+# Application Factory Function
+# ----------------------------
+
+
+def create_flask_app_with_configs() -> Flask:
+    """
+    create a raw flask app
+    with configs loaded from .env file
+    """
+    dify_app = DifyApp(__name__)
+    dify_app.config.from_mapping(dify_config.model_dump())
+
+    # populate configs into system environment variables
+    for key, value in dify_app.config.items():
+        if isinstance(value, str):
+            os.environ[key] = value
+        elif isinstance(value, int | float | bool):
+            os.environ[key] = str(value)
+        elif value is None:
+            os.environ[key] = ""
+
+    return dify_app
+
+
+def create_app() -> Flask:
+    app = create_flask_app_with_configs()
+
+    app.secret_key = app.config["SECRET_KEY"]
+
+    log_handlers = None
+    log_file = app.config.get("LOG_FILE")
+    if log_file:
+        log_dir = os.path.dirname(log_file)
+        os.makedirs(log_dir, exist_ok=True)
+        log_handlers = [
+            RotatingFileHandler(
+                filename=log_file,
+                maxBytes=1024 * 1024 * 1024,
+                backupCount=5,
+            ),
+            logging.StreamHandler(sys.stdout),
+        ]
+
+    logging.basicConfig(
+        level=app.config.get("LOG_LEVEL"),
+        format=app.config.get("LOG_FORMAT"),
+        datefmt=app.config.get("LOG_DATEFORMAT"),
+        handlers=log_handlers,
+        force=True,
+    )
+    log_tz = app.config.get("LOG_TZ")
+    if log_tz:
+        from datetime import datetime
+
+        import pytz
+
+        timezone = pytz.timezone(log_tz)
+
+        def time_converter(seconds):
+            return datetime.utcfromtimestamp(seconds).astimezone(timezone).timetuple()
+
+        for handler in logging.root.handlers:
+            handler.formatter.converter = time_converter
+    initialize_extensions(app)
+    register_blueprints(app)
+    register_commands(app)
+
+    return app
+
+
+def initialize_extensions(app):
+    # Since the application instance is now created, pass it to each Flask
+    # extension instance to bind it to the Flask application instance (app)
+    ext_compress.init_app(app)
+    ext_code_based_extension.init()
+    ext_database.init_app(app)
+    ext_migrate.init(app, db)
+    ext_redis.init_app(app)
+    ext_storage.init_app(app)
+    ext_celery.init_app(app)
+    ext_login.init_app(app)
+    ext_mail.init_app(app)
+    ext_hosting_provider.init_app(app)
+    ext_sentry.init_app(app)
+    ext_proxy_fix.init_app(app)
+
+
+# Flask-Login configuration
+@login_manager.request_loader
+def load_user_from_request(request_from_flask_login):
+    """Load user based on the request."""
+    if request.blueprint not in {"console", "inner_api"}:
+        return None
+    # Check if the user_id contains a dot, indicating the old format
+    auth_header = request.headers.get("Authorization", "")
+    if not auth_header:
+        auth_token = request.args.get("_token")
+        if not auth_token:
+            raise Unauthorized("Invalid Authorization token.")
+    else:
+        if " " not in auth_header:
+            raise Unauthorized("Invalid Authorization header format. Expected 'Bearer <api-key>' format.")
+        auth_scheme, auth_token = auth_header.split(None, 1)
+        auth_scheme = auth_scheme.lower()
+        if auth_scheme != "bearer":
+            raise Unauthorized("Invalid Authorization header format. Expected 'Bearer <api-key>' format.")
+
+    decoded = PassportService().verify(auth_token)
+    user_id = decoded.get("user_id")
+
+    logged_in_account = AccountService.load_logged_in_account(account_id=user_id)
+    if logged_in_account:
+        contexts.tenant_id.set(logged_in_account.current_tenant_id)
+    return logged_in_account
+
+
+@login_manager.unauthorized_handler
+def unauthorized_handler():
+    """Handle unauthorized requests."""
+    return Response(
+        json.dumps({"code": "unauthorized", "message": "Unauthorized."}),
+        status=401,
+        content_type="application/json",
+    )
+
+
+# register blueprint routers
+def register_blueprints(app):
+    from controllers.console import bp as console_app_bp
+    from controllers.files import bp as files_bp
+    from controllers.inner_api import bp as inner_api_bp
+    from controllers.service_api import bp as service_api_bp
+    from controllers.web import bp as web_bp
+
+    CORS(
+        service_api_bp,
+        allow_headers=["Content-Type", "Authorization", "X-App-Code"],
+        methods=["GET", "PUT", "POST", "DELETE", "OPTIONS", "PATCH"],
+    )
+    app.register_blueprint(service_api_bp)
+
+    CORS(
+        web_bp,
+        resources={r"/*": {"origins": app.config["WEB_API_CORS_ALLOW_ORIGINS"]}},
+        supports_credentials=True,
+        allow_headers=["Content-Type", "Authorization", "X-App-Code"],
+        methods=["GET", "PUT", "POST", "DELETE", "OPTIONS", "PATCH"],
+        expose_headers=["X-Version", "X-Env"],
+    )
+
+    app.register_blueprint(web_bp)
+
+    CORS(
+        console_app_bp,
+        resources={r"/*": {"origins": app.config["CONSOLE_CORS_ALLOW_ORIGINS"]}},
+        supports_credentials=True,
+        allow_headers=["Content-Type", "Authorization"],
+        methods=["GET", "PUT", "POST", "DELETE", "OPTIONS", "PATCH"],
+        expose_headers=["X-Version", "X-Env"],
+    )
+
+    app.register_blueprint(console_app_bp)
+
+    CORS(files_bp, allow_headers=["Content-Type"], methods=["GET", "PUT", "POST", "DELETE", "OPTIONS", "PATCH"])
+    app.register_blueprint(files_bp)
+
+    app.register_blueprint(inner_api_bp)
+
+
 # create app
 app = create_app()
 celery = app.extensions["celery"]
--- a/api/app_factory.py
+++ b/api/app_factory.py
@@ -1,213 +0,0 @@
-import os
-
-if os.environ.get("DEBUG", "false").lower() != "true":
-    from gevent import monkey
-
-    monkey.patch_all()
-
-    import grpc.experimental.gevent
-
-    grpc.experimental.gevent.init_gevent()
-
-import json
-import logging
-import sys
-from logging.handlers import RotatingFileHandler
-
-from flask import Flask, Response, request
-from flask_cors import CORS
-from werkzeug.exceptions import Unauthorized
-
-import contexts
-from commands import register_commands
-from configs import dify_config
-from extensions import (
-    ext_celery,
-    ext_code_based_extension,
-    ext_compress,
-    ext_database,
-    ext_hosting_provider,
-    ext_login,
-    ext_mail,
-    ext_migrate,
-    ext_proxy_fix,
-    ext_redis,
-    ext_sentry,
-    ext_storage,
-)
-from extensions.ext_database import db
-from extensions.ext_login import login_manager
-from libs.passport import PassportService
-from services.account_service import AccountService
-
-
-class DifyApp(Flask):
-    pass
-
-
-# ----------------------------
-# Application Factory Function
-# ----------------------------
-def create_flask_app_with_configs() -> Flask:
-    """
-    create a raw flask app
-    with configs loaded from .env file
-    """
-    dify_app = DifyApp(__name__)
-    dify_app.config.from_mapping(dify_config.model_dump())
-
-    # populate configs into system environment variables
-    for key, value in dify_app.config.items():
-        if isinstance(value, str):
-            os.environ[key] = value
-        elif isinstance(value, int | float | bool):
-            os.environ[key] = str(value)
-        elif value is None:
-            os.environ[key] = ""
-
-    return dify_app
-
-
-def create_app() -> Flask:
-    app = create_flask_app_with_configs()
-
-    app.secret_key = app.config["SECRET_KEY"]
-
-    log_handlers = None
-    log_file = app.config.get("LOG_FILE")
-    if log_file:
-        log_dir = os.path.dirname(log_file)
-        os.makedirs(log_dir, exist_ok=True)
-        log_handlers = [
-            RotatingFileHandler(
-                filename=log_file,
-                maxBytes=1024 * 1024 * 1024,
-                backupCount=5,
-            ),
-            logging.StreamHandler(sys.stdout),
-        ]
-
-    logging.basicConfig(
-        level=app.config.get("LOG_LEVEL"),
-        format=app.config.get("LOG_FORMAT"),
-        datefmt=app.config.get("LOG_DATEFORMAT"),
-        handlers=log_handlers,
-        force=True,
-    )
-    log_tz = app.config.get("LOG_TZ")
-    if log_tz:
-        from datetime import datetime
-
-        import pytz
-
-        timezone = pytz.timezone(log_tz)
-
-        def time_converter(seconds):
-            return datetime.utcfromtimestamp(seconds).astimezone(timezone).timetuple()
-
-        for handler in logging.root.handlers:
-            handler.formatter.converter = time_converter
-    initialize_extensions(app)
-    register_blueprints(app)
-    register_commands(app)
-
-    return app
-
-
-def initialize_extensions(app):
-    # Since the application instance is now created, pass it to each Flask
-    # extension instance to bind it to the Flask application instance (app)
-    ext_compress.init_app(app)
-    ext_code_based_extension.init()
-    ext_database.init_app(app)
-    ext_migrate.init(app, db)
-    ext_redis.init_app(app)
-    ext_storage.init_app(app)
-    ext_celery.init_app(app)
-    ext_login.init_app(app)
-    ext_mail.init_app(app)
-    ext_hosting_provider.init_app(app)
-    ext_sentry.init_app(app)
-    ext_proxy_fix.init_app(app)
-
-
-# Flask-Login configuration
-@login_manager.request_loader
-def load_user_from_request(request_from_flask_login):
-    """Load user based on the request."""
-    if request.blueprint not in {"console", "inner_api"}:
-        return None
-    # Check if the user_id contains a dot, indicating the old format
-    auth_header = request.headers.get("Authorization", "")
-    if not auth_header:
-        auth_token = request.args.get("_token")
-        if not auth_token:
-            raise Unauthorized("Invalid Authorization token.")
-    else:
-        if " " not in auth_header:
-            raise Unauthorized("Invalid Authorization header format. Expected 'Bearer <api-key>' format.")
-        auth_scheme, auth_token = auth_header.split(None, 1)
-        auth_scheme = auth_scheme.lower()
-        if auth_scheme != "bearer":
-            raise Unauthorized("Invalid Authorization header format. Expected 'Bearer <api-key>' format.")
-
-    decoded = PassportService().verify(auth_token)
-    user_id = decoded.get("user_id")
-
-    logged_in_account = AccountService.load_logged_in_account(account_id=user_id)
-    if logged_in_account:
-        contexts.tenant_id.set(logged_in_account.current_tenant_id)
-    return logged_in_account
-
-
-@login_manager.unauthorized_handler
-def unauthorized_handler():
-    """Handle unauthorized requests."""
-    return Response(
-        json.dumps({"code": "unauthorized", "message": "Unauthorized."}),
-        status=401,
-        content_type="application/json",
-    )
-
-
-# register blueprint routers
-def register_blueprints(app):
-    from controllers.console import bp as console_app_bp
-    from controllers.files import bp as files_bp
-    from controllers.inner_api import bp as inner_api_bp
-    from controllers.service_api import bp as service_api_bp
-    from controllers.web import bp as web_bp
-
-    CORS(
-        service_api_bp,
-        allow_headers=["Content-Type", "Authorization", "X-App-Code"],
-        methods=["GET", "PUT", "POST", "DELETE", "OPTIONS", "PATCH"],
-    )
-    app.register_blueprint(service_api_bp)
-
-    CORS(
-        web_bp,
-        resources={r"/*": {"origins": app.config["WEB_API_CORS_ALLOW_ORIGINS"]}},
-        supports_credentials=True,
-        allow_headers=["Content-Type", "Authorization", "X-App-Code"],
-        methods=["GET", "PUT", "POST", "DELETE", "OPTIONS", "PATCH"],
-        expose_headers=["X-Version", "X-Env"],
-    )
-
-    app.register_blueprint(web_bp)
-
-    CORS(
-        console_app_bp,
-        resources={r"/*": {"origins": app.config["CONSOLE_CORS_ALLOW_ORIGINS"]}},
-        supports_credentials=True,
-        allow_headers=["Content-Type", "Authorization"],
-        methods=["GET", "PUT", "POST", "DELETE", "OPTIONS", "PATCH"],
-        expose_headers=["X-Version", "X-Env"],
-    )
-
-    app.register_blueprint(console_app_bp)
-
-    CORS(files_bp, allow_headers=["Content-Type"], methods=["GET", "PUT", "POST", "DELETE", "OPTIONS", "PATCH"])
-    app.register_blueprint(files_bp)
-
-    app.register_blueprint(inner_api_bp)
--- a/api/commands.py
+++ b/api/commands.py
@@ -259,25 +259,6 @@ def migrate_knowledge_vector_database():
    skipped_count = 0
    total_count = 0
    vector_type = dify_config.VECTOR_STORE
-    upper_colletion_vector_types = {
-        VectorType.MILVUS,
-        VectorType.PGVECTOR,
-        VectorType.RELYT,
-        VectorType.WEAVIATE,
-        VectorType.ORACLE,
-        VectorType.ELASTICSEARCH,
-    }
-    lower_colletion_vector_types = {
-        VectorType.ANALYTICDB,
-        VectorType.CHROMA,
-        VectorType.MYSCALE,
-        VectorType.PGVECTO_RS,
-        VectorType.TIDB_VECTOR,
-        VectorType.OPENSEARCH,
-        VectorType.TENCENT,
-        VectorType.BAIDU,
-        VectorType.VIKINGDB,
-    }
    page = 1
    while True:
        try:
@@ -303,9 +284,11 @@ def migrate_knowledge_vector_database():
                        skipped_count = skipped_count + 1
                        continue
                collection_name = ""
-                dataset_id = dataset.id
-                if vector_type in upper_colletion_vector_types:
+                if vector_type == VectorType.WEAVIATE:
+                    dataset_id = dataset.id
                    collection_name = Dataset.gen_collection_name_by_id(dataset_id)
+                    index_struct_dict = {"type": VectorType.WEAVIATE, "vector_store": {"class_prefix": collection_name}}
+                    dataset.index_struct = json.dumps(index_struct_dict)
                elif vector_type == VectorType.QDRANT:
                    if dataset.collection_binding_id:
                        dataset_collection_binding = (
@@ -318,15 +301,63 @@ def migrate_knowledge_vector_database():
                        else:
                            raise ValueError("Dataset Collection Binding not found")
                    else:
+                        dataset_id = dataset.id
                        collection_name = Dataset.gen_collection_name_by_id(dataset_id)
+                    index_struct_dict = {"type": VectorType.QDRANT, "vector_store": {"class_prefix": collection_name}}
+                    dataset.index_struct = json.dumps(index_struct_dict)

-                elif vector_type in lower_colletion_vector_types:
-                    collection_name = Dataset.gen_collection_name_by_id(dataset_id).lower()
+                elif vector_type == VectorType.MILVUS:
+                    dataset_id = dataset.id
+                    collection_name = Dataset.gen_collection_name_by_id(dataset_id)
+                    index_struct_dict = {"type": VectorType.MILVUS, "vector_store": {"class_prefix": collection_name}}
+                    dataset.index_struct = json.dumps(index_struct_dict)
+                elif vector_type == VectorType.RELYT:
+                    dataset_id = dataset.id
+                    collection_name = Dataset.gen_collection_name_by_id(dataset_id)
+                    index_struct_dict = {"type": "relyt", "vector_store": {"class_prefix": collection_name}}
+                    dataset.index_struct = json.dumps(index_struct_dict)
+                elif vector_type == VectorType.TENCENT:
+                    dataset_id = dataset.id
+                    collection_name = Dataset.gen_collection_name_by_id(dataset_id)
+                    index_struct_dict = {"type": VectorType.TENCENT, "vector_store": {"class_prefix": collection_name}}
+                    dataset.index_struct = json.dumps(index_struct_dict)
+                elif vector_type == VectorType.PGVECTOR:
+                    dataset_id = dataset.id
+                    collection_name = Dataset.gen_collection_name_by_id(dataset_id)
+                    index_struct_dict = {"type": VectorType.PGVECTOR, "vector_store": {"class_prefix": collection_name}}
+                    dataset.index_struct = json.dumps(index_struct_dict)
+                elif vector_type == VectorType.OPENSEARCH:
+                    dataset_id = dataset.id
+                    collection_name = Dataset.gen_collection_name_by_id(dataset_id)
+                    index_struct_dict = {
+                        "type": VectorType.OPENSEARCH,
+                        "vector_store": {"class_prefix": collection_name},
+                    }
+                    dataset.index_struct = json.dumps(index_struct_dict)
+                elif vector_type == VectorType.ANALYTICDB:
+                    dataset_id = dataset.id
+                    collection_name = Dataset.gen_collection_name_by_id(dataset_id)
+                    index_struct_dict = {
+                        "type": VectorType.ANALYTICDB,
+                        "vector_store": {"class_prefix": collection_name},
+                    }
+                    dataset.index_struct = json.dumps(index_struct_dict)
+                elif vector_type == VectorType.ELASTICSEARCH:
+                    dataset_id = dataset.id
+                    index_name = Dataset.gen_collection_name_by_id(dataset_id)
+                    index_struct_dict = {"type": "elasticsearch", "vector_store": {"class_prefix": index_name}}
+                    dataset.index_struct = json.dumps(index_struct_dict)
+                elif vector_type == VectorType.BAIDU:
+                    dataset_id = dataset.id
+                    collection_name = Dataset.gen_collection_name_by_id(dataset_id)
+                    index_struct_dict = {
+                        "type": VectorType.BAIDU,
+                        "vector_store": {"class_prefix": collection_name},
+                    }
+                    dataset.index_struct = json.dumps(index_struct_dict)
                else:
                    raise ValueError(f"Vector store {vector_type} is not supported.")

-                index_struct_dict = {"type": vector_type, "vector_store": {"class_prefix": collection_name}}
-                dataset.index_struct = json.dumps(index_struct_dict)
                vector = Vector(dataset)
                click.echo(f"Migrating dataset {dataset.id}.")

--- a/api/configs/feature/init.py
+++ b/api/configs/feature/init.py
@@ -506,16 +506,11 @@ class DataSetConfig(BaseSettings):
    Configuration for dataset management
    """

-    PLAN_SANDBOX_CLEAN_DAY_SETTING: PositiveInt = Field(
-        description="Interval in days for dataset cleanup operations - plan: sandbox",
+    CLEAN_DAY_SETTING: PositiveInt = Field(
+        description="Interval in days for dataset cleanup operations",
        default=30,
    )

-    PLAN_PRO_CLEAN_DAY_SETTING: PositiveInt = Field(
-        description="Interval in days for dataset cleanup operations - plan: pro and team",
-        default=7,
-    )
-
    DATASET_OPERATOR_ENABLED: bool = Field(
        description="Enable or disable dataset operator functionality",
        default=False,
--- a/api/configs/middleware/init.py
+++ b/api/configs/middleware/init.py
@@ -35,8 +35,7 @@ from configs.middleware.vdb.weaviate_config import WeaviateConfig
 class StorageConfig(BaseSettings):
    STORAGE_TYPE: str = Field(
        description="Type of storage to use."
-        " Options: 'local', 's3', 'aliyun-oss', 'azure-blob', 'baidu-obs', 'google-storage', 'huawei-obs', "
-        "'oci-storage', 'tencent-cos', 'volcengine-tos', 'supabase'. Default is 'local'.",
+        " Options: 'local', 's3', 'azure-blob', 'aliyun-oss', 'google-storage'. Default is 'local'.",
        default="local",
    )

--- a/api/core/rag/embedding/cached_embedding.py
+++ b/api/core/rag/embedding/cached_embedding.py
@@ -6,11 +6,11 @@ import numpy as np
 from sqlalchemy.exc import IntegrityError

 from configs import dify_config
-from core.entities.embedding_type import EmbeddingInputType
+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_manager import ModelInstance
 from core.model_runtime.entities.model_entities import ModelPropertyKey
 from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
-from core.rag.embedding.embedding_base import Embeddings
+from core.rag.datasource.entity.embedding import Embeddings
 from extensions.ext_database import db
 from extensions.ext_redis import redis_client
 from libs import helper
--- a/api/core/embedding/embedding_constant.py
+++ b/api/core/embedding/embedding_constant.py
--- a/api/core/model_manager.py
+++ b/api/core/model_manager.py
@@ -3,7 +3,7 @@ import os
 from collections.abc import Callable, Generator, Sequence
 from typing import IO, Optional, Union, cast

-from core.entities.embedding_type import EmbeddingInputType
+from core.embedding.embedding_constant import EmbeddingInputType
 from core.entities.provider_configuration import ProviderConfiguration, ProviderModelBundle
 from core.entities.provider_entities import ModelLoadBalancingConfiguration
 from core.errors.error import ProviderTokenNotInitError
--- a/api/core/model_runtime/model_providers/__base/text_embedding_model.py
+++ b/api/core/model_runtime/model_providers/__base/text_embedding_model.py
@@ -4,7 +4,7 @@ from typing import Optional

 from pydantic import ConfigDict

-from core.entities.embedding_type import EmbeddingInputType
+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.model_entities import ModelPropertyKey, ModelType
 from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
 from core.model_runtime.model_providers.__base.ai_model import AIModel
--- a/api/core/model_runtime/model_providers/azure_openai/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/azure_openai/text_embedding/text_embedding.py
@@ -7,7 +7,7 @@ import numpy as np
 import tiktoken
 from openai import AzureOpenAI

-from core.entities.embedding_type import EmbeddingInputType
+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.model_entities import AIModelEntity, PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
 from core.model_runtime.errors.validate import CredentialsValidateFailedError
--- a/api/core/model_runtime/model_providers/baichuan/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/baichuan/text_embedding/text_embedding.py
@@ -4,7 +4,7 @@ from typing import Optional

 from requests import post

-from core.entities.embedding_type import EmbeddingInputType
+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.model_entities import PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
 from core.model_runtime.errors.invoke import (
--- a/api/core/model_runtime/model_providers/bedrock/llm/eu.anthropic.claude-3-haiku-v1.yaml
+++ b/api/core/model_runtime/model_providers/bedrock/llm/eu.anthropic.claude-3-haiku-v1.yaml
@@ -52,8 +52,6 @@ parameter_rules:
    help:
      zh_Hans: 对于每个后续标记，仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
      en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
-  - name: response_format
-    use_template: response_format
 pricing:
  input: '0.00025'
  output: '0.00125'
--- a/api/core/model_runtime/model_providers/bedrock/llm/eu.anthropic.claude-3-sonnet-v1.5.yaml
+++ b/api/core/model_runtime/model_providers/bedrock/llm/eu.anthropic.claude-3-sonnet-v1.5.yaml
@@ -51,8 +51,6 @@ parameter_rules:
    help:
      zh_Hans: 对于每个后续标记，仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
      en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
-  - name: response_format
-    use_template: response_format
 pricing:
  input: '0.003'
  output: '0.015'
--- a/api/core/model_runtime/model_providers/bedrock/llm/eu.anthropic.claude-3-sonnet-v1.yaml
+++ b/api/core/model_runtime/model_providers/bedrock/llm/eu.anthropic.claude-3-sonnet-v1.yaml
@@ -51,8 +51,6 @@ parameter_rules:
    help:
      zh_Hans: 对于每个后续标记，仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
      en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
-  - name: response_format
-    use_template: response_format
 pricing:
  input: '0.003'
  output: '0.015'
--- a/api/core/model_runtime/model_providers/bedrock/llm/us.anthropic.claude-3-haiku-v1.yaml
+++ b/api/core/model_runtime/model_providers/bedrock/llm/us.anthropic.claude-3-haiku-v1.yaml
@@ -52,8 +52,6 @@ parameter_rules:
    help:
      zh_Hans: 对于每个后续标记，仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
      en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
-  - name: response_format
-    use_template: response_format
 pricing:
  input: '0.00025'
  output: '0.00125'
--- a/api/core/model_runtime/model_providers/bedrock/llm/us.anthropic.claude-3-opus-v1.yaml
+++ b/api/core/model_runtime/model_providers/bedrock/llm/us.anthropic.claude-3-opus-v1.yaml
@@ -52,8 +52,6 @@ parameter_rules:
    help:
      zh_Hans: 对于每个后续标记，仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
      en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
-  - name: response_format
-    use_template: response_format
 pricing:
  input: '0.015'
  output: '0.075'
--- a/api/core/model_runtime/model_providers/bedrock/llm/us.anthropic.claude-3-sonnet-v1.5.yaml
+++ b/api/core/model_runtime/model_providers/bedrock/llm/us.anthropic.claude-3-sonnet-v1.5.yaml
@@ -51,8 +51,6 @@ parameter_rules:
    help:
      zh_Hans: 对于每个后续标记，仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
      en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
-  - name: response_format
-    use_template: response_format
 pricing:
  input: '0.003'
  output: '0.015'
--- a/api/core/model_runtime/model_providers/bedrock/llm/us.anthropic.claude-3-sonnet-v1.yaml
+++ b/api/core/model_runtime/model_providers/bedrock/llm/us.anthropic.claude-3-sonnet-v1.yaml
@@ -51,8 +51,6 @@ parameter_rules:
    help:
      zh_Hans: 对于每个后续标记，仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
      en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
-  - name: response_format
-    use_template: response_format
 pricing:
  input: '0.003'
  output: '0.015'
--- a/api/core/model_runtime/model_providers/bedrock/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/bedrock/text_embedding/text_embedding.py
@@ -13,7 +13,7 @@ from botocore.exceptions import (
    UnknownServiceError,
 )

-from core.entities.embedding_type import EmbeddingInputType
+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.model_entities import PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
 from core.model_runtime.errors.invoke import (
--- a/api/core/model_runtime/model_providers/cohere/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/cohere/text_embedding/text_embedding.py
@@ -5,7 +5,7 @@ import cohere
 import numpy as np
 from cohere.core import RequestOptions

-from core.entities.embedding_type import EmbeddingInputType
+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.model_entities import PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
 from core.model_runtime.errors.invoke import (
--- a/api/core/model_runtime/model_providers/fireworks/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/fireworks/text_embedding/text_embedding.py
@@ -5,7 +5,7 @@ from typing import Optional, Union
 import numpy as np
 from openai import OpenAI

-from core.entities.embedding_type import EmbeddingInputType
+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.model_entities import PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
 from core.model_runtime.errors.validate import CredentialsValidateFailedError
--- a/api/core/model_runtime/model_providers/groq/groq.yaml
+++ b/api/core/model_runtime/model_providers/groq/groq.yaml
@@ -18,7 +18,6 @@ help:
    en_US: https://console.groq.com/
 supported_model_types:
  - llm
-  - speech2text
 configurate_methods:
  - predefined-model
 provider_credential_schema:
--- a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-vision-preview.yaml
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-vision-preview.yaml
@@ -1,26 +0,0 @@
-model: llama-3.2-11b-vision-preview
-label:
-  zh_Hans: Llama 3.2 11B Vision (Preview)
-  en_US: Llama 3.2 11B Vision (Preview)
-model_type: llm
-features:
-  - agent-thought
-  - vision
-model_properties:
-  mode: chat
-  context_size: 131072
-parameter_rules:
-  - name: temperature
-    use_template: temperature
-  - name: top_p
-    use_template: top_p
-  - name: max_tokens
-    use_template: max_tokens
-    default: 512
-    min: 1
-    max: 8192
-pricing:
-  input: '0.05'
-  output: '0.1'
-  unit: '0.000001'
-  currency: USD
--- a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-vision-preview.yaml
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-vision-preview.yaml
@@ -1,26 +0,0 @@
-model: llama-3.2-90b-vision-preview
-label:
-  zh_Hans: Llama 3.2 90B Vision (Preview)
-  en_US: Llama 3.2 90B Vision (Preview)
-model_type: llm
-features:
-  - agent-thought
-  - vision
-model_properties:
-  mode: chat
-  context_size: 131072
-parameter_rules:
-  - name: temperature
-    use_template: temperature
-  - name: top_p
-    use_template: top_p
-  - name: max_tokens
-    use_template: max_tokens
-    default: 512
-    min: 1
-    max: 8192
-pricing:
-  input: '0.05'
-  output: '0.1'
-  unit: '0.000001'
-  currency: USD
--- a/api/core/model_runtime/model_providers/groq/speech2text/init.py
+++ b/api/core/model_runtime/model_providers/groq/speech2text/init.py
--- a/api/core/model_runtime/model_providers/groq/speech2text/distil-whisper-large-v3-en.yaml
+++ b/api/core/model_runtime/model_providers/groq/speech2text/distil-whisper-large-v3-en.yaml
@@ -1,5 +0,0 @@
-model: distil-whisper-large-v3-en
-model_type: speech2text
-model_properties:
-  file_upload_limit: 1
-  supported_file_extensions: flac,mp3,mp4,mpeg,mpga,m4a,ogg,wav,webm
--- a/api/core/model_runtime/model_providers/groq/speech2text/speech2text.py
+++ b/api/core/model_runtime/model_providers/groq/speech2text/speech2text.py
@@ -1,30 +0,0 @@
-from typing import IO, Optional
-
-from core.model_runtime.model_providers.openai_api_compatible.speech2text.speech2text import OAICompatSpeech2TextModel
-
-
-class GroqSpeech2TextModel(OAICompatSpeech2TextModel):
-    """
-    Model class for Groq Speech to text model.
-    """
-
-    def _invoke(self, model: str, credentials: dict, file: IO[bytes], user: Optional[str] = None) -> str:
-        """
-        Invoke speech2text model
-
-        :param model: model name
-        :param credentials: model credentials
-        :param file: audio file
-        :param user: unique user id
-        :return: text for given audio file
-        """
-        self._add_custom_parameters(credentials)
-        return super()._invoke(model, credentials, file)
-
-    def validate_credentials(self, model: str, credentials: dict) -> None:
-        self._add_custom_parameters(credentials)
-        return super().validate_credentials(model, credentials)
-
-    @classmethod
-    def _add_custom_parameters(cls, credentials: dict) -> None:
-        credentials["endpoint_url"] = "https://api.groq.com/openai/v1"
--- a/api/core/model_runtime/model_providers/groq/speech2text/whisper-large-v3-turbo.yaml
+++ b/api/core/model_runtime/model_providers/groq/speech2text/whisper-large-v3-turbo.yaml
@@ -1,5 +0,0 @@
-model: whisper-large-v3-turbo
-model_type: speech2text
-model_properties:
-  file_upload_limit: 1
-  supported_file_extensions: flac,mp3,mp4,mpeg,mpga,m4a,ogg,wav,webm
--- a/api/core/model_runtime/model_providers/groq/speech2text/whisper-large-v3.yaml
+++ b/api/core/model_runtime/model_providers/groq/speech2text/whisper-large-v3.yaml
@@ -1,5 +0,0 @@
-model: whisper-large-v3
-model_type: speech2text
-model_properties:
-  file_upload_limit: 1
-  supported_file_extensions: flac,mp3,mp4,mpeg,mpga,m4a,ogg,wav,webm
--- a/api/core/model_runtime/model_providers/huggingface_hub/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/huggingface_hub/text_embedding/text_embedding.py
@@ -6,7 +6,7 @@ import numpy as np
 import requests
 from huggingface_hub import HfApi, InferenceClient

-from core.entities.embedding_type import EmbeddingInputType
+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.common_entities import I18nObject
 from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelType, PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
--- a/api/core/model_runtime/model_providers/huggingface_tei/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/huggingface_tei/text_embedding/text_embedding.py
@@ -1,7 +1,7 @@
 import time
 from typing import Optional

-from core.entities.embedding_type import EmbeddingInputType
+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.common_entities import I18nObject
 from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
--- a/api/core/model_runtime/model_providers/hunyuan/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/hunyuan/text_embedding/text_embedding.py
@@ -9,7 +9,7 @@ from tencentcloud.common.profile.client_profile import ClientProfile
 from tencentcloud.common.profile.http_profile import HttpProfile
 from tencentcloud.hunyuan.v20230901 import hunyuan_client, models

-from core.entities.embedding_type import EmbeddingInputType
+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.model_entities import PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
 from core.model_runtime.errors.invoke import (
--- a/api/core/model_runtime/model_providers/jina/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/jina/text_embedding/text_embedding.py
@@ -4,7 +4,7 @@ from typing import Optional

 from requests import post

-from core.entities.embedding_type import EmbeddingInputType
+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.common_entities import I18nObject
 from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
--- a/api/core/model_runtime/model_providers/localai/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/localai/text_embedding/text_embedding.py
@@ -5,7 +5,7 @@ from typing import Optional
 from requests import post
 from yarl import URL

-from core.entities.embedding_type import EmbeddingInputType
+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.common_entities import I18nObject
 from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
--- a/api/core/model_runtime/model_providers/minimax/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/minimax/text_embedding/text_embedding.py
@@ -4,7 +4,7 @@ from typing import Optional

 from requests import post

-from core.entities.embedding_type import EmbeddingInputType
+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.model_entities import PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
 from core.model_runtime.errors.invoke import (
--- a/api/core/model_runtime/model_providers/mixedbread/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/mixedbread/text_embedding/text_embedding.py
@@ -4,7 +4,7 @@ from typing import Optional

 import requests

-from core.entities.embedding_type import EmbeddingInputType
+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.common_entities import I18nObject
 from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
--- a/api/core/model_runtime/model_providers/nomic/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/nomic/text_embedding/text_embedding.py
@@ -5,7 +5,7 @@ from typing import Optional
 from nomic import embed
 from nomic import login as nomic_login

-from core.entities.embedding_type import EmbeddingInputType
+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.model_entities import PriceType
 from core.model_runtime.entities.text_embedding_entities import (
    EmbeddingUsage,
--- a/api/core/model_runtime/model_providers/nvidia/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/nvidia/text_embedding/text_embedding.py
@@ -4,7 +4,7 @@ from typing import Optional

 from requests import post

-from core.entities.embedding_type import EmbeddingInputType
+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.model_entities import PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
 from core.model_runtime.errors.invoke import (
--- a/api/core/model_runtime/model_providers/oci/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/oci/text_embedding/text_embedding.py
@@ -6,7 +6,7 @@ from typing import Optional
 import numpy as np
 import oci

-from core.entities.embedding_type import EmbeddingInputType
+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.model_entities import PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
 from core.model_runtime.errors.invoke import (
--- a/api/core/model_runtime/model_providers/ollama/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/ollama/text_embedding/text_embedding.py
@@ -8,7 +8,7 @@ from urllib.parse import urljoin
 import numpy as np
 import requests

-from core.entities.embedding_type import EmbeddingInputType
+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.common_entities import I18nObject
 from core.model_runtime.entities.model_entities import (
    AIModelEntity,
--- a/api/core/model_runtime/model_providers/openai/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/openai/text_embedding/text_embedding.py
@@ -6,7 +6,7 @@ import numpy as np
 import tiktoken
 from openai import OpenAI

-from core.entities.embedding_type import EmbeddingInputType
+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.model_entities import PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
 from core.model_runtime.errors.validate import CredentialsValidateFailedError
--- a/api/core/model_runtime/model_providers/openai_api_compatible/openai_api_compatible.yaml
+++ b/api/core/model_runtime/model_providers/openai_api_compatible/openai_api_compatible.yaml
@@ -8,7 +8,6 @@ supported_model_types:
  - llm
  - text-embedding
  - speech2text
-  - rerank
 configurate_methods:
  - customizable-model
 model_credential_schema:
@@ -84,19 +83,6 @@ model_credential_schema:
      placeholder:
        zh_Hans: 在此输入您的模型上下文长度
        en_US: Enter your Model context size
-    - variable: context_size
-      label:
-        zh_Hans: 模型上下文长度
-        en_US: Model context size
-      required: true
-      show_on:
-        - variable: __model_type
-          value: rerank
-      type: text-input
-      default: '4096'
-      placeholder:
-        zh_Hans: 在此输入您的模型上下文长度
-        en_US: Enter your Model context size
    - variable: max_tokens_to_sample
      label:
        zh_Hans: 最大 token 上限
--- a/api/core/model_runtime/model_providers/openai_api_compatible/rerank/init.py
+++ b/api/core/model_runtime/model_providers/openai_api_compatible/rerank/init.py
--- a/api/core/model_runtime/model_providers/openai_api_compatible/rerank/rerank.py
+++ b/api/core/model_runtime/model_providers/openai_api_compatible/rerank/rerank.py
@@ -1,159 +0,0 @@
-from json import dumps
-from typing import Optional
-
-import httpx
-from requests import post
-from yarl import URL
-
-from core.model_runtime.entities.common_entities import I18nObject
-from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelType
-from core.model_runtime.entities.rerank_entities import RerankDocument, RerankResult
-from core.model_runtime.errors.invoke import (
-    InvokeAuthorizationError,
-    InvokeBadRequestError,
-    InvokeConnectionError,
-    InvokeError,
-    InvokeRateLimitError,
-    InvokeServerUnavailableError,
-)
-from core.model_runtime.errors.validate import CredentialsValidateFailedError
-from core.model_runtime.model_providers.__base.rerank_model import RerankModel
-
-
-class OAICompatRerankModel(RerankModel):
-    """
-    rerank model API is compatible with Jina rerank model API. So copy the JinaRerankModel class code here.
-    we need enhance for llama.cpp , which return raw score, not normalize score 0~1.  It seems Dify need it
-    """
-
-    def _invoke(
-        self,
-        model: str,
-        credentials: dict,
-        query: str,
-        docs: list[str],
-        score_threshold: Optional[float] = None,
-        top_n: Optional[int] = None,
-        user: Optional[str] = None,
-    ) -> RerankResult:
-        """
-        Invoke rerank model
-
-        :param model: model name
-        :param credentials: model credentials
-        :param query: search query
-        :param docs: docs for reranking
-        :param score_threshold: score threshold
-        :param top_n: top n documents to return
-        :param user: unique user id
-        :return: rerank result
-        """
-        if len(docs) == 0:
-            return RerankResult(model=model, docs=[])
-
-        server_url = credentials["endpoint_url"]
-        model_name = model
-
-        if not server_url:
-            raise CredentialsValidateFailedError("server_url is required")
-        if not model_name:
-            raise CredentialsValidateFailedError("model_name is required")
-
-        url = server_url
-        headers = {"Authorization": f"Bearer {credentials.get('api_key')}", "Content-Type": "application/json"}
-
-        # TODO: Do we need truncate docs to avoid llama.cpp return error?
-
-        data = {"model": model_name, "query": query, "documents": docs, "top_n": top_n}
-
-        try:
-            response = post(str(URL(url) / "rerank"), headers=headers, data=dumps(data), timeout=60)
-            response.raise_for_status()
-            results = response.json()
-
-            rerank_documents = []
-            scores = [result["relevance_score"] for result in results["results"]]
-
-            # Min-Max Normalization: Normalize scores to 0 ~ 1.0 range
-            min_score = min(scores)
-            max_score = max(scores)
-            score_range = max_score - min_score if max_score != min_score else 1.0  # Avoid division by zero
-
-            for result in results["results"]:
-                index = result["index"]
-
-                # Retrieve document text (fallback if llama.cpp rerank doesn't return it)
-                text = result.get("document", {}).get("text", docs[index])
-
-                # Normalize the score
-                normalized_score = (result["relevance_score"] - min_score) / score_range
-
-                # Create RerankDocument object with normalized score
-                rerank_document = RerankDocument(
-                    index=index,
-                    text=text,
-                    score=normalized_score,
-                )
-
-                # Apply threshold (if defined)
-                if score_threshold is None or normalized_score >= score_threshold:
-                    rerank_documents.append(rerank_document)
-
-            # Sort rerank_documents by normalized score in descending order
-            rerank_documents.sort(key=lambda doc: doc.score, reverse=True)
-
-            return RerankResult(model=model, docs=rerank_documents)
-
-        except httpx.HTTPStatusError as e:
-            raise InvokeServerUnavailableError(str(e))
-
-    def validate_credentials(self, model: str, credentials: dict) -> None:
-        """
-        Validate model credentials
-
-        :param model: model name
-        :param credentials: model credentials
-        :return:
-        """
-        try:
-            self._invoke(
-                model=model,
-                credentials=credentials,
-                query="What is the capital of the United States?",
-                docs=[
-                    "Carson City is the capital city of the American state of Nevada. At the 2010 United States "
-                    "Census, Carson City had a population of 55,274.",
-                    "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean that "
-                    "are a political division controlled by the United States. Its capital is Saipan.",
-                ],
-                score_threshold=0.8,
-            )
-        except Exception as ex:
-            raise CredentialsValidateFailedError(str(ex))
-
-    @property
-    def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
-        """
-        Map model invoke error to unified error
-        """
-        return {
-            InvokeConnectionError: [httpx.ConnectError],
-            InvokeServerUnavailableError: [httpx.RemoteProtocolError],
-            InvokeRateLimitError: [],
-            InvokeAuthorizationError: [httpx.HTTPStatusError],
-            InvokeBadRequestError: [httpx.RequestError],
-        }
-
-    def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity:
-        """
-        generate custom model entities from credentials
-        """
-        entity = AIModelEntity(
-            model=model,
-            label=I18nObject(en_US=model),
-            model_type=ModelType.RERANK,
-            fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
-            model_properties={},
-        )
-
-        return entity
--- a/api/core/model_runtime/model_providers/openai_api_compatible/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/openai_api_compatible/text_embedding/text_embedding.py
@@ -7,7 +7,7 @@ from urllib.parse import urljoin
 import numpy as np
 import requests

-from core.entities.embedding_type import EmbeddingInputType
+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.common_entities import I18nObject
 from core.model_runtime.entities.model_entities import (
    AIModelEntity,
--- a/api/core/model_runtime/model_providers/openllm/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/openllm/text_embedding/text_embedding.py
@@ -5,7 +5,7 @@ from typing import Optional
 from requests import post
 from requests.exceptions import ConnectionError, InvalidSchema, MissingSchema

-from core.entities.embedding_type import EmbeddingInputType
+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.model_entities import PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
 from core.model_runtime.errors.invoke import (
--- a/api/core/model_runtime/model_providers/openrouter/llm/deepseek-chat.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/deepseek-chat.yaml
@@ -35,15 +35,6 @@ parameter_rules:
    help:
      zh_Hans: 控制生成结果的随机性。数值越小，随机性越弱；数值越大，随机性越强。一般而言，top_p 和 temperature 两个参数选择一个进行调整即可。
      en_US: Control the randomness of generated results. The smaller the value, the weaker the randomness; the larger the value, the stronger the randomness. Generally speaking, you can adjust one of the two parameters top_p and temperature.
-  - name: top_k
-    label:
-      zh_Hans: 取样数量
-      en_US: Top k
-    type: int
-    help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
-    required: false
  - name: frequency_penalty
    use_template: frequency_penalty
    default: 0
--- a/api/core/model_runtime/model_providers/openrouter/llm/deepseek-coder.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/deepseek-coder.yaml
@@ -18,15 +18,6 @@ parameter_rules:
    min: 0
    max: 1
    default: 1
-  - name: top_k
-    label:
-      zh_Hans: 取样数量
-      en_US: Top k
-    type: int
-    help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
-    required: false
  - name: max_tokens
    use_template: max_tokens
    min: 1
--- a/api/core/model_runtime/model_providers/openrouter/llm/gpt-3.5-turbo.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/gpt-3.5-turbo.yaml
@@ -14,15 +14,6 @@ parameter_rules:
    use_template: temperature
  - name: top_p
    use_template: top_p
-  - name: top_k
-    label:
-      zh_Hans: 取样数量
-      en_US: Top k
-    type: int
-    help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
-    required: false
  - name: presence_penalty
    use_template: presence_penalty
  - name: frequency_penalty
--- a/api/core/model_runtime/model_providers/openrouter/llm/gpt-4-32k.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/gpt-4-32k.yaml
@@ -14,15 +14,6 @@ parameter_rules:
    use_template: temperature
  - name: top_p
    use_template: top_p
-  - name: top_k
-    label:
-      zh_Hans: 取样数量
-      en_US: Top k
-    type: int
-    help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
-    required: false
  - name: presence_penalty
    use_template: presence_penalty
  - name: frequency_penalty
--- a/api/core/model_runtime/model_providers/openrouter/llm/gpt-4.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/gpt-4.yaml
@@ -14,15 +14,6 @@ parameter_rules:
    use_template: temperature
  - name: top_p
    use_template: top_p
-  - name: top_k
-    label:
-      zh_Hans: 取样数量
-      en_US: Top k
-    type: int
-    help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
-    required: false
  - name: presence_penalty
    use_template: presence_penalty
  - name: frequency_penalty
--- a/api/core/model_runtime/model_providers/openrouter/llm/gpt-4o-2024-08-06.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/gpt-4o-2024-08-06.yaml
@@ -16,15 +16,6 @@ parameter_rules:
    use_template: temperature
  - name: top_p
    use_template: top_p
-  - name: top_k
-    label:
-      zh_Hans: 取样数量
-      en_US: Top k
-    type: int
-    help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
-    required: false
  - name: presence_penalty
    use_template: presence_penalty
  - name: frequency_penalty
--- a/api/core/model_runtime/model_providers/openrouter/llm/gpt-4o-mini.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/gpt-4o-mini.yaml
@@ -15,15 +15,6 @@ parameter_rules:
    use_template: temperature
  - name: top_p
    use_template: top_p
-  - name: top_k
-    label:
-      zh_Hans: 取样数量
-      en_US: Top k
-    type: int
-    help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
-    required: false
  - name: presence_penalty
    use_template: presence_penalty
  - name: frequency_penalty
--- a/api/core/model_runtime/model_providers/openrouter/llm/gpt-4o.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/gpt-4o.yaml
@@ -15,15 +15,6 @@ parameter_rules:
    use_template: temperature
  - name: top_p
    use_template: top_p
-  - name: top_k
-    label:
-      zh_Hans: 取样数量
-      en_US: Top k
-    type: int
-    help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
-    required: false
  - name: presence_penalty
    use_template: presence_penalty
  - name: frequency_penalty
--- a/api/core/model_runtime/model_providers/openrouter/llm/llama-3-70b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/llama-3-70b-instruct.yaml
@@ -10,15 +10,6 @@ parameter_rules:
    use_template: temperature
  - name: top_p
    use_template: top_p
-  - name: top_k
-    label:
-      zh_Hans: 取样数量
-      en_US: Top k
-    type: int
-    help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
-    required: false
  - name: max_tokens
    use_template: max_tokens
    required: true
--- a/api/core/model_runtime/model_providers/openrouter/llm/llama-3-8b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/llama-3-8b-instruct.yaml
@@ -10,15 +10,6 @@ parameter_rules:
    use_template: temperature
  - name: top_p
    use_template: top_p
-  - name: top_k
-    label:
-      zh_Hans: 取样数量
-      en_US: Top k
-    type: int
-    help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
-    required: false
  - name: max_tokens
    use_template: max_tokens
    required: true
--- a/api/core/model_runtime/model_providers/openrouter/llm/llama-3.1-405b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/llama-3.1-405b-instruct.yaml
@@ -10,15 +10,6 @@ parameter_rules:
    use_template: temperature
  - name: top_p
    use_template: top_p
-  - name: top_k
-    label:
-      zh_Hans: 取样数量
-      en_US: Top k
-    type: int
-    help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
-    required: false
  - name: max_tokens
    use_template: max_tokens
    required: true
--- a/api/core/model_runtime/model_providers/openrouter/llm/llama-3.1-70b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/llama-3.1-70b-instruct.yaml
@@ -10,15 +10,6 @@ parameter_rules:
    use_template: temperature
  - name: top_p
    use_template: top_p
-  - name: top_k
-    label:
-      zh_Hans: 取样数量
-      en_US: Top k
-    type: int
-    help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
-    required: false
  - name: max_tokens
    use_template: max_tokens
    required: true
--- a/api/core/model_runtime/model_providers/openrouter/llm/llama-3.1-8b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/llama-3.1-8b-instruct.yaml
@@ -10,15 +10,6 @@ parameter_rules:
    use_template: temperature
  - name: top_p
    use_template: top_p
-  - name: top_k
-    label:
-      zh_Hans: 取样数量
-      en_US: Top k
-    type: int
-    help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
-    required: false
  - name: max_tokens
    use_template: max_tokens
    required: true
--- a/api/core/model_runtime/model_providers/openrouter/llm/mistral-7b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/mistral-7b-instruct.yaml
@@ -18,15 +18,6 @@ parameter_rules:
    default: 1
    min: 0
    max: 1
-  - name: top_k
-    label:
-      zh_Hans: 取样数量
-      en_US: Top k
-    type: int
-    help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
-    required: false
  - name: max_tokens
    use_template: max_tokens
    default: 1024
--- a/api/core/model_runtime/model_providers/openrouter/llm/mixtral-8x22b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/mixtral-8x22b-instruct.yaml
@@ -18,15 +18,6 @@ parameter_rules:
    default: 1
    min: 0
    max: 1
-  - name: top_k
-    label:
-      zh_Hans: 取样数量
-      en_US: Top k
-    type: int
-    help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
-    required: false
  - name: max_tokens
    use_template: max_tokens
    default: 1024
--- a/api/core/model_runtime/model_providers/openrouter/llm/mixtral-8x7b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/mixtral-8x7b-instruct.yaml
@@ -19,15 +19,6 @@ parameter_rules:
    default: 1
    min: 0
    max: 1
-  - name: top_k
-    label:
-      zh_Hans: 取样数量
-      en_US: Top k
-    type: int
-    help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
-    required: false
  - name: max_tokens
    use_template: max_tokens
    default: 1024
--- a/api/core/model_runtime/model_providers/openrouter/llm/o1-mini.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/o1-mini.yaml
@@ -12,15 +12,6 @@ parameter_rules:
    use_template: temperature
  - name: top_p
    use_template: top_p
-  - name: top_k
-    label:
-      zh_Hans: 取样数量
-      en_US: Top k
-    type: int
-    help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
-    required: false
  - name: presence_penalty
    use_template: presence_penalty
  - name: frequency_penalty
--- a/api/core/model_runtime/model_providers/openrouter/llm/o1-preview.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/o1-preview.yaml
@@ -12,15 +12,6 @@ parameter_rules:
    use_template: temperature
  - name: top_p
    use_template: top_p
-  - name: top_k
-    label:
-      zh_Hans: 取样数量
-      en_US: Top k
-    type: int
-    help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
-    required: false
  - name: presence_penalty
    use_template: presence_penalty
  - name: frequency_penalty
--- a/api/core/model_runtime/model_providers/openrouter/llm/qwen2-72b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/qwen2-72b-instruct.yaml
@@ -21,15 +21,6 @@ parameter_rules:
      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
  - name: top_p
    use_template: top_p
-  - name: top_k
-    label:
-      zh_Hans: 取样数量
-      en_US: Top k
-    type: int
-    help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
-    required: false
  - name: frequency_penalty
    use_template: frequency_penalty
 pricing:
--- a/api/core/model_runtime/model_providers/openrouter/llm/qwen2.5-72b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/openrouter/llm/qwen2.5-72b-instruct.yaml
@@ -21,15 +21,6 @@ parameter_rules:
      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
  - name: top_p
    use_template: top_p
-  - name: top_k
-    label:
-      zh_Hans: 取样数量
-      en_US: Top k
-    type: int
-    help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
-    required: false
  - name: frequency_penalty
    use_template: frequency_penalty
 pricing:
--- a/api/core/model_runtime/model_providers/perfxcloud/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/perfxcloud/text_embedding/text_embedding.py
@@ -7,7 +7,7 @@ from urllib.parse import urljoin
 import numpy as np
 import requests

-from core.entities.embedding_type import EmbeddingInputType
+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.common_entities import I18nObject
 from core.model_runtime.entities.model_entities import (
    AIModelEntity,
--- a/api/core/model_runtime/model_providers/replicate/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/replicate/text_embedding/text_embedding.py
@@ -4,7 +4,7 @@ from typing import Optional

 from replicate import Client as ReplicateClient

-from core.entities.embedding_type import EmbeddingInputType
+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.common_entities import I18nObject
 from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelType, PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
--- a/api/core/model_runtime/model_providers/sagemaker/speech2text/speech2text.py
+++ b/api/core/model_runtime/model_providers/sagemaker/speech2text/speech2text.py
@@ -14,7 +14,6 @@ from core.model_runtime.errors.invoke import (
    InvokeRateLimitError,
    InvokeServerUnavailableError,
 )
-from core.model_runtime.errors.validate import CredentialsValidateFailedError
 from core.model_runtime.model_providers.__base.speech2text_model import Speech2TextModel
 from core.model_runtime.model_providers.sagemaker.sagemaker import generate_presigned_url

@@ -78,8 +77,7 @@ class SageMakerSpeech2TextModel(Speech2TextModel):
            json_obj = json.loads(json_str)
            asr_text = json_obj["text"]
        except Exception as e:
-            logger.exception(f"failed to invoke speech2text model, {e}")
-            raise CredentialsValidateFailedError(str(e))
+            logger.exception(f"Exception {e}, line : {line}")

        return asr_text

--- a/api/core/model_runtime/model_providers/sagemaker/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/sagemaker/text_embedding/text_embedding.py
@@ -6,7 +6,7 @@ from typing import Any, Optional

 import boto3

-from core.entities.embedding_type import EmbeddingInputType
+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.common_entities import I18nObject
 from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
--- a/api/core/model_runtime/model_providers/siliconflow/llm/deepdeek-coder-v2-instruct.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/deepdeek-coder-v2-instruct.yaml
@@ -21,15 +21,6 @@ parameter_rules:
      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
  - name: top_p
    use_template: top_p
-  - name: top_k
-    label:
-      zh_Hans: 取样数量
-      en_US: Top k
-    type: int
-    help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
-    required: false
  - name: frequency_penalty
    use_template: frequency_penalty
 pricing:
--- a/api/core/model_runtime/model_providers/siliconflow/llm/deepseek-v2-chat.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/deepseek-v2-chat.yaml
@@ -21,15 +21,6 @@ parameter_rules:
      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
  - name: top_p
    use_template: top_p
-  - name: top_k
-    label:
-      zh_Hans: 取样数量
-      en_US: Top k
-    type: int
-    help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
-    required: false
  - name: frequency_penalty
    use_template: frequency_penalty
 pricing:
--- a/api/core/model_runtime/model_providers/siliconflow/llm/deepseek-v2.5.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/deepseek-v2.5.yaml
@@ -21,15 +21,6 @@ parameter_rules:
      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
  - name: top_p
    use_template: top_p
-  - name: top_k
-    label:
-      zh_Hans: 取样数量
-      en_US: Top k
-    type: int
-    help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
-    required: false
  - name: frequency_penalty
    use_template: frequency_penalty
 pricing:
--- a/api/core/model_runtime/model_providers/siliconflow/llm/gemma-2-27b-it.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/gemma-2-27b-it.yaml
@@ -21,15 +21,6 @@ parameter_rules:
      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
  - name: top_p
    use_template: top_p
-  - name: top_k
-    label:
-      zh_Hans: 取样数量
-      en_US: Top k
-    type: int
-    help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
-    required: false
  - name: frequency_penalty
    use_template: frequency_penalty
 pricing:
--- a/api/core/model_runtime/model_providers/siliconflow/llm/gemma-2-9b-it.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/gemma-2-9b-it.yaml
@@ -21,15 +21,6 @@ parameter_rules:
      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
  - name: top_p
    use_template: top_p
-  - name: top_k
-    label:
-      zh_Hans: 取样数量
-      en_US: Top k
-    type: int
-    help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
-    required: false
  - name: frequency_penalty
    use_template: frequency_penalty
 pricing:
--- a/api/core/model_runtime/model_providers/siliconflow/llm/glm4-9b-chat.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/glm4-9b-chat.yaml
@@ -21,15 +21,6 @@ parameter_rules:
      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
  - name: top_p
    use_template: top_p
-  - name: top_k
-    label:
-      zh_Hans: 取样数量
-      en_US: Top k
-    type: int
-    help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
-    required: false
  - name: frequency_penalty
    use_template: frequency_penalty
 pricing:
--- a/api/core/model_runtime/model_providers/siliconflow/llm/internlm2_5-20b-chat.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/internlm2_5-20b-chat.yaml
@@ -21,15 +21,6 @@ parameter_rules:
      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
  - name: top_p
    use_template: top_p
-  - name: top_k
-    label:
-      zh_Hans: 取样数量
-      en_US: Top k
-    type: int
-    help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
-    required: false
  - name: frequency_penalty
    use_template: frequency_penalty
 pricing:
--- a/api/core/model_runtime/model_providers/siliconflow/llm/internlm2_5-7b-chat.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/internlm2_5-7b-chat.yaml
@@ -21,15 +21,6 @@ parameter_rules:
      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
  - name: top_p
    use_template: top_p
-  - name: top_k
-    label:
-      zh_Hans: 取样数量
-      en_US: Top k
-    type: int
-    help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
-    required: false
  - name: frequency_penalty
    use_template: frequency_penalty
 pricing:
--- a/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3-70b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3-70b-instruct.yaml
@@ -21,15 +21,6 @@ parameter_rules:
      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
  - name: top_p
    use_template: top_p
-  - name: top_k
-    label:
-      zh_Hans: 取样数量
-      en_US: Top k
-    type: int
-    help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
-    required: false
  - name: frequency_penalty
    use_template: frequency_penalty
 pricing:
--- a/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3-8b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3-8b-instruct.yaml
@@ -21,15 +21,6 @@ parameter_rules:
      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
  - name: top_p
    use_template: top_p
-  - name: top_k
-    label:
-      zh_Hans: 取样数量
-      en_US: Top k
-    type: int
-    help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
-    required: false
  - name: frequency_penalty
    use_template: frequency_penalty
 pricing:
--- a/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3.1-405b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3.1-405b-instruct.yaml
@@ -21,15 +21,6 @@ parameter_rules:
      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
  - name: top_p
    use_template: top_p
-  - name: top_k
-    label:
-      zh_Hans: 取样数量
-      en_US: Top k
-    type: int
-    help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
-    required: false
  - name: frequency_penalty
    use_template: frequency_penalty
 pricing:
--- a/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3.1-70b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3.1-70b-instruct.yaml
@@ -21,15 +21,6 @@ parameter_rules:
      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
  - name: top_p
    use_template: top_p
-  - name: top_k
-    label:
-      zh_Hans: 取样数量
-      en_US: Top k
-    type: int
-    help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
-    required: false
  - name: frequency_penalty
    use_template: frequency_penalty
 pricing:
--- a/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3.1-8b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3.1-8b-instruct.yaml
@@ -21,15 +21,6 @@ parameter_rules:
      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
  - name: top_p
    use_template: top_p
-  - name: top_k
-    label:
-      zh_Hans: 取样数量
-      en_US: Top k
-    type: int
-    help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
-    required: false
  - name: frequency_penalty
    use_template: frequency_penalty
 pricing:
--- a/api/core/model_runtime/model_providers/siliconflow/llm/mistral-7b-instruct-v0.2.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/mistral-7b-instruct-v0.2.yaml
@@ -21,15 +21,6 @@ parameter_rules:
      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
  - name: top_p
    use_template: top_p
-  - name: top_k
-    label:
-      zh_Hans: 取样数量
-      en_US: Top k
-    type: int
-    help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
-    required: false
  - name: frequency_penalty
    use_template: frequency_penalty
 pricing:
--- a/api/core/model_runtime/model_providers/siliconflow/llm/mistral-8x7b-instruct-v0.1.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/mistral-8x7b-instruct-v0.1.yaml
@@ -21,15 +21,6 @@ parameter_rules:
      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
  - name: top_p
    use_template: top_p
-  - name: top_k
-    label:
-      zh_Hans: 取样数量
-      en_US: Top k
-    type: int
-    help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
-    required: false
  - name: frequency_penalty
    use_template: frequency_penalty
 pricing:
--- a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-1.5b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-1.5b-instruct.yaml
@@ -21,15 +21,6 @@ parameter_rules:
      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
  - name: top_p
    use_template: top_p
-  - name: top_k
-    label:
-      zh_Hans: 取样数量
-      en_US: Top k
-    type: int
-    help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
-    required: false
  - name: frequency_penalty
    use_template: frequency_penalty
 pricing:
--- a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-57b-a14b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-57b-a14b-instruct.yaml
@@ -21,15 +21,6 @@ parameter_rules:
      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
  - name: top_p
    use_template: top_p
-  - name: top_k
-    label:
-      zh_Hans: 取样数量
-      en_US: Top k
-    type: int
-    help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
-    required: false
  - name: frequency_penalty
    use_template: frequency_penalty
 pricing:
--- a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-72b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-72b-instruct.yaml
@@ -21,15 +21,6 @@ parameter_rules:
      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
  - name: top_p
    use_template: top_p
-  - name: top_k
-    label:
-      zh_Hans: 取样数量
-      en_US: Top k
-    type: int
-    help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
-    required: false
  - name: frequency_penalty
    use_template: frequency_penalty
 pricing:
--- a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-7b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-7b-instruct.yaml
@@ -21,15 +21,6 @@ parameter_rules:
      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
  - name: top_p
    use_template: top_p
-  - name: top_k
-    label:
-      zh_Hans: 取样数量
-      en_US: Top k
-    type: int
-    help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
-    required: false
  - name: frequency_penalty
    use_template: frequency_penalty
 pricing:
--- a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-14b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-14b-instruct.yaml
@@ -21,15 +21,6 @@ parameter_rules:
      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
  - name: top_p
    use_template: top_p
-  - name: top_k
-    label:
-      zh_Hans: 取样数量
-      en_US: Top k
-    type: int
-    help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
-    required: false
  - name: frequency_penalty
    use_template: frequency_penalty
 pricing:
--- a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-32b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-32b-instruct.yaml
@@ -21,15 +21,6 @@ parameter_rules:
      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
  - name: top_p
    use_template: top_p
-  - name: top_k
-    label:
-      zh_Hans: 取样数量
-      en_US: Top k
-    type: int
-    help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
-    required: false
  - name: frequency_penalty
    use_template: frequency_penalty
 pricing:
--- a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-72b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-72b-instruct.yaml
@@ -21,15 +21,6 @@ parameter_rules:
      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
  - name: top_p
    use_template: top_p
-  - name: top_k
-    label:
-      zh_Hans: 取样数量
-      en_US: Top k
-    type: int
-    help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
-    required: false
  - name: frequency_penalty
    use_template: frequency_penalty
 pricing:
--- a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-7b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-7b-instruct.yaml
@@ -21,15 +21,6 @@ parameter_rules:
      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
  - name: top_p
    use_template: top_p
-  - name: top_k
-    label:
-      zh_Hans: 取样数量
-      en_US: Top k
-    type: int
-    help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
-    required: false
  - name: frequency_penalty
    use_template: frequency_penalty
 pricing:
--- a/api/core/model_runtime/model_providers/siliconflow/llm/yi-1.5-34b-chat.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/yi-1.5-34b-chat.yaml
@@ -21,15 +21,6 @@ parameter_rules:
      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
  - name: top_p
    use_template: top_p
-  - name: top_k
-    label:
-      zh_Hans: 取样数量
-      en_US: Top k
-    type: int
-    help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
-    required: false
  - name: frequency_penalty
    use_template: frequency_penalty
 pricing:
--- a/api/core/model_runtime/model_providers/siliconflow/llm/yi-1.5-6b-chat.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/yi-1.5-6b-chat.yaml
@@ -21,15 +21,6 @@ parameter_rules:
      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
  - name: top_p
    use_template: top_p
-  - name: top_k
-    label:
-      zh_Hans: 取样数量
-      en_US: Top k
-    type: int
-    help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
-    required: false
  - name: frequency_penalty
    use_template: frequency_penalty
 pricing:
--- a/Show More
+++ b/Show More