Mirror of https://github.com/langgenius/dify.git (synced 2026-03-14 11:47:05 +00:00)

Compare commits: yanli/docx ... refactor/w (84 commits)
| Author | SHA1 | Date |
|---|---|---|
|  | 62b40b888c |  |
|  | 5dda0eff0e |  |
|  | ff824917d5 |  |
|  | 5e07cb4b0f |  |
|  | 573b4e41cb |  |
|  | 194c205ed3 |  |
|  | 7e1dc3c122 |  |
|  | 4203647c32 |  |
|  | 20e91990bf |  |
|  | f38e8cca52 |  |
|  | 00eda73ad1 |  |
|  | 8b40a89add |  |
|  | 97776eabff |  |
|  | fe561ef3d0 |  |
|  | 1104d35bbb |  |
|  | 724eaee77e |  |
|  | 4717168fe2 |  |
|  | 7fd3bd81ab |  |
|  | 0dcfac5b84 |  |
|  | b66097b5f3 |  |
|  | ceaa399351 |  |
|  | dc50e4c4f2 |  |
|  | 157208ab1e |  |
|  | 3dabdc8282 |  |
|  | ed5511ce28 |  |
|  | 68982f910e |  |
|  | c43307dae1 |  |
|  | b44b37518a |  |
|  | b170eabaf3 |  |
|  | e99628b76f |  |
|  | 60fe5e7f00 |  |
|  | 245f6b824d |  |
|  | 7d2054d4f4 |  |
|  | 07e19c0748 |  |
|  | 135b3a15a6 |  |
|  | 0045e387f5 |  |
|  | 44713a5c0f |  |
|  | d5724aebde |  |
|  | c59685748c |  |
|  | 36c1f4d506 |  |
|  | 31eba65fe0 |  |
|  | 72496a5847 |  |
|  | 8b16030d6b |  |
|  | 989db0e584 |  |
|  | a0f0c97133 |  |
|  | 1505ec37ef |  |
|  | 7f09917b84 |  |
|  | 59367c6a1c |  |
|  | f95e2acb65 |  |
|  | 8967b88584 |  |
|  | 6110c0a66c |  |
|  | 5640a2e47c |  |
|  | 60c8ee2a86 |  |
|  | a97571156b |  |
|  | 084f2eb612 |  |
|  | 02f36bd9b5 |  |
|  | 2b1d1e9587 |  |
|  | e85d20031e |  |
|  | 8a6a3ef0e4 |  |
|  | 2a3eb87326 |  |
|  | 5ff7d2c895 |  |
|  | b2df0010ce |  |
|  | f44cd70752 |  |
|  | ee78ffcdb1 |  |
|  | 5d0c3d58ac |  |
|  | a6e8e43883 |  |
|  | 0a320c63a0 |  |
|  | 1f4b6c84e0 |  |
|  | d6721a1dd3 |  |
|  | 8b12389e19 |  |
|  | 0fad13370c |  |
|  | 904289bb1d |  |
|  | 8fe376848f |  |
|  | d67f04f63e |  |
|  | 54637144c5 |  |
|  | 27f9cdedad |  |
|  | 5a5238062a |  |
|  | 7adcc17da0 |  |
|  | 1083f5c46a |  |
|  | 86b6868772 |  |
|  | 35caa04fe7 |  |
|  | 0706c52680 |  |
|  | d872594945 |  |
|  | 0ae73296d7 |  |
.github/actions/setup-web/action.yml (vendored) — 34 lines changed

```diff
@@ -1,33 +1,13 @@
 name: Setup Web Environment
 description: Setup pnpm, Node.js, and install web dependencies.
-
-inputs:
-  node-version:
-    description: Node.js version to use
-    required: false
-    default: "22"
-  install-dependencies:
-    description: Whether to install web dependencies after setting up Node.js
-    required: false
-    default: "true"

 runs:
   using: composite
   steps:
-    - name: Install pnpm
-      uses: pnpm/action-setup@41ff72655975bd51cab0327fa583b6e92b6d3061 # v4.2.0
+    - name: Setup Vite+
+      uses: voidzero-dev/setup-vp@b5d848f5a62488f3d3d920f8aa6ac318a60c5f07 # v1
       with:
         package_json_file: web/package.json
-        run_install: false
-
-    - name: Setup Node.js
-      uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v6.3.0
-      with:
-        node-version: ${{ inputs.node-version }}
-        cache: pnpm
-        cache-dependency-path: ./web/pnpm-lock.yaml
-
-    - name: Install dependencies
-      if: ${{ inputs.install-dependencies == 'true' }}
-      shell: bash
-      run: pnpm --dir web install --frozen-lockfile
+        node-version-file: "./web/.nvmrc"
+        cache: true
+        run-install: |
+          - cwd: ./web
+            args: ['--frozen-lockfile']
```
.github/dependabot.yml (vendored) — 225 lines changed

```diff
@@ -3,55 +3,210 @@ version: 2
 updates:
   - package-ecosystem: "pip"
     directory: "/api"
-    open-pull-requests-limit: 2
+    open-pull-requests-limit: 10
     schedule:
       interval: "weekly"
     groups:
-      python-dependencies:
+      flask:
+        patterns:
+          - "flask"
+          - "flask-*"
+          - "werkzeug"
+          - "gunicorn"
+      google:
+        patterns:
+          - "google-*"
+          - "googleapis-*"
+      opentelemetry:
+        patterns:
+          - "opentelemetry-*"
+      pydantic:
+        patterns:
+          - "pydantic"
+          - "pydantic-*"
+      llm:
+        patterns:
+          - "langfuse"
+          - "langsmith"
+          - "litellm"
+          - "mlflow*"
+          - "opik"
+          - "weave*"
+          - "arize*"
+          - "tiktoken"
+          - "transformers"
+      database:
+        patterns:
+          - "sqlalchemy"
+          - "psycopg2*"
+          - "psycogreen"
+          - "redis*"
+          - "alembic*"
+      storage:
+        patterns:
+          - "boto3*"
+          - "botocore*"
+          - "azure-*"
+          - "bce-*"
+          - "cos-python-*"
+          - "esdk-obs-*"
+          - "google-cloud-storage"
+          - "opendal"
+          - "oss2"
+          - "supabase*"
+          - "tos*"
+      vdb:
+        patterns:
+          - "alibabacloud*"
+          - "chromadb"
+          - "clickhouse-*"
+          - "clickzetta-*"
+          - "couchbase"
+          - "elasticsearch"
+          - "opensearch-py"
+          - "oracledb"
+          - "pgvect*"
+          - "pymilvus"
+          - "pymochow"
+          - "pyobvector"
+          - "qdrant-client"
+          - "intersystems-*"
+          - "tablestore"
+          - "tcvectordb"
+          - "tidb-vector"
+          - "upstash-*"
+          - "volcengine-*"
+          - "weaviate-*"
+          - "xinference-*"
+          - "mo-vector"
+          - "mysql-connector-*"
+      dev:
+        patterns:
+          - "coverage"
+          - "dotenv-linter"
+          - "faker"
+          - "lxml-stubs"
+          - "basedpyright"
+          - "ruff"
+          - "pytest*"
+          - "types-*"
+          - "boto3-stubs"
+          - "hypothesis"
+          - "pandas-stubs"
+          - "scipy-stubs"
+          - "import-linter"
+          - "celery-types"
+          - "mypy*"
+          - "pyrefly"
+      python-packages:
         patterns:
           - "*"
   - package-ecosystem: "uv"
     directory: "/api"
-    open-pull-requests-limit: 2
+    open-pull-requests-limit: 10
     schedule:
       interval: "weekly"
     groups:
-      uv-dependencies:
+      flask:
+        patterns:
+          - "flask"
+          - "flask-*"
+          - "werkzeug"
+          - "gunicorn"
+      google:
+        patterns:
+          - "google-*"
+          - "googleapis-*"
+      opentelemetry:
+        patterns:
+          - "opentelemetry-*"
+      pydantic:
+        patterns:
+          - "pydantic"
+          - "pydantic-*"
+      llm:
+        patterns:
+          - "langfuse"
+          - "langsmith"
+          - "litellm"
+          - "mlflow*"
+          - "opik"
+          - "weave*"
+          - "arize*"
+          - "tiktoken"
+          - "transformers"
+      database:
+        patterns:
+          - "sqlalchemy"
+          - "psycopg2*"
+          - "psycogreen"
+          - "redis*"
+          - "alembic*"
+      storage:
+        patterns:
+          - "boto3*"
+          - "botocore*"
+          - "azure-*"
+          - "bce-*"
+          - "cos-python-*"
+          - "esdk-obs-*"
+          - "google-cloud-storage"
+          - "opendal"
+          - "oss2"
+          - "supabase*"
+          - "tos*"
+      vdb:
+        patterns:
+          - "alibabacloud*"
+          - "chromadb"
+          - "clickhouse-*"
+          - "clickzetta-*"
+          - "couchbase"
+          - "elasticsearch"
+          - "opensearch-py"
+          - "oracledb"
+          - "pgvect*"
+          - "pymilvus"
+          - "pymochow"
+          - "pyobvector"
+          - "qdrant-client"
+          - "intersystems-*"
+          - "tablestore"
+          - "tcvectordb"
+          - "tidb-vector"
+          - "upstash-*"
+          - "volcengine-*"
+          - "weaviate-*"
+          - "xinference-*"
+          - "mo-vector"
+          - "mysql-connector-*"
+      dev:
+        patterns:
+          - "coverage"
+          - "dotenv-linter"
+          - "faker"
+          - "lxml-stubs"
+          - "basedpyright"
+          - "ruff"
+          - "pytest*"
+          - "types-*"
+          - "boto3-stubs"
+          - "hypothesis"
+          - "pandas-stubs"
+          - "scipy-stubs"
+          - "import-linter"
+          - "celery-types"
+          - "mypy*"
+          - "pyrefly"
+      python-packages:
         patterns:
           - "*"
   - package-ecosystem: "npm"
     directory: "/web"
   - package-ecosystem: "github-actions"
     directory: "/"
     open-pull-requests-limit: 5
     schedule:
       interval: "weekly"
     open-pull-requests-limit: 2
     ignore:
       - dependency-name: "ky"
       - dependency-name: "tailwind-merge"
         update-types: ["version-update:semver-major"]
       - dependency-name: "tailwindcss"
         update-types: ["version-update:semver-major"]
       - dependency-name: "react-syntax-highlighter"
         update-types: ["version-update:semver-major"]
       - dependency-name: "react-window"
         update-types: ["version-update:semver-major"]
     groups:
       lexical:
         patterns:
           - "lexical"
           - "@lexical/*"
       storybook:
         patterns:
           - "storybook"
           - "@storybook/*"
       eslint-group:
         patterns:
           - "*eslint*"
       npm-dependencies:
       github-actions-dependencies:
         patterns:
           - "*"
         exclude-patterns:
           - "lexical"
           - "@lexical/*"
           - "storybook"
           - "@storybook/*"
           - "*eslint*"
```
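For orientation, each Dependabot group above collects the packages whose names match its glob patterns, with the catch-all group sweeping up the rest. A rough sketch of that pattern semantics in Python (this is not Dependabot's actual matcher, which lives in its Ruby updater; it only illustrates how the globs in the config bucket names):

```python
# Rough sketch of glob-style group matching as used in the config above.
# Not Dependabot's real matcher; illustrative only.
from fnmatch import fnmatch

GROUPS: dict[str, list[str]] = {
    "flask": ["flask", "flask-*", "werkzeug", "gunicorn"],
    "database": ["sqlalchemy", "psycopg2*", "psycogreen", "redis*", "alembic*"],
    "python-packages": ["*"],  # catch-all group, checked last (insertion order)
}

def classify(package: str) -> str:
    for group, patterns in GROUPS.items():
        if any(fnmatch(package, pattern) for pattern in patterns):
            return group
    raise AssertionError("unreachable: '*' matches everything")

assert classify("flask-login") == "flask"
assert classify("psycopg2-binary") == "database"
assert classify("numpy") == "python-packages"
```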
.github/workflows/anti-slop.yml (vendored) — 2 lines changed

```diff
@@ -15,3 +15,5 @@ jobs:
       - uses: peakoss/anti-slop@v0
         with:
           github-token: ${{ secrets.GITHUB_TOKEN }}
+          close-pr: false
+          failure-add-pr-labels: "needs-revision"
```
.github/workflows/api-tests.yml (vendored) — 2 lines changed

```diff
@@ -27,7 +27,7 @@ jobs:
           persist-credentials: false

       - name: Setup UV and Python
-        uses: astral-sh/setup-uv@5a095e7a2014a4212f075830d4f7277575a9d098 # v7.3.1
+        uses: astral-sh/setup-uv@6ee6290f1cbc4156c0bdd66691b2c144ef8df19a # v7.4.0
         with:
           enable-cache: true
           python-version: ${{ matrix.python-version }}
```
.github/workflows/autofix.yml (vendored) — 25 lines changed

```diff
@@ -23,11 +23,23 @@ jobs:
             docker/.env.example
             docker/docker-compose-template.yaml
             docker/docker-compose.yaml
+      - name: Check web inputs
+        id: web-changes
+        uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
+        with:
+          files: |
+            web/**
+      - name: Check api inputs
+        id: api-changes
+        uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
+        with:
+          files: |
+            api/**
       - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
         with:
           python-version: "3.11"

-      - uses: astral-sh/setup-uv@5a095e7a2014a4212f075830d4f7277575a9d098 # v7.3.1
+      - uses: astral-sh/setup-uv@6ee6290f1cbc4156c0bdd66691b2c144ef8df19a # v7.4.0

       - name: Generate Docker Compose
         if: steps.docker-compose-changes.outputs.any_changed == 'true'
@@ -35,7 +47,8 @@ jobs:
           cd docker
           ./generate_docker_compose

-      - run: |
+      - if: steps.api-changes.outputs.any_changed == 'true'
+        run: |
           cd api
           uv sync --dev
           # fmt first to avoid line too long
@@ -46,11 +59,13 @@ jobs:
           uv run ruff format ..

       - name: count migration progress
+        if: steps.api-changes.outputs.any_changed == 'true'
         run: |
           cd api
           ./cnt_base.sh

       - name: ast-grep
+        if: steps.api-changes.outputs.any_changed == 'true'
         run: |
           # ast-grep exits 1 if no matches are found; allow idempotent runs.
           uvx --from ast-grep-cli ast-grep --pattern 'db.session.query($WHATEVER).filter($HERE)' --rewrite 'db.session.query($WHATEVER).where($HERE)' -l py --update-all || true
@@ -85,13 +100,13 @@ jobs:
           uvx --python 3.13 mdformat . --exclude ".agents/skills/**"

       - name: Setup web environment
+        if: steps.web-changes.outputs.any_changed == 'true'
         uses: ./.github/actions/setup-web
-        with:
-          node-version: "24"

       - name: ESLint autofix
+        if: steps.web-changes.outputs.any_changed == 'true'
         run: |
           cd web
-          pnpm eslint --concurrency=2 --prune-suppressions
+          vp exec eslint --concurrency=2 --prune-suppressions --quiet || true

       - uses: autofix-ci/action@7a166d7532b277f34e16238930461bf77f9d7ed8 # v1.3.3
```
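The ast-grep step above mechanically rewrites `.filter(...)` to `.where(...)` on `db.session.query(...)` chains. On SQLAlchemy 1.4+ the two spellings are interchangeable on Query objects (`.where()` is the 2.0-style alias), so the rewrite is behavior-preserving; a self-contained check of that equivalence:

```python
# Shows the ast-grep rewrite is behavior-preserving: on SQLAlchemy 1.4+,
# Query.where() is an alias for Query.filter(), producing identical SQL.
from sqlalchemy import Integer, String, create_engine
from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column

class Base(DeclarativeBase):
    pass

class Run(Base):
    __tablename__ = "runs"
    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    tenant_id: Mapped[str] = mapped_column(String)

engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as session:
    before = session.query(Run).filter(Run.tenant_id == "t1")  # pre-rewrite form
    after = session.query(Run).where(Run.tenant_id == "t1")    # post-rewrite form
    assert str(before) == str(after)  # same generated SQL
```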
.github/workflows/db-migration-test.yml (vendored) — 4 lines changed

```diff
@@ -19,7 +19,7 @@ jobs:
           persist-credentials: false

       - name: Setup UV and Python
-        uses: astral-sh/setup-uv@5a095e7a2014a4212f075830d4f7277575a9d098 # v7.3.1
+        uses: astral-sh/setup-uv@6ee6290f1cbc4156c0bdd66691b2c144ef8df19a # v7.4.0
         with:
           enable-cache: true
           python-version: "3.12"
@@ -69,7 +69,7 @@ jobs:
           persist-credentials: false

       - name: Setup UV and Python
-        uses: astral-sh/setup-uv@5a095e7a2014a4212f075830d4f7277575a9d098 # v7.3.1
+        uses: astral-sh/setup-uv@6ee6290f1cbc4156c0bdd66691b2c144ef8df19a # v7.4.0
         with:
           enable-cache: true
           python-version: "3.12"
```
.github/workflows/main-ci.yml (vendored) — 3 lines changed

```diff
@@ -62,6 +62,9 @@ jobs:
     needs: check-changes
     if: needs.check-changes.outputs.web-changed == 'true'
     uses: ./.github/workflows/web-tests.yml
+    with:
+      base_sha: ${{ github.event_name == 'pull_request' && github.event.pull_request.base.sha || github.event.before }}
+      head_sha: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}

   style-check:
     name: Style Check
```
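main-ci now threads base_sha/head_sha into web-tests so the diff-coverage gate there can compare against the right base commit. The actual gate is the Node script scripts/check-components-diff-coverage.mjs; the sketch below is a hypothetical Python rendering of only its first step, collecting the line numbers added between the two SHAs, which such a gate would then require to be covered:

```python
# Hypothetical sketch (the real gate is the Node script
# scripts/check-components-diff-coverage.mjs) of the first step of a
# diff-coverage check: list line numbers added between BASE_SHA and HEAD_SHA.
import re
import subprocess

def added_lines(base: str, head: str, path: str) -> set[int]:
    diff = subprocess.run(
        ["git", "diff", "--unified=0", f"{base}..{head}", "--", path],
        capture_output=True, text=True, check=True,
    ).stdout
    lines: set[int] = set()
    # each hunk header "@@ -a,b +start,count @@" names the added-line range
    for match in re.finditer(r"^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@", diff, re.M):
        start, count = int(match.group(1)), int(match.group(2) or "1")
        lines.update(range(start, start + count))
    return lines
```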
.github/workflows/pyrefly-diff.yml (vendored) — 2 lines changed

```diff
@@ -22,7 +22,7 @@ jobs:
           fetch-depth: 0

       - name: Setup Python & UV
-        uses: astral-sh/setup-uv@5a095e7a2014a4212f075830d4f7277575a9d098 # v7.3.1
+        uses: astral-sh/setup-uv@6ee6290f1cbc4156c0bdd66691b2c144ef8df19a # v7.4.0
         with:
           enable-cache: true
```
.github/workflows/style.yml (vendored) — 10 lines changed

```diff
@@ -33,7 +33,7 @@ jobs:

       - name: Setup UV and Python
         if: steps.changed-files.outputs.any_changed == 'true'
-        uses: astral-sh/setup-uv@5a095e7a2014a4212f075830d4f7277575a9d098 # v7.3.1
+        uses: astral-sh/setup-uv@6ee6290f1cbc4156c0bdd66691b2c144ef8df19a # v7.4.0
         with:
           enable-cache: false
           python-version: "3.12"
@@ -88,7 +88,7 @@ jobs:
         if: steps.changed-files.outputs.any_changed == 'true'
         working-directory: ./web
         run: |
-          pnpm run lint:ci
+          vp run lint:ci
           # pnpm run lint:report
           # continue-on-error: true

@@ -102,17 +102,17 @@ jobs:
       - name: Web tsslint
         if: steps.changed-files.outputs.any_changed == 'true'
         working-directory: ./web
-        run: pnpm run lint:tss
+        run: vp run lint:tss

       - name: Web type check
         if: steps.changed-files.outputs.any_changed == 'true'
         working-directory: ./web
-        run: pnpm run type-check
+        run: vp run type-check

       - name: Web dead code check
         if: steps.changed-files.outputs.any_changed == 'true'
         working-directory: ./web
-        run: pnpm run knip
+        run: vp run knip

   superlinter:
     name: SuperLinter
```
.github/workflows/translate-i18n-claude.yml (vendored) — 2 lines changed

```diff
@@ -50,8 +50,6 @@ jobs:

       - name: Setup web environment
         uses: ./.github/actions/setup-web
-        with:
-          install-dependencies: "false"

       - name: Detect changed files and generate diff
         id: detect_changes
```
.github/workflows/vdb-tests.yml (vendored) — 2 lines changed

```diff
@@ -31,7 +31,7 @@ jobs:
           remove_tool_cache: true

       - name: Setup UV and Python
-        uses: astral-sh/setup-uv@5a095e7a2014a4212f075830d4f7277575a9d098 # v7.3.1
+        uses: astral-sh/setup-uv@6ee6290f1cbc4156c0bdd66691b2c144ef8df19a # v7.4.0
         with:
           enable-cache: true
           python-version: ${{ matrix.python-version }}
```
.github/workflows/web-tests.yml (vendored) — 24 lines changed

```diff
@@ -2,6 +2,13 @@ name: Web Tests

 on:
   workflow_call:
+    inputs:
+      base_sha:
+        required: false
+        type: string
+      head_sha:
+        required: false
+        type: string

 permissions:
   contents: read
@@ -14,6 +21,8 @@ jobs:
   test:
     name: Web Tests (${{ matrix.shardIndex }}/${{ matrix.shardTotal }})
     runs-on: ubuntu-latest
+    env:
+      VITEST_COVERAGE_SCOPE: app-components
     strategy:
       fail-fast: false
       matrix:
@@ -34,7 +43,7 @@ jobs:
         uses: ./.github/actions/setup-web

       - name: Run tests
-        run: pnpm vitest run --reporter=blob --shard=${{ matrix.shardIndex }}/${{ matrix.shardTotal }} --coverage
+        run: vp test run --reporter=blob --shard=${{ matrix.shardIndex }}/${{ matrix.shardTotal }} --coverage

       - name: Upload blob report
         if: ${{ !cancelled() }}
@@ -50,6 +59,8 @@ jobs:
     if: ${{ !cancelled() }}
     needs: [test]
     runs-on: ubuntu-latest
+    env:
+      VITEST_COVERAGE_SCOPE: app-components
     defaults:
       run:
         shell: bash
@@ -59,6 +70,7 @@ jobs:
       - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
         with:
+          fetch-depth: 0
           persist-credentials: false

       - name: Setup web environment
@@ -72,7 +84,13 @@ jobs:
           merge-multiple: true

       - name: Merge reports
-        run: pnpm vitest --merge-reports --coverage --silent=passed-only
+        run: vp test --merge-reports --reporter=json --reporter=agent --coverage
+
+      - name: Check app/components diff coverage
+        env:
+          BASE_SHA: ${{ inputs.base_sha }}
+          HEAD_SHA: ${{ inputs.head_sha }}
+        run: node ./scripts/check-components-diff-coverage.mjs

       - name: Coverage Summary
         if: always()
@@ -429,4 +447,4 @@ jobs:
       - name: Web build check
         if: steps.changed-files.outputs.any_changed == 'true'
         working-directory: ./web
-        run: pnpm run build
+        run: vp run build
```
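The test matrix splits the suite across shards with `--shard=i/n` and later merges the blob reports. Vitest's internal assignment strategy is its own; the sketch below only illustrates the invariant that sharding relies on, namely that the n shards partition the test set, using a toy round-robin split:

```python
# Toy round-robin split illustrating the --shard=i/n invariant: the n
# shards partition the tests (none dropped, none duplicated). Vitest's
# actual assignment strategy may differ.
def shard(tests: list[str], index: int, total: int) -> list[str]:
    return [t for pos, t in enumerate(tests) if pos % total == index - 1]

tests = [f"suite_{i}" for i in range(10)]
parts = [shard(tests, i, 4) for i in range(1, 5)]
flattened = [t for part in parts for t in part]
assert sorted(flattened) == sorted(tests)  # a true partition
```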
```diff
@@ -188,7 +188,6 @@ VECTOR_INDEX_NAME_PREFIX=Vector_index
 # Weaviate configuration
 WEAVIATE_ENDPOINT=http://localhost:8080
 WEAVIATE_API_KEY=WVF5YThaHlkYwhGUSmCRgsX3tD5ngdN8pkih
 WEAVIATE_GRPC_ENABLED=false
 WEAVIATE_BATCH_SIZE=100
 WEAVIATE_TOKENIZATION=word
```
```diff
@@ -43,7 +43,6 @@ forbidden_modules =
     extensions.ext_redis
 allow_indirect_imports = True
 ignore_imports =
     dify_graph.nodes.agent.agent_node -> extensions.ext_database
     dify_graph.nodes.llm.node -> extensions.ext_database
     dify_graph.model_runtime.model_providers.__base.ai_model -> extensions.ext_redis
     dify_graph.model_runtime.model_providers.model_provider_factory -> extensions.ext_redis
@@ -90,9 +89,6 @@ forbidden_modules =
     core.trigger
     core.variables
 ignore_imports =
     dify_graph.nodes.agent.agent_node -> core.model_manager
     dify_graph.nodes.agent.agent_node -> core.provider_manager
     dify_graph.nodes.agent.agent_node -> core.tools.tool_manager
     dify_graph.nodes.llm.llm_utils -> core.model_manager
     dify_graph.nodes.llm.protocols -> core.model_manager
     dify_graph.nodes.llm.llm_utils -> dify_graph.model_runtime.model_providers.__base.large_language_model
@@ -100,8 +96,6 @@ ignore_imports =
     dify_graph.nodes.tool.tool_node -> core.callback_handler.workflow_tool_callback_handler
     dify_graph.nodes.tool.tool_node -> core.tools.tool_engine
     dify_graph.nodes.tool.tool_node -> core.tools.tool_manager
     dify_graph.nodes.agent.agent_node -> core.agent.entities
     dify_graph.nodes.agent.agent_node -> core.agent.plugin_entities
     dify_graph.nodes.knowledge_retrieval.knowledge_retrieval_node -> core.app.app_config.entities
     dify_graph.nodes.parameter_extractor.parameter_extractor_node -> core.prompt.advanced_prompt_transform
     dify_graph.nodes.parameter_extractor.parameter_extractor_node -> core.prompt.simple_prompt_transform
@@ -110,12 +104,10 @@ ignore_imports =
     dify_graph.nodes.parameter_extractor.parameter_extractor_node -> core.model_manager
     dify_graph.nodes.question_classifier.question_classifier_node -> core.model_manager
     dify_graph.nodes.tool.tool_node -> core.tools.utils.message_transformer
     dify_graph.nodes.agent.agent_node -> models.model
     dify_graph.nodes.llm.node -> core.helper.code_executor
     dify_graph.nodes.llm.node -> core.llm_generator.output_parser.errors
     dify_graph.nodes.llm.node -> core.llm_generator.output_parser.structured_output
     dify_graph.nodes.llm.node -> core.model_manager
     dify_graph.nodes.agent.entities -> core.prompt.entities.advanced_prompt_entities
     dify_graph.nodes.llm.entities -> core.prompt.entities.advanced_prompt_entities
     dify_graph.nodes.llm.node -> core.prompt.entities.advanced_prompt_entities
     dify_graph.nodes.llm.node -> core.prompt.utils.prompt_message_util
@@ -126,15 +118,11 @@ ignore_imports =
     dify_graph.nodes.question_classifier.question_classifier_node -> core.prompt.utils.prompt_message_util
     dify_graph.nodes.knowledge_index.entities -> core.rag.retrieval.retrieval_methods
     dify_graph.nodes.llm.node -> models.dataset
     dify_graph.nodes.agent.agent_node -> core.tools.utils.message_transformer
     dify_graph.nodes.llm.file_saver -> core.tools.signature
     dify_graph.nodes.llm.file_saver -> core.tools.tool_file_manager
     dify_graph.nodes.tool.tool_node -> core.tools.errors
     dify_graph.nodes.agent.agent_node -> extensions.ext_database
     dify_graph.nodes.llm.node -> extensions.ext_database
     dify_graph.nodes.agent.agent_node -> models
     dify_graph.nodes.llm.node -> models.model
     dify_graph.nodes.agent.agent_node -> services
     dify_graph.nodes.tool.tool_node -> services
     dify_graph.model_runtime.model_providers.__base.ai_model -> configs
     dify_graph.model_runtime.model_providers.__base.ai_model -> extensions.ext_redis
```
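The hunks above edit import-linter-style contracts: dify_graph modules are forbidden from importing extensions.* and core.* except through the explicitly whitelisted edges, and this change shrinks the whitelist. A toy checker in the same spirit (import-linter's real engine builds a whole-project import graph; this sketch only scans a single module's AST):

```python
# Toy forbidden-import checker in the spirit of the contracts above.
# The real tool analyzes the full project import graph; this only
# inspects one source string.
import ast

def forbidden_imports(source: str, banned_prefixes: tuple[str, ...]) -> list[str]:
    hits: list[str] = []
    for node in ast.walk(ast.parse(source)):
        if isinstance(node, ast.Import):
            hits.extend(a.name for a in node.names if a.name.startswith(banned_prefixes))
        elif isinstance(node, ast.ImportFrom) and node.module:
            if node.module.startswith(banned_prefixes):
                hits.append(node.module)
    return hits

code = "from extensions.ext_redis import redis_client"
assert forbidden_imports(code, ("extensions.",)) == ["extensions.ext_redis"]
```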
api/commands.py — 2813 lines changed (file diff suppressed because it is too large)
api/commands/__init__.py — 71 lines (new file)

```python
"""
CLI command modules extracted from `commands.py`.
"""

from .account import create_tenant, reset_email, reset_password
from .plugin import (
    extract_plugins,
    extract_unique_plugins,
    install_plugins,
    install_rag_pipeline_plugins,
    migrate_data_for_plugin,
    setup_datasource_oauth_client,
    setup_system_tool_oauth_client,
    setup_system_trigger_oauth_client,
    transform_datasource_credentials,
)
from .retention import (
    archive_workflow_runs,
    clean_expired_messages,
    clean_workflow_runs,
    cleanup_orphaned_draft_variables,
    clear_free_plan_tenant_expired_logs,
    delete_archived_workflow_runs,
    export_app_messages,
    restore_workflow_runs,
)
from .storage import clear_orphaned_file_records, file_usage, migrate_oss, remove_orphaned_files_on_storage
from .system import convert_to_agent_apps, fix_app_site_missing, reset_encrypt_key_pair, upgrade_db
from .vector import (
    add_qdrant_index,
    migrate_annotation_vector_database,
    migrate_knowledge_vector_database,
    old_metadata_migration,
    vdb_migrate,
)

__all__ = [
    "add_qdrant_index",
    "archive_workflow_runs",
    "clean_expired_messages",
    "clean_workflow_runs",
    "cleanup_orphaned_draft_variables",
    "clear_free_plan_tenant_expired_logs",
    "clear_orphaned_file_records",
    "convert_to_agent_apps",
    "create_tenant",
    "delete_archived_workflow_runs",
    "export_app_messages",
    "extract_plugins",
    "extract_unique_plugins",
    "file_usage",
    "fix_app_site_missing",
    "install_plugins",
    "install_rag_pipeline_plugins",
    "migrate_annotation_vector_database",
    "migrate_data_for_plugin",
    "migrate_knowledge_vector_database",
    "migrate_oss",
    "old_metadata_migration",
    "remove_orphaned_files_on_storage",
    "reset_email",
    "reset_encrypt_key_pair",
    "reset_password",
    "restore_workflow_runs",
    "setup_datasource_oauth_client",
    "setup_system_tool_oauth_client",
    "setup_system_trigger_oauth_client",
    "transform_datasource_credentials",
    "upgrade_db",
    "vdb_migrate",
]
```
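The package re-exports every command so callers can import them from one place. One plausible consumer (hedged: the actual registration site lives elsewhere in the repo, likely the app factory) is a loop over `__all__` that attaches each click command to the Flask CLI:

```python
# Hedged sketch of how the re-exported click commands might be attached
# to the Flask CLI. The real registration site is elsewhere in the repo;
# this only shows why a flat __all__ is convenient.
from flask import Flask

import commands  # the package defined above


def register_commands(app: Flask) -> None:
    for name in commands.__all__:
        # every name in __all__ is a click.Command, so it can be added
        # to Flask's built-in click group directly
        app.cli.add_command(getattr(commands, name))
```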
api/commands/account.py — 130 lines (new file)

```python
import base64
import secrets

import click
from sqlalchemy.orm import sessionmaker

from constants.languages import languages
from extensions.ext_database import db
from libs.helper import email as email_validate
from libs.password import hash_password, password_pattern, valid_password
from services.account_service import AccountService, RegisterService, TenantService


@click.command("reset-password", help="Reset the account password.")
@click.option("--email", prompt=True, help="Account email to reset password for")
@click.option("--new-password", prompt=True, help="New password")
@click.option("--password-confirm", prompt=True, help="Confirm new password")
def reset_password(email, new_password, password_confirm):
    """
    Reset password of owner account
    Only available in SELF_HOSTED mode
    """
    if str(new_password).strip() != str(password_confirm).strip():
        click.echo(click.style("Passwords do not match.", fg="red"))
        return
    normalized_email = email.strip().lower()

    with sessionmaker(db.engine, expire_on_commit=False).begin() as session:
        account = AccountService.get_account_by_email_with_case_fallback(email.strip(), session=session)

        if not account:
            click.echo(click.style(f"Account not found for email: {email}", fg="red"))
            return

        try:
            valid_password(new_password)
        except:
            click.echo(click.style(f"Invalid password. Must match {password_pattern}", fg="red"))
            return

        # generate password salt
        salt = secrets.token_bytes(16)
        base64_salt = base64.b64encode(salt).decode()

        # encrypt password with salt
        password_hashed = hash_password(new_password, salt)
        base64_password_hashed = base64.b64encode(password_hashed).decode()
        account.password = base64_password_hashed
        account.password_salt = base64_salt
    AccountService.reset_login_error_rate_limit(normalized_email)
    click.echo(click.style("Password reset successfully.", fg="green"))


@click.command("reset-email", help="Reset the account email.")
@click.option("--email", prompt=True, help="Current account email")
@click.option("--new-email", prompt=True, help="New email")
@click.option("--email-confirm", prompt=True, help="Confirm new email")
def reset_email(email, new_email, email_confirm):
    """
    Replace account email
    :return:
    """
    if str(new_email).strip() != str(email_confirm).strip():
        click.echo(click.style("New emails do not match.", fg="red"))
        return
    normalized_new_email = new_email.strip().lower()

    with sessionmaker(db.engine, expire_on_commit=False).begin() as session:
        account = AccountService.get_account_by_email_with_case_fallback(email.strip(), session=session)

        if not account:
            click.echo(click.style(f"Account not found for email: {email}", fg="red"))
            return

        try:
            email_validate(normalized_new_email)
        except:
            click.echo(click.style(f"Invalid email: {new_email}", fg="red"))
            return

        account.email = normalized_new_email
    click.echo(click.style("Email updated successfully.", fg="green"))


@click.command("create-tenant", help="Create account and tenant.")
@click.option("--email", prompt=True, help="Tenant account email.")
@click.option("--name", prompt=True, help="Workspace name.")
@click.option("--language", prompt=True, help="Account language, default: en-US.")
def create_tenant(email: str, language: str | None = None, name: str | None = None):
    """
    Create tenant account
    """
    if not email:
        click.echo(click.style("Email is required.", fg="red"))
        return

    # Create account
    email = email.strip().lower()

    if "@" not in email:
        click.echo(click.style("Invalid email address.", fg="red"))
        return

    account_name = email.split("@")[0]

    if language not in languages:
        language = "en-US"

    # Validates name encoding for non-Latin characters.
    name = name.strip().encode("utf-8").decode("utf-8") if name else None

    # generate random password
    new_password = secrets.token_urlsafe(16)

    # register account
    account = RegisterService.register(
        email=email,
        name=account_name,
        password=new_password,
        language=language,
        create_workspace_required=False,
    )
    TenantService.create_owner_tenant_if_not_exist(account, name)

    click.echo(
        click.style(
            f"Account and tenant created.\nAccount: {email}\nPassword: {new_password}",
            fg="green",
        )
    )
```
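reset_password stores a random 16-byte salt and the salted hash, both base64-encoded. For orientation, a sketch of the matching verification path; this assumes hash_password(password, salt) -> bytes, the contract implied by reset_password above, and is not code from this diff:

```python
# Sketch of the verification counterpart to the reset flow above.
# Assumes hash_password(password, salt) -> bytes (contract implied by
# reset_password); illustrative only, not part of the diff.
import base64
import hmac

from libs.password import hash_password  # same helper reset_password uses


def verify_password(candidate: str, stored_password_b64: str, salt_b64: str) -> bool:
    salt = base64.b64decode(salt_b64)
    candidate_hashed = hash_password(candidate, salt)
    # compare_digest keeps the comparison constant-time
    return hmac.compare_digest(candidate_hashed, base64.b64decode(stored_password_b64))
```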
api/commands/plugin.py — 467 lines (new file)

```python
import json
import logging
from typing import Any

import click
from pydantic import TypeAdapter

from configs import dify_config
from core.helper import encrypter
from core.plugin.entities.plugin_daemon import CredentialType
from core.plugin.impl.plugin import PluginInstaller
from core.tools.utils.system_oauth_encryption import encrypt_system_oauth_params
from extensions.ext_database import db
from models import Tenant
from models.oauth import DatasourceOauthParamConfig, DatasourceProvider
from models.provider_ids import DatasourceProviderID, ToolProviderID
from models.source import DataSourceApiKeyAuthBinding, DataSourceOauthBinding
from models.tools import ToolOAuthSystemClient
from services.plugin.data_migration import PluginDataMigration
from services.plugin.plugin_migration import PluginMigration
from services.plugin.plugin_service import PluginService

logger = logging.getLogger(__name__)


@click.command("setup-system-tool-oauth-client", help="Setup system tool oauth client.")
@click.option("--provider", prompt=True, help="Provider name")
@click.option("--client-params", prompt=True, help="Client Params")
def setup_system_tool_oauth_client(provider, client_params):
    """
    Setup system tool oauth client
    """
    provider_id = ToolProviderID(provider)
    provider_name = provider_id.provider_name
    plugin_id = provider_id.plugin_id

    try:
        # json validate
        click.echo(click.style(f"Validating client params: {client_params}", fg="yellow"))
        client_params_dict = TypeAdapter(dict[str, Any]).validate_json(client_params)
        click.echo(click.style("Client params validated successfully.", fg="green"))

        click.echo(click.style(f"Encrypting client params: {client_params}", fg="yellow"))
        click.echo(click.style(f"Using SECRET_KEY: `{dify_config.SECRET_KEY}`", fg="yellow"))
        oauth_client_params = encrypt_system_oauth_params(client_params_dict)
        click.echo(click.style("Client params encrypted successfully.", fg="green"))
    except Exception as e:
        click.echo(click.style(f"Error parsing client params: {str(e)}", fg="red"))
        return

    deleted_count = (
        db.session.query(ToolOAuthSystemClient)
        .filter_by(
            provider=provider_name,
            plugin_id=plugin_id,
        )
        .delete()
    )
    if deleted_count > 0:
        click.echo(click.style(f"Deleted {deleted_count} existing oauth client params.", fg="yellow"))

    oauth_client = ToolOAuthSystemClient(
        provider=provider_name,
        plugin_id=plugin_id,
        encrypted_oauth_params=oauth_client_params,
    )
    db.session.add(oauth_client)
    db.session.commit()
    click.echo(click.style(f"OAuth client params setup successfully. id: {oauth_client.id}", fg="green"))


@click.command("setup-system-trigger-oauth-client", help="Setup system trigger oauth client.")
@click.option("--provider", prompt=True, help="Provider name")
@click.option("--client-params", prompt=True, help="Client Params")
def setup_system_trigger_oauth_client(provider, client_params):
    """
    Setup system trigger oauth client
    """
    from models.provider_ids import TriggerProviderID
    from models.trigger import TriggerOAuthSystemClient

    provider_id = TriggerProviderID(provider)
    provider_name = provider_id.provider_name
    plugin_id = provider_id.plugin_id

    try:
        # json validate
        click.echo(click.style(f"Validating client params: {client_params}", fg="yellow"))
        client_params_dict = TypeAdapter(dict[str, Any]).validate_json(client_params)
        click.echo(click.style("Client params validated successfully.", fg="green"))

        click.echo(click.style(f"Encrypting client params: {client_params}", fg="yellow"))
        click.echo(click.style(f"Using SECRET_KEY: `{dify_config.SECRET_KEY}`", fg="yellow"))
        oauth_client_params = encrypt_system_oauth_params(client_params_dict)
        click.echo(click.style("Client params encrypted successfully.", fg="green"))
    except Exception as e:
        click.echo(click.style(f"Error parsing client params: {str(e)}", fg="red"))
        return

    deleted_count = (
        db.session.query(TriggerOAuthSystemClient)
        .filter_by(
            provider=provider_name,
            plugin_id=plugin_id,
        )
        .delete()
    )
    if deleted_count > 0:
        click.echo(click.style(f"Deleted {deleted_count} existing oauth client params.", fg="yellow"))

    oauth_client = TriggerOAuthSystemClient(
        provider=provider_name,
        plugin_id=plugin_id,
        encrypted_oauth_params=oauth_client_params,
    )
    db.session.add(oauth_client)
    db.session.commit()
    click.echo(click.style(f"OAuth client params setup successfully. id: {oauth_client.id}", fg="green"))


@click.command("setup-datasource-oauth-client", help="Setup datasource oauth client.")
@click.option("--provider", prompt=True, help="Provider name")
@click.option("--client-params", prompt=True, help="Client Params")
def setup_datasource_oauth_client(provider, client_params):
    """
    Setup datasource oauth client
    """
    provider_id = DatasourceProviderID(provider)
    provider_name = provider_id.provider_name
    plugin_id = provider_id.plugin_id

    try:
        # json validate
        click.echo(click.style(f"Validating client params: {client_params}", fg="yellow"))
        client_params_dict = TypeAdapter(dict[str, Any]).validate_json(client_params)
        click.echo(click.style("Client params validated successfully.", fg="green"))
    except Exception as e:
        click.echo(click.style(f"Error parsing client params: {str(e)}", fg="red"))
        return

    click.echo(click.style(f"Ready to delete existing oauth client params: {provider_name}", fg="yellow"))
    deleted_count = (
        db.session.query(DatasourceOauthParamConfig)
        .filter_by(
            provider=provider_name,
            plugin_id=plugin_id,
        )
        .delete()
    )
    if deleted_count > 0:
        click.echo(click.style(f"Deleted {deleted_count} existing oauth client params.", fg="yellow"))

    click.echo(click.style(f"Ready to setup datasource oauth client: {provider_name}", fg="yellow"))
    oauth_client = DatasourceOauthParamConfig(
        provider=provider_name,
        plugin_id=plugin_id,
        system_credentials=client_params_dict,
    )
    db.session.add(oauth_client)
    db.session.commit()
    click.echo(click.style(f"provider: {provider_name}", fg="green"))
    click.echo(click.style(f"plugin_id: {plugin_id}", fg="green"))
    click.echo(click.style(f"params: {json.dumps(client_params_dict, indent=2, ensure_ascii=False)}", fg="green"))
    click.echo(click.style(f"Datasource oauth client setup successfully. id: {oauth_client.id}", fg="green"))


@click.command("transform-datasource-credentials", help="Transform datasource credentials.")
@click.option(
    "--environment", prompt=True, help="the environment to transform datasource credentials", default="online"
)
def transform_datasource_credentials(environment: str):
    """
    Transform datasource credentials
    """
    try:
        installer_manager = PluginInstaller()
        plugin_migration = PluginMigration()

        notion_plugin_id = "langgenius/notion_datasource"
        firecrawl_plugin_id = "langgenius/firecrawl_datasource"
        jina_plugin_id = "langgenius/jina_datasource"
        if environment == "online":
            notion_plugin_unique_identifier = plugin_migration._fetch_plugin_unique_identifier(notion_plugin_id)  # pyright: ignore[reportPrivateUsage]
            firecrawl_plugin_unique_identifier = plugin_migration._fetch_plugin_unique_identifier(firecrawl_plugin_id)  # pyright: ignore[reportPrivateUsage]
            jina_plugin_unique_identifier = plugin_migration._fetch_plugin_unique_identifier(jina_plugin_id)  # pyright: ignore[reportPrivateUsage]
        else:
            notion_plugin_unique_identifier = None
            firecrawl_plugin_unique_identifier = None
            jina_plugin_unique_identifier = None
        oauth_credential_type = CredentialType.OAUTH2
        api_key_credential_type = CredentialType.API_KEY

        # deal notion credentials
        deal_notion_count = 0
        notion_credentials = db.session.query(DataSourceOauthBinding).filter_by(provider="notion").all()
        if notion_credentials:
            notion_credentials_tenant_mapping: dict[str, list[DataSourceOauthBinding]] = {}
            for notion_credential in notion_credentials:
                tenant_id = notion_credential.tenant_id
                if tenant_id not in notion_credentials_tenant_mapping:
                    notion_credentials_tenant_mapping[tenant_id] = []
                notion_credentials_tenant_mapping[tenant_id].append(notion_credential)
            for tenant_id, notion_tenant_credentials in notion_credentials_tenant_mapping.items():
                tenant = db.session.query(Tenant).filter_by(id=tenant_id).first()
                if not tenant:
                    continue
                try:
                    # check notion plugin is installed
                    installed_plugins = installer_manager.list_plugins(tenant_id)
                    installed_plugins_ids = [plugin.plugin_id for plugin in installed_plugins]
                    if notion_plugin_id not in installed_plugins_ids:
                        if notion_plugin_unique_identifier:
                            # install notion plugin
                            PluginService.install_from_marketplace_pkg(tenant_id, [notion_plugin_unique_identifier])
                    auth_count = 0
                    for notion_tenant_credential in notion_tenant_credentials:
                        auth_count += 1
                        # get credential oauth params
                        access_token = notion_tenant_credential.access_token
                        # notion info
                        notion_info = notion_tenant_credential.source_info
                        workspace_id = notion_info.get("workspace_id")
                        workspace_name = notion_info.get("workspace_name")
                        workspace_icon = notion_info.get("workspace_icon")
                        new_credentials = {
                            "integration_secret": encrypter.encrypt_token(tenant_id, access_token),
                            "workspace_id": workspace_id,
                            "workspace_name": workspace_name,
                            "workspace_icon": workspace_icon,
                        }
                        datasource_provider = DatasourceProvider(
                            provider="notion_datasource",
                            tenant_id=tenant_id,
                            plugin_id=notion_plugin_id,
                            auth_type=oauth_credential_type.value,
                            encrypted_credentials=new_credentials,
                            name=f"Auth {auth_count}",
                            avatar_url=workspace_icon or "default",
                            is_default=False,
                        )
                        db.session.add(datasource_provider)
                        deal_notion_count += 1
                except Exception as e:
                    click.echo(
                        click.style(
                            f"Error transforming notion credentials: {str(e)}, tenant_id: {tenant_id}", fg="red"
                        )
                    )
                    continue
        db.session.commit()

        # deal firecrawl credentials
        deal_firecrawl_count = 0
        firecrawl_credentials = db.session.query(DataSourceApiKeyAuthBinding).filter_by(provider="firecrawl").all()
        if firecrawl_credentials:
            firecrawl_credentials_tenant_mapping: dict[str, list[DataSourceApiKeyAuthBinding]] = {}
            for firecrawl_credential in firecrawl_credentials:
                tenant_id = firecrawl_credential.tenant_id
                if tenant_id not in firecrawl_credentials_tenant_mapping:
                    firecrawl_credentials_tenant_mapping[tenant_id] = []
                firecrawl_credentials_tenant_mapping[tenant_id].append(firecrawl_credential)
            for tenant_id, firecrawl_tenant_credentials in firecrawl_credentials_tenant_mapping.items():
                tenant = db.session.query(Tenant).filter_by(id=tenant_id).first()
                if not tenant:
                    continue
                try:
                    # check firecrawl plugin is installed
                    installed_plugins = installer_manager.list_plugins(tenant_id)
                    installed_plugins_ids = [plugin.plugin_id for plugin in installed_plugins]
                    if firecrawl_plugin_id not in installed_plugins_ids:
                        if firecrawl_plugin_unique_identifier:
                            # install firecrawl plugin
                            PluginService.install_from_marketplace_pkg(tenant_id, [firecrawl_plugin_unique_identifier])

                    auth_count = 0
                    for firecrawl_tenant_credential in firecrawl_tenant_credentials:
                        auth_count += 1
                        if not firecrawl_tenant_credential.credentials:
                            click.echo(
                                click.style(
                                    f"Skipping firecrawl credential for tenant {tenant_id} due to missing credentials.",
                                    fg="yellow",
                                )
                            )
                            continue
                        # get credential api key
                        credentials_json = json.loads(firecrawl_tenant_credential.credentials)
                        api_key = credentials_json.get("config", {}).get("api_key")
                        base_url = credentials_json.get("config", {}).get("base_url")
                        new_credentials = {
                            "firecrawl_api_key": api_key,
                            "base_url": base_url,
                        }
                        datasource_provider = DatasourceProvider(
                            provider="firecrawl",
                            tenant_id=tenant_id,
                            plugin_id=firecrawl_plugin_id,
                            auth_type=api_key_credential_type.value,
                            encrypted_credentials=new_credentials,
                            name=f"Auth {auth_count}",
                            avatar_url="default",
                            is_default=False,
                        )
                        db.session.add(datasource_provider)
                        deal_firecrawl_count += 1
                except Exception as e:
                    click.echo(
                        click.style(
                            f"Error transforming firecrawl credentials: {str(e)}, tenant_id: {tenant_id}", fg="red"
                        )
                    )
                    continue
        db.session.commit()

        # deal jina credentials
        deal_jina_count = 0
        jina_credentials = db.session.query(DataSourceApiKeyAuthBinding).filter_by(provider="jinareader").all()
        if jina_credentials:
            jina_credentials_tenant_mapping: dict[str, list[DataSourceApiKeyAuthBinding]] = {}
            for jina_credential in jina_credentials:
                tenant_id = jina_credential.tenant_id
                if tenant_id not in jina_credentials_tenant_mapping:
                    jina_credentials_tenant_mapping[tenant_id] = []
                jina_credentials_tenant_mapping[tenant_id].append(jina_credential)
            for tenant_id, jina_tenant_credentials in jina_credentials_tenant_mapping.items():
                tenant = db.session.query(Tenant).filter_by(id=tenant_id).first()
                if not tenant:
                    continue
                try:
                    # check jina plugin is installed
                    installed_plugins = installer_manager.list_plugins(tenant_id)
                    installed_plugins_ids = [plugin.plugin_id for plugin in installed_plugins]
                    if jina_plugin_id not in installed_plugins_ids:
                        if jina_plugin_unique_identifier:
                            # install jina plugin
                            logger.debug("Installing Jina plugin %s", jina_plugin_unique_identifier)
                            PluginService.install_from_marketplace_pkg(tenant_id, [jina_plugin_unique_identifier])

                    auth_count = 0
                    for jina_tenant_credential in jina_tenant_credentials:
                        auth_count += 1
                        if not jina_tenant_credential.credentials:
                            click.echo(
                                click.style(
                                    f"Skipping jina credential for tenant {tenant_id} due to missing credentials.",
                                    fg="yellow",
                                )
                            )
                            continue
                        # get credential api key
                        credentials_json = json.loads(jina_tenant_credential.credentials)
                        api_key = credentials_json.get("config", {}).get("api_key")
                        new_credentials = {
                            "integration_secret": api_key,
                        }
                        datasource_provider = DatasourceProvider(
                            provider="jinareader",
                            tenant_id=tenant_id,
                            plugin_id=jina_plugin_id,
                            auth_type=api_key_credential_type.value,
                            encrypted_credentials=new_credentials,
                            name=f"Auth {auth_count}",
                            avatar_url="default",
                            is_default=False,
                        )
                        db.session.add(datasource_provider)
                        deal_jina_count += 1
                except Exception as e:
                    click.echo(
                        click.style(f"Error transforming jina credentials: {str(e)}, tenant_id: {tenant_id}", fg="red")
                    )
                    continue
        db.session.commit()
    except Exception as e:
        click.echo(click.style(f"Error parsing client params: {str(e)}", fg="red"))
        return
    click.echo(click.style(f"Transforming notion successfully. deal_notion_count: {deal_notion_count}", fg="green"))
    click.echo(
        click.style(f"Transforming firecrawl successfully. deal_firecrawl_count: {deal_firecrawl_count}", fg="green")
    )
    click.echo(click.style(f"Transforming jina successfully. deal_jina_count: {deal_jina_count}", fg="green"))


@click.command("migrate-data-for-plugin", help="Migrate data for plugin.")
def migrate_data_for_plugin():
    """
    Migrate data for plugin.
    """
    click.echo(click.style("Starting migrate data for plugin.", fg="white"))

    PluginDataMigration.migrate()

    click.echo(click.style("Migrate data for plugin completed.", fg="green"))


@click.command("extract-plugins", help="Extract plugins.")
@click.option("--output_file", prompt=True, help="The file to store the extracted plugins.", default="plugins.jsonl")
@click.option("--workers", prompt=True, help="The number of workers to extract plugins.", default=10)
def extract_plugins(output_file: str, workers: int):
    """
    Extract plugins.
    """
    click.echo(click.style("Starting extract plugins.", fg="white"))

    PluginMigration.extract_plugins(output_file, workers)

    click.echo(click.style("Extract plugins completed.", fg="green"))


@click.command("extract-unique-identifiers", help="Extract unique identifiers.")
@click.option(
    "--output_file",
    prompt=True,
    help="The file to store the extracted unique identifiers.",
    default="unique_identifiers.json",
)
@click.option(
    "--input_file", prompt=True, help="The file to store the extracted unique identifiers.", default="plugins.jsonl"
)
def extract_unique_plugins(output_file: str, input_file: str):
    """
    Extract unique plugins.
    """
    click.echo(click.style("Starting extract unique plugins.", fg="white"))

    PluginMigration.extract_unique_plugins_to_file(input_file, output_file)

    click.echo(click.style("Extract unique plugins completed.", fg="green"))


@click.command("install-plugins", help="Install plugins.")
@click.option(
    "--input_file", prompt=True, help="The file to store the extracted unique identifiers.", default="plugins.jsonl"
)
@click.option(
    "--output_file", prompt=True, help="The file to store the installed plugins.", default="installed_plugins.jsonl"
)
@click.option("--workers", prompt=True, help="The number of workers to install plugins.", default=100)
def install_plugins(input_file: str, output_file: str, workers: int):
    """
    Install plugins.
    """
    click.echo(click.style("Starting install plugins.", fg="white"))

    PluginMigration.install_plugins(input_file, output_file, workers)

    click.echo(click.style("Install plugins completed.", fg="green"))


@click.command("install-rag-pipeline-plugins", help="Install rag pipeline plugins.")
@click.option(
    "--input_file", prompt=True, help="The file to store the extracted unique identifiers.", default="plugins.jsonl"
)
@click.option(
    "--output_file", prompt=True, help="The file to store the installed plugins.", default="installed_plugins.jsonl"
)
@click.option("--workers", prompt=True, help="The number of workers to install plugins.", default=100)
def install_rag_pipeline_plugins(input_file, output_file, workers):
    """
    Install rag pipeline plugins
    """
    click.echo(click.style("Installing rag pipeline plugins", fg="yellow"))
    plugin_migration = PluginMigration()
    plugin_migration.install_rag_pipeline_plugins(
        input_file,
        output_file,
        workers,
    )
    click.echo(click.style("Installing rag pipeline plugins successfully", fg="green"))
```
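The notion, firecrawl, and jina migrations above repeat one grouping idiom: bucket credential rows by tenant_id, then install the plugin and rewrite credentials per tenant. The repeated "if key not in mapping" dance can be collapsed with defaultdict; a generic sketch (illustrative refactor only, not code from this diff):

```python
# Generic form of the per-tenant grouping repeated three times above.
# Illustrative refactor; the diff itself keeps the explicit dicts.
from collections import defaultdict
from collections.abc import Iterable


def group_by_tenant(rows: Iterable) -> dict[str, list]:
    """Group ORM rows (anything with a .tenant_id attribute) by tenant."""
    grouped: defaultdict[str, list] = defaultdict(list)
    for row in rows:
        grouped[row.tenant_id].append(row)
    return dict(grouped)
```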
830
api/commands/retention.py
Normal file
830
api/commands/retention.py
Normal file
@@ -0,0 +1,830 @@
|
||||
import datetime
|
||||
import logging
|
||||
import time
|
||||
from typing import Any
|
||||
|
||||
import click
|
||||
import sqlalchemy as sa
|
||||
|
||||
from extensions.ext_database import db
|
||||
from libs.datetime_utils import naive_utc_now
|
||||
from services.clear_free_plan_tenant_expired_logs import ClearFreePlanTenantExpiredLogs
|
||||
from services.retention.conversation.messages_clean_policy import create_message_clean_policy
|
||||
from services.retention.conversation.messages_clean_service import MessagesCleanService
|
||||
from services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs import WorkflowRunCleanup
|
||||
from tasks.remove_app_and_related_data_task import delete_draft_variables_batch
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@click.command("clear-free-plan-tenant-expired-logs", help="Clear free plan tenant expired logs.")
|
||||
@click.option("--days", prompt=True, help="The days to clear free plan tenant expired logs.", default=30)
|
||||
@click.option("--batch", prompt=True, help="The batch size to clear free plan tenant expired logs.", default=100)
|
||||
@click.option(
|
||||
"--tenant_ids",
|
||||
prompt=True,
|
||||
multiple=True,
|
||||
help="The tenant ids to clear free plan tenant expired logs.",
|
||||
)
|
||||
def clear_free_plan_tenant_expired_logs(days: int, batch: int, tenant_ids: list[str]):
|
||||
"""
|
||||
Clear free plan tenant expired logs.
|
||||
"""
|
||||
click.echo(click.style("Starting clear free plan tenant expired logs.", fg="white"))
|
||||
|
||||
ClearFreePlanTenantExpiredLogs.process(days, batch, tenant_ids)
|
||||
|
||||
click.echo(click.style("Clear free plan tenant expired logs completed.", fg="green"))
|
||||
|
||||
|
||||
@click.command("clean-workflow-runs", help="Clean expired workflow runs and related data for free tenants.")
|
||||
@click.option(
|
||||
"--before-days",
|
||||
"--days",
|
||||
default=30,
|
||||
show_default=True,
|
||||
type=click.IntRange(min=0),
|
||||
help="Delete workflow runs created before N days ago.",
|
||||
)
|
||||
@click.option("--batch-size", default=200, show_default=True, help="Batch size for selecting workflow runs.")
|
||||
@click.option(
|
||||
"--from-days-ago",
|
||||
default=None,
|
||||
type=click.IntRange(min=0),
|
||||
help="Lower bound in days ago (older). Must be paired with --to-days-ago.",
|
||||
)
|
||||
@click.option(
|
||||
"--to-days-ago",
|
||||
default=None,
|
||||
type=click.IntRange(min=0),
|
||||
help="Upper bound in days ago (newer). Must be paired with --from-days-ago.",
|
||||
)
|
||||
@click.option(
|
||||
"--start-from",
|
||||
type=click.DateTime(formats=["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"]),
|
||||
default=None,
|
||||
help="Optional lower bound (inclusive) for created_at; must be paired with --end-before.",
|
||||
)
|
||||
@click.option(
|
||||
"--end-before",
|
||||
type=click.DateTime(formats=["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"]),
|
||||
default=None,
|
||||
help="Optional upper bound (exclusive) for created_at; must be paired with --start-from.",
|
||||
)
|
||||
@click.option(
|
||||
"--dry-run",
|
||||
is_flag=True,
|
||||
help="Preview cleanup results without deleting any workflow run data.",
|
||||
)
|
||||
def clean_workflow_runs(
|
||||
before_days: int,
|
||||
batch_size: int,
|
||||
from_days_ago: int | None,
|
||||
to_days_ago: int | None,
|
||||
start_from: datetime.datetime | None,
|
||||
end_before: datetime.datetime | None,
|
||||
dry_run: bool,
|
||||
):
|
||||
"""
|
||||
Clean workflow runs and related workflow data for free tenants.
|
||||
"""
|
||||
if (start_from is None) ^ (end_before is None):
|
||||
raise click.UsageError("--start-from and --end-before must be provided together.")
|
||||
|
||||
if (from_days_ago is None) ^ (to_days_ago is None):
|
||||
raise click.UsageError("--from-days-ago and --to-days-ago must be provided together.")
|
||||
|
||||
if from_days_ago is not None and to_days_ago is not None:
|
||||
if start_from or end_before:
|
||||
raise click.UsageError("Choose either day offsets or explicit dates, not both.")
|
||||
if from_days_ago <= to_days_ago:
|
||||
raise click.UsageError("--from-days-ago must be greater than --to-days-ago.")
|
||||
now = datetime.datetime.now()
|
||||
start_from = now - datetime.timedelta(days=from_days_ago)
|
||||
end_before = now - datetime.timedelta(days=to_days_ago)
|
||||
before_days = 0
|
||||
|
||||
start_time = datetime.datetime.now(datetime.UTC)
|
||||
click.echo(click.style(f"Starting workflow run cleanup at {start_time.isoformat()}.", fg="white"))
|
||||
|
||||
WorkflowRunCleanup(
|
||||
days=before_days,
|
||||
batch_size=batch_size,
|
||||
start_from=start_from,
|
||||
end_before=end_before,
|
||||
dry_run=dry_run,
|
||||
).run()
|
||||
|
||||
end_time = datetime.datetime.now(datetime.UTC)
|
||||
elapsed = end_time - start_time
|
||||
click.echo(
|
||||
click.style(
|
||||
f"Workflow run cleanup completed. start={start_time.isoformat()} "
|
||||
f"end={end_time.isoformat()} duration={elapsed}",
|
||||
fg="green",
|
||||
)
|
||||
)


@click.command(
    "archive-workflow-runs",
    help="Archive workflow runs for paid plan tenants to S3-compatible storage.",
)
@click.option("--tenant-ids", default=None, help="Optional comma-separated tenant IDs for grayscale rollout.")
@click.option("--before-days", default=90, show_default=True, help="Archive runs older than N days.")
@click.option(
    "--from-days-ago",
    default=None,
    type=click.IntRange(min=0),
    help="Lower bound in days ago (older). Must be paired with --to-days-ago.",
)
@click.option(
    "--to-days-ago",
    default=None,
    type=click.IntRange(min=0),
    help="Upper bound in days ago (newer). Must be paired with --from-days-ago.",
)
@click.option(
    "--start-from",
    type=click.DateTime(formats=["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"]),
    default=None,
    help="Archive runs created at or after this timestamp (UTC if no timezone).",
)
@click.option(
    "--end-before",
    type=click.DateTime(formats=["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"]),
    default=None,
    help="Archive runs created before this timestamp (UTC if no timezone).",
)
@click.option("--batch-size", default=100, show_default=True, help="Batch size for processing.")
@click.option("--workers", default=1, show_default=True, type=int, help="Concurrent workflow runs to archive.")
@click.option("--limit", default=None, type=int, help="Maximum number of runs to archive.")
@click.option("--dry-run", is_flag=True, help="Preview without archiving.")
@click.option("--delete-after-archive", is_flag=True, help="Delete runs and related data after archiving.")
def archive_workflow_runs(
    tenant_ids: str | None,
    before_days: int,
    from_days_ago: int | None,
    to_days_ago: int | None,
    start_from: datetime.datetime | None,
    end_before: datetime.datetime | None,
    batch_size: int,
    workers: int,
    limit: int | None,
    dry_run: bool,
    delete_after_archive: bool,
):
    """
    Archive workflow runs for paid plan tenants older than the specified days.

    This command archives the following tables to storage:
    - workflow_node_executions
    - workflow_node_execution_offload
    - workflow_pauses
    - workflow_pause_reasons
    - workflow_trigger_logs

    The workflow_runs and workflow_app_logs tables are preserved for UI listing.
    """
    from services.retention.workflow_run.archive_paid_plan_workflow_run import WorkflowRunArchiver

    run_started_at = datetime.datetime.now(datetime.UTC)
    click.echo(
        click.style(
            f"Starting workflow run archiving at {run_started_at.isoformat()}.",
            fg="white",
        )
    )

    if (start_from is None) ^ (end_before is None):
        click.echo(click.style("start-from and end-before must be provided together.", fg="red"))
        return

    if (from_days_ago is None) ^ (to_days_ago is None):
        click.echo(click.style("from-days-ago and to-days-ago must be provided together.", fg="red"))
        return

    if from_days_ago is not None and to_days_ago is not None:
        if start_from or end_before:
            click.echo(click.style("Choose either day offsets or explicit dates, not both.", fg="red"))
            return
        if from_days_ago <= to_days_ago:
            click.echo(click.style("from-days-ago must be greater than to-days-ago.", fg="red"))
            return
        now = datetime.datetime.now()
        start_from = now - datetime.timedelta(days=from_days_ago)
        end_before = now - datetime.timedelta(days=to_days_ago)
        before_days = 0

    if start_from and end_before and start_from >= end_before:
        click.echo(click.style("start-from must be earlier than end-before.", fg="red"))
        return
    if workers < 1:
        click.echo(click.style("workers must be at least 1.", fg="red"))
        return

    archiver = WorkflowRunArchiver(
        days=before_days,
        batch_size=batch_size,
        start_from=start_from,
        end_before=end_before,
        workers=workers,
        tenant_ids=[tid.strip() for tid in tenant_ids.split(",")] if tenant_ids else None,
        limit=limit,
        dry_run=dry_run,
        delete_after_archive=delete_after_archive,
    )
    summary = archiver.run()
    click.echo(
        click.style(
            f"Summary: processed={summary.total_runs_processed}, archived={summary.runs_archived}, "
            f"skipped={summary.runs_skipped}, failed={summary.runs_failed}, "
            f"time={summary.total_elapsed_time:.2f}s",
            fg="cyan",
        )
    )

    run_finished_at = datetime.datetime.now(datetime.UTC)
    elapsed = run_finished_at - run_started_at
    click.echo(
        click.style(
            f"Workflow run archiving completed. start={run_started_at.isoformat()} "
            f"end={run_finished_at.isoformat()} duration={elapsed}",
            fg="green",
        )
    )
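
# Illustrative sketch (not part of the diff): WorkflowRunArchiver's
# implementation is not shown in this diff, so this only demonstrates one
# plausible way a `workers` pool over individual runs could look. All names
# below are hypothetical.
#
#   from concurrent.futures import ThreadPoolExecutor
#
#   def archive_runs(run_ids: list[str], workers: int) -> int:
#       def archive_one(run_id: str) -> bool:
#           # Placeholder for per-run work: serialize related tables, upload to
#           # S3-compatible storage, and (optionally) delete the originals.
#           return True
#
#       with ThreadPoolExecutor(max_workers=workers) as pool:
#           results = list(pool.map(archive_one, run_ids))
#       return sum(results)
#
#   assert archive_runs(["run-1", "run-2", "run-3"], workers=2) == 3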


@click.command(
    "restore-workflow-runs",
    help="Restore archived workflow runs from S3-compatible storage.",
)
@click.option(
    "--tenant-ids",
    required=False,
    help="Tenant IDs (comma-separated).",
)
@click.option("--run-id", required=False, help="Workflow run ID to restore.")
@click.option(
    "--start-from",
    type=click.DateTime(formats=["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"]),
    default=None,
    help="Optional lower bound (inclusive) for created_at; must be paired with --end-before.",
)
@click.option(
    "--end-before",
    type=click.DateTime(formats=["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"]),
    default=None,
    help="Optional upper bound (exclusive) for created_at; must be paired with --start-from.",
)
@click.option("--workers", default=1, show_default=True, type=int, help="Concurrent workflow runs to restore.")
@click.option("--limit", type=int, default=100, show_default=True, help="Maximum number of runs to restore.")
@click.option("--dry-run", is_flag=True, help="Preview without restoring.")
def restore_workflow_runs(
    tenant_ids: str | None,
    run_id: str | None,
    start_from: datetime.datetime | None,
    end_before: datetime.datetime | None,
    workers: int,
    limit: int,
    dry_run: bool,
):
    """
    Restore an archived workflow run from storage to the database.

    This restores the following tables:
    - workflow_node_executions
    - workflow_node_execution_offload
    - workflow_pauses
    - workflow_pause_reasons
    - workflow_trigger_logs
    """
    from services.retention.workflow_run.restore_archived_workflow_run import WorkflowRunRestore

    parsed_tenant_ids = None
    if tenant_ids:
        parsed_tenant_ids = [tid.strip() for tid in tenant_ids.split(",") if tid.strip()]
        if not parsed_tenant_ids:
            raise click.BadParameter("tenant-ids must not be empty")

    if (start_from is None) ^ (end_before is None):
        raise click.UsageError("--start-from and --end-before must be provided together.")
    if run_id is None and (start_from is None or end_before is None):
        raise click.UsageError("--start-from and --end-before are required for batch restore.")
    if workers < 1:
        raise click.BadParameter("workers must be at least 1")

    start_time = datetime.datetime.now(datetime.UTC)
    target_desc = f"workflow run {run_id}" if run_id else "workflow runs"
    click.echo(
        click.style(
            f"Starting restore of {target_desc} at {start_time.isoformat()}.",
            fg="white",
        )
    )

    restorer = WorkflowRunRestore(dry_run=dry_run, workers=workers)
    if run_id:
        results = [restorer.restore_by_run_id(run_id)]
    else:
        assert start_from is not None
        assert end_before is not None
        results = restorer.restore_batch(
            parsed_tenant_ids,
            start_date=start_from,
            end_date=end_before,
            limit=limit,
        )

    end_time = datetime.datetime.now(datetime.UTC)
    elapsed = end_time - start_time

    successes = sum(1 for result in results if result.success)
    failures = len(results) - successes

    if failures == 0:
        click.echo(
            click.style(
                f"Restore completed successfully. success={successes} duration={elapsed}",
                fg="green",
            )
        )
    else:
        click.echo(
            click.style(
                f"Restore completed with failures. success={successes} failed={failures} duration={elapsed}",
                fg="red",
            )
        )
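
# Illustrative sketch (not part of the diff): Click's test runner can exercise
# the option-pairing validation above. Assumption: this is run inside an
# initialized Dify API context where the service imports at the top of the
# function resolve.
#
#   from click.testing import CliRunner
#
#   runner = CliRunner()
#   # --start-from without --end-before should fail fast with a UsageError.
#   result = runner.invoke(restore_workflow_runs, ["--start-from", "2024-01-01"])
#   assert result.exit_code != 0
#   assert "must be provided together" in result.output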


@click.command(
    "delete-archived-workflow-runs",
    help="Delete archived workflow runs from the database.",
)
@click.option(
    "--tenant-ids",
    required=False,
    help="Tenant IDs (comma-separated).",
)
@click.option("--run-id", required=False, help="Workflow run ID to delete.")
@click.option(
    "--start-from",
    type=click.DateTime(formats=["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"]),
    default=None,
    help="Optional lower bound (inclusive) for created_at; must be paired with --end-before.",
)
@click.option(
    "--end-before",
    type=click.DateTime(formats=["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"]),
    default=None,
    help="Optional upper bound (exclusive) for created_at; must be paired with --start-from.",
)
@click.option("--limit", type=int, default=100, show_default=True, help="Maximum number of runs to delete.")
@click.option("--dry-run", is_flag=True, help="Preview without deleting.")
def delete_archived_workflow_runs(
    tenant_ids: str | None,
    run_id: str | None,
    start_from: datetime.datetime | None,
    end_before: datetime.datetime | None,
    limit: int,
    dry_run: bool,
):
    """
    Delete archived workflow runs from the database.
    """
    from services.retention.workflow_run.delete_archived_workflow_run import ArchivedWorkflowRunDeletion

    parsed_tenant_ids = None
    if tenant_ids:
        parsed_tenant_ids = [tid.strip() for tid in tenant_ids.split(",") if tid.strip()]
        if not parsed_tenant_ids:
            raise click.BadParameter("tenant-ids must not be empty")

    if (start_from is None) ^ (end_before is None):
        raise click.UsageError("--start-from and --end-before must be provided together.")
    if run_id is None and (start_from is None or end_before is None):
        raise click.UsageError("--start-from and --end-before are required for batch delete.")

    start_time = datetime.datetime.now(datetime.UTC)
    target_desc = f"workflow run {run_id}" if run_id else "workflow runs"
    click.echo(
        click.style(
            f"Starting delete of {target_desc} at {start_time.isoformat()}.",
            fg="white",
        )
    )

    deleter = ArchivedWorkflowRunDeletion(dry_run=dry_run)
    if run_id:
        results = [deleter.delete_by_run_id(run_id)]
    else:
        assert start_from is not None
        assert end_before is not None
        results = deleter.delete_batch(
            parsed_tenant_ids,
            start_date=start_from,
            end_date=end_before,
            limit=limit,
        )

    for result in results:
        if result.success:
            click.echo(
                click.style(
                    f"{'[DRY RUN] Would delete' if dry_run else 'Deleted'} "
                    f"workflow run {result.run_id} (tenant={result.tenant_id})",
                    fg="green",
                )
            )
        else:
            click.echo(
                click.style(
                    f"Failed to delete workflow run {result.run_id}: {result.error}",
                    fg="red",
                )
            )

    end_time = datetime.datetime.now(datetime.UTC)
    elapsed = end_time - start_time

    successes = sum(1 for result in results if result.success)
    failures = len(results) - successes

    if failures == 0:
        click.echo(
            click.style(
                f"Delete completed successfully. success={successes} duration={elapsed}",
                fg="green",
            )
        )
    else:
        click.echo(
            click.style(
                f"Delete completed with failures. success={successes} failed={failures} duration={elapsed}",
                fg="red",
            )
        )


def _find_orphaned_draft_variables(batch_size: int = 1000) -> list[str]:
    """
    Find draft variables that reference non-existent apps.

    Args:
        batch_size: Maximum number of orphaned app IDs to return

    Returns:
        List of app IDs that have draft variables but don't exist in the apps table
    """
    query = """
        SELECT DISTINCT wdv.app_id
        FROM workflow_draft_variables AS wdv
        WHERE NOT EXISTS(
            SELECT 1 FROM apps WHERE apps.id = wdv.app_id
        )
        LIMIT :batch_size
    """

    with db.engine.connect() as conn:
        result = conn.execute(sa.text(query), {"batch_size": batch_size})
        return [row[0] for row in result]
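
# Illustrative sketch (not part of the diff): the same NOT EXISTS anti-join
# expressed with SQLAlchemy Core's lightweight table()/column() constructs,
# which compiles to equivalent SQL without needing the ORM models.
#
#   import sqlalchemy as sa
#
#   wdv = sa.table("workflow_draft_variables", sa.column("app_id"))
#   apps = sa.table("apps", sa.column("id"))
#   subq = sa.select(apps.c.id).where(apps.c.id == wdv.c.app_id).exists()
#   stmt = sa.select(wdv.c.app_id).distinct().where(~subq).limit(1000)
#   print(stmt)  # SELECT DISTINCT ... WHERE NOT (EXISTS (...)) LIMIT :param_1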


def _count_orphaned_draft_variables() -> dict[str, Any]:
    """
    Count orphaned draft variables by app, including associated file counts.

    Returns:
        Dictionary with statistics about orphaned variables and files
    """
    # Count orphaned variables by app
    variables_query = """
        SELECT
            wdv.app_id,
            COUNT(*) as variable_count,
            COUNT(wdv.file_id) as file_count
        FROM workflow_draft_variables AS wdv
        WHERE NOT EXISTS(
            SELECT 1 FROM apps WHERE apps.id = wdv.app_id
        )
        GROUP BY wdv.app_id
        ORDER BY variable_count DESC
    """

    with db.engine.connect() as conn:
        result = conn.execute(sa.text(variables_query))
        orphaned_by_app = {}
        total_files = 0

        for row in result:
            app_id, variable_count, file_count = row
            orphaned_by_app[app_id] = {"variables": variable_count, "files": file_count}
            total_files += file_count

    total_orphaned = sum(app_data["variables"] for app_data in orphaned_by_app.values())
    app_count = len(orphaned_by_app)

    return {
        "total_orphaned_variables": total_orphaned,
        "total_orphaned_files": total_files,
        "orphaned_app_count": app_count,
        "orphaned_by_app": orphaned_by_app,
    }
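
# Note on the query above (illustrative, not part of the diff): COUNT(*) counts
# every orphaned row, while COUNT(wdv.file_id) counts only rows whose file_id
# is non-NULL, i.e. variables that were offloaded to a file. The same
# distinction in plain Python:
#
#   rows = [{"file_id": "f1"}, {"file_id": None}, {"file_id": "f2"}]
#   variable_count = len(rows)                                     # COUNT(*)   -> 3
#   file_count = sum(1 for r in rows if r["file_id"] is not None)  # COUNT(col) -> 2
#   assert (variable_count, file_count) == (3, 2)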


@click.command()
@click.option("--dry-run", is_flag=True, help="Show what would be deleted without actually deleting")
@click.option("--batch-size", default=1000, help="Number of records to process per batch (default 1000)")
@click.option("--max-apps", default=None, type=int, help="Maximum number of apps to process (default: no limit)")
@click.option("-f", "--force", is_flag=True, help="Skip user confirmation and force the command to execute.")
def cleanup_orphaned_draft_variables(
    dry_run: bool,
    batch_size: int,
    max_apps: int | None,
    force: bool = False,
):
    """
    Clean up orphaned draft variables from the database.

    This command finds and removes draft variables that belong to apps
    that no longer exist in the database.
    """
    logger = logging.getLogger(__name__)

    # Get statistics
    stats = _count_orphaned_draft_variables()

    logger.info("Found %s orphaned draft variables", stats["total_orphaned_variables"])
    logger.info("Found %s associated offload files", stats["total_orphaned_files"])
    logger.info("Across %s non-existent apps", stats["orphaned_app_count"])

    if stats["total_orphaned_variables"] == 0:
        logger.info("No orphaned draft variables found. Exiting.")
        return

    if dry_run:
        logger.info("DRY RUN: Would delete the following:")
        for app_id, data in sorted(stats["orphaned_by_app"].items(), key=lambda x: x[1]["variables"], reverse=True)[
            :10
        ]:  # Show top 10
            logger.info("  App %s: %s variables, %s files", app_id, data["variables"], data["files"])
        if len(stats["orphaned_by_app"]) > 10:
            logger.info("  ... and %s more apps", len(stats["orphaned_by_app"]) - 10)
        return

    # Confirm deletion
    if not force:
        click.confirm(
            f"Are you sure you want to delete {stats['total_orphaned_variables']} "
            f"orphaned draft variables and {stats['total_orphaned_files']} associated files "
            f"from {stats['orphaned_app_count']} apps?",
            abort=True,
        )

    total_deleted = 0
    processed_apps = 0

    while True:
        if max_apps and processed_apps >= max_apps:
            logger.info("Reached maximum app limit (%s). Stopping.", max_apps)
            break

        # Fetch a small chunk of orphaned app IDs per iteration. Note that this
        # chunk size is independent of --batch-size, which controls how many
        # variable records are deleted per batch within each app.
        orphaned_app_ids = _find_orphaned_draft_variables(batch_size=10)
        if not orphaned_app_ids:
            logger.info("No more orphaned draft variables found.")
            break

        for app_id in orphaned_app_ids:
            if max_apps and processed_apps >= max_apps:
                break

            try:
                deleted_count = delete_draft_variables_batch(app_id, batch_size)
                total_deleted += deleted_count
                processed_apps += 1

                logger.info("Deleted %s variables for app %s", deleted_count, app_id)

            except Exception:
                logger.exception("Error processing app %s", app_id)
                continue

    logger.info("Cleanup completed. Total deleted: %s variables across %s apps", total_deleted, processed_apps)


@click.command("clean-expired-messages", help="Clean expired messages.")
@click.option(
    "--start-from",
    type=click.DateTime(formats=["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"]),
    required=False,
    default=None,
    help="Lower bound (inclusive) for created_at.",
)
@click.option(
    "--end-before",
    type=click.DateTime(formats=["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"]),
    required=False,
    default=None,
    help="Upper bound (exclusive) for created_at.",
)
@click.option(
    "--from-days-ago",
    type=int,
    default=None,
    help="Relative lower bound in days ago (inclusive). Must be used with --before-days.",
)
@click.option(
    "--before-days",
    type=int,
    default=None,
    help="Relative upper bound in days ago (exclusive). Required for relative mode.",
)
@click.option("--batch-size", default=1000, show_default=True, help="Batch size for selecting messages.")
@click.option(
    "--graceful-period",
    default=21,
    show_default=True,
    help="Graceful period in days after subscription expiration; ignored when billing is disabled.",
)
@click.option("--dry-run", is_flag=True, default=False, help="Show message logs that would be cleaned without deleting them")
def clean_expired_messages(
    batch_size: int,
    graceful_period: int,
    start_from: datetime.datetime | None,
    end_before: datetime.datetime | None,
    from_days_ago: int | None,
    before_days: int | None,
    dry_run: bool,
):
    """
    Clean expired messages and related data for tenants based on the clean policy.
    """
    click.echo(click.style("clean_messages: starting message cleanup.", fg="green"))

    start_at = time.perf_counter()

    try:
        abs_mode = start_from is not None and end_before is not None
        rel_mode = before_days is not None

        if abs_mode and rel_mode:
            raise click.UsageError(
                "Options are mutually exclusive: use either (--start-from,--end-before) "
                "or (--from-days-ago,--before-days)."
            )

        if from_days_ago is not None and before_days is None:
            raise click.UsageError("--from-days-ago must be used together with --before-days.")

        if (start_from is None) ^ (end_before is None):
            raise click.UsageError("Both --start-from and --end-before are required when using an absolute time range.")

        if not abs_mode and not rel_mode:
            raise click.UsageError(
                "You must provide either (--start-from,--end-before) or (--before-days [--from-days-ago])."
            )

        if rel_mode:
            assert before_days is not None
            if before_days < 0:
                raise click.UsageError("--before-days must be >= 0.")
            if from_days_ago is not None:
                if from_days_ago < 0:
                    raise click.UsageError("--from-days-ago must be >= 0.")
                if from_days_ago <= before_days:
                    raise click.UsageError("--from-days-ago must be greater than --before-days.")

        # Create policy based on billing configuration
        # NOTE: graceful_period will be ignored when billing is disabled.
        policy = create_message_clean_policy(graceful_period_days=graceful_period)

        # Create and run the cleanup service
        if abs_mode:
            assert start_from is not None
            assert end_before is not None
            service = MessagesCleanService.from_time_range(
                policy=policy,
                start_from=start_from,
                end_before=end_before,
                batch_size=batch_size,
                dry_run=dry_run,
            )
        elif from_days_ago is None:
            assert before_days is not None
            service = MessagesCleanService.from_days(
                policy=policy,
                days=before_days,
                batch_size=batch_size,
                dry_run=dry_run,
            )
        else:
            assert before_days is not None
            assert from_days_ago is not None
            now = naive_utc_now()
            service = MessagesCleanService.from_time_range(
                policy=policy,
                start_from=now - datetime.timedelta(days=from_days_ago),
                end_before=now - datetime.timedelta(days=before_days),
                batch_size=batch_size,
                dry_run=dry_run,
            )
        stats = service.run()

        end_at = time.perf_counter()
        click.echo(
            click.style(
                f"clean_messages: completed successfully\n"
                f"  - Latency: {end_at - start_at:.2f}s\n"
                f"  - Batches processed: {stats['batches']}\n"
                f"  - Total messages scanned: {stats['total_messages']}\n"
                f"  - Messages filtered: {stats['filtered_messages']}\n"
                f"  - Messages deleted: {stats['total_deleted']}",
                fg="green",
            )
        )
    except Exception as e:
        end_at = time.perf_counter()
        logger.exception("clean_messages failed")
        click.echo(
            click.style(
                f"clean_messages: failed after {end_at - start_at:.2f}s - {str(e)}",
                fg="red",
            )
        )
        raise

    click.echo(click.style("Messages cleanup completed.", fg="green"))


@click.command("export-app-messages", help="Export messages for an app to JSONL.GZ.")
@click.option("--app-id", required=True, help="Application ID to export messages for.")
@click.option(
    "--start-from",
    type=click.DateTime(formats=["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"]),
    default=None,
    help="Optional lower bound (inclusive) for created_at.",
)
@click.option(
    "--end-before",
    type=click.DateTime(formats=["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"]),
    required=True,
    help="Upper bound (exclusive) for created_at.",
)
@click.option(
    "--filename",
    required=True,
    help="Base filename (relative path). Do not include a suffix like .jsonl.gz.",
)
@click.option("--use-cloud-storage", is_flag=True, default=False, help="Upload to cloud storage instead of a local file.")
@click.option("--batch-size", default=1000, show_default=True, help="Batch size for cursor pagination.")
@click.option("--dry-run", is_flag=True, default=False, help="Scan only; print stats without writing any file.")
def export_app_messages(
    app_id: str,
    start_from: datetime.datetime | None,
    end_before: datetime.datetime,
    filename: str,
    use_cloud_storage: bool,
    batch_size: int,
    dry_run: bool,
):
    if start_from and start_from >= end_before:
        raise click.UsageError("--start-from must be before --end-before.")

    from services.retention.conversation.message_export_service import AppMessageExportService

    try:
        validated_filename = AppMessageExportService.validate_export_filename(filename)
    except ValueError as e:
        raise click.BadParameter(str(e), param_hint="--filename") from e

    click.echo(click.style(f"export_app_messages: starting export for app {app_id}.", fg="green"))
    start_at = time.perf_counter()

    try:
        service = AppMessageExportService(
            app_id=app_id,
            end_before=end_before,
            filename=validated_filename,
            start_from=start_from,
            batch_size=batch_size,
            use_cloud_storage=use_cloud_storage,
            dry_run=dry_run,
        )
        stats = service.run()

        elapsed = time.perf_counter() - start_at
        click.echo(
            click.style(
                f"export_app_messages: completed in {elapsed:.2f}s\n"
                f"  - Batches: {stats.batches}\n"
                f"  - Total messages: {stats.total_messages}\n"
                f"  - Messages with feedback: {stats.messages_with_feedback}\n"
                f"  - Total feedbacks: {stats.total_feedbacks}",
                fg="green",
            )
        )
    except Exception as e:
        elapsed = time.perf_counter() - start_at
        logger.exception("export_app_messages failed")
        click.echo(click.style(f"export_app_messages: failed after {elapsed:.2f}s - {e}", fg="red"))
        raise
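
# Illustrative sketch (not part of the diff): the JSONL.GZ layout this export
# presumably produces -- one JSON object per line inside a gzip stream. The
# record fields below are hypothetical; AppMessageExportService's real schema
# is not shown in this diff.
#
#   import gzip
#   import json
#
#   def write_jsonl_gz(path: str, records: list[dict]) -> None:
#       # "wt" opens the gzip stream in text mode so json.dumps output can be
#       # written line by line.
#       with gzip.open(path, "wt", encoding="utf-8") as f:
#           for record in records:
#               f.write(json.dumps(record, ensure_ascii=False) + "\n")
#
#   write_jsonl_gz("messages.jsonl.gz", [{"id": "msg-1", "answer": "hi"}])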

api/commands/storage.py (new file, 755 lines)
@@ -0,0 +1,755 @@
import json

import click
import sqlalchemy as sa

from configs import dify_config
from extensions.ext_database import db
from extensions.ext_storage import storage
from extensions.storage.opendal_storage import OpenDALStorage
from extensions.storage.storage_type import StorageType
from models.model import UploadFile


@click.option("-f", "--force", is_flag=True, help="Skip user confirmation and force the command to execute.")
@click.command("clear-orphaned-file-records", help="Clear orphaned file records.")
def clear_orphaned_file_records(force: bool):
    """
    Clear orphaned file records in the database.
    """

    # define tables and columns to process
    files_tables = [
        {"table": "upload_files", "id_column": "id", "key_column": "key"},
        {"table": "tool_files", "id_column": "id", "key_column": "file_key"},
    ]
    ids_tables = [
        {"type": "uuid", "table": "message_files", "column": "upload_file_id"},
        {"type": "text", "table": "documents", "column": "data_source_info"},
        {"type": "text", "table": "document_segments", "column": "content"},
        {"type": "text", "table": "messages", "column": "answer"},
        {"type": "text", "table": "workflow_node_executions", "column": "inputs"},
        {"type": "text", "table": "workflow_node_executions", "column": "process_data"},
        {"type": "text", "table": "workflow_node_executions", "column": "outputs"},
        {"type": "text", "table": "conversations", "column": "introduction"},
        {"type": "text", "table": "conversations", "column": "system_instruction"},
        {"type": "text", "table": "accounts", "column": "avatar"},
        {"type": "text", "table": "apps", "column": "icon"},
        {"type": "text", "table": "sites", "column": "icon"},
        {"type": "json", "table": "messages", "column": "inputs"},
        {"type": "json", "table": "messages", "column": "message"},
    ]

    # notify user and ask for confirmation
    click.echo(
        click.style(
            "This command will first find and delete orphaned file records from the message_files table,", fg="yellow"
        )
    )
    click.echo(
        click.style(
            "and then it will find and delete orphaned file records in the following tables:",
            fg="yellow",
        )
    )
    for files_table in files_tables:
        click.echo(click.style(f"- {files_table['table']}", fg="yellow"))
    click.echo(
        click.style("The following tables and columns will be scanned to find orphaned file records:", fg="yellow")
    )
    for ids_table in ids_tables:
        click.echo(click.style(f"- {ids_table['table']} ({ids_table['column']})", fg="yellow"))
    click.echo("")

    click.echo(click.style("!!! USE WITH CAUTION !!!", fg="red"))
    click.echo(
        click.style(
            (
                "Since not all patterns have been fully tested, "
                "please note that this command may delete unintended file records."
            ),
            fg="yellow",
        )
    )
    click.echo(
        click.style("This cannot be undone. Please make sure to back up your database before proceeding.", fg="yellow")
    )
    click.echo(
        click.style(
            (
                "It is also recommended to run this during the maintenance window, "
                "as this may cause high load on your instance."
            ),
            fg="yellow",
        )
    )
    if not force:
        click.confirm("Do you want to proceed?", abort=True)

    # start the cleanup process
    click.echo(click.style("Starting orphaned file records cleanup.", fg="white"))

    # clean up the orphaned records in the message_files table where message_id doesn't exist in messages table
    try:
        click.echo(
            click.style("- Listing message_files records where message_id doesn't exist in messages table", fg="white")
        )
        query = (
            "SELECT mf.id, mf.message_id "
            "FROM message_files mf LEFT JOIN messages m ON mf.message_id = m.id "
            "WHERE m.id IS NULL"
        )
        orphaned_message_files = []
        with db.engine.begin() as conn:
            rs = conn.execute(sa.text(query))
            for i in rs:
                orphaned_message_files.append({"id": str(i[0]), "message_id": str(i[1])})

        if orphaned_message_files:
            click.echo(click.style(f"Found {len(orphaned_message_files)} orphaned message_files records:", fg="white"))
            for record in orphaned_message_files:
                click.echo(click.style(f"  - id: {record['id']}, message_id: {record['message_id']}", fg="black"))

            if not force:
                click.confirm(
                    (
                        f"Do you want to proceed "
                        f"to delete all {len(orphaned_message_files)} orphaned message_files records?"
                    ),
                    abort=True,
                )

            click.echo(click.style("- Deleting orphaned message_files records", fg="white"))
            query = "DELETE FROM message_files WHERE id IN :ids"
            with db.engine.begin() as conn:
                conn.execute(sa.text(query), {"ids": tuple(record["id"] for record in orphaned_message_files)})
            click.echo(
                click.style(f"Removed {len(orphaned_message_files)} orphaned message_files records.", fg="green")
            )
        else:
            click.echo(click.style("No orphaned message_files records found. There is nothing to delete.", fg="green"))
    except Exception as e:
        click.echo(click.style(f"Error deleting orphaned message_files records: {str(e)}", fg="red"))

    # clean up the orphaned records in the rest of the *_files tables
    try:
        # fetch file id and keys from each table
        all_files_in_tables = []
        for files_table in files_tables:
            click.echo(click.style(f"- Listing file records in table {files_table['table']}", fg="white"))
            query = f"SELECT {files_table['id_column']}, {files_table['key_column']} FROM {files_table['table']}"
            with db.engine.begin() as conn:
                rs = conn.execute(sa.text(query))
                for i in rs:
                    all_files_in_tables.append({"table": files_table["table"], "id": str(i[0]), "key": i[1]})
        click.echo(click.style(f"Found {len(all_files_in_tables)} files in tables.", fg="white"))

        # fetch referred table and columns
        guid_regexp = "[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}"
        all_ids_in_tables = []
        for ids_table in ids_tables:
            query = ""
            match ids_table["type"]:
                case "uuid":
                    click.echo(
                        click.style(
                            f"- Listing file ids in column {ids_table['column']} in table {ids_table['table']}",
                            fg="white",
                        )
                    )
                    c = ids_table["column"]
                    query = f"SELECT {c} FROM {ids_table['table']} WHERE {c} IS NOT NULL"
                    with db.engine.begin() as conn:
                        rs = conn.execute(sa.text(query))
                        for i in rs:
                            all_ids_in_tables.append({"table": ids_table["table"], "id": str(i[0])})
                case "text":
                    t = ids_table["table"]
                    click.echo(
                        click.style(
                            f"- Listing file-id-like strings in column {ids_table['column']} in table {t}",
                            fg="white",
                        )
                    )
                    query = (
                        f"SELECT regexp_matches({ids_table['column']}, '{guid_regexp}', 'g') AS extracted_id "
                        f"FROM {ids_table['table']}"
                    )
                    with db.engine.begin() as conn:
                        rs = conn.execute(sa.text(query))
                        for i in rs:
                            for j in i[0]:
                                all_ids_in_tables.append({"table": ids_table["table"], "id": j})
                case "json":
                    click.echo(
                        click.style(
                            (
                                f"- Listing file-id-like JSON string in column {ids_table['column']} "
                                f"in table {ids_table['table']}"
                            ),
                            fg="white",
                        )
                    )
                    query = (
                        f"SELECT regexp_matches({ids_table['column']}::text, '{guid_regexp}', 'g') AS extracted_id "
                        f"FROM {ids_table['table']}"
                    )
                    with db.engine.begin() as conn:
                        rs = conn.execute(sa.text(query))
                        for i in rs:
                            for j in i[0]:
                                all_ids_in_tables.append({"table": ids_table["table"], "id": j})
                case _:
                    pass
        click.echo(click.style(f"Found {len(all_ids_in_tables)} file ids in tables.", fg="white"))

    except Exception as e:
        click.echo(click.style(f"Error fetching keys: {str(e)}", fg="red"))
        return

    # find orphaned files
    all_files = [file["id"] for file in all_files_in_tables]
    all_ids = [file["id"] for file in all_ids_in_tables]
    orphaned_files = list(set(all_files) - set(all_ids))
    if not orphaned_files:
        click.echo(click.style("No orphaned file records found. There is nothing to delete.", fg="green"))
        return
    click.echo(click.style(f"Found {len(orphaned_files)} orphaned file records.", fg="white"))
    for file in orphaned_files:
        click.echo(click.style(f"- orphaned file id: {file}", fg="black"))
    if not force:
        click.confirm(f"Do you want to proceed to delete all {len(orphaned_files)} orphaned file records?", abort=True)

    # delete orphaned records for each file
    try:
        for files_table in files_tables:
            click.echo(click.style(f"- Deleting orphaned file records in table {files_table['table']}", fg="white"))
            query = f"DELETE FROM {files_table['table']} WHERE {files_table['id_column']} IN :ids"
            with db.engine.begin() as conn:
                conn.execute(sa.text(query), {"ids": tuple(orphaned_files)})
    except Exception as e:
        click.echo(click.style(f"Error deleting orphaned file records: {str(e)}", fg="red"))
        return
    click.echo(click.style(f"Removed {len(orphaned_files)} orphaned file records.", fg="green"))


@click.option("-f", "--force", is_flag=True, help="Skip user confirmation and force the command to execute.")
@click.command("remove-orphaned-files-on-storage", help="Remove orphaned files on the storage.")
def remove_orphaned_files_on_storage(force: bool):
    """
    Remove orphaned files on the storage.
    """

    # define tables and columns to process
    files_tables = [
        {"table": "upload_files", "key_column": "key"},
        {"table": "tool_files", "key_column": "file_key"},
    ]
    storage_paths = ["image_files", "tools", "upload_files"]

    # notify user and ask for confirmation
    click.echo(click.style("This command will find and remove orphaned files on the storage,", fg="yellow"))
    click.echo(
        click.style("by comparing the files on the storage with the records in the following tables:", fg="yellow")
    )
    for files_table in files_tables:
        click.echo(click.style(f"- {files_table['table']}", fg="yellow"))
    click.echo(click.style("The following paths on the storage will be scanned to find orphaned files:", fg="yellow"))
    for storage_path in storage_paths:
        click.echo(click.style(f"- {storage_path}", fg="yellow"))
    click.echo("")

    click.echo(click.style("!!! USE WITH CAUTION !!!", fg="red"))
    click.echo(
        click.style(
            "Currently, this command works only for OpenDAL-based storage (STORAGE_TYPE=opendal).", fg="yellow"
        )
    )
    click.echo(
        click.style(
            "Since not all patterns have been fully tested, please note that this command may delete unintended files.",
            fg="yellow",
        )
    )
    click.echo(
        click.style("This cannot be undone. Please make sure to back up your storage before proceeding.", fg="yellow")
    )
    click.echo(
        click.style(
            (
                "It is also recommended to run this during the maintenance window, "
                "as this may cause high load on your instance."
            ),
            fg="yellow",
        )
    )
    if not force:
        click.confirm("Do you want to proceed?", abort=True)

    # start the cleanup process
    click.echo(click.style("Starting orphaned files cleanup.", fg="white"))

    # fetch file id and keys from each table
    all_files_in_tables = []
    try:
        for files_table in files_tables:
            click.echo(click.style(f"- Listing files from table {files_table['table']}", fg="white"))
            query = f"SELECT {files_table['key_column']} FROM {files_table['table']}"
            with db.engine.begin() as conn:
                rs = conn.execute(sa.text(query))
                for i in rs:
                    all_files_in_tables.append(str(i[0]))
        click.echo(click.style(f"Found {len(all_files_in_tables)} files in tables.", fg="white"))
    except Exception as e:
        click.echo(click.style(f"Error fetching keys: {str(e)}", fg="red"))
        return

    all_files_on_storage = []
    for storage_path in storage_paths:
        try:
            click.echo(click.style(f"- Scanning files on storage path {storage_path}", fg="white"))
            files = storage.scan(path=storage_path, files=True, directories=False)
            all_files_on_storage.extend(files)
        except FileNotFoundError:
            click.echo(click.style(f" -> Skipping path {storage_path} as it does not exist.", fg="yellow"))
            continue
        except Exception as e:
            click.echo(click.style(f" -> Error scanning files on storage path {storage_path}: {str(e)}", fg="red"))
            continue
    click.echo(click.style(f"Found {len(all_files_on_storage)} files on storage.", fg="white"))

    # find orphaned files
    orphaned_files = list(set(all_files_on_storage) - set(all_files_in_tables))
    if not orphaned_files:
        click.echo(click.style("No orphaned files found. There is nothing to remove.", fg="green"))
        return
    click.echo(click.style(f"Found {len(orphaned_files)} orphaned files.", fg="white"))
    for file in orphaned_files:
        click.echo(click.style(f"- orphaned file: {file}", fg="black"))
    if not force:
        click.confirm(f"Do you want to proceed to remove all {len(orphaned_files)} orphaned files?", abort=True)

    # delete orphaned files
    removed_files = 0
    error_files = 0
    for file in orphaned_files:
        try:
            storage.delete(file)
            removed_files += 1
            click.echo(click.style(f"- Removing orphaned file: {file}", fg="white"))
        except Exception as e:
            error_files += 1
            click.echo(click.style(f"- Error deleting orphaned file {file}: {str(e)}", fg="red"))
            continue
    if error_files == 0:
        click.echo(click.style(f"Removed {removed_files} orphaned files without errors.", fg="green"))
    else:
        click.echo(click.style(f"Removed {removed_files} orphaned files, with {error_files} errors.", fg="yellow"))


@click.command("file-usage", help="Query file usages and show where files are referenced.")
@click.option("--file-id", type=str, default=None, help="Filter by file UUID.")
@click.option("--key", type=str, default=None, help="Filter by storage key.")
@click.option("--src", type=str, default=None, help="Filter by table.column pattern (e.g., 'documents.%' or '%.icon').")
@click.option("--limit", type=int, default=100, help="Limit number of results (default: 100).")
@click.option("--offset", type=int, default=0, help="Offset for pagination (default: 0).")
@click.option("--json", "output_json", is_flag=True, help="Output results in JSON format.")
def file_usage(
    file_id: str | None,
    key: str | None,
    src: str | None,
    limit: int,
    offset: int,
    output_json: bool,
):
    """
    Query file usages and show where files are referenced in the database.

    This command reuses the same reference checking logic as clear-orphaned-file-records
    and displays detailed information about where each file is referenced.
    """
    # local imports used by the --src filter and the UUID extraction below
    import fnmatch
    import re

    # define tables and columns to process
    files_tables = [
        {"table": "upload_files", "id_column": "id", "key_column": "key"},
        {"table": "tool_files", "id_column": "id", "key_column": "file_key"},
    ]
    ids_tables = [
        {"type": "uuid", "table": "message_files", "column": "upload_file_id", "pk_column": "id"},
        {"type": "text", "table": "documents", "column": "data_source_info", "pk_column": "id"},
        {"type": "text", "table": "document_segments", "column": "content", "pk_column": "id"},
        {"type": "text", "table": "messages", "column": "answer", "pk_column": "id"},
        {"type": "text", "table": "workflow_node_executions", "column": "inputs", "pk_column": "id"},
        {"type": "text", "table": "workflow_node_executions", "column": "process_data", "pk_column": "id"},
        {"type": "text", "table": "workflow_node_executions", "column": "outputs", "pk_column": "id"},
        {"type": "text", "table": "conversations", "column": "introduction", "pk_column": "id"},
        {"type": "text", "table": "conversations", "column": "system_instruction", "pk_column": "id"},
        {"type": "text", "table": "accounts", "column": "avatar", "pk_column": "id"},
        {"type": "text", "table": "apps", "column": "icon", "pk_column": "id"},
        {"type": "text", "table": "sites", "column": "icon", "pk_column": "id"},
        {"type": "json", "table": "messages", "column": "inputs", "pk_column": "id"},
        {"type": "json", "table": "messages", "column": "message", "pk_column": "id"},
    ]

    # Stream file usages with pagination to avoid holding all results in memory
    paginated_usages = []
    total_count = 0

    # First, build a mapping of file_id -> storage_key from the base tables
    file_key_map = {}
    for files_table in files_tables:
        query = f"SELECT {files_table['id_column']}, {files_table['key_column']} FROM {files_table['table']}"
        with db.engine.begin() as conn:
            rs = conn.execute(sa.text(query))
            for row in rs:
                file_key_map[str(row[0])] = f"{files_table['table']}:{row[1]}"

    # If filtering by key or file_id, verify it exists
    if file_id and file_id not in file_key_map:
        if output_json:
            click.echo(json.dumps({"error": f"File ID {file_id} not found in base tables"}))
        else:
            click.echo(click.style(f"File ID {file_id} not found in base tables.", fg="red"))
        return

    if key:
        valid_prefixes = {f"upload_files:{key}", f"tool_files:{key}"}
        matching_file_ids = [fid for fid, fkey in file_key_map.items() if fkey in valid_prefixes]
        if not matching_file_ids:
            if output_json:
                click.echo(json.dumps({"error": f"Key {key} not found in base tables"}))
            else:
                click.echo(click.style(f"Key {key} not found in base tables.", fg="red"))
            return

    guid_regexp = "[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}"
    uuid_pattern = re.compile(guid_regexp, re.IGNORECASE)

    # For each reference table/column, find matching file IDs and record the references
    for ids_table in ids_tables:
        src_filter = f"{ids_table['table']}.{ids_table['column']}"

        # Skip if src filter doesn't match (use fnmatch for wildcard patterns)
        if src:
            if "%" in src or "_" in src:
                # Convert SQL LIKE wildcards to fnmatch wildcards (% -> *, _ -> ?)
                pattern = src.replace("%", "*").replace("_", "?")
                if not fnmatch.fnmatch(src_filter, pattern):
                    continue
            else:
                if src_filter != src:
                    continue

        match ids_table["type"]:
            case "uuid":
                # Direct UUID match
                query = (
                    f"SELECT {ids_table['pk_column']}, {ids_table['column']} "
                    f"FROM {ids_table['table']} WHERE {ids_table['column']} IS NOT NULL"
                )
                with db.engine.begin() as conn:
                    rs = conn.execute(sa.text(query))
                    for row in rs:
                        record_id = str(row[0])
                        ref_file_id = str(row[1])
                        if ref_file_id not in file_key_map:
                            continue
                        storage_key = file_key_map[ref_file_id]

                        # Apply filters
                        if file_id and ref_file_id != file_id:
                            continue
                        if key and not storage_key.endswith(key):
                            continue

                        # Only collect items within the requested page range
                        if offset <= total_count < offset + limit:
                            paginated_usages.append(
                                {
                                    "src": f"{ids_table['table']}.{ids_table['column']}",
                                    "record_id": record_id,
                                    "file_id": ref_file_id,
                                    "key": storage_key,
                                }
                            )
                        total_count += 1

            case "text" | "json":
                # Extract UUIDs from text/json content
                column_cast = f"{ids_table['column']}::text" if ids_table["type"] == "json" else ids_table["column"]
                query = (
                    f"SELECT {ids_table['pk_column']}, {column_cast} "
                    f"FROM {ids_table['table']} WHERE {ids_table['column']} IS NOT NULL"
                )
                with db.engine.begin() as conn:
                    rs = conn.execute(sa.text(query))
                    for row in rs:
                        record_id = str(row[0])
                        content = str(row[1])

                        # Find all UUIDs in the content
                        matches = uuid_pattern.findall(content)

                        for ref_file_id in matches:
                            if ref_file_id not in file_key_map:
                                continue
                            storage_key = file_key_map[ref_file_id]

                            # Apply filters
                            if file_id and ref_file_id != file_id:
                                continue
                            if key and not storage_key.endswith(key):
                                continue

                            # Only collect items within the requested page range
                            if offset <= total_count < offset + limit:
                                paginated_usages.append(
                                    {
                                        "src": f"{ids_table['table']}.{ids_table['column']}",
                                        "record_id": record_id,
                                        "file_id": ref_file_id,
                                        "key": storage_key,
                                    }
                                )
                            total_count += 1
            case _:
                pass

    # Output results
    if output_json:
        result = {
            "total": total_count,
            "offset": offset,
            "limit": limit,
            "usages": paginated_usages,
        }
        click.echo(json.dumps(result, indent=2))
    else:
        click.echo(
            click.style(f"Found {total_count} file usages (showing {len(paginated_usages)} results)", fg="white")
        )
        click.echo("")

        if not paginated_usages:
            click.echo(click.style("No file usages found matching the specified criteria.", fg="yellow"))
            return

        # Print table header
        click.echo(
            click.style(
                f"{'Src (Table.Column)':<50} {'Record ID':<40} {'File ID':<40} {'Storage Key':<60}",
                fg="cyan",
            )
        )
        click.echo(click.style("-" * 190, fg="white"))

        # Print each usage
        for usage in paginated_usages:
            click.echo(f"{usage['src']:<50} {usage['record_id']:<40} {usage['file_id']:<40} {usage['key']:<60}")

        # Show pagination info
        if offset + limit < total_count:
            click.echo("")
            click.echo(
                click.style(
                    f"Showing {offset + 1}-{offset + len(paginated_usages)} of {total_count} results", fg="white"
                )
            )
            click.echo(click.style(f"Use --offset {offset + limit} to see next page", fg="white"))


@click.command(
    "migrate-oss",
    help="Migrate files from Local or OpenDAL source to a cloud OSS storage (destination must NOT be local/opendal).",
)
@click.option(
    "--path",
    "paths",
    multiple=True,
    help="Storage path prefixes to migrate (repeatable). Defaults: privkeys, upload_files, image_files,"
    " tools, website_files, keyword_files, ops_trace",
)
@click.option(
    "--source",
    type=click.Choice(["local", "opendal"], case_sensitive=False),
    default="opendal",
    show_default=True,
    help="Source storage type to read from",
)
@click.option("--overwrite", is_flag=True, default=False, help="Overwrite destination if file already exists")
@click.option("--dry-run", is_flag=True, default=False, help="Show what would be migrated without uploading")
@click.option("-f", "--force", is_flag=True, help="Skip confirmation and run without prompts")
@click.option(
    "--update-db/--no-update-db",
    default=True,
    help="Update upload_files.storage_type from source type to current storage after migration",
)
def migrate_oss(
    paths: tuple[str, ...],
    source: str,
    overwrite: bool,
    dry_run: bool,
    force: bool,
    update_db: bool,
):
    """
    Copy all files under selected prefixes from a source storage
    (Local filesystem or OpenDAL-backed) into the currently configured
    destination storage backend, then optionally update DB records.

    Expected usage: set STORAGE_TYPE (and its credentials) to your target backend.
    """
    # Ensure target storage is not local/opendal
    if dify_config.STORAGE_TYPE in (StorageType.LOCAL, StorageType.OPENDAL):
        click.echo(
            click.style(
                "Target STORAGE_TYPE must be a cloud OSS (not 'local' or 'opendal').\n"
                "Please set STORAGE_TYPE to one of: s3, aliyun-oss, azure-blob, google-storage, tencent-cos, \n"
                "volcengine-tos, supabase, oci-storage, huawei-obs, baidu-obs, clickzetta-volume.",
                fg="red",
            )
        )
        return

    # Default paths if none specified
    default_paths = ("privkeys", "upload_files", "image_files", "tools", "website_files", "keyword_files", "ops_trace")
    path_list = list(paths) if paths else list(default_paths)
    is_source_local = source.lower() == "local"

    click.echo(click.style("Preparing migration to target storage.", fg="yellow"))
    click.echo(click.style(f"Target storage type: {dify_config.STORAGE_TYPE}", fg="white"))
    if is_source_local:
        src_root = dify_config.STORAGE_LOCAL_PATH
        click.echo(click.style(f"Source: local fs, root: {src_root}", fg="white"))
    else:
        click.echo(click.style(f"Source: opendal scheme={dify_config.OPENDAL_SCHEME}", fg="white"))
    click.echo(click.style(f"Paths to migrate: {', '.join(path_list)}", fg="white"))
    click.echo("")

    if not force:
        click.confirm("Proceed with migration?", abort=True)

    # Instantiate source storage
    try:
        if is_source_local:
            src_root = dify_config.STORAGE_LOCAL_PATH
            source_storage = OpenDALStorage(scheme="fs", root=src_root)
        else:
            source_storage = OpenDALStorage(scheme=dify_config.OPENDAL_SCHEME)
    except Exception as e:
        click.echo(click.style(f"Failed to initialize source storage: {str(e)}", fg="red"))
        return

    total_files = 0
    copied_files = 0
    skipped_files = 0
    errored_files = 0
    copied_upload_file_keys: list[str] = []

    for prefix in path_list:
        click.echo(click.style(f"Scanning source path: {prefix}", fg="white"))
        try:
            keys = source_storage.scan(path=prefix, files=True, directories=False)
        except FileNotFoundError:
            click.echo(click.style(f" -> Skipping missing path: {prefix}", fg="yellow"))
            continue
        except NotImplementedError:
            click.echo(click.style(" -> Source storage does not support scanning.", fg="red"))
            return
        except Exception as e:
            click.echo(click.style(f" -> Error scanning '{prefix}': {str(e)}", fg="red"))
            continue

        click.echo(click.style(f"Found {len(keys)} files under {prefix}", fg="white"))

        for key in keys:
            total_files += 1

            # check destination existence
            if not overwrite:
                try:
                    if storage.exists(key):
                        skipped_files += 1
                        continue
                except Exception as e:
                    # existence check failures should not block the migration attempt,
                    # but should be surfaced to the user as a warning for visibility
                    click.echo(
                        click.style(
                            f" -> Warning: failed target existence check for {key}: {str(e)}",
                            fg="yellow",
                        )
                    )

            if dry_run:
                copied_files += 1
                continue

            # read from source and write to destination
            try:
                data = source_storage.load_once(key)
            except FileNotFoundError:
                errored_files += 1
                click.echo(click.style(f" -> Missing on source: {key}", fg="yellow"))
                continue
            except Exception as e:
                errored_files += 1
                click.echo(click.style(f" -> Error reading {key}: {str(e)}", fg="red"))
                continue

            try:
                storage.save(key, data)
                copied_files += 1
                if prefix == "upload_files":
                    copied_upload_file_keys.append(key)
            except Exception as e:
                errored_files += 1
                click.echo(click.style(f" -> Error writing {key} to target: {str(e)}", fg="red"))
                continue

    click.echo("")
    click.echo(click.style("Migration summary:", fg="yellow"))
    click.echo(click.style(f"  Total: {total_files}", fg="white"))
    click.echo(click.style(f"  Copied: {copied_files}", fg="green"))
    click.echo(click.style(f"  Skipped: {skipped_files}", fg="white"))
    if errored_files:
        click.echo(click.style(f"  Errors: {errored_files}", fg="red"))

    if dry_run:
        click.echo(click.style("Dry-run complete. No changes were made.", fg="green"))
        return

    if errored_files:
        click.echo(
            click.style(
                "Some files failed to migrate. Review errors above before updating DB records.",
                fg="yellow",
            )
        )
        if update_db and not force:
            if not click.confirm("Proceed to update DB storage_type despite errors?", default=False):
                update_db = False

    # Optionally update DB records for upload_files.storage_type (only for successfully copied upload_files)
    if update_db:
        if not copied_upload_file_keys:
            click.echo(click.style("No upload_files copied. Skipping DB storage_type update.", fg="yellow"))
        else:
            try:
                source_storage_type = StorageType.LOCAL if is_source_local else StorageType.OPENDAL
                updated = (
                    db.session.query(UploadFile)
                    .where(
                        UploadFile.storage_type == source_storage_type,
                        UploadFile.key.in_(copied_upload_file_keys),
                    )
                    .update({UploadFile.storage_type: dify_config.STORAGE_TYPE}, synchronize_session=False)
                )
                db.session.commit()
                click.echo(click.style(f"Updated storage_type for {updated} upload_files records.", fg="green"))
            except Exception as e:
                db.session.rollback()
                click.echo(click.style(f"Failed to update DB storage_type: {str(e)}", fg="red"))

api/commands/system.py (new file, 204 lines)
@@ -0,0 +1,204 @@
import logging

import click
import sqlalchemy as sa
from sqlalchemy.orm import sessionmaker

from configs import dify_config
from events.app_event import app_was_created
from extensions.ext_database import db
from extensions.ext_redis import redis_client
from libs.db_migration_lock import DbMigrationAutoRenewLock
from libs.rsa import generate_key_pair
from models import Tenant
from models.model import App, AppMode, Conversation
from models.provider import Provider, ProviderModel

logger = logging.getLogger(__name__)

DB_UPGRADE_LOCK_TTL_SECONDS = 60


@click.command(
    "reset-encrypt-key-pair",
    help="Reset the asymmetric key pair of workspace for encrypting LLM credentials. "
    "After the reset, all LLM credentials will become invalid, "
    "requiring re-entry. "
    "Only supports SELF_HOSTED mode.",
)
@click.confirmation_option(
    prompt=click.style(
        "Are you sure you want to reset encrypt key pair? This operation cannot be rolled back!", fg="red"
    )
)
def reset_encrypt_key_pair():
    """
    Reset the encrypted key pair of workspace for encrypting LLM credentials.
    After the reset, all LLM credentials will become invalid, requiring re-entry.
    Only supports SELF_HOSTED mode.
    """
    if dify_config.EDITION != "SELF_HOSTED":
        click.echo(click.style("This command is only for SELF_HOSTED installations.", fg="red"))
        return
    with sessionmaker(db.engine, expire_on_commit=False).begin() as session:
        tenants = session.query(Tenant).all()
        if not tenants:
            click.echo(click.style("No workspaces found. Run /install first.", fg="red"))
            return

        for tenant in tenants:
            tenant.encrypt_public_key = generate_key_pair(tenant.id)

            session.query(Provider).where(Provider.provider_type == "custom", Provider.tenant_id == tenant.id).delete()
            session.query(ProviderModel).where(ProviderModel.tenant_id == tenant.id).delete()

            click.echo(
                click.style(
                    f"Congratulations! The asymmetric key pair of workspace {tenant.id} has been reset.",
                    fg="green",
                )
            )


@click.command("convert-to-agent-apps", help="Convert Agent Assistant to Agent App.")
|
||||
def convert_to_agent_apps():
|
||||
"""
|
||||
Convert Agent Assistant to Agent App.
|
||||
"""
|
||||
click.echo(click.style("Starting convert to agent apps.", fg="green"))
|
||||
|
||||
proceeded_app_ids = []
|
||||
|
||||
while True:
|
||||
# fetch first 1000 apps
|
||||
sql_query = """SELECT a.id AS id FROM apps a
|
||||
INNER JOIN app_model_configs am ON a.app_model_config_id=am.id
|
||||
WHERE a.mode = 'chat'
|
||||
AND am.agent_mode is not null
|
||||
AND (
|
||||
am.agent_mode like '%"strategy": "function_call"%'
|
||||
OR am.agent_mode like '%"strategy": "react"%'
|
||||
)
|
||||
AND (
|
||||
am.agent_mode like '{"enabled": true%'
|
||||
OR am.agent_mode like '{"max_iteration": %'
|
||||
) ORDER BY a.created_at DESC LIMIT 1000
|
||||
"""
|
||||
|
||||
with db.engine.begin() as conn:
|
||||
rs = conn.execute(sa.text(sql_query))
|
||||
|
||||
apps = []
|
||||
for i in rs:
|
||||
app_id = str(i.id)
|
||||
if app_id not in proceeded_app_ids:
|
||||
proceeded_app_ids.append(app_id)
|
||||
app = db.session.query(App).where(App.id == app_id).first()
|
||||
if app is not None:
|
||||
apps.append(app)
|
||||
|
||||
if len(apps) == 0:
|
||||
break
|
||||
|
||||
for app in apps:
|
||||
click.echo(f"Converting app: {app.id}")
|
||||
|
||||
try:
|
||||
app.mode = AppMode.AGENT_CHAT
|
||||
db.session.commit()
|
||||
|
||||
# update conversation mode to agent
|
||||
db.session.query(Conversation).where(Conversation.app_id == app.id).update(
|
||||
{Conversation.mode: AppMode.AGENT_CHAT}
|
||||
)
|
||||
|
||||
db.session.commit()
|
||||
click.echo(click.style(f"Converted app: {app.id}", fg="green"))
|
||||
except Exception as e:
|
||||
click.echo(click.style(f"Convert app error: {e.__class__.__name__} {str(e)}", fg="red"))
|
||||
|
||||
click.echo(click.style(f"Conversion complete. Converted {len(proceeded_app_ids)} agent apps.", fg="green"))
|
||||
|
||||
|
||||
@click.command("upgrade-db", help="Upgrade the database")
|
||||
def upgrade_db():
|
||||
click.echo("Preparing database migration...")
|
||||
lock = DbMigrationAutoRenewLock(
|
||||
redis_client=redis_client,
|
||||
name="db_upgrade_lock",
|
||||
ttl_seconds=DB_UPGRADE_LOCK_TTL_SECONDS,
|
||||
logger=logger,
|
||||
log_context="db_migration",
|
||||
)
|
||||
if lock.acquire(blocking=False):
|
||||
migration_succeeded = False
|
||||
try:
|
||||
click.echo(click.style("Starting database migration.", fg="green"))
|
||||
|
||||
# run db migration
|
||||
import flask_migrate
|
||||
|
||||
flask_migrate.upgrade()
|
||||
|
||||
migration_succeeded = True
|
||||
click.echo(click.style("Database migration successful!", fg="green"))
|
||||
|
||||
except Exception as e:
|
||||
logger.exception("Failed to execute database migration")
|
||||
click.echo(click.style(f"Database migration failed: {e}", fg="red"))
|
||||
raise SystemExit(1)
|
||||
finally:
|
||||
status = "successful" if migration_succeeded else "failed"
|
||||
lock.release_safely(status=status)
|
||||
else:
|
||||
click.echo("Database migration skipped")
|
||||
|
||||
|
||||
@click.command("fix-app-site-missing", help="Fix app related site missing issue.")
|
||||
def fix_app_site_missing():
|
||||
"""
|
||||
Fix app related site missing issue.
|
||||
"""
|
||||
click.echo(click.style("Starting fix for missing app-related sites.", fg="green"))
|
||||
|
||||
failed_app_ids = []
|
||||
while True:
|
||||
sql = """select apps.id as id from apps left join sites on sites.app_id=apps.id
|
||||
where sites.id is null limit 1000"""
|
||||
with db.engine.begin() as conn:
|
||||
rs = conn.execute(sa.text(sql))
|
||||
|
||||
processed_count = 0
|
||||
for i in rs:
|
||||
processed_count += 1
|
||||
app_id = str(i.id)
|
||||
|
||||
if app_id in failed_app_ids:
|
||||
continue
|
||||
|
||||
try:
|
||||
app = db.session.query(App).where(App.id == app_id).first()
|
||||
if not app:
|
||||
logger.info("App %s not found", app_id)
|
||||
continue
|
||||
|
||||
tenant = app.tenant
|
||||
if tenant:
|
||||
accounts = tenant.get_accounts()
|
||||
if not accounts:
|
||||
logger.info("Fix failed for app %s", app.id)
|
||||
continue
|
||||
|
||||
account = accounts[0]
|
||||
logger.info("Fixing missing site for app %s", app.id)
|
||||
app_was_created.send(app, account=account)
|
||||
except Exception:
|
||||
failed_app_ids.append(app_id)
|
||||
click.echo(click.style(f"Failed to fix missing site for app {app_id}", fg="red"))
|
||||
logger.exception("Failed to fix app related site missing issue, app_id: %s", app_id)
|
||||
continue
|
||||
|
||||
if not processed_count:
|
||||
break
|
||||
|
||||
click.echo(click.style("Fix for missing app-related sites completed successfully!", fg="green"))
|
||||
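
The upgrade-db command above serializes migrations across replicas with DbMigrationAutoRenewLock, whose implementation is not part of this diff. Below is a minimal sketch of the pattern it names (Redis SET NX plus periodic TTL renewal); everything here except the acquire/release_safely surface shown above is an assumption:

import threading
import uuid


class AutoRenewLockSketch:
    """Illustrative only: a Redis lock that re-extends its own TTL while held."""

    def __init__(self, redis_client, name: str, ttl_seconds: int):
        self._redis = redis_client
        self._name = name
        self._ttl = ttl_seconds
        self._token = str(uuid.uuid4())  # distinguishes this holder from others
        self._timer: threading.Timer | None = None

    def acquire(self, blocking: bool = False) -> bool:
        # SET NX EX: exactly one process wins the lock; losers skip the migration.
        if not self._redis.set(self._name, self._token, nx=True, ex=self._ttl):
            return False
        self._schedule_renewal()
        return True

    def _schedule_renewal(self) -> None:
        # Renew at half the TTL so a live holder never expires mid-migration.
        def renew():
            if self._redis.get(self._name) == self._token.encode():
                self._redis.expire(self._name, self._ttl)
                self._schedule_renewal()

        self._timer = threading.Timer(self._ttl / 2, renew)
        self._timer.daemon = True
        self._timer.start()

    def release_safely(self, status: str = "unknown") -> None:
        # Cancel renewal, then delete the key only if we still own the lock.
        if self._timer:
            self._timer.cancel()
        try:
            if self._redis.get(self._name) == self._token.encode():
                self._redis.delete(self._name)
        except Exception:
            pass  # release must never mask the migration's own outcome
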
466
api/commands/vector.py
Normal file
@@ -0,0 +1,466 @@
import json

import click
from flask import current_app
from sqlalchemy import select
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.orm import sessionmaker

from configs import dify_config
from core.rag.datasource.vdb.vector_factory import Vector
from core.rag.datasource.vdb.vector_type import VectorType
from core.rag.index_processor.constant.built_in_field import BuiltInField
from core.rag.models.document import ChildDocument, Document
from extensions.ext_database import db
from models.dataset import Dataset, DatasetCollectionBinding, DatasetMetadata, DatasetMetadataBinding, DocumentSegment
from models.dataset import Document as DatasetDocument
from models.model import App, AppAnnotationSetting, MessageAnnotation


@click.command("vdb-migrate", help="Migrate vector db.")
@click.option("--scope", default="all", prompt=False, help="The scope of vector database to migrate. Default is all.")
def vdb_migrate(scope: str):
    if scope in {"knowledge", "all"}:
        migrate_knowledge_vector_database()
    if scope in {"annotation", "all"}:
        migrate_annotation_vector_database()


def migrate_annotation_vector_database():
    """
    Migrate annotation data to the target vector database.
    """
    click.echo(click.style("Starting annotation data migration.", fg="green"))
    create_count = 0
    skipped_count = 0
    total_count = 0
    page = 1
    while True:
        try:
            # get apps info
            per_page = 50
            with sessionmaker(db.engine, expire_on_commit=False).begin() as session:
                apps = (
                    session.query(App)
                    .where(App.status == "normal")
                    .order_by(App.created_at.desc())
                    .limit(per_page)
                    .offset((page - 1) * per_page)
                    .all()
                )
            if not apps:
                break
        except SQLAlchemyError:
            raise

        page += 1
        for app in apps:
            total_count = total_count + 1
            click.echo(
                f"Processing the {total_count} app {app.id}. " + f"{create_count} created, {skipped_count} skipped."
            )
            try:
                click.echo(f"Creating app annotation index: {app.id}")
                with sessionmaker(db.engine, expire_on_commit=False).begin() as session:
                    app_annotation_setting = (
                        session.query(AppAnnotationSetting).where(AppAnnotationSetting.app_id == app.id).first()
                    )

                    if not app_annotation_setting:
                        skipped_count = skipped_count + 1
                        click.echo(f"App annotation setting disabled: {app.id}")
                        continue
                    # get dataset_collection_binding info
                    dataset_collection_binding = (
                        session.query(DatasetCollectionBinding)
                        .where(DatasetCollectionBinding.id == app_annotation_setting.collection_binding_id)
                        .first()
                    )
                    if not dataset_collection_binding:
                        click.echo(f"App annotation collection binding not found: {app.id}")
                        continue
                    annotations = session.scalars(
                        select(MessageAnnotation).where(MessageAnnotation.app_id == app.id)
                    ).all()
                dataset = Dataset(
                    id=app.id,
                    tenant_id=app.tenant_id,
                    indexing_technique="high_quality",
                    embedding_model_provider=dataset_collection_binding.provider_name,
                    embedding_model=dataset_collection_binding.model_name,
                    collection_binding_id=dataset_collection_binding.id,
                )
                documents = []
                if annotations:
                    for annotation in annotations:
                        document = Document(
                            page_content=annotation.question_text,
                            metadata={"annotation_id": annotation.id, "app_id": app.id, "doc_id": annotation.id},
                        )
                        documents.append(document)

                vector = Vector(dataset, attributes=["doc_id", "annotation_id", "app_id"])
                click.echo(f"Migrating annotations for app: {app.id}.")

                try:
                    vector.delete()
                    click.echo(click.style(f"Deleted vector index for app {app.id}.", fg="green"))
                except Exception as e:
                    click.echo(click.style(f"Failed to delete vector index for app {app.id}.", fg="red"))
                    raise e
                if documents:
                    try:
                        click.echo(
                            click.style(
                                f"Creating vector index with {len(documents)} annotations for app {app.id}.",
                                fg="green",
                            )
                        )
                        vector.create(documents)
                        click.echo(click.style(f"Created vector index for app {app.id}.", fg="green"))
                    except Exception as e:
                        click.echo(click.style(f"Failed to create vector index for app {app.id}.", fg="red"))
                        raise e
                click.echo(f"Successfully migrated app annotation {app.id}.")
                create_count += 1
            except Exception as e:
                click.echo(
                    click.style(f"Error creating app annotation index: {e.__class__.__name__} {str(e)}", fg="red")
                )
                continue

    click.echo(
        click.style(
            f"Migration complete. Created {create_count} app annotation indexes. Skipped {skipped_count} apps.",
            fg="green",
        )
    )


def migrate_knowledge_vector_database():
    """
    Migrate vector database data to the target vector database.
    """
    click.echo(click.style("Starting vector database migration.", fg="green"))
    create_count = 0
    skipped_count = 0
    total_count = 0
    vector_type = dify_config.VECTOR_STORE
    upper_collection_vector_types = {
        VectorType.MILVUS,
        VectorType.PGVECTOR,
        VectorType.VASTBASE,
        VectorType.RELYT,
        VectorType.WEAVIATE,
        VectorType.ORACLE,
        VectorType.ELASTICSEARCH,
        VectorType.OPENGAUSS,
        VectorType.TABLESTORE,
        VectorType.MATRIXONE,
    }
    lower_collection_vector_types = {
        VectorType.ANALYTICDB,
        VectorType.CHROMA,
        VectorType.MYSCALE,
        VectorType.PGVECTO_RS,
        VectorType.TIDB_VECTOR,
        VectorType.OPENSEARCH,
        VectorType.TENCENT,
        VectorType.BAIDU,
        VectorType.VIKINGDB,
        VectorType.UPSTASH,
        VectorType.COUCHBASE,
        VectorType.OCEANBASE,
    }
    page = 1
    while True:
        try:
            stmt = (
                select(Dataset).where(Dataset.indexing_technique == "high_quality").order_by(Dataset.created_at.desc())
            )

            datasets = db.paginate(select=stmt, page=page, per_page=50, max_per_page=50, error_out=False)
            if not datasets.items:
                break
        except SQLAlchemyError:
            raise

        page += 1
        for dataset in datasets:
            total_count = total_count + 1
            click.echo(
                f"Processing the {total_count} dataset {dataset.id}. {create_count} created, {skipped_count} skipped."
            )
            try:
                click.echo(f"Creating dataset vector database index: {dataset.id}")
                if dataset.index_struct_dict:
                    if dataset.index_struct_dict["type"] == vector_type:
                        skipped_count = skipped_count + 1
                        continue
                collection_name = ""
                dataset_id = dataset.id
                if vector_type in upper_collection_vector_types:
                    collection_name = Dataset.gen_collection_name_by_id(dataset_id)
                elif vector_type == VectorType.QDRANT:
                    if dataset.collection_binding_id:
                        dataset_collection_binding = (
                            db.session.query(DatasetCollectionBinding)
                            .where(DatasetCollectionBinding.id == dataset.collection_binding_id)
                            .one_or_none()
                        )
                        if dataset_collection_binding:
                            collection_name = dataset_collection_binding.collection_name
                        else:
                            raise ValueError("Dataset Collection Binding not found")
                    else:
                        collection_name = Dataset.gen_collection_name_by_id(dataset_id)

                elif vector_type in lower_collection_vector_types:
                    collection_name = Dataset.gen_collection_name_by_id(dataset_id).lower()
                else:
                    raise ValueError(f"Vector store {vector_type} is not supported.")

                index_struct_dict = {"type": vector_type, "vector_store": {"class_prefix": collection_name}}
                dataset.index_struct = json.dumps(index_struct_dict)
                vector = Vector(dataset)
                click.echo(f"Migrating dataset {dataset.id}.")

                try:
                    vector.delete()
                    click.echo(
                        click.style(f"Deleted vector index {collection_name} for dataset {dataset.id}.", fg="green")
                    )
                except Exception as e:
                    click.echo(
                        click.style(
                            f"Failed to delete vector index {collection_name} for dataset {dataset.id}.", fg="red"
                        )
                    )
                    raise e

                dataset_documents = db.session.scalars(
                    select(DatasetDocument).where(
                        DatasetDocument.dataset_id == dataset.id,
                        DatasetDocument.indexing_status == "completed",
                        DatasetDocument.enabled == True,
                        DatasetDocument.archived == False,
                    )
                ).all()

                documents = []
                segments_count = 0
                for dataset_document in dataset_documents:
                    segments = db.session.scalars(
                        select(DocumentSegment).where(
                            DocumentSegment.document_id == dataset_document.id,
                            DocumentSegment.status == "completed",
                            DocumentSegment.enabled == True,
                        )
                    ).all()

                    for segment in segments:
                        document = Document(
                            page_content=segment.content,
                            metadata={
                                "doc_id": segment.index_node_id,
                                "doc_hash": segment.index_node_hash,
                                "document_id": segment.document_id,
                                "dataset_id": segment.dataset_id,
                            },
                        )
                        if dataset_document.doc_form == "hierarchical_model":
                            child_chunks = segment.get_child_chunks()
                            if child_chunks:
                                child_documents = []
                                for child_chunk in child_chunks:
                                    child_document = ChildDocument(
                                        page_content=child_chunk.content,
                                        metadata={
                                            "doc_id": child_chunk.index_node_id,
                                            "doc_hash": child_chunk.index_node_hash,
                                            "document_id": segment.document_id,
                                            "dataset_id": segment.dataset_id,
                                        },
                                    )
                                    child_documents.append(child_document)
                                document.children = child_documents

                        documents.append(document)
                        segments_count = segments_count + 1

                if documents:
                    try:
                        click.echo(
                            click.style(
                                f"Creating vector index with {len(documents)} documents of {segments_count}"
                                f" segments for dataset {dataset.id}.",
                                fg="green",
                            )
                        )
                        all_child_documents = []
                        for doc in documents:
                            if doc.children:
                                all_child_documents.extend(doc.children)
                        vector.create(documents)
                        if all_child_documents:
                            vector.create(all_child_documents)
                        click.echo(click.style(f"Created vector index for dataset {dataset.id}.", fg="green"))
                    except Exception as e:
                        click.echo(click.style(f"Failed to create vector index for dataset {dataset.id}.", fg="red"))
                        raise e
                db.session.add(dataset)
                db.session.commit()
                click.echo(f"Successfully migrated dataset {dataset.id}.")
                create_count += 1
            except Exception as e:
                db.session.rollback()
                click.echo(click.style(f"Error creating dataset index: {e.__class__.__name__} {str(e)}", fg="red"))
                continue

    click.echo(
        click.style(
            f"Migration complete. Created {create_count} dataset indexes. Skipped {skipped_count} datasets.", fg="green"
        )
    )


@click.command("add-qdrant-index", help="Add Qdrant index.")
|
||||
@click.option("--field", default="metadata.doc_id", prompt=False, help="Index field , default is metadata.doc_id.")
|
||||
def add_qdrant_index(field: str):
|
||||
click.echo(click.style("Starting Qdrant index creation.", fg="green"))
|
||||
|
||||
create_count = 0
|
||||
|
||||
try:
|
||||
bindings = db.session.query(DatasetCollectionBinding).all()
|
||||
if not bindings:
|
||||
click.echo(click.style("No dataset collection bindings found.", fg="red"))
|
||||
return
|
||||
import qdrant_client
|
||||
from qdrant_client.http.exceptions import UnexpectedResponse
|
||||
from qdrant_client.http.models import PayloadSchemaType
|
||||
|
||||
from core.rag.datasource.vdb.qdrant.qdrant_vector import PathQdrantParams, QdrantConfig
|
||||
|
||||
for binding in bindings:
|
||||
if dify_config.QDRANT_URL is None:
|
||||
raise ValueError("Qdrant URL is required.")
|
||||
qdrant_config = QdrantConfig(
|
||||
endpoint=dify_config.QDRANT_URL,
|
||||
api_key=dify_config.QDRANT_API_KEY,
|
||||
root_path=current_app.root_path,
|
||||
timeout=dify_config.QDRANT_CLIENT_TIMEOUT,
|
||||
grpc_port=dify_config.QDRANT_GRPC_PORT,
|
||||
prefer_grpc=dify_config.QDRANT_GRPC_ENABLED,
|
||||
)
|
||||
try:
|
||||
params = qdrant_config.to_qdrant_params()
|
||||
# Check the type before using
|
||||
if isinstance(params, PathQdrantParams):
|
||||
# PathQdrantParams case
|
||||
client = qdrant_client.QdrantClient(path=params.path)
|
||||
else:
|
||||
# UrlQdrantParams case - params is UrlQdrantParams
|
||||
client = qdrant_client.QdrantClient(
|
||||
url=params.url,
|
||||
api_key=params.api_key,
|
||||
timeout=int(params.timeout),
|
||||
verify=params.verify,
|
||||
grpc_port=params.grpc_port,
|
||||
prefer_grpc=params.prefer_grpc,
|
||||
)
|
||||
# create payload index
|
||||
client.create_payload_index(binding.collection_name, field, field_schema=PayloadSchemaType.KEYWORD)
|
||||
create_count += 1
|
||||
except UnexpectedResponse as e:
|
||||
# Collection does not exist, so return
|
||||
if e.status_code == 404:
|
||||
click.echo(click.style(f"Collection not found: {binding.collection_name}.", fg="red"))
|
||||
continue
|
||||
# Some other error occurred, so re-raise the exception
|
||||
else:
|
||||
click.echo(
|
||||
click.style(
|
||||
f"Failed to create Qdrant index for collection: {binding.collection_name}.", fg="red"
|
||||
)
|
||||
)
|
||||
|
||||
except Exception:
|
||||
click.echo(click.style("Failed to create Qdrant client.", fg="red"))
|
||||
|
||||
click.echo(click.style(f"Index creation complete. Created {create_count} collection indexes.", fg="green"))
|
||||
|
||||
|
||||
@click.command("old-metadata-migration", help="Old metadata migration.")
|
||||
def old_metadata_migration():
|
||||
"""
|
||||
Old metadata migration.
|
||||
"""
|
||||
click.echo(click.style("Starting old metadata migration.", fg="green"))
|
||||
|
||||
page = 1
|
||||
while True:
|
||||
try:
|
||||
stmt = (
|
||||
select(DatasetDocument)
|
||||
.where(DatasetDocument.doc_metadata.is_not(None))
|
||||
.order_by(DatasetDocument.created_at.desc())
|
||||
)
|
||||
documents = db.paginate(select=stmt, page=page, per_page=50, max_per_page=50, error_out=False)
|
||||
except SQLAlchemyError:
|
||||
raise
|
||||
if not documents:
|
||||
break
|
||||
for document in documents:
|
||||
if document.doc_metadata:
|
||||
doc_metadata = document.doc_metadata
|
||||
for key in doc_metadata:
|
||||
for field in BuiltInField:
|
||||
if field.value == key:
|
||||
break
|
||||
else:
|
||||
dataset_metadata = (
|
||||
db.session.query(DatasetMetadata)
|
||||
.where(DatasetMetadata.dataset_id == document.dataset_id, DatasetMetadata.name == key)
|
||||
.first()
|
||||
)
|
||||
if not dataset_metadata:
|
||||
dataset_metadata = DatasetMetadata(
|
||||
tenant_id=document.tenant_id,
|
||||
dataset_id=document.dataset_id,
|
||||
name=key,
|
||||
type="string",
|
||||
created_by=document.created_by,
|
||||
)
|
||||
db.session.add(dataset_metadata)
|
||||
db.session.flush()
|
||||
dataset_metadata_binding = DatasetMetadataBinding(
|
||||
tenant_id=document.tenant_id,
|
||||
dataset_id=document.dataset_id,
|
||||
metadata_id=dataset_metadata.id,
|
||||
document_id=document.id,
|
||||
created_by=document.created_by,
|
||||
)
|
||||
db.session.add(dataset_metadata_binding)
|
||||
else:
|
||||
dataset_metadata_binding = (
|
||||
db.session.query(DatasetMetadataBinding) # type: ignore
|
||||
.where(
|
||||
DatasetMetadataBinding.dataset_id == document.dataset_id,
|
||||
DatasetMetadataBinding.document_id == document.id,
|
||||
DatasetMetadataBinding.metadata_id == dataset_metadata.id,
|
||||
)
|
||||
.first()
|
||||
)
|
||||
if not dataset_metadata_binding:
|
||||
dataset_metadata_binding = DatasetMetadataBinding(
|
||||
tenant_id=document.tenant_id,
|
||||
dataset_id=document.dataset_id,
|
||||
metadata_id=dataset_metadata.id,
|
||||
document_id=document.id,
|
||||
created_by=document.created_by,
|
||||
)
|
||||
db.session.add(dataset_metadata_binding)
|
||||
db.session.commit()
|
||||
page += 1
|
||||
click.echo(click.style("Old metadata migration completed.", fg="green"))
|
||||
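
All four commands in this file are click commands; assuming they are registered on the Flask CLI as in the rest of the codebase, they can be exercised directly with click's test runner (an illustrative sketch; a real run also needs the Flask app and database context):

from click.testing import CliRunner

runner = CliRunner()
result = runner.invoke(vdb_migrate, ["--scope", "knowledge"])  # or "annotation" / "all"
print(result.exit_code, result.output)
result = runner.invoke(add_qdrant_index, ["--field", "metadata.doc_id"])
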
@@ -17,11 +17,6 @@ class WeaviateConfig(BaseSettings):
        default=None,
    )

    WEAVIATE_GRPC_ENABLED: bool = Field(
        description="Whether to enable gRPC for Weaviate connection (True for gRPC, False for HTTP)",
        default=True,
    )

    WEAVIATE_GRPC_ENDPOINT: str | None = Field(
        description="URL of the Weaviate gRPC server (e.g., 'grpc://localhost:50051' or 'grpcs://weaviate.example.com:443')",
        default=None,
@@ -39,6 +39,7 @@ from . import (
    feature,
    human_input_form,
    init_validate,
    notification,
    ping,
    setup,
    spec,
@@ -184,6 +185,7 @@ __all__ = [
    "model_config",
    "model_providers",
    "models",
    "notification",
    "oauth",
    "oauth_server",
    "ops_trace",
@@ -1,3 +1,5 @@
import csv
import io
from collections.abc import Callable
from functools import wraps
from typing import ParamSpec, TypeVar
@@ -6,7 +8,7 @@ from flask import request
from flask_restx import Resource
from pydantic import BaseModel, Field, field_validator
from sqlalchemy import select
from werkzeug.exceptions import NotFound, Unauthorized
from werkzeug.exceptions import BadRequest, NotFound, Unauthorized

from configs import dify_config
from constants.languages import supported_language
@@ -16,6 +18,7 @@ from core.db.session_factory import session_factory
from extensions.ext_database import db
from libs.token import extract_access_token
from models.model import App, ExporleBanner, InstalledApp, RecommendedApp, TrialApp
from services.billing_service import BillingService

P = ParamSpec("P")
R = TypeVar("R")
@@ -277,3 +280,168 @@ class DeleteExploreBannerApi(Resource):
        db.session.commit()

        return {"result": "success"}, 204


class LangContentPayload(BaseModel):
    lang: str = Field(..., description="Language tag: 'zh' | 'en' | 'jp'")
    title: str = Field(...)
    subtitle: str | None = Field(default=None)
    body: str = Field(...)
    title_pic_url: str | None = Field(default=None)


class UpsertNotificationPayload(BaseModel):
    notification_id: str | None = Field(default=None, description="Omit to create; supply UUID to update")
    contents: list[LangContentPayload] = Field(..., min_length=1)
    start_time: str | None = Field(default=None, description="RFC3339, e.g. 2026-03-01T00:00:00Z")
    end_time: str | None = Field(default=None, description="RFC3339, e.g. 2026-03-20T23:59:59Z")
    frequency: str = Field(default="once", description="'once' | 'every_page_load'")
    status: str = Field(default="active", description="'active' | 'inactive'")


class BatchAddNotificationAccountsPayload(BaseModel):
    notification_id: str = Field(...)
    user_email: list[str] = Field(..., description="List of account email addresses")


console_ns.schema_model(
    UpsertNotificationPayload.__name__,
    UpsertNotificationPayload.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0),
)

console_ns.schema_model(
    BatchAddNotificationAccountsPayload.__name__,
    BatchAddNotificationAccountsPayload.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0),
)


@console_ns.route("/admin/upsert_notification")
class UpsertNotificationApi(Resource):
    @console_ns.doc("upsert_notification")
    @console_ns.doc(
        description=(
            "Create or update an in-product notification. "
            "Supply notification_id to update an existing one; omit it to create a new one. "
            "Pass at least one language variant in contents (zh / en / jp)."
        )
    )
    @console_ns.expect(console_ns.models[UpsertNotificationPayload.__name__])
    @console_ns.response(200, "Notification upserted successfully")
    @only_edition_cloud
    @admin_required
    def post(self):
        payload = UpsertNotificationPayload.model_validate(console_ns.payload)
        result = BillingService.upsert_notification(
            contents=[c.model_dump() for c in payload.contents],
            frequency=payload.frequency,
            status=payload.status,
            notification_id=payload.notification_id,
            start_time=payload.start_time,
            end_time=payload.end_time,
        )
        return {"result": "success", "notification_id": result.get("notificationId")}, 200


@console_ns.route("/admin/batch_add_notification_accounts")
class BatchAddNotificationAccountsApi(Resource):
    @console_ns.doc("batch_add_notification_accounts")
    @console_ns.doc(
        description=(
            "Register target accounts for a notification by email address. "
            'JSON body: {"notification_id": "...", "user_email": ["a@example.com", ...]}. '
            "File upload: multipart/form-data with a 'file' field (CSV or TXT, one email per line) "
            "plus a 'notification_id' field. "
            "Emails that do not match any account are silently skipped."
        )
    )
    @console_ns.response(200, "Accounts added successfully")
    @only_edition_cloud
    @admin_required
    def post(self):
        from models.account import Account

        if "file" in request.files:
            notification_id = request.form.get("notification_id", "").strip()
            if not notification_id:
                raise BadRequest("notification_id is required.")
            emails = self._parse_emails_from_file()
        else:
            payload = BatchAddNotificationAccountsPayload.model_validate(console_ns.payload)
            notification_id = payload.notification_id
            emails = payload.user_email

        if not emails:
            raise BadRequest("No valid email addresses provided.")

        # Resolve emails → account IDs in chunks to avoid an oversized IN clause
        account_ids: list[str] = []
        chunk_size = 500
        for i in range(0, len(emails), chunk_size):
            chunk = emails[i : i + chunk_size]
            rows = db.session.execute(select(Account.id, Account.email).where(Account.email.in_(chunk))).all()
            account_ids.extend(str(row.id) for row in rows)

        if not account_ids:
            raise BadRequest("None of the provided emails matched an existing account.")

        # Send to dify-saas in batches of 1000
        total_count = 0
        batch_size = 1000
        for i in range(0, len(account_ids), batch_size):
            batch = account_ids[i : i + batch_size]
            result = BillingService.batch_add_notification_accounts(
                notification_id=notification_id,
                account_ids=batch,
            )
            total_count += result.get("count", 0)

        return {
            "result": "success",
            "emails_provided": len(emails),
            "accounts_matched": len(account_ids),
            "count": total_count,
        }, 200

    @staticmethod
    def _parse_emails_from_file() -> list[str]:
        """Parse email addresses from an uploaded CSV or TXT file."""
        file = request.files["file"]
        if not file.filename:
            raise BadRequest("Uploaded file has no filename.")

        filename_lower = file.filename.lower()
        if not filename_lower.endswith((".csv", ".txt")):
            raise BadRequest("Invalid file type. Only CSV (.csv) and TXT (.txt) files are allowed.")

        try:
            content = file.read().decode("utf-8")
        except UnicodeDecodeError:
            try:
                file.seek(0)
                content = file.read().decode("gbk")
            except UnicodeDecodeError:
                raise BadRequest("Unable to decode the file. Please use UTF-8 or GBK encoding.")

        emails: list[str] = []
        if filename_lower.endswith(".csv"):
            reader = csv.reader(io.StringIO(content))
            for row in reader:
                for cell in row:
                    cell = cell.strip()
                    if cell:
                        emails.append(cell)
        else:
            for line in content.splitlines():
                line = line.strip()
                if line:
                    emails.append(line)

        # Deduplicate while preserving order
        seen: set[str] = set()
        unique_emails: list[str] = []
        for email in emails:
            if email.lower() not in seen:
                seen.add(email.lower())
                unique_emails.append(email)

        return unique_emails
90
api/controllers/console/notification.py
Normal file
@@ -0,0 +1,90 @@
from flask import request
from flask_restx import Resource
from pydantic import BaseModel, Field

from controllers.console import console_ns
from controllers.console.wraps import account_initialization_required, only_edition_cloud, setup_required
from libs.login import current_account_with_tenant, login_required
from services.billing_service import BillingService

# Notification content is stored under three lang tags.
_FALLBACK_LANG = "en-US"


def _pick_lang_content(contents: dict, lang: str) -> dict:
    """Return the single LangContent for *lang*, falling back to English."""
    return contents.get(lang) or contents.get(_FALLBACK_LANG) or next(iter(contents.values()), {})


class DismissNotificationPayload(BaseModel):
    notification_id: str = Field(...)


@console_ns.route("/notification")
class NotificationApi(Resource):
    @console_ns.doc("get_notification")
    @console_ns.doc(
        description=(
            "Return the active in-product notification for the current user "
            "in their interface language (falls back to English if unavailable). "
            "The notification is NOT marked as seen here; call POST /notification/dismiss "
            "when the user explicitly closes the modal."
        ),
        responses={
            200: "Success — inspect should_show to decide whether to render the modal",
            401: "Unauthorized",
        },
    )
    @setup_required
    @login_required
    @account_initialization_required
    @only_edition_cloud
    def get(self):
        current_user, _ = current_account_with_tenant()

        result = BillingService.get_account_notification(str(current_user.id))

        # Proto JSON uses camelCase field names (Kratos default marshaling).
        if not result.get("shouldShow"):
            return {"should_show": False, "notifications": []}, 200

        lang = current_user.interface_language or _FALLBACK_LANG

        notifications = []
        for notification in result.get("notifications") or []:
            contents: dict = notification.get("contents") or {}
            lang_content = _pick_lang_content(contents, lang)
            notifications.append(
                {
                    "notification_id": notification.get("notificationId"),
                    "frequency": notification.get("frequency"),
                    "lang": lang_content.get("lang", lang),
                    "title": lang_content.get("title", ""),
                    "subtitle": lang_content.get("subtitle", ""),
                    "body": lang_content.get("body", ""),
                    "title_pic_url": lang_content.get("titlePicUrl", ""),
                }
            )

        return {"should_show": bool(notifications), "notifications": notifications}, 200


@console_ns.route("/notification/dismiss")
class NotificationDismissApi(Resource):
    @console_ns.doc("dismiss_notification")
    @console_ns.doc(
        description="Mark a notification as dismissed for the current user.",
        responses={200: "Success", 401: "Unauthorized"},
    )
    @setup_required
    @login_required
    @account_initialization_required
    @only_edition_cloud
    def post(self):
        current_user, _ = current_account_with_tenant()
        payload = DismissNotificationPayload.model_validate(request.get_json())
        BillingService.dismiss_notification(
            notification_id=payload.notification_id,
            account_id=str(current_user.id),
        )
        return {"result": "success"}, 200
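
A small usage sketch of _pick_lang_content's fallback chain (values are illustrative; real contents come from the billing service response above):

contents = {
    "en-US": {"lang": "en-US", "title": "Maintenance window"},
    "zh-Hans": {"lang": "zh-Hans", "title": "维护窗口"},
}
assert _pick_lang_content(contents, "zh-Hans")["title"] == "维护窗口"  # exact match
assert _pick_lang_content(contents, "ja-JP")["lang"] == "en-US"       # falls back to English
assert _pick_lang_content({}, "ja-JP") == {}                          # empty contents -> empty dict
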
@@ -5,6 +5,7 @@ from typing import Any, Literal
from flask import request, send_file
from flask_restx import Resource
from pydantic import BaseModel, Field
from werkzeug.datastructures import FileStorage
from werkzeug.exceptions import Forbidden

from configs import dify_config
@@ -169,6 +170,20 @@ register_enum_models(
)


def _read_upload_content(file: FileStorage, max_size: int) -> bytes:
    """
    Read the uploaded file and validate its actual size before delegating to the plugin service.

    FileStorage.content_length is not reliable for multipart test uploads and may be zero even when
    content exists, so the controllers validate against the loaded bytes instead.
    """
    content = file.read()
    if len(content) > max_size:
        raise ValueError("File size exceeds the maximum allowed size")

    return content


@console_ns.route("/workspaces/current/plugin/debugging-key")
class PluginDebuggingKeyApi(Resource):
    @setup_required
@@ -284,12 +299,7 @@ class PluginUploadFromPkgApi(Resource):
        _, tenant_id = current_account_with_tenant()

        file = request.files["pkg"]

        # check file size
        if file.content_length > dify_config.PLUGIN_MAX_PACKAGE_SIZE:
            raise ValueError("File size exceeds the maximum allowed size")

        content = file.read()
        content = _read_upload_content(file, dify_config.PLUGIN_MAX_PACKAGE_SIZE)
        try:
            response = PluginService.upload_pkg(tenant_id, content)
        except PluginDaemonClientSideError as e:
@@ -328,12 +338,7 @@ class PluginUploadFromBundleApi(Resource):
        _, tenant_id = current_account_with_tenant()

        file = request.files["bundle"]

        # check file size
        if file.content_length > dify_config.PLUGIN_MAX_BUNDLE_SIZE:
            raise ValueError("File size exceeds the maximum allowed size")

        content = file.read()
        content = _read_upload_content(file, dify_config.PLUGIN_MAX_BUNDLE_SIZE)
        try:
            response = PluginService.upload_bundle(tenant_id, content)
        except PluginDaemonClientSideError as e:
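
The motivation in _read_upload_content's docstring is easy to reproduce: a FileStorage built without a Content-Length header reports content_length == 0 even though the stream holds data, so the old size check above could never fire for such uploads. A small sketch:

import io

from werkzeug.datastructures import FileStorage

fs = FileStorage(stream=io.BytesIO(b"x" * 1024), filename="plugin.difypkg")
print(fs.content_length)  # 0 - no Content-Length header was supplied
print(len(fs.read()))     # 1024 - the size the new helper actually validates
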
@@ -114,6 +114,7 @@ def get_user_tenant(view_func: Callable[P, R]):

def plugin_data(view: Callable[P, R] | None = None, *, payload_type: type[BaseModel]):
    def decorator(view_func: Callable[P, R]):
        @wraps(view_func)
        def decorated_view(*args: P.args, **kwargs: P.kwargs):
            try:
                data = request.get_json()
@@ -6,6 +6,7 @@ from typing import Any

from core.agent.base_agent_runner import BaseAgentRunner
from core.agent.entities import AgentScratchpadUnit
from core.agent.errors import AgentMaxIterationError
from core.agent.output_parser.cot_output_parser import CotAgentOutputParser
from core.app.apps.base_app_queue_manager import PublishFrom
from core.app.entities.queue_entities import QueueAgentThoughtEvent, QueueMessageEndEvent, QueueMessageFileEvent
@@ -22,7 +23,6 @@ from dify_graph.model_runtime.entities.message_entities import (
    ToolPromptMessage,
    UserPromptMessage,
)
from dify_graph.nodes.agent.exc import AgentMaxIterationError
from models.model import Message

logger = logging.getLogger(__name__)
9
api/core/agent/errors.py
Normal file
@@ -0,0 +1,9 @@
class AgentMaxIterationError(Exception):
    """Raised when an agent runner exceeds the configured max iteration count."""

    def __init__(self, max_iteration: int):
        self.max_iteration = max_iteration
        super().__init__(
            f"Agent exceeded the maximum iteration limit of {max_iteration}. "
            f"The agent was unable to complete the task within the allowed number of iterations."
        )
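
A hedged sketch of how the runners above can use this exception; the loop and predicate names here are illustrative, not taken from the diff:

def run_until_done(max_iteration: int) -> None:
    for _ in range(max_iteration):
        if task_finished():  # hypothetical completion check
            return
    raise AgentMaxIterationError(max_iteration)

try:
    run_until_done(5)
except AgentMaxIterationError as e:
    print(e.max_iteration)  # 5; str(e) carries the full human-readable message
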
@@ -5,6 +5,7 @@ from copy import deepcopy
from typing import Any, Union

from core.agent.base_agent_runner import BaseAgentRunner
from core.agent.errors import AgentMaxIterationError
from core.app.apps.base_app_queue_manager import PublishFrom
from core.app.entities.queue_entities import QueueAgentThoughtEvent, QueueMessageEndEvent, QueueMessageFileEvent
from core.prompt.agent_history_prompt_transform import AgentHistoryPromptTransform
@@ -25,7 +26,6 @@ from dify_graph.model_runtime.entities (
    UserPromptMessage,
)
from dify_graph.model_runtime.entities.message_entities import ImagePromptMessageContent, PromptMessageContentUnionTypes
from dify_graph.nodes.agent.exc import AgentMaxIterationError
from models.model import Message

logger = logging.getLogger(__name__)
@@ -138,20 +138,25 @@ class AdvancedChatAppRunner(WorkflowBasedAppRunner):
        query = self.application_generate_entity.query

        # moderation
        if self.handle_input_moderation(
        stop, new_inputs, new_query = self.handle_input_moderation(
            app_record=self._app,
            app_generate_entity=self.application_generate_entity,
            inputs=inputs,
            query=query,
            message_id=self.message.id,
        ):
        )
        if stop:
            return

        self.application_generate_entity.inputs = new_inputs
        self.application_generate_entity.query = new_query
        system_inputs.query = new_query

        # annotation reply
        if self.handle_annotation_reply(
            app_record=self._app,
            message=self.message,
            query=query,
            query=new_query,
            app_generate_entity=self.application_generate_entity,
        ):
            return
@@ -163,7 +168,7 @@ class AdvancedChatAppRunner(WorkflowBasedAppRunner):
        # init variable pool
        variable_pool = VariablePool(
            system_variables=system_inputs,
            user_inputs=inputs,
            user_inputs=new_inputs,
            environment_variables=self._workflow.environment_variables,
            # Based on the definition of `Variable`,
            # `VariableBase` instances can be safely used as `Variable` since they are compatible.
@@ -240,10 +245,10 @@ class AdvancedChatAppRunner(WorkflowBasedAppRunner):
        inputs: Mapping[str, Any],
        query: str,
        message_id: str,
    ) -> bool:
    ) -> tuple[bool, Mapping[str, Any], str]:
        try:
            # process sensitive_word_avoidance
            _, inputs, query = self.moderation_for_inputs(
            _, new_inputs, new_query = self.moderation_for_inputs(
                app_id=app_record.id,
                tenant_id=app_generate_entity.app_config.tenant_id,
                app_generate_entity=app_generate_entity,
@@ -253,9 +258,9 @@ class AdvancedChatAppRunner(WorkflowBasedAppRunner):
            )
        except ModerationError as e:
            self._complete_with_stream_output(text=str(e), stopped_by=QueueStopEvent.StopBy.INPUT_MODERATION)
            return True
            return True, inputs, query

        return False
        return False, new_inputs, new_query

    def handle_annotation_reply(
        self, app_record: App, message: Message, query: str, app_generate_entity: AdvancedChatAppGenerateEntity

@@ -114,7 +114,7 @@ class AdvancedChatAppGenerateResponseConverter(AppGenerateResponseConverter):
                    metadata = sub_stream_response_dict.get("metadata", {})
                    sub_stream_response_dict["metadata"] = cls._get_simple_metadata(metadata)
                    response_chunk.update(sub_stream_response_dict)
                if isinstance(sub_stream_response, ErrorStreamResponse):
                elif isinstance(sub_stream_response, ErrorStreamResponse):
                    data = cls._error_to_stream_response(sub_stream_response.err)
                    response_chunk.update(data)
                elif isinstance(sub_stream_response, NodeStartStreamResponse | NodeFinishStreamResponse):

@@ -113,7 +113,7 @@ class AgentChatAppGenerateResponseConverter(AppGenerateResponseConverter):
                    metadata = sub_stream_response_dict.get("metadata", {})
                    sub_stream_response_dict["metadata"] = cls._get_simple_metadata(metadata)
                    response_chunk.update(sub_stream_response_dict)
                if isinstance(sub_stream_response, ErrorStreamResponse):
                elif isinstance(sub_stream_response, ErrorStreamResponse):
                    data = cls._error_to_stream_response(sub_stream_response.err)
                    response_chunk.update(data)
                else:

@@ -113,7 +113,7 @@ class ChatAppGenerateResponseConverter(AppGenerateResponseConverter):
                    metadata = sub_stream_response_dict.get("metadata", {})
                    sub_stream_response_dict["metadata"] = cls._get_simple_metadata(metadata)
                    response_chunk.update(sub_stream_response_dict)
                if isinstance(sub_stream_response, ErrorStreamResponse):
                elif isinstance(sub_stream_response, ErrorStreamResponse):
                    data = cls._error_to_stream_response(sub_stream_response.err)
                    response_chunk.update(data)
                else:

@@ -3,7 +3,10 @@ import time
from collections.abc import Mapping, Sequence
from typing import Any, cast

from pydantic import ValidationError

from core.app.apps.base_app_queue_manager import AppQueueManager, PublishFrom
from core.app.entities.agent_strategy import AgentStrategyInfo
from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom, build_dify_run_context
from core.app.entities.queue_entities import (
    AppQueueEvent,
@@ -30,8 +33,10 @@ from core.app.entities.queue_entities import (
    QueueWorkflowSucceededEvent,
)
from core.workflow.node_factory import DifyNodeFactory
from core.workflow.node_resolution import resolve_workflow_node_class
from core.workflow.workflow_entry import WorkflowEntry
from dify_graph.entities import GraphInitParams
from dify_graph.entities.graph_config import NodeConfigDictAdapter
from dify_graph.entities.pause_reason import HumanInputRequired
from dify_graph.graph import Graph
from dify_graph.graph_engine.layers.base import GraphEngineLayer
@@ -62,8 +67,6 @@ from dify_graph.graph_events (
    NodeRunSucceededEvent,
)
from dify_graph.graph_events.graph import GraphRunAbortedEvent
from dify_graph.nodes import NodeType
from dify_graph.nodes.node_mapping import NODE_TYPE_CLASSES_MAPPING
from dify_graph.runtime import GraphRuntimeState, VariablePool
from dify_graph.system_variable import SystemVariable
from dify_graph.variable_loader import DUMMY_VARIABLE_LOADER, VariableLoader, load_into_variable_pool
@@ -303,10 +306,12 @@ class WorkflowBasedAppRunner:
        if not target_node_config:
            raise ValueError(f"{node_type_label} node id not found in workflow graph")

        target_node_config = NodeConfigDictAdapter.validate_python(target_node_config)

        # Get node class
        node_type = NodeType(target_node_config.get("data", {}).get("type"))
        node_version = target_node_config.get("data", {}).get("version", "1")
        node_cls = NODE_TYPE_CLASSES_MAPPING[node_type][node_version]
        node_type = target_node_config["data"].type
        node_version = str(target_node_config["data"].version)
        node_cls = resolve_workflow_node_class(node_type=node_type, node_version=node_version)

        # Use the variable pool from graph_runtime_state instead of creating a new one
        variable_pool = graph_runtime_state.variable_pool
@@ -334,6 +339,18 @@ class WorkflowBasedAppRunner:

        return graph, variable_pool

    @staticmethod
    def _build_agent_strategy_info(event: NodeRunStartedEvent) -> AgentStrategyInfo | None:
        raw_agent_strategy = event.extras.get("agent_strategy")
        if raw_agent_strategy is None:
            return None

        try:
            return AgentStrategyInfo.model_validate(raw_agent_strategy)
        except ValidationError:
            logger.warning("Invalid agent strategy payload for node %s", event.node_id, exc_info=True)
            return None

    def _handle_event(self, workflow_entry: WorkflowEntry, event: GraphEngineEvent):
        """
        Handle event
@@ -419,7 +436,7 @@ class WorkflowBasedAppRunner:
                start_at=event.start_at,
                in_iteration_id=event.in_iteration_id,
                in_loop_id=event.in_loop_id,
                agent_strategy=event.agent_strategy,
                agent_strategy=self._build_agent_strategy_info(event),
                provider_type=event.provider_type,
                provider_id=event.provider_id,
            )

@@ -0,0 +1,3 @@
from .agent_strategy import AgentStrategyInfo

__all__ = ["AgentStrategyInfo"]
8
api/core/app/entities/agent_strategy.py
Normal file
@@ -0,0 +1,8 @@
from pydantic import BaseModel, ConfigDict


class AgentStrategyInfo(BaseModel):
    name: str
    icon: str | None = None

    model_config = ConfigDict(extra="forbid")
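
ConfigDict(extra="forbid") is what lets _build_agent_strategy_info (earlier in this diff) reject malformed payloads instead of silently dropping unknown fields. A small sketch of the behavior:

from pydantic import ValidationError

AgentStrategyInfo.model_validate({"name": "react", "icon": None})  # validates cleanly
try:
    AgentStrategyInfo.model_validate({"name": "react", "unexpected": 1})
except ValidationError as e:
    print(e.errors()[0]["type"])  # "extra_forbidden"
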
@@ -5,8 +5,8 @@ from typing import Any

from pydantic import BaseModel, ConfigDict, Field

from core.app.entities.agent_strategy import AgentStrategyInfo
from core.rag.entities.citation_metadata import RetrievalSourceMetadata
from dify_graph.entities import AgentNodeStrategyInit
from dify_graph.entities.pause_reason import PauseReason
from dify_graph.entities.workflow_start_reason import WorkflowStartReason
from dify_graph.enums import WorkflowNodeExecutionMetadataKey
@@ -314,7 +314,7 @@ class QueueNodeStartedEvent(AppQueueEvent):
    in_iteration_id: str | None = None
    in_loop_id: str | None = None
    start_at: datetime
    agent_strategy: AgentNodeStrategyInit | None = None
    agent_strategy: AgentStrategyInfo | None = None

    # FIXME(-LAN-): only for ToolNode, need to refactor
    provider_type: str  # should be a core.tools.entities.tool_entities.ToolProviderType

@@ -4,8 +4,8 @@ from typing import Any

from pydantic import BaseModel, ConfigDict, Field

from core.app.entities.agent_strategy import AgentStrategyInfo
from core.rag.entities.citation_metadata import RetrievalSourceMetadata
from dify_graph.entities import AgentNodeStrategyInit
from dify_graph.entities.workflow_start_reason import WorkflowStartReason
from dify_graph.enums import WorkflowExecutionStatus, WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus
from dify_graph.model_runtime.entities.llm_entities import LLMResult, LLMUsage
@@ -349,7 +349,7 @@ class NodeStartStreamResponse(StreamResponse):
    extras: dict[str, object] = Field(default_factory=dict)
    iteration_id: str | None = None
    loop_id: str | None = None
    agent_strategy: AgentNodeStrategyInit | None = None
    agent_strategy: AgentStrategyInfo | None = None

    event: StreamEvent = StreamEvent.NODE_STARTED
    workflow_run_id: str

@@ -59,8 +59,6 @@ class DatasourcePluginProviderController(ABC):
        :param credentials: the credentials of the tool
        """
        credentials_schema = dict[str, ProviderConfig]()
        if credentials_schema is None:
            return

        for credential in self.entity.credentials_schema:
            credentials_schema[credential.name] = credential

@@ -193,7 +193,8 @@ class LLMGenerator:
            error_step = "generate rule config"
        except Exception as e:
            logger.exception("Failed to generate rule config, model: %s", args.model_config_data.name)
            rule_config["error"] = str(e)
            error = str(e)
            error_step = "generate rule config"

        rule_config["error"] = f"Failed to {error_step}. Error: {error}" if error else ""

@@ -279,7 +280,8 @@ class LLMGenerator:

        except Exception as e:
            logger.exception("Failed to generate rule config, model: %s", args.model_config_data.name)
            rule_config["error"] = str(e)
            error = str(e)
            error_step = "handle unexpected exception"

        rule_config["error"] = f"Failed to {error_step}. Error: {error}" if error else ""

@@ -191,7 +191,7 @@ def cast_parameter_value(typ: StrEnum, value: Any, /):
    except ValueError:
        raise
    except Exception:
        raise ValueError(f"The tool parameter value {value} is not in correct type of {as_normal_type(typ)}.")
        raise ValueError(f"The tool parameter value {repr(value)} is not in correct type of {as_normal_type(typ)}.")


def init_frontend_parameter(rule: PluginParameter, type: StrEnum, value: Any):

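
The switch to repr(value) keeps quoting and escapes visible, so empty or whitespace-only inputs stay diagnosable. For example:

value = ""
print(f"The tool parameter value {value} is not in correct type of number.")
# The tool parameter value  is not in correct type of number.
print(f"The tool parameter value {repr(value)} is not in correct type of number.")
# The tool parameter value '' is not in correct type of number.
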
@@ -157,6 +157,7 @@ class PluginInstallTaskPluginStatus(BaseModel):
    message: str = Field(description="The message of the install task.")
    icon: str = Field(description="The icon of the plugin.")
    labels: I18nObject = Field(description="The labels of the plugin.")
    source: str | None = Field(default=None, description="The installation source of the plugin")


class PluginInstallTask(BasePluginEntity):

@@ -74,7 +74,8 @@ class ExtractProcessor:
        else:
            suffix = ""
        # https://stackoverflow.com/questions/26541416/generate-temporary-file-names-without-creating-actual-file-in-python#comment90414256_26541521
        file_path = f"{temp_dir}/{tempfile.gettempdir()}{suffix}"
        # Generate a temporary filename under the created temp_dir and ensure the directory exists
        file_path = f"{temp_dir}/{next(tempfile._get_candidate_names())}{suffix}"  # type: ignore
        Path(file_path).write_bytes(response.content)
        extract_setting = ExtractSetting(datasource_type=DatasourceType.FILE, document_model="text_model")
        if return_text:
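
For reference, a public-API equivalent of the private tempfile._get_candidate_names() call above would be mkstemp (a sketch only; the diff's choice keeps a bare path so Path.write_bytes creates the file itself):

import os
import tempfile

fd, file_path = tempfile.mkstemp(suffix=suffix, dir=temp_dir)  # suffix/temp_dir as in the hunk above
os.close(fd)  # the file now exists; write_bytes can overwrite it in place
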
@@ -204,26 +204,61 @@ class WordExtractor(BaseExtractor):
        return " ".join(unique_content)

    def _parse_cell_paragraph(self, paragraph, image_map):
        paragraph_content = []
        for run in paragraph.runs:
            if run.element.xpath(".//a:blip"):
                for blip in run.element.xpath(".//a:blip"):
                    image_id = blip.get("{http://schemas.openxmlformats.org/officeDocument/2006/relationships}embed")
                    if not image_id:
                        continue
                    rel = paragraph.part.rels.get(image_id)
                    if rel is None:
                        continue
                    # For external images, use image_id as key; for internal, use target_part
                    if rel.is_external:
                        if image_id in image_map:
                            paragraph_content.append(image_map[image_id])
                    else:
                        image_part = rel.target_part
                        if image_part in image_map:
                            paragraph_content.append(image_map[image_part])
            else:
                paragraph_content.append(run.text)
        paragraph_content: list[str] = []

        for child in paragraph._element:
            tag = child.tag
            if tag == qn("w:hyperlink"):
                # Note: w:hyperlink elements may also use w:anchor for internal bookmarks.
                # This extractor intentionally only converts external links (HTTP/mailto, etc.)
                # that are backed by a relationship id (r:id) with rel.is_external == True.
                # Hyperlinks without such an external rel (including anchor-only bookmarks)
                # are left as plain text (the accumulated link_text).
                r_id = child.get(qn("r:id"))
                link_text_parts: list[str] = []
                for run_elem in child.findall(qn("w:r")):
                    run = Run(run_elem, paragraph)
                    if run.text:
                        link_text_parts.append(run.text)
                link_text = "".join(link_text_parts).strip()
                if r_id:
                    try:
                        rel = paragraph.part.rels.get(r_id)
                        if rel:
                            target_ref = getattr(rel, "target_ref", None)
                            if target_ref:
                                parsed_target = urlparse(str(target_ref))
                                if rel.is_external or parsed_target.scheme in ("http", "https", "mailto"):
                                    display_text = link_text or str(target_ref)
                                    link_text = f"[{display_text}]({target_ref})"
                    except Exception:
                        logger.exception("Failed to resolve URL for hyperlink with r:id: %s", r_id)
                if link_text:
                    paragraph_content.append(link_text)

            elif tag == qn("w:r"):
                run = Run(child, paragraph)
                if run.element.xpath(".//a:blip"):
                    for blip in run.element.xpath(".//a:blip"):
                        image_id = blip.get(
                            "{http://schemas.openxmlformats.org/officeDocument/2006/relationships}embed"
                        )
                        if not image_id:
                            continue
                        rel = paragraph.part.rels.get(image_id)
                        if rel is None:
                            continue
                        if rel.is_external:
                            if image_id in image_map:
                                paragraph_content.append(image_map[image_id])
                        else:
                            image_part = rel.target_part
                            if image_part in image_map:
                                paragraph_content.append(image_map[image_part])
                else:
                    if run.text:
                        paragraph_content.append(run.text)

        return "".join(paragraph_content).strip()

    def parse_docx(self, docx_path):
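
A condensed sketch of the scheme gate the new branch applies: only relationship targets flagged is_external or carrying an http/https/mailto scheme become markdown links; everything else (e.g. anchor-only bookmarks) stays plain text:

from urllib.parse import urlparse

def to_markdown_link(link_text: str, target_ref: str, is_external: bool) -> str:
    parsed = urlparse(target_ref)
    if is_external or parsed.scheme in ("http", "https", "mailto"):
        return f"[{link_text or target_ref}]({target_ref})"
    return link_text

print(to_markdown_link("Dify", "https://dify.ai", False))     # [Dify](https://dify.ai)
print(to_markdown_link("see chapter 2", "#bookmark", False))  # see chapter 2
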
@@ -113,17 +113,26 @@ class BuiltinToolProviderController(ToolProviderController):
        """
        return self.get_credentials_schema_by_type(CredentialType.API_KEY)

    def get_credentials_schema_by_type(self, credential_type: str) -> list[ProviderConfig]:
    def get_credentials_schema_by_type(self, credential_type: CredentialType | str) -> list[ProviderConfig]:
        """
        returns the credentials schema of the provider

        :param credential_type: the type of the credential
        :return: the credentials schema of the provider
        :param credential_type: the type of the credential, as CredentialType or str; str values
            are normalized via CredentialType.of and may raise ValueError for invalid values.
        :return: list[ProviderConfig] for CredentialType.OAUTH2 or CredentialType.API_KEY, an
            empty list for CredentialType.UNAUTHORIZED or missing schemas.

        Reads from self.entity.oauth_schema and self.entity.credentials_schema.
        Raises ValueError for invalid credential types.
        """
        if credential_type == CredentialType.OAUTH2.value:
        if isinstance(credential_type, str):
            credential_type = CredentialType.of(credential_type)
        if credential_type == CredentialType.OAUTH2:
            return self.entity.oauth_schema.credentials_schema.copy() if self.entity.oauth_schema else []
        if credential_type == CredentialType.API_KEY:
            return self.entity.credentials_schema.copy() if self.entity.credentials_schema else []
        if credential_type == CredentialType.UNAUTHORIZED:
            return []
        raise ValueError(f"Invalid credential type: {credential_type}")

    def get_oauth_client_schema(self) -> list[ProviderConfig]:
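The normalization step means callers can pass either the enum or its string value; a hedged sketch of the call-side behavior (the `provider` instance is a stand-in for any BuiltinToolProviderController):

    # Illustrative only; both calls resolve to the same schema after the change above.
    schema = provider.get_credentials_schema_by_type(CredentialType.API_KEY)
    schema = provider.get_credentials_schema_by_type(CredentialType.API_KEY.value)  # str form, coerced via CredentialType.of
    provider.get_credentials_schema_by_type("not-a-real-type")  # raises ValueError during normalization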
@@ -137,6 +137,7 @@ class ToolFileManager:

        session.add(tool_file)
        session.commit()
        session.refresh(tool_file)

        return tool_file

@@ -19,6 +19,7 @@ from core.trigger.debug.events import (
    build_plugin_pool_key,
    build_webhook_pool_key,
)
from dify_graph.entities.graph_config import NodeConfigDict
from dify_graph.enums import NodeType
from dify_graph.nodes.trigger_plugin.entities import TriggerEventNodeData
from dify_graph.nodes.trigger_schedule.entities import ScheduleConfig
@@ -41,10 +42,10 @@ class TriggerDebugEventPoller(ABC):
    app_id: str
    user_id: str
    tenant_id: str
    node_config: Mapping[str, Any]
    node_config: NodeConfigDict
    node_id: str

    def __init__(self, tenant_id: str, user_id: str, app_id: str, node_config: Mapping[str, Any], node_id: str):
    def __init__(self, tenant_id: str, user_id: str, app_id: str, node_config: NodeConfigDict, node_id: str):
        self.tenant_id = tenant_id
        self.user_id = user_id
        self.app_id = app_id
@@ -60,7 +61,7 @@ class PluginTriggerDebugEventPoller(TriggerDebugEventPoller):
    def poll(self) -> TriggerDebugEvent | None:
        from services.trigger.trigger_service import TriggerService

        plugin_trigger_data = TriggerEventNodeData.model_validate(self.node_config.get("data", {}))
        plugin_trigger_data = TriggerEventNodeData.model_validate(self.node_config["data"], from_attributes=True)
        provider_id = TriggerProviderID(plugin_trigger_data.provider_id)
        pool_key: str = build_plugin_pool_key(
            name=plugin_trigger_data.event_name,
@@ -1,5 +1,5 @@
from collections.abc import Mapping
from typing import TYPE_CHECKING, Any, cast, final
from collections.abc import Callable, Mapping
from typing import TYPE_CHECKING, Any, TypeAlias, cast, final

from sqlalchemy import select
from sqlalchemy.orm import Session
@@ -22,7 +22,15 @@ from core.rag.retrieval.dataset_retrieval import DatasetRetrieval
from core.rag.summary_index.summary_index import SummaryIndex
from core.repositories.human_input_repository import HumanInputFormRepositoryImpl
from core.tools.tool_file_manager import ToolFileManager
from dify_graph.entities.graph_config import NodeConfigDict
from core.workflow.node_resolution import resolve_workflow_node_class
from core.workflow.nodes.agent.message_transformer import AgentMessageTransformer
from core.workflow.nodes.agent.plugin_strategy_adapter import (
    PluginAgentStrategyPresentationProvider,
    PluginAgentStrategyResolver,
)
from core.workflow.nodes.agent.runtime_support import AgentRuntimeSupport
from dify_graph.entities.base_node_data import BaseNodeData
from dify_graph.entities.graph_config import NodeConfigDict, NodeConfigDictAdapter
from dify_graph.entities.graph_init_params import DIFY_RUN_CONTEXT_KEY
from dify_graph.enums import NodeType, SystemVariableKey
from dify_graph.file.file_manager import file_manager
@@ -31,26 +39,18 @@ from dify_graph.model_runtime.entities.model_entities import ModelType
from dify_graph.model_runtime.memory import PromptMessageMemory
from dify_graph.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
from dify_graph.nodes.base.node import Node
from dify_graph.nodes.code.code_node import CodeNode, WorkflowCodeExecutor
from dify_graph.nodes.code.code_node import WorkflowCodeExecutor
from dify_graph.nodes.code.entities import CodeLanguage
from dify_graph.nodes.code.limits import CodeNodeLimits
from dify_graph.nodes.datasource import DatasourceNode
from dify_graph.nodes.document_extractor import DocumentExtractorNode, UnstructuredApiConfig
from dify_graph.nodes.http_request import HttpRequestNode, build_http_request_config
from dify_graph.nodes.human_input.human_input_node import HumanInputNode
from dify_graph.nodes.knowledge_index.knowledge_index_node import KnowledgeIndexNode
from dify_graph.nodes.knowledge_retrieval.knowledge_retrieval_node import KnowledgeRetrievalNode
from dify_graph.nodes.llm.entities import ModelConfig
from dify_graph.nodes.document_extractor import UnstructuredApiConfig
from dify_graph.nodes.http_request import build_http_request_config
from dify_graph.nodes.llm.entities import LLMNodeData
from dify_graph.nodes.llm.exc import LLMModeRequiredError, ModelNotExistError
from dify_graph.nodes.llm.node import LLMNode
from dify_graph.nodes.node_mapping import LATEST_VERSION, NODE_TYPE_CLASSES_MAPPING
from dify_graph.nodes.parameter_extractor.parameter_extractor_node import ParameterExtractorNode
from dify_graph.nodes.question_classifier.question_classifier_node import QuestionClassifierNode
from dify_graph.nodes.parameter_extractor.entities import ParameterExtractorNodeData
from dify_graph.nodes.question_classifier.entities import QuestionClassifierNodeData
from dify_graph.nodes.template_transform.template_renderer import (
    CodeExecutorJinja2TemplateRenderer,
)
from dify_graph.nodes.template_transform.template_transform_node import TemplateTransformNode
from dify_graph.nodes.tool.tool_node import ToolNode
from dify_graph.variables.segments import StringSegment
from extensions.ext_database import db
from models.model import Conversation
@@ -60,6 +60,9 @@ if TYPE_CHECKING:
    from dify_graph.runtime import GraphRuntimeState


LLMCompatibleNodeData: TypeAlias = LLMNodeData | QuestionClassifierNodeData | ParameterExtractorNodeData


def fetch_memory(
    *,
    conversation_id: str | None,
@@ -100,10 +103,7 @@ class DefaultWorkflowCodeExecutor:
@final
class DifyNodeFactory(NodeFactory):
    """
    Default implementation of NodeFactory that uses the traditional node mapping.

    This factory creates nodes by looking up their types in NODE_TYPE_CLASSES_MAPPING
    and instantiating the appropriate node class.
    Default implementation of NodeFactory that resolves node classes from the live registry.
    """

    def __init__(
@@ -146,6 +146,10 @@ class DifyNodeFactory(NodeFactory):
        )

        self._llm_credentials_provider, self._llm_model_factory = build_dify_model_access(self._dify_context.tenant_id)
        self._agent_strategy_resolver = PluginAgentStrategyResolver()
        self._agent_strategy_presentation_provider = PluginAgentStrategyPresentationProvider()
        self._agent_runtime_support = AgentRuntimeSupport()
        self._agent_message_transformer = AgentMessageTransformer()

    @staticmethod
    def _resolve_dify_context(run_context: Mapping[str, Any]) -> DifyRunContext:
@@ -157,178 +161,125 @@ class DifyNodeFactory(NodeFactory):
        return DifyRunContext.model_validate(raw_ctx)

    @override
    def create_node(self, node_config: NodeConfigDict) -> Node:
    def create_node(self, node_config: dict[str, Any] | NodeConfigDict) -> Node:
        """
        Create a Node instance from node configuration data using the traditional mapping.

        :param node_config: node configuration dictionary containing type and other data
        :return: initialized Node instance
        :raises ValueError: if node type is unknown or configuration is invalid
        :raises ValueError: if node_config fails NodeConfigDict/BaseNodeData validation
            (including pydantic ValidationError, which subclasses ValueError),
            if node type is unknown, or if no implementation exists for the resolved version
        """
        # Get node_id from config
        node_id = node_config["id"]

        # Get node type from config
        node_data = node_config["data"]
        try:
            node_type = NodeType(node_data["type"])
        except ValueError:
            raise ValueError(f"Unknown node type: {node_data['type']}")

        # Get node class
        node_mapping = NODE_TYPE_CLASSES_MAPPING.get(node_type)
        if not node_mapping:
            raise ValueError(f"No class mapping found for node type: {node_type}")

        latest_node_class = node_mapping.get(LATEST_VERSION)
        node_version = str(node_data.get("version", "1"))
        matched_node_class = node_mapping.get(node_version)
        node_class = matched_node_class or latest_node_class
        if not node_class:
            raise ValueError(f"No latest version class found for node type: {node_type}")

        # Create node instance
        if node_type == NodeType.CODE:
            return CodeNode(
                id=node_id,
                config=node_config,
                graph_init_params=self.graph_init_params,
                graph_runtime_state=self.graph_runtime_state,
                code_executor=self._code_executor,
                code_limits=self._code_limits,
            )

        if node_type == NodeType.TEMPLATE_TRANSFORM:
            return TemplateTransformNode(
                id=node_id,
                config=node_config,
                graph_init_params=self.graph_init_params,
                graph_runtime_state=self.graph_runtime_state,
                template_renderer=self._template_renderer,
                max_output_length=self._template_transform_max_output_length,
            )

        if node_type == NodeType.HTTP_REQUEST:
            return HttpRequestNode(
                id=node_id,
                config=node_config,
                graph_init_params=self.graph_init_params,
                graph_runtime_state=self.graph_runtime_state,
                http_request_config=self._http_request_config,
                http_client=self._http_request_http_client,
                tool_file_manager_factory=self._http_request_tool_file_manager_factory,
                file_manager=self._http_request_file_manager,
            )

        if node_type == NodeType.HUMAN_INPUT:
            return HumanInputNode(
                id=node_id,
                config=node_config,
                graph_init_params=self.graph_init_params,
                graph_runtime_state=self.graph_runtime_state,
                form_repository=HumanInputFormRepositoryImpl(tenant_id=self._dify_context.tenant_id),
            )

        if node_type == NodeType.KNOWLEDGE_INDEX:
            return KnowledgeIndexNode(
                id=node_id,
                config=node_config,
                graph_init_params=self.graph_init_params,
                graph_runtime_state=self.graph_runtime_state,
                index_processor=IndexProcessor(),
                summary_index_service=SummaryIndex(),
            )

        if node_type == NodeType.LLM:
            model_instance = self._build_model_instance_for_llm_node(node_data)
            memory = self._build_memory_for_llm_node(node_data=node_data, model_instance=model_instance)
            return LLMNode(
                id=node_id,
                config=node_config,
                graph_init_params=self.graph_init_params,
                graph_runtime_state=self.graph_runtime_state,
                credentials_provider=self._llm_credentials_provider,
                model_factory=self._llm_model_factory,
                model_instance=model_instance,
                memory=memory,
                http_client=self._http_request_http_client,
            )

        if node_type == NodeType.DATASOURCE:
            return DatasourceNode(
                id=node_id,
                config=node_config,
                graph_init_params=self.graph_init_params,
                graph_runtime_state=self.graph_runtime_state,
                datasource_manager=DatasourceManager,
            )

        if node_type == NodeType.KNOWLEDGE_RETRIEVAL:
            return KnowledgeRetrievalNode(
                id=node_id,
                config=node_config,
                graph_init_params=self.graph_init_params,
                graph_runtime_state=self.graph_runtime_state,
                rag_retrieval=self._rag_retrieval,
            )

        if node_type == NodeType.DOCUMENT_EXTRACTOR:
            return DocumentExtractorNode(
                id=node_id,
                config=node_config,
                graph_init_params=self.graph_init_params,
                graph_runtime_state=self.graph_runtime_state,
                unstructured_api_config=self._document_extractor_unstructured_api_config,
                http_client=self._http_request_http_client,
            )

        if node_type == NodeType.QUESTION_CLASSIFIER:
            model_instance = self._build_model_instance_for_llm_node(node_data)
            memory = self._build_memory_for_llm_node(node_data=node_data, model_instance=model_instance)
            return QuestionClassifierNode(
                id=node_id,
                config=node_config,
                graph_init_params=self.graph_init_params,
                graph_runtime_state=self.graph_runtime_state,
                credentials_provider=self._llm_credentials_provider,
                model_factory=self._llm_model_factory,
                model_instance=model_instance,
                memory=memory,
                http_client=self._http_request_http_client,
            )

        if node_type == NodeType.PARAMETER_EXTRACTOR:
            model_instance = self._build_model_instance_for_llm_node(node_data)
            memory = self._build_memory_for_llm_node(node_data=node_data, model_instance=model_instance)
            return ParameterExtractorNode(
                id=node_id,
                config=node_config,
                graph_init_params=self.graph_init_params,
                graph_runtime_state=self.graph_runtime_state,
                credentials_provider=self._llm_credentials_provider,
                model_factory=self._llm_model_factory,
                model_instance=model_instance,
                memory=memory,
            )

        if node_type == NodeType.TOOL:
            return ToolNode(
                id=node_id,
                config=node_config,
                graph_init_params=self.graph_init_params,
                graph_runtime_state=self.graph_runtime_state,
                tool_file_manager_factory=self._http_request_tool_file_manager_factory(),
            )

        typed_node_config = NodeConfigDictAdapter.validate_python(node_config)
        node_id = typed_node_config["id"]
        node_data = typed_node_config["data"]
        node_class = self._resolve_node_class(node_type=node_data.type, node_version=str(node_data.version))
        node_type = node_data.type
        node_init_kwargs_factories: Mapping[NodeType, Callable[[], dict[str, object]]] = {
            NodeType.CODE: lambda: {
                "code_executor": self._code_executor,
                "code_limits": self._code_limits,
            },
            NodeType.TEMPLATE_TRANSFORM: lambda: {
                "template_renderer": self._template_renderer,
                "max_output_length": self._template_transform_max_output_length,
            },
            NodeType.HTTP_REQUEST: lambda: {
                "http_request_config": self._http_request_config,
                "http_client": self._http_request_http_client,
                "tool_file_manager_factory": self._http_request_tool_file_manager_factory,
                "file_manager": self._http_request_file_manager,
            },
            NodeType.HUMAN_INPUT: lambda: {
                "form_repository": HumanInputFormRepositoryImpl(tenant_id=self._dify_context.tenant_id),
            },
            NodeType.KNOWLEDGE_INDEX: lambda: {
                "index_processor": IndexProcessor(),
                "summary_index_service": SummaryIndex(),
            },
            NodeType.LLM: lambda: self._build_llm_compatible_node_init_kwargs(
                node_class=node_class,
                node_data=node_data,
                include_http_client=True,
            ),
            NodeType.DATASOURCE: lambda: {
                "datasource_manager": DatasourceManager,
            },
            NodeType.KNOWLEDGE_RETRIEVAL: lambda: {
                "rag_retrieval": self._rag_retrieval,
            },
            NodeType.DOCUMENT_EXTRACTOR: lambda: {
                "unstructured_api_config": self._document_extractor_unstructured_api_config,
                "http_client": self._http_request_http_client,
            },
            NodeType.QUESTION_CLASSIFIER: lambda: self._build_llm_compatible_node_init_kwargs(
                node_class=node_class,
                node_data=node_data,
                include_http_client=True,
            ),
            NodeType.PARAMETER_EXTRACTOR: lambda: self._build_llm_compatible_node_init_kwargs(
                node_class=node_class,
                node_data=node_data,
                include_http_client=False,
            ),
            NodeType.TOOL: lambda: {
                "tool_file_manager_factory": self._http_request_tool_file_manager_factory(),
            },
            NodeType.AGENT: lambda: {
                "strategy_resolver": self._agent_strategy_resolver,
                "presentation_provider": self._agent_strategy_presentation_provider,
                "runtime_support": self._agent_runtime_support,
                "message_transformer": self._agent_message_transformer,
            },
        }
        node_init_kwargs = node_init_kwargs_factories.get(node_type, lambda: {})()
        return node_class(
            id=node_id,
            config=node_config,
            config=typed_node_config,
            graph_init_params=self.graph_init_params,
            graph_runtime_state=self.graph_runtime_state,
            **node_init_kwargs,
        )

    def _build_model_instance_for_llm_node(self, node_data: Mapping[str, Any]) -> ModelInstance:
        node_data_model = ModelConfig.model_validate(node_data["model"])
    @staticmethod
    def _validate_resolved_node_data(node_class: type[Node], node_data: BaseNodeData) -> BaseNodeData:
        """
        Re-validate the permissive graph payload with the concrete NodeData model declared by the resolved node class.
        """
        return node_class.validate_node_data(node_data)

    @staticmethod
    def _resolve_node_class(*, node_type: NodeType, node_version: str) -> type[Node]:
        return resolve_workflow_node_class(node_type=node_type, node_version=node_version)

    def _build_llm_compatible_node_init_kwargs(
        self,
        *,
        node_class: type[Node],
        node_data: BaseNodeData,
        include_http_client: bool,
    ) -> dict[str, object]:
        validated_node_data = cast(
            LLMCompatibleNodeData,
            self._validate_resolved_node_data(node_class=node_class, node_data=node_data),
        )
        model_instance = self._build_model_instance_for_llm_node(validated_node_data)
        node_init_kwargs: dict[str, object] = {
            "credentials_provider": self._llm_credentials_provider,
            "model_factory": self._llm_model_factory,
            "model_instance": model_instance,
            "memory": self._build_memory_for_llm_node(
                node_data=validated_node_data,
                model_instance=model_instance,
            ),
        }
        if include_http_client:
            node_init_kwargs["http_client"] = self._http_request_http_client
        return node_init_kwargs

    def _build_model_instance_for_llm_node(self, node_data: LLMCompatibleNodeData) -> ModelInstance:
        node_data_model = node_data.model
        if not node_data_model.mode:
            raise LLMModeRequiredError("LLM mode is required.")

@@ -364,14 +315,12 @@ class DifyNodeFactory(NodeFactory):
    def _build_memory_for_llm_node(
        self,
        *,
        node_data: Mapping[str, Any],
        node_data: LLMCompatibleNodeData,
        model_instance: ModelInstance,
    ) -> PromptMessageMemory | None:
        raw_memory_config = node_data.get("memory")
        if raw_memory_config is None:
        if node_data.memory is None:
            return None

        node_memory = MemoryConfig.model_validate(raw_memory_config)
        conversation_id_variable = self.graph_runtime_state.variable_pool.get(
            ["sys", SystemVariableKey.CONVERSATION_ID]
        )
@@ -381,6 +330,6 @@ class DifyNodeFactory(NodeFactory):
        return fetch_memory(
            conversation_id=conversation_id,
            app_id=self._dify_context.app_id,
            node_data_memory=node_memory,
            node_data_memory=node_data.memory,
            model_instance=model_instance,
        )
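The refactor replaces the long if/elif chain with a table of per-type kwargs factories; a reduced sketch of the pattern, with invented type names and string stand-ins for the real dependencies:

    # Illustrative only; mirrors `node_init_kwargs_factories.get(node_type, lambda: {})()` above.
    from collections.abc import Callable, Mapping

    def build_kwargs(node_type: str, factories: Mapping[str, Callable[[], dict[str, object]]]) -> dict[str, object]:
        # Unknown types fall back to an empty kwargs dict rather than raising.
        return factories.get(node_type, lambda: {})()

    factories = {
        "code": lambda: {"code_executor": "executor", "code_limits": "limits"},
        "tool": lambda: {"tool_file_manager_factory": "factory"},
    }
    print(build_kwargs("code", factories))   # {'code_executor': 'executor', 'code_limits': 'limits'}
    print(build_kwargs("agent", factories))  # {}

Because the table values are lambdas, per-type dependencies are only constructed for the node type actually requested, which keeps node creation lazy even though the table itself is built eagerly.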
api/core/workflow/node_resolution.py (new file)
@@ -0,0 +1,42 @@
from __future__ import annotations

from collections.abc import Mapping
from importlib import import_module

from dify_graph.enums import NodeType
from dify_graph.nodes.base.node import Node
from dify_graph.nodes.node_mapping import LATEST_VERSION, get_node_type_classes_mapping

_WORKFLOW_NODE_MODULES = ("core.workflow.nodes.agent",)
_workflow_nodes_registered = False


def ensure_workflow_nodes_registered() -> None:
    """Import workflow-local node modules so they can register with `Node.__init_subclass__`."""
    global _workflow_nodes_registered

    if _workflow_nodes_registered:
        return

    for module_name in _WORKFLOW_NODE_MODULES:
        import_module(module_name)

    _workflow_nodes_registered = True


def get_workflow_node_type_classes_mapping() -> Mapping[NodeType, Mapping[str, type[Node]]]:
    ensure_workflow_nodes_registered()
    return get_node_type_classes_mapping()


def resolve_workflow_node_class(*, node_type: NodeType, node_version: str) -> type[Node]:
    node_mapping = get_workflow_node_type_classes_mapping().get(node_type)
    if not node_mapping:
        raise ValueError(f"No class mapping found for node type: {node_type}")

    latest_node_class = node_mapping.get(LATEST_VERSION)
    matched_node_class = node_mapping.get(node_version)
    node_class = matched_node_class or latest_node_class
    if not node_class:
        raise ValueError(f"No latest version class found for node type: {node_type}")
    return node_class
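resolve_workflow_node_class falls back to the registry's LATEST_VERSION entry when the requested version has no dedicated class; a hedged usage sketch (the version strings are illustrative):

    # Assumes the agent node module has registered itself (version "1").
    from core.workflow.node_resolution import resolve_workflow_node_class
    from dify_graph.enums import NodeType

    cls = resolve_workflow_node_class(node_type=NodeType.AGENT, node_version="1")
    # A version with no dedicated class (e.g. "99") would fall back to LATEST_VERSION;
    # an unregistered node type raises ValueError instead of returning None.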
api/core/workflow/nodes/agent/__init__.py (new file)
@@ -0,0 +1,4 @@
from .agent_node import AgentNode
from .entities import AgentNodeData

__all__ = ["AgentNode", "AgentNodeData"]
api/core/workflow/nodes/agent/agent_node.py (new file)
@@ -0,0 +1,188 @@
from __future__ import annotations

from collections.abc import Generator, Mapping, Sequence
from typing import TYPE_CHECKING, Any

from dify_graph.entities.graph_config import NodeConfigDict
from dify_graph.enums import NodeType, SystemVariableKey, WorkflowNodeExecutionStatus
from dify_graph.node_events import NodeEventBase, NodeRunResult, StreamCompletedEvent
from dify_graph.nodes.base.node import Node
from dify_graph.nodes.base.variable_template_parser import VariableTemplateParser

from .entities import AgentNodeData
from .exceptions import (
    AgentInvocationError,
    AgentMessageTransformError,
)
from .message_transformer import AgentMessageTransformer
from .runtime_support import AgentRuntimeSupport
from .strategy_protocols import AgentStrategyPresentationProvider, AgentStrategyResolver

if TYPE_CHECKING:
    from dify_graph.entities import GraphInitParams
    from dify_graph.runtime import GraphRuntimeState


class AgentNode(Node[AgentNodeData]):
    node_type = NodeType.AGENT

    _strategy_resolver: AgentStrategyResolver
    _presentation_provider: AgentStrategyPresentationProvider
    _runtime_support: AgentRuntimeSupport
    _message_transformer: AgentMessageTransformer

    def __init__(
        self,
        id: str,
        config: NodeConfigDict,
        graph_init_params: GraphInitParams,
        graph_runtime_state: GraphRuntimeState,
        *,
        strategy_resolver: AgentStrategyResolver,
        presentation_provider: AgentStrategyPresentationProvider,
        runtime_support: AgentRuntimeSupport,
        message_transformer: AgentMessageTransformer,
    ) -> None:
        super().__init__(
            id=id,
            config=config,
            graph_init_params=graph_init_params,
            graph_runtime_state=graph_runtime_state,
        )
        self._strategy_resolver = strategy_resolver
        self._presentation_provider = presentation_provider
        self._runtime_support = runtime_support
        self._message_transformer = message_transformer

    @classmethod
    def version(cls) -> str:
        return "1"

    def populate_start_event(self, event) -> None:
        dify_ctx = self.require_dify_context()
        event.extras["agent_strategy"] = {
            "name": self.node_data.agent_strategy_name,
            "icon": self._presentation_provider.get_icon(
                tenant_id=dify_ctx.tenant_id,
                agent_strategy_provider_name=self.node_data.agent_strategy_provider_name,
            ),
        }

    def _run(self) -> Generator[NodeEventBase, None, None]:
        from core.plugin.impl.exc import PluginDaemonClientSideError

        dify_ctx = self.require_dify_context()

        try:
            strategy = self._strategy_resolver.resolve(
                tenant_id=dify_ctx.tenant_id,
                agent_strategy_provider_name=self.node_data.agent_strategy_provider_name,
                agent_strategy_name=self.node_data.agent_strategy_name,
            )
        except Exception as e:
            yield StreamCompletedEvent(
                node_run_result=NodeRunResult(
                    status=WorkflowNodeExecutionStatus.FAILED,
                    inputs={},
                    error=f"Failed to get agent strategy: {str(e)}",
                ),
            )
            return

        agent_parameters = strategy.get_parameters()

        parameters = self._runtime_support.build_parameters(
            agent_parameters=agent_parameters,
            variable_pool=self.graph_runtime_state.variable_pool,
            node_data=self.node_data,
            strategy=strategy,
            tenant_id=dify_ctx.tenant_id,
            app_id=dify_ctx.app_id,
            invoke_from=dify_ctx.invoke_from,
        )
        parameters_for_log = self._runtime_support.build_parameters(
            agent_parameters=agent_parameters,
            variable_pool=self.graph_runtime_state.variable_pool,
            node_data=self.node_data,
            strategy=strategy,
            tenant_id=dify_ctx.tenant_id,
            app_id=dify_ctx.app_id,
            invoke_from=dify_ctx.invoke_from,
            for_log=True,
        )
        credentials = self._runtime_support.build_credentials(parameters=parameters)

        conversation_id = self.graph_runtime_state.variable_pool.get(["sys", SystemVariableKey.CONVERSATION_ID])

        try:
            message_stream = strategy.invoke(
                params=parameters,
                user_id=dify_ctx.user_id,
                app_id=dify_ctx.app_id,
                conversation_id=conversation_id.text if conversation_id else None,
                credentials=credentials,
            )
        except Exception as e:
            error = AgentInvocationError(f"Failed to invoke agent: {str(e)}", original_error=e)
            yield StreamCompletedEvent(
                node_run_result=NodeRunResult(
                    status=WorkflowNodeExecutionStatus.FAILED,
                    inputs=parameters_for_log,
                    error=str(error),
                )
            )
            return

        try:
            yield from self._message_transformer.transform(
                messages=message_stream,
                tool_info={
                    "icon": self._presentation_provider.get_icon(
                        tenant_id=dify_ctx.tenant_id,
                        agent_strategy_provider_name=self.node_data.agent_strategy_provider_name,
                    ),
                    "agent_strategy": self.node_data.agent_strategy_name,
                },
                parameters_for_log=parameters_for_log,
                user_id=dify_ctx.user_id,
                tenant_id=dify_ctx.tenant_id,
                node_type=self.node_type,
                node_id=self._node_id,
                node_execution_id=self.id,
            )
        except PluginDaemonClientSideError as e:
            transform_error = AgentMessageTransformError(
                f"Failed to transform agent message: {str(e)}", original_error=e
            )
            yield StreamCompletedEvent(
                node_run_result=NodeRunResult(
                    status=WorkflowNodeExecutionStatus.FAILED,
                    inputs=parameters_for_log,
                    error=str(transform_error),
                )
            )

    @classmethod
    def _extract_variable_selector_to_variable_mapping(
        cls,
        *,
        graph_config: Mapping[str, Any],
        node_id: str,
        node_data: AgentNodeData,
    ) -> Mapping[str, Sequence[str]]:
        _ = graph_config  # Explicitly mark as unused
        result: dict[str, Any] = {}
        typed_node_data = node_data
        for parameter_name in typed_node_data.agent_parameters:
            input = typed_node_data.agent_parameters[parameter_name]
            match input.type:
                case "mixed" | "constant":
                    selectors = VariableTemplateParser(str(input.value)).extract_variable_selectors()
                    for selector in selectors:
                        result[selector.variable] = selector.value_selector
                case "variable":
                    result[parameter_name] = input.value

        result = {node_id + "." + key: value for key, value in result.items()}

        return result
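The selector mapping ends by prefixing every key with the node id; a reduced sketch of that last step, with an invented raw mapping for illustration:

    # Illustrative data; mirrors the node-id prefixing at the end of the method above.
    node_id = "agent_1"
    result = {"query": ["sys", "query"], "instruction": ["start", "instruction"]}
    result = {node_id + "." + key: value for key, value in result.items()}
    print(result)  # {'agent_1.query': ['sys', 'query'], 'agent_1.instruction': ['start', 'instruction']}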
@@ -5,13 +5,15 @@ from pydantic import BaseModel

from core.prompt.entities.advanced_prompt_entities import MemoryConfig
from core.tools.entities.tool_entities import ToolSelector
from dify_graph.nodes.base.entities import BaseNodeData
from dify_graph.entities.base_node_data import BaseNodeData
from dify_graph.enums import NodeType


class AgentNodeData(BaseNodeData):
    agent_strategy_provider_name: str  # redundancy
    type: NodeType = NodeType.AGENT
    agent_strategy_provider_name: str
    agent_strategy_name: str
    agent_strategy_label: str  # redundancy
    agent_strategy_label: str
    memory: MemoryConfig | None = None
    # The version of the tool parameter.
    # If this value is None, it indicates this is a previous version
@@ -119,14 +119,3 @@ class AgentVariableTypeError(AgentNodeError):
        self.expected_type = expected_type
        self.actual_type = actual_type
        super().__init__(message)


class AgentMaxIterationError(AgentNodeError):
    """Exception raised when the agent exceeds the maximum iteration limit."""

    def __init__(self, max_iteration: int):
        self.max_iteration = max_iteration
        super().__init__(
            f"Agent exceeded the maximum iteration limit of {max_iteration}. "
            f"The agent was unable to complete the task within the allowed number of iterations."
        )
api/core/workflow/nodes/agent/message_transformer.py (new file)
@@ -0,0 +1,292 @@
from __future__ import annotations

from collections.abc import Generator, Mapping
from typing import Any, cast

from sqlalchemy import select
from sqlalchemy.orm import Session

from core.tools.entities.tool_entities import ToolInvokeMessage
from core.tools.utils.message_transformer import ToolFileMessageTransformer
from dify_graph.enums import NodeType, WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus
from dify_graph.file import File, FileTransferMethod
from dify_graph.model_runtime.entities.llm_entities import LLMUsage, LLMUsageMetadata
from dify_graph.model_runtime.utils.encoders import jsonable_encoder
from dify_graph.node_events import (
    AgentLogEvent,
    NodeEventBase,
    NodeRunResult,
    StreamChunkEvent,
    StreamCompletedEvent,
)
from dify_graph.variables.segments import ArrayFileSegment
from extensions.ext_database import db
from factories import file_factory
from models import ToolFile
from services.tools.builtin_tools_manage_service import BuiltinToolManageService

from .exceptions import AgentNodeError, AgentVariableTypeError, ToolFileNotFoundError


class AgentMessageTransformer:
    def transform(
        self,
        *,
        messages: Generator[ToolInvokeMessage, None, None],
        tool_info: Mapping[str, Any],
        parameters_for_log: dict[str, Any],
        user_id: str,
        tenant_id: str,
        node_type: NodeType,
        node_id: str,
        node_execution_id: str,
    ) -> Generator[NodeEventBase, None, None]:
        from core.plugin.impl.plugin import PluginInstaller

        message_stream = ToolFileMessageTransformer.transform_tool_invoke_messages(
            messages=messages,
            user_id=user_id,
            tenant_id=tenant_id,
            conversation_id=None,
        )

        text = ""
        files: list[File] = []
        json_list: list[dict | list] = []

        agent_logs: list[AgentLogEvent] = []
        agent_execution_metadata: Mapping[WorkflowNodeExecutionMetadataKey, Any] = {}
        llm_usage = LLMUsage.empty_usage()
        variables: dict[str, Any] = {}

        for message in message_stream:
            if message.type in {
                ToolInvokeMessage.MessageType.IMAGE_LINK,
                ToolInvokeMessage.MessageType.BINARY_LINK,
                ToolInvokeMessage.MessageType.IMAGE,
            }:
                assert isinstance(message.message, ToolInvokeMessage.TextMessage)

                url = message.message.text
                if message.meta:
                    transfer_method = message.meta.get("transfer_method", FileTransferMethod.TOOL_FILE)
                else:
                    transfer_method = FileTransferMethod.TOOL_FILE

                tool_file_id = str(url).split("/")[-1].split(".")[0]

                with Session(db.engine) as session:
                    stmt = select(ToolFile).where(ToolFile.id == tool_file_id)
                    tool_file = session.scalar(stmt)
                    if tool_file is None:
                        raise ToolFileNotFoundError(tool_file_id)

                mapping = {
                    "tool_file_id": tool_file_id,
                    "type": file_factory.get_file_type_by_mime_type(tool_file.mimetype),
                    "transfer_method": transfer_method,
                    "url": url,
                }
                file = file_factory.build_from_mapping(
                    mapping=mapping,
                    tenant_id=tenant_id,
                )
                files.append(file)
            elif message.type == ToolInvokeMessage.MessageType.BLOB:
                assert isinstance(message.message, ToolInvokeMessage.TextMessage)
                assert message.meta

                tool_file_id = message.message.text.split("/")[-1].split(".")[0]
                with Session(db.engine) as session:
                    stmt = select(ToolFile).where(ToolFile.id == tool_file_id)
                    tool_file = session.scalar(stmt)
                    if tool_file is None:
                        raise ToolFileNotFoundError(tool_file_id)

                mapping = {
                    "tool_file_id": tool_file_id,
                    "transfer_method": FileTransferMethod.TOOL_FILE,
                }
                files.append(
                    file_factory.build_from_mapping(
                        mapping=mapping,
                        tenant_id=tenant_id,
                    )
                )
            elif message.type == ToolInvokeMessage.MessageType.TEXT:
                assert isinstance(message.message, ToolInvokeMessage.TextMessage)
                text += message.message.text
                yield StreamChunkEvent(
                    selector=[node_id, "text"],
                    chunk=message.message.text,
                    is_final=False,
                )
            elif message.type == ToolInvokeMessage.MessageType.JSON:
                assert isinstance(message.message, ToolInvokeMessage.JsonMessage)
                if node_type == NodeType.AGENT:
                    if isinstance(message.message.json_object, dict):
                        msg_metadata: dict[str, Any] = message.message.json_object.pop("execution_metadata", {})
                        llm_usage = LLMUsage.from_metadata(cast(LLMUsageMetadata, msg_metadata))
                        agent_execution_metadata = {
                            WorkflowNodeExecutionMetadataKey(key): value
                            for key, value in msg_metadata.items()
                            if key in WorkflowNodeExecutionMetadataKey.__members__.values()
                        }
                    else:
                        llm_usage = LLMUsage.empty_usage()
                        agent_execution_metadata = {}
                if message.message.json_object:
                    json_list.append(message.message.json_object)
            elif message.type == ToolInvokeMessage.MessageType.LINK:
                assert isinstance(message.message, ToolInvokeMessage.TextMessage)
                stream_text = f"Link: {message.message.text}\n"
                text += stream_text
                yield StreamChunkEvent(
                    selector=[node_id, "text"],
                    chunk=stream_text,
                    is_final=False,
                )
            elif message.type == ToolInvokeMessage.MessageType.VARIABLE:
                assert isinstance(message.message, ToolInvokeMessage.VariableMessage)
                variable_name = message.message.variable_name
                variable_value = message.message.variable_value
                if message.message.stream:
                    if not isinstance(variable_value, str):
                        raise AgentVariableTypeError(
                            "When 'stream' is True, 'variable_value' must be a string.",
                            variable_name=variable_name,
                            expected_type="str",
                            actual_type=type(variable_value).__name__,
                        )
                    if variable_name not in variables:
                        variables[variable_name] = ""
                    variables[variable_name] += variable_value

                    yield StreamChunkEvent(
                        selector=[node_id, variable_name],
                        chunk=variable_value,
                        is_final=False,
                    )
                else:
                    variables[variable_name] = variable_value
            elif message.type == ToolInvokeMessage.MessageType.FILE:
                assert message.meta is not None
                assert isinstance(message.meta, dict)
                if "file" not in message.meta:
                    raise AgentNodeError("File message is missing 'file' key in meta")

                if not isinstance(message.meta["file"], File):
                    raise AgentNodeError(f"Expected File object but got {type(message.meta['file']).__name__}")
                files.append(message.meta["file"])
            elif message.type == ToolInvokeMessage.MessageType.LOG:
                assert isinstance(message.message, ToolInvokeMessage.LogMessage)
                if message.message.metadata:
                    icon = tool_info.get("icon", "")
                    dict_metadata = dict(message.message.metadata)
                    if dict_metadata.get("provider"):
                        manager = PluginInstaller()
                        plugins = manager.list_plugins(tenant_id)
                        try:
                            current_plugin = next(
                                plugin
                                for plugin in plugins
                                if f"{plugin.plugin_id}/{plugin.name}" == dict_metadata["provider"]
                            )
                            icon = current_plugin.declaration.icon
                        except StopIteration:
                            pass
                        icon_dark = None
                        try:
                            builtin_tool = next(
                                provider
                                for provider in BuiltinToolManageService.list_builtin_tools(
                                    user_id,
                                    tenant_id,
                                )
                                if provider.name == dict_metadata["provider"]
                            )
                            icon = builtin_tool.icon
                            icon_dark = builtin_tool.icon_dark
                        except StopIteration:
                            pass

                        dict_metadata["icon"] = icon
                        dict_metadata["icon_dark"] = icon_dark
                        message.message.metadata = dict_metadata
                agent_log = AgentLogEvent(
                    message_id=message.message.id,
                    node_execution_id=node_execution_id,
                    parent_id=message.message.parent_id,
                    error=message.message.error,
                    status=message.message.status.value,
                    data=message.message.data,
                    label=message.message.label,
                    metadata=message.message.metadata,
                    node_id=node_id,
                )

                for log in agent_logs:
                    if log.message_id == agent_log.message_id:
                        log.data = agent_log.data
                        log.status = agent_log.status
                        log.error = agent_log.error
                        log.label = agent_log.label
                        log.metadata = agent_log.metadata
                        break
                else:
                    agent_logs.append(agent_log)

                yield agent_log

        json_output: list[dict[str, Any] | list[Any]] = []
        if agent_logs:
            for log in agent_logs:
                json_output.append(
                    {
                        "id": log.message_id,
                        "parent_id": log.parent_id,
                        "error": log.error,
                        "status": log.status,
                        "data": log.data,
                        "label": log.label,
                        "metadata": log.metadata,
                        "node_id": log.node_id,
                    }
                )
        if json_list:
            json_output.extend(json_list)
        else:
            json_output.append({"data": []})

        yield StreamChunkEvent(
            selector=[node_id, "text"],
            chunk="",
            is_final=True,
        )

        for var_name in variables:
            yield StreamChunkEvent(
                selector=[node_id, var_name],
                chunk="",
                is_final=True,
            )

        yield StreamCompletedEvent(
            node_run_result=NodeRunResult(
                status=WorkflowNodeExecutionStatus.SUCCEEDED,
                outputs={
                    "text": text,
                    "usage": jsonable_encoder(llm_usage),
                    "files": ArrayFileSegment(value=files),
                    "json": json_output,
                    **variables,
                },
                metadata={
                    **agent_execution_metadata,
                    WorkflowNodeExecutionMetadataKey.TOOL_INFO: tool_info,
                    WorkflowNodeExecutionMetadataKey.AGENT_LOG: agent_logs,
                },
                inputs=parameters_for_log,
                llm_usage=llm_usage,
            )
        )
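The transformer closes every streamed selector with an empty is_final chunk before the completion event, so downstream consumers can flush their buffers; a reduced sketch of that close-out convention, with a NamedTuple standing in for the real StreamChunkEvent:

    # Hypothetical stand-in; the real event types live in dify_graph.node_events.
    from typing import NamedTuple

    class Chunk(NamedTuple):
        selector: list[str]
        chunk: str
        is_final: bool

    def close_out(node_id: str, variables: dict[str, str]):
        # The "text" selector and every streamed variable each get a final empty chunk.
        yield Chunk([node_id, "text"], "", True)
        for var_name in variables:
            yield Chunk([node_id, var_name], "", True)

    print(list(close_out("agent_1", {"answer": "42"})))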
api/core/workflow/nodes/agent/plugin_strategy_adapter.py (new file)
@@ -0,0 +1,40 @@
from __future__ import annotations

from factories.agent_factory import get_plugin_agent_strategy

from .strategy_protocols import AgentStrategyPresentationProvider, AgentStrategyResolver, ResolvedAgentStrategy


class PluginAgentStrategyResolver(AgentStrategyResolver):
    def resolve(
        self,
        *,
        tenant_id: str,
        agent_strategy_provider_name: str,
        agent_strategy_name: str,
    ) -> ResolvedAgentStrategy:
        return get_plugin_agent_strategy(
            tenant_id=tenant_id,
            agent_strategy_provider_name=agent_strategy_provider_name,
            agent_strategy_name=agent_strategy_name,
        )


class PluginAgentStrategyPresentationProvider(AgentStrategyPresentationProvider):
    def get_icon(self, *, tenant_id: str, agent_strategy_provider_name: str) -> str | None:
        from core.plugin.impl.plugin import PluginInstaller

        manager = PluginInstaller()
        try:
            plugins = manager.list_plugins(tenant_id)
        except Exception:
            return None

        try:
            current_plugin = next(
                plugin for plugin in plugins if f"{plugin.plugin_id}/{plugin.name}" == agent_strategy_provider_name
            )
        except StopIteration:
            return None

        return current_plugin.declaration.icon
api/core/workflow/nodes/agent/runtime_support.py (new file)
@@ -0,0 +1,276 @@
from __future__ import annotations

import json
from collections.abc import Sequence
from typing import Any, cast

from packaging.version import Version
from pydantic import ValidationError
from sqlalchemy import select
from sqlalchemy.orm import Session

from core.agent.entities import AgentToolEntity
from core.agent.plugin_entities import AgentStrategyParameter
from core.memory.token_buffer_memory import TokenBufferMemory
from core.model_manager import ModelInstance, ModelManager
from core.plugin.entities.request import InvokeCredentials
from core.provider_manager import ProviderManager
from core.tools.entities.tool_entities import ToolIdentity, ToolParameter, ToolProviderType
from core.tools.tool_manager import ToolManager
from dify_graph.enums import SystemVariableKey
from dify_graph.model_runtime.entities.model_entities import AIModelEntity, ModelType
from dify_graph.runtime import VariablePool
from dify_graph.variables.segments import StringSegment
from extensions.ext_database import db
from models.model import Conversation

from .entities import AgentNodeData, AgentOldVersionModelFeatures, ParamsAutoGenerated
from .exceptions import AgentInputTypeError, AgentVariableNotFoundError
from .strategy_protocols import ResolvedAgentStrategy


class AgentRuntimeSupport:
    def build_parameters(
        self,
        *,
        agent_parameters: Sequence[AgentStrategyParameter],
        variable_pool: VariablePool,
        node_data: AgentNodeData,
        strategy: ResolvedAgentStrategy,
        tenant_id: str,
        app_id: str,
        invoke_from: Any,
        for_log: bool = False,
    ) -> dict[str, Any]:
        agent_parameters_dictionary = {parameter.name: parameter for parameter in agent_parameters}

        result: dict[str, Any] = {}
        for parameter_name in node_data.agent_parameters:
            parameter = agent_parameters_dictionary.get(parameter_name)
            if not parameter:
                result[parameter_name] = None
                continue

            agent_input = node_data.agent_parameters[parameter_name]
            match agent_input.type:
                case "variable":
                    variable = variable_pool.get(agent_input.value)  # type: ignore[arg-type]
                    if variable is None:
                        raise AgentVariableNotFoundError(str(agent_input.value))
                    parameter_value = variable.value
                case "mixed" | "constant":
                    try:
                        if not isinstance(agent_input.value, str):
                            parameter_value = json.dumps(agent_input.value, ensure_ascii=False)
                        else:
                            parameter_value = str(agent_input.value)
                    except TypeError:
                        parameter_value = str(agent_input.value)

                    segment_group = variable_pool.convert_template(parameter_value)
                    parameter_value = segment_group.log if for_log else segment_group.text
                    try:
                        if not isinstance(agent_input.value, str):
                            parameter_value = json.loads(parameter_value)
                    except json.JSONDecodeError:
                        parameter_value = parameter_value
                case _:
                    raise AgentInputTypeError(agent_input.type)

            value = parameter_value
            if parameter.type == "array[tools]":
                value = cast(list[dict[str, Any]], value)
                value = [tool for tool in value if tool.get("enabled", False)]
                value = self._filter_mcp_type_tool(strategy, value)
                for tool in value:
                    if "schemas" in tool:
                        tool.pop("schemas")
                    parameters = tool.get("parameters", {})
                    if all(isinstance(v, dict) for _, v in parameters.items()):
                        params = {}
                        for key, param in parameters.items():
                            if param.get("auto", ParamsAutoGenerated.OPEN) in (
                                ParamsAutoGenerated.CLOSE,
                                0,
                            ):
                                value_param = param.get("value", {})
                                if value_param and value_param.get("type", "") == "variable":
                                    variable_selector = value_param.get("value")
                                    if not variable_selector:
                                        raise ValueError("Variable selector is missing for a variable-type parameter.")

                                    variable = variable_pool.get(variable_selector)
                                    if variable is None:
                                        raise AgentVariableNotFoundError(str(variable_selector))

                                    params[key] = variable.value
                                else:
                                    params[key] = value_param.get("value", "") if value_param is not None else None
                            else:
                                params[key] = None
                        parameters = params
                    tool["settings"] = {k: v.get("value", None) for k, v in tool.get("settings", {}).items()}
                    tool["parameters"] = parameters

            if not for_log:
                if parameter.type == "array[tools]":
                    value = cast(list[dict[str, Any]], value)
                    tool_value = []
                    for tool in value:
                        provider_type = ToolProviderType(tool.get("type", ToolProviderType.BUILT_IN))
                        setting_params = tool.get("settings", {})
                        parameters = tool.get("parameters", {})
                        manual_input_params = [key for key, value in parameters.items() if value is not None]

                        parameters = {**parameters, **setting_params}
                        entity = AgentToolEntity(
                            provider_id=tool.get("provider_name", ""),
                            provider_type=provider_type,
                            tool_name=tool.get("tool_name", ""),
                            tool_parameters=parameters,
                            plugin_unique_identifier=tool.get("plugin_unique_identifier", None),
                            credential_id=tool.get("credential_id", None),
                        )

                        extra = tool.get("extra", {})

                        runtime_variable_pool: VariablePool | None = None
                        if node_data.version != "1" or node_data.tool_node_version is not None:
                            runtime_variable_pool = variable_pool
                        tool_runtime = ToolManager.get_agent_tool_runtime(
                            tenant_id,
                            app_id,
                            entity,
                            invoke_from,
                            runtime_variable_pool,
                        )
                        if tool_runtime.entity.description:
                            tool_runtime.entity.description.llm = (
                                extra.get("description", "") or tool_runtime.entity.description.llm
                            )
                        for tool_runtime_params in tool_runtime.entity.parameters:
                            tool_runtime_params.form = (
                                ToolParameter.ToolParameterForm.FORM
                                if tool_runtime_params.name in manual_input_params
                                else tool_runtime_params.form
                            )
                        manual_input_value = {}
                        if tool_runtime.entity.parameters:
                            manual_input_value = {
                                key: value for key, value in parameters.items() if key in manual_input_params
                            }
                        runtime_parameters = {
                            **tool_runtime.runtime.runtime_parameters,
                            **manual_input_value,
                        }
                        tool_value.append(
                            {
                                **tool_runtime.entity.model_dump(mode="json"),
                                "runtime_parameters": runtime_parameters,
                                "credential_id": tool.get("credential_id", None),
                                "provider_type": provider_type.value,
                            }
                        )
                    value = tool_value
                if parameter.type == AgentStrategyParameter.AgentStrategyParameterType.MODEL_SELECTOR:
                    value = cast(dict[str, Any], value)
                    model_instance, model_schema = self.fetch_model(tenant_id=tenant_id, value=value)
                    history_prompt_messages = []
                    if node_data.memory:
                        memory = self.fetch_memory(
                            variable_pool=variable_pool,
                            app_id=app_id,
                            model_instance=model_instance,
                        )
                        if memory:
                            prompt_messages = memory.get_history_prompt_messages(
                                message_limit=node_data.memory.window.size or None
                            )
                            history_prompt_messages = [
                                prompt_message.model_dump(mode="json") for prompt_message in prompt_messages
                            ]
                    value["history_prompt_messages"] = history_prompt_messages
                    if model_schema:
                        model_schema = self._remove_unsupported_model_features_for_old_version(model_schema)
                        value["entity"] = model_schema.model_dump(mode="json")
                    else:
                        value["entity"] = None
            result[parameter_name] = value

        return result

    def build_credentials(self, *, parameters: dict[str, Any]) -> InvokeCredentials:
        credentials = InvokeCredentials()
        credentials.tool_credentials = {}
        for tool in parameters.get("tools", []):
            if not tool.get("credential_id"):
                continue
            try:
                identity = ToolIdentity.model_validate(tool.get("identity", {}))
            except ValidationError:
                continue
            credentials.tool_credentials[identity.provider] = tool.get("credential_id", None)
        return credentials

    def fetch_memory(
        self,
        *,
        variable_pool: VariablePool,
        app_id: str,
        model_instance: ModelInstance,
    ) -> TokenBufferMemory | None:
        conversation_id_variable = variable_pool.get(["sys", SystemVariableKey.CONVERSATION_ID])
        if not isinstance(conversation_id_variable, StringSegment):
            return None
        conversation_id = conversation_id_variable.value

        with Session(db.engine, expire_on_commit=False) as session:
            stmt = select(Conversation).where(Conversation.app_id == app_id, Conversation.id == conversation_id)
            conversation = session.scalar(stmt)
            if not conversation:
                return None

        return TokenBufferMemory(conversation=conversation, model_instance=model_instance)

    def fetch_model(self, *, tenant_id: str, value: dict[str, Any]) -> tuple[ModelInstance, AIModelEntity | None]:
        provider_manager = ProviderManager()
        provider_model_bundle = provider_manager.get_provider_model_bundle(
            tenant_id=tenant_id,
            provider=value.get("provider", ""),
            model_type=ModelType.LLM,
        )
        model_name = value.get("model", "")
        model_credentials = provider_model_bundle.configuration.get_current_credentials(
            model_type=ModelType.LLM,
            model=model_name,
        )
        provider_name = provider_model_bundle.configuration.provider.provider
        model_type_instance = provider_model_bundle.model_type_instance
        model_instance = ModelManager().get_model_instance(
            tenant_id=tenant_id,
            provider=provider_name,
            model_type=ModelType(value.get("model_type", "")),
            model=model_name,
        )
        model_schema = model_type_instance.get_model_schema(model_name, model_credentials)
        return model_instance, model_schema

    @staticmethod
    def _remove_unsupported_model_features_for_old_version(model_schema: AIModelEntity) -> AIModelEntity:
        if model_schema.features:
            for feature in model_schema.features[:]:
                try:
                    AgentOldVersionModelFeatures(feature.value)
                except ValueError:
                    model_schema.features.remove(feature)
        return model_schema

    @staticmethod
    def _filter_mcp_type_tool(
        strategy: ResolvedAgentStrategy,
        tools: list[dict[str, Any]],
    ) -> list[dict[str, Any]]:
        meta_version = strategy.meta_version
        if meta_version and Version(meta_version) > Version("0.0.1"):
            return tools
        return [tool for tool in tools if tool.get("type") != ToolProviderType.MCP]
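_filter_mcp_type_tool gates MCP tools on the plugin's meta_version via packaging.version; a hedged sketch of the same comparison outside the class (the tool dicts are invented for illustration):

    # Illustrative check, mirroring _filter_mcp_type_tool above.
    from packaging.version import Version

    tools = [{"type": "mcp", "tool_name": "t1"}, {"type": "builtin", "tool_name": "t2"}]
    meta_version = "0.0.1"
    if not (meta_version and Version(meta_version) > Version("0.0.1")):
        # Plugins at or below meta version 0.0.1 cannot consume MCP tools, so they are dropped.
        tools = [tool for tool in tools if tool.get("type") != "mcp"]
    print(tools)  # [{'type': 'builtin', 'tool_name': 't2'}]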
api/core/workflow/nodes/agent/strategy_protocols.py (new file)
@@ -0,0 +1,39 @@
from __future__ import annotations

from collections.abc import Generator, Sequence
from typing import Any, Protocol

from core.agent.plugin_entities import AgentStrategyParameter
from core.plugin.entities.request import InvokeCredentials
from core.tools.entities.tool_entities import ToolInvokeMessage


class ResolvedAgentStrategy(Protocol):
    meta_version: str | None

    def get_parameters(self) -> Sequence[AgentStrategyParameter]: ...

    def invoke(
        self,
        *,
        params: dict[str, Any],
        user_id: str,
        conversation_id: str | None = None,
        app_id: str | None = None,
        message_id: str | None = None,
        credentials: InvokeCredentials | None = None,
    ) -> Generator[ToolInvokeMessage, None, None]: ...


class AgentStrategyResolver(Protocol):
    def resolve(
        self,
        *,
        tenant_id: str,
        agent_strategy_provider_name: str,
        agent_strategy_name: str,
    ) -> ResolvedAgentStrategy: ...


class AgentStrategyPresentationProvider(Protocol):
    def get_icon(self, *, tenant_id: str, agent_strategy_provider_name: str) -> str | None: ...
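Because these are structural typing.Protocol classes, any object with matching attributes satisfies them without inheriting; a hedged sketch of a test double (FakeStrategy and its values are made up for illustration):

    # Hypothetical test double; satisfies ResolvedAgentStrategy structurally, no subclassing needed.
    from core.workflow.nodes.agent.strategy_protocols import ResolvedAgentStrategy

    class FakeStrategy:
        meta_version = "0.0.2"

        def get_parameters(self):
            return []

        def invoke(self, *, params, user_id, conversation_id=None, app_id=None, message_id=None, credentials=None):
            yield from ()  # an empty message stream

    strategy: ResolvedAgentStrategy = FakeStrategy()  # type-checks structurally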
@@ -9,9 +9,10 @@ from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom, build_di
 from core.app.workflow.layers.llm_quota import LLMQuotaLayer
 from core.app.workflow.layers.observability import ObservabilityLayer
 from core.workflow.node_factory import DifyNodeFactory
+from core.workflow.node_resolution import resolve_workflow_node_class
 from dify_graph.constants import ENVIRONMENT_VARIABLE_NODE_ID
 from dify_graph.entities import GraphInitParams
-from dify_graph.entities.graph_config import NodeConfigData, NodeConfigDict
+from dify_graph.entities.graph_config import NodeConfigDictAdapter
 from dify_graph.errors import WorkflowNodeRunFailedError
 from dify_graph.file.models import File
 from dify_graph.graph import Graph

@@ -23,7 +24,6 @@ from dify_graph.graph_engine.protocols.command_channel import CommandChannel
 from dify_graph.graph_events import GraphEngineEvent, GraphNodeEventBase, GraphRunFailedEvent
 from dify_graph.nodes import NodeType
 from dify_graph.nodes.base.node import Node
-from dify_graph.nodes.node_mapping import NODE_TYPE_CLASSES_MAPPING
 from dify_graph.runtime import ChildGraphNotFoundError, GraphRuntimeState, VariablePool
 from dify_graph.system_variable import SystemVariable
 from dify_graph.variable_loader import DUMMY_VARIABLE_LOADER, VariableLoader, load_into_variable_pool

@@ -212,7 +212,7 @@ class WorkflowEntry:
         node_config_data = node_config["data"]

         # Get node type
-        node_type = NodeType(node_config_data["type"])
+        node_type = node_config_data.type

         # init graph init params and runtime state
         graph_init_params = GraphInitParams(

@@ -234,8 +234,7 @@ class WorkflowEntry:
             graph_init_params=graph_init_params,
             graph_runtime_state=graph_runtime_state,
         )
-        typed_node_config = cast(dict[str, object], node_config)
-        node = cast(Any, node_factory).create_node(typed_node_config)
+        node = node_factory.create_node(node_config)
         node_cls = type(node)

         try:

@@ -344,7 +343,7 @@ class WorkflowEntry:
         if node_type not in {NodeType.PARAMETER_EXTRACTOR, NodeType.QUESTION_CLASSIFIER}:
             raise ValueError(f"Node type {node_type} not supported")

-        node_cls = NODE_TYPE_CLASSES_MAPPING[node_type]["1"]
+        node_cls = resolve_workflow_node_class(node_type=node_type, node_version="1")
        if not node_cls:
            raise ValueError(f"Node class not found for node type {node_type}")

@@ -371,10 +370,7 @@ class WorkflowEntry:
         graph_runtime_state = GraphRuntimeState(variable_pool=variable_pool, start_at=time.perf_counter())

         # init workflow run state
-        node_config: NodeConfigDict = {
-            "id": node_id,
-            "data": cast(NodeConfigData, node_data),
-        }
+        node_config = NodeConfigDictAdapter.validate_python({"id": node_id, "data": node_data})
         node_factory = DifyNodeFactory(
             graph_init_params=graph_init_params,
             graph_runtime_state=graph_runtime_state,
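`resolve_workflow_node_class` replaces the direct `NODE_TYPE_CLASSES_MAPPING[node_type]["1"]` lookup. Its internals are not shown in this diff; a hypothetical sketch of a version-keyed resolver with a `latest` fallback (consistent with the registry described later in `Node.get_node_type_classes_mapping()`; none of these names are the actual `core.workflow.node_resolution` API) might look like:

from typing import Any

NODE_REGISTRY: dict[str, dict[str, type[Any]]] = {}

def resolve_node_class(node_type: str, node_version: str = "latest") -> type[Any] | None:
    versions = NODE_REGISTRY.get(node_type, {})
    if node_version == "latest" and versions:
        # Fall back to the highest registered numeric version.
        return versions[max(versions, key=lambda v: tuple(map(int, v.split("."))))]
    return versions.get(node_version)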
@@ -1,11 +1,9 @@
-from .agent import AgentNodeStrategyInit
 from .graph_init_params import GraphInitParams
 from .workflow_execution import WorkflowExecution
 from .workflow_node_execution import WorkflowNodeExecution
 from .workflow_start_reason import WorkflowStartReason

 __all__ = [
-    "AgentNodeStrategyInit",
     "GraphInitParams",
     "WorkflowExecution",
     "WorkflowNodeExecution",

@@ -1,8 +0,0 @@
from pydantic import BaseModel


class AgentNodeStrategyInit(BaseModel):
    """Agent node strategy initialization data."""

    name: str
    icon: str | None = None
176 api/dify_graph/entities/base_node_data.py Normal file
@@ -0,0 +1,176 @@
from __future__ import annotations

import json
from abc import ABC
from builtins import type as type_
from enum import StrEnum
from typing import Any, Union

from pydantic import BaseModel, ConfigDict, Field, model_validator

from dify_graph.entities.exc import DefaultValueTypeError
from dify_graph.enums import ErrorStrategy, NodeType

# Project supports Python 3.11+, where `typing.Union[...]` is valid in `isinstance`.
_NumberType = Union[int, float]


class RetryConfig(BaseModel):
    """node retry config"""

    max_retries: int = 0  # max retry times
    retry_interval: int = 0  # retry interval in milliseconds
    retry_enabled: bool = False  # whether retry is enabled

    @property
    def retry_interval_seconds(self) -> float:
        return self.retry_interval / 1000


class DefaultValueType(StrEnum):
    STRING = "string"
    NUMBER = "number"
    OBJECT = "object"
    ARRAY_NUMBER = "array[number]"
    ARRAY_STRING = "array[string]"
    ARRAY_OBJECT = "array[object]"
    ARRAY_FILES = "array[file]"


class DefaultValue(BaseModel):
    value: Any = None
    type: DefaultValueType
    key: str

    @staticmethod
    def _parse_json(value: str):
        """Unified JSON parsing handler"""
        try:
            return json.loads(value)
        except json.JSONDecodeError:
            raise DefaultValueTypeError(f"Invalid JSON format for value: {value}")

    @staticmethod
    def _validate_array(value: Any, element_type: type_ | tuple[type_, ...]) -> bool:
        """Unified array type validation"""
        return isinstance(value, list) and all(isinstance(x, element_type) for x in value)

    @staticmethod
    def _convert_number(value: str) -> float:
        """Unified number conversion handler"""
        try:
            return float(value)
        except ValueError:
            raise DefaultValueTypeError(f"Cannot convert to number: {value}")

    @model_validator(mode="after")
    def validate_value_type(self) -> DefaultValue:
        # Type validation configuration
        type_validators: dict[DefaultValueType, dict[str, Any]] = {
            DefaultValueType.STRING: {
                "type": str,
                "converter": lambda x: x,
            },
            DefaultValueType.NUMBER: {
                "type": _NumberType,
                "converter": self._convert_number,
            },
            DefaultValueType.OBJECT: {
                "type": dict,
                "converter": self._parse_json,
            },
            DefaultValueType.ARRAY_NUMBER: {
                "type": list,
                "element_type": _NumberType,
                "converter": self._parse_json,
            },
            DefaultValueType.ARRAY_STRING: {
                "type": list,
                "element_type": str,
                "converter": self._parse_json,
            },
            DefaultValueType.ARRAY_OBJECT: {
                "type": list,
                "element_type": dict,
                "converter": self._parse_json,
            },
        }

        validator: dict[str, Any] = type_validators.get(self.type, {})
        if not validator:
            if self.type == DefaultValueType.ARRAY_FILES:
                # Handle files type
                return self
            raise DefaultValueTypeError(f"Unsupported type: {self.type}")

        # Handle string input cases
        if isinstance(self.value, str) and self.type != DefaultValueType.STRING:
            self.value = validator["converter"](self.value)

        # Validate base type
        if not isinstance(self.value, validator["type"]):
            raise DefaultValueTypeError(f"Value must be {validator['type'].__name__} type for {self.value}")

        # Validate array element types
        if validator["type"] == list and not self._validate_array(self.value, validator["element_type"]):
            raise DefaultValueTypeError(f"All elements must be {validator['element_type'].__name__} for {self.value}")

        return self


class BaseNodeData(ABC, BaseModel):
    # Raw graph payloads are first validated through `NodeConfigDictAdapter`, where
    # `node["data"]` is typed as `BaseNodeData` before the concrete node class is known.
    # At that boundary, node-specific fields are still "extra" relative to this shared DTO,
    # and persisted templates/workflows also carry undeclared compatibility keys such as
    # `selected`, `params`, `paramSchemas`, and `datasource_label`. Keep extras permissive
    # here until graph parsing becomes discriminated by node type or those legacy payloads
    # are normalized.
    model_config = ConfigDict(extra="allow")

    type: NodeType
    title: str = ""
    desc: str | None = None
    version: str = "1"
    error_strategy: ErrorStrategy | None = None
    default_value: list[DefaultValue] | None = None
    retry_config: RetryConfig = Field(default_factory=RetryConfig)

    @property
    def default_value_dict(self) -> dict[str, Any]:
        if self.default_value:
            return {item.key: item.value for item in self.default_value}
        return {}

    def __getitem__(self, key: str) -> Any:
        """
        Dict-style access without calling model_dump() on every lookup.
        Prefer using model fields and Pydantic's extra storage.
        """
        # First, check declared model fields
        if key in self.__class__.model_fields:
            return getattr(self, key)

        # Then, check undeclared compatibility fields stored in Pydantic's extra dict.
        extras = getattr(self, "__pydantic_extra__", None)
        if extras is None:
            extras = getattr(self, "model_extra", None)
        if extras is not None and key in extras:
            return extras[key]

        raise KeyError(key)

    def get(self, key: str, default: Any = None) -> Any:
        """
        Dict-style .get() without calling model_dump() on every lookup.
        """
        if key in self.__class__.model_fields:
            return getattr(self, key)

        extras = getattr(self, "__pydantic_extra__", None)
        if extras is None:
            extras = getattr(self, "model_extra", None)
        if extras is not None and key in extras:
            return extras.get(key, default)

        return default
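A usage sketch of the `DefaultValue` validator above, assuming a checkout where the `dify_graph` imports resolve:

from dify_graph.entities.base_node_data import DefaultValue, DefaultValueType

# String inputs are coerced before type-checking: "[1, 2.5]" is JSON-parsed
# because the declared type is array[number], then each element is validated.
dv = DefaultValue(key="scores", type=DefaultValueType.ARRAY_NUMBER, value="[1, 2.5]")
assert dv.value == [1, 2.5]

# A mismatched payload raises DefaultValueTypeError from the model_validator.
DefaultValue(key="scores", type=DefaultValueType.ARRAY_NUMBER, value='["a"]')  # raises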
@@ -4,21 +4,20 @@ import sys

 from pydantic import TypeAdapter, with_config

+from dify_graph.entities.base_node_data import BaseNodeData

 if sys.version_info >= (3, 12):
     from typing import TypedDict
 else:
     from typing_extensions import TypedDict


-@with_config(extra="allow")
-class NodeConfigData(TypedDict):
-    type: str
-
-
 @with_config(extra="allow")
 class NodeConfigDict(TypedDict):
     id: str
-    data: NodeConfigData
+    # This is the permissive raw graph boundary. Node factories re-validate `data`
+    # with the concrete `NodeData` subtype after resolving the node implementation.
+    data: BaseNodeData


+NodeConfigDictAdapter = TypeAdapter(NodeConfigDict)
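`NodeConfigDictAdapter` is the `pydantic.TypeAdapter`-over-`TypedDict` pattern: raw dicts get coercion and extra-key retention without defining a `BaseModel`. A minimal standalone sketch of the same pattern (pydantic v2; the `Point` names are illustrative):

from typing import TypedDict
from pydantic import TypeAdapter, with_config


@with_config(extra="allow")
class Point(TypedDict):
    x: int
    y: int


PointAdapter = TypeAdapter(Point)

p = PointAdapter.validate_python({"x": 1, "y": "2", "label": "origin"})
assert p == {"x": 1, "y": 2, "label": "origin"}  # "2" coerced, extra key kept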
@@ -8,7 +8,7 @@ from typing import Protocol, cast, final
 from pydantic import TypeAdapter

 from dify_graph.entities.graph_config import NodeConfigDict
-from dify_graph.enums import ErrorStrategy, NodeExecutionType, NodeState, NodeType
+from dify_graph.enums import ErrorStrategy, NodeExecutionType, NodeState
 from dify_graph.nodes.base.node import Node
 from libs.typing import is_str

@@ -34,7 +34,8 @@ class NodeFactory(Protocol):

         :param node_config: node configuration dictionary containing type and other data
         :return: initialized Node instance
-        :raises ValueError: if node type is unknown or configuration is invalid
+        :raises ValueError: if node type is unknown or no implementation exists for the resolved version
+        :raises ValidationError: if node_config does not satisfy NodeConfigDict/BaseNodeData validation
         """
         ...
@@ -115,10 +116,7 @@ class Graph:
         start_node_id = None
         for nid in root_candidates:
             node_data = node_configs_map[nid]["data"]
-            node_type = node_data["type"]
-            if not isinstance(node_type, str):
-                continue
-            if NodeType(node_type).is_start_node:
+            if node_data.type.is_start_node:
                 start_node_id = nid
                 break

@@ -203,6 +201,23 @@ class Graph:

         return GraphBuilder(graph_cls=cls)

+    @staticmethod
+    def _filter_canvas_only_nodes(node_configs: Sequence[Mapping[str, object]]) -> list[dict[str, object]]:
+        """
+        Remove editor-only nodes before `NodeConfigDict` validation.
+
+        Persisted note widgets use a top-level `type == "custom-note"` but leave
+        `data.type` empty because they are never executable graph nodes. Filter
+        them while configs are still raw dicts so Pydantic does not validate
+        their placeholder payloads against `BaseNodeData.type: NodeType`.
+        """
+        filtered_node_configs: list[dict[str, object]] = []
+        for node_config in node_configs:
+            if node_config.get("type", "") == "custom-note":
+                continue
+            filtered_node_configs.append(dict(node_config))
+        return filtered_node_configs
+
     @classmethod
     def _promote_fail_branch_nodes(cls, nodes: dict[str, Node]) -> None:
         """

@@ -302,13 +317,13 @@ class Graph:
         node_configs = graph_config.get("nodes", [])

         edge_configs = cast(list[dict[str, object]], edge_configs)
-        node_configs = cast(list[dict[str, object]], node_configs)
+        node_configs = cls._filter_canvas_only_nodes(node_configs)
+        node_configs = _ListNodeConfigDict.validate_python(node_configs)

         if not node_configs:
             raise ValueError("Graph must have at least one node")

-        node_configs = [node_config for node_config in node_configs if node_config.get("type", "") != "custom-note"]
-
         # Parse node configurations
         node_configs_map = cls._parse_node_configs(node_configs)
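A small usage sketch of the canvas-only filter, with the filtering expression inlined so it runs standalone (the node payloads are made up):

raw_nodes = [
    {"id": "n1", "data": {"type": "start", "title": "Start"}},
    {"id": "note1", "type": "custom-note", "data": {"type": "", "text": "remember to ship"}},
]

executable = [dict(n) for n in raw_nodes if n.get("type", "") != "custom-note"]
assert [n["id"] for n in executable] == ["n1"]
# Only these survivors are handed to the NodeConfigDict TypeAdapter for validation.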
@@ -4,7 +4,6 @@ from datetime import datetime
 from pydantic import Field

 from core.rag.entities.citation_metadata import RetrievalSourceMetadata
-from dify_graph.entities import AgentNodeStrategyInit
 from dify_graph.entities.pause_reason import PauseReason

 from .base import GraphNodeEventBase

@@ -13,8 +12,8 @@ from .base import GraphNodeEventBase
 class NodeRunStartedEvent(GraphNodeEventBase):
     node_title: str
     predecessor_node_id: str | None = None
-    agent_strategy: AgentNodeStrategyInit | None = None
     start_at: datetime = Field(..., description="node start time")
+    extras: dict[str, object] = Field(default_factory=dict)

     # FIXME(-LAN-): only for ToolNode
     provider_type: str = ""
@@ -276,7 +276,4 @@ class ToolPromptMessage(PromptMessage):

         :return: True if prompt message is empty, False otherwise
         """
-        if not super().is_empty() and not self.tool_call_id:
-            return False
-
-        return True
+        return super().is_empty() and not self.tool_call_id
@@ -4,7 +4,8 @@ class InvokeError(ValueError):
     description: str | None = None

     def __init__(self, description: str | None = None):
-        self.description = description
+        if description is not None:
+            self.description = description

     def __str__(self):
         return self.description or self.__class__.__name__
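The guard matters because unconditional assignment would shadow subclass-level `description` defaults with `None`. A standalone sketch of the pattern (the `RateLimitError` name is illustrative):

class InvokeError(ValueError):
    description: str | None = None

    def __init__(self, description: str | None = None):
        # Only shadow the class attribute when a value is given, so subclass
        # defaults survive construction with no argument.
        if description is not None:
            self.description = description

    def __str__(self):
        return self.description or self.__class__.__name__


class RateLimitError(InvokeError):
    description = "Rate limit exceeded"


assert str(RateLimitError()) == "Rate limit exceeded"  # class default preserved
assert str(RateLimitError("Try later")) == "Try later"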
@@ -282,7 +282,8 @@ class ModelProviderFactory:
                 all_model_type_models.append(model_schema)

             simple_provider_schema = provider_schema.to_simple_provider()
-            simple_provider_schema.models.extend(all_model_type_models)
+            if model_type:
+                simple_provider_schema.models = all_model_type_models

             providers.append(simple_provider_schema)
@@ -1,3 +0,0 @@
from .agent_node import AgentNode

__all__ = ["AgentNode"]
@@ -1,762 +0,0 @@
from __future__ import annotations

import json
from collections.abc import Generator, Mapping, Sequence
from typing import TYPE_CHECKING, Any, cast

from packaging.version import Version
from pydantic import ValidationError
from sqlalchemy import select
from sqlalchemy.orm import Session

from core.agent.entities import AgentToolEntity
from core.agent.plugin_entities import AgentStrategyParameter
from core.memory.token_buffer_memory import TokenBufferMemory
from core.model_manager import ModelInstance, ModelManager
from core.provider_manager import ProviderManager
from core.tools.entities.tool_entities import (
    ToolIdentity,
    ToolInvokeMessage,
    ToolParameter,
    ToolProviderType,
)
from core.tools.tool_manager import ToolManager
from core.tools.utils.message_transformer import ToolFileMessageTransformer
from dify_graph.enums import (
    NodeType,
    SystemVariableKey,
    WorkflowNodeExecutionMetadataKey,
    WorkflowNodeExecutionStatus,
)
from dify_graph.file import File, FileTransferMethod
from dify_graph.model_runtime.entities.llm_entities import LLMUsage, LLMUsageMetadata
from dify_graph.model_runtime.entities.model_entities import AIModelEntity, ModelType
from dify_graph.model_runtime.utils.encoders import jsonable_encoder
from dify_graph.node_events import (
    AgentLogEvent,
    NodeEventBase,
    NodeRunResult,
    StreamChunkEvent,
    StreamCompletedEvent,
)
from dify_graph.nodes.agent.entities import AgentNodeData, AgentOldVersionModelFeatures, ParamsAutoGenerated
from dify_graph.nodes.base.node import Node
from dify_graph.nodes.base.variable_template_parser import VariableTemplateParser
from dify_graph.runtime import VariablePool
from dify_graph.variables.segments import ArrayFileSegment, StringSegment
from extensions.ext_database import db
from factories import file_factory
from factories.agent_factory import get_plugin_agent_strategy
from models import ToolFile
from models.model import Conversation
from services.tools.builtin_tools_manage_service import BuiltinToolManageService

from .exc import (
    AgentInputTypeError,
    AgentInvocationError,
    AgentMessageTransformError,
    AgentNodeError,
    AgentVariableNotFoundError,
    AgentVariableTypeError,
    ToolFileNotFoundError,
)

if TYPE_CHECKING:
    from core.agent.strategy.plugin import PluginAgentStrategy
    from core.plugin.entities.request import InvokeCredentials


class AgentNode(Node[AgentNodeData]):
    """
    Agent Node
    """

    node_type = NodeType.AGENT

    @classmethod
    def version(cls) -> str:
        return "1"

    def _run(self) -> Generator[NodeEventBase, None, None]:
        from core.plugin.impl.exc import PluginDaemonClientSideError

        dify_ctx = self.require_dify_context()

        try:
            strategy = get_plugin_agent_strategy(
                tenant_id=dify_ctx.tenant_id,
                agent_strategy_provider_name=self.node_data.agent_strategy_provider_name,
                agent_strategy_name=self.node_data.agent_strategy_name,
            )
        except Exception as e:
            yield StreamCompletedEvent(
                node_run_result=NodeRunResult(
                    status=WorkflowNodeExecutionStatus.FAILED,
                    inputs={},
                    error=f"Failed to get agent strategy: {str(e)}",
                ),
            )
            return

        agent_parameters = strategy.get_parameters()

        # get parameters
        parameters = self._generate_agent_parameters(
            agent_parameters=agent_parameters,
            variable_pool=self.graph_runtime_state.variable_pool,
            node_data=self.node_data,
            strategy=strategy,
        )
        parameters_for_log = self._generate_agent_parameters(
            agent_parameters=agent_parameters,
            variable_pool=self.graph_runtime_state.variable_pool,
            node_data=self.node_data,
            for_log=True,
            strategy=strategy,
        )
        credentials = self._generate_credentials(parameters=parameters)

        # get conversation id
        conversation_id = self.graph_runtime_state.variable_pool.get(["sys", SystemVariableKey.CONVERSATION_ID])

        try:
            message_stream = strategy.invoke(
                params=parameters,
                user_id=dify_ctx.user_id,
                app_id=dify_ctx.app_id,
                conversation_id=conversation_id.text if conversation_id else None,
                credentials=credentials,
            )
        except Exception as e:
            error = AgentInvocationError(f"Failed to invoke agent: {str(e)}", original_error=e)
            yield StreamCompletedEvent(
                node_run_result=NodeRunResult(
                    status=WorkflowNodeExecutionStatus.FAILED,
                    inputs=parameters_for_log,
                    error=str(error),
                )
            )
            return

        try:
            yield from self._transform_message(
                messages=message_stream,
                tool_info={
                    "icon": self.agent_strategy_icon,
                    "agent_strategy": self.node_data.agent_strategy_name,
                },
                parameters_for_log=parameters_for_log,
                user_id=dify_ctx.user_id,
                tenant_id=dify_ctx.tenant_id,
                node_type=self.node_type,
                node_id=self._node_id,
                node_execution_id=self.id,
            )
        except PluginDaemonClientSideError as e:
            transform_error = AgentMessageTransformError(
                f"Failed to transform agent message: {str(e)}", original_error=e
            )
            yield StreamCompletedEvent(
                node_run_result=NodeRunResult(
                    status=WorkflowNodeExecutionStatus.FAILED,
                    inputs=parameters_for_log,
                    error=str(transform_error),
                )
            )

    def _generate_agent_parameters(
        self,
        *,
        agent_parameters: Sequence[AgentStrategyParameter],
        variable_pool: VariablePool,
        node_data: AgentNodeData,
        for_log: bool = False,
        strategy: PluginAgentStrategy,
    ) -> dict[str, Any]:
        """
        Generate parameters based on the given tool parameters, variable pool, and node data.

        Args:
            agent_parameters (Sequence[AgentParameter]): The list of agent parameters.
            variable_pool (VariablePool): The variable pool containing the variables.
            node_data (AgentNodeData): The data associated with the agent node.

        Returns:
            Mapping[str, Any]: A dictionary containing the generated parameters.

        """
        agent_parameters_dictionary = {parameter.name: parameter for parameter in agent_parameters}

        result: dict[str, Any] = {}
        for parameter_name in node_data.agent_parameters:
            parameter = agent_parameters_dictionary.get(parameter_name)
            if not parameter:
                result[parameter_name] = None
                continue
            agent_input = node_data.agent_parameters[parameter_name]
            match agent_input.type:
                case "variable":
                    variable = variable_pool.get(agent_input.value)  # type: ignore
                    if variable is None:
                        raise AgentVariableNotFoundError(str(agent_input.value))
                    parameter_value = variable.value
                case "mixed" | "constant":
                    # variable_pool.convert_template expects a string template,
                    # but if passing a dict, convert to JSON string first before rendering
                    try:
                        if not isinstance(agent_input.value, str):
                            parameter_value = json.dumps(agent_input.value, ensure_ascii=False)
                        else:
                            parameter_value = str(agent_input.value)
                    except TypeError:
                        parameter_value = str(agent_input.value)
                    segment_group = variable_pool.convert_template(parameter_value)
                    parameter_value = segment_group.log if for_log else segment_group.text
                    # variable_pool.convert_template returns a string,
                    # so we need to convert it back to a dictionary
                    try:
                        if not isinstance(agent_input.value, str):
                            parameter_value = json.loads(parameter_value)
                    except json.JSONDecodeError:
                        parameter_value = parameter_value
                case _:
                    raise AgentInputTypeError(agent_input.type)
            value = parameter_value
            if parameter.type == "array[tools]":
                value = cast(list[dict[str, Any]], value)
                value = [tool for tool in value if tool.get("enabled", False)]
                value = self._filter_mcp_type_tool(strategy, value)
                for tool in value:
                    if "schemas" in tool:
                        tool.pop("schemas")
                    parameters = tool.get("parameters", {})
                    if all(isinstance(v, dict) for _, v in parameters.items()):
                        params = {}
                        for key, param in parameters.items():
                            if param.get("auto", ParamsAutoGenerated.OPEN) in (
                                ParamsAutoGenerated.CLOSE,
                                0,
                            ):
                                value_param = param.get("value", {})
                                if value_param and value_param.get("type", "") == "variable":
                                    variable_selector = value_param.get("value")
                                    if not variable_selector:
                                        raise ValueError("Variable selector is missing for a variable-type parameter.")

                                    variable = variable_pool.get(variable_selector)
                                    if variable is None:
                                        raise AgentVariableNotFoundError(str(variable_selector))

                                    params[key] = variable.value
                                else:
                                    params[key] = value_param.get("value", "") if value_param is not None else None
                            else:
                                params[key] = None
                        parameters = params
                    tool["settings"] = {k: v.get("value", None) for k, v in tool.get("settings", {}).items()}
                    tool["parameters"] = parameters

            if not for_log:
                if parameter.type == "array[tools]":
                    value = cast(list[dict[str, Any]], value)
                    tool_value = []
                    for tool in value:
                        provider_type = ToolProviderType(tool.get("type", ToolProviderType.BUILT_IN))
                        setting_params = tool.get("settings", {})
                        parameters = tool.get("parameters", {})
                        manual_input_params = [key for key, value in parameters.items() if value is not None]

                        parameters = {**parameters, **setting_params}
                        entity = AgentToolEntity(
                            provider_id=tool.get("provider_name", ""),
                            provider_type=provider_type,
                            tool_name=tool.get("tool_name", ""),
                            tool_parameters=parameters,
                            plugin_unique_identifier=tool.get("plugin_unique_identifier", None),
                            credential_id=tool.get("credential_id", None),
                        )

                        extra = tool.get("extra", {})

                        # This is an issue that caused problems before.
                        # Logically, we shouldn't use the node_data.version field for judgment
                        # But for backward compatibility with historical data
                        # this version field judgment is still preserved here.
                        runtime_variable_pool: VariablePool | None = None
                        if node_data.version != "1" or node_data.tool_node_version is not None:
                            runtime_variable_pool = variable_pool
                        dify_ctx = self.require_dify_context()
                        tool_runtime = ToolManager.get_agent_tool_runtime(
                            dify_ctx.tenant_id,
                            dify_ctx.app_id,
                            entity,
                            dify_ctx.invoke_from,
                            runtime_variable_pool,
                        )
                        if tool_runtime.entity.description:
                            tool_runtime.entity.description.llm = (
                                extra.get("description", "") or tool_runtime.entity.description.llm
                            )
                        for tool_runtime_params in tool_runtime.entity.parameters:
                            tool_runtime_params.form = (
                                ToolParameter.ToolParameterForm.FORM
                                if tool_runtime_params.name in manual_input_params
                                else tool_runtime_params.form
                            )
                        manual_input_value = {}
                        if tool_runtime.entity.parameters:
                            manual_input_value = {
                                key: value for key, value in parameters.items() if key in manual_input_params
                            }
                        runtime_parameters = {
                            **tool_runtime.runtime.runtime_parameters,
                            **manual_input_value,
                        }
                        tool_value.append(
                            {
                                **tool_runtime.entity.model_dump(mode="json"),
                                "runtime_parameters": runtime_parameters,
                                "credential_id": tool.get("credential_id", None),
                                "provider_type": provider_type.value,
                            }
                        )
                    value = tool_value
                if parameter.type == AgentStrategyParameter.AgentStrategyParameterType.MODEL_SELECTOR:
                    value = cast(dict[str, Any], value)
                    model_instance, model_schema = self._fetch_model(value)
                    # memory config
                    history_prompt_messages = []
                    if node_data.memory:
                        memory = self._fetch_memory(model_instance)
                        if memory:
                            prompt_messages = memory.get_history_prompt_messages(
                                message_limit=node_data.memory.window.size or None
                            )
                            history_prompt_messages = [
                                prompt_message.model_dump(mode="json") for prompt_message in prompt_messages
                            ]
                    value["history_prompt_messages"] = history_prompt_messages
                    if model_schema:
                        # remove structured output feature to support old version agent plugin
                        model_schema = self._remove_unsupported_model_features_for_old_version(model_schema)
                        value["entity"] = model_schema.model_dump(mode="json")
                    else:
                        value["entity"] = None
            result[parameter_name] = value

        return result

    def _generate_credentials(
        self,
        parameters: dict[str, Any],
    ) -> InvokeCredentials:
        """
        Generate credentials based on the given agent parameters.
        """
        from core.plugin.entities.request import InvokeCredentials

        credentials = InvokeCredentials()

        # generate credentials for tools selector
        credentials.tool_credentials = {}
        for tool in parameters.get("tools", []):
            if tool.get("credential_id"):
                try:
                    identity = ToolIdentity.model_validate(tool.get("identity", {}))
                    credentials.tool_credentials[identity.provider] = tool.get("credential_id", None)
                except ValidationError:
                    continue
        return credentials

    @classmethod
    def _extract_variable_selector_to_variable_mapping(
        cls,
        *,
        graph_config: Mapping[str, Any],
        node_id: str,
        node_data: Mapping[str, Any],
    ) -> Mapping[str, Sequence[str]]:
        # Create typed NodeData from dict
        typed_node_data = AgentNodeData.model_validate(node_data)

        result: dict[str, Any] = {}
        for parameter_name in typed_node_data.agent_parameters:
            input = typed_node_data.agent_parameters[parameter_name]
            match input.type:
                case "mixed" | "constant":
                    selectors = VariableTemplateParser(str(input.value)).extract_variable_selectors()
                    for selector in selectors:
                        result[selector.variable] = selector.value_selector
                case "variable":
                    result[parameter_name] = input.value

        result = {node_id + "." + key: value for key, value in result.items()}

        return result

    @property
    def agent_strategy_icon(self) -> str | None:
        """
        Get agent strategy icon
        :return:
        """
        from core.plugin.impl.plugin import PluginInstaller

        manager = PluginInstaller()
        dify_ctx = self.require_dify_context()
        plugins = manager.list_plugins(dify_ctx.tenant_id)
        try:
            current_plugin = next(
                plugin
                for plugin in plugins
                if f"{plugin.plugin_id}/{plugin.name}" == self.node_data.agent_strategy_provider_name
            )
            icon = current_plugin.declaration.icon
        except StopIteration:
            icon = None
        return icon

    def _fetch_memory(self, model_instance: ModelInstance) -> TokenBufferMemory | None:
        # get conversation id
        conversation_id_variable = self.graph_runtime_state.variable_pool.get(
            ["sys", SystemVariableKey.CONVERSATION_ID]
        )
        if not isinstance(conversation_id_variable, StringSegment):
            return None
        conversation_id = conversation_id_variable.value

        dify_ctx = self.require_dify_context()
        with Session(db.engine, expire_on_commit=False) as session:
            stmt = select(Conversation).where(
                Conversation.app_id == dify_ctx.app_id, Conversation.id == conversation_id
            )
            conversation = session.scalar(stmt)

        if not conversation:
            return None

        memory = TokenBufferMemory(conversation=conversation, model_instance=model_instance)

        return memory

    def _fetch_model(self, value: dict[str, Any]) -> tuple[ModelInstance, AIModelEntity | None]:
        dify_ctx = self.require_dify_context()
        provider_manager = ProviderManager()
        provider_model_bundle = provider_manager.get_provider_model_bundle(
            tenant_id=dify_ctx.tenant_id, provider=value.get("provider", ""), model_type=ModelType.LLM
        )
        model_name = value.get("model", "")
        model_credentials = provider_model_bundle.configuration.get_current_credentials(
            model_type=ModelType.LLM, model=model_name
        )
        provider_name = provider_model_bundle.configuration.provider.provider
        model_type_instance = provider_model_bundle.model_type_instance
        model_instance = ModelManager().get_model_instance(
            tenant_id=dify_ctx.tenant_id,
            provider=provider_name,
            model_type=ModelType(value.get("model_type", "")),
            model=model_name,
        )
        model_schema = model_type_instance.get_model_schema(model_name, model_credentials)
        return model_instance, model_schema

    def _remove_unsupported_model_features_for_old_version(self, model_schema: AIModelEntity) -> AIModelEntity:
        if model_schema.features:
            for feature in model_schema.features[:]:  # Create a copy to safely modify during iteration
                try:
                    AgentOldVersionModelFeatures(feature.value)  # Try to create enum member from value
                except ValueError:
                    model_schema.features.remove(feature)
        return model_schema

    def _filter_mcp_type_tool(self, strategy: PluginAgentStrategy, tools: list[dict[str, Any]]) -> list[dict[str, Any]]:
        """
        Filter MCP type tool
        :param strategy: plugin agent strategy
        :param tool: tool
        :return: filtered tool dict
        """
        meta_version = strategy.meta_version
        if meta_version and Version(meta_version) > Version("0.0.1"):
            return tools
        else:
            return [tool for tool in tools if tool.get("type") != ToolProviderType.MCP]

    def _transform_message(
        self,
        messages: Generator[ToolInvokeMessage, None, None],
        tool_info: Mapping[str, Any],
        parameters_for_log: dict[str, Any],
        user_id: str,
        tenant_id: str,
        node_type: NodeType,
        node_id: str,
        node_execution_id: str,
    ) -> Generator[NodeEventBase, None, None]:
        """
        Convert ToolInvokeMessages into tuple[plain_text, files]
        """
        # transform message and handle file storage
        from core.plugin.impl.plugin import PluginInstaller

        message_stream = ToolFileMessageTransformer.transform_tool_invoke_messages(
            messages=messages,
            user_id=user_id,
            tenant_id=tenant_id,
            conversation_id=None,
        )

        text = ""
        files: list[File] = []
        json_list: list[dict | list] = []

        agent_logs: list[AgentLogEvent] = []
        agent_execution_metadata: Mapping[WorkflowNodeExecutionMetadataKey, Any] = {}
        llm_usage = LLMUsage.empty_usage()
        variables: dict[str, Any] = {}

        for message in message_stream:
            if message.type in {
                ToolInvokeMessage.MessageType.IMAGE_LINK,
                ToolInvokeMessage.MessageType.BINARY_LINK,
                ToolInvokeMessage.MessageType.IMAGE,
            }:
                assert isinstance(message.message, ToolInvokeMessage.TextMessage)

                url = message.message.text
                if message.meta:
                    transfer_method = message.meta.get("transfer_method", FileTransferMethod.TOOL_FILE)
                else:
                    transfer_method = FileTransferMethod.TOOL_FILE

                tool_file_id = str(url).split("/")[-1].split(".")[0]

                with Session(db.engine) as session:
                    stmt = select(ToolFile).where(ToolFile.id == tool_file_id)
                    tool_file = session.scalar(stmt)
                    if tool_file is None:
                        raise ToolFileNotFoundError(tool_file_id)

                mapping = {
                    "tool_file_id": tool_file_id,
                    "type": file_factory.get_file_type_by_mime_type(tool_file.mimetype),
                    "transfer_method": transfer_method,
                    "url": url,
                }
                file = file_factory.build_from_mapping(
                    mapping=mapping,
                    tenant_id=tenant_id,
                )
                files.append(file)
            elif message.type == ToolInvokeMessage.MessageType.BLOB:
                # get tool file id
                assert isinstance(message.message, ToolInvokeMessage.TextMessage)
                assert message.meta

                tool_file_id = message.message.text.split("/")[-1].split(".")[0]
                with Session(db.engine) as session:
                    stmt = select(ToolFile).where(ToolFile.id == tool_file_id)
                    tool_file = session.scalar(stmt)
                    if tool_file is None:
                        raise ToolFileNotFoundError(tool_file_id)

                mapping = {
                    "tool_file_id": tool_file_id,
                    "transfer_method": FileTransferMethod.TOOL_FILE,
                }

                files.append(
                    file_factory.build_from_mapping(
                        mapping=mapping,
                        tenant_id=tenant_id,
                    )
                )
            elif message.type == ToolInvokeMessage.MessageType.TEXT:
                assert isinstance(message.message, ToolInvokeMessage.TextMessage)
                text += message.message.text
                yield StreamChunkEvent(
                    selector=[node_id, "text"],
                    chunk=message.message.text,
                    is_final=False,
                )
            elif message.type == ToolInvokeMessage.MessageType.JSON:
                assert isinstance(message.message, ToolInvokeMessage.JsonMessage)
                if node_type == NodeType.AGENT:
                    if isinstance(message.message.json_object, dict):
                        msg_metadata: dict[str, Any] = message.message.json_object.pop("execution_metadata", {})
                        llm_usage = LLMUsage.from_metadata(cast(LLMUsageMetadata, msg_metadata))
                        agent_execution_metadata = {
                            WorkflowNodeExecutionMetadataKey(key): value
                            for key, value in msg_metadata.items()
                            if key in WorkflowNodeExecutionMetadataKey.__members__.values()
                        }
                    else:
                        msg_metadata = {}
                        llm_usage = LLMUsage.empty_usage()
                        agent_execution_metadata = {}
                if message.message.json_object:
                    json_list.append(message.message.json_object)
            elif message.type == ToolInvokeMessage.MessageType.LINK:
                assert isinstance(message.message, ToolInvokeMessage.TextMessage)
                stream_text = f"Link: {message.message.text}\n"
                text += stream_text
                yield StreamChunkEvent(
                    selector=[node_id, "text"],
                    chunk=stream_text,
                    is_final=False,
                )
            elif message.type == ToolInvokeMessage.MessageType.VARIABLE:
                assert isinstance(message.message, ToolInvokeMessage.VariableMessage)
                variable_name = message.message.variable_name
                variable_value = message.message.variable_value
                if message.message.stream:
                    if not isinstance(variable_value, str):
                        raise AgentVariableTypeError(
                            "When 'stream' is True, 'variable_value' must be a string.",
                            variable_name=variable_name,
                            expected_type="str",
                            actual_type=type(variable_value).__name__,
                        )
                    if variable_name not in variables:
                        variables[variable_name] = ""
                    variables[variable_name] += variable_value

                    yield StreamChunkEvent(
                        selector=[node_id, variable_name],
                        chunk=variable_value,
                        is_final=False,
                    )
                else:
                    variables[variable_name] = variable_value
            elif message.type == ToolInvokeMessage.MessageType.FILE:
                assert message.meta is not None
                assert isinstance(message.meta, dict)
                # Validate that meta contains a 'file' key
                if "file" not in message.meta:
                    raise AgentNodeError("File message is missing 'file' key in meta")

                # Validate that the file is an instance of File
                if not isinstance(message.meta["file"], File):
                    raise AgentNodeError(f"Expected File object but got {type(message.meta['file']).__name__}")
                files.append(message.meta["file"])
            elif message.type == ToolInvokeMessage.MessageType.LOG:
                assert isinstance(message.message, ToolInvokeMessage.LogMessage)
                if message.message.metadata:
                    icon = tool_info.get("icon", "")
                    dict_metadata = dict(message.message.metadata)
                    if dict_metadata.get("provider"):
                        manager = PluginInstaller()
                        plugins = manager.list_plugins(tenant_id)
                        try:
                            current_plugin = next(
                                plugin
                                for plugin in plugins
                                if f"{plugin.plugin_id}/{plugin.name}" == dict_metadata["provider"]
                            )
                            icon = current_plugin.declaration.icon
                        except StopIteration:
                            pass
                        icon_dark = None
                        try:
                            builtin_tool = next(
                                provider
                                for provider in BuiltinToolManageService.list_builtin_tools(
                                    user_id,
                                    tenant_id,
                                )
                                if provider.name == dict_metadata["provider"]
                            )
                            icon = builtin_tool.icon
                            icon_dark = builtin_tool.icon_dark
                        except StopIteration:
                            pass

                        dict_metadata["icon"] = icon
                        dict_metadata["icon_dark"] = icon_dark
                        message.message.metadata = dict_metadata
                agent_log = AgentLogEvent(
                    message_id=message.message.id,
                    node_execution_id=node_execution_id,
                    parent_id=message.message.parent_id,
                    error=message.message.error,
                    status=message.message.status.value,
                    data=message.message.data,
                    label=message.message.label,
                    metadata=message.message.metadata,
                    node_id=node_id,
                )

                # check if the agent log is already in the list
                for log in agent_logs:
                    if log.message_id == agent_log.message_id:
                        # update the log
                        log.data = agent_log.data
                        log.status = agent_log.status
                        log.error = agent_log.error
                        log.label = agent_log.label
                        log.metadata = agent_log.metadata
                        break
                else:
                    agent_logs.append(agent_log)

                yield agent_log

        # Add agent_logs to outputs['json'] to ensure frontend can access thinking process
        json_output: list[dict[str, Any] | list[Any]] = []

        # Step 1: append each agent log as its own dict.
        if agent_logs:
            for log in agent_logs:
                json_output.append(
                    {
                        "id": log.message_id,
                        "parent_id": log.parent_id,
                        "error": log.error,
                        "status": log.status,
                        "data": log.data,
                        "label": log.label,
                        "metadata": log.metadata,
                        "node_id": log.node_id,
                    }
                )
        # Step 2: normalize JSON into {"data": [...]}; change json to list[dict]
        if json_list:
            json_output.extend(json_list)
        else:
            json_output.append({"data": []})

        # Send final chunk events for all streamed outputs
        # Final chunk for text stream
        yield StreamChunkEvent(
            selector=[node_id, "text"],
            chunk="",
            is_final=True,
        )

        # Final chunks for any streamed variables
        for var_name in variables:
            yield StreamChunkEvent(
                selector=[node_id, var_name],
                chunk="",
                is_final=True,
            )

        yield StreamCompletedEvent(
            node_run_result=NodeRunResult(
                status=WorkflowNodeExecutionStatus.SUCCEEDED,
                outputs={
                    "text": text,
                    "usage": jsonable_encoder(llm_usage),
                    "files": ArrayFileSegment(value=files),
                    "json": json_output,
                    **variables,
                },
                metadata={
                    **agent_execution_metadata,
                    WorkflowNodeExecutionMetadataKey.TOOL_INFO: tool_info,
                    WorkflowNodeExecutionMetadataKey.AGENT_LOG: agent_logs,
                },
                inputs=parameters_for_log,
                llm_usage=llm_usage,
            )
        )
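The `_transform_message` loop in the deleted file illustrates the streaming convention used throughout: emit incremental `StreamChunkEvent`s with `is_final=False`, then close every stream with an empty `is_final=True` chunk before the completion event. A self-contained sketch with simplified stand-in event types (not the actual `dify_graph` classes):

from dataclasses import dataclass
from collections.abc import Iterator


@dataclass
class StreamChunkEvent:
    selector: list[str]
    chunk: str
    is_final: bool


def stream_text(node_id: str, pieces: list[str]) -> Iterator[StreamChunkEvent]:
    for piece in pieces:
        yield StreamChunkEvent(selector=[node_id, "text"], chunk=piece, is_final=False)
    # Consumers rely on an empty final chunk to close the stream deterministically.
    yield StreamChunkEvent(selector=[node_id, "text"], chunk="", is_final=True)


events = list(stream_text("agent-1", ["Hel", "lo"]))
assert events[-1].is_final and events[-1].chunk == ""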
@@ -48,12 +48,10 @@ class AnswerNode(Node[AnswerNodeData]):
         *,
         graph_config: Mapping[str, Any],
         node_id: str,
-        node_data: Mapping[str, Any],
+        node_data: AnswerNodeData,
     ) -> Mapping[str, Sequence[str]]:
-        # Create typed NodeData from dict
-        typed_node_data = AnswerNodeData.model_validate(node_data)
-
-        variable_template_parser = VariableTemplateParser(template=typed_node_data.answer)
+        _ = graph_config  # Explicitly mark as unused
+        variable_template_parser = VariableTemplateParser(template=node_data.answer)
         variable_selectors = variable_template_parser.extract_variable_selectors()

         variable_mapping = {}
@@ -3,7 +3,8 @@ from enum import StrEnum, auto

 from pydantic import BaseModel, Field

-from dify_graph.nodes.base import BaseNodeData
+from dify_graph.entities.base_node_data import BaseNodeData
+from dify_graph.enums import NodeType


 class AnswerNodeData(BaseNodeData):

@@ -11,6 +12,7 @@ class AnswerNodeData(BaseNodeData):
     Answer Node Data.
     """

+    type: NodeType = NodeType.ANSWER
     answer: str = Field(..., description="answer template string")
@@ -1,4 +1,4 @@
-from .entities import BaseIterationNodeData, BaseIterationState, BaseLoopNodeData, BaseLoopState, BaseNodeData
+from .entities import BaseIterationNodeData, BaseIterationState, BaseLoopNodeData, BaseLoopState
 from .usage_tracking_mixin import LLMUsageTrackingMixin

 __all__ = [

@@ -6,6 +6,5 @@ __all__ = [
     "BaseIterationState",
     "BaseLoopNodeData",
     "BaseLoopState",
-    "BaseNodeData",
     "LLMUsageTrackingMixin",
 ]
@@ -1,31 +1,12 @@
 from __future__ import annotations

-import json
-from abc import ABC
-from builtins import type as type_
 from collections.abc import Sequence
-from enum import StrEnum
-from typing import Any, Union
+from typing import Any

-from pydantic import BaseModel, field_validator, model_validator
+from pydantic import BaseModel, field_validator

-from dify_graph.enums import ErrorStrategy
-
-from .exc import DefaultValueTypeError
-
-_NumberType = Union[int, float]
-
-
-class RetryConfig(BaseModel):
-    """node retry config"""
-
-    max_retries: int = 0  # max retry times
-    retry_interval: int = 0  # retry interval in milliseconds
-    retry_enabled: bool = False  # whether retry is enabled
-
-    @property
-    def retry_interval_seconds(self) -> float:
-        return self.retry_interval / 1000
+from dify_graph.entities.base_node_data import BaseNodeData


 class VariableSelector(BaseModel):

@@ -76,112 +57,6 @@ class OutputVariableEntity(BaseModel):
         return v


-class DefaultValueType(StrEnum):
-    STRING = "string"
-    NUMBER = "number"
-    OBJECT = "object"
-    ARRAY_NUMBER = "array[number]"
-    ARRAY_STRING = "array[string]"
-    ARRAY_OBJECT = "array[object]"
-    ARRAY_FILES = "array[file]"
-
-
-class DefaultValue(BaseModel):
-    value: Any = None
-    type: DefaultValueType
-    key: str
-
-    @staticmethod
-    def _parse_json(value: str):
-        """Unified JSON parsing handler"""
-        try:
-            return json.loads(value)
-        except json.JSONDecodeError:
-            raise DefaultValueTypeError(f"Invalid JSON format for value: {value}")
-
-    @staticmethod
-    def _validate_array(value: Any, element_type: type_ | tuple[type_, ...]) -> bool:
-        """Unified array type validation"""
-        return isinstance(value, list) and all(isinstance(x, element_type) for x in value)
-
-    @staticmethod
-    def _convert_number(value: str) -> float:
-        """Unified number conversion handler"""
-        try:
-            return float(value)
-        except ValueError:
-            raise DefaultValueTypeError(f"Cannot convert to number: {value}")
-
-    @model_validator(mode="after")
-    def validate_value_type(self) -> DefaultValue:
-        # Type validation configuration
-        type_validators: dict[DefaultValueType, dict[str, Any]] = {
-            DefaultValueType.STRING: {
-                "type": str,
-                "converter": lambda x: x,
-            },
-            DefaultValueType.NUMBER: {
-                "type": _NumberType,
-                "converter": self._convert_number,
-            },
-            DefaultValueType.OBJECT: {
-                "type": dict,
-                "converter": self._parse_json,
-            },
-            DefaultValueType.ARRAY_NUMBER: {
-                "type": list,
-                "element_type": _NumberType,
-                "converter": self._parse_json,
-            },
-            DefaultValueType.ARRAY_STRING: {
-                "type": list,
-                "element_type": str,
-                "converter": self._parse_json,
-            },
-            DefaultValueType.ARRAY_OBJECT: {
-                "type": list,
-                "element_type": dict,
-                "converter": self._parse_json,
-            },
-        }
-
-        validator: dict[str, Any] = type_validators.get(self.type, {})
-        if not validator:
-            if self.type == DefaultValueType.ARRAY_FILES:
-                # Handle files type
-                return self
-            raise DefaultValueTypeError(f"Unsupported type: {self.type}")
-
-        # Handle string input cases
-        if isinstance(self.value, str) and self.type != DefaultValueType.STRING:
-            self.value = validator["converter"](self.value)
-
-        # Validate base type
-        if not isinstance(self.value, validator["type"]):
-            raise DefaultValueTypeError(f"Value must be {validator['type'].__name__} type for {self.value}")
-
-        # Validate array element types
-        if validator["type"] == list and not self._validate_array(self.value, validator["element_type"]):
-            raise DefaultValueTypeError(f"All elements must be {validator['element_type'].__name__} for {self.value}")
-
-        return self
-
-
-class BaseNodeData(ABC, BaseModel):
-    title: str
-    desc: str | None = None
-    version: str = "1"
-    error_strategy: ErrorStrategy | None = None
-    default_value: list[DefaultValue] | None = None
-    retry_config: RetryConfig = RetryConfig()
-
-    @property
-    def default_value_dict(self) -> dict[str, Any]:
-        if self.default_value:
-            return {item.key: item.value for item in self.default_value}
-        return {}
-
-
 class BaseIterationNodeData(BaseNodeData):
     start_node_id: str | None = None
@@ -11,7 +11,9 @@ from types import MappingProxyType
 from typing import Any, ClassVar, Generic, Protocol, TypeVar, cast, get_args, get_origin
 from uuid import uuid4

-from dify_graph.entities import AgentNodeStrategyInit, GraphInitParams
+from dify_graph.entities import GraphInitParams
+from dify_graph.entities.base_node_data import BaseNodeData, RetryConfig
+from dify_graph.entities.graph_config import NodeConfigDict
 from dify_graph.entities.graph_init_params import DIFY_RUN_CONTEXT_KEY
 from dify_graph.enums import (
     ErrorStrategy,

@@ -62,8 +64,6 @@ from dify_graph.node_events import (
 from dify_graph.runtime import GraphRuntimeState
 from libs.datetime_utils import naive_utc_now

-from .entities import BaseNodeData, RetryConfig
-
 NodeDataT = TypeVar("NodeDataT", bound=BaseNodeData)
 _MISSING_RUN_CONTEXT_VALUE = object()

@@ -153,11 +153,11 @@ class Node(Generic[NodeDataT]):
         Later, in __init__:
         ::

-            config["data"] ──► _hydrate_node_data() ──► _node_data_type.model_validate()
-                                        │
-                                        ▼
-                               CodeNodeData instance
-                               (stored in self._node_data)
+            config["data"] ──► _node_data_type.model_validate(..., from_attributes=True)
+                                        │
+                                        ▼
+                               CodeNodeData instance
+                               (stored in self._node_data)

         Example:
             class CodeNode(Node[CodeNodeData]):  # CodeNodeData is auto-extracted
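The new diagram reflects re-validating the shared `BaseNodeData` into the concrete subclass via `from_attributes=True`, which reads extras back as attributes. A standalone pydantic v2 sketch of that step (the model names mirror the pattern, not the actual classes):

from pydantic import BaseModel, ConfigDict


class BaseData(BaseModel):
    model_config = ConfigDict(extra="allow")
    type: str
    title: str = ""


class CodeData(BaseData):
    code: str = ""


# The shared boundary produces a BaseData whose node-specific keys live in extras;
# from_attributes lets the concrete model re-read them as attributes.
shared = BaseData.model_validate({"type": "code", "title": "t", "code": "print(1)"})
typed = CodeData.model_validate(shared, from_attributes=True)
assert typed.code == "print(1)"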
@@ -241,7 +241,7 @@ class Node(Generic[NodeDataT]):
     def __init__(
         self,
         id: str,
-        config: Mapping[str, Any],
+        config: NodeConfigDict,
         graph_init_params: GraphInitParams,
         graph_runtime_state: GraphRuntimeState,
     ) -> None:

@@ -254,22 +254,21 @@ class Node(Generic[NodeDataT]):
         self.graph_runtime_state = graph_runtime_state
         self.state: NodeState = NodeState.UNKNOWN  # node execution state

-        node_id = config.get("id")
-        if not node_id:
-            raise ValueError("Node ID is required.")
+        node_id = config["id"]

         self._node_id = node_id
         self._node_execution_id: str = ""
         self._start_at = naive_utc_now()

-        raw_node_data = config.get("data") or {}
-        if not isinstance(raw_node_data, Mapping):
-            raise ValueError("Node config data must be a mapping.")
-
-        self._node_data: NodeDataT = self._hydrate_node_data(raw_node_data)
+        self._node_data = self.validate_node_data(config["data"])

         self.post_init()

+    @classmethod
+    def validate_node_data(cls, node_data: BaseNodeData) -> NodeDataT:
+        """Validate shared graph node payloads against the subclass-declared NodeData model."""
+        return cast(NodeDataT, cls._node_data_type.model_validate(node_data, from_attributes=True))
+
     def post_init(self) -> None:
         """Optional hook for subclasses requiring extra initialization."""
         return

@@ -342,9 +341,6 @@ class Node(Generic[NodeDataT]):
             return None
         return str(execution_id)

-    def _hydrate_node_data(self, data: Mapping[str, Any]) -> NodeDataT:
-        return cast(NodeDataT, self._node_data_type.model_validate(data))
-
     @abstractmethod
     def _run(self) -> NodeRunResult | Generator[NodeEventBase, None, None]:
         """

@@ -353,6 +349,10 @@ class Node(Generic[NodeDataT]):
         """
         raise NotImplementedError

+    def populate_start_event(self, event: NodeRunStartedEvent) -> None:
+        """Allow subclasses to enrich the started event without cross-node imports in the base class."""
+        _ = event
+
     def run(self) -> Generator[GraphNodeEventBase, None, None]:
         execution_id = self.ensure_execution_id()
         self._start_at = naive_utc_now()
@@ -366,41 +366,10 @@ class Node(Generic[NodeDataT]):
             in_iteration_id=None,
             start_at=self._start_at,
         )

-        # === FIXME(-LAN-): Needs to refactor.
-        from dify_graph.nodes.tool.tool_node import ToolNode
-
-        if isinstance(self, ToolNode):
-            start_event.provider_id = getattr(self.node_data, "provider_id", "")
-            start_event.provider_type = getattr(self.node_data, "provider_type", "")
-
-        from dify_graph.nodes.datasource.datasource_node import DatasourceNode
-
-        if isinstance(self, DatasourceNode):
-            plugin_id = getattr(self.node_data, "plugin_id", "")
-            provider_name = getattr(self.node_data, "provider_name", "")
-
-            start_event.provider_id = f"{plugin_id}/{provider_name}"
-            start_event.provider_type = getattr(self.node_data, "provider_type", "")
-
-        from dify_graph.nodes.trigger_plugin.trigger_event_node import TriggerEventNode
-
-        if isinstance(self, TriggerEventNode):
-            start_event.provider_id = getattr(self.node_data, "provider_id", "")
-            start_event.provider_type = getattr(self.node_data, "provider_type", "")
-
-        from typing import cast
-
-        from dify_graph.nodes.agent.agent_node import AgentNode
-        from dify_graph.nodes.agent.entities import AgentNodeData
-
-        if isinstance(self, AgentNode):
-            start_event.agent_strategy = AgentNodeStrategyInit(
-                name=cast(AgentNodeData, self.node_data).agent_strategy_name,
-                icon=self.agent_strategy_icon,
-            )
-
-        # ===
+        try:
+            self.populate_start_event(start_event)
+        except Exception:
+            logger.warning("Failed to populate start event for node %s", self._node_id, exc_info=True)
         yield start_event

         try:
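The removed isinstance chain gives way to a template-method hook: each subclass enriches its own start event, so the base class needs no cross-node imports. A minimal sketch of the pattern with made-up stand-in classes:

class StartEvent:
    provider_id: str = ""
    provider_type: str = ""


class BaseNode:
    def populate_start_event(self, event: StartEvent) -> None:
        # Default: contribute nothing; subclasses override.
        _ = event

    def run(self) -> StartEvent:
        event = StartEvent()
        self.populate_start_event(event)  # one dynamic dispatch, no isinstance chain
        return event


class ToolNode(BaseNode):
    provider_id = "plugin/x"

    def populate_start_event(self, event: StartEvent) -> None:
        event.provider_id = self.provider_id
        event.provider_type = "builtin"


assert ToolNode().run().provider_id == "plugin/x"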
@@ -442,7 +411,7 @@ class Node(Generic[NodeDataT]):
         cls,
         *,
         graph_config: Mapping[str, Any],
-        config: Mapping[str, Any],
+        config: NodeConfigDict,
     ) -> Mapping[str, Sequence[str]]:
         """Extracts references variable selectors from node configuration.

@@ -480,13 +449,12 @@ class Node(Generic[NodeDataT]):
         :param config: node config
         :return:
         """
-        node_id = config.get("id")
-        if not node_id:
-            raise ValueError("Node ID is required when extracting variable selector to variable mapping.")
-
-        # Pass raw dict data instead of creating NodeData instance
+        node_id = config["id"]
+        node_data = cls.validate_node_data(config["data"])
         data = cls._extract_variable_selector_to_variable_mapping(
-            graph_config=graph_config, node_id=node_id, node_data=config.get("data", {})
+            graph_config=graph_config,
+            node_id=node_id,
+            node_data=node_data,
         )
         return data

@@ -496,7 +464,7 @@ class Node(Generic[NodeDataT]):
         *,
         graph_config: Mapping[str, Any],
         node_id: str,
-        node_data: Mapping[str, Any],
+        node_data: NodeDataT,
     ) -> Mapping[str, Sequence[str]]:
         return {}

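The hunks above move dict-to-model conversion into the public classmethod: `config["id"]` and `validate_node_data(config["data"])` run once, and the per-node `_extract_variable_selector_to_variable_mapping` override receives an already-typed `node_data`. A hedged sketch of that two-step flow with plain pydantic (simplified signatures; `validate_node_data` is assumed here to wrap `model_validate`):

from collections.abc import Mapping, Sequence
from typing import Any, ClassVar

from pydantic import BaseModel


class MyNodeData(BaseModel):
    variable_selector: list[str]


class MyNode:
    data_model: ClassVar[type[MyNodeData]] = MyNodeData

    @classmethod
    def validate_node_data(cls, data: Mapping[str, Any]) -> MyNodeData:
        # One validation point instead of a model_validate() call in every subclass.
        return cls.data_model.model_validate(data)

    @classmethod
    def extract_variable_selector_to_variable_mapping(
        cls, *, graph_config: Mapping[str, Any], config: Mapping[str, Any]
    ) -> Mapping[str, Sequence[str]]:
        node_id = config["id"]  # a missing "id" now fails fast (KeyError in this sketch)
        node_data = cls.validate_node_data(config["data"])
        return cls._extract_variable_selector_to_variable_mapping(
            graph_config=graph_config, node_id=node_id, node_data=node_data
        )

    @classmethod
    def _extract_variable_selector_to_variable_mapping(
        cls, *, graph_config: Mapping[str, Any], node_id: str, node_data: MyNodeData
    ) -> Mapping[str, Sequence[str]]:
        _ = graph_config  # unused in this node type
        return {f"{node_id}.files": node_data.variable_selector}


mapping = MyNode.extract_variable_selector_to_variable_mapping(
    graph_config={}, config={"id": "doc_1", "data": {"variable_selector": ["start", "file"]}}
)
print(mapping)  # {'doc_1.files': ['start', 'file']}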
@@ -520,10 +488,8 @@ class Node(Generic[NodeDataT]):
     @abstractmethod
     def version(cls) -> str:
         """`node_version` returns the version of current node type."""
-        # NOTE(QuantumGhost): This should be in sync with `NODE_TYPE_CLASSES_MAPPING`.
-        #
-        # If you have introduced a new node type, please add it to `NODE_TYPE_CLASSES_MAPPING`
-        # in `api/dify_graph/nodes/__init__.py`.
+        # NOTE(QuantumGhost): Node versions must remain unique per `NodeType` so
+        # `Node.get_node_type_classes_mapping()` can resolve numeric versions and `latest`.
         raise NotImplementedError("subclasses of BaseNode must implement `version` method.")

     @classmethod
@@ -531,7 +497,9 @@ class Node(Generic[NodeDataT]):
         """Return mapping of NodeType -> {version -> Node subclass} using __init_subclass__ registry.

-        Import all modules under dify_graph.nodes so subclasses register themselves on import.
-        Then we return a readonly view of the registry to avoid accidental mutation.
+        Callers that rely on workflow-local nodes defined outside `dify_graph.nodes` must import
+        those modules before invoking this method so they can register through `__init_subclass__`.
+        We then return a readonly view of the registry to avoid accidental mutation.
         """
         # Import all node modules to ensure they are loaded (thus registered)
         import dify_graph.nodes as _nodes_pkg

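`get_node_type_classes_mapping()` depends on an `__init_subclass__` registry, which is why the revised docstring warns that node modules defined outside `dify_graph.nodes` must be imported first. A self-contained sketch of that mechanism (toy registry keyed on strings; the real one keys on `NodeType` and also resolves `latest`):

from types import MappingProxyType

_REGISTRY: dict[str, dict[str, type]] = {}


class Node:
    node_type: str = ""

    def __init_subclass__(cls, **kwargs):
        super().__init_subclass__(**kwargs)
        if cls.node_type:  # skip abstract intermediates without a concrete type
            _REGISTRY.setdefault(cls.node_type, {})[cls.version()] = cls

    @classmethod
    def version(cls) -> str:
        raise NotImplementedError

    @staticmethod
    def get_node_type_classes_mapping():
        # Read-only view: callers cannot accidentally mutate the registry.
        return MappingProxyType({k: MappingProxyType(v) for k, v in _REGISTRY.items()})


class CodeNodeV1(Node):
    node_type = "code"

    @classmethod
    def version(cls) -> str:
        return "1"


mapping = Node.get_node_type_classes_mapping()
print(mapping["code"]["1"])  # <class '__main__.CodeNodeV1'>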
@@ -3,6 +3,7 @@ from decimal import Decimal
 from textwrap import dedent
 from typing import TYPE_CHECKING, Any, Protocol, cast

+from dify_graph.entities.graph_config import NodeConfigDict
 from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus
 from dify_graph.node_events import NodeRunResult
 from dify_graph.nodes.base.node import Node
@@ -77,7 +78,7 @@ class CodeNode(Node[CodeNodeData]):
     def __init__(
         self,
         id: str,
-        config: Mapping[str, Any],
+        config: NodeConfigDict,
         graph_init_params: "GraphInitParams",
         graph_runtime_state: "GraphRuntimeState",
         *,
@@ -466,15 +467,12 @@ class CodeNode(Node[CodeNodeData]):
         *,
         graph_config: Mapping[str, Any],
         node_id: str,
-        node_data: Mapping[str, Any],
+        node_data: CodeNodeData,
     ) -> Mapping[str, Sequence[str]]:
         _ = graph_config  # Explicitly mark as unused
-        # Create typed NodeData from dict
-        typed_node_data = CodeNodeData.model_validate(node_data)
-
         return {
             node_id + "." + variable_selector.variable: variable_selector.value_selector
-            for variable_selector in typed_node_data.variables
+            for variable_selector in node_data.variables
         }

     @property

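For a concrete feel of what this classmethod returns now that `node_data` arrives typed: each entry maps a node-scoped variable name to the selector path it reads from. A small illustration with assumed selector values:

from pydantic import BaseModel


class VariableSelector(BaseModel):
    variable: str
    value_selector: list[str]


class CodeNodeDataSketch(BaseModel):  # trimmed stand-in for CodeNodeData
    variables: list[VariableSelector]


node_id = "code_1"
node_data = CodeNodeDataSketch(
    variables=[
        VariableSelector(variable="arg1", value_selector=["start", "query"]),
        VariableSelector(variable="arg2", value_selector=["llm_1", "text"]),
    ]
)
mapping = {node_id + "." + vs.variable: vs.value_selector for vs in node_data.variables}
print(mapping)
# {'code_1.arg1': ['start', 'query'], 'code_1.arg2': ['llm_1', 'text']}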
@@ -3,7 +3,8 @@ from typing import Annotated, Literal

 from pydantic import AfterValidator, BaseModel

-from dify_graph.nodes.base import BaseNodeData
+from dify_graph.entities.base_node_data import BaseNodeData
+from dify_graph.enums import NodeType
 from dify_graph.nodes.base.entities import VariableSelector
 from dify_graph.variables.types import SegmentType

@@ -39,6 +40,8 @@ class CodeNodeData(BaseNodeData):
     Code Node Data.
     """

+    type: NodeType = NodeType.CODE
+
     class Output(BaseModel):
         type: Annotated[SegmentType, AfterValidator(_validate_type)]
         children: dict[str, "CodeNodeData.Output"] | None = None
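Every entities hunk in this diff adds the same one-liner: a `type: NodeType` field with a class-specific default. That gives each NodeData model a machine-readable discriminator, which is what allows a raw config dict to be validated into the right model by its "type" key. A hedged sketch of the idea with a toy enum and Literal-typed discriminators (the real models use the `NodeType` enum from `dify_graph.enums`; how dify wires the union is not shown in this diff):

from enum import StrEnum
from typing import Annotated, Literal, Union

from pydantic import BaseModel, Field, TypeAdapter


class NodeType(StrEnum):
    CODE = "code"
    END = "end"


class CodeNodeData(BaseModel):
    type: Literal[NodeType.CODE] = NodeType.CODE
    code: str = ""


class EndNodeData(BaseModel):
    type: Literal[NodeType.END] = NodeType.END
    outputs: list[str] = []


AnyNodeData = Annotated[Union[CodeNodeData, EndNodeData], Field(discriminator="type")]

data = TypeAdapter(AnyNodeData).validate_python({"type": "code", "code": "print(1)"})
print(type(data).__name__)  # CodeNodeData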
@@ -3,6 +3,7 @@ from typing import TYPE_CHECKING, Any

 from core.datasource.entities.datasource_entities import DatasourceProviderType
 from core.plugin.impl.exc import PluginDaemonClientSideError
+from dify_graph.entities.graph_config import NodeConfigDict
 from dify_graph.entities.workflow_node_execution import WorkflowNodeExecutionStatus
 from dify_graph.enums import NodeExecutionType, NodeType, SystemVariableKey
 from dify_graph.node_events import NodeRunResult, StreamCompletedEvent
@@ -34,7 +35,7 @@ class DatasourceNode(Node[DatasourceNodeData]):
     def __init__(
         self,
         id: str,
-        config: Mapping[str, Any],
+        config: NodeConfigDict,
         graph_init_params: "GraphInitParams",
         graph_runtime_state: "GraphRuntimeState",
         datasource_manager: DatasourceManagerProtocol,
@@ -47,6 +48,10 @@ class DatasourceNode(Node[DatasourceNodeData]):
         )
         self.datasource_manager = datasource_manager

+    def populate_start_event(self, event) -> None:
+        event.provider_id = f"{self.node_data.plugin_id}/{self.node_data.provider_name}"
+        event.provider_type = self.node_data.provider_type
+
     def _run(self) -> Generator:
         """
         Run the datasource node
@@ -181,7 +186,7 @@ class DatasourceNode(Node[DatasourceNodeData]):
         *,
         graph_config: Mapping[str, Any],
         node_id: str,
-        node_data: Mapping[str, Any],
+        node_data: DatasourceNodeData,
     ) -> Mapping[str, Sequence[str]]:
         """
         Extract variable selector to variable mapping
@@ -190,11 +195,10 @@ class DatasourceNode(Node[DatasourceNodeData]):
         :param node_data: node data
         :return:
         """
-        typed_node_data = DatasourceNodeData.model_validate(node_data)
         result = {}
-        if typed_node_data.datasource_parameters:
-            for parameter_name in typed_node_data.datasource_parameters:
-                input = typed_node_data.datasource_parameters[parameter_name]
+        if node_data.datasource_parameters:
+            for parameter_name in node_data.datasource_parameters:
+                input = node_data.datasource_parameters[parameter_name]
                 match input.type:
                     case "mixed":
                         assert isinstance(input.value, str)

@@ -3,7 +3,8 @@ from typing import Any, Literal, Union
 from pydantic import BaseModel, field_validator
 from pydantic_core.core_schema import ValidationInfo

-from dify_graph.nodes.base.entities import BaseNodeData
+from dify_graph.entities.base_node_data import BaseNodeData
+from dify_graph.enums import NodeType


 class DatasourceEntity(BaseModel):
@@ -16,6 +17,8 @@ class DatasourceEntity(BaseModel):


 class DatasourceNodeData(BaseNodeData, DatasourceEntity):
+    type: NodeType = NodeType.DATASOURCE
+
     class DatasourceInput(BaseModel):
         # TODO: check this type
         value: Union[Any, list[str]]

@@ -1,10 +1,12 @@
 from collections.abc import Sequence
 from dataclasses import dataclass

-from dify_graph.nodes.base import BaseNodeData
+from dify_graph.entities.base_node_data import BaseNodeData
+from dify_graph.enums import NodeType


 class DocumentExtractorNodeData(BaseNodeData):
+    type: NodeType = NodeType.DOCUMENT_EXTRACTOR
     variable_selector: Sequence[str]


@@ -21,6 +21,7 @@ from docx.oxml.text.paragraph import CT_P
 from docx.table import Table
 from docx.text.paragraph import Paragraph

+from dify_graph.entities.graph_config import NodeConfigDict
 from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus
 from dify_graph.file import File, FileTransferMethod, file_manager
 from dify_graph.node_events import NodeRunResult
@@ -54,7 +55,7 @@ class DocumentExtractorNode(Node[DocumentExtractorNodeData]):
     def __init__(
         self,
         id: str,
-        config: Mapping[str, Any],
+        config: NodeConfigDict,
         graph_init_params: "GraphInitParams",
         graph_runtime_state: "GraphRuntimeState",
         *,
@@ -136,12 +137,10 @@ class DocumentExtractorNode(Node[DocumentExtractorNodeData]):
         *,
         graph_config: Mapping[str, Any],
         node_id: str,
-        node_data: Mapping[str, Any],
+        node_data: DocumentExtractorNodeData,
     ) -> Mapping[str, Sequence[str]]:
-        # Create typed NodeData from dict
-        typed_node_data = DocumentExtractorNodeData.model_validate(node_data)
-
-        return {node_id + ".files": typed_node_data.variable_selector}
+        _ = graph_config  # Explicitly mark as unused
+        return {node_id + ".files": node_data.variable_selector}


 def _extract_text_by_mime_type(

@@ -1,6 +1,8 @@
 from pydantic import BaseModel, Field

-from dify_graph.nodes.base.entities import BaseNodeData, OutputVariableEntity
+from dify_graph.entities.base_node_data import BaseNodeData
+from dify_graph.enums import NodeType
+from dify_graph.nodes.base.entities import OutputVariableEntity


 class EndNodeData(BaseNodeData):
@@ -8,6 +10,7 @@ class EndNodeData(BaseNodeData):
     END Node Data.
     """

+    type: NodeType = NodeType.END
     outputs: list[OutputVariableEntity]


@@ -8,7 +8,8 @@ import charset_normalizer
 import httpx
 from pydantic import BaseModel, Field, ValidationInfo, field_validator

-from dify_graph.nodes.base import BaseNodeData
+from dify_graph.entities.base_node_data import BaseNodeData
+from dify_graph.enums import NodeType

 HTTP_REQUEST_CONFIG_FILTER_KEY = "http_request_config"

@@ -89,6 +90,7 @@ class HttpRequestNodeData(BaseNodeData):
     Code Node Data.
     """

+    type: NodeType = NodeType.HTTP_REQUEST
     method: Literal[
         "get",
         "post",

@@ -3,6 +3,7 @@ import mimetypes
 from collections.abc import Callable, Mapping, Sequence
 from typing import TYPE_CHECKING, Any

+from dify_graph.entities.graph_config import NodeConfigDict
 from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus
 from dify_graph.file import File, FileTransferMethod
 from dify_graph.node_events import NodeRunResult
@@ -37,7 +38,7 @@ class HttpRequestNode(Node[HttpRequestNodeData]):
     def __init__(
         self,
         id: str,
-        config: Mapping[str, Any],
+        config: NodeConfigDict,
         graph_init_params: "GraphInitParams",
         graph_runtime_state: "GraphRuntimeState",
         *,
@@ -163,18 +164,15 @@ class HttpRequestNode(Node[HttpRequestNodeData]):
         *,
         graph_config: Mapping[str, Any],
         node_id: str,
-        node_data: Mapping[str, Any],
+        node_data: HttpRequestNodeData,
     ) -> Mapping[str, Sequence[str]]:
-        # Create typed NodeData from dict
-        typed_node_data = HttpRequestNodeData.model_validate(node_data)
-
         selectors: list[VariableSelector] = []
-        selectors += variable_template_parser.extract_selectors_from_template(typed_node_data.url)
-        selectors += variable_template_parser.extract_selectors_from_template(typed_node_data.headers)
-        selectors += variable_template_parser.extract_selectors_from_template(typed_node_data.params)
-        if typed_node_data.body:
-            body_type = typed_node_data.body.type
-            data = typed_node_data.body.data
+        selectors += variable_template_parser.extract_selectors_from_template(node_data.url)
+        selectors += variable_template_parser.extract_selectors_from_template(node_data.headers)
+        selectors += variable_template_parser.extract_selectors_from_template(node_data.params)
+        if node_data.body:
+            body_type = node_data.body.type
+            data = node_data.body.data
             match body_type:
                 case "none":
                     pass

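Several of the extraction bodies above lean on `variable_template_parser.extract_selectors_from_template`, which pulls `{{#node_id.variable#}}` references out of user-authored strings (the same syntax the human-input docstring cites later in this diff). A rough regex-based approximation of that behavior, not the real parser:

import re

# Hypothetical approximation: matches {{#node_id.variable[.more]#}} references.
TEMPLATE_REF = re.compile(r"\{\{#([a-zA-Z0-9_]+(?:\.[a-zA-Z0-9_]+)+)#\}\}")


def extract_selectors_from_template(template: str) -> list[list[str]]:
    """Return the dotted selector paths referenced in a template string."""
    return [match.split(".") for match in TEMPLATE_REF.findall(template)]


url = "https://api.example.com/{{#start.user_id#}}/items?q={{#llm_1.text#}}"
print(extract_selectors_from_template(url))
# [['start', 'user_id'], ['llm_1', 'text']]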
@@ -10,7 +10,8 @@ from typing import Annotated, Any, ClassVar, Literal, Self

 from pydantic import BaseModel, Field, field_validator, model_validator

-from dify_graph.nodes.base import BaseNodeData
+from dify_graph.entities.base_node_data import BaseNodeData
+from dify_graph.enums import NodeType
 from dify_graph.nodes.base.variable_template_parser import VariableTemplateParser
 from dify_graph.runtime import VariablePool
 from dify_graph.variables.consts import SELECTORS_LENGTH
@@ -71,8 +72,8 @@ class EmailDeliveryConfig(BaseModel):
     body: str
     debug_mode: bool = False

-    def with_debug_recipient(self, user_id: str) -> "EmailDeliveryConfig":
-        if not user_id:
+    def with_debug_recipient(self, user_id: str | None) -> "EmailDeliveryConfig":
+        if user_id is None:
             debug_recipients = EmailRecipients(whole_workspace=False, items=[])
             return self.model_copy(update={"recipients": debug_recipients})
         debug_recipients = EmailRecipients(whole_workspace=False, items=[MemberRecipient(user_id=user_id)])
@@ -140,7 +141,7 @@ def apply_debug_email_recipient(
     method: DeliveryChannelConfig,
     *,
     enabled: bool,
-    user_id: str,
+    user_id: str | None,
 ) -> DeliveryChannelConfig:
     if not enabled:
         return method
@@ -148,7 +149,7 @@ def apply_debug_email_recipient(
         return method
     if not method.config.debug_mode:
         return method
-    debug_config = method.config.with_debug_recipient(user_id or "")
+    debug_config = method.config.with_debug_recipient(user_id)
     return method.model_copy(update={"config": debug_config})


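The `str` to `str | None` change is subtle but behavioral: under the old `if not user_id:` check, an empty string (which the caller used to synthesize via `user_id or ""`) also cleared the recipient list, whereas the new code treats only a true `None` as "no debug user". A tiny illustration of the difference:

user_id = ""            # falsy, but not None
print(not user_id)      # True  -> old check: recipients were emptied
print(user_id is None)  # False -> new check: "" is passed through as a user id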
@@ -214,6 +215,7 @@ class UserAction(BaseModel):
 class HumanInputNodeData(BaseNodeData):
     """Human Input node data."""

+    type: NodeType = NodeType.HUMAN_INPUT
     delivery_methods: list[DeliveryChannelConfig] = Field(default_factory=list)
     form_content: str = ""
     inputs: list[FormInput] = Field(default_factory=list)

@@ -3,6 +3,7 @@ import logging
 from collections.abc import Generator, Mapping, Sequence
 from typing import TYPE_CHECKING, Any

+from dify_graph.entities.graph_config import NodeConfigDict
 from dify_graph.entities.pause_reason import HumanInputRequired
 from dify_graph.enums import NodeExecutionType, NodeType, WorkflowNodeExecutionStatus
 from dify_graph.node_events import (
@@ -63,7 +64,7 @@ class HumanInputNode(Node[HumanInputNodeData]):
     def __init__(
         self,
         id: str,
-        config: Mapping[str, Any],
+        config: NodeConfigDict,
         graph_init_params: "GraphInitParams",
         graph_runtime_state: "GraphRuntimeState",
         form_repository: HumanInputFormRepository,
@@ -348,7 +349,7 @@ class HumanInputNode(Node[HumanInputNodeData]):
         *,
         graph_config: Mapping[str, Any],
         node_id: str,
-        node_data: Mapping[str, Any],
+        node_data: HumanInputNodeData,
     ) -> Mapping[str, Sequence[str]]:
         """
         Extract variable selectors referenced in form content and input default values.
@@ -357,5 +358,4 @@ class HumanInputNode(Node[HumanInputNodeData]):
         1. Variables referenced in form_content ({{#node_name.var_name#}})
         2. Variables referenced in input default values
         """
-        validated_node_data = HumanInputNodeData.model_validate(node_data)
-        return validated_node_data.extract_variable_selector_to_variable_mapping(node_id)
+        return node_data.extract_variable_selector_to_variable_mapping(node_id)

@@ -2,7 +2,8 @@ from typing import Literal

 from pydantic import BaseModel, Field

-from dify_graph.nodes.base import BaseNodeData
+from dify_graph.entities.base_node_data import BaseNodeData
+from dify_graph.enums import NodeType
 from dify_graph.utils.condition.entities import Condition


@@ -11,6 +12,8 @@ class IfElseNodeData(BaseNodeData):
     If Else Node Data.
     """

+    type: NodeType = NodeType.IF_ELSE
+
     class Case(BaseModel):
         """
         Case entity representing a single logical condition group

@@ -97,13 +97,11 @@ class IfElseNode(Node[IfElseNodeData]):
         *,
         graph_config: Mapping[str, Any],
         node_id: str,
-        node_data: Mapping[str, Any],
+        node_data: IfElseNodeData,
     ) -> Mapping[str, Sequence[str]]:
-        # Create typed NodeData from dict
-        typed_node_data = IfElseNodeData.model_validate(node_data)
-
         var_mapping: dict[str, list[str]] = {}
-        for case in typed_node_data.cases or []:
+        _ = graph_config  # Explicitly mark as unused
+        for case in node_data.cases or []:
             for condition in case.conditions:
                 key = f"{node_id}.#{'.'.join(condition.variable_selector)}#"
                 var_mapping[key] = condition.variable_selector

@@ -3,7 +3,9 @@ from typing import Any

 from pydantic import Field

-from dify_graph.nodes.base import BaseIterationNodeData, BaseIterationState, BaseNodeData
+from dify_graph.entities.base_node_data import BaseNodeData
+from dify_graph.enums import NodeType
+from dify_graph.nodes.base import BaseIterationNodeData, BaseIterationState


 class ErrorHandleMode(StrEnum):
@@ -17,6 +19,7 @@ class IterationNodeData(BaseIterationNodeData):
     Iteration Node Data.
     """

+    type: NodeType = NodeType.ITERATION
     parent_loop_id: str | None = None  # redundant field, not used currently
     iterator_selector: list[str]  # variable selector
     output_selector: list[str]  # output selector
@@ -31,7 +34,7 @@ class IterationStartNodeData(BaseNodeData):
     Iteration Start Node Data.
     """

-    pass
+    type: NodeType = NodeType.ITERATION_START


 class IterationState(BaseIterationState):

@@ -7,6 +7,7 @@ from typing import TYPE_CHECKING, Any, NewType, cast
 from typing_extensions import TypeIs

 from dify_graph.constants import CONVERSATION_VARIABLE_NODE_ID
+from dify_graph.entities.graph_config import NodeConfigDictAdapter
 from dify_graph.enums import (
     NodeExecutionType,
     NodeType,
@@ -460,21 +461,18 @@ class IterationNode(LLMUsageTrackingMixin, Node[IterationNodeData]):
         *,
         graph_config: Mapping[str, Any],
         node_id: str,
-        node_data: Mapping[str, Any],
+        node_data: IterationNodeData,
     ) -> Mapping[str, Sequence[str]]:
-        # Create typed NodeData from dict
-        typed_node_data = IterationNodeData.model_validate(node_data)
-
         variable_mapping: dict[str, Sequence[str]] = {
-            f"{node_id}.input_selector": typed_node_data.iterator_selector,
+            f"{node_id}.input_selector": node_data.iterator_selector,
         }
         iteration_node_ids = set()

         # Find all nodes that belong to this loop
         nodes = graph_config.get("nodes", [])
         for node in nodes:
-            node_data = node.get("data", {})
-            if node_data.get("iteration_id") == node_id:
+            node_config_data = node.get("data", {})
+            if node_config_data.get("iteration_id") == node_id:
                 in_iteration_node_id = node.get("id")
                 if in_iteration_node_id:
                     iteration_node_ids.add(in_iteration_node_id)
@@ -488,16 +486,18 @@ class IterationNode(LLMUsageTrackingMixin, Node[IterationNodeData]):
             # variable selector to variable mapping
             try:
                 # Get node class
-                from dify_graph.nodes.node_mapping import NODE_TYPE_CLASSES_MAPPING
+                from dify_graph.nodes.node_mapping import get_node_type_classes_mapping

-                node_type = NodeType(sub_node_config.get("data", {}).get("type"))
-                if node_type not in NODE_TYPE_CLASSES_MAPPING:
+                typed_sub_node_config = NodeConfigDictAdapter.validate_python(sub_node_config)
+                node_type = typed_sub_node_config["data"].type
+                node_mapping = get_node_type_classes_mapping()
+                if node_type not in node_mapping:
                     continue
-                node_version = sub_node_config.get("data", {}).get("version", "1")
-                node_cls = NODE_TYPE_CLASSES_MAPPING[node_type][node_version]
+                node_version = str(typed_sub_node_config["data"].version)
+                node_cls = node_mapping[node_type][node_version]

                 sub_node_variable_mapping = node_cls.extract_variable_selector_to_variable_mapping(
-                    graph_config=graph_config, config=sub_node_config
+                    graph_config=graph_config, config=typed_sub_node_config
                 )
                 sub_node_variable_mapping = cast(dict[str, Sequence[str]], sub_node_variable_mapping)
             except NotImplementedError:

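`NodeConfigDictAdapter.validate_python(...)` is the typed replacement for the chained `.get("data", {}).get(...)` lookups: validate the raw sub-node dict once, then read `type` and `version` off a real model. A simplified sketch of the same idea with a pydantic `TypeAdapter` over a `TypedDict` (field shapes assumed; the real `NodeConfigDict` is richer):

from pydantic import BaseModel, TypeAdapter
from typing_extensions import TypedDict


class NodeData(BaseModel):
    type: str
    version: str = "1"


class NodeConfigDict(TypedDict):
    id: str
    data: NodeData


adapter = TypeAdapter(NodeConfigDict)

raw = {"id": "llm_1", "data": {"type": "llm"}}
typed = adapter.validate_python(raw)  # raises pydantic.ValidationError on bad shapes
print(typed["data"].type, typed["data"].version)  # llm 1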
@@ -3,7 +3,8 @@ from typing import Literal, Union
 from pydantic import BaseModel

 from core.rag.retrieval.retrieval_methods import RetrievalMethod
-from dify_graph.nodes.base import BaseNodeData
+from dify_graph.entities.base_node_data import BaseNodeData
+from dify_graph.enums import NodeType


 class RerankingModelConfig(BaseModel):
@@ -155,7 +156,7 @@ class KnowledgeIndexNodeData(BaseNodeData):
     Knowledge index Node Data.
     """

-    type: str = "knowledge-index"
+    type: NodeType = NodeType.KNOWLEDGE_INDEX
     chunk_structure: str
     index_chunk_variable_selector: list[str]
     indexing_technique: str | None = None

@@ -2,6 +2,7 @@ import logging
 from collections.abc import Mapping
 from typing import TYPE_CHECKING, Any

+from dify_graph.entities.graph_config import NodeConfigDict
 from dify_graph.entities.workflow_node_execution import WorkflowNodeExecutionStatus
 from dify_graph.enums import NodeExecutionType, NodeType, SystemVariableKey
 from dify_graph.node_events import NodeRunResult
@@ -30,7 +31,7 @@ class KnowledgeIndexNode(Node[KnowledgeIndexNodeData]):
     def __init__(
         self,
         id: str,
-        config: Mapping[str, Any],
+        config: NodeConfigDict,
         graph_init_params: "GraphInitParams",
         graph_runtime_state: "GraphRuntimeState",
         index_processor: IndexProcessorProtocol,

@@ -3,7 +3,8 @@ from typing import Literal

 from pydantic import BaseModel, Field

-from dify_graph.nodes.base import BaseNodeData
+from dify_graph.entities.base_node_data import BaseNodeData
+from dify_graph.enums import NodeType
 from dify_graph.nodes.llm.entities import ModelConfig, VisionConfig


@@ -113,7 +114,7 @@ class KnowledgeRetrievalNodeData(BaseNodeData):
     Knowledge retrieval Node Data.
     """

-    type: str = "knowledge-retrieval"
+    type: NodeType = NodeType.KNOWLEDGE_RETRIEVAL
     query_variable_selector: list[str] | None | str = None
     query_attachment_selector: list[str] | None | str = None
     dataset_ids: list[str]

@@ -4,6 +4,7 @@ from typing import TYPE_CHECKING, Any, Literal

 from core.app.app_config.entities import DatasetRetrieveConfigEntity
 from dify_graph.entities import GraphInitParams
+from dify_graph.entities.graph_config import NodeConfigDict
 from dify_graph.enums import (
     NodeType,
     WorkflowNodeExecutionMetadataKey,
@@ -49,7 +50,7 @@ class KnowledgeRetrievalNode(LLMUsageTrackingMixin, Node[KnowledgeRetrievalNodeD
     def __init__(
         self,
         id: str,
-        config: Mapping[str, Any],
+        config: NodeConfigDict,
         graph_init_params: "GraphInitParams",
         graph_runtime_state: "GraphRuntimeState",
         rag_retrieval: RAGRetrievalProtocol,
@@ -301,15 +302,12 @@ class KnowledgeRetrievalNode(LLMUsageTrackingMixin, Node[KnowledgeRetrievalNodeD
         *,
         graph_config: Mapping[str, Any],
         node_id: str,
-        node_data: Mapping[str, Any],
+        node_data: KnowledgeRetrievalNodeData,
     ) -> Mapping[str, Sequence[str]]:
         # graph_config is not used in this node type
-        # Create typed NodeData from dict
-        typed_node_data = KnowledgeRetrievalNodeData.model_validate(node_data)
-
         variable_mapping = {}
-        if typed_node_data.query_variable_selector:
-            variable_mapping[node_id + ".query"] = typed_node_data.query_variable_selector
-        if typed_node_data.query_attachment_selector:
-            variable_mapping[node_id + ".queryAttachment"] = typed_node_data.query_attachment_selector
+        if node_data.query_variable_selector:
+            variable_mapping[node_id + ".query"] = node_data.query_variable_selector
+        if node_data.query_attachment_selector:
+            variable_mapping[node_id + ".queryAttachment"] = node_data.query_attachment_selector
         return variable_mapping

@@ -3,7 +3,8 @@ from enum import StrEnum

 from pydantic import BaseModel, Field

-from dify_graph.nodes.base import BaseNodeData
+from dify_graph.entities.base_node_data import BaseNodeData
+from dify_graph.enums import NodeType


 class FilterOperator(StrEnum):
@@ -62,6 +63,7 @@ class ExtractConfig(BaseModel):


 class ListOperatorNodeData(BaseNodeData):
+    type: NodeType = NodeType.LIST_OPERATOR
     variable: Sequence[str] = Field(default_factory=list)
     filter_by: FilterBy
     order_by: OrderByConfig

@@ -4,8 +4,9 @@ from typing import Any, Literal
 from pydantic import BaseModel, Field, field_validator

 from core.prompt.entities.advanced_prompt_entities import ChatModelMessage, CompletionModelPromptTemplate, MemoryConfig
+from dify_graph.entities.base_node_data import BaseNodeData
+from dify_graph.enums import NodeType
 from dify_graph.model_runtime.entities import ImagePromptMessageContent, LLMMode
-from dify_graph.nodes.base import BaseNodeData
 from dify_graph.nodes.base.entities import VariableSelector


@@ -59,6 +60,7 @@ class LLMNodeCompletionModelPromptTemplate(CompletionModelPromptTemplate):


 class LLMNodeData(BaseNodeData):
+    type: NodeType = NodeType.LLM
     model: ModelConfig
     prompt_template: Sequence[LLMNodeChatModelMessage] | LLMNodeCompletionModelPromptTemplate
     prompt_config: PromptConfig = Field(default_factory=PromptConfig)

@@ -21,6 +21,7 @@ from core.rag.entities.citation_metadata import RetrievalSourceMetadata
 from core.tools.signature import sign_upload_file
 from dify_graph.constants import SYSTEM_VARIABLE_NODE_ID
 from dify_graph.entities import GraphInitParams
+from dify_graph.entities.graph_config import NodeConfigDict
 from dify_graph.enums import (
     NodeType,
     SystemVariableKey,
@@ -121,7 +122,7 @@ class LLMNode(Node[LLMNodeData]):
     def __init__(
         self,
         id: str,
-        config: Mapping[str, Any],
+        config: NodeConfigDict,
         graph_init_params: GraphInitParams,
         graph_runtime_state: GraphRuntimeState,
         *,
@@ -954,14 +955,11 @@ class LLMNode(Node[LLMNodeData]):
         *,
         graph_config: Mapping[str, Any],
         node_id: str,
-        node_data: Mapping[str, Any],
+        node_data: LLMNodeData,
     ) -> Mapping[str, Sequence[str]]:
-        # graph_config is not used in this node type
+        _ = graph_config  # Explicitly mark as unused
-        # Create typed NodeData from dict
-        typed_node_data = LLMNodeData.model_validate(node_data)
-
-        prompt_template = typed_node_data.prompt_template
+        prompt_template = node_data.prompt_template
         variable_selectors = []
         if isinstance(prompt_template, list):
             for prompt in prompt_template:
@@ -979,7 +977,7 @@ class LLMNode(Node[LLMNodeData]):
         for variable_selector in variable_selectors:
             variable_mapping[variable_selector.variable] = variable_selector.value_selector

-        memory = typed_node_data.memory
+        memory = node_data.memory
         if memory and memory.query_prompt_template:
             query_variable_selectors = VariableTemplateParser(
                 template=memory.query_prompt_template
@@ -987,16 +985,16 @@ class LLMNode(Node[LLMNodeData]):
         for variable_selector in query_variable_selectors:
             variable_mapping[variable_selector.variable] = variable_selector.value_selector

-        if typed_node_data.context.enabled:
-            variable_mapping["#context#"] = typed_node_data.context.variable_selector
+        if node_data.context.enabled:
+            variable_mapping["#context#"] = node_data.context.variable_selector

-        if typed_node_data.vision.enabled:
-            variable_mapping["#files#"] = typed_node_data.vision.configs.variable_selector
+        if node_data.vision.enabled:
+            variable_mapping["#files#"] = node_data.vision.configs.variable_selector

-        if typed_node_data.memory:
+        if node_data.memory:
             variable_mapping["#sys.query#"] = ["sys", SystemVariableKey.QUERY]

-        if typed_node_data.prompt_config:
+        if node_data.prompt_config:
             enable_jinja = False

             if isinstance(prompt_template, LLMNodeCompletionModelPromptTemplate):
@@ -1009,7 +1007,7 @@ class LLMNode(Node[LLMNodeData]):
                     break

         if enable_jinja:
-            for variable_selector in typed_node_data.prompt_config.jinja2_variables or []:
+            for variable_selector in node_data.prompt_config.jinja2_variables or []:
                 variable_mapping[variable_selector.variable] = variable_selector.value_selector

         variable_mapping = {node_id + "." + key: value for key, value in variable_mapping.items()}

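The unchanged tail of this method shows where everything converges: locally collected keys are namespaced under the node id before being returned. A tiny worked example of that final comprehension (selector values invented):

node_id = "llm_1"
variable_mapping = {"#context#": ["kr_1", "result"], "query": ["start", "query"]}
variable_mapping = {node_id + "." + key: value for key, value in variable_mapping.items()}
print(variable_mapping)
# {'llm_1.#context#': ['kr_1', 'result'], 'llm_1.query': ['start', 'query']}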
Some files were not shown because too many files have changed in this diff.