Compare commits

...

35 Commits

Author SHA1 Message Date
Coding On Star
c49e0d38ed Merge branch 'main' into refactor/documents 2026-01-13 15:09:39 +08:00
CodingOnStar
b56777914d fix(web): enhance security for external links and improve pagination 2026-01-13 15:09:08 +08:00
呆萌闷油瓶
9be863fefa fix: missing content if assistant message with tool_calls (#30083)
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
2026-01-13 12:46:33 +08:00
Coding On Star
8f43629cd8 fix(amplitude): update sessionReplaySampleRate default value to 0.5 (#30880)
Co-authored-by: CodingOnStar <hanxujiang@dify.ai>
2026-01-13 12:26:50 +08:00
wangxiaolei
9ee71902c1 fix: fix formatNumber accuracy (#30877) 2026-01-13 11:51:15 +08:00
hsiong
a012c87445 fix: entrypoint.sh overrides NEXT_PUBLIC_TEXT_GENERATION_TIMEOUT_MS when TEXT_GENERATION_TIMEOUT_MS is unset (#30864) (#30865) 2026-01-13 10:12:51 +08:00
heyszt
450578d4c0 feat(ops): set root span kind for AliyunTrace to enable service-level metrics aggregation (#30728)
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
2026-01-13 10:12:00 +08:00
非法操作
837237aa6d fix: use node factory for single-step workflow nodes (#30859) 2026-01-13 10:11:18 +08:00
CodingOnStar
532073175a feat(web): refactor documents component structure and enhance functionality
- Extracted and modularized components for better organization: DocumentsHeader, EmptyElement, List, and Operations.
- Introduced a custom hook, useDocumentsPageState, to manage document page state including search, filter, and pagination.
- Updated the main Documents component to utilize the new structure and improve readability.
- Enhanced the user interface with new icons and improved button functionalities for document management.
2026-01-12 16:25:30 +08:00
QuantumGhost
b63dfbf654 fix(api): defer streaming response until referenced variables are updated (#30832)
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
2026-01-12 16:23:18 +08:00
非法操作
51ea87ab85 feat: clear free plan workflow run logs (#29494)
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
2026-01-12 15:57:40 +08:00
Stephen Zhou
00698e41b7 build: limit esbuild, glob, docker base version to avoid cve (#30848) 2026-01-12 15:33:20 +08:00
QuantumGhost
df938a4543 ci: add HITL test env deployment action (#30846) 2026-01-12 15:07:53 +08:00
yyh
9161936f41 refactor(web): extract isServer/isClient utility & upgrade Node.js to 22.12.0 (#30803)
Co-authored-by: Stephen Zhou <38493346+hyoban@users.noreply.github.com>
2026-01-12 12:57:43 +08:00
Lemonadeccc
f9a21b56ab feat: add block-no-verify hook for Claude Code (#30839) 2026-01-12 12:56:05 +08:00
Stephen Zhou
220e1df847 docs(web): add corepack recommendation (#30837)
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
2026-01-12 12:44:30 +08:00
dependabot[bot]
8cfdde594c chore(deps-dev): bump tos from 2.7.2 to 2.9.0 in /api (#30834)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-01-12 12:44:21 +08:00
dependabot[bot]
31a8fd810c chore(deps-dev): bump @storybook/react from 9.1.13 to 9.1.17 in /web (#30833)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-01-12 12:44:11 +08:00
yihong
9fad97ec9b fix: drop useless pyrefly in ci (#30826)
Signed-off-by: yihong0618 <zouzou0208@gmail.com>
2026-01-12 09:45:49 +08:00
wangxiaolei
0c2729d9b3 fix: fix refresh token deadlock (#30828) 2026-01-12 09:35:31 +08:00
wangxiaolei
a2e03b811e fix: Broken import in .storybook/preview.tsx (#30812)
2026-01-10 19:49:23 +08:00
-LAN-
1e10bf525c refactor(models): Refine MessageAgentThought SQLAlchemy typing (#27749)
Co-authored-by: Asuka Minato <i@asukaminato.eu.org>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
2026-01-10 17:17:45 +09:00
Stephen Zhou
8b1af36d94 feat(web): migrate PWA to Serwist (#30808) 2026-01-10 17:16:18 +09:00
wangxiaolei
0711dd4159 feat: enhance start node object value check (#30732)
2026-01-09 16:13:17 +08:00
QuantumGhost
ae0a26f5b6 revert: "fix: fix assign value stand as default (#30651)" (#30717)
The original fix seems correct on its own. However, for chatflows with multiple answer nodes, the `message_replace` command only preserves the output of the last executed answer node.
2026-01-09 16:08:24 +08:00
Stephen Zhou
d4432ed80f refactor: marketplace state management (#30702)
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-09 14:31:24 +08:00
lif
9d9f027246 fix(web): invalidate app list cache after deleting app from detail page (#30751)
Signed-off-by: majiayu000 <1835304752@qq.com>
2026-01-09 14:08:37 +08:00
wangxiaolei
77f097ce76 fix: fix enhance app mode check (#30758) 2026-01-09 14:07:40 +08:00
Maries
7843afc91c feat(workflows): add agent-dev deploy workflow (#30774) 2026-01-09 13:55:49 +08:00
Coding On Star
98df99b0ca feat(embedding-process): implement embedding process components and polling logic (#30622)
Co-authored-by: CodingOnStar <hanxujiang@dify.ai>
2026-01-09 10:21:27 +08:00
Coding On Star
9848823dcd feat: implement step two of dataset creation with comprehensive UI components and hooks (#30681)
Co-authored-by: CodingOnStar <hanxujiang@dify.ai>
2026-01-09 10:21:18 +08:00
github-actions[bot]
5ad2385799 chore(i18n): sync translations with en-US (#30750)
Co-authored-by: claude[bot] <41898282+claude[bot]@users.noreply.github.com>
2026-01-08 22:53:04 +08:00
yyh
7774a1312e fix(ci): use repository_dispatch for i18n sync workflow (#30744) 2026-01-08 21:28:49 +08:00
MkDev11
91d44719f4 fix(web): resolve chat message loading race conditions and infinite loops (#30695)
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
2026-01-08 18:05:32 +08:00
xuwei95
b2cbeeae92 fix(web): restrict postMessage targetOrigin from wildcard to specific origins (#30690)
Co-authored-by: XW <wei.xu1@wiz.ai>
2026-01-08 17:23:27 +08:00
176 changed files with 10732 additions and 6352 deletions

View File

@@ -5,5 +5,18 @@
"typescript-lsp@claude-plugins-official": true,
"pyright-lsp@claude-plugins-official": true,
"ralph-loop@claude-plugins-official": true
},
"hooks": {
"PreToolUse": [
{
"matcher": "Bash",
"hooks": [
{
"type": "command",
"command": "npx -y block-no-verify@1.1.1"
}
]
}
]
}
}

View File

@@ -39,12 +39,6 @@ jobs:
- name: Install dependencies
run: uv sync --project api --dev
- name: Run pyrefly check
run: |
cd api
uv add --dev pyrefly
uv run pyrefly check || true
- name: Run dify config tests
run: uv run --project api dev/pytest/pytest_config_tests.py

View File

@@ -1,4 +1,4 @@
name: Deploy Trigger Dev
name: Deploy Agent Dev
permissions:
contents: read
@@ -7,7 +7,7 @@ on:
workflow_run:
workflows: ["Build and Push API & Web"]
branches:
- "deploy/trigger-dev"
- "deploy/agent-dev"
types:
- completed
@@ -16,12 +16,12 @@ jobs:
runs-on: ubuntu-latest
if: |
github.event.workflow_run.conclusion == 'success' &&
github.event.workflow_run.head_branch == 'deploy/trigger-dev'
github.event.workflow_run.head_branch == 'deploy/agent-dev'
steps:
- name: Deploy to server
uses: appleboy/ssh-action@v0.1.8
with:
host: ${{ secrets.TRIGGER_SSH_HOST }}
host: ${{ secrets.AGENT_DEV_SSH_HOST }}
username: ${{ secrets.SSH_USER }}
key: ${{ secrets.SSH_PRIVATE_KEY }}
script: |

.github/workflows/deploy-hitl.yml (vendored, new file, 29 lines)
View File

@@ -0,0 +1,29 @@
name: Deploy HITL
on:
workflow_run:
workflows: ["Build and Push API & Web"]
branches:
- "feat/hitl-frontend"
- "feat/hitl-backend"
types:
- completed
jobs:
deploy:
runs-on: ubuntu-latest
if: |
github.event.workflow_run.conclusion == 'success' &&
(
github.event.workflow_run.head_branch == 'feat/hitl-frontend' ||
github.event.workflow_run.head_branch == 'feat/hitl-backend'
)
steps:
- name: Deploy to server
uses: appleboy/ssh-action@v0.1.8
with:
host: ${{ secrets.HITL_SSH_HOST }}
username: ${{ secrets.SSH_USER }}
key: ${{ secrets.SSH_PRIVATE_KEY }}
script: |
${{ vars.SSH_SCRIPT || secrets.SSH_SCRIPT }}

View File

@@ -1,10 +1,12 @@
name: Translate i18n Files with Claude Code
# Note: claude-code-action doesn't support push events directly.
# Push events are handled by trigger-i18n-sync.yml which sends repository_dispatch.
# See: https://github.com/langgenius/dify/issues/30743
on:
push:
branches: [main]
paths:
- 'web/i18n/en-US/*.json'
repository_dispatch:
types: [i18n-sync]
workflow_dispatch:
inputs:
files:
@@ -87,26 +89,35 @@ jobs:
echo "DIFF_AVAILABLE=false" >> $GITHUB_OUTPUT
fi
fi
else
# Push trigger - detect changed files from the push
BEFORE_SHA="${{ github.event.before }}"
# Handle edge case: first push or force push may have null/zero SHA
if [ -z "$BEFORE_SHA" ] || [ "$BEFORE_SHA" = "0000000000000000000000000000000000000000" ]; then
# Fallback to comparing with parent commit
BEFORE_SHA="HEAD~1"
elif [ "${{ github.event_name }}" == "repository_dispatch" ]; then
# Triggered by push via trigger-i18n-sync.yml workflow
# Validate required payload fields
if [ -z "${{ github.event.client_payload.changed_files }}" ]; then
echo "Error: repository_dispatch payload missing required 'changed_files' field" >&2
exit 1
fi
changed=$(git diff --name-only "$BEFORE_SHA" ${{ github.sha }} -- 'web/i18n/en-US/*.json' 2>/dev/null | xargs -n1 basename 2>/dev/null | sed 's/.json$//' | tr '\n' ' ' || echo "")
echo "CHANGED_FILES=$changed" >> $GITHUB_OUTPUT
echo "CHANGED_FILES=${{ github.event.client_payload.changed_files }}" >> $GITHUB_OUTPUT
echo "TARGET_LANGS=" >> $GITHUB_OUTPUT
echo "SYNC_MODE=incremental" >> $GITHUB_OUTPUT
echo "SYNC_MODE=${{ github.event.client_payload.sync_mode || 'incremental' }}" >> $GITHUB_OUTPUT
# Generate detailed diff for the push
git diff "$BEFORE_SHA"..${{ github.sha }} -- 'web/i18n/en-US/*.json' > /tmp/i18n-diff.txt 2>/dev/null || echo "" > /tmp/i18n-diff.txt
if [ -s /tmp/i18n-diff.txt ]; then
echo "DIFF_AVAILABLE=true" >> $GITHUB_OUTPUT
# Decode the base64-encoded diff from the trigger workflow
if [ -n "${{ github.event.client_payload.diff_base64 }}" ]; then
if ! echo "${{ github.event.client_payload.diff_base64 }}" | base64 -d > /tmp/i18n-diff.txt 2>&1; then
echo "Warning: Failed to decode base64 diff payload" >&2
echo "" > /tmp/i18n-diff.txt
echo "DIFF_AVAILABLE=false" >> $GITHUB_OUTPUT
elif [ -s /tmp/i18n-diff.txt ]; then
echo "DIFF_AVAILABLE=true" >> $GITHUB_OUTPUT
else
echo "DIFF_AVAILABLE=false" >> $GITHUB_OUTPUT
fi
else
echo "" > /tmp/i18n-diff.txt
echo "DIFF_AVAILABLE=false" >> $GITHUB_OUTPUT
fi
else
echo "Unsupported event type: ${{ github.event_name }}"
exit 1
fi
# Truncate diff if too large (keep first 50KB)

.github/workflows/trigger-i18n-sync.yml (vendored, new file, 66 lines)
View File

@@ -0,0 +1,66 @@
name: Trigger i18n Sync on Push
# This workflow bridges the push event to repository_dispatch
# because claude-code-action doesn't support push events directly.
# See: https://github.com/langgenius/dify/issues/30743
on:
push:
branches: [main]
paths:
- 'web/i18n/en-US/*.json'
permissions:
contents: write
jobs:
trigger:
if: github.repository == 'langgenius/dify'
runs-on: ubuntu-latest
timeout-minutes: 5
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Detect changed files and generate diff
id: detect
run: |
BEFORE_SHA="${{ github.event.before }}"
# Handle edge case: force push may have null/zero SHA
if [ -z "$BEFORE_SHA" ] || [ "$BEFORE_SHA" = "0000000000000000000000000000000000000000" ]; then
BEFORE_SHA="HEAD~1"
fi
# Detect changed i18n files
changed=$(git diff --name-only "$BEFORE_SHA" "${{ github.sha }}" -- 'web/i18n/en-US/*.json' 2>/dev/null | xargs -n1 basename 2>/dev/null | sed 's/.json$//' | tr '\n' ' ' || echo "")
echo "changed_files=$changed" >> $GITHUB_OUTPUT
# Generate diff for context
git diff "$BEFORE_SHA" "${{ github.sha }}" -- 'web/i18n/en-US/*.json' > /tmp/i18n-diff.txt 2>/dev/null || echo "" > /tmp/i18n-diff.txt
# Truncate if too large (keep first 50KB to match receiving workflow)
head -c 50000 /tmp/i18n-diff.txt > /tmp/i18n-diff-truncated.txt
mv /tmp/i18n-diff-truncated.txt /tmp/i18n-diff.txt
# Base64 encode the diff for safe JSON transport (portable, single-line)
diff_base64=$(base64 < /tmp/i18n-diff.txt | tr -d '\n')
echo "diff_base64=$diff_base64" >> $GITHUB_OUTPUT
if [ -n "$changed" ]; then
echo "has_changes=true" >> $GITHUB_OUTPUT
echo "Detected changed files: $changed"
else
echo "has_changes=false" >> $GITHUB_OUTPUT
echo "No i18n changes detected"
fi
- name: Trigger i18n sync workflow
if: steps.detect.outputs.has_changes == 'true'
uses: peter-evans/repository-dispatch@v3
with:
token: ${{ secrets.GITHUB_TOKEN }}
event-type: i18n-sync
client-payload: '{"changed_files": "${{ steps.detect.outputs.changed_files }}", "diff_base64": "${{ steps.detect.outputs.diff_base64 }}", "sync_mode": "incremental", "trigger_sha": "${{ github.sha }}"}'
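
For clarity, a small Python sketch of the transport convention the two workflows share (illustrative only; the actual workflows use the shell `base64` utility as shown above): the trigger side collapses the truncated diff into a single-line base64 string so it survives the JSON `client_payload`, and the receiving side decodes it back or falls back to "no diff available".

```python
import base64

# Trigger side: the raw diff (a stand-in string here), truncated to 50 KB and
# base64-encoded into a single line so it can ride inside the
# repository_dispatch client_payload JSON.
raw_diff = 'diff --git a/web/i18n/en-US/app.json b/web/i18n/en-US/app.json\n+  "newKey": "New label"\n'
diff_base64 = base64.b64encode(raw_diff.encode("utf-8")[:50_000]).decode("ascii")

# Receiving side: decode the payload back into a diff; an empty or undecodable
# payload degrades to DIFF_AVAILABLE=false, mirroring the workflow's error handling.
try:
    decoded = base64.b64decode(diff_base64).decode("utf-8")
except Exception:
    decoded = ""
print("DIFF_AVAILABLE=true" if decoded else "DIFF_AVAILABLE=false")
```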

.nvmrc (1 line removed)
View File

@@ -1 +0,0 @@
22.11.0

View File

@@ -589,6 +589,7 @@ ENABLE_CLEAN_UNUSED_DATASETS_TASK=false
ENABLE_CREATE_TIDB_SERVERLESS_TASK=false
ENABLE_UPDATE_TIDB_SERVERLESS_STATUS_TASK=false
ENABLE_CLEAN_MESSAGES=false
ENABLE_WORKFLOW_RUN_CLEANUP_TASK=false
ENABLE_MAIL_CLEAN_DOCUMENT_NOTIFY_TASK=false
ENABLE_DATASETS_QUEUE_MONITOR=false
ENABLE_CHECK_UPGRADABLE_PLUGIN_TASK=true

View File

@@ -1,4 +1,5 @@
import base64
import datetime
import json
import logging
import secrets
@@ -45,6 +46,7 @@ from services.clear_free_plan_tenant_expired_logs import ClearFreePlanTenantExpi
from services.plugin.data_migration import PluginDataMigration
from services.plugin.plugin_migration import PluginMigration
from services.plugin.plugin_service import PluginService
from services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs import WorkflowRunCleanup
from tasks.remove_app_and_related_data_task import delete_draft_variables_batch
logger = logging.getLogger(__name__)
@@ -852,6 +854,61 @@ def clear_free_plan_tenant_expired_logs(days: int, batch: int, tenant_ids: list[
click.echo(click.style("Clear free plan tenant expired logs completed.", fg="green"))
@click.command("clean-workflow-runs", help="Clean expired workflow runs and related data for free tenants.")
@click.option("--days", default=30, show_default=True, help="Delete workflow runs created before N days ago.")
@click.option("--batch-size", default=200, show_default=True, help="Batch size for selecting workflow runs.")
@click.option(
"--start-from",
type=click.DateTime(formats=["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"]),
default=None,
help="Optional lower bound (inclusive) for created_at; must be paired with --end-before.",
)
@click.option(
"--end-before",
type=click.DateTime(formats=["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"]),
default=None,
help="Optional upper bound (exclusive) for created_at; must be paired with --start-from.",
)
@click.option(
"--dry-run",
is_flag=True,
help="Preview cleanup results without deleting any workflow run data.",
)
def clean_workflow_runs(
days: int,
batch_size: int,
start_from: datetime.datetime | None,
end_before: datetime.datetime | None,
dry_run: bool,
):
"""
Clean workflow runs and related workflow data for free tenants.
"""
if (start_from is None) ^ (end_before is None):
raise click.UsageError("--start-from and --end-before must be provided together.")
start_time = datetime.datetime.now(datetime.UTC)
click.echo(click.style(f"Starting workflow run cleanup at {start_time.isoformat()}.", fg="white"))
WorkflowRunCleanup(
days=days,
batch_size=batch_size,
start_from=start_from,
end_before=end_before,
dry_run=dry_run,
).run()
end_time = datetime.datetime.now(datetime.UTC)
elapsed = end_time - start_time
click.echo(
click.style(
f"Workflow run cleanup completed. start={start_time.isoformat()} "
f"end={end_time.isoformat()} duration={elapsed}",
fg="green",
)
)
@click.option("-f", "--force", is_flag=True, help="Skip user confirmation and force the command to execute.")
@click.command("clear-orphaned-file-records", help="Clear orphaned file records.")
def clear_orphaned_file_records(force: bool):
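
Stepping back from the hunk above, a minimal sketch of driving the new `clean-workflow-runs` cleanup from Python instead of the CLI, assuming an application context is active so the default repository factory can reach the database; with `dry_run=True` it only counts the affected records:

```python
import datetime

from services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs import WorkflowRunCleanup

# Explicit window: both bounds must be given together, mirroring the CLI check.
window_start = datetime.datetime(2025, 12, 1)
window_end = datetime.datetime(2026, 1, 1)

WorkflowRunCleanup(
    days=30,          # only used to derive the cutoff when end_before is omitted
    batch_size=200,
    start_from=window_start,
    end_before=window_end,
    dry_run=True,     # preview counts of runs and related records without deleting
).run()
```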

View File

@@ -1101,6 +1101,10 @@ class CeleryScheduleTasksConfig(BaseSettings):
description="Enable clean messages task",
default=False,
)
ENABLE_WORKFLOW_RUN_CLEANUP_TASK: bool = Field(
description="Enable scheduled workflow run cleanup task",
default=False,
)
ENABLE_MAIL_CLEAN_DOCUMENT_NOTIFY_TASK: bool = Field(
description="Enable mail clean document notify task",
default=False,

View File

@@ -1,6 +1,7 @@
import json
import logging
import uuid
from decimal import Decimal
from typing import Union, cast
from sqlalchemy import select
@@ -41,6 +42,7 @@ from core.tools.tool_manager import ToolManager
from core.tools.utils.dataset_retriever_tool import DatasetRetrieverTool
from extensions.ext_database import db
from factories import file_factory
from models.enums import CreatorUserRole
from models.model import Conversation, Message, MessageAgentThought, MessageFile
logger = logging.getLogger(__name__)
@@ -289,6 +291,7 @@ class BaseAgentRunner(AppRunner):
thought = MessageAgentThought(
message_id=message_id,
message_chain_id=None,
tool_process_data=None,
thought="",
tool=tool_name,
tool_labels_str="{}",
@@ -296,20 +299,20 @@ class BaseAgentRunner(AppRunner):
tool_input=tool_input,
message=message,
message_token=0,
message_unit_price=0,
message_price_unit=0,
message_unit_price=Decimal(0),
message_price_unit=Decimal("0.001"),
message_files=json.dumps(messages_ids) if messages_ids else "",
answer="",
observation="",
answer_token=0,
answer_unit_price=0,
answer_price_unit=0,
answer_unit_price=Decimal(0),
answer_price_unit=Decimal("0.001"),
tokens=0,
total_price=0,
total_price=Decimal(0),
position=self.agent_thought_count + 1,
currency="USD",
latency=0,
created_by_role="account",
created_by_role=CreatorUserRole.ACCOUNT,
created_by=self.user_id,
)
@@ -342,7 +345,8 @@ class BaseAgentRunner(AppRunner):
raise ValueError("agent thought not found")
if thought:
agent_thought.thought += thought
existing_thought = agent_thought.thought or ""
agent_thought.thought = f"{existing_thought}{thought}"
if tool_name:
agent_thought.tool = tool_name
@@ -440,21 +444,30 @@ class BaseAgentRunner(AppRunner):
agent_thoughts: list[MessageAgentThought] = message.agent_thoughts
if agent_thoughts:
for agent_thought in agent_thoughts:
tools = agent_thought.tool
if tools:
tools = tools.split(";")
tool_names_raw = agent_thought.tool
if tool_names_raw:
tool_names = tool_names_raw.split(";")
tool_calls: list[AssistantPromptMessage.ToolCall] = []
tool_call_response: list[ToolPromptMessage] = []
try:
tool_inputs = json.loads(agent_thought.tool_input)
except Exception:
tool_inputs = {tool: {} for tool in tools}
try:
tool_responses = json.loads(agent_thought.observation)
except Exception:
tool_responses = dict.fromkeys(tools, agent_thought.observation)
tool_input_payload = agent_thought.tool_input
if tool_input_payload:
try:
tool_inputs = json.loads(tool_input_payload)
except Exception:
tool_inputs = {tool: {} for tool in tool_names}
else:
tool_inputs = {tool: {} for tool in tool_names}
for tool in tools:
observation_payload = agent_thought.observation
if observation_payload:
try:
tool_responses = json.loads(observation_payload)
except Exception:
tool_responses = dict.fromkeys(tool_names, observation_payload)
else:
tool_responses = dict.fromkeys(tool_names, observation_payload)
for tool in tool_names:
# generate a uuid for tool call
tool_call_id = str(uuid.uuid4())
tool_calls.append(
@@ -484,7 +497,7 @@ class BaseAgentRunner(AppRunner):
*tool_call_response,
]
)
if not tools:
if not tool_names_raw:
result.append(AssistantPromptMessage(content=agent_thought.thought))
else:
if message.answer:

View File

@@ -188,7 +188,7 @@ class FunctionCallAgentRunner(BaseAgentRunner):
),
)
assistant_message = AssistantPromptMessage(content="", tool_calls=[])
assistant_message = AssistantPromptMessage(content=response, tool_calls=[])
if tool_calls:
assistant_message.tool_calls = [
AssistantPromptMessage.ToolCall(
@@ -200,8 +200,6 @@ class FunctionCallAgentRunner(BaseAgentRunner):
)
for tool_call in tool_calls
]
else:
assistant_message.content = response
self._current_thoughts.append(assistant_message)

View File

@@ -1,4 +1,3 @@
import json
from collections.abc import Sequence
from enum import StrEnum, auto
from typing import Any, Literal
@@ -121,7 +120,7 @@ class VariableEntity(BaseModel):
allowed_file_types: Sequence[FileType] | None = Field(default_factory=list)
allowed_file_extensions: Sequence[str] | None = Field(default_factory=list)
allowed_file_upload_methods: Sequence[FileTransferMethod] | None = Field(default_factory=list)
json_schema: str | None = Field(default=None)
json_schema: dict | None = Field(default=None)
@field_validator("description", mode="before")
@classmethod
@@ -135,17 +134,11 @@ class VariableEntity(BaseModel):
@field_validator("json_schema")
@classmethod
def validate_json_schema(cls, schema: str | None) -> str | None:
def validate_json_schema(cls, schema: dict | None) -> dict | None:
if schema is None:
return None
try:
json_schema = json.loads(schema)
except json.JSONDecodeError:
raise ValueError(f"invalid json_schema value {schema}")
try:
Draft7Validator.check_schema(json_schema)
Draft7Validator.check_schema(schema)
except SchemaError as e:
raise ValueError(f"Invalid JSON schema: {e.message}")
return schema

View File

@@ -26,7 +26,6 @@ class AdvancedChatAppConfigManager(BaseAppConfigManager):
@classmethod
def get_app_config(cls, app_model: App, workflow: Workflow) -> AdvancedChatAppConfig:
features_dict = workflow.features_dict
app_mode = AppMode.value_of(app_model.mode)
app_config = AdvancedChatAppConfig(
tenant_id=app_model.tenant_id,

View File

@@ -358,25 +358,6 @@ class AdvancedChatAppGenerateTaskPipeline(GraphRuntimeStateSupport):
if node_finish_resp:
yield node_finish_resp
# For ANSWER nodes, check if we need to send a message_replace event
# Only send if the final output differs from the accumulated task_state.answer
# This happens when variables were updated by variable_assigner during workflow execution
if event.node_type == NodeType.ANSWER and event.outputs:
final_answer = event.outputs.get("answer")
if final_answer is not None and final_answer != self._task_state.answer:
logger.info(
"ANSWER node final output '%s' differs from accumulated answer '%s', sending message_replace event",
final_answer,
self._task_state.answer,
)
# Update the task state answer
self._task_state.answer = str(final_answer)
# Send message_replace event to update the UI
yield self._message_cycle_manager.message_replace_to_stream_response(
answer=str(final_answer),
reason="variable_update",
)
def _handle_node_failed_events(
self,
event: Union[QueueNodeFailedEvent, QueueNodeExceptionEvent],

View File

@@ -1,4 +1,3 @@
import json
from collections.abc import Generator, Mapping, Sequence
from typing import TYPE_CHECKING, Any, Union, final
@@ -76,12 +75,24 @@ class BaseAppGenerator:
user_inputs = {**user_inputs, **files_inputs, **file_list_inputs}
# Check if all files are converted to File
if any(filter(lambda v: isinstance(v, dict), user_inputs.values())):
raise ValueError("Invalid input type")
if any(
filter(lambda v: isinstance(v, dict), filter(lambda item: isinstance(item, list), user_inputs.values()))
):
raise ValueError("Invalid input type")
invalid_dict_keys = [
k
for k, v in user_inputs.items()
if isinstance(v, dict)
and entity_dictionary[k].type not in {VariableEntityType.FILE, VariableEntityType.JSON_OBJECT}
]
if invalid_dict_keys:
raise ValueError(f"Invalid input type for {invalid_dict_keys}")
invalid_list_dict_keys = [
k
for k, v in user_inputs.items()
if isinstance(v, list)
and any(isinstance(item, dict) for item in v)
and entity_dictionary[k].type != VariableEntityType.FILE_LIST
]
if invalid_list_dict_keys:
raise ValueError(f"Invalid input type for {invalid_list_dict_keys}")
return user_inputs
@@ -178,12 +189,8 @@ class BaseAppGenerator:
elif value == 0:
value = False
case VariableEntityType.JSON_OBJECT:
if not isinstance(value, str):
raise ValueError(f"{variable_entity.variable} in input form must be a string")
try:
json.loads(value)
except json.JSONDecodeError:
raise ValueError(f"{variable_entity.variable} in input form must be a valid JSON object")
if not isinstance(value, dict):
raise ValueError(f"{variable_entity.variable} in input form must be a dict")
case _:
raise AssertionError("this statement should be unreachable.")

View File

@@ -251,10 +251,7 @@ class AssistantPromptMessage(PromptMessage):
:return: True if prompt message is empty, False otherwise
"""
if not super().is_empty() and not self.tool_calls:
return False
return True
return super().is_empty() and not self.tool_calls
class SystemPromptMessage(PromptMessage):

View File

@@ -1,6 +1,7 @@
import logging
from collections.abc import Sequence
from opentelemetry.trace import SpanKind
from sqlalchemy.orm import sessionmaker
from core.ops.aliyun_trace.data_exporter.traceclient import (
@@ -151,6 +152,7 @@ class AliyunDataTrace(BaseTraceInstance):
),
status=status,
links=trace_metadata.links,
span_kind=SpanKind.SERVER,
)
self.trace_client.add_span(message_span)
@@ -456,6 +458,7 @@ class AliyunDataTrace(BaseTraceInstance):
),
status=status,
links=trace_metadata.links,
span_kind=SpanKind.SERVER,
)
self.trace_client.add_span(message_span)
@@ -475,6 +478,7 @@ class AliyunDataTrace(BaseTraceInstance):
),
status=status,
links=trace_metadata.links,
span_kind=SpanKind.SERVER if message_span_id is None else SpanKind.INTERNAL,
)
self.trace_client.add_span(workflow_span)

View File

@@ -166,7 +166,7 @@ class SpanBuilder:
attributes=span_data.attributes,
events=span_data.events,
links=span_data.links,
kind=trace_api.SpanKind.INTERNAL,
kind=span_data.span_kind,
status=span_data.status,
start_time=span_data.start_time,
end_time=span_data.end_time,

View File

@@ -4,7 +4,7 @@ from typing import Any
from opentelemetry import trace as trace_api
from opentelemetry.sdk.trace import Event
from opentelemetry.trace import Status, StatusCode
from opentelemetry.trace import SpanKind, Status, StatusCode
from pydantic import BaseModel, Field
@@ -34,3 +34,4 @@ class SpanData(BaseModel):
status: Status = Field(default=Status(StatusCode.UNSET), description="The status of the span.")
start_time: int | None = Field(..., description="The start time of the span in nanoseconds.")
end_time: int | None = Field(..., description="The end time of the span in nanoseconds.")
span_kind: SpanKind = Field(default=SpanKind.INTERNAL, description="The OpenTelemetry SpanKind for this span.")

View File

@@ -211,6 +211,10 @@ class WorkflowExecutionStatus(StrEnum):
def is_ended(self) -> bool:
return self in _END_STATE
@classmethod
def ended_values(cls) -> list[str]:
return [status.value for status in _END_STATE]
_END_STATE = frozenset(
[

View File

@@ -1,4 +1,3 @@
import json
from typing import Any
from jsonschema import Draft7Validator, ValidationError
@@ -43,25 +42,22 @@ class StartNode(Node[StartNodeData]):
if value is None and variable.required:
raise ValueError(f"{key} is required in input form")
# If no value provided, skip further processing for this key
if not value:
continue
if not isinstance(value, dict):
raise ValueError(f"JSON object for '{key}' must be an object")
# Overwrite with normalized dict to ensure downstream consistency
node_inputs[key] = value
# If schema exists, then validate against it
schema = variable.json_schema
if not schema:
continue
if not value:
continue
try:
json_schema = json.loads(schema)
except json.JSONDecodeError as e:
raise ValueError(f"{schema} must be a valid JSON object")
try:
json_value = json.loads(value)
except json.JSONDecodeError as e:
raise ValueError(f"{value} must be a valid JSON object")
try:
Draft7Validator(json_schema).validate(json_value)
Draft7Validator(schema).validate(value)
except ValidationError as e:
raise ValueError(f"JSON object for '{key}' does not match schema: {e.message}")
node_inputs[key] = json_value
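
Because `json_schema` and the incoming value are now plain dicts rather than JSON strings, the validation step reduces to direct `jsonschema` calls. A small illustrative example of the check the start node now performs for a schema-bearing JSON object input (schema and value are made up):

```python
from jsonschema import Draft7Validator, ValidationError

# Hypothetical schema attached to a start-node variable of type json_object.
schema = {
    "type": "object",
    "properties": {"city": {"type": "string"}, "days": {"type": "integer"}},
    "required": ["city"],
}
value = {"city": "Tokyo", "days": 3}  # already a dict, no json.loads needed

Draft7Validator.check_schema(schema)  # same structural check as the VariableEntity validator
try:
    Draft7Validator(schema).validate(value)
except ValidationError as e:
    raise ValueError(f"JSON object does not match schema: {e.message}")
```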

View File

@@ -33,6 +33,15 @@ class VariableAssignerNode(Node[VariableAssignerData]):
graph_runtime_state=graph_runtime_state,
)
def blocks_variable_output(self, variable_selectors: set[tuple[str, ...]]) -> bool:
"""
Check if this Variable Assigner node blocks the output of specific variables.
Returns True if this node updates any of the requested conversation variables.
"""
assigned_selector = tuple(self.node_data.assigned_variable_selector)
return assigned_selector in variable_selectors
@classmethod
def version(cls) -> str:
return "1"

View File

@@ -19,6 +19,7 @@ from core.workflow.graph_engine.protocols.command_channel import CommandChannel
from core.workflow.graph_events import GraphEngineEvent, GraphNodeEventBase, GraphRunFailedEvent
from core.workflow.nodes import NodeType
from core.workflow.nodes.base.node import Node
from core.workflow.nodes.node_factory import DifyNodeFactory
from core.workflow.nodes.node_mapping import NODE_TYPE_CLASSES_MAPPING
from core.workflow.runtime import GraphRuntimeState, VariablePool
from core.workflow.system_variable import SystemVariable
@@ -136,13 +137,11 @@ class WorkflowEntry:
:param user_inputs: user inputs
:return:
"""
node_config = workflow.get_node_config_by_id(node_id)
node_config = dict(workflow.get_node_config_by_id(node_id))
node_config_data = node_config.get("data", {})
# Get node class
# Get node type
node_type = NodeType(node_config_data.get("type"))
node_version = node_config_data.get("version", "1")
node_cls = NODE_TYPE_CLASSES_MAPPING[node_type][node_version]
# init graph init params and runtime state
graph_init_params = GraphInitParams(
@@ -158,12 +157,12 @@ class WorkflowEntry:
graph_runtime_state = GraphRuntimeState(variable_pool=variable_pool, start_at=time.perf_counter())
# init workflow run state
node = node_cls(
id=str(uuid.uuid4()),
config=node_config,
node_factory = DifyNodeFactory(
graph_init_params=graph_init_params,
graph_runtime_state=graph_runtime_state,
)
node = node_factory.create_node(node_config)
node_cls = type(node)
try:
# variable selector to variable mapping

View File

@@ -163,6 +163,13 @@ def init_app(app: DifyApp) -> Celery:
"task": "schedule.clean_workflow_runlogs_precise.clean_workflow_runlogs_precise",
"schedule": crontab(minute="0", hour="2"),
}
if dify_config.ENABLE_WORKFLOW_RUN_CLEANUP_TASK:
# for saas only
imports.append("schedule.clean_workflow_runs_task")
beat_schedule["clean_workflow_runs_task"] = {
"task": "schedule.clean_workflow_runs_task.clean_workflow_runs_task",
"schedule": crontab(minute="0", hour="0"),
}
if dify_config.ENABLE_WORKFLOW_SCHEDULE_POLLER_TASK:
imports.append("schedule.workflow_schedule_task")
beat_schedule["workflow_schedule_task"] = {

View File

@@ -4,6 +4,7 @@ from dify_app import DifyApp
def init_app(app: DifyApp):
from commands import (
add_qdrant_index,
clean_workflow_runs,
cleanup_orphaned_draft_variables,
clear_free_plan_tenant_expired_logs,
clear_orphaned_file_records,
@@ -56,6 +57,7 @@ def init_app(app: DifyApp):
setup_datasource_oauth_client,
transform_datasource_credentials,
install_rag_pipeline_plugins,
clean_workflow_runs,
]
for cmd in cmds_to_register:
app.cli.add_command(cmd)

View File

@@ -0,0 +1,30 @@
"""add workflow_run_created_at_id_idx
Revision ID: 905527cc8fd3
Revises: 7df29de0f6be
Create Date: 2025-01-09 16:30:02.462084
"""
from alembic import op
import models as models
# revision identifiers, used by Alembic.
revision = '905527cc8fd3'
down_revision = '7df29de0f6be'
branch_labels = None
depends_on = None
def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
with op.batch_alter_table('workflow_runs', schema=None) as batch_op:
batch_op.create_index('workflow_run_created_at_id_idx', ['created_at', 'id'], unique=False)
# ### end Alembic commands ###
def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
with op.batch_alter_table('workflow_runs', schema=None) as batch_op:
batch_op.drop_index('workflow_run_created_at_id_idx')
# ### end Alembic commands ###

View File

@@ -1843,7 +1843,7 @@ class MessageChain(TypeBase):
)
class MessageAgentThought(Base):
class MessageAgentThought(TypeBase):
__tablename__ = "message_agent_thoughts"
__table_args__ = (
sa.PrimaryKeyConstraint("id", name="message_agent_thought_pkey"),
@@ -1851,34 +1851,42 @@ class MessageAgentThought(Base):
sa.Index("message_agent_thought_message_chain_id_idx", "message_chain_id"),
)
id = mapped_column(StringUUID, default=lambda: str(uuid4()))
message_id = mapped_column(StringUUID, nullable=False)
message_chain_id = mapped_column(StringUUID, nullable=True)
id: Mapped[str] = mapped_column(
StringUUID, insert_default=lambda: str(uuid4()), default_factory=lambda: str(uuid4()), init=False
)
message_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
position: Mapped[int] = mapped_column(sa.Integer, nullable=False)
thought = mapped_column(LongText, nullable=True)
tool = mapped_column(LongText, nullable=True)
tool_labels_str = mapped_column(LongText, nullable=False, default=sa.text("'{}'"))
tool_meta_str = mapped_column(LongText, nullable=False, default=sa.text("'{}'"))
tool_input = mapped_column(LongText, nullable=True)
observation = mapped_column(LongText, nullable=True)
created_by_role: Mapped[str] = mapped_column(String(255), nullable=False)
created_by: Mapped[str] = mapped_column(StringUUID, nullable=False)
message_chain_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True, default=None)
thought: Mapped[str | None] = mapped_column(LongText, nullable=True, default=None)
tool: Mapped[str | None] = mapped_column(LongText, nullable=True, default=None)
tool_labels_str: Mapped[str] = mapped_column(LongText, nullable=False, default=sa.text("'{}'"))
tool_meta_str: Mapped[str] = mapped_column(LongText, nullable=False, default=sa.text("'{}'"))
tool_input: Mapped[str | None] = mapped_column(LongText, nullable=True, default=None)
observation: Mapped[str | None] = mapped_column(LongText, nullable=True, default=None)
# plugin_id = mapped_column(StringUUID, nullable=True) ## for future design
tool_process_data = mapped_column(LongText, nullable=True)
message = mapped_column(LongText, nullable=True)
message_token: Mapped[int | None] = mapped_column(sa.Integer, nullable=True)
message_unit_price = mapped_column(sa.Numeric, nullable=True)
message_price_unit = mapped_column(sa.Numeric(10, 7), nullable=False, server_default=sa.text("0.001"))
message_files = mapped_column(LongText, nullable=True)
answer = mapped_column(LongText, nullable=True)
answer_token: Mapped[int | None] = mapped_column(sa.Integer, nullable=True)
answer_unit_price = mapped_column(sa.Numeric, nullable=True)
answer_price_unit = mapped_column(sa.Numeric(10, 7), nullable=False, server_default=sa.text("0.001"))
tokens: Mapped[int | None] = mapped_column(sa.Integer, nullable=True)
total_price = mapped_column(sa.Numeric, nullable=True)
currency = mapped_column(String(255), nullable=True)
latency: Mapped[float | None] = mapped_column(sa.Float, nullable=True)
created_by_role = mapped_column(String(255), nullable=False)
created_by = mapped_column(StringUUID, nullable=False)
created_at = mapped_column(sa.DateTime, nullable=False, server_default=sa.func.current_timestamp())
tool_process_data: Mapped[str | None] = mapped_column(LongText, nullable=True, default=None)
message: Mapped[str | None] = mapped_column(LongText, nullable=True, default=None)
message_token: Mapped[int | None] = mapped_column(sa.Integer, nullable=True, default=None)
message_unit_price: Mapped[Decimal | None] = mapped_column(sa.Numeric, nullable=True, default=None)
message_price_unit: Mapped[Decimal] = mapped_column(
sa.Numeric(10, 7), nullable=False, default=Decimal("0.001"), server_default=sa.text("0.001")
)
message_files: Mapped[str | None] = mapped_column(LongText, nullable=True, default=None)
answer: Mapped[str | None] = mapped_column(LongText, nullable=True, default=None)
answer_token: Mapped[int | None] = mapped_column(sa.Integer, nullable=True, default=None)
answer_unit_price: Mapped[Decimal | None] = mapped_column(sa.Numeric, nullable=True, default=None)
answer_price_unit: Mapped[Decimal] = mapped_column(
sa.Numeric(10, 7), nullable=False, default=Decimal("0.001"), server_default=sa.text("0.001")
)
tokens: Mapped[int | None] = mapped_column(sa.Integer, nullable=True, default=None)
total_price: Mapped[Decimal | None] = mapped_column(sa.Numeric, nullable=True, default=None)
currency: Mapped[str | None] = mapped_column(String(255), nullable=True, default=None)
latency: Mapped[float | None] = mapped_column(sa.Float, nullable=True, default=None)
created_at: Mapped[datetime] = mapped_column(
sa.DateTime, nullable=False, init=False, server_default=sa.func.current_timestamp()
)
@property
def files(self) -> list[Any]:

View File

@@ -597,6 +597,7 @@ class WorkflowRun(Base):
__table_args__ = (
sa.PrimaryKeyConstraint("id", name="workflow_run_pkey"),
sa.Index("workflow_run_triggerd_from_idx", "tenant_id", "app_id", "triggered_from"),
sa.Index("workflow_run_created_at_id_idx", "created_at", "id"),
)
id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4()))

View File

@@ -189,7 +189,7 @@ storage = [
"opendal~=0.46.0",
"oss2==2.18.5",
"supabase~=2.18.1",
"tos~=2.7.1",
"tos~=2.9.0",
]
############################################################

View File

@@ -34,11 +34,14 @@ Example:
```
"""
from collections.abc import Sequence
from collections.abc import Callable, Sequence
from datetime import datetime
from typing import Protocol
from sqlalchemy.orm import Session
from core.workflow.entities.pause_reason import PauseReason
from core.workflow.enums import WorkflowType
from core.workflow.repositories.workflow_execution_repository import WorkflowExecutionRepository
from libs.infinite_scroll_pagination import InfiniteScrollPagination
from models.enums import WorkflowRunTriggeredFrom
@@ -253,6 +256,44 @@ class APIWorkflowRunRepository(WorkflowExecutionRepository, Protocol):
"""
...
def get_runs_batch_by_time_range(
self,
start_from: datetime | None,
end_before: datetime,
last_seen: tuple[datetime, str] | None,
batch_size: int,
run_types: Sequence[WorkflowType] | None = None,
tenant_ids: Sequence[str] | None = None,
) -> Sequence[WorkflowRun]:
"""
Fetch ended workflow runs in a time window for archival and clean batching.
"""
...
def delete_runs_with_related(
self,
runs: Sequence[WorkflowRun],
delete_node_executions: Callable[[Session, Sequence[WorkflowRun]], tuple[int, int]] | None = None,
delete_trigger_logs: Callable[[Session, Sequence[str]], int] | None = None,
) -> dict[str, int]:
"""
Delete workflow runs and their related records (node executions, offloads, app logs,
trigger logs, pauses, pause reasons).
"""
...
def count_runs_with_related(
self,
runs: Sequence[WorkflowRun],
count_node_executions: Callable[[Session, Sequence[WorkflowRun]], tuple[int, int]] | None = None,
count_trigger_logs: Callable[[Session, Sequence[str]], int] | None = None,
) -> dict[str, int]:
"""
Count workflow runs and their related records (node executions, offloads, app logs,
trigger logs, pauses, pause reasons) without deleting data.
"""
...
def create_workflow_pause(
self,
workflow_run_id: str,
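
The `last_seen` cursor in `get_runs_batch_by_time_range` implies keyset pagination on (`created_at`, `id`), backed by the new `workflow_run_created_at_id_idx` index. A hedged sketch of how a caller such as `WorkflowRunCleanup` might walk the window in batches (illustrative; the real loop lives in the cleanup service, which is only partially shown later in this diff):

```python
from collections.abc import Iterator, Sequence
from datetime import datetime


def iterate_ended_runs(repo, end_before: datetime, batch_size: int = 200) -> Iterator[Sequence]:
    """Yield batches of ended workflow runs, advancing a (created_at, id) cursor."""
    last_seen: tuple[datetime, str] | None = None
    while True:
        batch = repo.get_runs_batch_by_time_range(
            start_from=None,
            end_before=end_before,
            last_seen=last_seen,
            batch_size=batch_size,
        )
        if not batch:
            break
        yield batch
        last = batch[-1]
        last_seen = (last.created_at, last.id)  # strictly-after cursor for the next page
```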

View File

@@ -7,13 +7,18 @@ using SQLAlchemy 2.0 style queries for WorkflowNodeExecutionModel operations.
from collections.abc import Sequence
from datetime import datetime
from typing import cast
from typing import TypedDict, cast
from sqlalchemy import asc, delete, desc, select
from sqlalchemy import asc, delete, desc, func, select, tuple_
from sqlalchemy.engine import CursorResult
from sqlalchemy.orm import Session, sessionmaker
from models.workflow import WorkflowNodeExecutionModel
from models.enums import WorkflowRunTriggeredFrom
from models.workflow import (
WorkflowNodeExecutionModel,
WorkflowNodeExecutionOffload,
WorkflowNodeExecutionTriggeredFrom,
)
from repositories.api_workflow_node_execution_repository import DifyAPIWorkflowNodeExecutionRepository
@@ -44,6 +49,26 @@ class DifyAPISQLAlchemyWorkflowNodeExecutionRepository(DifyAPIWorkflowNodeExecut
"""
self._session_maker = session_maker
@staticmethod
def _map_run_triggered_from_to_node_triggered_from(triggered_from: str) -> str:
"""
Map workflow run triggered_from values to workflow node execution triggered_from values.
"""
if triggered_from in {
WorkflowRunTriggeredFrom.APP_RUN.value,
WorkflowRunTriggeredFrom.DEBUGGING.value,
WorkflowRunTriggeredFrom.SCHEDULE.value,
WorkflowRunTriggeredFrom.PLUGIN.value,
WorkflowRunTriggeredFrom.WEBHOOK.value,
}:
return WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN.value
if triggered_from in {
WorkflowRunTriggeredFrom.RAG_PIPELINE_RUN.value,
WorkflowRunTriggeredFrom.RAG_PIPELINE_DEBUGGING.value,
}:
return WorkflowNodeExecutionTriggeredFrom.RAG_PIPELINE_RUN.value
return ""
def get_node_last_execution(
self,
tenant_id: str,
@@ -290,3 +315,119 @@ class DifyAPISQLAlchemyWorkflowNodeExecutionRepository(DifyAPIWorkflowNodeExecut
result = cast(CursorResult, session.execute(stmt))
session.commit()
return result.rowcount
class RunContext(TypedDict):
run_id: str
tenant_id: str
app_id: str
workflow_id: str
triggered_from: str
@staticmethod
def delete_by_runs(session: Session, runs: Sequence[RunContext]) -> tuple[int, int]:
"""
Delete node executions (and offloads) for the given workflow runs using indexed columns.
Uses the composite index on (tenant_id, app_id, workflow_id, triggered_from, workflow_run_id)
by filtering on those columns with tuple IN.
"""
if not runs:
return 0, 0
tuple_values = [
(
run["tenant_id"],
run["app_id"],
run["workflow_id"],
DifyAPISQLAlchemyWorkflowNodeExecutionRepository._map_run_triggered_from_to_node_triggered_from(
run["triggered_from"]
),
run["run_id"],
)
for run in runs
]
node_execution_ids = session.scalars(
select(WorkflowNodeExecutionModel.id).where(
tuple_(
WorkflowNodeExecutionModel.tenant_id,
WorkflowNodeExecutionModel.app_id,
WorkflowNodeExecutionModel.workflow_id,
WorkflowNodeExecutionModel.triggered_from,
WorkflowNodeExecutionModel.workflow_run_id,
).in_(tuple_values)
)
).all()
if not node_execution_ids:
return 0, 0
offloads_deleted = (
cast(
CursorResult,
session.execute(
delete(WorkflowNodeExecutionOffload).where(
WorkflowNodeExecutionOffload.node_execution_id.in_(node_execution_ids)
)
),
).rowcount
or 0
)
node_executions_deleted = (
cast(
CursorResult,
session.execute(
delete(WorkflowNodeExecutionModel).where(WorkflowNodeExecutionModel.id.in_(node_execution_ids))
),
).rowcount
or 0
)
return node_executions_deleted, offloads_deleted
@staticmethod
def count_by_runs(session: Session, runs: Sequence[RunContext]) -> tuple[int, int]:
"""
Count node executions (and offloads) for the given workflow runs using indexed columns.
"""
if not runs:
return 0, 0
tuple_values = [
(
run["tenant_id"],
run["app_id"],
run["workflow_id"],
DifyAPISQLAlchemyWorkflowNodeExecutionRepository._map_run_triggered_from_to_node_triggered_from(
run["triggered_from"]
),
run["run_id"],
)
for run in runs
]
tuple_filter = tuple_(
WorkflowNodeExecutionModel.tenant_id,
WorkflowNodeExecutionModel.app_id,
WorkflowNodeExecutionModel.workflow_id,
WorkflowNodeExecutionModel.triggered_from,
WorkflowNodeExecutionModel.workflow_run_id,
).in_(tuple_values)
node_executions_count = (
session.scalar(select(func.count()).select_from(WorkflowNodeExecutionModel).where(tuple_filter)) or 0
)
offloads_count = (
session.scalar(
select(func.count())
.select_from(WorkflowNodeExecutionOffload)
.join(
WorkflowNodeExecutionModel,
WorkflowNodeExecutionOffload.node_execution_id == WorkflowNodeExecutionModel.id,
)
.where(tuple_filter)
)
or 0
)
return int(node_executions_count), int(offloads_count)
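
Note that the static `delete_by_runs`/`count_by_runs` helpers take `RunContext` dicts, while `delete_runs_with_related` hands its callback the `WorkflowRun` rows themselves, so a thin adapter is needed when composing the two repositories. A hedged sketch (attribute names on `WorkflowRun` are assumed from the `RunContext` fields):

```python
from collections.abc import Sequence

from sqlalchemy.orm import Session

from models.workflow import WorkflowRun
from repositories.sqlalchemy_api_workflow_node_execution_repository import (
    DifyAPISQLAlchemyWorkflowNodeExecutionRepository as NodeExecRepo,
)


def _delete_node_executions(session: Session, runs: Sequence[WorkflowRun]) -> tuple[int, int]:
    # Adapt WorkflowRun rows into the RunContext dicts expected by delete_by_runs,
    # so the tuple IN filter can hit the composite node-execution index.
    contexts = [
        {
            "run_id": run.id,
            "tenant_id": run.tenant_id,
            "app_id": run.app_id,
            "workflow_id": run.workflow_id,
            "triggered_from": run.triggered_from,
        }
        for run in runs
    ]
    return NodeExecRepo.delete_by_runs(session, contexts)


# Then passed into the run repository, e.g.:
# workflow_run_repo.delete_runs_with_related(batch, delete_node_executions=_delete_node_executions)
```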

View File

@@ -21,7 +21,7 @@ Implementation Notes:
import logging
import uuid
from collections.abc import Sequence
from collections.abc import Callable, Sequence
from datetime import datetime
from decimal import Decimal
from typing import Any, cast
@@ -32,7 +32,7 @@ from sqlalchemy.engine import CursorResult
from sqlalchemy.orm import Session, selectinload, sessionmaker
from core.workflow.entities.pause_reason import HumanInputRequired, PauseReason, SchedulingPause
from core.workflow.enums import WorkflowExecutionStatus
from core.workflow.enums import WorkflowExecutionStatus, WorkflowType
from extensions.ext_storage import storage
from libs.datetime_utils import naive_utc_now
from libs.helper import convert_datetime_to_date
@@ -40,8 +40,14 @@ from libs.infinite_scroll_pagination import InfiniteScrollPagination
from libs.time_parser import get_time_threshold
from libs.uuid_utils import uuidv7
from models.enums import WorkflowRunTriggeredFrom
from models.workflow import WorkflowPause as WorkflowPauseModel
from models.workflow import WorkflowPauseReason, WorkflowRun
from models.workflow import (
WorkflowAppLog,
WorkflowPauseReason,
WorkflowRun,
)
from models.workflow import (
WorkflowPause as WorkflowPauseModel,
)
from repositories.api_workflow_run_repository import APIWorkflowRunRepository
from repositories.entities.workflow_pause import WorkflowPauseEntity
from repositories.types import (
@@ -314,6 +320,171 @@ class DifyAPISQLAlchemyWorkflowRunRepository(APIWorkflowRunRepository):
logger.info("Total deleted %s workflow runs for app %s", total_deleted, app_id)
return total_deleted
def get_runs_batch_by_time_range(
self,
start_from: datetime | None,
end_before: datetime,
last_seen: tuple[datetime, str] | None,
batch_size: int,
run_types: Sequence[WorkflowType] | None = None,
tenant_ids: Sequence[str] | None = None,
) -> Sequence[WorkflowRun]:
"""
Fetch ended workflow runs in a time window for archival and clean batching.
Query scope:
- created_at in [start_from, end_before)
- type in run_types (when provided)
- status is an ended state
- optional tenant_id filter and cursor (last_seen) for pagination
"""
with self._session_maker() as session:
stmt = (
select(WorkflowRun)
.where(
WorkflowRun.created_at < end_before,
WorkflowRun.status.in_(WorkflowExecutionStatus.ended_values()),
)
.order_by(WorkflowRun.created_at.asc(), WorkflowRun.id.asc())
.limit(batch_size)
)
if run_types is not None:
if not run_types:
return []
stmt = stmt.where(WorkflowRun.type.in_(run_types))
if start_from:
stmt = stmt.where(WorkflowRun.created_at >= start_from)
if tenant_ids:
stmt = stmt.where(WorkflowRun.tenant_id.in_(tenant_ids))
if last_seen:
stmt = stmt.where(
or_(
WorkflowRun.created_at > last_seen[0],
and_(WorkflowRun.created_at == last_seen[0], WorkflowRun.id > last_seen[1]),
)
)
return session.scalars(stmt).all()
def delete_runs_with_related(
self,
runs: Sequence[WorkflowRun],
delete_node_executions: Callable[[Session, Sequence[WorkflowRun]], tuple[int, int]] | None = None,
delete_trigger_logs: Callable[[Session, Sequence[str]], int] | None = None,
) -> dict[str, int]:
if not runs:
return {
"runs": 0,
"node_executions": 0,
"offloads": 0,
"app_logs": 0,
"trigger_logs": 0,
"pauses": 0,
"pause_reasons": 0,
}
with self._session_maker() as session:
run_ids = [run.id for run in runs]
if delete_node_executions:
node_executions_deleted, offloads_deleted = delete_node_executions(session, runs)
else:
node_executions_deleted, offloads_deleted = 0, 0
app_logs_result = session.execute(delete(WorkflowAppLog).where(WorkflowAppLog.workflow_run_id.in_(run_ids)))
app_logs_deleted = cast(CursorResult, app_logs_result).rowcount or 0
pause_ids = session.scalars(
select(WorkflowPauseModel.id).where(WorkflowPauseModel.workflow_run_id.in_(run_ids))
).all()
pause_reasons_deleted = 0
pauses_deleted = 0
if pause_ids:
pause_reasons_result = session.execute(
delete(WorkflowPauseReason).where(WorkflowPauseReason.pause_id.in_(pause_ids))
)
pause_reasons_deleted = cast(CursorResult, pause_reasons_result).rowcount or 0
pauses_result = session.execute(delete(WorkflowPauseModel).where(WorkflowPauseModel.id.in_(pause_ids)))
pauses_deleted = cast(CursorResult, pauses_result).rowcount or 0
trigger_logs_deleted = delete_trigger_logs(session, run_ids) if delete_trigger_logs else 0
runs_result = session.execute(delete(WorkflowRun).where(WorkflowRun.id.in_(run_ids)))
runs_deleted = cast(CursorResult, runs_result).rowcount or 0
session.commit()
return {
"runs": runs_deleted,
"node_executions": node_executions_deleted,
"offloads": offloads_deleted,
"app_logs": app_logs_deleted,
"trigger_logs": trigger_logs_deleted,
"pauses": pauses_deleted,
"pause_reasons": pause_reasons_deleted,
}
def count_runs_with_related(
self,
runs: Sequence[WorkflowRun],
count_node_executions: Callable[[Session, Sequence[WorkflowRun]], tuple[int, int]] | None = None,
count_trigger_logs: Callable[[Session, Sequence[str]], int] | None = None,
) -> dict[str, int]:
if not runs:
return {
"runs": 0,
"node_executions": 0,
"offloads": 0,
"app_logs": 0,
"trigger_logs": 0,
"pauses": 0,
"pause_reasons": 0,
}
with self._session_maker() as session:
run_ids = [run.id for run in runs]
if count_node_executions:
node_executions_count, offloads_count = count_node_executions(session, runs)
else:
node_executions_count, offloads_count = 0, 0
app_logs_count = (
session.scalar(
select(func.count()).select_from(WorkflowAppLog).where(WorkflowAppLog.workflow_run_id.in_(run_ids))
)
or 0
)
pause_ids = session.scalars(
select(WorkflowPauseModel.id).where(WorkflowPauseModel.workflow_run_id.in_(run_ids))
).all()
pauses_count = len(pause_ids)
pause_reasons_count = 0
if pause_ids:
pause_reasons_count = (
session.scalar(
select(func.count())
.select_from(WorkflowPauseReason)
.where(WorkflowPauseReason.pause_id.in_(pause_ids))
)
or 0
)
trigger_logs_count = count_trigger_logs(session, run_ids) if count_trigger_logs else 0
return {
"runs": len(runs),
"node_executions": node_executions_count,
"offloads": offloads_count,
"app_logs": int(app_logs_count),
"trigger_logs": trigger_logs_count,
"pauses": pauses_count,
"pause_reasons": int(pause_reasons_count),
}
def create_workflow_pause(
self,
workflow_run_id: str,

View File

@@ -4,8 +4,10 @@ SQLAlchemy implementation of WorkflowTriggerLogRepository.
from collections.abc import Sequence
from datetime import UTC, datetime, timedelta
from typing import cast
from sqlalchemy import and_, select
from sqlalchemy import and_, delete, func, select
from sqlalchemy.engine import CursorResult
from sqlalchemy.orm import Session
from models.enums import WorkflowTriggerStatus
@@ -84,3 +86,37 @@ class SQLAlchemyWorkflowTriggerLogRepository(WorkflowTriggerLogRepository):
)
return list(self.session.scalars(query).all())
def delete_by_run_ids(self, run_ids: Sequence[str]) -> int:
"""
Delete trigger logs associated with the given workflow run ids.
Args:
run_ids: Collection of workflow run identifiers.
Returns:
Number of rows deleted.
"""
if not run_ids:
return 0
result = self.session.execute(delete(WorkflowTriggerLog).where(WorkflowTriggerLog.workflow_run_id.in_(run_ids)))
return cast(CursorResult, result).rowcount or 0
def count_by_run_ids(self, run_ids: Sequence[str]) -> int:
"""
Count trigger logs associated with the given workflow run ids.
Args:
run_ids: Collection of workflow run identifiers.
Returns:
Number of rows matched.
"""
if not run_ids:
return 0
count = self.session.scalar(
select(func.count()).select_from(WorkflowTriggerLog).where(WorkflowTriggerLog.workflow_run_id.in_(run_ids))
)
return int(count or 0)
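A minimal usage sketch of the two new trigger-log helpers, assuming an active SQLAlchemy session bound to the Dify engine; the run IDs below are hypothetical placeholders, not values from this change.

# Hedged sketch: count, then delete, trigger logs for a set of workflow run IDs.
from sqlalchemy.orm import Session

from extensions.ext_database import db
from repositories.sqlalchemy_workflow_trigger_log_repository import SQLAlchemyWorkflowTriggerLogRepository

with Session(db.engine) as session:
    repo = SQLAlchemyWorkflowTriggerLogRepository(session)
    run_ids = ["run-1", "run-2"]                # hypothetical workflow run IDs
    matched = repo.count_by_run_ids(run_ids)    # rows that would be removed
    removed = repo.delete_by_run_ids(run_ids)   # bulk DELETE, returns rowcount (0 for empty input)
    session.commit()                            # the repository itself does not commit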

View File

@@ -109,3 +109,15 @@ class WorkflowTriggerLogRepository(Protocol):
A sequence of recent WorkflowTriggerLog instances
"""
...
def delete_by_run_ids(self, run_ids: Sequence[str]) -> int:
"""
Delete trigger logs for workflow run IDs.
Args:
run_ids: Workflow run IDs to delete
Returns:
Number of rows deleted
"""
...

View File

@@ -0,0 +1,43 @@
from datetime import UTC, datetime
import click
import app
from configs import dify_config
from services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs import WorkflowRunCleanup
@app.celery.task(queue="retention")
def clean_workflow_runs_task() -> None:
"""
Scheduled cleanup for workflow runs and related records (sandbox tenants only).
"""
click.echo(
click.style(
(
"Scheduled workflow run cleanup starting: "
f"cutoff={dify_config.SANDBOX_EXPIRED_RECORDS_RETENTION_DAYS} days, "
f"batch={dify_config.SANDBOX_EXPIRED_RECORDS_CLEAN_BATCH_SIZE}"
),
fg="green",
)
)
start_time = datetime.now(UTC)
WorkflowRunCleanup(
days=dify_config.SANDBOX_EXPIRED_RECORDS_RETENTION_DAYS,
batch_size=dify_config.SANDBOX_EXPIRED_RECORDS_CLEAN_BATCH_SIZE,
start_from=None,
end_before=None,
).run()
end_time = datetime.now(UTC)
elapsed = end_time - start_time
click.echo(
click.style(
f"Scheduled workflow run cleanup finished. start={start_time.isoformat()} "
f"end={end_time.isoformat()} duration={elapsed}",
fg="green",
)
)
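A hedged sketch of how this retention task could be registered on a Celery beat schedule; the import path, schedule key, and run time are assumptions for illustration, not taken from this change.

# Illustrative only: put clean_workflow_runs_task on a daily beat schedule.
from celery.schedules import crontab

import app
from tasks.clean_workflow_runs_task import clean_workflow_runs_task  # hypothetical module path

app.celery.conf.beat_schedule = {
    **(app.celery.conf.beat_schedule or {}),
    "clean-workflow-runs-daily": {              # hypothetical schedule key
        "task": clean_workflow_runs_task.name,  # resolves to the registered task name
        "schedule": crontab(hour=3, minute=0),  # assumed off-peak time
        "options": {"queue": "retention"},      # matches the task's declared queue
    },
}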

View File

View File

@@ -0,0 +1,301 @@
import datetime
import logging
from collections.abc import Iterable, Sequence
import click
from sqlalchemy.orm import Session, sessionmaker
from configs import dify_config
from enums.cloud_plan import CloudPlan
from extensions.ext_database import db
from models.workflow import WorkflowRun
from repositories.api_workflow_run_repository import APIWorkflowRunRepository
from repositories.sqlalchemy_api_workflow_node_execution_repository import (
DifyAPISQLAlchemyWorkflowNodeExecutionRepository,
)
from repositories.sqlalchemy_workflow_trigger_log_repository import SQLAlchemyWorkflowTriggerLogRepository
from services.billing_service import BillingService, SubscriptionPlan
logger = logging.getLogger(__name__)
class WorkflowRunCleanup:
def __init__(
self,
days: int,
batch_size: int,
start_from: datetime.datetime | None = None,
end_before: datetime.datetime | None = None,
workflow_run_repo: APIWorkflowRunRepository | None = None,
dry_run: bool = False,
):
if (start_from is None) ^ (end_before is None):
raise ValueError("start_from and end_before must be both set or both omitted.")
computed_cutoff = datetime.datetime.now() - datetime.timedelta(days=days)
self.window_start = start_from
self.window_end = end_before or computed_cutoff
if self.window_start and self.window_end <= self.window_start:
raise ValueError("end_before must be greater than start_from.")
if batch_size <= 0:
raise ValueError("batch_size must be greater than 0.")
self.batch_size = batch_size
self._cleanup_whitelist: set[str] | None = None
self.dry_run = dry_run
self.free_plan_grace_period_days = dify_config.SANDBOX_EXPIRED_RECORDS_CLEAN_GRACEFUL_PERIOD
self.workflow_run_repo: APIWorkflowRunRepository
if workflow_run_repo:
self.workflow_run_repo = workflow_run_repo
else:
# Lazy import to avoid circular dependencies during module import
from repositories.factory import DifyAPIRepositoryFactory
session_maker = sessionmaker(bind=db.engine, expire_on_commit=False)
self.workflow_run_repo = DifyAPIRepositoryFactory.create_api_workflow_run_repository(session_maker)
def run(self) -> None:
click.echo(
click.style(
f"{'Inspecting' if self.dry_run else 'Cleaning'} workflow runs "
f"{'between ' + self.window_start.isoformat() + ' and ' if self.window_start else 'before '}"
f"{self.window_end.isoformat()} (batch={self.batch_size})",
fg="white",
)
)
if self.dry_run:
click.echo(click.style("Dry run mode enabled. No data will be deleted.", fg="yellow"))
total_runs_deleted = 0
total_runs_targeted = 0
related_totals = self._empty_related_counts() if self.dry_run else None
batch_index = 0
last_seen: tuple[datetime.datetime, str] | None = None
while True:
run_rows = self.workflow_run_repo.get_runs_batch_by_time_range(
start_from=self.window_start,
end_before=self.window_end,
last_seen=last_seen,
batch_size=self.batch_size,
)
if not run_rows:
break
batch_index += 1
last_seen = (run_rows[-1].created_at, run_rows[-1].id)
tenant_ids = {row.tenant_id for row in run_rows}
free_tenants = self._filter_free_tenants(tenant_ids)
free_runs = [row for row in run_rows if row.tenant_id in free_tenants]
paid_or_skipped = len(run_rows) - len(free_runs)
if not free_runs:
click.echo(
click.style(
f"[batch #{batch_index}] skipped (no sandbox runs in batch, {paid_or_skipped} paid/unknown)",
fg="yellow",
)
)
continue
total_runs_targeted += len(free_runs)
if self.dry_run:
batch_counts = self.workflow_run_repo.count_runs_with_related(
free_runs,
count_node_executions=self._count_node_executions,
count_trigger_logs=self._count_trigger_logs,
)
if related_totals is not None:
for key in related_totals:
related_totals[key] += batch_counts.get(key, 0)
sample_ids = ", ".join(run.id for run in free_runs[:5])
click.echo(
click.style(
f"[batch #{batch_index}] would delete {len(free_runs)} runs "
f"(sample ids: {sample_ids}) and skip {paid_or_skipped} paid/unknown",
fg="yellow",
)
)
continue
try:
counts = self.workflow_run_repo.delete_runs_with_related(
free_runs,
delete_node_executions=self._delete_node_executions,
delete_trigger_logs=self._delete_trigger_logs,
)
except Exception:
logger.exception("Failed to delete workflow runs batch ending at %s", last_seen[0])
raise
total_runs_deleted += counts["runs"]
click.echo(
click.style(
f"[batch #{batch_index}] deleted runs: {counts['runs']} "
f"(nodes {counts['node_executions']}, offloads {counts['offloads']}, "
f"app_logs {counts['app_logs']}, trigger_logs {counts['trigger_logs']}, "
f"pauses {counts['pauses']}, pause_reasons {counts['pause_reasons']}); "
f"skipped {paid_or_skipped} paid/unknown",
fg="green",
)
)
if self.dry_run:
if self.window_start:
summary_message = (
f"Dry run complete. Would delete {total_runs_targeted} workflow runs "
f"between {self.window_start.isoformat()} and {self.window_end.isoformat()}"
)
else:
summary_message = (
f"Dry run complete. Would delete {total_runs_targeted} workflow runs "
f"before {self.window_end.isoformat()}"
)
if related_totals is not None:
summary_message = f"{summary_message}; related records: {self._format_related_counts(related_totals)}"
summary_color = "yellow"
else:
if self.window_start:
summary_message = (
f"Cleanup complete. Deleted {total_runs_deleted} workflow runs "
f"between {self.window_start.isoformat()} and {self.window_end.isoformat()}"
)
else:
summary_message = (
f"Cleanup complete. Deleted {total_runs_deleted} workflow runs before {self.window_end.isoformat()}"
)
summary_color = "white"
click.echo(click.style(summary_message, fg=summary_color))
def _filter_free_tenants(self, tenant_ids: Iterable[str]) -> set[str]:
tenant_id_list = list(tenant_ids)
if not dify_config.BILLING_ENABLED:
return set(tenant_id_list)
if not tenant_id_list:
return set()
cleanup_whitelist = self._get_cleanup_whitelist()
try:
bulk_info = BillingService.get_plan_bulk_with_cache(tenant_id_list)
except Exception:
bulk_info = {}
logger.exception("Failed to fetch billing plans in bulk for tenants: %s", tenant_id_list)
eligible_free_tenants: set[str] = set()
for tenant_id in tenant_id_list:
if tenant_id in cleanup_whitelist:
continue
info = bulk_info.get(tenant_id)
if info is None:
logger.warning("Missing billing info for tenant %s in bulk resp; treating as non-free", tenant_id)
continue
if info.get("plan") != CloudPlan.SANDBOX:
continue
if self._is_within_grace_period(tenant_id, info):
continue
eligible_free_tenants.add(tenant_id)
return eligible_free_tenants
def _expiration_datetime(self, tenant_id: str, expiration_value: int) -> datetime.datetime | None:
if expiration_value < 0:
return None
try:
return datetime.datetime.fromtimestamp(expiration_value, datetime.UTC)
except (OverflowError, OSError, ValueError):
logger.exception("Failed to parse expiration timestamp for tenant %s", tenant_id)
return None
def _is_within_grace_period(self, tenant_id: str, info: SubscriptionPlan) -> bool:
if self.free_plan_grace_period_days <= 0:
return False
expiration_value = info.get("expiration_date", -1)
expiration_at = self._expiration_datetime(tenant_id, expiration_value)
if expiration_at is None:
return False
grace_deadline = expiration_at + datetime.timedelta(days=self.free_plan_grace_period_days)
return datetime.datetime.now(datetime.UTC) < grace_deadline
def _get_cleanup_whitelist(self) -> set[str]:
if self._cleanup_whitelist is not None:
return self._cleanup_whitelist
if not dify_config.BILLING_ENABLED:
self._cleanup_whitelist = set()
return self._cleanup_whitelist
try:
whitelist_ids = BillingService.get_expired_subscription_cleanup_whitelist()
except Exception:
logger.exception("Failed to fetch cleanup whitelist from billing service")
whitelist_ids = []
self._cleanup_whitelist = set(whitelist_ids)
return self._cleanup_whitelist
def _delete_trigger_logs(self, session: Session, run_ids: Sequence[str]) -> int:
trigger_repo = SQLAlchemyWorkflowTriggerLogRepository(session)
return trigger_repo.delete_by_run_ids(run_ids)
def _count_trigger_logs(self, session: Session, run_ids: Sequence[str]) -> int:
trigger_repo = SQLAlchemyWorkflowTriggerLogRepository(session)
return trigger_repo.count_by_run_ids(run_ids)
@staticmethod
def _build_run_contexts(
runs: Sequence[WorkflowRun],
) -> list[DifyAPISQLAlchemyWorkflowNodeExecutionRepository.RunContext]:
return [
{
"run_id": run.id,
"tenant_id": run.tenant_id,
"app_id": run.app_id,
"workflow_id": run.workflow_id,
"triggered_from": run.triggered_from,
}
for run in runs
]
@staticmethod
def _empty_related_counts() -> dict[str, int]:
return {
"node_executions": 0,
"offloads": 0,
"app_logs": 0,
"trigger_logs": 0,
"pauses": 0,
"pause_reasons": 0,
}
@staticmethod
def _format_related_counts(counts: dict[str, int]) -> str:
return (
f"node_executions {counts['node_executions']}, "
f"offloads {counts['offloads']}, "
f"app_logs {counts['app_logs']}, "
f"trigger_logs {counts['trigger_logs']}, "
f"pauses {counts['pauses']}, "
f"pause_reasons {counts['pause_reasons']}"
)
def _count_node_executions(self, session: Session, runs: Sequence[WorkflowRun]) -> tuple[int, int]:
run_contexts = self._build_run_contexts(runs)
return DifyAPISQLAlchemyWorkflowNodeExecutionRepository.count_by_runs(session, run_contexts)
def _delete_node_executions(self, session: Session, runs: Sequence[WorkflowRun]) -> tuple[int, int]:
run_contexts = self._build_run_contexts(runs)
return DifyAPISQLAlchemyWorkflowNodeExecutionRepository.delete_by_runs(session, run_contexts)
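A minimal invocation sketch for the cleanup service, assuming it runs inside the Flask app context so the default repository can bind to db.engine; the window bounds and batch size are illustrative.

# Hedged sketch: inspect a bounded window first, then clean everything past retention.
import datetime

from services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs import WorkflowRunCleanup

window_start = datetime.datetime(2024, 1, 1)    # illustrative explicit window
window_end = datetime.datetime(2024, 2, 1)

# Dry run: logs per-batch "would delete" lines plus related-record totals, deletes nothing.
WorkflowRunCleanup(days=30, batch_size=100, start_from=window_start, end_before=window_end, dry_run=True).run()

# Real pass: everything older than the 30-day cutoff, in batches of 100.
WorkflowRunCleanup(days=30, batch_size=100).run()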

View File

@@ -0,0 +1,158 @@
app:
description: Validate that the v1 Variable Assigner blocks streaming until the conversation variable is updated.
icon: 🤖
icon_background: '#FFEAD5'
mode: advanced-chat
name: test_streaming_conversation_variables_v1_overwrite
use_icon_as_answer_icon: false
dependencies: []
kind: app
version: 0.5.0
workflow:
conversation_variables:
- description: ''
id: 6ddf2d7f-3d1b-4bb0-9a5e-9b0c87c7b5e6
name: conv_var
selector:
- conversation
- conv_var
value: default
value_type: string
environment_variables: []
features:
file_upload:
allowed_file_extensions:
- .JPG
- .JPEG
- .PNG
- .GIF
- .WEBP
- .SVG
allowed_file_types:
- image
allowed_file_upload_methods:
- local_file
- remote_url
enabled: false
fileUploadConfig:
audio_file_size_limit: 50
batch_count_limit: 5
file_size_limit: 15
image_file_size_limit: 10
video_file_size_limit: 100
workflow_file_upload_limit: 10
image:
enabled: false
number_limits: 3
transfer_methods:
- local_file
- remote_url
number_limits: 3
opening_statement: ''
retriever_resource:
enabled: true
sensitive_word_avoidance:
enabled: false
speech_to_text:
enabled: false
suggested_questions: []
suggested_questions_after_answer:
enabled: false
text_to_speech:
enabled: false
language: ''
voice: ''
graph:
edges:
- data:
isInIteration: false
isInLoop: false
sourceType: start
targetType: assigner
id: start-source-assigner-target
source: start
sourceHandle: source
target: assigner
targetHandle: target
type: custom
zIndex: 0
- data:
isInLoop: false
sourceType: assigner
targetType: answer
id: assigner-source-answer-target
source: assigner
sourceHandle: source
target: answer
targetHandle: target
type: custom
zIndex: 0
nodes:
- data:
desc: ''
selected: false
title: Start
type: start
variables: []
height: 54
id: start
position:
x: 30
y: 253
positionAbsolute:
x: 30
y: 253
selected: false
sourcePosition: right
targetPosition: left
type: custom
width: 244
- data:
answer: 'Current Value Of `conv_var` is:{{#conversation.conv_var#}}'
desc: ''
selected: false
title: Answer
type: answer
variables: []
height: 106
id: answer
position:
x: 638
y: 253
positionAbsolute:
x: 638
y: 253
selected: true
sourcePosition: right
targetPosition: left
type: custom
width: 244
- data:
assigned_variable_selector:
- conversation
- conv_var
desc: ''
input_variable_selector:
- sys
- query
selected: false
title: Variable Assigner
type: assigner
write_mode: over-write
height: 84
id: assigner
position:
x: 334
y: 253
positionAbsolute:
x: 334
y: 253
selected: false
sourcePosition: right
targetPosition: left
type: custom
width: 244
viewport:
x: 0
y: 0
zoom: 0.7

View File

@@ -230,7 +230,6 @@ class TestAgentService:
# Create first agent thought
thought1 = MessageAgentThought(
id=fake.uuid4(),
message_id=message.id,
position=1,
thought="I need to analyze the user's request",
@@ -257,7 +256,6 @@ class TestAgentService:
# Create second agent thought
thought2 = MessageAgentThought(
id=fake.uuid4(),
message_id=message.id,
position=2,
thought="Based on the analysis, I can provide a response",
@@ -545,7 +543,6 @@ class TestAgentService:
# Create agent thought with tool error
thought_with_error = MessageAgentThought(
id=fake.uuid4(),
message_id=message.id,
position=1,
thought="I need to analyze the user's request",
@@ -759,7 +756,6 @@ class TestAgentService:
# Create agent thought with multiple tools
complex_thought = MessageAgentThought(
id=fake.uuid4(),
message_id=message.id,
position=1,
thought="I need to use multiple tools to complete this task",
@@ -877,7 +873,6 @@ class TestAgentService:
# Create agent thought with files
thought_with_files = MessageAgentThought(
id=fake.uuid4(),
message_id=message.id,
position=1,
thought="I need to process some files",
@@ -957,7 +952,6 @@ class TestAgentService:
# Create agent thought with empty tool data
empty_thought = MessageAgentThought(
id=fake.uuid4(),
message_id=message.id,
position=1,
thought="I need to analyze the user's request",
@@ -999,7 +993,6 @@ class TestAgentService:
# Create agent thought with malformed JSON
malformed_thought = MessageAgentThought(
id=fake.uuid4(),
message_id=message.id,
position=1,
thought="I need to analyze the user's request",

View File

@@ -1,390 +0,0 @@
"""
Tests for AdvancedChatAppGenerateTaskPipeline._handle_node_succeeded_event method,
specifically testing the ANSWER node message_replace logic.
"""
from datetime import datetime
from types import SimpleNamespace
from unittest.mock import MagicMock, Mock, patch
import pytest
from core.app.entities.app_invoke_entities import AdvancedChatAppGenerateEntity
from core.app.entities.queue_entities import QueueNodeSucceededEvent
from core.workflow.enums import NodeType
from models import EndUser
from models.model import AppMode
class TestAnswerNodeMessageReplace:
"""Test cases for ANSWER node message_replace event logic."""
@pytest.fixture
def mock_application_generate_entity(self):
"""Create a mock application generate entity."""
entity = Mock(spec=AdvancedChatAppGenerateEntity)
entity.task_id = "test-task-id"
entity.app_id = "test-app-id"
entity.workflow_run_id = "test-workflow-run-id"
# minimal app_config used by pipeline internals
entity.app_config = SimpleNamespace(
tenant_id="test-tenant-id",
app_id="test-app-id",
app_mode=AppMode.ADVANCED_CHAT,
app_model_config_dict={},
additional_features=None,
sensitive_word_avoidance=None,
)
entity.query = "test query"
entity.files = []
entity.extras = {}
entity.trace_manager = None
entity.inputs = {}
entity.invoke_from = "debugger"
return entity
@pytest.fixture
def mock_workflow(self):
"""Create a mock workflow."""
workflow = Mock()
workflow.id = "test-workflow-id"
workflow.features_dict = {}
return workflow
@pytest.fixture
def mock_queue_manager(self):
"""Create a mock queue manager."""
manager = Mock()
manager.listen.return_value = []
manager.graph_runtime_state = None
return manager
@pytest.fixture
def mock_conversation(self):
"""Create a mock conversation."""
conversation = Mock()
conversation.id = "test-conversation-id"
conversation.mode = "advanced_chat"
return conversation
@pytest.fixture
def mock_message(self):
"""Create a mock message."""
message = Mock()
message.id = "test-message-id"
message.query = "test query"
message.created_at = Mock()
message.created_at.timestamp.return_value = 1234567890
return message
@pytest.fixture
def mock_user(self):
"""Create a mock end user."""
user = MagicMock(spec=EndUser)
user.id = "test-user-id"
user.session_id = "test-session-id"
return user
@pytest.fixture
def mock_draft_var_saver_factory(self):
"""Create a mock draft variable saver factory."""
return Mock()
@pytest.fixture
def pipeline(
self,
mock_application_generate_entity,
mock_workflow,
mock_queue_manager,
mock_conversation,
mock_message,
mock_user,
mock_draft_var_saver_factory,
):
"""Create an AdvancedChatAppGenerateTaskPipeline instance with mocked dependencies."""
from core.app.apps.advanced_chat.generate_task_pipeline import AdvancedChatAppGenerateTaskPipeline
with patch("core.app.apps.advanced_chat.generate_task_pipeline.db"):
pipeline = AdvancedChatAppGenerateTaskPipeline(
application_generate_entity=mock_application_generate_entity,
workflow=mock_workflow,
queue_manager=mock_queue_manager,
conversation=mock_conversation,
message=mock_message,
user=mock_user,
stream=True,
dialogue_count=1,
draft_var_saver_factory=mock_draft_var_saver_factory,
)
# Initialize workflow run id to avoid validation errors
pipeline._workflow_run_id = "test-workflow-run-id"
# Mock the message cycle manager methods we need to track
pipeline._message_cycle_manager.message_replace_to_stream_response = Mock()
return pipeline
def test_answer_node_with_different_output_sends_message_replace(self, pipeline, mock_application_generate_entity):
"""
Test that when an ANSWER node's final output differs from accumulated answer,
a message_replace event is sent.
"""
# Arrange: Set initial accumulated answer
pipeline._task_state.answer = "initial answer"
# Create ANSWER node succeeded event with different final output
event = QueueNodeSucceededEvent(
node_execution_id="test-node-execution-id",
node_id="test-answer-node",
node_type=NodeType.ANSWER,
start_at=datetime.now(),
outputs={"answer": "updated final answer"},
)
# Mock the workflow response converter to avoid extra processing
pipeline._workflow_response_converter.workflow_node_finish_to_stream_response = Mock(return_value=None)
pipeline._save_output_for_event = Mock()
# Act
responses = list(pipeline._handle_node_succeeded_event(event))
# Assert
assert pipeline._task_state.answer == "updated final answer"
# Verify message_replace was called
pipeline._message_cycle_manager.message_replace_to_stream_response.assert_called_once_with(
answer="updated final answer", reason="variable_update"
)
def test_answer_node_with_same_output_does_not_send_message_replace(self, pipeline):
"""
Test that when an ANSWER node's final output is the same as accumulated answer,
no message_replace event is sent.
"""
# Arrange: Set initial accumulated answer
pipeline._task_state.answer = "same answer"
# Create ANSWER node succeeded event with same output
event = QueueNodeSucceededEvent(
node_execution_id="test-node-execution-id",
node_id="test-answer-node",
node_type=NodeType.ANSWER,
start_at=datetime.now(),
outputs={"answer": "same answer"},
)
# Mock the workflow response converter
pipeline._workflow_response_converter.workflow_node_finish_to_stream_response = Mock(return_value=None)
pipeline._save_output_for_event = Mock()
# Act
list(pipeline._handle_node_succeeded_event(event))
# Assert: answer should remain unchanged
assert pipeline._task_state.answer == "same answer"
# Verify message_replace was NOT called
pipeline._message_cycle_manager.message_replace_to_stream_response.assert_not_called()
def test_answer_node_with_none_output_does_not_send_message_replace(self, pipeline):
"""
Test that when an ANSWER node's output is None or missing 'answer' key,
no message_replace event is sent.
"""
# Arrange: Set initial accumulated answer
pipeline._task_state.answer = "existing answer"
# Create ANSWER node succeeded event with None output
event = QueueNodeSucceededEvent(
node_execution_id="test-node-execution-id",
node_id="test-answer-node",
node_type=NodeType.ANSWER,
start_at=datetime.now(),
outputs={"answer": None},
)
# Mock the workflow response converter
pipeline._workflow_response_converter.workflow_node_finish_to_stream_response = Mock(return_value=None)
pipeline._save_output_for_event = Mock()
# Act
list(pipeline._handle_node_succeeded_event(event))
# Assert: answer should remain unchanged
assert pipeline._task_state.answer == "existing answer"
# Verify message_replace was NOT called
pipeline._message_cycle_manager.message_replace_to_stream_response.assert_not_called()
def test_answer_node_with_empty_outputs_does_not_send_message_replace(self, pipeline):
"""
Test that when an ANSWER node has empty outputs dict,
no message_replace event is sent.
"""
# Arrange: Set initial accumulated answer
pipeline._task_state.answer = "existing answer"
# Create ANSWER node succeeded event with empty outputs
event = QueueNodeSucceededEvent(
node_execution_id="test-node-execution-id",
node_id="test-answer-node",
node_type=NodeType.ANSWER,
start_at=datetime.now(),
outputs={},
)
# Mock the workflow response converter
pipeline._workflow_response_converter.workflow_node_finish_to_stream_response = Mock(return_value=None)
pipeline._save_output_for_event = Mock()
# Act
list(pipeline._handle_node_succeeded_event(event))
# Assert: answer should remain unchanged
assert pipeline._task_state.answer == "existing answer"
# Verify message_replace was NOT called
pipeline._message_cycle_manager.message_replace_to_stream_response.assert_not_called()
def test_answer_node_with_no_answer_key_in_outputs(self, pipeline):
"""
Test that when an ANSWER node's outputs don't contain 'answer' key,
no message_replace event is sent.
"""
# Arrange: Set initial accumulated answer
pipeline._task_state.answer = "existing answer"
# Create ANSWER node succeeded event without 'answer' key in outputs
event = QueueNodeSucceededEvent(
node_execution_id="test-node-execution-id",
node_id="test-answer-node",
node_type=NodeType.ANSWER,
start_at=datetime.now(),
outputs={"other_key": "some value"},
)
# Mock the workflow response converter
pipeline._workflow_response_converter.workflow_node_finish_to_stream_response = Mock(return_value=None)
pipeline._save_output_for_event = Mock()
# Act
list(pipeline._handle_node_succeeded_event(event))
# Assert: answer should remain unchanged
assert pipeline._task_state.answer == "existing answer"
# Verify message_replace was NOT called
pipeline._message_cycle_manager.message_replace_to_stream_response.assert_not_called()
def test_non_answer_node_does_not_send_message_replace(self, pipeline):
"""
Test that non-ANSWER nodes (e.g., LLM, END) don't trigger message_replace events.
"""
# Arrange: Set initial accumulated answer
pipeline._task_state.answer = "existing answer"
# Test with LLM node
llm_event = QueueNodeSucceededEvent(
node_execution_id="test-llm-execution-id",
node_id="test-llm-node",
node_type=NodeType.LLM,
start_at=datetime.now(),
outputs={"answer": "different answer"},
)
# Mock the workflow response converter
pipeline._workflow_response_converter.workflow_node_finish_to_stream_response = Mock(return_value=None)
pipeline._save_output_for_event = Mock()
# Act
list(pipeline._handle_node_succeeded_event(llm_event))
# Assert: answer should remain unchanged
assert pipeline._task_state.answer == "existing answer"
# Verify message_replace was NOT called
pipeline._message_cycle_manager.message_replace_to_stream_response.assert_not_called()
def test_end_node_does_not_send_message_replace(self, pipeline):
"""
Test that END nodes don't trigger message_replace events even with 'answer' output.
"""
# Arrange: Set initial accumulated answer
pipeline._task_state.answer = "existing answer"
# Create END node succeeded event with answer output
event = QueueNodeSucceededEvent(
node_execution_id="test-end-execution-id",
node_id="test-end-node",
node_type=NodeType.END,
start_at=datetime.now(),
outputs={"answer": "different answer"},
)
# Mock the workflow response converter
pipeline._workflow_response_converter.workflow_node_finish_to_stream_response = Mock(return_value=None)
pipeline._save_output_for_event = Mock()
# Act
list(pipeline._handle_node_succeeded_event(event))
# Assert: answer should remain unchanged
assert pipeline._task_state.answer == "existing answer"
# Verify message_replace was NOT called
pipeline._message_cycle_manager.message_replace_to_stream_response.assert_not_called()
def test_answer_node_with_numeric_output_converts_to_string(self, pipeline):
"""
Test that when an ANSWER node's final output is numeric,
it gets converted to string properly.
"""
# Arrange: Set initial accumulated answer
pipeline._task_state.answer = "text answer"
# Create ANSWER node succeeded event with numeric output
event = QueueNodeSucceededEvent(
node_execution_id="test-node-execution-id",
node_id="test-answer-node",
node_type=NodeType.ANSWER,
start_at=datetime.now(),
outputs={"answer": 12345},
)
# Mock the workflow response converter
pipeline._workflow_response_converter.workflow_node_finish_to_stream_response = Mock(return_value=None)
pipeline._save_output_for_event = Mock()
# Act
list(pipeline._handle_node_succeeded_event(event))
# Assert: answer should be converted to string
assert pipeline._task_state.answer == "12345"
# Verify message_replace was called with string
pipeline._message_cycle_manager.message_replace_to_stream_response.assert_called_once_with(
answer="12345", reason="variable_update"
)
def test_answer_node_files_are_recorded(self, pipeline):
"""
Test that ANSWER nodes properly record files from outputs.
"""
# Arrange
pipeline._task_state.answer = "existing answer"
# Create ANSWER node succeeded event with files
event = QueueNodeSucceededEvent(
node_execution_id="test-node-execution-id",
node_id="test-answer-node",
node_type=NodeType.ANSWER,
start_at=datetime.now(),
outputs={
"answer": "same answer",
"files": [
{"type": "image", "transfer_method": "remote_url", "remote_url": "http://example.com/img.png"}
],
},
)
# Mock the workflow response converter
pipeline._workflow_response_converter.fetch_files_from_node_outputs = Mock(return_value=event.outputs["files"])
pipeline._workflow_response_converter.workflow_node_finish_to_stream_response = Mock(return_value=None)
pipeline._save_output_for_event = Mock()
# Act
list(pipeline._handle_node_succeeded_event(event))
# Assert: files should be recorded
assert len(pipeline._recorded_files) == 1
assert pipeline._recorded_files[0] == event.outputs["files"][0]

View File

@@ -45,3 +45,33 @@ def test_streaming_conversation_variables():
runner = TableTestRunner()
result = runner.run_test_case(case)
assert result.success, f"Test failed: {result.error}"
def test_streaming_conversation_variables_v1_overwrite_waits_for_assignment():
fixture_name = "test_streaming_conversation_variables_v1_overwrite"
input_query = "overwrite-value"
case = WorkflowTestCase(
fixture_path=fixture_name,
use_auto_mock=False,
mock_config=MockConfigBuilder().build(),
query=input_query,
inputs={},
expected_outputs={"answer": f"Current Value Of `conv_var` is:{input_query}"},
)
runner = TableTestRunner()
result = runner.run_test_case(case)
assert result.success, f"Test failed: {result.error}"
events = result.events
conv_var_chunk_events = [
event
for event in events
if isinstance(event, NodeRunStreamChunkEvent) and tuple(event.selector) == ("conversation", "conv_var")
]
assert conv_var_chunk_events, "Expected conversation variable chunk events to be emitted"
assert all(event.chunk == input_query for event in conv_var_chunk_events), (
"Expected streamed conversation variable value to match the input query"
)

View File

@@ -58,6 +58,8 @@ def test_json_object_valid_schema():
}
)
schema = json.loads(schema)
variables = [
VariableEntity(
variable="profile",
@@ -68,7 +70,7 @@ def test_json_object_valid_schema():
)
]
user_inputs = {"profile": json.dumps({"age": 20, "name": "Tom"})}
user_inputs = {"profile": {"age": 20, "name": "Tom"}}
node = make_start_node(user_inputs, variables)
result = node._run()
@@ -87,6 +89,8 @@ def test_json_object_invalid_json_string():
"required": ["age", "name"],
}
)
schema = json.loads(schema)
variables = [
VariableEntity(
variable="profile",
@@ -97,12 +101,12 @@ def test_json_object_invalid_json_string():
)
]
# Missing closing brace makes this invalid JSON
# Providing a string instead of an object should raise a type error
user_inputs = {"profile": '{"age": 20, "name": "Tom"'}
node = make_start_node(user_inputs, variables)
with pytest.raises(ValueError, match='{"age": 20, "name": "Tom" must be a valid JSON object'):
with pytest.raises(ValueError, match="JSON object for 'profile' must be an object"):
node._run()
@@ -118,6 +122,8 @@ def test_json_object_does_not_match_schema():
}
)
schema = json.loads(schema)
variables = [
VariableEntity(
variable="profile",
@@ -129,7 +135,7 @@ def test_json_object_does_not_match_schema():
]
# age is a string, which violates the schema (expects number)
user_inputs = {"profile": json.dumps({"age": "twenty", "name": "Tom"})}
user_inputs = {"profile": {"age": "twenty", "name": "Tom"}}
node = make_start_node(user_inputs, variables)
@@ -149,6 +155,8 @@ def test_json_object_missing_required_schema_field():
}
)
schema = json.loads(schema)
variables = [
VariableEntity(
variable="profile",
@@ -160,7 +168,7 @@ def test_json_object_missing_required_schema_field():
]
# Missing required field "name"
user_inputs = {"profile": json.dumps({"age": 20})}
user_inputs = {"profile": {"age": 20}}
node = make_start_node(user_inputs, variables)

View File

@@ -2,13 +2,17 @@ from types import SimpleNamespace
import pytest
from configs import dify_config
from core.file.enums import FileType
from core.file.models import File, FileTransferMethod
from core.helper.code_executor.code_executor import CodeLanguage
from core.variables.variables import StringVariable
from core.workflow.constants import (
CONVERSATION_VARIABLE_NODE_ID,
ENVIRONMENT_VARIABLE_NODE_ID,
)
from core.workflow.nodes.code.code_node import CodeNode
from core.workflow.nodes.code.limits import CodeNodeLimits
from core.workflow.runtime import VariablePool
from core.workflow.system_variable import SystemVariable
from core.workflow.workflow_entry import WorkflowEntry
@@ -96,6 +100,58 @@ class TestWorkflowEntry:
assert output_var is not None
assert output_var.value == "system_user"
def test_single_step_run_injects_code_limits(self):
"""Ensure single-step CodeNode execution configures limits."""
# Arrange
node_id = "code_node"
node_data = {
"type": "code",
"title": "Code",
"desc": None,
"variables": [],
"code_language": CodeLanguage.PYTHON3,
"code": "def main():\n return {}",
"outputs": {},
}
node_config = {"id": node_id, "data": node_data}
class StubWorkflow:
def __init__(self):
self.tenant_id = "tenant"
self.app_id = "app"
self.id = "workflow"
self.graph_dict = {"nodes": [node_config], "edges": []}
def get_node_config_by_id(self, target_id: str):
assert target_id == node_id
return node_config
workflow = StubWorkflow()
variable_pool = VariablePool(system_variables=SystemVariable.empty(), user_inputs={})
expected_limits = CodeNodeLimits(
max_string_length=dify_config.CODE_MAX_STRING_LENGTH,
max_number=dify_config.CODE_MAX_NUMBER,
min_number=dify_config.CODE_MIN_NUMBER,
max_precision=dify_config.CODE_MAX_PRECISION,
max_depth=dify_config.CODE_MAX_DEPTH,
max_number_array_length=dify_config.CODE_MAX_NUMBER_ARRAY_LENGTH,
max_string_array_length=dify_config.CODE_MAX_STRING_ARRAY_LENGTH,
max_object_array_length=dify_config.CODE_MAX_OBJECT_ARRAY_LENGTH,
)
# Act
node, _ = WorkflowEntry.single_step_run(
workflow=workflow,
node_id=node_id,
user_id="user",
user_inputs={},
variable_pool=variable_pool,
)
# Assert
assert isinstance(node, CodeNode)
assert node._limits == expected_limits
def test_mapping_user_inputs_to_variable_pool_with_env_variables(self):
"""Test mapping environment variables from user inputs to variable pool."""
# Initialize variable pool with environment variables

View File

@@ -4,6 +4,7 @@ from datetime import UTC, datetime
from unittest.mock import Mock, patch
import pytest
from sqlalchemy.dialects import postgresql
from sqlalchemy.orm import Session, sessionmaker
from core.workflow.enums import WorkflowExecutionStatus
@@ -104,6 +105,42 @@ class TestDifyAPISQLAlchemyWorkflowRunRepository:
return pause
class TestGetRunsBatchByTimeRange(TestDifyAPISQLAlchemyWorkflowRunRepository):
def test_get_runs_batch_by_time_range_filters_terminal_statuses(
self, repository: DifyAPISQLAlchemyWorkflowRunRepository, mock_session: Mock
):
scalar_result = Mock()
scalar_result.all.return_value = []
mock_session.scalars.return_value = scalar_result
repository.get_runs_batch_by_time_range(
start_from=None,
end_before=datetime(2024, 1, 1),
last_seen=None,
batch_size=50,
)
stmt = mock_session.scalars.call_args[0][0]
compiled_sql = str(
stmt.compile(
dialect=postgresql.dialect(),
compile_kwargs={"literal_binds": True},
)
)
assert "workflow_runs.status" in compiled_sql
for status in (
WorkflowExecutionStatus.SUCCEEDED,
WorkflowExecutionStatus.FAILED,
WorkflowExecutionStatus.STOPPED,
WorkflowExecutionStatus.PARTIAL_SUCCEEDED,
):
assert f"'{status.value}'" in compiled_sql
assert "'running'" not in compiled_sql
assert "'paused'" not in compiled_sql
class TestCreateWorkflowPause(TestDifyAPISQLAlchemyWorkflowRunRepository):
"""Test create_workflow_pause method."""
@@ -181,6 +218,61 @@ class TestCreateWorkflowPause(TestDifyAPISQLAlchemyWorkflowRunRepository):
)
class TestDeleteRunsWithRelated(TestDifyAPISQLAlchemyWorkflowRunRepository):
def test_uses_trigger_log_repository(self, repository: DifyAPISQLAlchemyWorkflowRunRepository, mock_session: Mock):
node_ids_result = Mock()
node_ids_result.all.return_value = []
pause_ids_result = Mock()
pause_ids_result.all.return_value = []
mock_session.scalars.side_effect = [node_ids_result, pause_ids_result]
# app_logs delete, runs delete
mock_session.execute.side_effect = [Mock(rowcount=0), Mock(rowcount=1)]
fake_trigger_repo = Mock()
fake_trigger_repo.delete_by_run_ids.return_value = 3
run = Mock(id="run-1", tenant_id="t1", app_id="a1", workflow_id="w1", triggered_from="tf")
counts = repository.delete_runs_with_related(
[run],
delete_node_executions=lambda session, runs: (2, 1),
delete_trigger_logs=lambda session, run_ids: fake_trigger_repo.delete_by_run_ids(run_ids),
)
fake_trigger_repo.delete_by_run_ids.assert_called_once_with(["run-1"])
assert counts["node_executions"] == 2
assert counts["offloads"] == 1
assert counts["trigger_logs"] == 3
assert counts["runs"] == 1
class TestCountRunsWithRelated(TestDifyAPISQLAlchemyWorkflowRunRepository):
def test_uses_trigger_log_repository(self, repository: DifyAPISQLAlchemyWorkflowRunRepository, mock_session: Mock):
pause_ids_result = Mock()
pause_ids_result.all.return_value = ["pause-1", "pause-2"]
mock_session.scalars.return_value = pause_ids_result
mock_session.scalar.side_effect = [5, 2]
fake_trigger_repo = Mock()
fake_trigger_repo.count_by_run_ids.return_value = 3
run = Mock(id="run-1", tenant_id="t1", app_id="a1", workflow_id="w1", triggered_from="tf")
counts = repository.count_runs_with_related(
[run],
count_node_executions=lambda session, runs: (2, 1),
count_trigger_logs=lambda session, run_ids: fake_trigger_repo.count_by_run_ids(run_ids),
)
fake_trigger_repo.count_by_run_ids.assert_called_once_with(["run-1"])
assert counts["node_executions"] == 2
assert counts["offloads"] == 1
assert counts["trigger_logs"] == 3
assert counts["app_logs"] == 5
assert counts["pauses"] == 2
assert counts["pause_reasons"] == 2
assert counts["runs"] == 1
class TestResumeWorkflowPause(TestDifyAPISQLAlchemyWorkflowRunRepository):
"""Test resume_workflow_pause method."""

View File

@@ -0,0 +1,31 @@
from unittest.mock import Mock
from sqlalchemy.dialects import postgresql
from sqlalchemy.orm import Session
from repositories.sqlalchemy_workflow_trigger_log_repository import SQLAlchemyWorkflowTriggerLogRepository
def test_delete_by_run_ids_executes_delete():
session = Mock(spec=Session)
session.execute.return_value = Mock(rowcount=2)
repo = SQLAlchemyWorkflowTriggerLogRepository(session)
deleted = repo.delete_by_run_ids(["run-1", "run-2"])
stmt = session.execute.call_args[0][0]
compiled_sql = str(stmt.compile(dialect=postgresql.dialect(), compile_kwargs={"literal_binds": True}))
assert "workflow_trigger_logs" in compiled_sql
assert "'run-1'" in compiled_sql
assert "'run-2'" in compiled_sql
assert deleted == 2
def test_delete_by_run_ids_empty_short_circuits():
session = Mock(spec=Session)
repo = SQLAlchemyWorkflowTriggerLogRepository(session)
deleted = repo.delete_by_run_ids([])
session.execute.assert_not_called()
assert deleted == 0

View File

@@ -0,0 +1,327 @@
import datetime
from typing import Any
import pytest
from services.billing_service import SubscriptionPlan
from services.retention.workflow_run import clear_free_plan_expired_workflow_run_logs as cleanup_module
from services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs import WorkflowRunCleanup
class FakeRun:
def __init__(
self,
run_id: str,
tenant_id: str,
created_at: datetime.datetime,
app_id: str = "app-1",
workflow_id: str = "wf-1",
triggered_from: str = "workflow-run",
) -> None:
self.id = run_id
self.tenant_id = tenant_id
self.app_id = app_id
self.workflow_id = workflow_id
self.triggered_from = triggered_from
self.created_at = created_at
class FakeRepo:
def __init__(
self,
batches: list[list[FakeRun]],
delete_result: dict[str, int] | None = None,
count_result: dict[str, int] | None = None,
) -> None:
self.batches = batches
self.call_idx = 0
self.deleted: list[list[str]] = []
self.counted: list[list[str]] = []
self.delete_result = delete_result or {
"runs": 0,
"node_executions": 0,
"offloads": 0,
"app_logs": 0,
"trigger_logs": 0,
"pauses": 0,
"pause_reasons": 0,
}
self.count_result = count_result or {
"runs": 0,
"node_executions": 0,
"offloads": 0,
"app_logs": 0,
"trigger_logs": 0,
"pauses": 0,
"pause_reasons": 0,
}
def get_runs_batch_by_time_range(
self,
start_from: datetime.datetime | None,
end_before: datetime.datetime,
last_seen: tuple[datetime.datetime, str] | None,
batch_size: int,
) -> list[FakeRun]:
if self.call_idx >= len(self.batches):
return []
batch = self.batches[self.call_idx]
self.call_idx += 1
return batch
def delete_runs_with_related(
self, runs: list[FakeRun], delete_node_executions=None, delete_trigger_logs=None
) -> dict[str, int]:
self.deleted.append([run.id for run in runs])
result = self.delete_result.copy()
result["runs"] = len(runs)
return result
def count_runs_with_related(
self, runs: list[FakeRun], count_node_executions=None, count_trigger_logs=None
) -> dict[str, int]:
self.counted.append([run.id for run in runs])
result = self.count_result.copy()
result["runs"] = len(runs)
return result
def plan_info(plan: str, expiration: int) -> SubscriptionPlan:
return SubscriptionPlan(plan=plan, expiration_date=expiration)
def create_cleanup(
monkeypatch: pytest.MonkeyPatch,
repo: FakeRepo,
*,
grace_period_days: int = 0,
whitelist: set[str] | None = None,
**kwargs: Any,
) -> WorkflowRunCleanup:
monkeypatch.setattr(
cleanup_module.dify_config,
"SANDBOX_EXPIRED_RECORDS_CLEAN_GRACEFUL_PERIOD",
grace_period_days,
)
monkeypatch.setattr(
cleanup_module.WorkflowRunCleanup,
"_get_cleanup_whitelist",
lambda self: whitelist or set(),
)
return WorkflowRunCleanup(workflow_run_repo=repo, **kwargs)
def test_filter_free_tenants_billing_disabled(monkeypatch: pytest.MonkeyPatch) -> None:
cleanup = create_cleanup(monkeypatch, repo=FakeRepo([]), days=30, batch_size=10)
monkeypatch.setattr(cleanup_module.dify_config, "BILLING_ENABLED", False)
def fail_bulk(_: list[str]) -> dict[str, SubscriptionPlan]:
raise RuntimeError("should not call")
monkeypatch.setattr(cleanup_module.BillingService, "get_plan_bulk_with_cache", staticmethod(fail_bulk))
tenants = {"t1", "t2"}
free = cleanup._filter_free_tenants(tenants)
assert free == tenants
def test_filter_free_tenants_bulk_mixed(monkeypatch: pytest.MonkeyPatch) -> None:
cleanup = create_cleanup(monkeypatch, repo=FakeRepo([]), days=30, batch_size=10)
monkeypatch.setattr(cleanup_module.dify_config, "BILLING_ENABLED", True)
monkeypatch.setattr(
cleanup_module.BillingService,
"get_plan_bulk_with_cache",
staticmethod(
lambda tenant_ids: {
tenant_id: (plan_info("team", -1) if tenant_id == "t_paid" else plan_info("sandbox", -1))
for tenant_id in tenant_ids
}
),
)
free = cleanup._filter_free_tenants({"t_free", "t_paid", "t_missing"})
assert free == {"t_free", "t_missing"}
def test_filter_free_tenants_respects_grace_period(monkeypatch: pytest.MonkeyPatch) -> None:
cleanup = create_cleanup(monkeypatch, repo=FakeRepo([]), days=30, batch_size=10, grace_period_days=45)
monkeypatch.setattr(cleanup_module.dify_config, "BILLING_ENABLED", True)
now = datetime.datetime.now(datetime.UTC)
within_grace_ts = int((now - datetime.timedelta(days=10)).timestamp())
outside_grace_ts = int((now - datetime.timedelta(days=90)).timestamp())
def fake_bulk(_: list[str]) -> dict[str, SubscriptionPlan]:
return {
"recently_downgraded": plan_info("sandbox", within_grace_ts),
"long_sandbox": plan_info("sandbox", outside_grace_ts),
}
monkeypatch.setattr(cleanup_module.BillingService, "get_plan_bulk_with_cache", staticmethod(fake_bulk))
free = cleanup._filter_free_tenants({"recently_downgraded", "long_sandbox"})
assert free == {"long_sandbox"}
def test_filter_free_tenants_skips_cleanup_whitelist(monkeypatch: pytest.MonkeyPatch) -> None:
cleanup = create_cleanup(
monkeypatch,
repo=FakeRepo([]),
days=30,
batch_size=10,
whitelist={"tenant_whitelist"},
)
monkeypatch.setattr(cleanup_module.dify_config, "BILLING_ENABLED", True)
monkeypatch.setattr(
cleanup_module.BillingService,
"get_plan_bulk_with_cache",
staticmethod(
lambda tenant_ids: {
tenant_id: (plan_info("team", -1) if tenant_id == "t_paid" else plan_info("sandbox", -1))
for tenant_id in tenant_ids
}
),
)
tenants = {"tenant_whitelist", "tenant_regular"}
free = cleanup._filter_free_tenants(tenants)
assert free == {"tenant_regular"}
def test_filter_free_tenants_bulk_failure(monkeypatch: pytest.MonkeyPatch) -> None:
cleanup = create_cleanup(monkeypatch, repo=FakeRepo([]), days=30, batch_size=10)
monkeypatch.setattr(cleanup_module.dify_config, "BILLING_ENABLED", True)
monkeypatch.setattr(
cleanup_module.BillingService,
"get_plan_bulk_with_cache",
staticmethod(lambda tenant_ids: (_ for _ in ()).throw(RuntimeError("boom"))),
)
free = cleanup._filter_free_tenants({"t1", "t2"})
assert free == set()
def test_run_deletes_only_free_tenants(monkeypatch: pytest.MonkeyPatch) -> None:
cutoff = datetime.datetime.now()
repo = FakeRepo(
batches=[
[
FakeRun("run-free", "t_free", cutoff),
FakeRun("run-paid", "t_paid", cutoff),
]
]
)
cleanup = create_cleanup(monkeypatch, repo=repo, days=30, batch_size=10)
monkeypatch.setattr(cleanup_module.dify_config, "BILLING_ENABLED", True)
monkeypatch.setattr(
cleanup_module.BillingService,
"get_plan_bulk_with_cache",
staticmethod(
lambda tenant_ids: {
tenant_id: (plan_info("team", -1) if tenant_id == "t_paid" else plan_info("sandbox", -1))
for tenant_id in tenant_ids
}
),
)
cleanup.run()
assert repo.deleted == [["run-free"]]
def test_run_skips_when_no_free_tenants(monkeypatch: pytest.MonkeyPatch) -> None:
cutoff = datetime.datetime.now()
repo = FakeRepo(batches=[[FakeRun("run-paid", "t_paid", cutoff)]])
cleanup = create_cleanup(monkeypatch, repo=repo, days=30, batch_size=10)
monkeypatch.setattr(cleanup_module.dify_config, "BILLING_ENABLED", True)
monkeypatch.setattr(
cleanup_module.BillingService,
"get_plan_bulk_with_cache",
staticmethod(lambda tenant_ids: {tenant_id: plan_info("team", 1893456000) for tenant_id in tenant_ids}),
)
cleanup.run()
assert repo.deleted == []
def test_run_exits_on_empty_batch(monkeypatch: pytest.MonkeyPatch) -> None:
cleanup = create_cleanup(monkeypatch, repo=FakeRepo([]), days=30, batch_size=10)
cleanup.run()
def test_run_dry_run_skips_deletions(monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]) -> None:
cutoff = datetime.datetime.now()
repo = FakeRepo(
batches=[[FakeRun("run-free", "t_free", cutoff)]],
count_result={
"runs": 0,
"node_executions": 2,
"offloads": 1,
"app_logs": 3,
"trigger_logs": 4,
"pauses": 5,
"pause_reasons": 6,
},
)
cleanup = create_cleanup(monkeypatch, repo=repo, days=30, batch_size=10, dry_run=True)
monkeypatch.setattr(cleanup_module.dify_config, "BILLING_ENABLED", False)
cleanup.run()
assert repo.deleted == []
assert repo.counted == [["run-free"]]
captured = capsys.readouterr().out
assert "Dry run mode enabled" in captured
assert "would delete 1 runs" in captured
assert "related records" in captured
assert "node_executions 2" in captured
assert "offloads 1" in captured
assert "app_logs 3" in captured
assert "trigger_logs 4" in captured
assert "pauses 5" in captured
assert "pause_reasons 6" in captured
def test_between_sets_window_bounds(monkeypatch: pytest.MonkeyPatch) -> None:
start_from = datetime.datetime(2024, 5, 1, 0, 0, 0)
end_before = datetime.datetime(2024, 6, 1, 0, 0, 0)
cleanup = create_cleanup(
monkeypatch, repo=FakeRepo([]), days=30, batch_size=10, start_from=start_from, end_before=end_before
)
assert cleanup.window_start == start_from
assert cleanup.window_end == end_before
def test_between_requires_both_boundaries(monkeypatch: pytest.MonkeyPatch) -> None:
with pytest.raises(ValueError):
create_cleanup(
monkeypatch, repo=FakeRepo([]), days=30, batch_size=10, start_from=datetime.datetime.now(), end_before=None
)
with pytest.raises(ValueError):
create_cleanup(
monkeypatch, repo=FakeRepo([]), days=30, batch_size=10, start_from=None, end_before=datetime.datetime.now()
)
def test_between_requires_end_after_start(monkeypatch: pytest.MonkeyPatch) -> None:
start_from = datetime.datetime(2024, 6, 1, 0, 0, 0)
end_before = datetime.datetime(2024, 5, 1, 0, 0, 0)
with pytest.raises(ValueError):
create_cleanup(
monkeypatch, repo=FakeRepo([]), days=30, batch_size=10, start_from=start_from, end_before=end_before
)

api/uv.lock generated
View File

@@ -1731,7 +1731,7 @@ storage = [
{ name = "opendal", specifier = "~=0.46.0" },
{ name = "oss2", specifier = "==2.18.5" },
{ name = "supabase", specifier = "~=2.18.1" },
{ name = "tos", specifier = "~=2.7.1" },
{ name = "tos", specifier = "~=2.9.0" },
]
tools = [
{ name = "cloudscraper", specifier = "~=1.2.71" },
@@ -6148,7 +6148,7 @@ wheels = [
[[package]]
name = "tos"
version = "2.7.2"
version = "2.9.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "crcmod" },
@@ -6156,8 +6156,9 @@ dependencies = [
{ name = "pytz" },
{ name = "requests" },
{ name = "six" },
{ name = "wrapt" },
]
sdist = { url = "https://files.pythonhosted.org/packages/0c/01/f811af86f1f80d5f289be075c3b281e74bf3fe081cfbe5cfce44954d2c3a/tos-2.7.2.tar.gz", hash = "sha256:3c31257716785bca7b2cac51474ff32543cda94075a7b7aff70d769c15c7b7ed", size = 123407, upload-time = "2024-10-16T15:59:08.634Z" }
sdist = { url = "https://files.pythonhosted.org/packages/9a/b3/13451226f564f88d9db2323e9b7eabcced792a0ad5ee1e333751a7634257/tos-2.9.0.tar.gz", hash = "sha256:861cfc348e770f099f911cb96b2c41774ada6c9c51b7a89d97e0c426074dd99e", size = 157071, upload-time = "2026-01-06T04:13:08.921Z" }
[[package]]
name = "tqdm"
@@ -7146,31 +7147,31 @@ wheels = [
[[package]]
name = "wrapt"
version = "1.17.3"
version = "1.16.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/95/8f/aeb76c5b46e273670962298c23e7ddde79916cb74db802131d49a85e4b7d/wrapt-1.17.3.tar.gz", hash = "sha256:f66eb08feaa410fe4eebd17f2a2c8e2e46d3476e9f8c783daa8e09e0faa666d0", size = 55547, upload-time = "2025-08-12T05:53:21.714Z" }
sdist = { url = "https://files.pythonhosted.org/packages/95/4c/063a912e20bcef7124e0df97282a8af3ff3e4b603ce84c481d6d7346be0a/wrapt-1.16.0.tar.gz", hash = "sha256:5f370f952971e7d17c7d1ead40e49f32345a7f7a5373571ef44d800d06b1899d", size = 53972, upload-time = "2023-11-09T06:33:30.191Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/52/db/00e2a219213856074a213503fdac0511203dceefff26e1daa15250cc01a0/wrapt-1.17.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:273a736c4645e63ac582c60a56b0acb529ef07f78e08dc6bfadf6a46b19c0da7", size = 53482, upload-time = "2025-08-12T05:51:45.79Z" },
{ url = "https://files.pythonhosted.org/packages/5e/30/ca3c4a5eba478408572096fe9ce36e6e915994dd26a4e9e98b4f729c06d9/wrapt-1.17.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5531d911795e3f935a9c23eb1c8c03c211661a5060aab167065896bbf62a5f85", size = 38674, upload-time = "2025-08-12T05:51:34.629Z" },
{ url = "https://files.pythonhosted.org/packages/31/25/3e8cc2c46b5329c5957cec959cb76a10718e1a513309c31399a4dad07eb3/wrapt-1.17.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0610b46293c59a3adbae3dee552b648b984176f8562ee0dba099a56cfbe4df1f", size = 38959, upload-time = "2025-08-12T05:51:56.074Z" },
{ url = "https://files.pythonhosted.org/packages/5d/8f/a32a99fc03e4b37e31b57cb9cefc65050ea08147a8ce12f288616b05ef54/wrapt-1.17.3-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b32888aad8b6e68f83a8fdccbf3165f5469702a7544472bdf41f582970ed3311", size = 82376, upload-time = "2025-08-12T05:52:32.134Z" },
{ url = "https://files.pythonhosted.org/packages/31/57/4930cb8d9d70d59c27ee1332a318c20291749b4fba31f113c2f8ac49a72e/wrapt-1.17.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8cccf4f81371f257440c88faed6b74f1053eef90807b77e31ca057b2db74edb1", size = 83604, upload-time = "2025-08-12T05:52:11.663Z" },
{ url = "https://files.pythonhosted.org/packages/a8/f3/1afd48de81d63dd66e01b263a6fbb86e1b5053b419b9b33d13e1f6d0f7d0/wrapt-1.17.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d8a210b158a34164de8bb68b0e7780041a903d7b00c87e906fb69928bf7890d5", size = 82782, upload-time = "2025-08-12T05:52:12.626Z" },
{ url = "https://files.pythonhosted.org/packages/1e/d7/4ad5327612173b144998232f98a85bb24b60c352afb73bc48e3e0d2bdc4e/wrapt-1.17.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:79573c24a46ce11aab457b472efd8d125e5a51da2d1d24387666cd85f54c05b2", size = 82076, upload-time = "2025-08-12T05:52:33.168Z" },
{ url = "https://files.pythonhosted.org/packages/bb/59/e0adfc831674a65694f18ea6dc821f9fcb9ec82c2ce7e3d73a88ba2e8718/wrapt-1.17.3-cp311-cp311-win32.whl", hash = "sha256:c31eebe420a9a5d2887b13000b043ff6ca27c452a9a22fa71f35f118e8d4bf89", size = 36457, upload-time = "2025-08-12T05:53:03.936Z" },
{ url = "https://files.pythonhosted.org/packages/83/88/16b7231ba49861b6f75fc309b11012ede4d6b0a9c90969d9e0db8d991aeb/wrapt-1.17.3-cp311-cp311-win_amd64.whl", hash = "sha256:0b1831115c97f0663cb77aa27d381237e73ad4f721391a9bfb2fe8bc25fa6e77", size = 38745, upload-time = "2025-08-12T05:53:02.885Z" },
{ url = "https://files.pythonhosted.org/packages/9a/1e/c4d4f3398ec073012c51d1c8d87f715f56765444e1a4b11e5180577b7e6e/wrapt-1.17.3-cp311-cp311-win_arm64.whl", hash = "sha256:5a7b3c1ee8265eb4c8f1b7d29943f195c00673f5ab60c192eba2d4a7eae5f46a", size = 36806, upload-time = "2025-08-12T05:52:53.368Z" },
{ url = "https://files.pythonhosted.org/packages/9f/41/cad1aba93e752f1f9268c77270da3c469883d56e2798e7df6240dcb2287b/wrapt-1.17.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:ab232e7fdb44cdfbf55fc3afa31bcdb0d8980b9b95c38b6405df2acb672af0e0", size = 53998, upload-time = "2025-08-12T05:51:47.138Z" },
{ url = "https://files.pythonhosted.org/packages/60/f8/096a7cc13097a1869fe44efe68dace40d2a16ecb853141394047f0780b96/wrapt-1.17.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:9baa544e6acc91130e926e8c802a17f3b16fbea0fd441b5a60f5cf2cc5c3deba", size = 39020, upload-time = "2025-08-12T05:51:35.906Z" },
{ url = "https://files.pythonhosted.org/packages/33/df/bdf864b8997aab4febb96a9ae5c124f700a5abd9b5e13d2a3214ec4be705/wrapt-1.17.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6b538e31eca1a7ea4605e44f81a48aa24c4632a277431a6ed3f328835901f4fd", size = 39098, upload-time = "2025-08-12T05:51:57.474Z" },
{ url = "https://files.pythonhosted.org/packages/9f/81/5d931d78d0eb732b95dc3ddaeeb71c8bb572fb01356e9133916cd729ecdd/wrapt-1.17.3-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:042ec3bb8f319c147b1301f2393bc19dba6e176b7da446853406d041c36c7828", size = 88036, upload-time = "2025-08-12T05:52:34.784Z" },
{ url = "https://files.pythonhosted.org/packages/ca/38/2e1785df03b3d72d34fc6252d91d9d12dc27a5c89caef3335a1bbb8908ca/wrapt-1.17.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3af60380ba0b7b5aeb329bc4e402acd25bd877e98b3727b0135cb5c2efdaefe9", size = 88156, upload-time = "2025-08-12T05:52:13.599Z" },
{ url = "https://files.pythonhosted.org/packages/b3/8b/48cdb60fe0603e34e05cffda0b2a4adab81fd43718e11111a4b0100fd7c1/wrapt-1.17.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0b02e424deef65c9f7326d8c19220a2c9040c51dc165cddb732f16198c168396", size = 87102, upload-time = "2025-08-12T05:52:14.56Z" },
{ url = "https://files.pythonhosted.org/packages/3c/51/d81abca783b58f40a154f1b2c56db1d2d9e0d04fa2d4224e357529f57a57/wrapt-1.17.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:74afa28374a3c3a11b3b5e5fca0ae03bef8450d6aa3ab3a1e2c30e3a75d023dc", size = 87732, upload-time = "2025-08-12T05:52:36.165Z" },
{ url = "https://files.pythonhosted.org/packages/9e/b1/43b286ca1392a006d5336412d41663eeef1ad57485f3e52c767376ba7e5a/wrapt-1.17.3-cp312-cp312-win32.whl", hash = "sha256:4da9f45279fff3543c371d5ababc57a0384f70be244de7759c85a7f989cb4ebe", size = 36705, upload-time = "2025-08-12T05:53:07.123Z" },
{ url = "https://files.pythonhosted.org/packages/28/de/49493f962bd3c586ab4b88066e967aa2e0703d6ef2c43aa28cb83bf7b507/wrapt-1.17.3-cp312-cp312-win_amd64.whl", hash = "sha256:e71d5c6ebac14875668a1e90baf2ea0ef5b7ac7918355850c0908ae82bcb297c", size = 38877, upload-time = "2025-08-12T05:53:05.436Z" },
{ url = "https://files.pythonhosted.org/packages/f1/48/0f7102fe9cb1e8a5a77f80d4f0956d62d97034bbe88d33e94699f99d181d/wrapt-1.17.3-cp312-cp312-win_arm64.whl", hash = "sha256:604d076c55e2fdd4c1c03d06dc1a31b95130010517b5019db15365ec4a405fc6", size = 36885, upload-time = "2025-08-12T05:52:54.367Z" },
{ url = "https://files.pythonhosted.org/packages/1f/f6/a933bd70f98e9cf3e08167fc5cd7aaaca49147e48411c0bd5ae701bb2194/wrapt-1.17.3-py3-none-any.whl", hash = "sha256:7171ae35d2c33d326ac19dd8facb1e82e5fd04ef8c6c0e394d7af55a55051c22", size = 23591, upload-time = "2025-08-12T05:53:20.674Z" },
{ url = "https://files.pythonhosted.org/packages/fd/03/c188ac517f402775b90d6f312955a5e53b866c964b32119f2ed76315697e/wrapt-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1a5db485fe2de4403f13fafdc231b0dbae5eca4359232d2efc79025527375b09", size = 37313, upload-time = "2023-11-09T06:31:52.168Z" },
{ url = "https://files.pythonhosted.org/packages/0f/16/ea627d7817394db04518f62934a5de59874b587b792300991b3c347ff5e0/wrapt-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:75ea7d0ee2a15733684badb16de6794894ed9c55aa5e9903260922f0482e687d", size = 38164, upload-time = "2023-11-09T06:31:53.522Z" },
{ url = "https://files.pythonhosted.org/packages/7f/a7/f1212ba098f3de0fd244e2de0f8791ad2539c03bef6c05a9fcb03e45b089/wrapt-1.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a452f9ca3e3267cd4d0fcf2edd0d035b1934ac2bd7e0e57ac91ad6b95c0c6389", size = 80890, upload-time = "2023-11-09T06:31:55.247Z" },
{ url = "https://files.pythonhosted.org/packages/b7/96/bb5e08b3d6db003c9ab219c487714c13a237ee7dcc572a555eaf1ce7dc82/wrapt-1.16.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:43aa59eadec7890d9958748db829df269f0368521ba6dc68cc172d5d03ed8060", size = 73118, upload-time = "2023-11-09T06:31:57.023Z" },
{ url = "https://files.pythonhosted.org/packages/6e/52/2da48b35193e39ac53cfb141467d9f259851522d0e8c87153f0ba4205fb1/wrapt-1.16.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:72554a23c78a8e7aa02abbd699d129eead8b147a23c56e08d08dfc29cfdddca1", size = 80746, upload-time = "2023-11-09T06:31:58.686Z" },
{ url = "https://files.pythonhosted.org/packages/11/fb/18ec40265ab81c0e82a934de04596b6ce972c27ba2592c8b53d5585e6bcd/wrapt-1.16.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d2efee35b4b0a347e0d99d28e884dfd82797852d62fcd7ebdeee26f3ceb72cf3", size = 85668, upload-time = "2023-11-09T06:31:59.992Z" },
{ url = "https://files.pythonhosted.org/packages/0f/ef/0ecb1fa23145560431b970418dce575cfaec555ab08617d82eb92afc7ccf/wrapt-1.16.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:6dcfcffe73710be01d90cae08c3e548d90932d37b39ef83969ae135d36ef3956", size = 78556, upload-time = "2023-11-09T06:32:01.942Z" },
{ url = "https://files.pythonhosted.org/packages/25/62/cd284b2b747f175b5a96cbd8092b32e7369edab0644c45784871528eb852/wrapt-1.16.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:eb6e651000a19c96f452c85132811d25e9264d836951022d6e81df2fff38337d", size = 85712, upload-time = "2023-11-09T06:32:03.686Z" },
{ url = "https://files.pythonhosted.org/packages/e5/a7/47b7ff74fbadf81b696872d5ba504966591a3468f1bc86bca2f407baef68/wrapt-1.16.0-cp311-cp311-win32.whl", hash = "sha256:66027d667efe95cc4fa945af59f92c5a02c6f5bb6012bff9e60542c74c75c362", size = 35327, upload-time = "2023-11-09T06:32:05.284Z" },
{ url = "https://files.pythonhosted.org/packages/cf/c3/0084351951d9579ae83a3d9e38c140371e4c6b038136909235079f2e6e78/wrapt-1.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:aefbc4cb0a54f91af643660a0a150ce2c090d3652cf4052a5397fb2de549cd89", size = 37523, upload-time = "2023-11-09T06:32:07.17Z" },
{ url = "https://files.pythonhosted.org/packages/92/17/224132494c1e23521868cdd57cd1e903f3b6a7ba6996b7b8f077ff8ac7fe/wrapt-1.16.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5eb404d89131ec9b4f748fa5cfb5346802e5ee8836f57d516576e61f304f3b7b", size = 37614, upload-time = "2023-11-09T06:32:08.859Z" },
{ url = "https://files.pythonhosted.org/packages/6a/d7/cfcd73e8f4858079ac59d9db1ec5a1349bc486ae8e9ba55698cc1f4a1dff/wrapt-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9090c9e676d5236a6948330e83cb89969f433b1943a558968f659ead07cb3b36", size = 38316, upload-time = "2023-11-09T06:32:10.719Z" },
{ url = "https://files.pythonhosted.org/packages/7e/79/5ff0a5c54bda5aec75b36453d06be4f83d5cd4932cc84b7cb2b52cee23e2/wrapt-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94265b00870aa407bd0cbcfd536f17ecde43b94fb8d228560a1e9d3041462d73", size = 86322, upload-time = "2023-11-09T06:32:12.592Z" },
{ url = "https://files.pythonhosted.org/packages/c4/81/e799bf5d419f422d8712108837c1d9bf6ebe3cb2a81ad94413449543a923/wrapt-1.16.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f2058f813d4f2b5e3a9eb2eb3faf8f1d99b81c3e51aeda4b168406443e8ba809", size = 79055, upload-time = "2023-11-09T06:32:14.394Z" },
{ url = "https://files.pythonhosted.org/packages/62/62/30ca2405de6a20448ee557ab2cd61ab9c5900be7cbd18a2639db595f0b98/wrapt-1.16.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:98b5e1f498a8ca1858a1cdbffb023bfd954da4e3fa2c0cb5853d40014557248b", size = 87291, upload-time = "2023-11-09T06:32:16.201Z" },
{ url = "https://files.pythonhosted.org/packages/49/4e/5d2f6d7b57fc9956bf06e944eb00463551f7d52fc73ca35cfc4c2cdb7aed/wrapt-1.16.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:14d7dc606219cdd7405133c713f2c218d4252f2a469003f8c46bb92d5d095d81", size = 90374, upload-time = "2023-11-09T06:32:18.052Z" },
{ url = "https://files.pythonhosted.org/packages/a6/9b/c2c21b44ff5b9bf14a83252a8b973fb84923764ff63db3e6dfc3895cf2e0/wrapt-1.16.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:49aac49dc4782cb04f58986e81ea0b4768e4ff197b57324dcbd7699c5dfb40b9", size = 83896, upload-time = "2023-11-09T06:32:19.533Z" },
{ url = "https://files.pythonhosted.org/packages/14/26/93a9fa02c6f257df54d7570dfe8011995138118d11939a4ecd82cb849613/wrapt-1.16.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:418abb18146475c310d7a6dc71143d6f7adec5b004ac9ce08dc7a34e2babdc5c", size = 91738, upload-time = "2023-11-09T06:32:20.989Z" },
{ url = "https://files.pythonhosted.org/packages/a2/5b/4660897233eb2c8c4de3dc7cefed114c61bacb3c28327e64150dc44ee2f6/wrapt-1.16.0-cp312-cp312-win32.whl", hash = "sha256:685f568fa5e627e93f3b52fda002c7ed2fa1800b50ce51f6ed1d572d8ab3e7fc", size = 35568, upload-time = "2023-11-09T06:32:22.715Z" },
{ url = "https://files.pythonhosted.org/packages/5c/cc/8297f9658506b224aa4bd71906447dea6bb0ba629861a758c28f67428b91/wrapt-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:dcdba5c86e368442528f7060039eda390cc4091bfd1dca41e8046af7c910dda8", size = 37653, upload-time = "2023-11-09T06:32:24.533Z" },
{ url = "https://files.pythonhosted.org/packages/ff/21/abdedb4cdf6ff41ebf01a74087740a709e2edb146490e4d9beea054b0b7a/wrapt-1.16.0-py3-none-any.whl", hash = "sha256:6906c4100a8fcbf2fa735f6059214bb13b97f75b1a61777fcf6432121ef12ef1", size = 23362, upload-time = "2023-11-09T06:33:28.271Z" },
]


@@ -1478,6 +1478,7 @@ ENABLE_CLEAN_UNUSED_DATASETS_TASK=false
ENABLE_CREATE_TIDB_SERVERLESS_TASK=false
ENABLE_UPDATE_TIDB_SERVERLESS_STATUS_TASK=false
ENABLE_CLEAN_MESSAGES=false
ENABLE_WORKFLOW_RUN_CLEANUP_TASK=false
ENABLE_MAIL_CLEAN_DOCUMENT_NOTIFY_TASK=false
ENABLE_DATASETS_QUEUE_MONITOR=false
ENABLE_CHECK_UPGRADABLE_PLUGIN_TASK=true


@@ -662,6 +662,7 @@ x-shared-env: &shared-api-worker-env
ENABLE_CREATE_TIDB_SERVERLESS_TASK: ${ENABLE_CREATE_TIDB_SERVERLESS_TASK:-false}
ENABLE_UPDATE_TIDB_SERVERLESS_STATUS_TASK: ${ENABLE_UPDATE_TIDB_SERVERLESS_STATUS_TASK:-false}
ENABLE_CLEAN_MESSAGES: ${ENABLE_CLEAN_MESSAGES:-false}
ENABLE_WORKFLOW_RUN_CLEANUP_TASK: ${ENABLE_WORKFLOW_RUN_CLEANUP_TASK:-false}
ENABLE_MAIL_CLEAN_DOCUMENT_NOTIFY_TASK: ${ENABLE_MAIL_CLEAN_DOCUMENT_NOTIFY_TASK:-false}
ENABLE_DATASETS_QUEUE_MONITOR: ${ENABLE_DATASETS_QUEUE_MONITOR:-false}
ENABLE_CHECK_UPGRADABLE_PLUGIN_TASK: ${ENABLE_CHECK_UPGRADABLE_PLUGIN_TASK:-true}


@@ -31,6 +31,8 @@ NEXT_PUBLIC_UPLOAD_IMAGE_AS_ICON=false
# The timeout for text generation, in milliseconds
NEXT_PUBLIC_TEXT_GENERATION_TIMEOUT_MS=60000
# Used by web/docker/entrypoint.sh to overwrite/export NEXT_PUBLIC_TEXT_GENERATION_TIMEOUT_MS at container startup (Docker only)
TEXT_GENERATION_TIMEOUT_MS=60000
# CSP https://developer.mozilla.org/en-US/docs/Web/HTTP/CSP
NEXT_PUBLIC_CSP_WHITELIST=

web/.nvmrc Normal file

@@ -0,0 +1 @@
22.21.1


@@ -1,8 +1,10 @@
import type { Preview } from '@storybook/react'
import type { Resource } from 'i18next'
import { withThemeByDataAttribute } from '@storybook/addon-themes'
import { QueryClient, QueryClientProvider } from '@tanstack/react-query'
import { ToastProvider } from '../app/components/base/toast'
import I18N from '../app/components/i18n'
import { I18nClientProvider as I18N } from '../app/components/provider/i18n'
import commonEnUS from '../i18n/en-US/common.json'
import '../app/styles/globals.css'
import '../app/styles/markdown.scss'
@@ -16,6 +18,14 @@ const queryClient = new QueryClient({
},
})
const storyResources: Resource = {
'en-US': {
// Preload the most common namespace to avoid missing keys during initial render;
// other namespaces will be loaded on demand via resourcesToBackend.
common: commonEnUS as unknown as Record<string, unknown>,
},
}
export const decorators = [
withThemeByDataAttribute({
themes: {
@@ -28,7 +38,7 @@ export const decorators = [
(Story) => {
return (
<QueryClientProvider client={queryClient}>
<I18N locale="en-US">
<I18N locale="en-US" resource={storyResources}>
<ToastProvider>
<Story />
</ToastProvider>
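The `resource` prop above preloads the `common` namespace; the comment refers to i18next's `i18next-resources-to-backend` plugin for loading the remaining namespaces lazily. A rough sketch of that wiring, for orientation only (the real `I18nClientProvider` is not shown in this diff, and the import path for the locale files is an assumption):

```ts
import i18next from 'i18next'
import resourcesToBackend from 'i18next-resources-to-backend'
import { initReactI18next } from 'react-i18next'

// Namespaces passed in via `resource` (e.g. `common`) are available immediately;
// any other namespace is resolved through this dynamic import on first use.
i18next
  .use(resourcesToBackend((language: string, namespace: string) =>
    import(`../i18n/${language}/${namespace}.json`)))
  .use(initReactI18next)
  .init({
    lng: 'en-US',
    fallbackLng: 'en-US',
    partialBundledLanguages: true, // keep bundled resources and lazy-loaded ones side by side
  })
```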


@@ -1,5 +1,5 @@
# base image
FROM node:22-alpine3.21 AS base
FROM node:22.21.1-alpine3.23 AS base
LABEL maintainer="takatost@gmail.com"
# if you located in China, you can use aliyun mirror to speed up


@@ -11,6 +11,16 @@ Before starting the web frontend service, please make sure the following environ
- [Node.js](https://nodejs.org) >= v22.11.x
- [pnpm](https://pnpm.io) v10.x
> [!TIP]
> It is recommended to install and enable Corepack to manage package manager versions automatically:
>
> ```bash
> npm install -g corepack
> corepack enable
> ```
>
> Learn more: [Corepack](https://github.com/nodejs/corepack#readme)
First, install the dependencies:
```bash


@@ -2,11 +2,11 @@ import Marketplace from '@/app/components/plugins/marketplace'
import PluginPage from '@/app/components/plugins/plugin-page'
import PluginsPanel from '@/app/components/plugins/plugin-page/plugins-panel'
const PluginList = async () => {
const PluginList = () => {
return (
<PluginPage
plugins={<PluginsPanel />}
marketplace={<Marketplace pluginTypeSwitchClassName="top-[60px]" showSearchParams={false} />}
marketplace={<Marketplace pluginTypeSwitchClassName="top-[60px]" />}
/>
)
}


@@ -26,6 +26,7 @@ import { NEED_REFRESH_APP_LIST_KEY } from '@/config'
import { useAppContext } from '@/context/app-context'
import { useProviderContext } from '@/context/provider-context'
import { copyApp, deleteApp, exportAppConfig, updateAppInfo } from '@/service/apps'
import { useInvalidateAppList } from '@/service/use-apps'
import { fetchWorkflowDraft } from '@/service/workflow'
import { AppModeEnum } from '@/types/app'
import { getRedirection } from '@/utils/app-redirection'
@@ -66,6 +67,7 @@ const AppInfo = ({ expand, onlyShowDetail = false, openState = false, onDetailEx
const { onPlanInfoChanged } = useProviderContext()
const appDetail = useAppStore(state => state.appDetail)
const setAppDetail = useAppStore(state => state.setAppDetail)
const invalidateAppList = useInvalidateAppList()
const [open, setOpen] = useState(openState)
const [showEditModal, setShowEditModal] = useState(false)
const [showDuplicateModal, setShowDuplicateModal] = useState(false)
@@ -191,6 +193,7 @@ const AppInfo = ({ expand, onlyShowDetail = false, openState = false, onDetailEx
try {
await deleteApp(appDetail.id)
notify({ type: 'success', message: t('appDeleted', { ns: 'app' }) })
invalidateAppList()
onPlanInfoChanged()
setAppDetail()
replace('/apps')
@@ -202,7 +205,7 @@ const AppInfo = ({ expand, onlyShowDetail = false, openState = false, onDetailEx
})
}
setShowConfirmDelete(false)
}, [appDetail, notify, onPlanInfoChanged, replace, setAppDetail, t])
}, [appDetail, invalidateAppList, notify, onPlanInfoChanged, replace, setAppDetail, t])
const { isCurrentWorkspaceEditor } = useAppContext()
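The newly added `invalidateAppList()` call makes the deleted app disappear from the cached app list without a manual refresh. The hook itself lives in `@/service/use-apps` and is not part of this diff; a typical TanStack Query wrapper would look roughly like this (assumed sketch — the query key is a guess):

```ts
import { useQueryClient } from '@tanstack/react-query'

// Assumed shape of the hook imported from '@/service/use-apps'.
export const useInvalidateAppList = () => {
  const queryClient = useQueryClient()
  return () => {
    // Mark all cached app-list pages as stale so the next render refetches them.
    queryClient.invalidateQueries({ queryKey: ['appList'] })
  }
}
```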


@@ -83,7 +83,7 @@ const ConfigModal: FC<IConfigModalProps> = ({
if (!isJsonObject || !tempPayload.json_schema)
return ''
try {
return JSON.stringify(JSON.parse(tempPayload.json_schema), null, 2)
return tempPayload.json_schema
}
catch {
return ''


@@ -0,0 +1,228 @@
/**
* Tests for race condition prevention logic in chat message loading.
* These tests verify the core algorithms used in fetchData and loadMoreMessages
* to prevent race conditions, infinite loops, and stale state issues.
* See GitHub issue #30259 for context.
*/
// Test the race condition prevention logic in isolation
describe('Chat Message Loading Race Condition Prevention', () => {
beforeEach(() => {
vi.clearAllMocks()
vi.useFakeTimers()
})
afterEach(() => {
vi.useRealTimers()
})
describe('Request Deduplication', () => {
it('should deduplicate messages with same IDs when merging responses', async () => {
// Simulate the deduplication logic used in setAllChatItems
const existingItems = [
{ id: 'msg-1', isAnswer: false },
{ id: 'msg-2', isAnswer: true },
]
const newItems = [
{ id: 'msg-2', isAnswer: true }, // duplicate
{ id: 'msg-3', isAnswer: false }, // new
]
const existingIds = new Set(existingItems.map(item => item.id))
const uniqueNewItems = newItems.filter(item => !existingIds.has(item.id))
const mergedItems = [...uniqueNewItems, ...existingItems]
expect(uniqueNewItems).toHaveLength(1)
expect(uniqueNewItems[0].id).toBe('msg-3')
expect(mergedItems).toHaveLength(3)
})
})
describe('Retry Counter Logic', () => {
const MAX_RETRY_COUNT = 3
it('should increment retry counter when no unique items found', () => {
const state = { retryCount: 0 }
const prevItemsLength = 5
// Simulate the retry logic from loadMoreMessages
const uniqueNewItemsLength = 0
if (uniqueNewItemsLength === 0) {
if (state.retryCount < MAX_RETRY_COUNT && prevItemsLength > 1) {
state.retryCount++
}
else {
state.retryCount = 0
}
}
expect(state.retryCount).toBe(1)
})
it('should reset retry counter after MAX_RETRY_COUNT attempts', () => {
const state = { retryCount: MAX_RETRY_COUNT }
const prevItemsLength = 5
const uniqueNewItemsLength = 0
if (uniqueNewItemsLength === 0) {
if (state.retryCount < MAX_RETRY_COUNT && prevItemsLength > 1) {
state.retryCount++
}
else {
state.retryCount = 0
}
}
expect(state.retryCount).toBe(0)
})
it('should reset retry counter when unique items are found', () => {
const state = { retryCount: 2 }
// Simulate finding unique items (length > 0)
const processRetry = (uniqueCount: number) => {
if (uniqueCount === 0) {
state.retryCount++
}
else {
state.retryCount = 0
}
}
processRetry(3) // Found 3 unique items
expect(state.retryCount).toBe(0)
})
})
describe('Throttling Logic', () => {
const SCROLL_DEBOUNCE_MS = 200
it('should throttle requests within debounce window', () => {
const state = { lastLoadTime: 0 }
const results: boolean[] = []
const tryRequest = (now: number): boolean => {
if (now - state.lastLoadTime >= SCROLL_DEBOUNCE_MS) {
state.lastLoadTime = now
return true
}
return false
}
// First request - should pass
results.push(tryRequest(1000))
// Second request within debounce - should be blocked
results.push(tryRequest(1100))
// Third request after debounce - should pass
results.push(tryRequest(1300))
expect(results).toEqual([true, false, true])
})
})
describe('AbortController Cancellation', () => {
it('should abort previous request when new request starts', () => {
const state: { controller: AbortController | null } = { controller: null }
const abortedSignals: boolean[] = []
// First request
const controller1 = new AbortController()
state.controller = controller1
// Second request - should abort first
if (state.controller) {
state.controller.abort()
abortedSignals.push(state.controller.signal.aborted)
}
const controller2 = new AbortController()
state.controller = controller2
expect(abortedSignals).toEqual([true])
expect(controller1.signal.aborted).toBe(true)
expect(controller2.signal.aborted).toBe(false)
})
})
describe('Stale Response Detection', () => {
it('should ignore responses from outdated requests', () => {
const state = { requestId: 0 }
const processedResponses: number[] = []
// Simulate concurrent requests - each gets its own captured ID
const request1Id = ++state.requestId
const request2Id = ++state.requestId
// Request 2 completes first (current requestId is 2)
if (request2Id === state.requestId) {
processedResponses.push(request2Id)
}
// Request 1 completes later (stale - requestId is still 2)
if (request1Id === state.requestId) {
processedResponses.push(request1Id)
}
expect(processedResponses).toEqual([2])
expect(processedResponses).not.toContain(1)
})
})
describe('Pagination Anchor Management', () => {
it('should track oldest answer ID for pagination', () => {
let oldestAnswerIdRef: string | undefined
const chatItems = [
{ id: 'question-1', isAnswer: false },
{ id: 'answer-1', isAnswer: true },
{ id: 'question-2', isAnswer: false },
{ id: 'answer-2', isAnswer: true },
]
// Update pagination anchor with oldest answer ID
const answerItems = chatItems.filter(item => item.isAnswer)
const oldestAnswer = answerItems[answerItems.length - 1]
if (oldestAnswer?.id) {
oldestAnswerIdRef = oldestAnswer.id
}
expect(oldestAnswerIdRef).toBe('answer-2')
})
it('should use pagination anchor in subsequent requests', () => {
const oldestAnswerIdRef = 'answer-123'
const params: { conversation_id: string, limit: number, first_id?: string } = {
conversation_id: 'conv-1',
limit: 10,
}
if (oldestAnswerIdRef) {
params.first_id = oldestAnswerIdRef
}
expect(params.first_id).toBe('answer-123')
})
})
})
describe('Functional State Update Pattern', () => {
it('should use functional update to avoid stale closures', () => {
// Simulate the functional update pattern used in setAllChatItems
let state = [{ id: '1' }, { id: '2' }]
const newItems = [{ id: '3' }, { id: '2' }] // id '2' is duplicate
// Functional update pattern
const updater = (prevItems: { id: string }[]) => {
const existingIds = new Set(prevItems.map(item => item.id))
const uniqueNewItems = newItems.filter(item => !existingIds.has(item.id))
return [...uniqueNewItems, ...prevItems]
}
state = updater(state)
expect(state).toHaveLength(3)
expect(state.map(i => i.id)).toEqual(['3', '1', '2'])
})
})


@@ -209,7 +209,6 @@ type IDetailPanel = {
function DetailPanel({ detail, onFeedback }: IDetailPanel) {
const MIN_ITEMS_FOR_SCROLL_LOADING = 8
const SCROLL_THRESHOLD_PX = 50
const SCROLL_DEBOUNCE_MS = 200
const { userProfile: { timezone } } = useAppContext()
const { formatTime } = useTimestamp()
@@ -228,69 +227,103 @@ function DetailPanel({ detail, onFeedback }: IDetailPanel) {
const [hasMore, setHasMore] = useState(true)
const [varValues, setVarValues] = useState<Record<string, string>>({})
const isLoadingRef = useRef(false)
const abortControllerRef = useRef<AbortController | null>(null)
const requestIdRef = useRef(0)
const lastLoadTimeRef = useRef(0)
const retryCountRef = useRef(0)
const oldestAnswerIdRef = useRef<string | undefined>(undefined)
const MAX_RETRY_COUNT = 3
const [allChatItems, setAllChatItems] = useState<IChatItem[]>([])
const [chatItemTree, setChatItemTree] = useState<ChatItemInTree[]>([])
const [threadChatItems, setThreadChatItems] = useState<IChatItem[]>([])
const fetchData = useCallback(async () => {
if (isLoadingRef.current)
if (isLoadingRef.current || !hasMore)
return
// Cancel any in-flight request
if (abortControllerRef.current) {
abortControllerRef.current.abort()
}
const controller = new AbortController()
abortControllerRef.current = controller
const currentRequestId = ++requestIdRef.current
try {
isLoadingRef.current = true
if (!hasMore)
return
const params: ChatMessagesRequest = {
conversation_id: detail.id,
limit: 10,
}
// Use the oldest answer item ID for pagination
const answerItems = allChatItems.filter(item => item.isAnswer)
const oldestAnswerItem = answerItems[answerItems.length - 1]
if (oldestAnswerItem?.id)
params.first_id = oldestAnswerItem.id
// Use ref for pagination anchor to avoid stale closure issues
if (oldestAnswerIdRef.current)
params.first_id = oldestAnswerIdRef.current
const messageRes = await fetchChatMessages({
url: `/apps/${appDetail?.id}/chat-messages`,
params,
})
// Ignore stale responses
if (currentRequestId !== requestIdRef.current || controller.signal.aborted)
return
if (messageRes.data.length > 0) {
const varValues = messageRes.data.at(-1)!.inputs
setVarValues(varValues)
}
setHasMore(messageRes.has_more)
const newAllChatItems = [
...getFormattedChatList(messageRes.data, detail.id, timezone!, t('dateTimeFormat', { ns: 'appLog' }) as string),
...allChatItems,
]
setAllChatItems(newAllChatItems)
const newItems = getFormattedChatList(messageRes.data, detail.id, timezone!, t('dateTimeFormat', { ns: 'appLog' }) as string)
let tree = buildChatItemTree(newAllChatItems)
if (messageRes.has_more === false && detail?.model_config?.configs?.introduction) {
tree = [{
id: 'introduction',
isAnswer: true,
isOpeningStatement: true,
content: detail?.model_config?.configs?.introduction ?? 'hello',
feedbackDisabled: true,
children: tree,
}]
}
setChatItemTree(tree)
const lastMessageId = newAllChatItems.length > 0 ? newAllChatItems[newAllChatItems.length - 1].id : undefined
setThreadChatItems(getThreadMessages(tree, lastMessageId))
// Use functional update to avoid stale state issues
setAllChatItems((prevItems: IChatItem[]) => {
const existingIds = new Set(prevItems.map(item => item.id))
const uniqueNewItems = newItems.filter(item => !existingIds.has(item.id))
return [...uniqueNewItems, ...prevItems]
})
}
catch (err) {
catch (err: unknown) {
if (err instanceof Error && err.name === 'AbortError')
return
console.error('fetchData execution failed:', err)
}
finally {
isLoadingRef.current = false
if (abortControllerRef.current === controller)
abortControllerRef.current = null
}
}, [allChatItems, detail.id, hasMore, timezone, t, appDetail, detail?.model_config?.configs?.introduction])
}, [detail.id, hasMore, timezone, t, appDetail, detail?.model_config?.configs?.introduction])
// Derive chatItemTree, threadChatItems, and oldestAnswerIdRef from allChatItems
useEffect(() => {
if (allChatItems.length === 0)
return
let tree = buildChatItemTree(allChatItems)
if (!hasMore && detail?.model_config?.configs?.introduction) {
tree = [{
id: 'introduction',
isAnswer: true,
isOpeningStatement: true,
content: detail?.model_config?.configs?.introduction ?? 'hello',
feedbackDisabled: true,
children: tree,
}]
}
setChatItemTree(tree)
const lastMessageId = allChatItems.length > 0 ? allChatItems[allChatItems.length - 1].id : undefined
setThreadChatItems(getThreadMessages(tree, lastMessageId))
// Update pagination anchor ref with the oldest answer ID
const answerItems = allChatItems.filter(item => item.isAnswer)
const oldestAnswer = answerItems[answerItems.length - 1]
if (oldestAnswer?.id)
oldestAnswerIdRef.current = oldestAnswer.id
}, [allChatItems, hasMore, detail?.model_config?.configs?.introduction])
const switchSibling = useCallback((siblingMessageId: string) => {
const newThreadChatItems = getThreadMessages(chatItemTree, siblingMessageId)
@@ -397,6 +430,12 @@ function DetailPanel({ detail, onFeedback }: IDetailPanel) {
if (isLoading || !hasMore || !appDetail?.id || !detail.id)
return
// Throttle using ref to persist across re-renders
const now = Date.now()
if (now - lastLoadTimeRef.current < SCROLL_DEBOUNCE_MS)
return
lastLoadTimeRef.current = now
setIsLoading(true)
try {
@@ -405,15 +444,9 @@ function DetailPanel({ detail, onFeedback }: IDetailPanel) {
limit: 10,
}
// Use the earliest response item as the first_id
const answerItems = allChatItems.filter(item => item.isAnswer)
const oldestAnswerItem = answerItems[answerItems.length - 1]
if (oldestAnswerItem?.id) {
params.first_id = oldestAnswerItem.id
}
else if (allChatItems.length > 0 && allChatItems[0]?.id) {
const firstId = allChatItems[0].id.replace('question-', '').replace('answer-', '')
params.first_id = firstId
// Use ref for pagination anchor to avoid stale closure issues
if (oldestAnswerIdRef.current) {
params.first_id = oldestAnswerIdRef.current
}
const messageRes = await fetchChatMessages({
@@ -423,6 +456,7 @@ function DetailPanel({ detail, onFeedback }: IDetailPanel) {
if (!messageRes.data || messageRes.data.length === 0) {
setHasMore(false)
retryCountRef.current = 0
return
}
@@ -440,91 +474,36 @@ function DetailPanel({ detail, onFeedback }: IDetailPanel) {
t('dateTimeFormat', { ns: 'appLog' }) as string,
)
// Check for duplicate messages
const existingIds = new Set(allChatItems.map(item => item.id))
const uniqueNewItems = newItems.filter(item => !existingIds.has(item.id))
// Use functional update to get latest state and avoid stale closures
setAllChatItems((prevItems: IChatItem[]) => {
const existingIds = new Set(prevItems.map(item => item.id))
const uniqueNewItems = newItems.filter(item => !existingIds.has(item.id))
if (uniqueNewItems.length === 0) {
if (allChatItems.length > 1) {
const nextId = allChatItems[1].id.replace('question-', '').replace('answer-', '')
const retryParams = {
...params,
first_id: nextId,
// If no unique items and we haven't exceeded retry limit, signal retry needed
if (uniqueNewItems.length === 0) {
if (retryCountRef.current < MAX_RETRY_COUNT && prevItems.length > 1) {
retryCountRef.current++
return prevItems
}
const retryRes = await fetchChatMessages({
url: `/apps/${appDetail.id}/chat-messages`,
params: retryParams,
})
if (retryRes.data && retryRes.data.length > 0) {
const retryItems = getFormattedChatList(
retryRes.data,
detail.id,
timezone!,
t('dateTimeFormat', { ns: 'appLog' }) as string,
)
const retryUniqueItems = retryItems.filter(item => !existingIds.has(item.id))
if (retryUniqueItems.length > 0) {
const newAllChatItems = [
...retryUniqueItems,
...allChatItems,
]
setAllChatItems(newAllChatItems)
let tree = buildChatItemTree(newAllChatItems)
if (retryRes.has_more === false && detail?.model_config?.configs?.introduction) {
tree = [{
id: 'introduction',
isAnswer: true,
isOpeningStatement: true,
content: detail?.model_config?.configs?.introduction ?? 'hello',
feedbackDisabled: true,
children: tree,
}]
}
setChatItemTree(tree)
setHasMore(retryRes.has_more)
setThreadChatItems(getThreadMessages(tree, newAllChatItems.at(-1)?.id))
return
}
else {
retryCountRef.current = 0
return prevItems
}
}
}
const newAllChatItems = [
...uniqueNewItems,
...allChatItems,
]
setAllChatItems(newAllChatItems)
let tree = buildChatItemTree(newAllChatItems)
if (messageRes.has_more === false && detail?.model_config?.configs?.introduction) {
tree = [{
id: 'introduction',
isAnswer: true,
isOpeningStatement: true,
content: detail?.model_config?.configs?.introduction ?? 'hello',
feedbackDisabled: true,
children: tree,
}]
}
setChatItemTree(tree)
setThreadChatItems(getThreadMessages(tree, newAllChatItems.at(-1)?.id))
retryCountRef.current = 0
return [...uniqueNewItems, ...prevItems]
})
}
catch (error) {
console.error(error)
setHasMore(false)
retryCountRef.current = 0
}
finally {
setIsLoading(false)
}
}, [allChatItems, detail.id, hasMore, isLoading, timezone, t, appDetail])
}, [detail.id, hasMore, isLoading, timezone, t, appDetail, detail?.model_config?.configs?.introduction])
useEffect(() => {
const scrollableDiv = document.getElementById('scrollableDiv')
@@ -556,24 +535,11 @@ function DetailPanel({ detail, onFeedback }: IDetailPanel) {
if (!scrollContainer)
return
let lastLoadTime = 0
const throttleDelay = 200
const handleScroll = () => {
const currentScrollTop = scrollContainer!.scrollTop
const scrollHeight = scrollContainer!.scrollHeight
const clientHeight = scrollContainer!.clientHeight
const isNearTop = currentScrollTop < 30
const distanceFromTop = currentScrollTop
const distanceFromBottom = scrollHeight - currentScrollTop - clientHeight
const now = Date.now()
const isNearTop = distanceFromTop < 30
// eslint-disable-next-line sonarjs/no-unused-vars
const _distanceFromBottom = distanceFromBottom < 30
if (isNearTop && hasMore && !isLoading && (now - lastLoadTime > throttleDelay)) {
lastLoadTime = now
if (isNearTop && hasMore && !isLoading) {
loadMoreMessages()
}
}
@@ -619,36 +585,6 @@ function DetailPanel({ detail, onFeedback }: IDetailPanel) {
return () => cancelAnimationFrame(raf)
}, [])
// Add scroll listener to ensure loading is triggered
useEffect(() => {
if (threadChatItems.length >= MIN_ITEMS_FOR_SCROLL_LOADING && hasMore) {
const scrollableDiv = document.getElementById('scrollableDiv')
if (scrollableDiv) {
let loadingTimeout: NodeJS.Timeout | null = null
const handleScroll = () => {
const { scrollTop } = scrollableDiv
// Trigger loading when scrolling near the top
if (scrollTop < SCROLL_THRESHOLD_PX && !isLoadingRef.current) {
if (loadingTimeout)
clearTimeout(loadingTimeout)
loadingTimeout = setTimeout(fetchData, SCROLL_DEBOUNCE_MS) // 200ms debounce
}
}
scrollableDiv.addEventListener('scroll', handleScroll)
return () => {
scrollableDiv.removeEventListener('scroll', handleScroll)
if (loadingTimeout)
clearTimeout(loadingTimeout)
}
}
}
}, [threadChatItems.length, hasMore, fetchData])
return (
<div ref={ref} className="flex h-full flex-col rounded-xl border-[0.5px] border-components-panel-border">
{/* Panel Header */}


@@ -10,6 +10,7 @@ const mockReplace = vi.fn()
const mockRouter = { replace: mockReplace }
vi.mock('next/navigation', () => ({
useRouter: () => mockRouter,
useSearchParams: () => new URLSearchParams(''),
}))
// Mock app context


@@ -12,6 +12,7 @@ import { useDebounceFn } from 'ahooks'
import dynamic from 'next/dynamic'
import {
useRouter,
useSearchParams,
} from 'next/navigation'
import { parseAsString, useQueryState } from 'nuqs'
import { useCallback, useEffect, useRef, useState } from 'react'
@@ -28,6 +29,7 @@ import { CheckModal } from '@/hooks/use-pay'
import { useInfiniteAppList } from '@/service/use-apps'
import { AppModeEnum } from '@/types/app'
import { cn } from '@/utils/classnames'
import { isServer } from '@/utils/client'
import AppCard from './app-card'
import { AppCardSkeleton } from './app-card-skeleton'
import Empty from './empty'
@@ -36,6 +38,16 @@ import useAppsQueryState from './hooks/use-apps-query-state'
import { useDSLDragDrop } from './hooks/use-dsl-drag-drop'
import NewAppCard from './new-app-card'
// Define valid tabs at module scope to avoid re-creation on each render and stale closures
const validTabs = new Set<string | AppModeEnum>([
'all',
AppModeEnum.WORKFLOW,
AppModeEnum.ADVANCED_CHAT,
AppModeEnum.CHAT,
AppModeEnum.AGENT_CHAT,
AppModeEnum.COMPLETION,
])
const TagManagementModal = dynamic(() => import('@/app/components/base/tag-management'), {
ssr: false,
})
@@ -47,12 +59,41 @@ const List = () => {
const { t } = useTranslation()
const { systemFeatures } = useGlobalPublicStore()
const router = useRouter()
const searchParams = useSearchParams()
const { isCurrentWorkspaceEditor, isCurrentWorkspaceDatasetOperator, isLoadingCurrentWorkspace } = useAppContext()
const showTagManagementModal = useTagStore(s => s.showTagManagementModal)
const [activeTab, setActiveTab] = useQueryState(
'category',
parseAsString.withDefault('all').withOptions({ history: 'push' }),
)
// valid tabs for apps list; anything else should fall back to 'all'
// 1) Normalize legacy/incorrect query params like ?mode=discover -> ?category=all
useEffect(() => {
// avoid running on server
if (isServer)
return
const mode = searchParams.get('mode')
if (!mode)
return
const url = new URL(window.location.href)
url.searchParams.delete('mode')
if (validTabs.has(mode)) {
// migrate to category key
url.searchParams.set('category', mode)
}
else {
url.searchParams.set('category', 'all')
}
router.replace(url.pathname + url.search)
}, [router, searchParams])
// 2) If category has an invalid value (e.g., 'discover'), reset to 'all'
useEffect(() => {
if (!validTabs.has(activeTab))
setActiveTab('all')
}, [activeTab, setActiveTab])
const { query: { tagIDs = [], keywords = '', isCreatedByMe: queryIsCreatedByMe = false }, setQuery } = useAppsQueryState()
const [isCreatedByMe, setIsCreatedByMe] = useState(queryIsCreatedByMe)
const [tagFilterValue, setTagFilterValue] = useState<string[]>(tagIDs)
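Taken together, the two effects above first migrate a legacy `?mode=` parameter onto the `category` key and then reset any value that is not a known tab. Restated as a pure function for illustration (not part of the diff, same decision logic):

```ts
// Illustrative only: mirrors the normalization performed by the two effects above.
const normalizeCategoryParam = (search: URLSearchParams, tabs: Set<string>): string => {
  const mode = search.get('mode')
  // 1) A legacy ?mode=... value takes precedence and is migrated to the category key.
  if (mode)
    return tabs.has(mode) ? mode : 'all'
  // 2) An unknown ?category=... value (e.g. 'discover') falls back to 'all'.
  const category = search.get('category') ?? 'all'
  return tabs.has(category) ? category : 'all'
}

// normalizeCategoryParam(new URLSearchParams('mode=workflow'), new Set(['all', 'workflow']))  // -> 'workflow'
// normalizeCategoryParam(new URLSearchParams('mode=discover'), new Set(['all', 'workflow']))  // -> 'all'
```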


@@ -54,7 +54,7 @@ const pageNameEnrichmentPlugin = (): amplitude.Types.EnrichmentPlugin => {
}
const AmplitudeProvider: FC<IAmplitudeProps> = ({
sessionReplaySampleRate = 1,
sessionReplaySampleRate = 0.5,
}) => {
useEffect(() => {
// Only enable in Saas edition with valid API key

View File

@@ -37,7 +37,7 @@ export const getProcessedInputs = (inputs: Record<string, any>, inputsForm: Inpu
return
}
if (!inputValue)
if (inputValue == null)
return
if (item.type === InputVarType.singleFile) {
@@ -52,6 +52,20 @@ export const getProcessedInputs = (inputs: Record<string, any>, inputsForm: Inpu
else
processedInputs[item.variable] = getProcessedFiles(inputValue)
}
else if (item.type === InputVarType.jsonObject) {
// Prefer sending an object if the user entered valid JSON; otherwise keep the raw string.
try {
const v = typeof inputValue === 'string' ? JSON.parse(inputValue) : inputValue
if (v && typeof v === 'object' && !Array.isArray(v))
processedInputs[item.variable] = v
else
processedInputs[item.variable] = inputValue
}
catch {
// keep original string; backend will parse/validate
processedInputs[item.variable] = inputValue
}
}
})
return processedInputs
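To make the new `jsonObject` branch concrete, here is what it does to a couple of inputs (illustrative call; the form item is simplified to the fields the branch actually reads):

```ts
// A jsonObject value holding valid JSON text is parsed into an object before submission;
// anything that fails to parse (or is not a plain object) is passed through for the backend to validate.
const inputsForm = [{ variable: 'settings', type: InputVarType.jsonObject }] as any

getProcessedInputs({ settings: '{"temperature": 0.2}' }, inputsForm)
// -> { settings: { temperature: 0.2 } }

getProcessedInputs({ settings: '{not valid json' }, inputsForm)
// -> { settings: '{not valid json' }
```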


@@ -11,6 +11,7 @@ import DifyLogo from '@/app/components/base/logo/dify-logo'
import Tooltip from '@/app/components/base/tooltip'
import { useGlobalPublicStore } from '@/context/global-public-context'
import { cn } from '@/utils/classnames'
import { isClient } from '@/utils/client'
import {
useEmbeddedChatbotContext,
} from '../context'
@@ -40,7 +41,6 @@ const Header: FC<IHeaderProps> = ({
allInputsHidden,
} = useEmbeddedChatbotContext()
const isClient = typeof window !== 'undefined'
const isIframe = isClient ? window.self !== window.top : false
const [parentOrigin, setParentOrigin] = useState('')
const [showToggleExpandButton, setShowToggleExpandButton] = useState(false)
@@ -66,7 +66,9 @@ const Header: FC<IHeaderProps> = ({
const listener = (event: MessageEvent) => handleMessageReceived(event)
window.addEventListener('message', listener)
window.parent.postMessage({ type: 'dify-chatbot-iframe-ready' }, '*')
// Security: Use document.referrer to get parent origin
const targetOrigin = document.referrer ? new URL(document.referrer).origin : '*'
window.parent.postMessage({ type: 'dify-chatbot-iframe-ready' }, targetOrigin)
return () => window.removeEventListener('message', listener)
}, [isIframe, handleMessageReceived])
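Posting the ready message to `document.referrer`'s origin (instead of `'*'`) ensures only the page that actually embedded the chatbot receives it. The embedding page is not part of this diff, but a matching parent-side listener would be just as strict about origins — an illustrative sketch (the chatbot origin, iframe id, and follow-up message type are assumptions):

```ts
// Hypothetical parent-page counterpart to the handshake above.
const CHATBOT_ORIGIN = 'https://udify.app' // assumed origin of the embedded chatbot iframe

window.addEventListener('message', (event: MessageEvent) => {
  // Ignore messages that do not come from the chatbot iframe's origin.
  if (event.origin !== CHATBOT_ORIGIN)
    return
  if (event.data?.type === 'dify-chatbot-iframe-ready') {
    const iframe = document.getElementById('dify-chatbot-iframe') as HTMLIFrameElement | null
    // Reply with an explicit target origin as well, never '*'.
    iframe?.contentWindow?.postMessage({ type: 'dify-chatbot-config' }, CHATBOT_ORIGIN)
  }
})
```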

File diff suppressed because it is too large


@@ -1,47 +1,29 @@
import type { FC } from 'react'
import type {
DataSourceInfo,
FullDocumentDetail,
IndexingStatusResponse,
LegacyDataSourceInfo,
ProcessRuleResponse,
} from '@/models/datasets'
import type { FullDocumentDetail } from '@/models/datasets'
import type { RETRIEVE_METHOD } from '@/types/app'
import {
RiArrowRightLine,
RiCheckboxCircleFill,
RiErrorWarningFill,
RiLoader2Fill,
RiTerminalBoxLine,
} from '@remixicon/react'
import Image from 'next/image'
import Link from 'next/link'
import { useRouter } from 'next/navigation'
import * as React from 'react'
import { useCallback, useEffect, useMemo, useRef, useState } from 'react'
import { useMemo } from 'react'
import { useTranslation } from 'react-i18next'
import Button from '@/app/components/base/button'
import Divider from '@/app/components/base/divider'
import { ZapFast } from '@/app/components/base/icons/src/vender/solid/general'
import NotionIcon from '@/app/components/base/notion-icon'
import Tooltip from '@/app/components/base/tooltip'
import PriorityLabel from '@/app/components/billing/priority-label'
import { Plan } from '@/app/components/billing/type'
import UpgradeBtn from '@/app/components/billing/upgrade-btn'
import { FieldInfo } from '@/app/components/datasets/documents/detail/metadata'
import { useProviderContext } from '@/context/provider-context'
import { useDatasetApiAccessUrl } from '@/hooks/use-api-access-url'
import { DataSourceType, ProcessMode } from '@/models/datasets'
import { fetchIndexingStatusBatch as doFetchIndexingStatus } from '@/service/datasets'
import { useProcessRule } from '@/service/knowledge/use-dataset'
import { useInvalidDocumentList } from '@/service/knowledge/use-document'
import { RETRIEVE_METHOD } from '@/types/app'
import { sleep } from '@/utils'
import { cn } from '@/utils/classnames'
import DocumentFileIcon from '../../common/document-file-icon'
import { indexMethodIcon, retrievalIcon } from '../icons'
import { IndexingType } from '../step-two'
import IndexingProgressItem from './indexing-progress-item'
import RuleDetail from './rule-detail'
import UpgradeBanner from './upgrade-banner'
import { useIndexingStatusPolling } from './use-indexing-status-polling'
import { createDocumentLookup } from './utils'
type Props = {
type EmbeddingProcessProps = {
datasetId: string
batchId: string
documents?: FullDocumentDetail[]
@@ -49,333 +31,121 @@ type Props = {
retrievalMethod?: RETRIEVE_METHOD
}
const RuleDetail: FC<{
sourceData?: ProcessRuleResponse
indexingType?: string
retrievalMethod?: RETRIEVE_METHOD
}> = ({ sourceData, indexingType, retrievalMethod }) => {
// Status header component
const StatusHeader: FC<{ isEmbedding: boolean, isCompleted: boolean }> = ({
isEmbedding,
isCompleted,
}) => {
const { t } = useTranslation()
const segmentationRuleMap = {
mode: t('embedding.mode', { ns: 'datasetDocuments' }),
segmentLength: t('embedding.segmentLength', { ns: 'datasetDocuments' }),
textCleaning: t('embedding.textCleaning', { ns: 'datasetDocuments' }),
}
const getRuleName = (key: string) => {
if (key === 'remove_extra_spaces')
return t('stepTwo.removeExtraSpaces', { ns: 'datasetCreation' })
if (key === 'remove_urls_emails')
return t('stepTwo.removeUrlEmails', { ns: 'datasetCreation' })
if (key === 'remove_stopwords')
return t('stepTwo.removeStopwords', { ns: 'datasetCreation' })
}
const isNumber = (value: unknown) => {
return typeof value === 'number'
}
const getValue = useCallback((field: string) => {
let value: string | number | undefined = '-'
const maxTokens = isNumber(sourceData?.rules?.segmentation?.max_tokens)
? sourceData.rules.segmentation.max_tokens
: value
const childMaxTokens = isNumber(sourceData?.rules?.subchunk_segmentation?.max_tokens)
? sourceData.rules.subchunk_segmentation.max_tokens
: value
switch (field) {
case 'mode':
value = !sourceData?.mode
? value
: sourceData.mode === ProcessMode.general
? (t('embedding.custom', { ns: 'datasetDocuments' }) as string)
: `${t('embedding.hierarchical', { ns: 'datasetDocuments' })} · ${sourceData?.rules?.parent_mode === 'paragraph'
? t('parentMode.paragraph', { ns: 'dataset' })
: t('parentMode.fullDoc', { ns: 'dataset' })}`
break
case 'segmentLength':
value = !sourceData?.mode
? value
: sourceData.mode === ProcessMode.general
? maxTokens
: `${t('embedding.parentMaxTokens', { ns: 'datasetDocuments' })} ${maxTokens}; ${t('embedding.childMaxTokens', { ns: 'datasetDocuments' })} ${childMaxTokens}`
break
default:
value = !sourceData?.mode
? value
: sourceData?.rules?.pre_processing_rules?.filter(rule =>
rule.enabled).map(rule => getRuleName(rule.id)).join(',')
break
}
return value
}, [sourceData])
return (
<div className="flex flex-col gap-1">
{Object.keys(segmentationRuleMap).map((field) => {
return (
<FieldInfo
key={field}
label={segmentationRuleMap[field as keyof typeof segmentationRuleMap]}
displayedValue={String(getValue(field))}
/>
)
})}
<FieldInfo
label={t('stepTwo.indexMode', { ns: 'datasetCreation' })}
displayedValue={t(`stepTwo.${indexingType === IndexingType.ECONOMICAL ? 'economical' : 'qualified'}`, { ns: 'datasetCreation' }) as string}
valueIcon={(
<Image
className="size-4"
src={
indexingType === IndexingType.ECONOMICAL
? indexMethodIcon.economical
: indexMethodIcon.high_quality
}
alt=""
/>
)}
/>
<FieldInfo
label={t('form.retrievalSetting.title', { ns: 'datasetSettings' })}
// displayedValue={t(`datasetSettings.form.retrievalSetting.${retrievalMethod}`) as string}
displayedValue={t(`retrieval.${indexingType === IndexingType.ECONOMICAL ? 'keyword_search' : retrievalMethod ?? 'semantic_search'}.title`, { ns: 'dataset' })}
valueIcon={(
<Image
className="size-4"
src={
retrievalMethod === RETRIEVE_METHOD.fullText
? retrievalIcon.fullText
: retrievalMethod === RETRIEVE_METHOD.hybrid
? retrievalIcon.hybrid
: retrievalIcon.vector
}
alt=""
/>
)}
/>
<div className="system-md-semibold-uppercase flex items-center gap-x-1 text-text-secondary">
{isEmbedding && (
<>
<RiLoader2Fill className="size-4 animate-spin" />
<span>{t('embedding.processing', { ns: 'datasetDocuments' })}</span>
</>
)}
{isCompleted && t('embedding.completed', { ns: 'datasetDocuments' })}
</div>
)
}
const EmbeddingProcess: FC<Props> = ({ datasetId, batchId, documents = [], indexingType, retrievalMethod }) => {
// Action buttons component
const ActionButtons: FC<{
apiReferenceUrl: string
onNavToDocuments: () => void
}> = ({ apiReferenceUrl, onNavToDocuments }) => {
const { t } = useTranslation()
return (
<div className="mt-6 flex items-center gap-x-2 py-2">
<Link href={apiReferenceUrl} target="_blank" rel="noopener noreferrer">
<Button className="w-fit gap-x-0.5 px-3">
<RiTerminalBoxLine className="size-4" />
<span className="px-0.5">Access the API</span>
</Button>
</Link>
<Button
className="w-fit gap-x-0.5 px-3"
variant="primary"
onClick={onNavToDocuments}
>
<span className="px-0.5">{t('stepThree.navTo', { ns: 'datasetCreation' })}</span>
<RiArrowRightLine className="size-4 stroke-current stroke-1" />
</Button>
</div>
)
}
const EmbeddingProcess: FC<EmbeddingProcessProps> = ({
datasetId,
batchId,
documents = [],
indexingType,
retrievalMethod,
}) => {
const { enableBilling, plan } = useProviderContext()
const getFirstDocument = documents[0]
const [indexingStatusBatchDetail, setIndexingStatusDetail] = useState<IndexingStatusResponse[]>([])
const fetchIndexingStatus = async () => {
const status = await doFetchIndexingStatus({ datasetId, batchId })
setIndexingStatusDetail(status.data)
return status.data
}
const [isStopQuery, setIsStopQuery] = useState(false)
const isStopQueryRef = useRef(isStopQuery)
useEffect(() => {
isStopQueryRef.current = isStopQuery
}, [isStopQuery])
const stopQueryStatus = () => {
setIsStopQuery(true)
}
const startQueryStatus = async () => {
if (isStopQueryRef.current)
return
try {
const indexingStatusBatchDetail = await fetchIndexingStatus()
const isCompleted = indexingStatusBatchDetail.every(indexingStatusDetail => ['completed', 'error', 'paused'].includes(indexingStatusDetail.indexing_status))
if (isCompleted) {
stopQueryStatus()
return
}
await sleep(2500)
await startQueryStatus()
}
catch {
await sleep(2500)
await startQueryStatus()
}
}
useEffect(() => {
setIsStopQuery(false)
startQueryStatus()
return () => {
stopQueryStatus()
}
}, [])
// get rule
const { data: ruleDetail } = useProcessRule(getFirstDocument?.id)
const router = useRouter()
const invalidDocumentList = useInvalidDocumentList()
const navToDocumentList = () => {
const apiReferenceUrl = useDatasetApiAccessUrl()
// Polling hook for indexing status
const { statusList, isEmbedding, isEmbeddingCompleted } = useIndexingStatusPolling({
datasetId,
batchId,
})
// Get process rule for the first document
const firstDocumentId = documents[0]?.id
const { data: ruleDetail } = useProcessRule(firstDocumentId)
// Document lookup utilities - memoized for performance
const documentLookup = useMemo(
() => createDocumentLookup(documents),
[documents],
)
const handleNavToDocuments = () => {
invalidDocumentList()
router.push(`/datasets/${datasetId}/documents`)
}
const apiReferenceUrl = useDatasetApiAccessUrl()
const isEmbedding = useMemo(() => {
return indexingStatusBatchDetail.some(indexingStatusDetail => ['indexing', 'splitting', 'parsing', 'cleaning'].includes(indexingStatusDetail?.indexing_status || ''))
}, [indexingStatusBatchDetail])
const isEmbeddingCompleted = useMemo(() => {
return indexingStatusBatchDetail.every(indexingStatusDetail => ['completed', 'error', 'paused'].includes(indexingStatusDetail?.indexing_status || ''))
}, [indexingStatusBatchDetail])
const getSourceName = (id: string) => {
const doc = documents.find(document => document.id === id)
return doc?.name
}
const getFileType = (name?: string) => name?.split('.').pop() || 'txt'
const getSourcePercent = (detail: IndexingStatusResponse) => {
const completedCount = detail.completed_segments || 0
const totalCount = detail.total_segments || 0
if (totalCount === 0)
return 0
const percent = Math.round(completedCount * 100 / totalCount)
return percent > 100 ? 100 : percent
}
const getSourceType = (id: string) => {
const doc = documents.find(document => document.id === id)
return doc?.data_source_type as DataSourceType
}
const isLegacyDataSourceInfo = (info: DataSourceInfo): info is LegacyDataSourceInfo => {
return info != null && typeof (info as LegacyDataSourceInfo).upload_file === 'object'
}
const getIcon = (id: string) => {
const doc = documents.find(document => document.id === id)
const info = doc?.data_source_info
if (info && isLegacyDataSourceInfo(info))
return info.notion_page_icon
return undefined
}
const isSourceEmbedding = (detail: IndexingStatusResponse) =>
['indexing', 'splitting', 'parsing', 'cleaning', 'waiting'].includes(detail.indexing_status || '')
const showUpgradeBanner = enableBilling && plan.type !== Plan.team
return (
<>
<div className="flex flex-col gap-y-3">
<div className="system-md-semibold-uppercase flex items-center gap-x-1 text-text-secondary">
{isEmbedding && (
<>
<RiLoader2Fill className="size-4 animate-spin" />
<span>{t('embedding.processing', { ns: 'datasetDocuments' })}</span>
</>
)}
{isEmbeddingCompleted && t('embedding.completed', { ns: 'datasetDocuments' })}
</div>
{
enableBilling && plan.type !== Plan.team && (
<div className="flex h-14 items-center rounded-xl border-[0.5px] border-black/5 bg-white p-3 shadow-md">
<div className="flex h-8 w-8 shrink-0 items-center justify-center rounded-lg bg-[#FFF6ED]">
<ZapFast className="h-4 w-4 text-[#FB6514]" />
</div>
<div className="mx-3 grow text-[13px] font-medium text-gray-700">
{t('plansCommon.documentProcessingPriorityUpgrade', { ns: 'billing' })}
</div>
<UpgradeBtn loc="knowledge-speed-up" />
</div>
)
}
<StatusHeader isEmbedding={isEmbedding} isCompleted={isEmbeddingCompleted} />
{showUpgradeBanner && <UpgradeBanner />}
<div className="flex flex-col gap-0.5 pb-2">
{indexingStatusBatchDetail.map(indexingStatusDetail => (
<div
key={indexingStatusDetail.id}
className={cn(
'relative h-[26px] overflow-hidden rounded-md bg-components-progress-bar-bg',
indexingStatusDetail.indexing_status === 'error' && 'bg-state-destructive-hover-alt',
)}
>
{isSourceEmbedding(indexingStatusDetail) && (
<div
className="absolute left-0 top-0 h-full min-w-0.5 border-r-[2px] border-r-components-progress-bar-progress-highlight bg-components-progress-bar-progress"
style={{ width: `${getSourcePercent(indexingStatusDetail)}%` }}
/>
)}
<div className="z-[1] flex h-full items-center gap-1 pl-[6px] pr-2">
{getSourceType(indexingStatusDetail.id) === DataSourceType.FILE && (
<DocumentFileIcon
size="sm"
className="shrink-0"
name={getSourceName(indexingStatusDetail.id)}
extension={getFileType(getSourceName(indexingStatusDetail.id))}
/>
)}
{getSourceType(indexingStatusDetail.id) === DataSourceType.NOTION && (
<NotionIcon
className="shrink-0"
type="page"
src={getIcon(indexingStatusDetail.id)}
/>
)}
<div className="flex w-0 grow items-center gap-1" title={getSourceName(indexingStatusDetail.id)}>
<div className="system-xs-medium truncate text-text-secondary">
{getSourceName(indexingStatusDetail.id)}
</div>
{
enableBilling && (
<PriorityLabel className="ml-0" />
)
}
</div>
{isSourceEmbedding(indexingStatusDetail) && (
<div className="shrink-0 text-xs text-text-secondary">{`${getSourcePercent(indexingStatusDetail)}%`}</div>
)}
{indexingStatusDetail.indexing_status === 'error' && (
<Tooltip
popupClassName="px-4 py-[14px] max-w-60 body-xs-regular text-text-secondary border-[0.5px] border-components-panel-border rounded-xl"
offset={4}
popupContent={indexingStatusDetail.error}
>
<span>
<RiErrorWarningFill className="size-4 shrink-0 text-text-destructive" />
</span>
</Tooltip>
)}
{indexingStatusDetail.indexing_status === 'completed' && (
<RiCheckboxCircleFill className="size-4 shrink-0 text-text-success" />
)}
</div>
</div>
{statusList.map(detail => (
<IndexingProgressItem
key={detail.id}
detail={detail}
name={documentLookup.getName(detail.id)}
sourceType={documentLookup.getSourceType(detail.id)}
notionIcon={documentLookup.getNotionIcon(detail.id)}
enableBilling={enableBilling}
/>
))}
</div>
<Divider type="horizontal" className="my-0 bg-divider-subtle" />
<RuleDetail
sourceData={ruleDetail}
indexingType={indexingType}
retrievalMethod={retrievalMethod}
/>
</div>
<div className="mt-6 flex items-center gap-x-2 py-2">
<Link
href={apiReferenceUrl}
target="_blank"
rel="noopener noreferrer"
>
<Button
className="w-fit gap-x-0.5 px-3"
>
<RiTerminalBoxLine className="size-4" />
<span className="px-0.5">Access the API</span>
</Button>
</Link>
<Button
className="w-fit gap-x-0.5 px-3"
variant="primary"
onClick={navToDocumentList}
>
<span className="px-0.5">{t('stepThree.navTo', { ns: 'datasetCreation' })}</span>
<RiArrowRightLine className="size-4 stroke-current stroke-1" />
</Button>
</div>
<ActionButtons
apiReferenceUrl={apiReferenceUrl}
onNavToDocuments={handleNavToDocuments}
/>
</>
)
}


@@ -0,0 +1,120 @@
import type { FC } from 'react'
import type { IndexingStatusResponse } from '@/models/datasets'
import {
RiCheckboxCircleFill,
RiErrorWarningFill,
} from '@remixicon/react'
import NotionIcon from '@/app/components/base/notion-icon'
import Tooltip from '@/app/components/base/tooltip'
import PriorityLabel from '@/app/components/billing/priority-label'
import { DataSourceType } from '@/models/datasets'
import { cn } from '@/utils/classnames'
import DocumentFileIcon from '../../common/document-file-icon'
import { getFileType, getSourcePercent, isSourceEmbedding } from './utils'
type IndexingProgressItemProps = {
detail: IndexingStatusResponse
name?: string
sourceType?: DataSourceType
notionIcon?: string
enableBilling?: boolean
}
// Status icon component for completed/error states
const StatusIcon: FC<{ status: string, error?: string }> = ({ status, error }) => {
if (status === 'completed')
return <RiCheckboxCircleFill className="size-4 shrink-0 text-text-success" />
if (status === 'error') {
return (
<Tooltip
popupClassName="px-4 py-[14px] max-w-60 body-xs-regular text-text-secondary border-[0.5px] border-components-panel-border rounded-xl"
offset={4}
popupContent={error}
>
<span>
<RiErrorWarningFill className="size-4 shrink-0 text-text-destructive" />
</span>
</Tooltip>
)
}
return null
}
// Source type icon component
const SourceTypeIcon: FC<{
sourceType?: DataSourceType
name?: string
notionIcon?: string
}> = ({ sourceType, name, notionIcon }) => {
if (sourceType === DataSourceType.FILE) {
return (
<DocumentFileIcon
size="sm"
className="shrink-0"
name={name}
extension={getFileType(name)}
/>
)
}
if (sourceType === DataSourceType.NOTION) {
return (
<NotionIcon
className="shrink-0"
type="page"
src={notionIcon}
/>
)
}
return null
}
const IndexingProgressItem: FC<IndexingProgressItemProps> = ({
detail,
name,
sourceType,
notionIcon,
enableBilling,
}) => {
const isEmbedding = isSourceEmbedding(detail)
const percent = getSourcePercent(detail)
const isError = detail.indexing_status === 'error'
return (
<div
className={cn(
'relative h-[26px] overflow-hidden rounded-md bg-components-progress-bar-bg',
isError && 'bg-state-destructive-hover-alt',
)}
>
{isEmbedding && (
<div
className="absolute left-0 top-0 h-full min-w-0.5 border-r-[2px] border-r-components-progress-bar-progress-highlight bg-components-progress-bar-progress"
style={{ width: `${percent}%` }}
/>
)}
<div className="z-[1] flex h-full items-center gap-1 pl-[6px] pr-2">
<SourceTypeIcon
sourceType={sourceType}
name={name}
notionIcon={notionIcon}
/>
<div className="flex w-0 grow items-center gap-1" title={name}>
<div className="system-xs-medium truncate text-text-secondary">
{name}
</div>
{enableBilling && <PriorityLabel className="ml-0" />}
</div>
{isEmbedding && (
<div className="shrink-0 text-xs text-text-secondary">{`${percent}%`}</div>
)}
<StatusIcon status={detail.indexing_status} error={detail.error} />
</div>
</div>
)
}
export default IndexingProgressItem


@@ -0,0 +1,133 @@
import type { FC } from 'react'
import type { ProcessRuleResponse } from '@/models/datasets'
import Image from 'next/image'
import { useCallback } from 'react'
import { useTranslation } from 'react-i18next'
import { FieldInfo } from '@/app/components/datasets/documents/detail/metadata'
import { ProcessMode } from '@/models/datasets'
import { RETRIEVE_METHOD } from '@/types/app'
import { indexMethodIcon, retrievalIcon } from '../icons'
import { IndexingType } from '../step-two'
type RuleDetailProps = {
sourceData?: ProcessRuleResponse
indexingType?: string
retrievalMethod?: RETRIEVE_METHOD
}
// Lookup table for pre-processing rule names
const PRE_PROCESSING_RULE_KEYS = {
remove_extra_spaces: 'stepTwo.removeExtraSpaces',
remove_urls_emails: 'stepTwo.removeUrlEmails',
remove_stopwords: 'stepTwo.removeStopwords',
} as const
// Lookup table for retrieval method icons
const RETRIEVAL_ICON_MAP: Partial<Record<RETRIEVE_METHOD, string>> = {
[RETRIEVE_METHOD.fullText]: retrievalIcon.fullText,
[RETRIEVE_METHOD.hybrid]: retrievalIcon.hybrid,
[RETRIEVE_METHOD.semantic]: retrievalIcon.vector,
[RETRIEVE_METHOD.invertedIndex]: retrievalIcon.fullText,
[RETRIEVE_METHOD.keywordSearch]: retrievalIcon.fullText,
}
const isNumber = (value: unknown): value is number => typeof value === 'number'
const RuleDetail: FC<RuleDetailProps> = ({ sourceData, indexingType, retrievalMethod }) => {
const { t } = useTranslation()
const segmentationRuleLabels = {
mode: t('embedding.mode', { ns: 'datasetDocuments' }),
segmentLength: t('embedding.segmentLength', { ns: 'datasetDocuments' }),
textCleaning: t('embedding.textCleaning', { ns: 'datasetDocuments' }),
}
const getRuleName = useCallback((key: string): string | undefined => {
const translationKey = PRE_PROCESSING_RULE_KEYS[key as keyof typeof PRE_PROCESSING_RULE_KEYS]
return translationKey ? t(translationKey, { ns: 'datasetCreation' }) : undefined
}, [t])
const getModeValue = useCallback((): string => {
if (!sourceData?.mode)
return '-'
if (sourceData.mode === ProcessMode.general)
return t('embedding.custom', { ns: 'datasetDocuments' })
const parentModeLabel = sourceData.rules?.parent_mode === 'paragraph'
? t('parentMode.paragraph', { ns: 'dataset' })
: t('parentMode.fullDoc', { ns: 'dataset' })
return `${t('embedding.hierarchical', { ns: 'datasetDocuments' })} · ${parentModeLabel}`
}, [sourceData, t])
const getSegmentLengthValue = useCallback((): string | number => {
if (!sourceData?.mode)
return '-'
const maxTokens = isNumber(sourceData.rules?.segmentation?.max_tokens)
? sourceData.rules.segmentation.max_tokens
: '-'
if (sourceData.mode === ProcessMode.general)
return maxTokens
const childMaxTokens = isNumber(sourceData.rules?.subchunk_segmentation?.max_tokens)
? sourceData.rules.subchunk_segmentation.max_tokens
: '-'
return `${t('embedding.parentMaxTokens', { ns: 'datasetDocuments' })} ${maxTokens}; ${t('embedding.childMaxTokens', { ns: 'datasetDocuments' })} ${childMaxTokens}`
}, [sourceData, t])
const getTextCleaningValue = useCallback((): string => {
if (!sourceData?.mode)
return '-'
const enabledRules = sourceData.rules?.pre_processing_rules?.filter(rule => rule.enabled) || []
const ruleNames = enabledRules
.map((rule) => {
const name = getRuleName(rule.id)
return typeof name === 'string' ? name : ''
})
.filter(name => name)
return ruleNames.length > 0 ? ruleNames.join(',') : '-'
}, [sourceData, getRuleName])
const fieldValueGetters: Record<string, () => string | number> = {
mode: getModeValue,
segmentLength: getSegmentLengthValue,
textCleaning: getTextCleaningValue,
}
const isEconomical = indexingType === IndexingType.ECONOMICAL
const indexMethodIconSrc = isEconomical ? indexMethodIcon.economical : indexMethodIcon.high_quality
const indexModeLabel = t(`stepTwo.${isEconomical ? 'economical' : 'qualified'}`, { ns: 'datasetCreation' })
const effectiveRetrievalMethod = isEconomical ? 'keyword_search' : (retrievalMethod ?? 'semantic_search')
const retrievalLabel = t(`retrieval.${effectiveRetrievalMethod}.title`, { ns: 'dataset' })
const retrievalIconSrc = RETRIEVAL_ICON_MAP[retrievalMethod as keyof typeof RETRIEVAL_ICON_MAP] ?? retrievalIcon.vector
return (
<div className="flex flex-col gap-1">
{Object.keys(segmentationRuleLabels).map(field => (
<FieldInfo
key={field}
label={segmentationRuleLabels[field as keyof typeof segmentationRuleLabels]}
displayedValue={String(fieldValueGetters[field]())}
/>
))}
<FieldInfo
label={t('stepTwo.indexMode', { ns: 'datasetCreation' })}
displayedValue={indexModeLabel}
valueIcon={<Image className="size-4" src={indexMethodIconSrc} alt="" />}
/>
<FieldInfo
label={t('form.retrievalSetting.title', { ns: 'datasetSettings' })}
displayedValue={retrievalLabel}
valueIcon={<Image className="size-4" src={retrievalIconSrc} alt="" />}
/>
</div>
)
}
export default RuleDetail


@@ -0,0 +1,22 @@
import type { FC } from 'react'
import { useTranslation } from 'react-i18next'
import { ZapFast } from '@/app/components/base/icons/src/vender/solid/general'
import UpgradeBtn from '@/app/components/billing/upgrade-btn'
const UpgradeBanner: FC = () => {
const { t } = useTranslation()
return (
<div className="flex h-14 items-center rounded-xl border-[0.5px] border-black/5 bg-white p-3 shadow-md">
<div className="flex h-8 w-8 shrink-0 items-center justify-center rounded-lg bg-[#FFF6ED]">
<ZapFast className="h-4 w-4 text-[#FB6514]" />
</div>
<div className="mx-3 grow text-[13px] font-medium text-gray-700">
{t('plansCommon.documentProcessingPriorityUpgrade', { ns: 'billing' })}
</div>
<UpgradeBtn loc="knowledge-speed-up" />
</div>
)
}
export default UpgradeBanner


@@ -0,0 +1,90 @@
import type { IndexingStatusResponse } from '@/models/datasets'
import { useEffect, useRef, useState } from 'react'
import { fetchIndexingStatusBatch } from '@/service/datasets'
const POLLING_INTERVAL = 2500
const COMPLETED_STATUSES = ['completed', 'error', 'paused'] as const
const EMBEDDING_STATUSES = ['indexing', 'splitting', 'parsing', 'cleaning', 'waiting'] as const
type IndexingStatusPollingParams = {
datasetId: string
batchId: string
}
type IndexingStatusPollingResult = {
statusList: IndexingStatusResponse[]
isEmbedding: boolean
isEmbeddingCompleted: boolean
}
const isStatusCompleted = (status: string): boolean =>
COMPLETED_STATUSES.includes(status as typeof COMPLETED_STATUSES[number])
const isAllCompleted = (statusList: IndexingStatusResponse[]): boolean =>
statusList.every(item => isStatusCompleted(item.indexing_status))
/**
* Custom hook for polling indexing status with automatic stop on completion.
* Handles the polling lifecycle and provides derived states for UI rendering.
*/
export const useIndexingStatusPolling = ({
datasetId,
batchId,
}: IndexingStatusPollingParams): IndexingStatusPollingResult => {
const [statusList, setStatusList] = useState<IndexingStatusResponse[]>([])
const isStopPollingRef = useRef(false)
useEffect(() => {
// Reset polling state on mount
isStopPollingRef.current = false
let timeoutId: ReturnType<typeof setTimeout> | null = null
const fetchStatus = async (): Promise<IndexingStatusResponse[]> => {
const response = await fetchIndexingStatusBatch({ datasetId, batchId })
setStatusList(response.data)
return response.data
}
const poll = async (): Promise<void> => {
if (isStopPollingRef.current)
return
try {
const data = await fetchStatus()
if (isAllCompleted(data)) {
isStopPollingRef.current = true
return
}
}
catch {
// Continue polling on error
}
if (!isStopPollingRef.current) {
timeoutId = setTimeout(() => {
poll()
}, POLLING_INTERVAL)
}
}
poll()
return () => {
isStopPollingRef.current = true
if (timeoutId)
clearTimeout(timeoutId)
}
}, [datasetId, batchId])
const isEmbedding = statusList.some(item =>
EMBEDDING_STATUSES.includes(item?.indexing_status as typeof EMBEDDING_STATUSES[number]),
)
const isEmbeddingCompleted = statusList.length > 0 && isAllCompleted(statusList)
return {
statusList,
isEmbedding,
isEmbeddingCompleted,
}
}
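A hedged usage sketch for the hook above; the import path, the component name, and the rendered copy are illustrative assumptions rather than part of this change:

// Hypothetical consumer of useIndexingStatusPolling (import path assumed).
import type { FC } from 'react'
import { useIndexingStatusPolling } from './use-indexing-status-polling'

const EmbeddingProgress: FC<{ datasetId: string, batchId: string }> = ({ datasetId, batchId }) => {
  const { statusList, isEmbedding, isEmbeddingCompleted } = useIndexingStatusPolling({ datasetId, batchId })
  // Polling stops automatically once every document reports completed/error/paused.
  if (isEmbeddingCompleted)
    return <div>{statusList.length} documents processed</div>
  return <div>{isEmbedding ? 'Embedding in progress' : 'Waiting for indexing status'}</div>
}

export default EmbeddingProgress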

View File

@@ -0,0 +1,64 @@
import type {
DataSourceInfo,
DataSourceType,
FullDocumentDetail,
IndexingStatusResponse,
LegacyDataSourceInfo,
} from '@/models/datasets'
const EMBEDDING_STATUSES = ['indexing', 'splitting', 'parsing', 'cleaning', 'waiting'] as const
/**
* Type guard for legacy data source info with upload_file property
*/
export const isLegacyDataSourceInfo = (info: DataSourceInfo): info is LegacyDataSourceInfo => {
return info != null && typeof (info as LegacyDataSourceInfo).upload_file === 'object'
}
/**
* Check if a status indicates the source is being embedded
*/
export const isSourceEmbedding = (detail: IndexingStatusResponse): boolean =>
EMBEDDING_STATUSES.includes(detail.indexing_status as typeof EMBEDDING_STATUSES[number])
/**
* Calculate the progress percentage for a document
*/
export const getSourcePercent = (detail: IndexingStatusResponse): number => {
const completedCount = detail.completed_segments || 0
const totalCount = detail.total_segments || 0
if (totalCount === 0)
return 0
const percent = Math.round(completedCount * 100 / totalCount)
return Math.min(percent, 100)
}
/**
* Get file extension from filename, defaults to 'txt'
*/
export const getFileType = (name?: string): string =>
name?.split('.').pop() || 'txt'
/**
* Document lookup utilities - provides document info by ID from a list
*/
export const createDocumentLookup = (documents: FullDocumentDetail[]) => {
const documentMap = new Map(documents.map(doc => [doc.id, doc]))
return {
getDocument: (id: string) => documentMap.get(id),
getName: (id: string) => documentMap.get(id)?.name,
getSourceType: (id: string) => documentMap.get(id)?.data_source_type as DataSourceType | undefined,
getNotionIcon: (id: string) => {
const info = documentMap.get(id)?.data_source_info
if (info && isLegacyDataSourceInfo(info))
return info.notion_page_icon
return undefined
},
}
}
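As a rough sketch of how these helpers compose, the snippet below joins status entries with document metadata for display; the relative import path and the assumption that IndexingStatusResponse.id carries the document id are illustrative only:

import type { FullDocumentDetail, IndexingStatusResponse } from '@/models/datasets'
// The relative import path below is an assumption.
import { createDocumentLookup, getSourcePercent, isSourceEmbedding } from './utils'

// Build display rows for an embedding-progress list.
export const buildProgressRows = (
  documents: FullDocumentDetail[],
  statusList: IndexingStatusResponse[],
) => {
  const lookup = createDocumentLookup(documents)
  return statusList.map(detail => ({
    name: lookup.getName(detail.id) ?? 'Unknown document', // assumes detail.id is the document id
    percent: getSourcePercent(detail), // 0-100, capped at 100
    embedding: isSourceEmbedding(detail),
  }))
}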

View File

@@ -0,0 +1,199 @@
'use client'
import type { FC } from 'react'
import type { PreProcessingRule } from '@/models/datasets'
import {
RiAlertFill,
RiSearchEyeLine,
} from '@remixicon/react'
import Image from 'next/image'
import { useTranslation } from 'react-i18next'
import Button from '@/app/components/base/button'
import Checkbox from '@/app/components/base/checkbox'
import Divider from '@/app/components/base/divider'
import Tooltip from '@/app/components/base/tooltip'
import { IS_CE_EDITION } from '@/config'
import { ChunkingMode } from '@/models/datasets'
import SettingCog from '../../assets/setting-gear-mod.svg'
import s from '../index.module.css'
import LanguageSelect from '../language-select'
import { DelimiterInput, MaxLengthInput, OverlapInput } from './inputs'
import { OptionCard } from './option-card'
type TextLabelProps = {
children: React.ReactNode
}
const TextLabel: FC<TextLabelProps> = ({ children }) => {
return <label className="system-sm-semibold text-text-secondary">{children}</label>
}
type GeneralChunkingOptionsProps = {
// State
segmentIdentifier: string
maxChunkLength: number
overlap: number
rules: PreProcessingRule[]
currentDocForm: ChunkingMode
docLanguage: string
// Flags
isActive: boolean
isInUpload: boolean
isNotUploadInEmptyDataset: boolean
hasCurrentDatasetDocForm: boolean
// Actions
onSegmentIdentifierChange: (value: string) => void
onMaxChunkLengthChange: (value: number) => void
onOverlapChange: (value: number) => void
onRuleToggle: (id: string) => void
onDocFormChange: (form: ChunkingMode) => void
onDocLanguageChange: (lang: string) => void
onPreview: () => void
onReset: () => void
// Locale
locale: string
}
export const GeneralChunkingOptions: FC<GeneralChunkingOptionsProps> = ({
segmentIdentifier,
maxChunkLength,
overlap,
rules,
currentDocForm,
docLanguage,
isActive,
isInUpload,
isNotUploadInEmptyDataset,
hasCurrentDatasetDocForm,
onSegmentIdentifierChange,
onMaxChunkLengthChange,
onOverlapChange,
onRuleToggle,
onDocFormChange,
onDocLanguageChange,
onPreview,
onReset,
locale,
}) => {
const { t } = useTranslation()
const getRuleName = (key: string): string => {
const ruleNameMap: Record<string, string> = {
remove_extra_spaces: t('stepTwo.removeExtraSpaces', { ns: 'datasetCreation' }),
remove_urls_emails: t('stepTwo.removeUrlEmails', { ns: 'datasetCreation' }),
remove_stopwords: t('stepTwo.removeStopwords', { ns: 'datasetCreation' }),
}
return ruleNameMap[key] ?? key
}
return (
<OptionCard
className="mb-2 bg-background-section"
title={t('stepTwo.general', { ns: 'datasetCreation' })}
icon={<Image width={20} height={20} src={SettingCog} alt={t('stepTwo.general', { ns: 'datasetCreation' })} />}
activeHeaderClassName="bg-dataset-option-card-blue-gradient"
description={t('stepTwo.generalTip', { ns: 'datasetCreation' })}
isActive={isActive}
onSwitched={() => onDocFormChange(ChunkingMode.text)}
actions={(
<>
<Button variant="secondary-accent" onClick={onPreview}>
<RiSearchEyeLine className="mr-0.5 h-4 w-4" />
{t('stepTwo.previewChunk', { ns: 'datasetCreation' })}
</Button>
<Button variant="ghost" onClick={onReset}>
{t('stepTwo.reset', { ns: 'datasetCreation' })}
</Button>
</>
)}
noHighlight={isInUpload && isNotUploadInEmptyDataset}
>
<div className="flex flex-col gap-y-4">
<div className="flex gap-3">
<DelimiterInput
value={segmentIdentifier}
onChange={e => onSegmentIdentifierChange(e.target.value)}
/>
<MaxLengthInput
unit="characters"
value={maxChunkLength}
onChange={onMaxChunkLengthChange}
/>
<OverlapInput
unit="characters"
value={overlap}
min={1}
onChange={onOverlapChange}
/>
</div>
<div className="flex w-full flex-col">
<div className="flex items-center gap-x-2">
<div className="inline-flex shrink-0">
<TextLabel>{t('stepTwo.rules', { ns: 'datasetCreation' })}</TextLabel>
</div>
<Divider className="grow" bgStyle="gradient" />
</div>
<div className="mt-1">
{rules.map(rule => (
<div
key={rule.id}
className={s.ruleItem}
onClick={() => onRuleToggle(rule.id)}
>
<Checkbox checked={rule.enabled} />
<label className="system-sm-regular ml-2 cursor-pointer text-text-secondary">
{getRuleName(rule.id)}
</label>
</div>
))}
{IS_CE_EDITION && (
<>
<Divider type="horizontal" className="my-4 bg-divider-subtle" />
<div className="flex items-center py-0.5">
<div
className="flex items-center"
onClick={() => {
if (hasCurrentDatasetDocForm)
return
if (currentDocForm === ChunkingMode.qa)
onDocFormChange(ChunkingMode.text)
else
onDocFormChange(ChunkingMode.qa)
}}
>
<Checkbox
checked={currentDocForm === ChunkingMode.qa}
disabled={hasCurrentDatasetDocForm}
/>
<label className="system-sm-regular ml-2 cursor-pointer text-text-secondary">
{t('stepTwo.useQALanguage', { ns: 'datasetCreation' })}
</label>
</div>
<LanguageSelect
currentLanguage={docLanguage || locale}
onSelect={onDocLanguageChange}
disabled={currentDocForm !== ChunkingMode.qa}
/>
<Tooltip popupContent={t('stepTwo.QATip', { ns: 'datasetCreation' })} />
</div>
{currentDocForm === ChunkingMode.qa && (
<div
style={{
background: 'linear-gradient(92deg, rgba(247, 144, 9, 0.1) 0%, rgba(255, 255, 255, 0.00) 100%)',
}}
className="mt-2 flex h-10 items-center gap-2 rounded-xl border border-components-panel-border px-3 text-xs shadow-xs backdrop-blur-[5px]"
>
<RiAlertFill className="size-4 text-text-warning-secondary" />
<span className="system-xs-medium text-text-primary">
{t('stepTwo.QATip', { ns: 'datasetCreation' })}
</span>
</div>
)}
</>
)}
</div>
</div>
</div>
</OptionCard>
)
}

View File

@@ -0,0 +1,5 @@
export { GeneralChunkingOptions } from './general-chunking-options'
export { IndexingModeSection } from './indexing-mode-section'
export { ParentChildOptions } from './parent-child-options'
export { PreviewPanel } from './preview-panel'
export { StepTwoFooter } from './step-two-footer'

View File

@@ -0,0 +1,253 @@
'use client'
import type { FC } from 'react'
import type { DefaultModel, Model } from '@/app/components/header/account-setting/model-provider-page/declarations'
import type { RetrievalConfig } from '@/types/app'
import Image from 'next/image'
import Link from 'next/link'
import { useTranslation } from 'react-i18next'
import Badge from '@/app/components/base/badge'
import Button from '@/app/components/base/button'
import CustomDialog from '@/app/components/base/dialog'
import Divider from '@/app/components/base/divider'
import { AlertTriangle } from '@/app/components/base/icons/src/vender/solid/alertsAndFeedback'
import Tooltip from '@/app/components/base/tooltip'
import EconomicalRetrievalMethodConfig from '@/app/components/datasets/common/economical-retrieval-method-config'
import RetrievalMethodConfig from '@/app/components/datasets/common/retrieval-method-config'
import ModelSelector from '@/app/components/header/account-setting/model-provider-page/model-selector'
import { useDocLink } from '@/context/i18n'
import { ChunkingMode } from '@/models/datasets'
import { cn } from '@/utils/classnames'
import { indexMethodIcon } from '../../icons'
import { IndexingType } from '../hooks'
import s from '../index.module.css'
import { OptionCard } from './option-card'
type IndexingModeSectionProps = {
// State
indexType: IndexingType
hasSetIndexType: boolean
docForm: ChunkingMode
embeddingModel: DefaultModel
embeddingModelList?: Model[]
retrievalConfig: RetrievalConfig
showMultiModalTip: boolean
// Flags
isModelAndRetrievalConfigDisabled: boolean
datasetId?: string
// Modal state
isQAConfirmDialogOpen: boolean
// Actions
onIndexTypeChange: (type: IndexingType) => void
onEmbeddingModelChange: (model: DefaultModel) => void
onRetrievalConfigChange: (config: RetrievalConfig) => void
onQAConfirmDialogClose: () => void
onQAConfirmDialogConfirm: () => void
}
export const IndexingModeSection: FC<IndexingModeSectionProps> = ({
indexType,
hasSetIndexType,
docForm,
embeddingModel,
embeddingModelList,
retrievalConfig,
showMultiModalTip,
isModelAndRetrievalConfigDisabled,
datasetId,
isQAConfirmDialogOpen,
onIndexTypeChange,
onEmbeddingModelChange,
onRetrievalConfigChange,
onQAConfirmDialogClose,
onQAConfirmDialogConfirm,
}) => {
const { t } = useTranslation()
const docLink = useDocLink()
const getIndexingTechnique = () => indexType
return (
<>
{/* Index Mode */}
<div className="system-md-semibold mb-1 text-text-secondary">
{t('stepTwo.indexMode', { ns: 'datasetCreation' })}
</div>
<div className="flex items-center gap-2">
{/* Qualified option */}
{(!hasSetIndexType || (hasSetIndexType && indexType === IndexingType.QUALIFIED)) && (
<OptionCard
className="flex-1 self-stretch"
title={(
<div className="flex items-center">
{t('stepTwo.qualified', { ns: 'datasetCreation' })}
<Badge
className={cn(
'ml-1 h-[18px]',
(!hasSetIndexType && indexType === IndexingType.QUALIFIED)
? 'border-text-accent-secondary text-text-accent-secondary'
: '',
)}
uppercase
>
{t('stepTwo.recommend', { ns: 'datasetCreation' })}
</Badge>
<span className="ml-auto">
{!hasSetIndexType && <span className={cn(s.radio)} />}
</span>
</div>
)}
description={t('stepTwo.qualifiedTip', { ns: 'datasetCreation' })}
icon={<Image src={indexMethodIcon.high_quality} alt="" />}
isActive={!hasSetIndexType && indexType === IndexingType.QUALIFIED}
disabled={hasSetIndexType}
onSwitched={() => onIndexTypeChange(IndexingType.QUALIFIED)}
/>
)}
{/* Economical option */}
{(!hasSetIndexType || (hasSetIndexType && indexType === IndexingType.ECONOMICAL)) && (
<>
<CustomDialog show={isQAConfirmDialogOpen} onClose={onQAConfirmDialogClose} className="w-[432px]">
<header className="mb-4 pt-6">
<h2 className="text-lg font-semibold text-text-primary">
{t('stepTwo.qaSwitchHighQualityTipTitle', { ns: 'datasetCreation' })}
</h2>
<p className="mt-2 text-sm font-normal text-text-secondary">
{t('stepTwo.qaSwitchHighQualityTipContent', { ns: 'datasetCreation' })}
</p>
</header>
<div className="flex gap-2 pb-6">
<Button className="ml-auto" onClick={onQAConfirmDialogClose}>
{t('stepTwo.cancel', { ns: 'datasetCreation' })}
</Button>
<Button variant="primary" onClick={onQAConfirmDialogConfirm}>
{t('stepTwo.switch', { ns: 'datasetCreation' })}
</Button>
</div>
</CustomDialog>
<Tooltip
popupContent={(
<div className="rounded-lg border-components-panel-border bg-components-tooltip-bg p-3 text-xs font-medium text-text-secondary shadow-lg">
{docForm === ChunkingMode.qa
? t('stepTwo.notAvailableForQA', { ns: 'datasetCreation' })
: t('stepTwo.notAvailableForParentChild', { ns: 'datasetCreation' })}
</div>
)}
noDecoration
position="top"
asChild={false}
triggerClassName="flex-1 self-stretch"
>
<OptionCard
className="h-full"
title={t('stepTwo.economical', { ns: 'datasetCreation' })}
description={t('stepTwo.economicalTip', { ns: 'datasetCreation' })}
icon={<Image src={indexMethodIcon.economical} alt="" />}
isActive={!hasSetIndexType && indexType === IndexingType.ECONOMICAL}
disabled={hasSetIndexType || docForm !== ChunkingMode.text}
onSwitched={() => onIndexTypeChange(IndexingType.ECONOMICAL)}
/>
</Tooltip>
</>
)}
</div>
{/* High quality tip */}
{!hasSetIndexType && indexType === IndexingType.QUALIFIED && (
<div className="mt-2 flex h-10 items-center gap-x-0.5 overflow-hidden rounded-xl border-[0.5px] border-components-panel-border bg-components-panel-bg-blur p-2 shadow-xs backdrop-blur-[5px]">
<div className="absolute bottom-0 left-0 right-0 top-0 bg-dataset-warning-message-bg opacity-40"></div>
<div className="p-1">
<AlertTriangle className="size-4 text-text-warning-secondary" />
</div>
<span className="system-xs-medium text-text-primary">
{t('stepTwo.highQualityTip', { ns: 'datasetCreation' })}
</span>
</div>
)}
{/* Economical index setting tip */}
{hasSetIndexType && indexType === IndexingType.ECONOMICAL && (
<div className="system-xs-medium mt-2 text-text-tertiary">
{t('stepTwo.indexSettingTip', { ns: 'datasetCreation' })}
<Link className="text-text-accent" href={`/datasets/${datasetId}/settings`}>
{t('stepTwo.datasetSettingLink', { ns: 'datasetCreation' })}
</Link>
</div>
)}
{/* Embedding model */}
{indexType === IndexingType.QUALIFIED && (
<div className="mt-5">
<div className={cn('system-md-semibold mb-1 text-text-secondary', datasetId && 'flex items-center justify-between')}>
{t('form.embeddingModel', { ns: 'datasetSettings' })}
</div>
<ModelSelector
readonly={isModelAndRetrievalConfigDisabled}
triggerClassName={isModelAndRetrievalConfigDisabled ? 'opacity-50' : ''}
defaultModel={embeddingModel}
modelList={embeddingModelList ?? []}
onSelect={onEmbeddingModelChange}
/>
{isModelAndRetrievalConfigDisabled && (
<div className="system-xs-medium mt-2 text-text-tertiary">
{t('stepTwo.indexSettingTip', { ns: 'datasetCreation' })}
<Link className="text-text-accent" href={`/datasets/${datasetId}/settings`}>
{t('stepTwo.datasetSettingLink', { ns: 'datasetCreation' })}
</Link>
</div>
)}
</div>
)}
<Divider className="my-5" />
{/* Retrieval Method Config */}
<div>
{!isModelAndRetrievalConfigDisabled
? (
<div className="mb-1">
<div className="system-md-semibold mb-0.5 text-text-secondary">
{t('form.retrievalSetting.title', { ns: 'datasetSettings' })}
</div>
<div className="body-xs-regular text-text-tertiary">
<a
target="_blank"
rel="noopener noreferrer"
href={docLink('/guides/knowledge-base/create-knowledge-and-upload-documents')}
className="text-text-accent"
>
{t('form.retrievalSetting.learnMore', { ns: 'datasetSettings' })}
</a>
{t('form.retrievalSetting.longDescription', { ns: 'datasetSettings' })}
</div>
</div>
)
: (
<div className={cn('system-md-semibold mb-0.5 text-text-secondary', 'flex items-center justify-between')}>
<div>{t('form.retrievalSetting.title', { ns: 'datasetSettings' })}</div>
</div>
)}
<div>
{getIndexingTechnique() === IndexingType.QUALIFIED
? (
<RetrievalMethodConfig
disabled={isModelAndRetrievalConfigDisabled}
value={retrievalConfig}
onChange={onRetrievalConfigChange}
showMultiModalTip={showMultiModalTip}
/>
)
: (
<EconomicalRetrievalMethodConfig
disabled={isModelAndRetrievalConfigDisabled}
value={retrievalConfig}
onChange={onRetrievalConfigChange}
/>
)}
</div>
</div>
</>
)
}

View File

@@ -0,0 +1,191 @@
'use client'
import type { FC } from 'react'
import type { ParentChildConfig } from '../hooks'
import type { ParentMode, PreProcessingRule } from '@/models/datasets'
import { RiSearchEyeLine } from '@remixicon/react'
import Image from 'next/image'
import { useTranslation } from 'react-i18next'
import Button from '@/app/components/base/button'
import Checkbox from '@/app/components/base/checkbox'
import Divider from '@/app/components/base/divider'
import { ParentChildChunk } from '@/app/components/base/icons/src/vender/knowledge'
import RadioCard from '@/app/components/base/radio-card'
import { ChunkingMode } from '@/models/datasets'
import FileList from '../../assets/file-list-3-fill.svg'
import Note from '../../assets/note-mod.svg'
import BlueEffect from '../../assets/option-card-effect-blue.svg'
import s from '../index.module.css'
import { DelimiterInput, MaxLengthInput } from './inputs'
import { OptionCard } from './option-card'
type TextLabelProps = {
children: React.ReactNode
}
const TextLabel: FC<TextLabelProps> = ({ children }) => {
return <label className="system-sm-semibold text-text-secondary">{children}</label>
}
type ParentChildOptionsProps = {
// State
parentChildConfig: ParentChildConfig
rules: PreProcessingRule[]
currentDocForm: ChunkingMode
// Flags
isActive: boolean
isInUpload: boolean
isNotUploadInEmptyDataset: boolean
// Actions
onDocFormChange: (form: ChunkingMode) => void
onChunkForContextChange: (mode: ParentMode) => void
onParentDelimiterChange: (value: string) => void
onParentMaxLengthChange: (value: number) => void
onChildDelimiterChange: (value: string) => void
onChildMaxLengthChange: (value: number) => void
onRuleToggle: (id: string) => void
onPreview: () => void
onReset: () => void
}
export const ParentChildOptions: FC<ParentChildOptionsProps> = ({
parentChildConfig,
rules,
currentDocForm: _currentDocForm,
isActive,
isInUpload,
isNotUploadInEmptyDataset,
onDocFormChange,
onChunkForContextChange,
onParentDelimiterChange,
onParentMaxLengthChange,
onChildDelimiterChange,
onChildMaxLengthChange,
onRuleToggle,
onPreview,
onReset,
}) => {
const { t } = useTranslation()
const getRuleName = (key: string): string => {
const ruleNameMap: Record<string, string> = {
remove_extra_spaces: t('stepTwo.removeExtraSpaces', { ns: 'datasetCreation' }),
remove_urls_emails: t('stepTwo.removeUrlEmails', { ns: 'datasetCreation' }),
remove_stopwords: t('stepTwo.removeStopwords', { ns: 'datasetCreation' }),
}
return ruleNameMap[key] ?? key
}
return (
<OptionCard
title={t('stepTwo.parentChild', { ns: 'datasetCreation' })}
icon={<ParentChildChunk className="h-[20px] w-[20px]" />}
effectImg={BlueEffect.src}
className="text-util-colors-blue-light-blue-light-500"
activeHeaderClassName="bg-dataset-option-card-blue-gradient"
description={t('stepTwo.parentChildTip', { ns: 'datasetCreation' })}
isActive={isActive}
onSwitched={() => onDocFormChange(ChunkingMode.parentChild)}
actions={(
<>
<Button variant="secondary-accent" onClick={onPreview}>
<RiSearchEyeLine className="mr-0.5 h-4 w-4" />
{t('stepTwo.previewChunk', { ns: 'datasetCreation' })}
</Button>
<Button variant="ghost" onClick={onReset}>
{t('stepTwo.reset', { ns: 'datasetCreation' })}
</Button>
</>
)}
noHighlight={isInUpload && isNotUploadInEmptyDataset}
>
<div className="flex flex-col gap-4">
{/* Parent chunk for context */}
<div>
<div className="flex items-center gap-x-2">
<div className="inline-flex shrink-0">
<TextLabel>{t('stepTwo.parentChunkForContext', { ns: 'datasetCreation' })}</TextLabel>
</div>
<Divider className="grow" bgStyle="gradient" />
</div>
<RadioCard
className="mt-1"
icon={<Image src={Note} alt="" />}
title={t('stepTwo.paragraph', { ns: 'datasetCreation' })}
description={t('stepTwo.paragraphTip', { ns: 'datasetCreation' })}
isChosen={parentChildConfig.chunkForContext === 'paragraph'}
onChosen={() => onChunkForContextChange('paragraph')}
chosenConfig={(
<div className="flex gap-3">
<DelimiterInput
value={parentChildConfig.parent.delimiter}
tooltip={t('stepTwo.parentChildDelimiterTip', { ns: 'datasetCreation' })!}
onChange={e => onParentDelimiterChange(e.target.value)}
/>
<MaxLengthInput
unit="characters"
value={parentChildConfig.parent.maxLength}
onChange={onParentMaxLengthChange}
/>
</div>
)}
/>
<RadioCard
className="mt-2"
icon={<Image src={FileList} alt="" />}
title={t('stepTwo.fullDoc', { ns: 'datasetCreation' })}
description={t('stepTwo.fullDocTip', { ns: 'datasetCreation' })}
onChosen={() => onChunkForContextChange('full-doc')}
isChosen={parentChildConfig.chunkForContext === 'full-doc'}
/>
</div>
{/* Child chunk for retrieval */}
<div>
<div className="flex items-center gap-x-2">
<div className="inline-flex shrink-0">
<TextLabel>{t('stepTwo.childChunkForRetrieval', { ns: 'datasetCreation' })}</TextLabel>
</div>
<Divider className="grow" bgStyle="gradient" />
</div>
<div className="mt-1 flex gap-3">
<DelimiterInput
value={parentChildConfig.child.delimiter}
tooltip={t('stepTwo.parentChildChunkDelimiterTip', { ns: 'datasetCreation' })!}
onChange={e => onChildDelimiterChange(e.target.value)}
/>
<MaxLengthInput
unit="characters"
value={parentChildConfig.child.maxLength}
onChange={onChildMaxLengthChange}
/>
</div>
</div>
{/* Rules */}
<div>
<div className="flex items-center gap-x-2">
<div className="inline-flex shrink-0">
<TextLabel>{t('stepTwo.rules', { ns: 'datasetCreation' })}</TextLabel>
</div>
<Divider className="grow" bgStyle="gradient" />
</div>
<div className="mt-1">
{rules.map(rule => (
<div
key={rule.id}
className={s.ruleItem}
onClick={() => onRuleToggle(rule.id)}
>
<Checkbox checked={rule.enabled} />
<label className="system-sm-regular ml-2 cursor-pointer text-text-secondary">
{getRuleName(rule.id)}
</label>
</div>
))}
</div>
</div>
</div>
</OptionCard>
)
}

View File

@@ -0,0 +1,171 @@
'use client'
import type { FC } from 'react'
import type { ParentChildConfig } from '../hooks'
import type { DataSourceType, FileIndexingEstimateResponse } from '@/models/datasets'
import { RiSearchEyeLine } from '@remixicon/react'
import { noop } from 'es-toolkit/function'
import { useTranslation } from 'react-i18next'
import Badge from '@/app/components/base/badge'
import FloatRightContainer from '@/app/components/base/float-right-container'
import { SkeletonContainer, SkeletonPoint, SkeletonRectangle, SkeletonRow } from '@/app/components/base/skeleton'
import { FULL_DOC_PREVIEW_LENGTH } from '@/config'
import { ChunkingMode } from '@/models/datasets'
import { cn } from '@/utils/classnames'
import { ChunkContainer, QAPreview } from '../../../chunk'
import PreviewDocumentPicker from '../../../common/document-picker/preview-document-picker'
import { PreviewSlice } from '../../../formatted-text/flavours/preview-slice'
import { FormattedText } from '../../../formatted-text/formatted'
import PreviewContainer from '../../../preview/container'
import { PreviewHeader } from '../../../preview/header'
type PreviewPanelProps = {
// State
isMobile: boolean
dataSourceType: DataSourceType
currentDocForm: ChunkingMode
estimate?: FileIndexingEstimateResponse
parentChildConfig: ParentChildConfig
isSetting?: boolean
// Picker
pickerFiles: Array<{ id: string, name: string, extension: string }>
pickerValue: { id: string, name: string, extension: string }
// Mutation state
isIdle: boolean
isPending: boolean
// Actions
onPickerChange: (selected: { id: string, name: string }) => void
}
export const PreviewPanel: FC<PreviewPanelProps> = ({
isMobile,
dataSourceType: _dataSourceType,
currentDocForm,
estimate,
parentChildConfig,
isSetting,
pickerFiles,
pickerValue,
isIdle,
isPending,
onPickerChange,
}) => {
const { t } = useTranslation()
return (
<FloatRightContainer isMobile={isMobile} isOpen={true} onClose={noop} footer={null}>
<PreviewContainer
header={(
<PreviewHeader title={t('stepTwo.preview', { ns: 'datasetCreation' })}>
<div className="flex items-center gap-1">
<PreviewDocumentPicker
files={pickerFiles as Array<Required<{ id: string, name: string, extension: string }>>}
onChange={onPickerChange}
value={isSetting ? pickerFiles[0] : pickerValue}
/>
{currentDocForm !== ChunkingMode.qa && (
<Badge
text={t('stepTwo.previewChunkCount', {
ns: 'datasetCreation',
count: estimate?.total_segments || 0,
}) as string}
/>
)}
</div>
</PreviewHeader>
)}
className={cn('relative flex h-full w-1/2 shrink-0 p-4 pr-0', isMobile && 'w-full max-w-[524px]')}
mainClassName="space-y-6"
>
{/* QA Preview */}
{currentDocForm === ChunkingMode.qa && estimate?.qa_preview && (
estimate.qa_preview.map((item, index) => (
<ChunkContainer
key={item.question}
label={`Chunk-${index + 1}`}
characterCount={item.question.length + item.answer.length}
>
<QAPreview qa={item} />
</ChunkContainer>
))
)}
{/* Text Preview */}
{currentDocForm === ChunkingMode.text && estimate?.preview && (
estimate.preview.map((item, index) => (
<ChunkContainer
key={item.content}
label={`Chunk-${index + 1}`}
characterCount={item.content.length}
>
{item.content}
</ChunkContainer>
))
)}
{/* Parent-Child Preview */}
{currentDocForm === ChunkingMode.parentChild && estimate?.preview && (
estimate.preview.map((item, index) => {
const indexForLabel = index + 1
const childChunks = parentChildConfig.chunkForContext === 'full-doc'
? item.child_chunks.slice(0, FULL_DOC_PREVIEW_LENGTH)
: item.child_chunks
return (
<ChunkContainer
key={item.content}
label={`Chunk-${indexForLabel}`}
characterCount={item.content.length}
>
<FormattedText>
{childChunks.map((child, childIndex) => {
const childIndexForLabel = childIndex + 1
return (
<PreviewSlice
key={`C-${childIndexForLabel}-${child}`}
label={`C-${childIndexForLabel}`}
text={child}
tooltip={`Child-chunk-${childIndexForLabel} · ${child.length} Characters`}
labelInnerClassName="text-[10px] font-semibold align-bottom leading-7"
dividerClassName="leading-7"
/>
)
})}
</FormattedText>
</ChunkContainer>
)
})
)}
{/* Idle State */}
{isIdle && (
<div className="flex h-full w-full items-center justify-center">
<div className="flex flex-col items-center justify-center gap-3">
<RiSearchEyeLine className="size-10 text-text-empty-state-icon" />
<p className="text-sm text-text-tertiary">
{t('stepTwo.previewChunkTip', { ns: 'datasetCreation' })}
</p>
</div>
</div>
)}
{/* Loading State */}
{isPending && (
<div className="space-y-6">
{Array.from({ length: 10 }, (_, i) => (
<SkeletonContainer key={i}>
<SkeletonRow>
<SkeletonRectangle className="w-20" />
<SkeletonPoint />
<SkeletonRectangle className="w-24" />
</SkeletonRow>
<SkeletonRectangle className="w-full" />
<SkeletonRectangle className="w-full" />
<SkeletonRectangle className="w-[422px]" />
</SkeletonContainer>
))}
</div>
)}
</PreviewContainer>
</FloatRightContainer>
)
}

View File

@@ -0,0 +1,58 @@
'use client'
import type { FC } from 'react'
import { RiArrowLeftLine } from '@remixicon/react'
import { useTranslation } from 'react-i18next'
import Button from '@/app/components/base/button'
type StepTwoFooterProps = {
isSetting?: boolean
isCreating: boolean
onPrevious: () => void
onCreate: () => void
onCancel?: () => void
}
export const StepTwoFooter: FC<StepTwoFooterProps> = ({
isSetting,
isCreating,
onPrevious,
onCreate,
onCancel,
}) => {
const { t } = useTranslation()
if (!isSetting) {
return (
<div className="mt-8 flex items-center py-2">
<Button onClick={onPrevious}>
<RiArrowLeftLine className="mr-1 h-4 w-4" />
{t('stepTwo.previousStep', { ns: 'datasetCreation' })}
</Button>
<Button
className="ml-auto"
loading={isCreating}
variant="primary"
onClick={onCreate}
>
{t('stepTwo.nextStep', { ns: 'datasetCreation' })}
</Button>
</div>
)
}
return (
<div className="mt-8 flex items-center py-2">
<Button
loading={isCreating}
variant="primary"
onClick={onCreate}
>
{t('stepTwo.save', { ns: 'datasetCreation' })}
</Button>
<Button className="ml-2" onClick={onCancel}>
{t('stepTwo.cancel', { ns: 'datasetCreation' })}
</Button>
</div>
)
}

View File

@@ -0,0 +1,14 @@
export { useDocumentCreation } from './use-document-creation'
export type { DocumentCreation, ValidationParams } from './use-document-creation'
export { IndexingType, useIndexingConfig } from './use-indexing-config'
export type { IndexingConfig } from './use-indexing-config'
export { useIndexingEstimate } from './use-indexing-estimate'
export type { IndexingEstimate } from './use-indexing-estimate'
export { usePreviewState } from './use-preview-state'
export type { PreviewState } from './use-preview-state'
export { DEFAULT_MAXIMUM_CHUNK_LENGTH, DEFAULT_OVERLAP, DEFAULT_SEGMENT_IDENTIFIER, defaultParentChildConfig, MAXIMUM_CHUNK_TOKEN_LENGTH, useSegmentationState } from './use-segmentation-state'
export type { ParentChildConfig, SegmentationState } from './use-segmentation-state'

View File

@@ -0,0 +1,279 @@
import type { DefaultModel, Model } from '@/app/components/header/account-setting/model-provider-page/declarations'
import type { NotionPage } from '@/models/common'
import type {
ChunkingMode,
CrawlOptions,
CrawlResultItem,
CreateDocumentReq,
createDocumentResponse,
CustomFile,
FullDocumentDetail,
ProcessRule,
} from '@/models/datasets'
import type { RetrievalConfig, RETRIEVE_METHOD } from '@/types/app'
import { useCallback } from 'react'
import { useTranslation } from 'react-i18next'
import { trackEvent } from '@/app/components/base/amplitude'
import Toast from '@/app/components/base/toast'
import { isReRankModelSelected } from '@/app/components/datasets/common/check-rerank-model'
import { DataSourceProvider } from '@/models/common'
import {
DataSourceType,
} from '@/models/datasets'
import { getNotionInfo, getWebsiteInfo, useCreateDocument, useCreateFirstDocument } from '@/service/knowledge/use-create-dataset'
import { useInvalidDatasetList } from '@/service/knowledge/use-dataset'
import { IndexingType } from './use-indexing-config'
import { MAXIMUM_CHUNK_TOKEN_LENGTH } from './use-segmentation-state'
export type UseDocumentCreationOptions = {
datasetId?: string
isSetting?: boolean
documentDetail?: FullDocumentDetail
dataSourceType: DataSourceType
files: CustomFile[]
notionPages: NotionPage[]
notionCredentialId: string
websitePages: CrawlResultItem[]
crawlOptions?: CrawlOptions
websiteCrawlProvider?: DataSourceProvider
websiteCrawlJobId?: string
// Callbacks
onStepChange?: (delta: number) => void
updateIndexingTypeCache?: (type: string) => void
updateResultCache?: (res: createDocumentResponse) => void
updateRetrievalMethodCache?: (method: RETRIEVE_METHOD | '') => void
onSave?: () => void
mutateDatasetRes?: () => void
}
export type ValidationParams = {
segmentationType: string
maxChunkLength: number
limitMaxChunkLength: number
overlap: number
indexType: IndexingType
embeddingModel: DefaultModel
rerankModelList: Model[]
retrievalConfig: RetrievalConfig
}
export const useDocumentCreation = (options: UseDocumentCreationOptions) => {
const { t } = useTranslation()
const {
datasetId,
isSetting,
documentDetail,
dataSourceType,
files,
notionPages,
notionCredentialId,
websitePages,
crawlOptions,
websiteCrawlProvider = DataSourceProvider.jinaReader,
websiteCrawlJobId = '',
onStepChange,
updateIndexingTypeCache,
updateResultCache,
updateRetrievalMethodCache,
onSave,
mutateDatasetRes,
} = options
const createFirstDocumentMutation = useCreateFirstDocument()
const createDocumentMutation = useCreateDocument(datasetId!)
const invalidDatasetList = useInvalidDatasetList()
const isCreating = createFirstDocumentMutation.isPending || createDocumentMutation.isPending
// Validate creation params
const validateParams = useCallback((params: ValidationParams): boolean => {
const {
segmentationType,
maxChunkLength,
limitMaxChunkLength,
overlap,
indexType,
embeddingModel,
rerankModelList,
retrievalConfig,
} = params
if (segmentationType === 'general' && overlap > maxChunkLength) {
Toast.notify({ type: 'error', message: t('stepTwo.overlapCheck', { ns: 'datasetCreation' }) })
return false
}
if (segmentationType === 'general' && maxChunkLength > limitMaxChunkLength) {
Toast.notify({
type: 'error',
message: t('stepTwo.maxLengthCheck', { ns: 'datasetCreation', limit: limitMaxChunkLength }),
})
return false
}
if (!isSetting) {
if (indexType === IndexingType.QUALIFIED && (!embeddingModel.model || !embeddingModel.provider)) {
Toast.notify({
type: 'error',
message: t('datasetConfig.embeddingModelRequired', { ns: 'appDebug' }),
})
return false
}
if (!isReRankModelSelected({
rerankModelList,
retrievalConfig,
indexMethod: indexType,
})) {
Toast.notify({ type: 'error', message: t('datasetConfig.rerankModelRequired', { ns: 'appDebug' }) })
return false
}
}
return true
}, [t, isSetting])
// Build creation params
const buildCreationParams = useCallback((
currentDocForm: ChunkingMode,
docLanguage: string,
processRule: ProcessRule,
retrievalConfig: RetrievalConfig,
embeddingModel: DefaultModel,
indexingTechnique: string,
): CreateDocumentReq | null => {
if (isSetting) {
return {
original_document_id: documentDetail?.id,
doc_form: currentDocForm,
doc_language: docLanguage,
process_rule: processRule,
retrieval_model: retrievalConfig,
embedding_model: embeddingModel.model,
embedding_model_provider: embeddingModel.provider,
indexing_technique: indexingTechnique,
} as CreateDocumentReq
}
const params: CreateDocumentReq = {
data_source: {
type: dataSourceType,
info_list: {
data_source_type: dataSourceType,
},
},
indexing_technique: indexingTechnique,
process_rule: processRule,
doc_form: currentDocForm,
doc_language: docLanguage,
retrieval_model: retrievalConfig,
embedding_model: embeddingModel.model,
embedding_model_provider: embeddingModel.provider,
} as CreateDocumentReq
// Add data source specific info
if (dataSourceType === DataSourceType.FILE) {
params.data_source!.info_list.file_info_list = {
file_ids: files.map(file => file.id || '').filter(Boolean),
}
}
if (dataSourceType === DataSourceType.NOTION)
params.data_source!.info_list.notion_info_list = getNotionInfo(notionPages, notionCredentialId)
if (dataSourceType === DataSourceType.WEB) {
params.data_source!.info_list.website_info_list = getWebsiteInfo({
websiteCrawlProvider,
websiteCrawlJobId,
websitePages,
crawlOptions,
})
}
return params
}, [
isSetting,
documentDetail,
dataSourceType,
files,
notionPages,
notionCredentialId,
websitePages,
websiteCrawlProvider,
websiteCrawlJobId,
crawlOptions,
])
// Execute creation
const executeCreation = useCallback(async (
params: CreateDocumentReq,
indexType: IndexingType,
retrievalConfig: RetrievalConfig,
) => {
if (!datasetId) {
await createFirstDocumentMutation.mutateAsync(params, {
onSuccess(data) {
updateIndexingTypeCache?.(indexType)
updateResultCache?.(data)
updateRetrievalMethodCache?.(retrievalConfig.search_method as RETRIEVE_METHOD)
},
})
}
else {
await createDocumentMutation.mutateAsync(params, {
onSuccess(data) {
updateIndexingTypeCache?.(indexType)
updateResultCache?.(data)
updateRetrievalMethodCache?.(retrievalConfig.search_method as RETRIEVE_METHOD)
},
})
}
mutateDatasetRes?.()
invalidDatasetList()
trackEvent('create_datasets', {
data_source_type: dataSourceType,
indexing_technique: indexType,
})
onStepChange?.(+1)
if (isSetting)
onSave?.()
}, [
datasetId,
createFirstDocumentMutation,
createDocumentMutation,
updateIndexingTypeCache,
updateResultCache,
updateRetrievalMethodCache,
mutateDatasetRes,
invalidDatasetList,
dataSourceType,
onStepChange,
isSetting,
onSave,
])
// Validate preview params
const validatePreviewParams = useCallback((maxChunkLength: number): boolean => {
if (maxChunkLength > MAXIMUM_CHUNK_TOKEN_LENGTH) {
Toast.notify({
type: 'error',
message: t('stepTwo.maxLengthCheck', { ns: 'datasetCreation', limit: MAXIMUM_CHUNK_TOKEN_LENGTH }),
})
return false
}
return true
}, [t])
return {
isCreating,
validateParams,
buildCreationParams,
executeCreation,
validatePreviewParams,
}
}
export type DocumentCreation = ReturnType<typeof useDocumentCreation>
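A compressed sketch of the intended call order (validate, then build, then execute); every identifier not returned by this hook is a placeholder supplied by the sibling hooks in this change:

// Hypothetical submit handler inside the step-two component.
const handleCreate = async () => {
  const valid = creation.validateParams({
    segmentationType,
    maxChunkLength,
    limitMaxChunkLength,
    overlap,
    indexType,
    embeddingModel,
    rerankModelList,
    retrievalConfig,
  })
  if (!valid)
    return
  const params = creation.buildCreationParams(
    currentDocForm,
    docLanguage,
    getProcessRule(currentDocForm),
    retrievalConfig,
    embeddingModel,
    getIndexingTechnique(),
  )
  if (params)
    await creation.executeCreation(params, indexType, retrievalConfig)
}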

View File

@@ -0,0 +1,143 @@
import type { DefaultModel } from '@/app/components/header/account-setting/model-provider-page/declarations'
import type { RetrievalConfig } from '@/types/app'
import { useEffect, useMemo, useState } from 'react'
import { checkShowMultiModalTip } from '@/app/components/datasets/settings/utils'
import { ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations'
import { useDefaultModel, useModelList, useModelListAndDefaultModelAndCurrentProviderAndModel } from '@/app/components/header/account-setting/model-provider-page/hooks'
import { RETRIEVE_METHOD } from '@/types/app'
export enum IndexingType {
QUALIFIED = 'high_quality',
ECONOMICAL = 'economy',
}
const DEFAULT_RETRIEVAL_CONFIG: RetrievalConfig = {
search_method: RETRIEVE_METHOD.semantic,
reranking_enable: false,
reranking_model: {
reranking_provider_name: '',
reranking_model_name: '',
},
top_k: 3,
score_threshold_enabled: false,
score_threshold: 0.5,
}
export type UseIndexingConfigOptions = {
initialIndexType?: IndexingType
initialEmbeddingModel?: DefaultModel
initialRetrievalConfig?: RetrievalConfig
isAPIKeySet: boolean
hasSetIndexType: boolean
}
export const useIndexingConfig = (options: UseIndexingConfigOptions) => {
const {
initialIndexType,
initialEmbeddingModel,
initialRetrievalConfig,
isAPIKeySet,
hasSetIndexType,
} = options
// Rerank model
const {
modelList: rerankModelList,
defaultModel: rerankDefaultModel,
currentModel: isRerankDefaultModelValid,
} = useModelListAndDefaultModelAndCurrentProviderAndModel(ModelTypeEnum.rerank)
// Embedding model list
const { data: embeddingModelList } = useModelList(ModelTypeEnum.textEmbedding)
const { data: defaultEmbeddingModel } = useDefaultModel(ModelTypeEnum.textEmbedding)
// Index type state
const [indexType, setIndexType] = useState<IndexingType>(() => {
if (initialIndexType)
return initialIndexType
return isAPIKeySet ? IndexingType.QUALIFIED : IndexingType.ECONOMICAL
})
// Embedding model state
const [embeddingModel, setEmbeddingModel] = useState<DefaultModel>(
initialEmbeddingModel ?? {
provider: defaultEmbeddingModel?.provider.provider || '',
model: defaultEmbeddingModel?.model || '',
},
)
// Retrieval config state
const [retrievalConfig, setRetrievalConfig] = useState<RetrievalConfig>(
initialRetrievalConfig ?? DEFAULT_RETRIEVAL_CONFIG,
)
// Sync retrieval config with rerank model when available
useEffect(() => {
if (initialRetrievalConfig)
return
setRetrievalConfig({
search_method: RETRIEVE_METHOD.semantic,
reranking_enable: !!isRerankDefaultModelValid,
reranking_model: {
reranking_provider_name: isRerankDefaultModelValid ? rerankDefaultModel?.provider.provider ?? '' : '',
reranking_model_name: isRerankDefaultModelValid ? rerankDefaultModel?.model ?? '' : '',
},
top_k: 3,
score_threshold_enabled: false,
score_threshold: 0.5,
})
}, [rerankDefaultModel, isRerankDefaultModelValid, initialRetrievalConfig])
// Sync index type with props
useEffect(() => {
if (initialIndexType)
setIndexType(initialIndexType)
else
setIndexType(isAPIKeySet ? IndexingType.QUALIFIED : IndexingType.ECONOMICAL)
}, [isAPIKeySet, initialIndexType])
// Show multimodal tip
const showMultiModalTip = useMemo(() => {
return checkShowMultiModalTip({
embeddingModel,
rerankingEnable: retrievalConfig.reranking_enable,
rerankModel: {
rerankingProviderName: retrievalConfig.reranking_model.reranking_provider_name,
rerankingModelName: retrievalConfig.reranking_model.reranking_model_name,
},
indexMethod: indexType,
embeddingModelList,
rerankModelList,
})
}, [embeddingModel, retrievalConfig, indexType, embeddingModelList, rerankModelList])
// Get effective indexing technique
const getIndexingTechnique = () => initialIndexType || indexType
return {
// Index type
indexType,
setIndexType,
hasSetIndexType,
getIndexingTechnique,
// Embedding model
embeddingModel,
setEmbeddingModel,
embeddingModelList,
defaultEmbeddingModel,
// Retrieval config
retrievalConfig,
setRetrievalConfig,
rerankModelList,
rerankDefaultModel,
isRerankDefaultModelValid,
// Computed
showMultiModalTip,
}
}
export type IndexingConfig = ReturnType<typeof useIndexingConfig>
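For orientation, a brief sketch of how this hook is expected to back IndexingModeSection; the surrounding values are assumptions taken from the step-two props:

// Hypothetical usage inside the step-two component.
const indexing = useIndexingConfig({
  initialIndexType: indexingType, // from StepTwoProps
  isAPIKeySet,
  hasSetIndexType: !!indexingType,
})
// indexing.indexType / embeddingModel / retrievalConfig feed IndexingModeSection,
// and indexing.showMultiModalTip toggles the multimodal retrieval warning.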

View File

@@ -0,0 +1,123 @@
import type { IndexingType } from './use-indexing-config'
import type { NotionPage } from '@/models/common'
import type { ChunkingMode, CrawlOptions, CrawlResultItem, CustomFile, ProcessRule } from '@/models/datasets'
import { useCallback } from 'react'
import { DataSourceProvider } from '@/models/common'
import { DataSourceType } from '@/models/datasets'
import {
useFetchFileIndexingEstimateForFile,
useFetchFileIndexingEstimateForNotion,
useFetchFileIndexingEstimateForWeb,
} from '@/service/knowledge/use-create-dataset'
export type UseIndexingEstimateOptions = {
dataSourceType: DataSourceType
datasetId?: string
// Document settings
currentDocForm: ChunkingMode
docLanguage: string
// File data source
files: CustomFile[]
previewFileName?: string
// Notion data source
previewNotionPage: NotionPage
notionCredentialId: string
// Website data source
previewWebsitePage: CrawlResultItem
crawlOptions?: CrawlOptions
websiteCrawlProvider?: DataSourceProvider
websiteCrawlJobId?: string
// Processing
indexingTechnique: IndexingType
processRule: ProcessRule
}
export const useIndexingEstimate = (options: UseIndexingEstimateOptions) => {
const {
dataSourceType,
datasetId,
currentDocForm,
docLanguage,
files,
previewFileName,
previewNotionPage,
notionCredentialId,
previewWebsitePage,
crawlOptions,
websiteCrawlProvider,
websiteCrawlJobId,
indexingTechnique,
processRule,
} = options
// File indexing estimate
const fileQuery = useFetchFileIndexingEstimateForFile({
docForm: currentDocForm,
docLanguage,
dataSourceType: DataSourceType.FILE,
files: previewFileName
? [files.find(file => file.name === previewFileName)!]
: files,
indexingTechnique,
processRule,
dataset_id: datasetId!,
})
// Notion indexing estimate
const notionQuery = useFetchFileIndexingEstimateForNotion({
docForm: currentDocForm,
docLanguage,
dataSourceType: DataSourceType.NOTION,
notionPages: [previewNotionPage],
indexingTechnique,
processRule,
dataset_id: datasetId || '',
credential_id: notionCredentialId,
})
// Website indexing estimate
const websiteQuery = useFetchFileIndexingEstimateForWeb({
docForm: currentDocForm,
docLanguage,
dataSourceType: DataSourceType.WEB,
websitePages: [previewWebsitePage],
crawlOptions,
websiteCrawlProvider: websiteCrawlProvider ?? DataSourceProvider.jinaReader,
websiteCrawlJobId: websiteCrawlJobId ?? '',
indexingTechnique,
processRule,
dataset_id: datasetId || '',
})
// Get current mutation based on data source type
const getCurrentMutation = useCallback(() => {
if (dataSourceType === DataSourceType.FILE)
return fileQuery
if (dataSourceType === DataSourceType.NOTION)
return notionQuery
return websiteQuery
}, [dataSourceType, fileQuery, notionQuery, websiteQuery])
const currentMutation = getCurrentMutation()
// Trigger estimate fetch
const fetchEstimate = useCallback(() => {
if (dataSourceType === DataSourceType.FILE)
fileQuery.mutate()
else if (dataSourceType === DataSourceType.NOTION)
notionQuery.mutate()
else
websiteQuery.mutate()
}, [dataSourceType, fileQuery, notionQuery, websiteQuery])
return {
currentMutation,
estimate: currentMutation.data,
isIdle: currentMutation.isIdle,
isPending: currentMutation.isPending,
fetchEstimate,
reset: currentMutation.reset,
}
}
export type IndexingEstimate = ReturnType<typeof useIndexingEstimate>
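A hedged sketch of driving the chunk preview: assemble the options from the other hooks, validate the chunk length, then trigger the estimate. The option values shown are placeholders from sibling hooks, not asserted by this file:

// Hypothetical wiring inside the step-two component.
const indexingEstimate = useIndexingEstimate({
  dataSourceType,
  datasetId,
  currentDocForm,
  docLanguage,
  files,
  previewFileName: previewFile?.name,
  previewNotionPage,
  notionCredentialId,
  previewWebsitePage,
  crawlOptions,
  websiteCrawlProvider,
  websiteCrawlJobId,
  indexingTechnique: getIndexingTechnique(),
  processRule: getProcessRule(currentDocForm),
})

const handlePreviewChunks = () => {
  if (!creation.validatePreviewParams(maxChunkLength))
    return
  indexingEstimate.fetchEstimate()
}
// estimate / isIdle / isPending then map directly onto PreviewPanel's props of the same names.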

View File

@@ -0,0 +1,127 @@
import type { NotionPage } from '@/models/common'
import type { CrawlResultItem, CustomFile, DocumentItem, FullDocumentDetail } from '@/models/datasets'
import { useCallback, useState } from 'react'
import { DataSourceType } from '@/models/datasets'
export type UsePreviewStateOptions = {
dataSourceType: DataSourceType
files: CustomFile[]
notionPages: NotionPage[]
websitePages: CrawlResultItem[]
documentDetail?: FullDocumentDetail
datasetId?: string
}
export const usePreviewState = (options: UsePreviewStateOptions) => {
const {
dataSourceType,
files,
notionPages,
websitePages,
documentDetail,
datasetId,
} = options
// File preview state
const [previewFile, setPreviewFile] = useState<DocumentItem>(
(datasetId && documentDetail)
? documentDetail.file
: files[0],
)
// Notion page preview state
const [previewNotionPage, setPreviewNotionPage] = useState<NotionPage>(
(datasetId && documentDetail)
? documentDetail.notion_page
: notionPages[0],
)
// Website page preview state
const [previewWebsitePage, setPreviewWebsitePage] = useState<CrawlResultItem>(
(datasetId && documentDetail)
? documentDetail.website_page
: websitePages[0],
)
// Get preview items for document picker based on data source type
const getPreviewPickerItems = useCallback(() => {
if (dataSourceType === DataSourceType.FILE) {
return files as Array<Required<CustomFile>>
}
if (dataSourceType === DataSourceType.NOTION) {
return notionPages.map(page => ({
id: page.page_id,
name: page.page_name,
extension: 'md',
}))
}
if (dataSourceType === DataSourceType.WEB) {
return websitePages.map(page => ({
id: page.source_url,
name: page.title,
extension: 'md',
}))
}
return []
}, [dataSourceType, files, notionPages, websitePages])
// Get current preview value for picker
const getPreviewPickerValue = useCallback(() => {
if (dataSourceType === DataSourceType.FILE) {
return previewFile as Required<CustomFile>
}
if (dataSourceType === DataSourceType.NOTION) {
return {
id: previewNotionPage?.page_id || '',
name: previewNotionPage?.page_name || '',
extension: 'md',
}
}
if (dataSourceType === DataSourceType.WEB) {
return {
id: previewWebsitePage?.source_url || '',
name: previewWebsitePage?.title || '',
extension: 'md',
}
}
return { id: '', name: '', extension: '' }
}, [dataSourceType, previewFile, previewNotionPage, previewWebsitePage])
// Handle preview change
const handlePreviewChange = useCallback((selected: { id: string, name: string }) => {
if (dataSourceType === DataSourceType.FILE) {
setPreviewFile(selected as DocumentItem)
}
else if (dataSourceType === DataSourceType.NOTION) {
const selectedPage = notionPages.find(page => page.page_id === selected.id)
if (selectedPage)
setPreviewNotionPage(selectedPage)
}
else if (dataSourceType === DataSourceType.WEB) {
const selectedPage = websitePages.find(page => page.source_url === selected.id)
if (selectedPage)
setPreviewWebsitePage(selectedPage)
}
}, [dataSourceType, notionPages, websitePages])
return {
// File preview
previewFile,
setPreviewFile,
// Notion preview
previewNotionPage,
setPreviewNotionPage,
// Website preview
previewWebsitePage,
setPreviewWebsitePage,
// Picker helpers
getPreviewPickerItems,
getPreviewPickerValue,
handlePreviewChange,
}
}
export type PreviewState = ReturnType<typeof usePreviewState>
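A short sketch of plugging the picker helpers into PreviewPanel; prop names follow the PreviewPanel file in this change, and resetting the estimate on selection change is an assumption about intended behaviour:

// Hypothetical wiring inside the step-two component.
const preview = usePreviewState({ dataSourceType, files, notionPages, websitePages, documentDetail, datasetId })

// PreviewPanel wiring:
//   pickerFiles    = preview.getPreviewPickerItems()
//   pickerValue    = preview.getPreviewPickerValue()
//   onPickerChange = (selected) => { preview.handlePreviewChange(selected); indexingEstimate.reset() }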

View File

@@ -0,0 +1,222 @@
import type { ParentMode, PreProcessingRule, ProcessRule, Rules } from '@/models/datasets'
import { useCallback, useState } from 'react'
import { ChunkingMode, ProcessMode } from '@/models/datasets'
import escape from './escape'
import unescape from './unescape'
// Constants
export const DEFAULT_SEGMENT_IDENTIFIER = '\\n\\n'
export const DEFAULT_MAXIMUM_CHUNK_LENGTH = 1024
export const DEFAULT_OVERLAP = 50
export const MAXIMUM_CHUNK_TOKEN_LENGTH = Number.parseInt(
globalThis.document?.body?.getAttribute('data-public-indexing-max-segmentation-tokens-length') || '4000',
10,
)
export type ParentChildConfig = {
chunkForContext: ParentMode
parent: {
delimiter: string
maxLength: number
}
child: {
delimiter: string
maxLength: number
}
}
export const defaultParentChildConfig: ParentChildConfig = {
chunkForContext: 'paragraph',
parent: {
delimiter: '\\n\\n',
maxLength: 1024,
},
child: {
delimiter: '\\n',
maxLength: 512,
},
}
export type UseSegmentationStateOptions = {
initialSegmentationType?: ProcessMode
}
export const useSegmentationState = (options: UseSegmentationStateOptions = {}) => {
const { initialSegmentationType } = options
// Segmentation type (general or parent-child)
const [segmentationType, setSegmentationType] = useState<ProcessMode>(
initialSegmentationType ?? ProcessMode.general,
)
// General chunking settings
const [segmentIdentifier, doSetSegmentIdentifier] = useState(DEFAULT_SEGMENT_IDENTIFIER)
const [maxChunkLength, setMaxChunkLength] = useState(DEFAULT_MAXIMUM_CHUNK_LENGTH)
const [limitMaxChunkLength, setLimitMaxChunkLength] = useState(MAXIMUM_CHUNK_TOKEN_LENGTH)
const [overlap, setOverlap] = useState(DEFAULT_OVERLAP)
// Pre-processing rules
const [rules, setRules] = useState<PreProcessingRule[]>([])
const [defaultConfig, setDefaultConfig] = useState<Rules>()
// Parent-child config
const [parentChildConfig, setParentChildConfig] = useState<ParentChildConfig>(defaultParentChildConfig)
// Escaped segment identifier setter
const setSegmentIdentifier = useCallback((value: string, canEmpty?: boolean) => {
if (value) {
doSetSegmentIdentifier(escape(value))
}
else {
doSetSegmentIdentifier(canEmpty ? '' : DEFAULT_SEGMENT_IDENTIFIER)
}
}, [])
// Rule toggle handler
const toggleRule = useCallback((id: string) => {
setRules(prev => prev.map(rule =>
rule.id === id ? { ...rule, enabled: !rule.enabled } : rule,
))
}, [])
// Reset to defaults
const resetToDefaults = useCallback(() => {
if (defaultConfig) {
setSegmentIdentifier(defaultConfig.segmentation.separator)
setMaxChunkLength(defaultConfig.segmentation.max_tokens)
setOverlap(defaultConfig.segmentation.chunk_overlap!)
setRules(defaultConfig.pre_processing_rules)
}
setParentChildConfig(defaultParentChildConfig)
}, [defaultConfig, setSegmentIdentifier])
// Apply config from document detail
const applyConfigFromRules = useCallback((rulesConfig: Rules, isHierarchical: boolean) => {
const separator = rulesConfig.segmentation.separator
const max = rulesConfig.segmentation.max_tokens
const chunkOverlap = rulesConfig.segmentation.chunk_overlap
setSegmentIdentifier(separator)
setMaxChunkLength(max)
setOverlap(chunkOverlap!)
setRules(rulesConfig.pre_processing_rules)
setDefaultConfig(rulesConfig)
if (isHierarchical) {
setParentChildConfig({
chunkForContext: rulesConfig.parent_mode || 'paragraph',
parent: {
delimiter: escape(rulesConfig.segmentation.separator),
maxLength: rulesConfig.segmentation.max_tokens,
},
child: {
delimiter: escape(rulesConfig.subchunk_segmentation!.separator),
maxLength: rulesConfig.subchunk_segmentation!.max_tokens,
},
})
}
}, [setSegmentIdentifier])
// Get process rule for API
const getProcessRule = useCallback((docForm: ChunkingMode): ProcessRule => {
if (docForm === ChunkingMode.parentChild) {
return {
rules: {
pre_processing_rules: rules,
segmentation: {
separator: unescape(parentChildConfig.parent.delimiter),
max_tokens: parentChildConfig.parent.maxLength,
},
parent_mode: parentChildConfig.chunkForContext,
subchunk_segmentation: {
separator: unescape(parentChildConfig.child.delimiter),
max_tokens: parentChildConfig.child.maxLength,
},
},
mode: 'hierarchical',
} as ProcessRule
}
return {
rules: {
pre_processing_rules: rules,
segmentation: {
separator: unescape(segmentIdentifier),
max_tokens: maxChunkLength,
chunk_overlap: overlap,
},
},
mode: segmentationType,
} as ProcessRule
}, [rules, parentChildConfig, segmentIdentifier, maxChunkLength, overlap, segmentationType])
// Update parent config field
const updateParentConfig = useCallback((field: 'delimiter' | 'maxLength', value: string | number) => {
setParentChildConfig((prev) => {
let newValue: string | number
if (field === 'delimiter')
newValue = value ? escape(value as string) : ''
else
newValue = value
return {
...prev,
parent: { ...prev.parent, [field]: newValue },
}
})
}, [])
// Update child config field
const updateChildConfig = useCallback((field: 'delimiter' | 'maxLength', value: string | number) => {
setParentChildConfig((prev) => {
let newValue: string | number
if (field === 'delimiter')
newValue = value ? escape(value as string) : ''
else
newValue = value
return {
...prev,
child: { ...prev.child, [field]: newValue },
}
})
}, [])
// Set chunk for context mode
const setChunkForContext = useCallback((mode: ParentMode) => {
setParentChildConfig(prev => ({ ...prev, chunkForContext: mode }))
}, [])
return {
// General chunking state
segmentationType,
setSegmentationType,
segmentIdentifier,
setSegmentIdentifier,
maxChunkLength,
setMaxChunkLength,
limitMaxChunkLength,
setLimitMaxChunkLength,
overlap,
setOverlap,
// Rules
rules,
setRules,
defaultConfig,
setDefaultConfig,
toggleRule,
// Parent-child config
parentChildConfig,
setParentChildConfig,
updateParentConfig,
updateChildConfig,
setChunkForContext,
// Actions
resetToDefaults,
applyConfigFromRules,
getProcessRule,
}
}
export type SegmentationState = ReturnType<typeof useSegmentationState>
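To make the escape/unescape round trip concrete, a hedged example of the process rule produced with the defaults above; the commented shape mirrors the constants in this file rather than asserting the exact API payload:

// Hypothetical usage inside the step-two component.
const segmentation = useSegmentationState()
const processRule = segmentation.getProcessRule(ChunkingMode.text)
// With the defaults above, roughly:
// {
//   rules: {
//     pre_processing_rules: [],
//     segmentation: { separator: '\n\n' (real newlines after unescape), max_tokens: 1024, chunk_overlap: 50 },
//   },
//   mode: ProcessMode.general,
// }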

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -0,0 +1,28 @@
import type { IndexingType } from './hooks'
import type { DataSourceProvider, NotionPage } from '@/models/common'
import type { CrawlOptions, CrawlResultItem, createDocumentResponse, CustomFile, DataSourceType, FullDocumentDetail } from '@/models/datasets'
import type { RETRIEVE_METHOD } from '@/types/app'
export type StepTwoProps = {
isSetting?: boolean
documentDetail?: FullDocumentDetail
isAPIKeySet: boolean
onSetting: () => void
datasetId?: string
indexingType?: IndexingType
retrievalMethod?: string
dataSourceType: DataSourceType
files: CustomFile[]
notionPages?: NotionPage[]
notionCredentialId: string
websitePages?: CrawlResultItem[]
crawlOptions?: CrawlOptions
websiteCrawlProvider?: DataSourceProvider
websiteCrawlJobId?: string
onStepChange?: (delta: number) => void
updateIndexingTypeCache?: (type: string) => void
updateRetrievalMethodCache?: (method: RETRIEVE_METHOD | '') => void
updateResultCache?: (res: createDocumentResponse) => void
onSave?: () => void
onCancel?: () => void
}

View File

@@ -0,0 +1,201 @@
'use client'
import type { FC } from 'react'
import type { Item } from '@/app/components/base/select'
import type { BuiltInMetadataItem, MetadataItemWithValueLength } from '@/app/components/datasets/metadata/types'
import type { SortType } from '@/service/datasets'
import { PlusIcon } from '@heroicons/react/24/solid'
import { RiDraftLine, RiExternalLinkLine } from '@remixicon/react'
import { useMemo } from 'react'
import { useTranslation } from 'react-i18next'
import Button from '@/app/components/base/button'
import Chip from '@/app/components/base/chip'
import Input from '@/app/components/base/input'
import Sort from '@/app/components/base/sort'
import AutoDisabledDocument from '@/app/components/datasets/common/document-status-with-action/auto-disabled-document'
import IndexFailed from '@/app/components/datasets/common/document-status-with-action/index-failed'
import StatusWithAction from '@/app/components/datasets/common/document-status-with-action/status-with-action'
import DatasetMetadataDrawer from '@/app/components/datasets/metadata/metadata-dataset/dataset-metadata-drawer'
import { useDocLink } from '@/context/i18n'
import { DataSourceType } from '@/models/datasets'
import { useIndexStatus } from '../status-item/hooks'
type DocumentsHeaderProps = {
// Dataset info
datasetId: string
dataSourceType?: DataSourceType
embeddingAvailable: boolean
isFreePlan: boolean
// Filter & sort
statusFilterValue: string
sortValue: SortType
inputValue: string
onStatusFilterChange: (value: string) => void
onStatusFilterClear: () => void
onSortChange: (value: string) => void
onInputChange: (value: string) => void
// Metadata modal
isShowEditMetadataModal: boolean
showEditMetadataModal: () => void
hideEditMetadataModal: () => void
datasetMetaData?: MetadataItemWithValueLength[]
builtInMetaData?: BuiltInMetadataItem[]
builtInEnabled: boolean
onAddMetaData: (payload: BuiltInMetadataItem) => Promise<void>
onRenameMetaData: (payload: MetadataItemWithValueLength) => Promise<void>
onDeleteMetaData: (metaDataId: string) => Promise<void>
onBuiltInEnabledChange: (enabled: boolean) => void
// Actions
onAddDocument: () => void
}
const DocumentsHeader: FC<DocumentsHeaderProps> = ({
datasetId,
dataSourceType,
embeddingAvailable,
isFreePlan,
statusFilterValue,
sortValue,
inputValue,
onStatusFilterChange,
onStatusFilterClear,
onSortChange,
onInputChange,
isShowEditMetadataModal,
showEditMetadataModal,
hideEditMetadataModal,
datasetMetaData,
builtInMetaData,
builtInEnabled,
onAddMetaData,
onRenameMetaData,
onDeleteMetaData,
onBuiltInEnabledChange,
onAddDocument,
}) => {
const { t } = useTranslation()
const docLink = useDocLink()
const DOC_INDEX_STATUS_MAP = useIndexStatus()
const isDataSourceNotion = dataSourceType === DataSourceType.NOTION
const isDataSourceWeb = dataSourceType === DataSourceType.WEB
const statusFilterItems: Item[] = useMemo(() => [
{ value: 'all', name: t('list.index.all', { ns: 'datasetDocuments' }) as string },
{ value: 'queuing', name: DOC_INDEX_STATUS_MAP.queuing.text },
{ value: 'indexing', name: DOC_INDEX_STATUS_MAP.indexing.text },
{ value: 'paused', name: DOC_INDEX_STATUS_MAP.paused.text },
{ value: 'error', name: DOC_INDEX_STATUS_MAP.error.text },
{ value: 'available', name: DOC_INDEX_STATUS_MAP.available.text },
{ value: 'enabled', name: DOC_INDEX_STATUS_MAP.enabled.text },
{ value: 'disabled', name: DOC_INDEX_STATUS_MAP.disabled.text },
{ value: 'archived', name: DOC_INDEX_STATUS_MAP.archived.text },
], [DOC_INDEX_STATUS_MAP, t])
const sortItems: Item[] = useMemo(() => [
{ value: 'created_at', name: t('list.sort.uploadTime', { ns: 'datasetDocuments' }) as string },
{ value: 'hit_count', name: t('list.sort.hitCount', { ns: 'datasetDocuments' }) as string },
], [t])
// Determine add button text based on data source type
const addButtonText = useMemo(() => {
if (isDataSourceNotion)
return t('list.addPages', { ns: 'datasetDocuments' })
if (isDataSourceWeb)
return t('list.addUrl', { ns: 'datasetDocuments' })
return t('list.addFile', { ns: 'datasetDocuments' })
}, [isDataSourceNotion, isDataSourceWeb, t])
return (
<>
{/* Title section */}
<div className="flex flex-col justify-center gap-1 px-6 pt-4">
<h1 className="text-base font-semibold text-text-primary">
{t('list.title', { ns: 'datasetDocuments' })}
</h1>
<div className="flex items-center space-x-0.5 text-sm font-normal text-text-tertiary">
<span>{t('list.desc', { ns: 'datasetDocuments' })}</span>
<a
className="flex items-center text-text-accent"
target="_blank"
rel="noopener noreferrer"
href={docLink('/guides/knowledge-base/integrate-knowledge-within-application')}
>
<span>{t('list.learnMore', { ns: 'datasetDocuments' })}</span>
<RiExternalLinkLine className="h-3 w-3" />
</a>
</div>
</div>
{/* Toolbar section */}
<div className="flex flex-wrap items-center justify-between px-6 pt-4">
{/* Left: Filters */}
<div className="flex items-center gap-2">
<Chip
className="w-[160px]"
showLeftIcon={false}
value={statusFilterValue}
items={statusFilterItems}
onSelect={item => onStatusFilterChange(item?.value ? String(item.value) : '')}
onClear={onStatusFilterClear}
/>
<Input
showLeftIcon
showClearIcon
wrapperClassName="!w-[200px]"
value={inputValue}
onChange={e => onInputChange(e.target.value)}
onClear={() => onInputChange('')}
/>
<div className="h-3.5 w-px bg-divider-regular"></div>
<Sort
order={sortValue.startsWith('-') ? '-' : ''}
value={sortValue.replace('-', '')}
items={sortItems}
onSelect={value => onSortChange(String(value))}
/>
</div>
{/* Right: Actions */}
<div className="flex !h-8 items-center justify-center gap-2">
{!isFreePlan && <AutoDisabledDocument datasetId={datasetId} />}
<IndexFailed datasetId={datasetId} />
{!embeddingAvailable && (
<StatusWithAction
type="warning"
description={t('embeddingModelNotAvailable', { ns: 'dataset' })}
/>
)}
{embeddingAvailable && (
<Button variant="secondary" className="shrink-0" onClick={showEditMetadataModal}>
<RiDraftLine className="mr-1 size-4" />
{t('metadata.metadata', { ns: 'dataset' })}
</Button>
)}
{isShowEditMetadataModal && (
<DatasetMetadataDrawer
userMetadata={datasetMetaData ?? []}
onClose={hideEditMetadataModal}
onAdd={onAddMetaData}
onRename={onRenameMetaData}
onRemove={onDeleteMetaData}
builtInMetadata={builtInMetaData ?? []}
isBuiltInEnabled={builtInEnabled}
onIsBuiltInEnabledChange={onBuiltInEnabledChange}
/>
)}
{embeddingAvailable && (
<Button variant="primary" onClick={onAddDocument} className="shrink-0">
<PlusIcon className="mr-2 h-4 w-4 stroke-current" />
{addButtonText}
</Button>
)}
</div>
</div>
</>
)
}
export default DocumentsHeader
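
A rough wiring sketch for the header, assuming the parent page drives it with the useDocumentsPageState hook that appears later in this diff; the import paths are assumptions and the metadata-related handlers are stubbed placeholders:

// Illustrative only: wiring DocumentsHeader to the page-state hook.
import DocumentsHeader from './components/documents-header' // assumed path
import { useDocumentsPageState } from './hooks/use-documents-page-state' // assumed path

const DocumentsPage = ({ datasetId }: { datasetId: string }) => {
  const pageState = useDocumentsPageState()
  return (
    <DocumentsHeader
      datasetId={datasetId}
      embeddingAvailable
      isFreePlan={false}
      statusFilterValue={pageState.statusFilterValue}
      sortValue={pageState.sortValue}
      inputValue={pageState.inputValue}
      onStatusFilterChange={pageState.handleStatusFilterChange}
      onStatusFilterClear={pageState.handleStatusFilterClear}
      onSortChange={pageState.handleSortChange}
      onInputChange={pageState.handleInputChange}
      isShowEditMetadataModal={false}
      showEditMetadataModal={() => {}}
      hideEditMetadataModal={() => {}}
      builtInEnabled={false}
      onAddMetaData={async () => {}}
      onRenameMetaData={async () => {}}
      onDeleteMetaData={async () => {}}
      onBuiltInEnabledChange={() => {}}
      onAddDocument={() => {}}
    />
  )
}

export default DocumentsPage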


@@ -0,0 +1,41 @@
'use client'
import type { FC } from 'react'
import { PlusIcon } from '@heroicons/react/24/solid'
import { useTranslation } from 'react-i18next'
import Button from '@/app/components/base/button'
import s from '../style.module.css'
import { FolderPlusIcon, NotionIcon, ThreeDotsIcon } from './icons'
type EmptyElementProps = {
canAdd: boolean
onClick: () => void
type?: 'upload' | 'sync'
}
const EmptyElement: FC<EmptyElementProps> = ({ canAdd = true, onClick, type = 'upload' }) => {
const { t } = useTranslation()
return (
<div className={s.emptyWrapper}>
<div className={s.emptyElement}>
<div className={s.emptySymbolIconWrapper}>
{type === 'upload' ? <FolderPlusIcon /> : <NotionIcon />}
</div>
<span className={s.emptyTitle}>
{t('list.empty.title', { ns: 'datasetDocuments' })}
<ThreeDotsIcon className="relative -left-1.5 -top-3 inline" />
</span>
<div className={s.emptyTip}>
{t(`list.empty.${type}.tip`, { ns: 'datasetDocuments' })}
</div>
{type === 'upload' && canAdd && (
<Button onClick={onClick} className={s.addFileBtn} variant="secondary-accent">
<PlusIcon className={s.plusIcon} />
{t('list.addFile', { ns: 'datasetDocuments' })}
</Button>
)}
</div>
</div>
)
}
export default EmptyElement
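
For reference, a hedged sketch of the typical call site: render the normal content when the dataset has documents and fall back to EmptyElement otherwise (the wrapper name, props, and import path other than EmptyElement's own props are illustrative):

// Illustrative only: falling back to EmptyElement when there are no documents.
import type { ReactNode } from 'react'
import EmptyElement from './components/empty-element' // assumed path

type DocumentsBodyProps = {
  total: number
  canAdd: boolean
  isDataSourceNotion: boolean
  onAddDocument: () => void
  children: ReactNode
}

const DocumentsBody = ({ total, canAdd, isDataSourceNotion, onAddDocument, children }: DocumentsBodyProps) => {
  if (total === 0) {
    return (
      <EmptyElement
        canAdd={canAdd}
        type={isDataSourceNotion ? 'sync' : 'upload'}
        onClick={onAddDocument}
      />
    )
  }
  return <>{children}</>
}

export default DocumentsBody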


@@ -0,0 +1,34 @@
import type * as React from 'react'
export const FolderPlusIcon = ({ className }: React.SVGProps<SVGElement>) => {
return (
<svg width="20" height="20" viewBox="0 0 20 20" fill="none" xmlns="http://www.w3.org/2000/svg" className={className ?? ''}>
<path d="M10.8332 5.83333L9.90355 3.9741C9.63601 3.439 9.50222 3.17144 9.30265 2.97597C9.12615 2.80311 8.91344 2.67164 8.6799 2.59109C8.41581 2.5 8.11668 2.5 7.51841 2.5H4.33317C3.39975 2.5 2.93304 2.5 2.57652 2.68166C2.26292 2.84144 2.00795 3.09641 1.84816 3.41002C1.6665 3.76654 1.6665 4.23325 1.6665 5.16667V5.83333M1.6665 5.83333H14.3332C15.7333 5.83333 16.4334 5.83333 16.9681 6.10582C17.4386 6.3455 17.821 6.72795 18.0607 7.19836C18.3332 7.73314 18.3332 8.4332 18.3332 9.83333V13.5C18.3332 14.9001 18.3332 15.6002 18.0607 16.135C17.821 16.6054 17.4386 16.9878 16.9681 17.2275C16.4334 17.5 15.7333 17.5 14.3332 17.5H5.6665C4.26637 17.5 3.56631 17.5 3.03153 17.2275C2.56112 16.9878 2.17867 16.6054 1.93899 16.135C1.6665 15.6002 1.6665 14.9001 1.6665 13.5V5.83333ZM9.99984 14.1667V9.16667M7.49984 11.6667H12.4998" stroke="#667085" strokeWidth="1.5" strokeLinecap="round" strokeLinejoin="round" />
</svg>
)
}
export const ThreeDotsIcon = ({ className }: React.SVGProps<SVGElement>) => {
return (
<svg width="16" height="16" viewBox="0 0 16 16" fill="none" xmlns="http://www.w3.org/2000/svg" className={className ?? ''}>
<path d="M5 6.5V5M8.93934 7.56066L10 6.5M10.0103 11.5H11.5103" stroke="#374151" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round" />
</svg>
)
}
export const NotionIcon = ({ className }: React.SVGProps<SVGElement>) => {
return (
<svg width="20" height="20" viewBox="0 0 20 20" fill="none" xmlns="http://www.w3.org/2000/svg" className={className ?? ''}>
<g clipPath="url(#clip0_2164_11263)">
<path fillRule="evenodd" clipRule="evenodd" d="M3.5725 18.2611L1.4229 15.5832C0.905706 14.9389 0.625 14.1466 0.625 13.3312V3.63437C0.625 2.4129 1.60224 1.39936 2.86295 1.31328L12.8326 0.632614C13.5569 0.583164 14.2768 0.775682 14.8717 1.17794L18.3745 3.5462C19.0015 3.97012 19.375 4.66312 19.375 5.40266V16.427C19.375 17.6223 18.4141 18.6121 17.1798 18.688L6.11458 19.3692C5.12958 19.4298 4.17749 19.0148 3.5725 18.2611Z" fill="white" />
<path d="M7.03006 8.48669V8.35974C7.03006 8.03794 7.28779 7.77104 7.61997 7.74886L10.0396 7.58733L13.3857 12.5147V8.19009L12.5244 8.07528V8.01498C12.5244 7.68939 12.788 7.42074 13.1244 7.4035L15.326 7.29073V7.60755C15.326 7.75628 15.2154 7.88349 15.0638 7.90913L14.534 7.99874V15.0023L13.8691 15.231C13.3136 15.422 12.6952 15.2175 12.3772 14.7377L9.12879 9.83574V14.5144L10.1287 14.7057L10.1147 14.7985C10.0711 15.089 9.82028 15.3087 9.51687 15.3222L7.03006 15.4329C6.99718 15.1205 7.23132 14.841 7.55431 14.807L7.88143 14.7727V8.53453L7.03006 8.48669Z" fill="black" />
<path fillRule="evenodd" clipRule="evenodd" d="M12.9218 1.85424L2.95217 2.53491C2.35499 2.57568 1.89209 3.05578 1.89209 3.63437V13.3312C1.89209 13.8748 2.07923 14.403 2.42402 14.8325L4.57362 17.5104C4.92117 17.9434 5.46812 18.1818 6.03397 18.147L17.0991 17.4658C17.6663 17.4309 18.1078 16.9762 18.1078 16.427V5.40266C18.1078 5.06287 17.9362 4.74447 17.6481 4.54969L14.1453 2.18143C13.7883 1.94008 13.3564 1.82457 12.9218 1.85424ZM3.44654 3.78562C3.30788 3.68296 3.37387 3.46909 3.54806 3.4566L12.9889 2.77944C13.2897 2.75787 13.5886 2.8407 13.8318 3.01305L15.7261 4.35508C15.798 4.40603 15.7642 4.51602 15.6752 4.52086L5.67742 5.0646C5.37485 5.08106 5.0762 4.99217 4.83563 4.81406L3.44654 3.78562ZM5.20848 6.76919C5.20848 6.4444 5.47088 6.1761 5.80642 6.15783L16.3769 5.58216C16.7039 5.56435 16.9792 5.81583 16.9792 6.13239V15.6783C16.9792 16.0025 16.7177 16.2705 16.3829 16.2896L5.8793 16.8872C5.51537 16.9079 5.20848 16.6283 5.20848 16.2759V6.76919Z" fill="black" />
</g>
<defs>
<clipPath id="clip0_2164_11263">
<rect width="20" height="20" fill="white" />
</clipPath>
</defs>
</svg>
)
}


@@ -16,13 +16,16 @@ import * as React from 'react'
import { useCallback, useEffect, useMemo, useState } from 'react'
import { useTranslation } from 'react-i18next'
import Checkbox from '@/app/components/base/checkbox'
import FileTypeIcon from '@/app/components/base/file-uploader/file-type-icon'
import NotionIcon from '@/app/components/base/notion-icon'
import Pagination from '@/app/components/base/pagination'
import Toast from '@/app/components/base/toast'
import Tooltip from '@/app/components/base/tooltip'
import ChunkingModeLabel from '@/app/components/datasets/common/chunking-mode-label'
import { normalizeStatusForQuery } from '@/app/components/datasets/documents/status-filter'
import { extensionToFileType } from '@/app/components/datasets/hit-testing/utils/extension-to-file-type'
import EditMetadataBatchModal from '@/app/components/datasets/metadata/edit-metadata-batch/modal'
import useBatchEditDocumentMetadata from '@/app/components/datasets/metadata/hooks/use-batch-edit-document-metadata'
import { useDatasetDetailContextWithSelector as useDatasetDetailContext } from '@/context/dataset-detail'
import useTimestamp from '@/hooks/use-timestamp'
import { ChunkingMode, DataSourceType, DocumentActionType } from '@/models/datasets'
@@ -31,14 +34,11 @@ import { useDocumentArchive, useDocumentBatchRetryIndex, useDocumentDelete, useD
import { asyncRunSafe } from '@/utils'
import { cn } from '@/utils/classnames'
import { formatNumber } from '@/utils/format'
import FileTypeIcon from '../../base/file-uploader/file-type-icon'
import ChunkingModeLabel from '../common/chunking-mode-label'
import useBatchEditDocumentMetadata from '../metadata/hooks/use-batch-edit-document-metadata'
import BatchAction from './detail/completed/common/batch-action'
import BatchAction from '../detail/completed/common/batch-action'
import StatusItem from '../status-item'
import s from '../style.module.css'
import Operations from './operations'
import RenameModal from './rename-modal'
import StatusItem from './status-item'
import s from './style.module.css'
export const renderTdValue = (value: string | number | null, isEmptyStyle = false) => {
return (


@@ -1,4 +1,4 @@
import type { OperationName } from './types'
import type { OperationName } from '../types'
import type { CommonResponse } from '@/models/common'
import {
RiArchive2Line,
@@ -17,6 +17,12 @@ import * as React from 'react'
import { useCallback, useState } from 'react'
import { useTranslation } from 'react-i18next'
import { useContext } from 'use-context-selector'
import Confirm from '@/app/components/base/confirm'
import Divider from '@/app/components/base/divider'
import CustomPopover from '@/app/components/base/popover'
import Switch from '@/app/components/base/switch'
import { ToastContext } from '@/app/components/base/toast'
import Tooltip from '@/app/components/base/tooltip'
import { DataSourceType, DocumentActionType } from '@/models/datasets'
import {
useDocumentArchive,
@@ -31,14 +37,8 @@ import {
} from '@/service/knowledge/use-document'
import { asyncRunSafe } from '@/utils'
import { cn } from '@/utils/classnames'
import Confirm from '../../base/confirm'
import Divider from '../../base/divider'
import CustomPopover from '../../base/popover'
import Switch from '../../base/switch'
import { ToastContext } from '../../base/toast'
import Tooltip from '../../base/tooltip'
import s from '../style.module.css'
import RenameModal from './rename-modal'
import s from './style.module.css'
type OperationsProps = {
embeddingAvailable: boolean


@@ -7,8 +7,8 @@ import { useTranslation } from 'react-i18next'
import Button from '@/app/components/base/button'
import Input from '@/app/components/base/input'
import Modal from '@/app/components/base/modal'
import Toast from '@/app/components/base/toast'
import { renameDocumentName } from '@/service/datasets'
import Toast from '../../base/toast'
type Props = {
datasetId: string


@@ -18,7 +18,7 @@ import { useDocumentDetail, useDocumentMetadata, useInvalidDocumentList } from '
import { useCheckSegmentBatchImportProgress, useChildSegmentListKey, useSegmentBatchImport, useSegmentListKey } from '@/service/knowledge/use-segment'
import { useInvalid } from '@/service/use-base'
import { cn } from '@/utils/classnames'
import Operations from '../operations'
import Operations from '../components/operations'
import StatusItem from '../status-item'
import BatchModal from './batch-modal'
import Completed from './completed'


@@ -0,0 +1,197 @@
import type { DocumentListResponse } from '@/models/datasets'
import type { SortType } from '@/service/datasets'
import { useDebounce, useDebounceFn } from 'ahooks'
import { useCallback, useEffect, useMemo, useState } from 'react'
import { normalizeStatusForQuery, sanitizeStatusValue } from '../status-filter'
import useDocumentListQueryState from './use-document-list-query-state'
/**
* Custom hook to manage documents page state including:
* - Search state (input value, debounced search value)
* - Filter state (status filter, sort value)
* - Pagination state (current page, limit)
* - Selection state (selected document ids)
* - Polling state (timer control for auto-refresh)
*/
export function useDocumentsPageState() {
const { query, updateQuery } = useDocumentListQueryState()
// Search state
const [inputValue, setInputValue] = useState<string>('')
const [searchValue, setSearchValue] = useState<string>('')
const debouncedSearchValue = useDebounce(searchValue, { wait: 500 })
// Filter & sort state
const [statusFilterValue, setStatusFilterValue] = useState<string>(() => sanitizeStatusValue(query.status))
const [sortValue, setSortValue] = useState<SortType>(query.sort)
const normalizedStatusFilterValue = useMemo(
() => normalizeStatusForQuery(statusFilterValue),
[statusFilterValue],
)
// Pagination state
const [currPage, setCurrPage] = useState<number>(query.page - 1)
const [limit, setLimit] = useState<number>(query.limit)
// Selection state
const [selectedIds, setSelectedIds] = useState<string[]>([])
// Polling state
const [timerCanRun, setTimerCanRun] = useState(true)
// Initialize search value from URL on mount
useEffect(() => {
if (query.keyword) {
setInputValue(query.keyword)
setSearchValue(query.keyword)
}
}, []) // Only run on mount
// Sync local state with URL query changes
useEffect(() => {
setCurrPage(query.page - 1)
setLimit(query.limit)
if (query.keyword !== searchValue) {
setInputValue(query.keyword)
setSearchValue(query.keyword)
}
setStatusFilterValue((prev) => {
const nextValue = sanitizeStatusValue(query.status)
return prev === nextValue ? prev : nextValue
})
setSortValue(query.sort)
}, [query])
// Update URL when search changes
useEffect(() => {
if (debouncedSearchValue !== query.keyword) {
setCurrPage(0)
updateQuery({ keyword: debouncedSearchValue, page: 1 })
}
}, [debouncedSearchValue, query.keyword, updateQuery])
// Clear selection when search changes
useEffect(() => {
if (searchValue !== query.keyword)
setSelectedIds([])
}, [searchValue, query.keyword])
// Clear selection when status filter changes
useEffect(() => {
setSelectedIds([])
}, [normalizedStatusFilterValue])
// Page change handler
const handlePageChange = useCallback((newPage: number) => {
setCurrPage(newPage)
updateQuery({ page: newPage + 1 })
}, [updateQuery])
// Limit change handler
const handleLimitChange = useCallback((newLimit: number) => {
setLimit(newLimit)
setCurrPage(0)
updateQuery({ limit: newLimit, page: 1 })
}, [updateQuery])
// Debounced search handler
const { run: handleSearch } = useDebounceFn(() => {
setSearchValue(inputValue)
}, { wait: 500 })
// Input change handler
const handleInputChange = useCallback((value: string) => {
setInputValue(value)
handleSearch()
}, [handleSearch])
// Status filter change handler
const handleStatusFilterChange = useCallback((value: string) => {
const selectedValue = sanitizeStatusValue(value)
setStatusFilterValue(selectedValue)
setCurrPage(0)
updateQuery({ status: selectedValue, page: 1 })
}, [updateQuery])
// Status filter clear handler
const handleStatusFilterClear = useCallback(() => {
if (statusFilterValue === 'all')
return
setStatusFilterValue('all')
setCurrPage(0)
updateQuery({ status: 'all', page: 1 })
}, [statusFilterValue, updateQuery])
// Sort change handler
const handleSortChange = useCallback((value: string) => {
const next = value as SortType
if (next === sortValue)
return
setSortValue(next)
setCurrPage(0)
updateQuery({ sort: next, page: 1 })
}, [sortValue, updateQuery])
// Update polling state based on documents response
const updatePollingState = useCallback((documentsRes: DocumentListResponse | undefined) => {
if (!documentsRes?.data)
return
let completedNum = 0
documentsRes.data.forEach((documentItem) => {
const { indexing_status } = documentItem
const isEmbedded = indexing_status === 'completed' || indexing_status === 'paused' || indexing_status === 'error'
if (isEmbedded)
completedNum++
})
const hasIncompleteDocuments = completedNum !== documentsRes.data.length
const transientStatuses = ['queuing', 'indexing', 'paused']
const shouldForcePolling = normalizedStatusFilterValue === 'all'
? false
: transientStatuses.includes(normalizedStatusFilterValue)
setTimerCanRun(shouldForcePolling || hasIncompleteDocuments)
}, [normalizedStatusFilterValue])
// Adjust page when total pages change
const adjustPageForTotal = useCallback((documentsRes: DocumentListResponse | undefined) => {
if (!documentsRes)
return
const totalPages = Math.ceil(documentsRes.total / limit)
if (currPage > 0 && currPage + 1 > totalPages)
handlePageChange(totalPages > 0 ? totalPages - 1 : 0)
}, [limit, currPage, handlePageChange])
return {
// Search state
inputValue,
searchValue,
debouncedSearchValue,
handleInputChange,
// Filter & sort state
statusFilterValue,
sortValue,
normalizedStatusFilterValue,
handleStatusFilterChange,
handleStatusFilterClear,
handleSortChange,
// Pagination state
currPage,
limit,
handlePageChange,
handleLimitChange,
// Selection state
selectedIds,
setSelectedIds,
// Polling state
timerCanRun,
updatePollingState,
adjustPageForTotal,
}
}
export default useDocumentsPageState
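
A short sketch of how the polling helpers above are intended to be fed, assuming the caller already holds the hook result and the fetched DocumentListResponse; the wrapper hook name and import path are illustrative:

// Illustrative only: pushing the fetched list back into the polling helpers.
import type { DocumentListResponse } from '@/models/datasets'
import { useEffect } from 'react'
import type { useDocumentsPageState } from './use-documents-page-state' // assumed path

type PageState = ReturnType<typeof useDocumentsPageState>

export const useDocumentsPolling = (
  pageState: PageState,
  documentsRes: DocumentListResponse | undefined,
) => {
  const { updatePollingState, adjustPageForTotal } = pageState

  useEffect(() => {
    // Re-evaluate whether auto-refresh should keep running and clamp the
    // current page if the total shrank (e.g. after deleting documents).
    updatePollingState(documentsRes)
    adjustPageForTotal(documentsRes)
  }, [documentsRes, updatePollingState, adjustPageForTotal])

  // pageState.timerCanRun can then gate the list query's refetch interval.
}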

Some files were not shown because too many files have changed in this diff