Compare commits


10 Commits

Author  SHA1        Message                                                                                             Date
yyh     9ebc0cbe32  Merge branch 'main' into refactor/migrate-react-window-to-tanstack-virtual                         2026-02-01 14:42:12 +08:00
yyh     e0554987c9  Merge remote-tracking branch 'origin/main' into refactor/migrate-react-window-to-tanstack-virtual  2026-01-30 18:10:09 +08:00
                    (merge conflicts: web/pnpm-lock.yaml)
yyh     817cd53143  Merge remote-tracking branch 'origin/main' into refactor/migrate-react-window-to-tanstack-virtual  2026-01-29 12:34:49 +08:00
                    (merge conflicts: web/pnpm-lock.yaml)
yyh     cc3ab30728  Merge branch 'main' into refactor/migrate-react-window-to-tanstack-virtual                          2026-01-22 10:07:43 +08:00
yyh     e9462b7504  update                                                                                              2026-01-21 16:26:20 +08:00
yyh     c5bd31b813  update                                                                                              2026-01-21 16:22:01 +08:00
yyh     1a23951ae7  Merge remote-tracking branch 'origin/main' into refactor/migrate-react-window-to-tanstack-virtual  2026-01-21 16:17:20 +08:00
yyh     4d60a742dc  update                                                                                              2026-01-21 16:16:52 +08:00
yyh     8cf99a85cb  migrate and remove react window                                                                     2026-01-21 15:52:18 +08:00
yyh     52a874df98  add tanstack react query and migrate page selector                                                  2026-01-21 15:42:38 +08:00
32 changed files with 1364 additions and 1584 deletions

.github/CODEOWNERS (vendored, 3 lines changed)

@@ -9,9 +9,6 @@
# CODEOWNERS file
/.github/CODEOWNERS @laipz8200 @crazywoola
# Agents
/.agents/skills/ @hyoban
# Docs
/docs/ @crazywoola


@@ -112,6 +112,7 @@ ignore_imports =
     core.workflow.nodes.datasource.datasource_node -> models.model
     core.workflow.nodes.datasource.datasource_node -> models.tools
     core.workflow.nodes.datasource.datasource_node -> services.datasource_provider_service
+    core.workflow.nodes.document_extractor.node -> configs
     core.workflow.nodes.document_extractor.node -> core.file.file_manager
     core.workflow.nodes.document_extractor.node -> core.helper.ssrf_proxy
     core.workflow.nodes.http_request.entities -> configs


@@ -107,11 +107,10 @@ class AnnotationReplyActionApi(Resource):
     def post(self, app_id, action: Literal["enable", "disable"]):
         app_id = str(app_id)
         args = AnnotationReplyPayload.model_validate(console_ns.payload)
-        match action:
-            case "enable":
-                result = AppAnnotationService.enable_app_annotation(args.model_dump(), app_id)
-            case "disable":
-                result = AppAnnotationService.disable_app_annotation(app_id)
+        if action == "enable":
+            result = AppAnnotationService.enable_app_annotation(args.model_dump(), app_id)
+        elif action == "disable":
+            result = AppAnnotationService.disable_app_annotation(app_id)
         return result, 200
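(Editor's note: the hunk trades structural pattern matching for plain branching; for a two-valued Literal the behavior is identical. A minimal, self-contained sketch of the shape — names here are illustrative, not Dify's:)

from typing import Literal

def handle(action: Literal["enable", "disable"]) -> dict:
    # if/elif reads the same as the match/case it replaces,
    # and type checkers still narrow `action` in each branch
    if action == "enable":
        result = {"enabled": True}
    elif action == "disable":
        result = {"enabled": False}
    else:  # unreachable for a well-typed caller
        raise ValueError(f"unknown action: {action}")
    return result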


@@ -155,43 +155,43 @@ class OAuthServerUserTokenApi(Resource):
             grant_type = OAuthGrantType(payload.grant_type)
         except ValueError:
             raise BadRequest("invalid grant_type")

-        match grant_type:
-            case OAuthGrantType.AUTHORIZATION_CODE:
-                if not payload.code:
-                    raise BadRequest("code is required")
-                if payload.client_secret != oauth_provider_app.client_secret:
-                    raise BadRequest("client_secret is invalid")
-                access_token, refresh_token = OAuthServerService.sign_oauth_access_token(
-                    grant_type, code=payload.code, client_id=oauth_provider_app.client_id
-                )
-                return jsonable_encoder(
-                    {
-                        "access_token": access_token,
-                        "token_type": "Bearer",
-                        "expires_in": OAUTH_ACCESS_TOKEN_EXPIRES_IN,
-                        "refresh_token": refresh_token,
-                    }
-                )
-            case OAuthGrantType.REFRESH_TOKEN:
-                if not payload.refresh_token:
-                    raise BadRequest("refresh_token is required")
-                if payload.redirect_uri not in oauth_provider_app.redirect_uris:
-                    raise BadRequest("redirect_uri is invalid")
-                access_token, refresh_token = OAuthServerService.sign_oauth_access_token(
-                    grant_type, refresh_token=payload.refresh_token, client_id=oauth_provider_app.client_id
-                )
-                return jsonable_encoder(
-                    {
-                        "access_token": access_token,
-                        "token_type": "Bearer",
-                        "expires_in": OAUTH_ACCESS_TOKEN_EXPIRES_IN,
-                        "refresh_token": refresh_token,
-                    }
-                )
+        if grant_type == OAuthGrantType.AUTHORIZATION_CODE:
+            if not payload.code:
+                raise BadRequest("code is required")
+            if payload.redirect_uri not in oauth_provider_app.redirect_uris:
+                raise BadRequest("redirect_uri is invalid")
+            if payload.client_secret != oauth_provider_app.client_secret:
+                raise BadRequest("client_secret is invalid")
+            access_token, refresh_token = OAuthServerService.sign_oauth_access_token(
+                grant_type, code=payload.code, client_id=oauth_provider_app.client_id
+            )
+            return jsonable_encoder(
+                {
+                    "access_token": access_token,
+                    "token_type": "Bearer",
+                    "expires_in": OAUTH_ACCESS_TOKEN_EXPIRES_IN,
+                    "refresh_token": refresh_token,
+                }
+            )
+        elif grant_type == OAuthGrantType.REFRESH_TOKEN:
+            if not payload.refresh_token:
+                raise BadRequest("refresh_token is required")
+            access_token, refresh_token = OAuthServerService.sign_oauth_access_token(
+                grant_type, refresh_token=payload.refresh_token, client_id=oauth_provider_app.client_id
+            )
+            return jsonable_encoder(
+                {
+                    "access_token": access_token,
+                    "token_type": "Bearer",
+                    "expires_in": OAUTH_ACCESS_TOKEN_EXPIRES_IN,
+                    "refresh_token": refresh_token,
+                }
+            )


 @console_ns.route("/oauth/provider/account")
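(Editor's note: a compact sketch of the grant-type dispatch this hunk moves to, stripped of the Dify service layer; the enum, the signer placeholder, and the error type are stand-ins, not the project's API:)

from enum import Enum

class GrantType(Enum):
    AUTHORIZATION_CODE = "authorization_code"
    REFRESH_TOKEN = "refresh_token"

def token_endpoint(grant_type: GrantType, code: str | None, refresh_token: str | None) -> dict:
    # validate per-grant inputs first, then sign; both branches share one response shape
    if grant_type == GrantType.AUTHORIZATION_CODE:
        if not code:
            raise ValueError("code is required")
        access, refresh = "signed-access", "signed-refresh"  # placeholder for the signer
    elif grant_type == GrantType.REFRESH_TOKEN:
        if not refresh_token:
            raise ValueError("refresh_token is required")
        access, refresh = "signed-access", "signed-refresh"  # placeholder for the signer
    else:
        raise ValueError("invalid grant_type")
    return {"access_token": access, "token_type": "Bearer", "refresh_token": refresh}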


@@ -1339,18 +1339,6 @@ class DocumentGenerateSummaryApi(Resource):
             missing_ids = set(document_list) - found_ids
             raise NotFound(f"Some documents not found: {list(missing_ids)}")

-        # Update need_summary to True for documents that don't have it set
-        # This handles the case where documents were created when summary_index_setting was disabled
-        documents_to_update = [doc for doc in documents if not doc.need_summary and doc.doc_form != "qa_model"]
-        if documents_to_update:
-            document_ids_to_update = [str(doc.id) for doc in documents_to_update]
-            DocumentService.update_documents_need_summary(
-                dataset_id=dataset_id,
-                document_ids=document_ids_to_update,
-                need_summary=True,
-            )
-
         # Dispatch async tasks for each document
         for document in documents:
             # Skip qa_model documents as they don't generate summaries


@@ -250,7 +250,7 @@ class WorkflowResponseConverter:
             data=WorkflowFinishStreamResponse.Data(
                 id=run_id,
                 workflow_id=workflow_id,
-                status=status,
+                status=status.value,
                 outputs=encoded_outputs,
                 error=error,
                 elapsed_time=elapsed_time,
@@ -340,13 +340,13 @@ class WorkflowResponseConverter:
         metadata = self._merge_metadata(event.execution_metadata, snapshot)

         if isinstance(event, QueueNodeSucceededEvent):
-            status = WorkflowNodeExecutionStatus.SUCCEEDED
+            status = WorkflowNodeExecutionStatus.SUCCEEDED.value
             error_message = event.error
         elif isinstance(event, QueueNodeFailedEvent):
-            status = WorkflowNodeExecutionStatus.FAILED
+            status = WorkflowNodeExecutionStatus.FAILED.value
             error_message = event.error
         else:
-            status = WorkflowNodeExecutionStatus.EXCEPTION
+            status = WorkflowNodeExecutionStatus.EXCEPTION.value
             error_message = event.error

         return NodeFinishStreamResponse(
@@ -413,7 +413,7 @@ class WorkflowResponseConverter:
             process_data_truncated=process_data_truncated,
             outputs=outputs,
             outputs_truncated=outputs_truncated,
-            status=WorkflowNodeExecutionStatus.RETRY,
+            status=WorkflowNodeExecutionStatus.RETRY.value,
             error=event.error,
             elapsed_time=elapsed_time,
             execution_metadata=metadata,


@@ -7,7 +7,7 @@ from pydantic import BaseModel, ConfigDict, Field
 from core.model_runtime.entities.llm_entities import LLMResult, LLMUsage
 from core.rag.entities.citation_metadata import RetrievalSourceMetadata
 from core.workflow.entities import AgentNodeStrategyInit
-from core.workflow.enums import WorkflowExecutionStatus, WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus
+from core.workflow.enums import WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus


 class AnnotationReplyAccount(BaseModel):
@@ -223,7 +223,7 @@ class WorkflowFinishStreamResponse(StreamResponse):
         id: str
         workflow_id: str
-        status: WorkflowExecutionStatus
+        status: str
         outputs: Mapping[str, Any] | None = None
         error: str | None = None
         elapsed_time: float
@@ -311,7 +311,7 @@ class NodeFinishStreamResponse(StreamResponse):
         process_data_truncated: bool = False
         outputs: Mapping[str, Any] | None = None
         outputs_truncated: bool = True
-        status: WorkflowNodeExecutionStatus
+        status: str
         error: str | None = None
         elapsed_time: float
         execution_metadata: Mapping[WorkflowNodeExecutionMetadataKey, Any] | None = None
@@ -375,7 +375,7 @@ class NodeRetryStreamResponse(StreamResponse):
         process_data_truncated: bool = False
         outputs: Mapping[str, Any] | None = None
         outputs_truncated: bool = False
-        status: WorkflowNodeExecutionStatus
+        status: str
         error: str | None = None
         elapsed_time: float
         execution_metadata: Mapping[WorkflowNodeExecutionMetadataKey, Any] | None = None
@@ -719,7 +719,7 @@ class WorkflowAppBlockingResponse(AppBlockingResponse):
         id: str
         workflow_id: str
-        status: WorkflowExecutionStatus
+        status: str
         outputs: Mapping[str, Any] | None = None
         error: str | None = None
         elapsed_time: float
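(Editor's note: the field-type change here pairs with the .value calls in the converter above. Assuming the status enums are str-backed — a guess from how they serialize — the two pieces combine like this minimal Pydantic sketch:)

from enum import Enum
from pydantic import BaseModel

class NodeStatus(str, Enum):  # assumption: Dify's enum is similarly str-backed
    SUCCEEDED = "succeeded"

class NodeFinish(BaseModel):
    status: str  # was the enum type; now a plain string on the wire

resp = NodeFinish(status=NodeStatus.SUCCEEDED.value)
print(resp.model_dump())  # {'status': 'succeeded'} - no enum leaks into the stream payload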


@@ -1,5 +1,5 @@
 from collections.abc import Callable, Sequence
-from typing import TYPE_CHECKING, Any, cast, final
+from typing import TYPE_CHECKING, final

 from typing_extensions import override
@@ -15,7 +15,6 @@ from core.workflow.graph.graph import NodeFactory
 from core.workflow.nodes.base.node import Node
 from core.workflow.nodes.code.code_node import CodeNode
 from core.workflow.nodes.code.limits import CodeNodeLimits
-from core.workflow.nodes.document_extractor import DocumentExtractorNode, UnstructuredApiConfig
 from core.workflow.nodes.http_request.node import HttpRequestNode
 from core.workflow.nodes.node_mapping import LATEST_VERSION, NODE_TYPE_CLASSES_MAPPING
 from core.workflow.nodes.protocols import FileManagerProtocol, HttpClientProtocol
@@ -51,7 +50,6 @@ class DifyNodeFactory(NodeFactory):
         http_request_http_client: HttpClientProtocol | None = None,
         http_request_tool_file_manager_factory: Callable[[], ToolFileManager] = ToolFileManager,
         http_request_file_manager: FileManagerProtocol | None = None,
-        document_extractor_unstructured_api_config: UnstructuredApiConfig | None = None,
     ) -> None:
         self.graph_init_params = graph_init_params
         self.graph_runtime_state = graph_runtime_state
@@ -73,13 +71,6 @@ class DifyNodeFactory(NodeFactory):
         self._http_request_http_client = http_request_http_client or ssrf_proxy
         self._http_request_tool_file_manager_factory = http_request_tool_file_manager_factory
         self._http_request_file_manager = http_request_file_manager or file_manager
-        self._document_extractor_unstructured_api_config = (
-            document_extractor_unstructured_api_config
-            or UnstructuredApiConfig(
-                api_url=dify_config.UNSTRUCTURED_API_URL,
-                api_key=dify_config.UNSTRUCTURED_API_KEY or "",
-            )
-        )

     @override
     def create_node(self, node_config: NodeConfigDict) -> Node:
@@ -112,17 +103,13 @@ class DifyNodeFactory(NodeFactory):
         if not node_class:
             raise ValueError(f"No latest version class found for node type: {node_type}")

-        common_kwargs: dict[str, Any] = {
-            "id": node_id,
-            "config": node_config,
-            "graph_init_params": self.graph_init_params,
-            "graph_runtime_state": self.graph_runtime_state,
-        }
-
         # Create node instance
         if node_type == NodeType.CODE:
             return CodeNode(
-                **common_kwargs,
+                id=node_id,
+                config=node_config,
+                graph_init_params=self.graph_init_params,
+                graph_runtime_state=self.graph_runtime_state,
                 code_executor=self._code_executor,
                 code_providers=self._code_providers,
                 code_limits=self._code_limits,
@@ -130,23 +117,27 @@ class DifyNodeFactory(NodeFactory):
         if node_type == NodeType.TEMPLATE_TRANSFORM:
             return TemplateTransformNode(
-                **common_kwargs,
+                id=node_id,
+                config=node_config,
+                graph_init_params=self.graph_init_params,
+                graph_runtime_state=self.graph_runtime_state,
                 template_renderer=self._template_renderer,
             )
         if node_type == NodeType.HTTP_REQUEST:
             return HttpRequestNode(
-                **common_kwargs,
+                id=node_id,
+                config=node_config,
+                graph_init_params=self.graph_init_params,
+                graph_runtime_state=self.graph_runtime_state,
                 http_client=self._http_request_http_client,
                 tool_file_manager_factory=self._http_request_tool_file_manager_factory,
                 file_manager=self._http_request_file_manager,
             )
-        if node_type == NodeType.DOCUMENT_EXTRACTOR:
-            document_extractor_class = cast(type[DocumentExtractorNode], node_class)
-            return document_extractor_class(
-                **common_kwargs,
-                unstructured_api_config=self._document_extractor_unstructured_api_config,
-            )
-        return node_class(**common_kwargs)
+        return node_class(
+            id=node_id,
+            config=node_config,
+            graph_init_params=self.graph_init_params,
+            graph_runtime_state=self.graph_runtime_state,
+        )
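(Editor's note: the factory shape after this change — one dispatch point, collaborators injected once at factory construction, explicit constructor kwargs for special-cased node types. A reduced sketch; the node classes are stand-ins:)

class Node:
    def __init__(self, id: str, config: dict) -> None:
        self.id, self.config = id, config

class CodeNode(Node):
    def __init__(self, id: str, config: dict, code_executor: object) -> None:
        super().__init__(id, config)
        self.code_executor = code_executor

class NodeFactory:
    def __init__(self, code_executor: object) -> None:
        self._code_executor = code_executor  # injected once, reused for every node

    def create_node(self, node_type: str, node_id: str, config: dict) -> Node:
        if node_type == "code":
            # special-cased types get their extra collaborators spelled out
            return CodeNode(id=node_id, config=config, code_executor=self._code_executor)
        return Node(id=node_id, config=config)  # everything else takes only the common kwargs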


@@ -369,9 +369,7 @@ class IndexingRunner:
             # Generate summary preview
             summary_index_setting = tmp_processing_rule.get("summary_index_setting")
             if summary_index_setting and summary_index_setting.get("enable") and preview_texts:
-                preview_texts = index_processor.generate_summary_preview(
-                    tenant_id, preview_texts, summary_index_setting, doc_language
-                )
+                preview_texts = index_processor.generate_summary_preview(tenant_id, preview_texts, summary_index_setting)

         return IndexingEstimate(total_segments=total_segments, preview=preview_texts)


@@ -441,13 +441,11 @@ DEFAULT_GENERATOR_SUMMARY_PROMPT = (
     Requirements:
     1. Write a concise summary in plain text
-    2. You must write in {language}. No language other than {language} should be used.
+    2. Use the same language as the input content
     3. Focus on important facts, concepts, and details
     4. If images are included, describe their key information
     5. Do not use words like "好的", "ok", "I understand", "This text discusses", "The content mentions"
     6. Write directly without extra words
     7. If there is not enough content to generate a meaningful summary,
        return an empty string without any explanation or prompt

     Output only the summary text. Start summarizing now:
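(Editor's note: the replaced requirement 2 carried a {language} placeholder that generate_summary filled via str.format — the templating removed further down in this compare. A sketch of that removed mechanism, mirroring the old code:)

DEFAULT_PROMPT = "Write a concise summary. You must write in {language}."

def render_prompt(prompt: str, document_language: str | None) -> str:
    language = document_language or "the same language as the input content"
    try:
        return prompt.format(language=language)
    except KeyError:
        # a custom prompt with unrelated {placeholders} is used as-is
        return prompt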


@@ -48,22 +48,12 @@ class BaseIndexProcessor(ABC):
     @abstractmethod
     def generate_summary_preview(
-        self,
-        tenant_id: str,
-        preview_texts: list[PreviewDetail],
-        summary_index_setting: dict,
-        doc_language: str | None = None,
+        self, tenant_id: str, preview_texts: list[PreviewDetail], summary_index_setting: dict
     ) -> list[PreviewDetail]:
         """
         For each segment in preview_texts, generate a summary using LLM and attach it to the segment.
         The summary can be stored in a new attribute, e.g., summary.
         This method should be implemented by subclasses.
-
-        Args:
-            tenant_id: Tenant ID
-            preview_texts: List of preview details to generate summaries for
-            summary_index_setting: Summary index configuration
-            doc_language: Optional document language to ensure summary is generated in the correct language
         """
         raise NotImplementedError


@@ -275,11 +275,7 @@ class ParagraphIndexProcessor(BaseIndexProcessor):
             raise ValueError("Chunks is not a list")

     def generate_summary_preview(
-        self,
-        tenant_id: str,
-        preview_texts: list[PreviewDetail],
-        summary_index_setting: dict,
-        doc_language: str | None = None,
+        self, tenant_id: str, preview_texts: list[PreviewDetail], summary_index_setting: dict
     ) -> list[PreviewDetail]:
         """
         For each segment, concurrently call generate_summary to generate a summary
@@ -302,15 +298,11 @@ class ParagraphIndexProcessor(BaseIndexProcessor):
             if flask_app:
                 # Ensure Flask app context in worker thread
                 with flask_app.app_context():
-                    summary, _ = self.generate_summary(
-                        tenant_id, preview.content, summary_index_setting, document_language=doc_language
-                    )
+                    summary, _ = self.generate_summary(tenant_id, preview.content, summary_index_setting)
                     preview.summary = summary
             else:
                 # Fallback: try without app context (may fail)
-                summary, _ = self.generate_summary(
-                    tenant_id, preview.content, summary_index_setting, document_language=doc_language
-                )
+                summary, _ = self.generate_summary(tenant_id, preview.content, summary_index_setting)
                 preview.summary = summary

         # Generate summaries concurrently using ThreadPoolExecutor
@@ -364,7 +356,6 @@ class ParagraphIndexProcessor(BaseIndexProcessor):
         text: str,
         summary_index_setting: dict | None = None,
         segment_id: str | None = None,
-        document_language: str | None = None,
     ) -> tuple[str, LLMUsage]:
         """
         Generate summary for the given text using ModelInstance.invoke_llm and the default or custom summary prompt,
@@ -375,8 +366,6 @@ class ParagraphIndexProcessor(BaseIndexProcessor):
             text: Text content to summarize
             summary_index_setting: Summary index configuration
             segment_id: Optional segment ID to fetch attachments from SegmentAttachmentBinding table
-            document_language: Optional document language (e.g., "Chinese", "English")
-                to ensure summary is generated in the correct language

         Returns:
             Tuple of (summary_content, llm_usage) where llm_usage is LLMUsage object
@@ -392,22 +381,8 @@ class ParagraphIndexProcessor(BaseIndexProcessor):
             raise ValueError("model_name and model_provider_name are required in summary_index_setting")

         # Import default summary prompt
-        is_default_prompt = False
         if not summary_prompt:
             summary_prompt = DEFAULT_GENERATOR_SUMMARY_PROMPT
-            is_default_prompt = True
-
-        # Format prompt with document language only for default prompt
-        # Custom prompts are used as-is to avoid interfering with user-defined templates
-        # If document_language is provided, use it; otherwise, use "the same language as the input content"
-        # This is especially important for image-only chunks where text is empty or minimal
-        if is_default_prompt:
-            language_for_prompt = document_language or "the same language as the input content"
-            try:
-                summary_prompt = summary_prompt.format(language=language_for_prompt)
-            except KeyError:
-                # If default prompt doesn't have {language} placeholder, use it as-is
-                pass

         provider_manager = ProviderManager()
         provider_model_bundle = provider_manager.get_provider_model_bundle(
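(Editor's note: the worker shown in this hunk needs a Flask application context because the summary call touches app-scoped resources. A self-contained sketch of that thread-pool pattern; summarize is a placeholder for the real LLM invocation:)

from concurrent.futures import ThreadPoolExecutor
from flask import Flask

app = Flask(__name__)

def summarize(text: str) -> str:
    return text[:40]  # placeholder for the real LLM invocation

def worker(text: str) -> str:
    with app.app_context():  # each worker thread pushes its own context
        return summarize(text)

texts = ["first chunk to index", "second chunk to index"]
with ThreadPoolExecutor(max_workers=4) as pool:
    summaries = list(pool.map(worker, texts))
print(summaries)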


@@ -358,11 +358,7 @@ class ParentChildIndexProcessor(BaseIndexProcessor):
         }

     def generate_summary_preview(
-        self,
-        tenant_id: str,
-        preview_texts: list[PreviewDetail],
-        summary_index_setting: dict,
-        doc_language: str | None = None,
+        self, tenant_id: str, preview_texts: list[PreviewDetail], summary_index_setting: dict
     ) -> list[PreviewDetail]:
         """
         For each parent chunk in preview_texts, concurrently call generate_summary to generate a summary
@@ -393,7 +389,6 @@ class ParentChildIndexProcessor(BaseIndexProcessor):
                         tenant_id=tenant_id,
                         text=preview.content,
                         summary_index_setting=summary_index_setting,
-                        document_language=doc_language,
                     )
                     preview.summary = summary
             else:
@@ -402,7 +397,6 @@ class ParentChildIndexProcessor(BaseIndexProcessor):
                     tenant_id=tenant_id,
                     text=preview.content,
                     summary_index_setting=summary_index_setting,
-                    document_language=doc_language,
                 )
                 preview.summary = summary


@@ -241,11 +241,7 @@ class QAIndexProcessor(BaseIndexProcessor):
         }

     def generate_summary_preview(
-        self,
-        tenant_id: str,
-        preview_texts: list[PreviewDetail],
-        summary_index_setting: dict,
-        doc_language: str | None = None,
+        self, tenant_id: str, preview_texts: list[PreviewDetail], summary_index_setting: dict
     ) -> list[PreviewDetail]:
         """
         QA model doesn't generate summaries, so this method returns preview_texts unchanged.


@@ -1,4 +1,4 @@
-from .entities import DocumentExtractorNodeData, UnstructuredApiConfig
+from .entities import DocumentExtractorNodeData
 from .node import DocumentExtractorNode

-__all__ = ["DocumentExtractorNode", "DocumentExtractorNodeData", "UnstructuredApiConfig"]
+__all__ = ["DocumentExtractorNode", "DocumentExtractorNodeData"]


@@ -1,14 +1,7 @@
 from collections.abc import Sequence
-from dataclasses import dataclass

 from core.workflow.nodes.base import BaseNodeData


 class DocumentExtractorNodeData(BaseNodeData):
     variable_selector: Sequence[str]
-
-
-@dataclass(frozen=True)
-class UnstructuredApiConfig:
-    api_url: str | None = None
-    api_key: str = ""


@@ -5,7 +5,7 @@ import logging
 import os
 import tempfile
 from collections.abc import Mapping, Sequence
-from typing import TYPE_CHECKING, Any
+from typing import Any

 import charset_normalizer
 import docx
@@ -20,6 +20,7 @@ from docx.oxml.text.paragraph import CT_P
 from docx.table import Table
 from docx.text.paragraph import Paragraph

+from configs import dify_config
 from core.file import File, FileTransferMethod, file_manager
 from core.helper import ssrf_proxy
 from core.variables import ArrayFileSegment
@@ -28,15 +29,11 @@ from core.workflow.enums import NodeType, WorkflowNodeExecutionStatus
 from core.workflow.node_events import NodeRunResult
 from core.workflow.nodes.base.node import Node

-from .entities import DocumentExtractorNodeData, UnstructuredApiConfig
+from .entities import DocumentExtractorNodeData
 from .exc import DocumentExtractorError, FileDownloadError, TextExtractionError, UnsupportedFileTypeError

 logger = logging.getLogger(__name__)

-if TYPE_CHECKING:
-    from core.workflow.entities import GraphInitParams
-    from core.workflow.runtime import GraphRuntimeState
-

 class DocumentExtractorNode(Node[DocumentExtractorNodeData]):
     """
@@ -50,23 +47,6 @@ class DocumentExtractorNode(Node[DocumentExtractorNodeData]):
     def version(cls) -> str:
         return "1"

-    def __init__(
-        self,
-        id: str,
-        config: Mapping[str, Any],
-        graph_init_params: "GraphInitParams",
-        graph_runtime_state: "GraphRuntimeState",
-        *,
-        unstructured_api_config: UnstructuredApiConfig | None = None,
-    ) -> None:
-        super().__init__(
-            id=id,
-            config=config,
-            graph_init_params=graph_init_params,
-            graph_runtime_state=graph_runtime_state,
-        )
-        self._unstructured_api_config = unstructured_api_config or UnstructuredApiConfig()
-
     def _run(self):
         variable_selector = self.node_data.variable_selector
         variable = self.graph_runtime_state.variable_pool.get(variable_selector)
@@ -84,10 +64,7 @@ class DocumentExtractorNode(Node[DocumentExtractorNodeData]):
         try:
             if isinstance(value, list):
-                extracted_text_list = [
-                    _extract_text_from_file(file, unstructured_api_config=self._unstructured_api_config)
-                    for file in value
-                ]
+                extracted_text_list = list(map(_extract_text_from_file, value))
                 return NodeRunResult(
                     status=WorkflowNodeExecutionStatus.SUCCEEDED,
                     inputs=inputs,
@@ -95,7 +72,7 @@ class DocumentExtractorNode(Node[DocumentExtractorNodeData]):
                     outputs={"text": ArrayStringSegment(value=extracted_text_list)},
                 )
             elif isinstance(value, File):
-                extracted_text = _extract_text_from_file(value, unstructured_api_config=self._unstructured_api_config)
+                extracted_text = _extract_text_from_file(value)
                 return NodeRunResult(
                     status=WorkflowNodeExecutionStatus.SUCCEEDED,
                     inputs=inputs,
@@ -126,12 +103,7 @@ class DocumentExtractorNode(Node[DocumentExtractorNodeData]):
         return {node_id + ".files": typed_node_data.variable_selector}


-def _extract_text_by_mime_type(
-    *,
-    file_content: bytes,
-    mime_type: str,
-    unstructured_api_config: UnstructuredApiConfig,
-) -> str:
+def _extract_text_by_mime_type(*, file_content: bytes, mime_type: str) -> str:
     """Extract text from a file based on its MIME type."""
     match mime_type:
         case "text/plain" | "text/html" | "text/htm" | "text/markdown" | "text/xml":
@@ -139,7 +111,7 @@ def _extract_text_by_mime_type(
         case "application/pdf":
             return _extract_text_from_pdf(file_content)
         case "application/msword":
-            return _extract_text_from_doc(file_content, unstructured_api_config=unstructured_api_config)
+            return _extract_text_from_doc(file_content)
         case "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
             return _extract_text_from_docx(file_content)
         case "text/csv":
@@ -147,11 +119,11 @@ def _extract_text_by_mime_type(
         case "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" | "application/vnd.ms-excel":
             return _extract_text_from_excel(file_content)
         case "application/vnd.ms-powerpoint":
-            return _extract_text_from_ppt(file_content, unstructured_api_config=unstructured_api_config)
+            return _extract_text_from_ppt(file_content)
         case "application/vnd.openxmlformats-officedocument.presentationml.presentation":
-            return _extract_text_from_pptx(file_content, unstructured_api_config=unstructured_api_config)
+            return _extract_text_from_pptx(file_content)
         case "application/epub+zip":
-            return _extract_text_from_epub(file_content, unstructured_api_config=unstructured_api_config)
+            return _extract_text_from_epub(file_content)
         case "message/rfc822":
             return _extract_text_from_eml(file_content)
         case "application/vnd.ms-outlook":
@@ -168,12 +140,7 @@ def _extract_text_by_mime_type(
     raise UnsupportedFileTypeError(f"Unsupported MIME type: {mime_type}")


-def _extract_text_by_file_extension(
-    *,
-    file_content: bytes,
-    file_extension: str,
-    unstructured_api_config: UnstructuredApiConfig,
-) -> str:
+def _extract_text_by_file_extension(*, file_content: bytes, file_extension: str) -> str:
     """Extract text from a file based on its file extension."""
     match file_extension:
         case (
@@ -236,7 +203,7 @@ def _extract_text_by_file_extension(
         case ".pdf":
             return _extract_text_from_pdf(file_content)
         case ".doc":
-            return _extract_text_from_doc(file_content, unstructured_api_config=unstructured_api_config)
+            return _extract_text_from_doc(file_content)
         case ".docx":
             return _extract_text_from_docx(file_content)
         case ".csv":
@@ -244,11 +211,11 @@ def _extract_text_by_file_extension(
         case ".xls" | ".xlsx":
             return _extract_text_from_excel(file_content)
         case ".ppt":
-            return _extract_text_from_ppt(file_content, unstructured_api_config=unstructured_api_config)
+            return _extract_text_from_ppt(file_content)
         case ".pptx":
-            return _extract_text_from_pptx(file_content, unstructured_api_config=unstructured_api_config)
+            return _extract_text_from_pptx(file_content)
         case ".epub":
-            return _extract_text_from_epub(file_content, unstructured_api_config=unstructured_api_config)
+            return _extract_text_from_epub(file_content)
         case ".eml":
             return _extract_text_from_eml(file_content)
         case ".msg":
@@ -345,14 +312,14 @@ def _extract_text_from_pdf(file_content: bytes) -> str:
         raise TextExtractionError(f"Failed to extract text from PDF: {str(e)}") from e


-def _extract_text_from_doc(file_content: bytes, *, unstructured_api_config: UnstructuredApiConfig) -> str:
+def _extract_text_from_doc(file_content: bytes) -> str:
     """
     Extract text from a DOC file.
     """
     from unstructured.partition.api import partition_via_api

-    if not unstructured_api_config.api_url:
-        raise TextExtractionError("Unstructured API URL is not configured for DOC file processing.")
+    if not dify_config.UNSTRUCTURED_API_URL:
+        raise TextExtractionError("UNSTRUCTURED_API_URL must be set")

     try:
         with tempfile.NamedTemporaryFile(suffix=".doc", delete=False) as temp_file:
@@ -362,8 +329,8 @@ def _extract_text_from_doc(file_content: bytes, *, unstructured_api_config: Unst
                 elements = partition_via_api(
                     file=file,
                     metadata_filename=temp_file.name,
-                    api_url=unstructured_api_config.api_url,
-                    api_key=unstructured_api_config.api_key,
+                    api_url=dify_config.UNSTRUCTURED_API_URL,
+                    api_key=dify_config.UNSTRUCTURED_API_KEY,  # type: ignore
                 )
             os.unlink(temp_file.name)
         return "\n".join([getattr(element, "text", "") for element in elements])
@@ -453,20 +420,12 @@ def _download_file_content(file: File) -> bytes:
         raise FileDownloadError(f"Error downloading file: {str(e)}") from e


-def _extract_text_from_file(file: File, *, unstructured_api_config: UnstructuredApiConfig) -> str:
+def _extract_text_from_file(file: File):
     file_content = _download_file_content(file)
     if file.extension:
-        extracted_text = _extract_text_by_file_extension(
-            file_content=file_content,
-            file_extension=file.extension,
-            unstructured_api_config=unstructured_api_config,
-        )
+        extracted_text = _extract_text_by_file_extension(file_content=file_content, file_extension=file.extension)
     elif file.mime_type:
-        extracted_text = _extract_text_by_mime_type(
-            file_content=file_content,
-            mime_type=file.mime_type,
-            unstructured_api_config=unstructured_api_config,
-        )
+        extracted_text = _extract_text_by_mime_type(file_content=file_content, mime_type=file.mime_type)
     else:
         raise UnsupportedFileTypeError("Unable to determine file type: MIME type or file extension is missing")
     return extracted_text
@@ -558,12 +517,12 @@ def _extract_text_from_excel(file_content: bytes) -> str:
         raise TextExtractionError(f"Failed to extract text from Excel file: {str(e)}") from e


-def _extract_text_from_ppt(file_content: bytes, *, unstructured_api_config: UnstructuredApiConfig) -> str:
+def _extract_text_from_ppt(file_content: bytes) -> str:
     from unstructured.partition.api import partition_via_api
     from unstructured.partition.ppt import partition_ppt

     try:
-        if unstructured_api_config.api_url:
+        if dify_config.UNSTRUCTURED_API_URL:
             with tempfile.NamedTemporaryFile(suffix=".ppt", delete=False) as temp_file:
                 temp_file.write(file_content)
                 temp_file.flush()
@@ -571,8 +530,8 @@ def _extract_text_from_ppt(file_content: bytes, *, unstructured_api_config: Unst
                     elements = partition_via_api(
                         file=file,
                         metadata_filename=temp_file.name,
-                        api_url=unstructured_api_config.api_url,
-                        api_key=unstructured_api_config.api_key,
+                        api_url=dify_config.UNSTRUCTURED_API_URL,
+                        api_key=dify_config.UNSTRUCTURED_API_KEY,  # type: ignore
                     )
                 os.unlink(temp_file.name)
         else:
@@ -584,12 +543,12 @@ def _extract_text_from_ppt(file_content: bytes, *, unstructured_api_config: Unst
         raise TextExtractionError(f"Failed to extract text from PPTX: {str(e)}") from e


-def _extract_text_from_pptx(file_content: bytes, *, unstructured_api_config: UnstructuredApiConfig) -> str:
+def _extract_text_from_pptx(file_content: bytes) -> str:
     from unstructured.partition.api import partition_via_api
     from unstructured.partition.pptx import partition_pptx

     try:
-        if unstructured_api_config.api_url:
+        if dify_config.UNSTRUCTURED_API_URL:
             with tempfile.NamedTemporaryFile(suffix=".pptx", delete=False) as temp_file:
                 temp_file.write(file_content)
                 temp_file.flush()
@@ -597,8 +556,8 @@ def _extract_text_from_pptx(file_content: bytes, *, unstructured_api_config: Uns
                     elements = partition_via_api(
                         file=file,
                         metadata_filename=temp_file.name,
-                        api_url=unstructured_api_config.api_url,
-                        api_key=unstructured_api_config.api_key,
+                        api_url=dify_config.UNSTRUCTURED_API_URL,
+                        api_key=dify_config.UNSTRUCTURED_API_KEY,  # type: ignore
                     )
                 os.unlink(temp_file.name)
         else:
@@ -609,12 +568,12 @@ def _extract_text_from_pptx(file_content: bytes, *, unstructured_api_config: Uns
         raise TextExtractionError(f"Failed to extract text from PPTX: {str(e)}") from e


-def _extract_text_from_epub(file_content: bytes, *, unstructured_api_config: UnstructuredApiConfig) -> str:
+def _extract_text_from_epub(file_content: bytes) -> str:
     from unstructured.partition.api import partition_via_api
     from unstructured.partition.epub import partition_epub

     try:
-        if unstructured_api_config.api_url:
+        if dify_config.UNSTRUCTURED_API_URL:
             with tempfile.NamedTemporaryFile(suffix=".epub", delete=False) as temp_file:
                 temp_file.write(file_content)
                 temp_file.flush()
@@ -622,8 +581,8 @@ def _extract_text_from_epub(file_content: bytes, *, unstructured_api_config: Uns
                 elements = partition_via_api(
                     file=file,
                     metadata_filename=temp_file.name,
-                    api_url=unstructured_api_config.api_url,
-                    api_key=unstructured_api_config.api_key,
+                    api_url=dify_config.UNSTRUCTURED_API_URL,
+                    api_key=dify_config.UNSTRUCTURED_API_KEY,  # type: ignore
                 )
             os.unlink(temp_file.name)
         else:
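(Editor's note: after this change all three call sites share the same shape — buffer to a temp file, send it to the hosted Unstructured API when the URL is configured. A standalone sketch of that path; config values are plain parameters here rather than dify_config reads, and partition_via_api is used exactly as the diff shows:)

import os
import tempfile

def partition_remote(file_content: bytes, suffix: str, api_url: str, api_key: str) -> str:
    from unstructured.partition.api import partition_via_api

    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as temp_file:
        temp_file.write(file_content)
        temp_file.flush()
        with open(temp_file.name, "rb") as file:
            elements = partition_via_api(
                file=file,
                metadata_filename=temp_file.name,
                api_url=api_url,
                api_key=api_key,
            )
    os.unlink(temp_file.name)  # clean up the buffered copy
    return "\n".join(getattr(element, "text", "") for element in elements)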


@@ -78,21 +78,12 @@ class KnowledgeIndexNode(Node[KnowledgeIndexNodeData]):
         indexing_technique = node_data.indexing_technique or dataset.indexing_technique
         summary_index_setting = node_data.summary_index_setting or dataset.summary_index_setting

-        # Try to get document language if document_id is available
-        doc_language = None
-        document_id = variable_pool.get(["sys", SystemVariableKey.DOCUMENT_ID])
-        if document_id:
-            document = db.session.query(Document).filter_by(id=document_id.value).first()
-            if document and document.doc_language:
-                doc_language = document.doc_language
-
         outputs = self._get_preview_output_with_summaries(
             node_data.chunk_structure,
             chunks,
             dataset=dataset,
             indexing_technique=indexing_technique,
             summary_index_setting=summary_index_setting,
-            doc_language=doc_language,
         )

         return NodeRunResult(
             status=WorkflowNodeExecutionStatus.SUCCEEDED,
@@ -324,7 +315,6 @@ class KnowledgeIndexNode(Node[KnowledgeIndexNodeData]):
         dataset: Dataset,
         indexing_technique: str | None = None,
         summary_index_setting: dict | None = None,
-        doc_language: str | None = None,
     ) -> Mapping[str, Any]:
         """
         Generate preview output with summaries for chunks in preview mode.
@@ -336,7 +326,6 @@ class KnowledgeIndexNode(Node[KnowledgeIndexNodeData]):
             dataset: Dataset object (for tenant_id)
             indexing_technique: Indexing technique from node config or dataset
             summary_index_setting: Summary index setting from node config or dataset
-            doc_language: Optional document language to ensure summary is generated in the correct language
         """
         index_processor = IndexProcessorFactory(chunk_structure).init_index_processor()
         preview_output = index_processor.format_preview(chunks)
@@ -376,7 +365,6 @@ class KnowledgeIndexNode(Node[KnowledgeIndexNodeData]):
                         tenant_id=dataset.tenant_id,
                         text=preview_item["content"],
                         summary_index_setting=summary_index_setting,
-                        document_language=doc_language,
                     )
                     if summary:
                         preview_item["summary"] = summary
@@ -386,7 +374,6 @@ class KnowledgeIndexNode(Node[KnowledgeIndexNodeData]):
                     tenant_id=dataset.tenant_id,
                     text=preview_item["content"],
                     summary_index_setting=summary_index_setting,
-                    document_language=doc_language,
                 )
                 if summary:
                     preview_item["summary"] = summary


@@ -1,6 +1,6 @@
[project]
name = "dify-api"
version = "1.11.4"
version = "1.12.0"
requires-python = ">=3.11,<3.13"
dependencies = [


@@ -16,7 +16,6 @@ from sqlalchemy.orm import Session
 from werkzeug.exceptions import Forbidden, NotFound

 from configs import dify_config
-from core.db.session_factory import session_factory
 from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError
 from core.file import helpers as file_helpers
 from core.helper.name_generator import generate_incremental_name
@@ -1389,46 +1388,6 @@ class DocumentService:
         ).all()
         return documents

-    @staticmethod
-    def update_documents_need_summary(dataset_id: str, document_ids: Sequence[str], need_summary: bool = True) -> int:
-        """
-        Update need_summary field for multiple documents.
-
-        This method handles the case where documents were created when summary_index_setting was disabled,
-        and need to be updated when summary_index_setting is later enabled.
-
-        Args:
-            dataset_id: Dataset ID
-            document_ids: List of document IDs to update
-            need_summary: Value to set for need_summary field (default: True)
-
-        Returns:
-            Number of documents updated
-        """
-        if not document_ids:
-            return 0
-
-        document_id_list: list[str] = [str(document_id) for document_id in document_ids]
-        with session_factory.create_session() as session:
-            updated_count = (
-                session.query(Document)
-                .filter(
-                    Document.id.in_(document_id_list),
-                    Document.dataset_id == dataset_id,
-                    Document.doc_form != "qa_model",  # Skip qa_model documents
-                )
-                .update({Document.need_summary: need_summary}, synchronize_session=False)
-            )
-            session.commit()
-
-        logger.info(
-            "Updated need_summary to %s for %d documents in dataset %s",
-            need_summary,
-            updated_count,
-            dataset_id,
-        )
-        return updated_count
-
     @staticmethod
     def get_document_download_url(document: Document) -> str:
         """


@@ -174,10 +174,6 @@ class RagPipelineTransformService:
             else:
                 dataset.retrieval_model = knowledge_configuration.retrieval_model.model_dump()

-            # Copy summary_index_setting from dataset to knowledge_index node configuration
-            if dataset.summary_index_setting:
-                knowledge_configuration.summary_index_setting = dataset.summary_index_setting
-
             knowledge_configuration_dict.update(knowledge_configuration.model_dump())
             node["data"] = knowledge_configuration_dict
         return node


@@ -49,18 +49,11 @@ class SummaryIndexService:
         # Use lazy import to avoid circular import
         from core.rag.index_processor.processor.paragraph_index_processor import ParagraphIndexProcessor

-        # Get document language to ensure summary is generated in the correct language
-        # This is especially important for image-only chunks where text is empty or minimal
-        document_language = None
-        if segment.document and segment.document.doc_language:
-            document_language = segment.document.doc_language
-
         summary_content, usage = ParagraphIndexProcessor.generate_summary(
             tenant_id=dataset.tenant_id,
             text=segment.content,
             summary_index_setting=summary_index_setting,
             segment_id=segment.id,
-            document_language=document_language,
         )

         if not summary_content:
@@ -565,9 +558,6 @@ class SummaryIndexService:
             )
             session.add(summary_record)

-        # Commit the batch created records
-        session.commit()
-
     @staticmethod
     def update_summary_record_error(
         segment: DocumentSegment,
@@ -772,6 +762,7 @@ class SummaryIndexService:
                 dataset=dataset,
                 status="not_started",
             )
+            session.commit()  # Commit initial records
             summary_records = []

api/uv.lock (generated, 2 lines changed)

@@ -1368,7 +1368,7 @@ wheels = [
 [[package]]
 name = "dify-api"
-version = "1.11.4"
+version = "1.12.0"
 source = { virtual = "." }
 dependencies = [
     { name = "aliyun-log-python-sdk" },


@@ -21,7 +21,7 @@ services:
   # API service
   api:
-    image: langgenius/dify-api:1.11.4
+    image: langgenius/dify-api:1.12.0
     restart: always
     environment:
       # Use the shared environment variables.
@@ -63,7 +63,7 @@ services:
   # worker service
   # The Celery worker for processing all queues (dataset, workflow, mail, etc.)
   worker:
-    image: langgenius/dify-api:1.11.4
+    image: langgenius/dify-api:1.12.0
     restart: always
     environment:
       # Use the shared environment variables.
@@ -102,7 +102,7 @@ services:
   # worker_beat service
   # Celery beat for scheduling periodic tasks.
   worker_beat:
-    image: langgenius/dify-api:1.11.4
+    image: langgenius/dify-api:1.12.0
     restart: always
     environment:
       # Use the shared environment variables.
@@ -132,7 +132,7 @@ services:
   # Frontend web application.
   web:
-    image: langgenius/dify-web:1.11.4
+    image: langgenius/dify-web:1.12.0
     restart: always
     environment:
       CONSOLE_API_URL: ${CONSOLE_API_URL:-}


@@ -707,7 +707,7 @@ services:
   # API service
   api:
-    image: langgenius/dify-api:1.11.4
+    image: langgenius/dify-api:1.12.0
     restart: always
     environment:
       # Use the shared environment variables.
@@ -749,7 +749,7 @@ services:
   # worker service
   # The Celery worker for processing all queues (dataset, workflow, mail, etc.)
   worker:
-    image: langgenius/dify-api:1.11.4
+    image: langgenius/dify-api:1.12.0
     restart: always
     environment:
       # Use the shared environment variables.
@@ -788,7 +788,7 @@ services:
   # worker_beat service
   # Celery beat for scheduling periodic tasks.
   worker_beat:
-    image: langgenius/dify-api:1.11.4
+    image: langgenius/dify-api:1.12.0
     restart: always
     environment:
       # Use the shared environment variables.
@@ -818,7 +818,7 @@ services:
   # Frontend web application.
   web:
-    image: langgenius/dify-web:1.11.4
+    image: langgenius/dify-web:1.12.0
     restart: always
     environment:
       CONSOLE_API_URL: ${CONSOLE_API_URL:-}


@@ -1,9 +1,8 @@
import type { ListChildComponentProps } from 'react-window'
import type { DataSourceNotionPage, DataSourceNotionPageMap } from '@/models/common'
import { RiArrowDownSLine, RiArrowRightSLine } from '@remixicon/react'
import { memo, useEffect, useMemo, useState } from 'react'
import { useVirtualizer } from '@tanstack/react-virtual'
import { memo, useCallback, useMemo, useRef, useState } from 'react'
import { useTranslation } from 'react-i18next'
import { areEqual, FixedSizeList as List } from 'react-window'
import { cn } from '@/utils/classnames'
import Checkbox from '../../checkbox'
import NotionIcon from '../../notion-icon'
@@ -32,6 +31,22 @@ type NotionPageItem = {
depth: number
} & DataSourceNotionPage
type ItemProps = {
virtualStart: number
virtualSize: number
current: NotionPageItem
onToggle: (pageId: string) => void
checkedIds: Set<string>
disabledCheckedIds: Set<string>
onCheck: (pageId: string) => void
canPreview?: boolean
onPreview: (pageId: string) => void
listMapWithChildrenAndDescendants: NotionPageTreeMap
searchValue: string
previewPageId: string
pagesMap: DataSourceNotionPageMap
}
const recursivePushInParentDescendants = (
pagesMap: DataSourceNotionPageMap,
listTreeMap: NotionPageTreeMap,
@@ -69,34 +84,22 @@ const recursivePushInParentDescendants = (
}
}
const ItemComponent = ({ index, style, data }: ListChildComponentProps<{
dataList: NotionPageItem[]
handleToggle: (index: number) => void
checkedIds: Set<string>
disabledCheckedIds: Set<string>
handleCheck: (index: number) => void
canPreview?: boolean
handlePreview: (index: number) => void
listMapWithChildrenAndDescendants: NotionPageTreeMap
searchValue: string
previewPageId: string
pagesMap: DataSourceNotionPageMap
}>) => {
const ItemComponent = ({
virtualStart,
virtualSize,
current,
onToggle,
checkedIds,
disabledCheckedIds,
onCheck,
canPreview,
onPreview,
listMapWithChildrenAndDescendants,
searchValue,
previewPageId,
pagesMap,
}: ItemProps) => {
const { t } = useTranslation()
const {
dataList,
handleToggle,
checkedIds,
disabledCheckedIds,
handleCheck,
canPreview,
handlePreview,
listMapWithChildrenAndDescendants,
searchValue,
previewPageId,
pagesMap,
} = data
const current = dataList[index]
const currentWithChildrenAndDescendants = listMapWithChildrenAndDescendants[current.page_id]
const hasChild = currentWithChildrenAndDescendants.descendants.size > 0
const ancestors = currentWithChildrenAndDescendants.ancestors
@@ -109,7 +112,7 @@ const ItemComponent = ({ index, style, data }: ListChildComponentProps<{
<div
className="mr-1 flex h-5 w-5 shrink-0 items-center justify-center rounded-md hover:bg-components-button-ghost-bg-hover"
style={{ marginLeft: current.depth * 8 }}
onClick={() => handleToggle(index)}
onClick={() => onToggle(current.page_id)}
>
{
current.expand
@@ -132,15 +135,21 @@ const ItemComponent = ({ index, style, data }: ListChildComponentProps<{
return (
<div
className={cn('group flex cursor-pointer items-center rounded-md pl-2 pr-[2px] hover:bg-state-base-hover', previewPageId === current.page_id && 'bg-state-base-hover')}
style={{ ...style, top: style.top as number + 8, left: 8, right: 8, width: 'calc(100% - 16px)' }}
style={{
position: 'absolute',
top: 0,
left: 8,
right: 8,
width: 'calc(100% - 16px)',
height: virtualSize,
transform: `translateY(${virtualStart + 8}px)`,
}}
>
<Checkbox
className="mr-2 shrink-0"
checked={checkedIds.has(current.page_id)}
disabled={disabled}
onCheck={() => {
handleCheck(index)
}}
onCheck={() => onCheck(current.page_id)}
/>
{!searchValue && renderArrow()}
<NotionIcon
@@ -160,7 +169,7 @@ const ItemComponent = ({ index, style, data }: ListChildComponentProps<{
className="ml-1 hidden h-6 shrink-0 cursor-pointer items-center rounded-md border-[0.5px] border-components-button-secondary-border bg-components-button-secondary-bg px-2 text-xs
font-medium leading-4 text-components-button-secondary-text shadow-xs shadow-shadow-shadow-3 backdrop-blur-[10px]
hover:border-components-button-secondary-border-hover hover:bg-components-button-secondary-bg-hover group-hover:flex"
onClick={() => handlePreview(index)}
onClick={() => onPreview(current.page_id)}
>
{t('dataSource.notion.selector.preview', { ns: 'common' })}
</div>
@@ -179,7 +188,7 @@ const ItemComponent = ({ index, style, data }: ListChildComponentProps<{
</div>
)
}
const Item = memo(ItemComponent, areEqual)
const Item = memo(ItemComponent)
const PageSelector = ({
value,
@@ -193,31 +202,10 @@ const PageSelector = ({
onPreview,
}: PageSelectorProps) => {
const { t } = useTranslation()
const [dataList, setDataList] = useState<NotionPageItem[]>([])
const parentRef = useRef<HTMLDivElement>(null)
const [expandedIds, setExpandedIds] = useState<Set<string>>(() => new Set())
const [localPreviewPageId, setLocalPreviewPageId] = useState('')
useEffect(() => {
setDataList(list.filter(item => item.parent_id === 'root' || !pagesMap[item.parent_id]).map((item) => {
return {
...item,
expand: false,
depth: 0,
}
}))
}, [list])
const searchDataList = list.filter((item) => {
return item.page_name.includes(searchValue)
}).map((item) => {
return {
...item,
expand: false,
depth: 0,
}
})
const currentDataList = searchValue ? searchDataList : dataList
const currentPreviewPageId = previewPageId === undefined ? localPreviewPageId : previewPageId
const listMapWithChildrenAndDescendants = useMemo(() => {
return list.reduce((prev: NotionPageTreeMap, next: DataSourceNotionPage) => {
const pageId = next.page_id
@@ -229,47 +217,89 @@ const PageSelector = ({
}, {})
}, [list, pagesMap])
const handleToggle = (index: number) => {
const current = dataList[index]
const pageId = current.page_id
const currentWithChildrenAndDescendants = listMapWithChildrenAndDescendants[pageId]
const descendantsIds = Array.from(currentWithChildrenAndDescendants.descendants)
const childrenIds = Array.from(currentWithChildrenAndDescendants.children)
let newDataList = []
if (current.expand) {
current.expand = false
newDataList = dataList.filter(item => !descendantsIds.includes(item.page_id))
const childrenByParent = useMemo(() => {
const map = new Map<string | null, DataSourceNotionPage[]>()
for (const item of list) {
const isRoot = item.parent_id === 'root' || !pagesMap[item.parent_id]
const parentKey = isRoot ? null : item.parent_id
const children = map.get(parentKey) || []
children.push(item)
map.set(parentKey, children)
}
else {
current.expand = true
return map
}, [list, pagesMap])
newDataList = [
...dataList.slice(0, index + 1),
...childrenIds.map(item => ({
...pagesMap[item],
expand: false,
depth: listMapWithChildrenAndDescendants[item].depth,
})),
...dataList.slice(index + 1),
]
const dataList = useMemo(() => {
const result: NotionPageItem[] = []
const buildVisibleList = (parentId: string | null, depth: number) => {
const items = childrenByParent.get(parentId) || []
for (const item of items) {
const isExpanded = expandedIds.has(item.page_id)
result.push({
...item,
expand: isExpanded,
depth,
})
if (isExpanded) {
buildVisibleList(item.page_id, depth + 1)
}
}
}
setDataList(newDataList)
}
const copyValue = new Set(value)
const handleCheck = (index: number) => {
const current = currentDataList[index]
const pageId = current.page_id
buildVisibleList(null, 0)
return result
}, [childrenByParent, expandedIds])
const searchDataList = useMemo(() => list.filter((item) => {
return item.page_name.includes(searchValue)
}).map((item) => {
return {
...item,
expand: false,
depth: 0,
}
}), [list, searchValue])
const currentDataList = searchValue ? searchDataList : dataList
const currentPreviewPageId = previewPageId === undefined ? localPreviewPageId : previewPageId
const virtualizer = useVirtualizer({
count: currentDataList.length,
getScrollElement: () => parentRef.current,
estimateSize: () => 28,
overscan: 5,
getItemKey: index => currentDataList[index].page_id,
})
const handleToggle = useCallback((pageId: string) => {
setExpandedIds((prev) => {
const next = new Set(prev)
if (prev.has(pageId)) {
next.delete(pageId)
const descendants = listMapWithChildrenAndDescendants[pageId]?.descendants
if (descendants) {
for (const descendantId of descendants)
next.delete(descendantId)
}
}
else {
next.add(pageId)
}
return next
})
}, [listMapWithChildrenAndDescendants])
const handleCheck = useCallback((pageId: string) => {
const currentWithChildrenAndDescendants = listMapWithChildrenAndDescendants[pageId]
const copyValue = new Set(value)
if (copyValue.has(pageId)) {
if (!searchValue) {
for (const item of currentWithChildrenAndDescendants.descendants)
copyValue.delete(item)
}
copyValue.delete(pageId)
}
else {
@@ -277,22 +307,17 @@ const PageSelector = ({
for (const item of currentWithChildrenAndDescendants.descendants)
copyValue.add(item)
}
copyValue.add(pageId)
}
onSelect(new Set(copyValue))
}
const handlePreview = (index: number) => {
const current = currentDataList[index]
const pageId = current.page_id
onSelect(copyValue)
}, [listMapWithChildrenAndDescendants, onSelect, searchValue, value])
const handlePreview = useCallback((pageId: string) => {
setLocalPreviewPageId(pageId)
if (onPreview)
onPreview(pageId)
}
}, [onPreview])
if (!currentDataList.length) {
return (
@@ -303,29 +328,41 @@ const PageSelector = ({
}
return (
<List
<div
ref={parentRef}
className="py-2"
height={296}
itemCount={currentDataList.length}
itemSize={28}
width="100%"
itemKey={(index, data) => data.dataList[index].page_id}
itemData={{
dataList: currentDataList,
handleToggle,
checkedIds: value,
disabledCheckedIds: disabledValue,
handleCheck,
canPreview,
handlePreview,
listMapWithChildrenAndDescendants,
searchValue,
previewPageId: currentPreviewPageId,
pagesMap,
}}
style={{ height: 296, width: '100%', overflow: 'auto' }}
>
{Item}
</List>
<div
style={{
height: virtualizer.getTotalSize(),
width: '100%',
position: 'relative',
}}
>
{virtualizer.getVirtualItems().map((virtualRow) => {
const current = currentDataList[virtualRow.index]
return (
<Item
key={virtualRow.key}
virtualStart={virtualRow.start}
virtualSize={virtualRow.size}
current={current}
onToggle={handleToggle}
checkedIds={value}
disabledCheckedIds={disabledValue}
onCheck={handleCheck}
canPreview={canPreview}
onPreview={handlePreview}
listMapWithChildrenAndDescendants={listMapWithChildrenAndDescendants}
searchValue={searchValue}
previewPageId={currentPreviewPageId}
pagesMap={pagesMap}
/>
)
})}
</div>
</div>
)
}
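(Editor's note: the structural recipe this component adopts, reduced to its skeleton — a scroll container, an inner div sized to getTotalSize(), and absolutely positioned rows translated to virtualRow.start. The 28px row height and 296px container mirror the values above; the rest is a generic sketch, not this component's props:)

import { useVirtualizer } from '@tanstack/react-virtual'
import { useRef } from 'react'

function VirtualList({ items }: { items: string[] }) {
  const parentRef = useRef<HTMLDivElement>(null)
  const virtualizer = useVirtualizer({
    count: items.length,
    getScrollElement: () => parentRef.current,
    estimateSize: () => 28, // fixed row height, like the page selector
    overscan: 5,
  })
  return (
    <div ref={parentRef} style={{ height: 296, overflow: 'auto' }}>
      {/* inner element carries the full scroll height; rows are absolutely positioned */}
      <div style={{ height: virtualizer.getTotalSize(), position: 'relative' }}>
        {virtualizer.getVirtualItems().map(row => (
          <div
            key={row.key}
            style={{ position: 'absolute', top: 0, width: '100%', height: row.size, transform: `translateY(${row.start}px)` }}
          >
            {items[row.index]}
          </div>
        ))}
      </div>
    </div>
  )
}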


@@ -11,21 +11,18 @@ import { recursivePushInParentDescendants } from './utils'

 // Note: react-i18next uses global mock from web/vitest.setup.ts

-// Mock react-window FixedSizeList - renders items directly for testing
-vi.mock('react-window', () => ({
-  FixedSizeList: ({ children: ItemComponent, itemCount, itemData, itemKey }: any) => (
-    <div data-testid="virtual-list">
-      {Array.from({ length: itemCount }).map((_, index) => (
-        <ItemComponent
-          key={itemKey?.(index, itemData) || index}
-          index={index}
-          style={{ top: index * 28, left: 0, right: 0, width: '100%', position: 'absolute' }}
-          data={itemData}
-        />
-      ))}
-    </div>
-  ),
-  areEqual: (prevProps: any, nextProps: any) => prevProps === nextProps,
+// Mock @tanstack/react-virtual useVirtualizer hook - renders items directly for testing
+vi.mock('@tanstack/react-virtual', () => ({
+  useVirtualizer: ({ count, getItemKey }: { count: number, getItemKey?: (index: number) => string }) => ({
+    getVirtualItems: () =>
+      Array.from({ length: count }).map((_, index) => ({
+        index,
+        key: getItemKey ? getItemKey(index) : index,
+        start: index * 28,
+        size: 28,
+      })),
+    getTotalSize: () => count * 28,
+  }),
 }))

 // Note: NotionIcon from @/app/components/base/ is NOT mocked - using real component per testing guidelines
@@ -119,7 +116,7 @@ describe('PageSelector', () => {
     render(<PageSelector {...props} />)

     // Assert
-    expect(screen.getByTestId('virtual-list')).toBeInTheDocument()
+    expect(screen.getByText('Test Page')).toBeInTheDocument()
   })

   it('should render empty state when list is empty', () => {
@@ -134,7 +131,7 @@ describe('PageSelector', () => {
     // Assert
     expect(screen.getByText('common.dataSource.notion.selector.noSearchResult')).toBeInTheDocument()
-    expect(screen.queryByTestId('virtual-list')).not.toBeInTheDocument()
+    expect(screen.queryByText('Test Page')).not.toBeInTheDocument()
   })

   it('should render items using FixedSizeList', () => {
@@ -1166,7 +1163,7 @@ describe('PageSelector', () => {
     render(<PageSelector {...props} />)

     // Assert
-    expect(screen.getByTestId('virtual-list')).toBeInTheDocument()
+    expect(screen.getByText('Test Page')).toBeInTheDocument()
   })

   it('should handle special characters in page name', () => {
@@ -1340,7 +1337,7 @@ describe('PageSelector', () => {
     render(<PageSelector {...props} />)

     // Assert
-    expect(screen.getByTestId('virtual-list')).toBeInTheDocument()
+    expect(screen.getByText('Test Page')).toBeInTheDocument()
     if (propVariation.canPreview)
       expect(screen.getByText('common.dataSource.notion.selector.preview')).toBeInTheDocument()
     else


@@ -1,7 +1,7 @@
import type { DataSourceNotionPage, DataSourceNotionPageMap } from '@/models/common'
import { useCallback, useEffect, useMemo, useState } from 'react'
import { useVirtualizer } from '@tanstack/react-virtual'
import { useCallback, useMemo, useRef, useState } from 'react'
import { useTranslation } from 'react-i18next'
import { FixedSizeList as List } from 'react-window'
import Item from './item'
import { recursivePushInParentDescendants } from './utils'
@@ -45,29 +45,16 @@ const PageSelector = ({
currentCredentialId,
}: PageSelectorProps) => {
const { t } = useTranslation()
const [dataList, setDataList] = useState<NotionPageItem[]>([])
const parentRef = useRef<HTMLDivElement>(null)
const [expandedIds, setExpandedIds] = useState<Set<string>>(() => new Set())
const [currentPreviewPageId, setCurrentPreviewPageId] = useState('')
const prevCredentialIdRef = useRef(currentCredentialId)
useEffect(() => {
setDataList(list.filter(item => item.parent_id === 'root' || !pagesMap[item.parent_id]).map((item) => {
return {
...item,
expand: false,
depth: 0,
}
}))
}, [currentCredentialId])
const searchDataList = list.filter((item) => {
return item.page_name.includes(searchValue)
}).map((item) => {
return {
...item,
expand: false,
depth: 0,
}
})
const currentDataList = searchValue ? searchDataList : dataList
// Reset expanded state when credential changes (render-time detection)
if (prevCredentialIdRef.current !== currentCredentialId) {
prevCredentialIdRef.current = currentCredentialId
setExpandedIds(new Set())
}
const listMapWithChildrenAndDescendants = useMemo(() => {
return list.reduce((prev: NotionPageTreeMap, next: DataSourceNotionPage) => {
@@ -80,39 +67,86 @@ const PageSelector = ({
}, {})
}, [list, pagesMap])
const handleToggle = useCallback((index: number) => {
const current = dataList[index]
const pageId = current.page_id
const currentWithChildrenAndDescendants = listMapWithChildrenAndDescendants[pageId]
const descendantsIds = Array.from(currentWithChildrenAndDescendants.descendants)
const childrenIds = Array.from(currentWithChildrenAndDescendants.children)
let newDataList = []
if (current.expand) {
current.expand = false
newDataList = dataList.filter(item => !descendantsIds.includes(item.page_id))
// Pre-build children index for O(1) lookup instead of O(n) filter
const childrenByParent = useMemo(() => {
const map = new Map<string | null, DataSourceNotionPage[]>()
for (const item of list) {
const isRoot = item.parent_id === 'root' || !pagesMap[item.parent_id]
const parentKey = isRoot ? null : item.parent_id
const children = map.get(parentKey) || []
children.push(item)
map.set(parentKey, children)
}
else {
current.expand = true
return map
}, [list, pagesMap])
newDataList = [
...dataList.slice(0, index + 1),
...childrenIds.map(item => ({
...pagesMap[item],
expand: false,
depth: listMapWithChildrenAndDescendants[item].depth,
})),
...dataList.slice(index + 1),
]
// Compute visible data list based on expanded state
const dataList = useMemo(() => {
const result: NotionPageItem[] = []
const buildVisibleList = (parentId: string | null, depth: number) => {
const items = childrenByParent.get(parentId) || []
for (const item of items) {
const isExpanded = expandedIds.has(item.page_id)
result.push({
...item,
expand: isExpanded,
depth,
})
if (isExpanded) {
buildVisibleList(item.page_id, depth + 1)
}
}
}
setDataList(newDataList)
}, [dataList, listMapWithChildrenAndDescendants, pagesMap])
const handleCheck = useCallback((index: number) => {
buildVisibleList(null, 0)
return result
}, [childrenByParent, expandedIds])
const searchDataList = useMemo(() => list.filter((item) => {
return item.page_name.includes(searchValue)
}).map((item) => {
return {
...item,
expand: false,
depth: 0,
}
}), [list, searchValue])
const currentDataList = searchValue ? searchDataList : dataList
const virtualizer = useVirtualizer({
count: currentDataList.length,
getScrollElement: () => parentRef.current,
estimateSize: () => 28,
overscan: 5,
getItemKey: index => currentDataList[index].page_id,
})
// Stable callback - no dependencies on dataList
const handleToggle = useCallback((pageId: string) => {
setExpandedIds((prev) => {
const next = new Set(prev)
if (prev.has(pageId)) {
// Collapse: remove current and all descendants
next.delete(pageId)
const descendants = listMapWithChildrenAndDescendants[pageId]?.descendants
if (descendants) {
for (const descendantId of descendants)
next.delete(descendantId)
}
}
else {
next.add(pageId)
}
return next
})
}, [listMapWithChildrenAndDescendants])
// Stable callback - uses pageId parameter instead of index
const handleCheck = useCallback((pageId: string) => {
const copyValue = new Set(checkedIds)
const current = currentDataList[index]
const pageId = current.page_id
const currentWithChildrenAndDescendants = listMapWithChildrenAndDescendants[pageId]
if (copyValue.has(pageId)) {
@@ -120,7 +154,6 @@ const PageSelector = ({
for (const item of currentWithChildrenAndDescendants.descendants)
copyValue.delete(item)
}
copyValue.delete(pageId)
}
else {
@@ -138,18 +171,15 @@ const PageSelector = ({
}
}
onSelect(new Set(copyValue))
}, [currentDataList, isMultipleChoice, listMapWithChildrenAndDescendants, onSelect, searchValue, checkedIds])
const handlePreview = useCallback((index: number) => {
const current = currentDataList[index]
const pageId = current.page_id
onSelect(copyValue)
}, [checkedIds, isMultipleChoice, listMapWithChildrenAndDescendants, onSelect, searchValue])
// Stable callback
const handlePreview = useCallback((pageId: string) => {
setCurrentPreviewPageId(pageId)
if (onPreview)
onPreview(pageId)
}, [currentDataList, onPreview])
}, [onPreview])
if (!currentDataList.length) {
return (
@@ -160,30 +190,42 @@ const PageSelector = ({
}
return (
<List
<div
ref={parentRef}
className="py-2"
height={296}
itemCount={currentDataList.length}
itemSize={28}
width="100%"
itemKey={(index, data) => data.dataList[index].page_id}
itemData={{
dataList: currentDataList,
handleToggle,
checkedIds,
disabledCheckedIds: disabledValue,
handleCheck,
canPreview,
handlePreview,
listMapWithChildrenAndDescendants,
searchValue,
previewPageId: currentPreviewPageId,
pagesMap,
isMultipleChoice,
}}
style={{ height: 296, width: '100%', overflow: 'auto' }}
>
{Item}
</List>
<div
style={{
height: virtualizer.getTotalSize(),
width: '100%',
position: 'relative',
}}
>
{virtualizer.getVirtualItems().map((virtualRow) => {
const current = currentDataList[virtualRow.index]
return (
<Item
key={virtualRow.key}
virtualStart={virtualRow.start}
virtualSize={virtualRow.size}
current={current}
onToggle={handleToggle}
checkedIds={checkedIds}
disabledCheckedIds={disabledValue}
onCheck={handleCheck}
canPreview={canPreview}
onPreview={handlePreview}
listMapWithChildrenAndDescendants={listMapWithChildrenAndDescendants}
searchValue={searchValue}
previewPageId={currentPreviewPageId}
pagesMap={pagesMap}
isMultipleChoice={isMultipleChoice}
/>
)
})}
</div>
</div>
)
}
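(Editor's note: the key idea in this refactor is storing only expandedIds and deriving the visible rows, instead of mutating a dataList state on every toggle. A framework-free sketch of that derivation, with illustrative types:)

type Page = { page_id: string, parent_id: string | null }

function visibleRows(childrenByParent: Map<string | null, Page[]>, expandedIds: Set<string>) {
  const result: Array<Page & { depth: number }> = []
  const walk = (parentId: string | null, depth: number) => {
    for (const item of childrenByParent.get(parentId) ?? []) {
      result.push({ ...item, depth })
      // only descend into subtrees the user has expanded
      if (expandedIds.has(item.page_id))
        walk(item.page_id, depth + 1)
    }
  }
  walk(null, 0)
  return result
}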


@@ -1,9 +1,7 @@
import type { ListChildComponentProps } from 'react-window'
import type { DataSourceNotionPage, DataSourceNotionPageMap } from '@/models/common'
import { RiArrowDownSLine, RiArrowRightSLine } from '@remixicon/react'
import * as React from 'react'
import { memo } from 'react'
import { useTranslation } from 'react-i18next'
import { areEqual } from 'react-window'
import Checkbox from '@/app/components/base/checkbox'
import NotionIcon from '@/app/components/base/notion-icon'
import Radio from '@/app/components/base/radio/ui'
@@ -23,36 +21,40 @@ type NotionPageItem = {
depth: number
} & DataSourceNotionPage
const Item = ({ index, style, data }: ListChildComponentProps<{
dataList: NotionPageItem[]
handleToggle: (index: number) => void
type ItemProps = {
virtualStart: number
virtualSize: number
current: NotionPageItem
onToggle: (pageId: string) => void
checkedIds: Set<string>
disabledCheckedIds: Set<string>
handleCheck: (index: number) => void
onCheck: (pageId: string) => void
canPreview?: boolean
handlePreview: (index: number) => void
onPreview: (pageId: string) => void
listMapWithChildrenAndDescendants: NotionPageTreeMap
searchValue: string
previewPageId: string
pagesMap: DataSourceNotionPageMap
isMultipleChoice?: boolean
}>) => {
}
const Item = ({
virtualStart,
virtualSize,
current,
onToggle,
checkedIds,
disabledCheckedIds,
onCheck,
canPreview,
onPreview,
listMapWithChildrenAndDescendants,
searchValue,
previewPageId,
pagesMap,
isMultipleChoice,
}: ItemProps) => {
const { t } = useTranslation()
const {
dataList,
handleToggle,
checkedIds,
disabledCheckedIds,
handleCheck,
canPreview,
handlePreview,
listMapWithChildrenAndDescendants,
searchValue,
previewPageId,
pagesMap,
isMultipleChoice,
} = data
const current = dataList[index]
const currentWithChildrenAndDescendants = listMapWithChildrenAndDescendants[current.page_id]
const hasChild = currentWithChildrenAndDescendants.descendants.size > 0
const ancestors = currentWithChildrenAndDescendants.ancestors
@@ -65,7 +67,7 @@ const Item = ({ index, style, data }: ListChildComponentProps<{
<div
className="mr-1 flex h-5 w-5 shrink-0 items-center justify-center rounded-md hover:bg-components-button-ghost-bg-hover"
style={{ marginLeft: current.depth * 8 }}
onClick={() => handleToggle(index)}
onClick={() => onToggle(current.page_id)}
>
{
current.expand
@@ -88,7 +90,15 @@ const Item = ({ index, style, data }: ListChildComponentProps<{
return (
<div
className={cn('group flex cursor-pointer items-center rounded-md pl-2 pr-[2px] hover:bg-state-base-hover', previewPageId === current.page_id && 'bg-state-base-hover')}
style={{ ...style, top: style.top as number + 8, left: 8, right: 8, width: 'calc(100% - 16px)' }}
style={{
position: 'absolute',
top: 0,
left: 8,
right: 8,
width: 'calc(100% - 16px)',
height: virtualSize,
transform: `translateY(${virtualStart + 8}px)`,
}}
>
{isMultipleChoice
? (
@@ -96,9 +106,7 @@ const Item = ({ index, style, data }: ListChildComponentProps<{
className="mr-2 shrink-0"
checked={checkedIds.has(current.page_id)}
disabled={disabled}
onCheck={() => {
handleCheck(index)
}}
onCheck={() => onCheck(current.page_id)}
/>
)
: (
@@ -106,9 +114,7 @@ const Item = ({ index, style, data }: ListChildComponentProps<{
className="mr-2 shrink-0"
isChecked={checkedIds.has(current.page_id)}
disabled={disabled}
onCheck={() => {
handleCheck(index)
}}
onCheck={() => onCheck(current.page_id)}
/>
)}
{!searchValue && renderArrow()}
@@ -129,7 +135,7 @@ const Item = ({ index, style, data }: ListChildComponentProps<{
className="ml-1 hidden h-6 shrink-0 cursor-pointer items-center rounded-md border-[0.5px] border-components-button-secondary-border bg-components-button-secondary-bg px-2 text-xs
font-medium leading-4 text-components-button-secondary-text shadow-xs shadow-shadow-shadow-3 backdrop-blur-[10px]
hover:border-components-button-secondary-border-hover hover:bg-components-button-secondary-bg-hover group-hover:flex"
onClick={() => handlePreview(index)}
onClick={() => onPreview(current.page_id)}
>
{t('dataSource.notion.selector.preview', { ns: 'common' })}
</div>
@@ -149,4 +155,4 @@ const Item = ({ index, style, data }: ListChildComponentProps<{
)
}
export default React.memo(Item, areEqual)
export default memo(Item)
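(Editor's note: dropping react-window's areEqual falls back to React.memo's default shallow prop comparison, which only works because every prop is now a primitive, a useMemo product, or a useCallback-stabilized handler. A small sketch of that contract; the component names are illustrative:)

import { memo, useCallback, useState } from 'react'

const Row = memo(({ label, onPick }: { label: string, onPick: (label: string) => void }) => (
  <div onClick={() => onPick(label)}>{label}</div>
))

function List({ labels }: { labels: string[] }) {
  const [, setPicked] = useState('')
  // stable identity across renders, so memoized rows skip re-rendering
  const onPick = useCallback((label: string) => setPicked(label), [])
  return <>{labels.map(label => <Row key={label} label={label} onPick={onPick} />)}</>
}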


@@ -1399,11 +1399,6 @@
       "count": 2
     }
   },
-  "app/components/base/notion-page-selector/page-selector/index.tsx": {
-    "react-hooks-extra/no-direct-set-state-in-use-effect": {
-      "count": 1
-    }
-  },
   "app/components/base/pagination/index.tsx": {
     "unicorn/prefer-number-properties": {
       "count": 1
@@ -1848,12 +1843,7 @@
   },
   "app/components/datasets/documents/create-from-pipeline/data-source/online-documents/page-selector/index.spec.tsx": {
     "ts/no-explicit-any": {
-      "count": 5
-    }
-  },
-  "app/components/datasets/documents/create-from-pipeline/data-source/online-documents/page-selector/index.tsx": {
-    "react-hooks-extra/no-direct-set-state-in-use-effect": {
-      "count": 1
+      "count": 2
     }
   },
   "app/components/datasets/documents/create-from-pipeline/data-source/online-drive/connect/index.spec.tsx": {

@@ -1,7 +1,7 @@
 {
   "name": "dify-web",
   "type": "module",
-  "version": "1.11.4",
+  "version": "1.12.0",
   "private": true,
   "packageManager": "pnpm@10.27.0+sha512.72d699da16b1179c14ba9e64dc71c9a40988cbdc65c264cb0e489db7de917f20dcf4d64d8723625f2969ba52d4b7e2a1170682d9ac2a5dcaeaab732b7e16f04a",
   "imports": {
@@ -84,6 +84,7 @@
     "@tailwindcss/typography": "0.5.19",
     "@tanstack/react-form": "1.23.7",
     "@tanstack/react-query": "5.90.5",
+    "@tanstack/react-virtual": "3.13.18",
     "abcjs": "6.5.2",
     "ahooks": "3.9.5",
     "class-variance-authority": "0.7.1",
@@ -142,7 +143,6 @@
     "react-sortablejs": "6.1.4",
     "react-syntax-highlighter": "15.6.6",
     "react-textarea-autosize": "8.5.9",
-    "react-window": "1.8.11",
     "reactflow": "11.11.4",
     "rehype-katex": "7.0.1",
     "rehype-raw": "7.0.0",
@@ -199,7 +199,6 @@
     "@types/react-dom": "19.2.3",
     "@types/react-slider": "1.3.6",
     "@types/react-syntax-highlighter": "15.5.13",
-    "@types/react-window": "1.8.8",
     "@types/semver": "7.7.1",
     "@types/sortablejs": "1.15.8",
     "@types/uuid": "10.0.0",

web/pnpm-lock.yaml (generated, 1905 lines changed)

File diff suppressed because it is too large.