Compare commits


10 Commits

Author  SHA1        Message                                                                                             Date
yyh     9ebc0cbe32  Merge branch 'main' into refactor/migrate-react-window-to-tanstack-virtual                         2026-02-01 14:42:12 +08:00
yyh     e0554987c9  Merge remote-tracking branch 'origin/main' into refactor/migrate-react-window-to-tanstack-virtual  2026-01-30 18:10:09 +08:00
                    (merge conflicts: web/pnpm-lock.yaml)
yyh     817cd53143  Merge remote-tracking branch 'origin/main' into refactor/migrate-react-window-to-tanstack-virtual  2026-01-29 12:34:49 +08:00
                    (merge conflicts: web/pnpm-lock.yaml)
yyh     cc3ab30728  Merge branch 'main' into refactor/migrate-react-window-to-tanstack-virtual                          2026-01-22 10:07:43 +08:00
yyh     e9462b7504  update                                                                                              2026-01-21 16:26:20 +08:00
yyh     c5bd31b813  update                                                                                              2026-01-21 16:22:01 +08:00
yyh     1a23951ae7  Merge remote-tracking branch 'origin/main' into refactor/migrate-react-window-to-tanstack-virtual  2026-01-21 16:17:20 +08:00
yyh     4d60a742dc  update                                                                                              2026-01-21 16:16:52 +08:00
yyh     8cf99a85cb  migrate and remove react window                                                                     2026-01-21 15:52:18 +08:00
yyh     52a874df98  add tanstack react query and migrate page selector                                                  2026-01-21 15:42:38 +08:00
32 changed files with 1364 additions and 1584 deletions

.github/CODEOWNERS (vendored, 3 lines changed)

@@ -9,9 +9,6 @@
# CODEOWNERS file
/.github/CODEOWNERS @laipz8200 @crazywoola
# Agents
/.agents/skills/ @hyoban
# Docs
/docs/ @crazywoola


@@ -112,6 +112,7 @@ ignore_imports =
     core.workflow.nodes.datasource.datasource_node -> models.model
     core.workflow.nodes.datasource.datasource_node -> models.tools
     core.workflow.nodes.datasource.datasource_node -> services.datasource_provider_service
+    core.workflow.nodes.document_extractor.node -> configs
     core.workflow.nodes.document_extractor.node -> core.file.file_manager
     core.workflow.nodes.document_extractor.node -> core.helper.ssrf_proxy
     core.workflow.nodes.http_request.entities -> configs


@@ -107,11 +107,10 @@ class AnnotationReplyActionApi(Resource):
     def post(self, app_id, action: Literal["enable", "disable"]):
         app_id = str(app_id)
         args = AnnotationReplyPayload.model_validate(console_ns.payload)
-        match action:
-            case "enable":
-                result = AppAnnotationService.enable_app_annotation(args.model_dump(), app_id)
-            case "disable":
-                result = AppAnnotationService.disable_app_annotation(app_id)
+        if action == "enable":
+            result = AppAnnotationService.enable_app_annotation(args.model_dump(), app_id)
+        elif action == "disable":
+            result = AppAnnotationService.disable_app_annotation(app_id)
         return result, 200
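(Editor's note: the hunk trades structural pattern matching for plain branching; for a two-valued Literal the behavior is identical. A minimal, self-contained sketch of the shape — names here are illustrative, not Dify's:)

from typing import Literal

def handle(action: Literal["enable", "disable"]) -> dict:
    # if/elif reads the same as the match/case it replaces,
    # and type checkers still narrow `action` in each branch
    if action == "enable":
        result = {"enabled": True}
    elif action == "disable":
        result = {"enabled": False}
    else:  # unreachable for a well-typed caller
        raise ValueError(f"unknown action: {action}")
    return result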


@@ -155,43 +155,43 @@ class OAuthServerUserTokenApi(Resource):
             grant_type = OAuthGrantType(payload.grant_type)
         except ValueError:
             raise BadRequest("invalid grant_type")

-        match grant_type:
-            case OAuthGrantType.AUTHORIZATION_CODE:
-                if not payload.code:
-                    raise BadRequest("code is required")
-                if payload.client_secret != oauth_provider_app.client_secret:
-                    raise BadRequest("client_secret is invalid")
-                access_token, refresh_token = OAuthServerService.sign_oauth_access_token(
-                    grant_type, code=payload.code, client_id=oauth_provider_app.client_id
-                )
-                return jsonable_encoder(
-                    {
-                        "access_token": access_token,
-                        "token_type": "Bearer",
-                        "expires_in": OAUTH_ACCESS_TOKEN_EXPIRES_IN,
-                        "refresh_token": refresh_token,
-                    }
-                )
-            case OAuthGrantType.REFRESH_TOKEN:
-                if not payload.refresh_token:
-                    raise BadRequest("refresh_token is required")
-                if payload.redirect_uri not in oauth_provider_app.redirect_uris:
-                    raise BadRequest("redirect_uri is invalid")
-                access_token, refresh_token = OAuthServerService.sign_oauth_access_token(
-                    grant_type, refresh_token=payload.refresh_token, client_id=oauth_provider_app.client_id
-                )
-                return jsonable_encoder(
-                    {
-                        "access_token": access_token,
-                        "token_type": "Bearer",
-                        "expires_in": OAUTH_ACCESS_TOKEN_EXPIRES_IN,
-                        "refresh_token": refresh_token,
-                    }
-                )
+        if grant_type == OAuthGrantType.AUTHORIZATION_CODE:
+            if not payload.code:
+                raise BadRequest("code is required")
+            if payload.redirect_uri not in oauth_provider_app.redirect_uris:
+                raise BadRequest("redirect_uri is invalid")
+            if payload.client_secret != oauth_provider_app.client_secret:
+                raise BadRequest("client_secret is invalid")
+            access_token, refresh_token = OAuthServerService.sign_oauth_access_token(
+                grant_type, code=payload.code, client_id=oauth_provider_app.client_id
+            )
+            return jsonable_encoder(
+                {
+                    "access_token": access_token,
+                    "token_type": "Bearer",
+                    "expires_in": OAUTH_ACCESS_TOKEN_EXPIRES_IN,
+                    "refresh_token": refresh_token,
+                }
+            )
+        elif grant_type == OAuthGrantType.REFRESH_TOKEN:
+            if not payload.refresh_token:
+                raise BadRequest("refresh_token is required")
+            access_token, refresh_token = OAuthServerService.sign_oauth_access_token(
+                grant_type, refresh_token=payload.refresh_token, client_id=oauth_provider_app.client_id
+            )
+            return jsonable_encoder(
+                {
+                    "access_token": access_token,
+                    "token_type": "Bearer",
+                    "expires_in": OAUTH_ACCESS_TOKEN_EXPIRES_IN,
+                    "refresh_token": refresh_token,
+                }
+            )


 @console_ns.route("/oauth/provider/account")
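(Editor's note: a compact sketch of the grant-type dispatch this hunk moves to, stripped of the Dify service layer; the enum, the signer placeholder, and the error type are stand-ins, not the project's API:)

from enum import Enum

class GrantType(Enum):
    AUTHORIZATION_CODE = "authorization_code"
    REFRESH_TOKEN = "refresh_token"

def token_endpoint(grant_type: GrantType, code: str | None, refresh_token: str | None) -> dict:
    # validate per-grant inputs first, then sign; both branches share one response shape
    if grant_type == GrantType.AUTHORIZATION_CODE:
        if not code:
            raise ValueError("code is required")
        access, refresh = "signed-access", "signed-refresh"  # placeholder for the signer
    elif grant_type == GrantType.REFRESH_TOKEN:
        if not refresh_token:
            raise ValueError("refresh_token is required")
        access, refresh = "signed-access", "signed-refresh"  # placeholder for the signer
    else:
        raise ValueError("invalid grant_type")
    return {"access_token": access, "token_type": "Bearer", "refresh_token": refresh}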


@@ -1339,18 +1339,6 @@ class DocumentGenerateSummaryApi(Resource):
             missing_ids = set(document_list) - found_ids
             raise NotFound(f"Some documents not found: {list(missing_ids)}")

-        # Update need_summary to True for documents that don't have it set
-        # This handles the case where documents were created when summary_index_setting was disabled
-        documents_to_update = [doc for doc in documents if not doc.need_summary and doc.doc_form != "qa_model"]
-        if documents_to_update:
-            document_ids_to_update = [str(doc.id) for doc in documents_to_update]
-            DocumentService.update_documents_need_summary(
-                dataset_id=dataset_id,
-                document_ids=document_ids_to_update,
-                need_summary=True,
-            )
-
         # Dispatch async tasks for each document
         for document in documents:
             # Skip qa_model documents as they don't generate summaries


@@ -250,7 +250,7 @@ class WorkflowResponseConverter:
             data=WorkflowFinishStreamResponse.Data(
                 id=run_id,
                 workflow_id=workflow_id,
-                status=status,
+                status=status.value,
                 outputs=encoded_outputs,
                 error=error,
                 elapsed_time=elapsed_time,
@@ -340,13 +340,13 @@ class WorkflowResponseConverter:
         metadata = self._merge_metadata(event.execution_metadata, snapshot)

         if isinstance(event, QueueNodeSucceededEvent):
-            status = WorkflowNodeExecutionStatus.SUCCEEDED
+            status = WorkflowNodeExecutionStatus.SUCCEEDED.value
             error_message = event.error
         elif isinstance(event, QueueNodeFailedEvent):
-            status = WorkflowNodeExecutionStatus.FAILED
+            status = WorkflowNodeExecutionStatus.FAILED.value
             error_message = event.error
         else:
-            status = WorkflowNodeExecutionStatus.EXCEPTION
+            status = WorkflowNodeExecutionStatus.EXCEPTION.value
             error_message = event.error

         return NodeFinishStreamResponse(
@@ -413,7 +413,7 @@ class WorkflowResponseConverter:
             process_data_truncated=process_data_truncated,
             outputs=outputs,
             outputs_truncated=outputs_truncated,
-            status=WorkflowNodeExecutionStatus.RETRY,
+            status=WorkflowNodeExecutionStatus.RETRY.value,
             error=event.error,
             elapsed_time=elapsed_time,
             execution_metadata=metadata,


@@ -7,7 +7,7 @@ from pydantic import BaseModel, ConfigDict, Field
 from core.model_runtime.entities.llm_entities import LLMResult, LLMUsage
 from core.rag.entities.citation_metadata import RetrievalSourceMetadata
 from core.workflow.entities import AgentNodeStrategyInit
-from core.workflow.enums import WorkflowExecutionStatus, WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus
+from core.workflow.enums import WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus


 class AnnotationReplyAccount(BaseModel):
@@ -223,7 +223,7 @@ class WorkflowFinishStreamResponse(StreamResponse):
         id: str
         workflow_id: str
-        status: WorkflowExecutionStatus
+        status: str
         outputs: Mapping[str, Any] | None = None
         error: str | None = None
         elapsed_time: float
@@ -311,7 +311,7 @@ class NodeFinishStreamResponse(StreamResponse):
         process_data_truncated: bool = False
         outputs: Mapping[str, Any] | None = None
         outputs_truncated: bool = True
-        status: WorkflowNodeExecutionStatus
+        status: str
         error: str | None = None
         elapsed_time: float
         execution_metadata: Mapping[WorkflowNodeExecutionMetadataKey, Any] | None = None
@@ -375,7 +375,7 @@ class NodeRetryStreamResponse(StreamResponse):
         process_data_truncated: bool = False
         outputs: Mapping[str, Any] | None = None
         outputs_truncated: bool = False
-        status: WorkflowNodeExecutionStatus
+        status: str
         error: str | None = None
         elapsed_time: float
         execution_metadata: Mapping[WorkflowNodeExecutionMetadataKey, Any] | None = None
@@ -719,7 +719,7 @@ class WorkflowAppBlockingResponse(AppBlockingResponse):
         id: str
         workflow_id: str
-        status: WorkflowExecutionStatus
+        status: str
         outputs: Mapping[str, Any] | None = None
         error: str | None = None
         elapsed_time: float
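(Editor's note: the field-type change here pairs with the .value calls in the converter above. Assuming the status enums are str-backed — a guess from how they serialize — the two pieces combine like this minimal Pydantic sketch:)

from enum import Enum
from pydantic import BaseModel

class NodeStatus(str, Enum):  # assumption: Dify's enum is similarly str-backed
    SUCCEEDED = "succeeded"

class NodeFinish(BaseModel):
    status: str  # was the enum type; now a plain string on the wire

resp = NodeFinish(status=NodeStatus.SUCCEEDED.value)
print(resp.model_dump())  # {'status': 'succeeded'} - no enum leaks into the stream payload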


@@ -1,5 +1,5 @@
 from collections.abc import Callable, Sequence
-from typing import TYPE_CHECKING, Any, cast, final
+from typing import TYPE_CHECKING, final

 from typing_extensions import override
@@ -15,7 +15,6 @@ from core.workflow.graph.graph import NodeFactory
 from core.workflow.nodes.base.node import Node
 from core.workflow.nodes.code.code_node import CodeNode
 from core.workflow.nodes.code.limits import CodeNodeLimits
-from core.workflow.nodes.document_extractor import DocumentExtractorNode, UnstructuredApiConfig
 from core.workflow.nodes.http_request.node import HttpRequestNode
 from core.workflow.nodes.node_mapping import LATEST_VERSION, NODE_TYPE_CLASSES_MAPPING
 from core.workflow.nodes.protocols import FileManagerProtocol, HttpClientProtocol
@@ -51,7 +50,6 @@ class DifyNodeFactory(NodeFactory):
         http_request_http_client: HttpClientProtocol | None = None,
         http_request_tool_file_manager_factory: Callable[[], ToolFileManager] = ToolFileManager,
         http_request_file_manager: FileManagerProtocol | None = None,
-        document_extractor_unstructured_api_config: UnstructuredApiConfig | None = None,
     ) -> None:
         self.graph_init_params = graph_init_params
         self.graph_runtime_state = graph_runtime_state
@@ -73,13 +71,6 @@ class DifyNodeFactory(NodeFactory):
         self._http_request_http_client = http_request_http_client or ssrf_proxy
         self._http_request_tool_file_manager_factory = http_request_tool_file_manager_factory
         self._http_request_file_manager = http_request_file_manager or file_manager
-        self._document_extractor_unstructured_api_config = (
-            document_extractor_unstructured_api_config
-            or UnstructuredApiConfig(
-                api_url=dify_config.UNSTRUCTURED_API_URL,
-                api_key=dify_config.UNSTRUCTURED_API_KEY or "",
-            )
-        )

     @override
     def create_node(self, node_config: NodeConfigDict) -> Node:
@@ -112,17 +103,13 @@ class DifyNodeFactory(NodeFactory):
         if not node_class:
             raise ValueError(f"No latest version class found for node type: {node_type}")

-        common_kwargs: dict[str, Any] = {
-            "id": node_id,
-            "config": node_config,
-            "graph_init_params": self.graph_init_params,
-            "graph_runtime_state": self.graph_runtime_state,
-        }
-
         # Create node instance
         if node_type == NodeType.CODE:
             return CodeNode(
-                **common_kwargs,
+                id=node_id,
+                config=node_config,
+                graph_init_params=self.graph_init_params,
+                graph_runtime_state=self.graph_runtime_state,
                 code_executor=self._code_executor,
                 code_providers=self._code_providers,
                 code_limits=self._code_limits,
@@ -130,23 +117,27 @@ class DifyNodeFactory(NodeFactory):
         if node_type == NodeType.TEMPLATE_TRANSFORM:
             return TemplateTransformNode(
-                **common_kwargs,
+                id=node_id,
+                config=node_config,
+                graph_init_params=self.graph_init_params,
+                graph_runtime_state=self.graph_runtime_state,
                 template_renderer=self._template_renderer,
             )
         if node_type == NodeType.HTTP_REQUEST:
             return HttpRequestNode(
-                **common_kwargs,
+                id=node_id,
+                config=node_config,
+                graph_init_params=self.graph_init_params,
+                graph_runtime_state=self.graph_runtime_state,
                 http_client=self._http_request_http_client,
                 tool_file_manager_factory=self._http_request_tool_file_manager_factory,
                 file_manager=self._http_request_file_manager,
             )
-        if node_type == NodeType.DOCUMENT_EXTRACTOR:
-            document_extractor_class = cast(type[DocumentExtractorNode], node_class)
-            return document_extractor_class(
-                **common_kwargs,
-                unstructured_api_config=self._document_extractor_unstructured_api_config,
-            )
-        return node_class(**common_kwargs)
+        return node_class(
+            id=node_id,
+            config=node_config,
+            graph_init_params=self.graph_init_params,
+            graph_runtime_state=self.graph_runtime_state,
+        )
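(Editor's note: the factory shape after this change — one dispatch point, collaborators injected once at factory construction, explicit constructor kwargs for special-cased node types. A reduced sketch; the node classes are stand-ins:)

class Node:
    def __init__(self, id: str, config: dict) -> None:
        self.id, self.config = id, config

class CodeNode(Node):
    def __init__(self, id: str, config: dict, code_executor: object) -> None:
        super().__init__(id, config)
        self.code_executor = code_executor

class NodeFactory:
    def __init__(self, code_executor: object) -> None:
        self._code_executor = code_executor  # injected once, reused for every node

    def create_node(self, node_type: str, node_id: str, config: dict) -> Node:
        if node_type == "code":
            # special-cased types get their extra collaborators spelled out
            return CodeNode(id=node_id, config=config, code_executor=self._code_executor)
        return Node(id=node_id, config=config)  # everything else takes only the common kwargs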


@@ -369,9 +369,7 @@ class IndexingRunner:
             # Generate summary preview
             summary_index_setting = tmp_processing_rule.get("summary_index_setting")
             if summary_index_setting and summary_index_setting.get("enable") and preview_texts:
-                preview_texts = index_processor.generate_summary_preview(
-                    tenant_id, preview_texts, summary_index_setting, doc_language
-                )
+                preview_texts = index_processor.generate_summary_preview(tenant_id, preview_texts, summary_index_setting)

         return IndexingEstimate(total_segments=total_segments, preview=preview_texts)


@@ -441,13 +441,11 @@ DEFAULT_GENERATOR_SUMMARY_PROMPT = (
     Requirements:
     1. Write a concise summary in plain text
-    2. You must write in {language}. No language other than {language} should be used.
+    2. Use the same language as the input content
     3. Focus on important facts, concepts, and details
     4. If images are included, describe their key information
     5. Do not use words like "好的", "ok", "I understand", "This text discusses", "The content mentions"
     6. Write directly without extra words
     7. If there is not enough content to generate a meaningful summary,
        return an empty string without any explanation or prompt

     Output only the summary text. Start summarizing now:
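(Editor's note: the replaced requirement 2 carried a {language} placeholder that generate_summary filled via str.format — the templating removed further down in this compare. A sketch of that removed mechanism, mirroring the old code:)

DEFAULT_PROMPT = "Write a concise summary. You must write in {language}."

def render_prompt(prompt: str, document_language: str | None) -> str:
    language = document_language or "the same language as the input content"
    try:
        return prompt.format(language=language)
    except KeyError:
        # a custom prompt with unrelated {placeholders} is used as-is
        return prompt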


@@ -48,22 +48,12 @@ class BaseIndexProcessor(ABC):
     @abstractmethod
     def generate_summary_preview(
-        self,
-        tenant_id: str,
-        preview_texts: list[PreviewDetail],
-        summary_index_setting: dict,
-        doc_language: str | None = None,
+        self, tenant_id: str, preview_texts: list[PreviewDetail], summary_index_setting: dict
     ) -> list[PreviewDetail]:
         """
         For each segment in preview_texts, generate a summary using LLM and attach it to the segment.
         The summary can be stored in a new attribute, e.g., summary.
         This method should be implemented by subclasses.
-
-        Args:
-            tenant_id: Tenant ID
-            preview_texts: List of preview details to generate summaries for
-            summary_index_setting: Summary index configuration
-            doc_language: Optional document language to ensure summary is generated in the correct language
         """
         raise NotImplementedError


@@ -275,11 +275,7 @@ class ParagraphIndexProcessor(BaseIndexProcessor):
             raise ValueError("Chunks is not a list")

     def generate_summary_preview(
-        self,
-        tenant_id: str,
-        preview_texts: list[PreviewDetail],
-        summary_index_setting: dict,
-        doc_language: str | None = None,
+        self, tenant_id: str, preview_texts: list[PreviewDetail], summary_index_setting: dict
     ) -> list[PreviewDetail]:
         """
         For each segment, concurrently call generate_summary to generate a summary
@@ -302,15 +298,11 @@ class ParagraphIndexProcessor(BaseIndexProcessor):
             if flask_app:
                 # Ensure Flask app context in worker thread
                 with flask_app.app_context():
-                    summary, _ = self.generate_summary(
-                        tenant_id, preview.content, summary_index_setting, document_language=doc_language
-                    )
+                    summary, _ = self.generate_summary(tenant_id, preview.content, summary_index_setting)
                     preview.summary = summary
             else:
                 # Fallback: try without app context (may fail)
-                summary, _ = self.generate_summary(
-                    tenant_id, preview.content, summary_index_setting, document_language=doc_language
-                )
+                summary, _ = self.generate_summary(tenant_id, preview.content, summary_index_setting)
                 preview.summary = summary

         # Generate summaries concurrently using ThreadPoolExecutor
@@ -364,7 +356,6 @@ class ParagraphIndexProcessor(BaseIndexProcessor):
         text: str,
         summary_index_setting: dict | None = None,
         segment_id: str | None = None,
-        document_language: str | None = None,
     ) -> tuple[str, LLMUsage]:
         """
         Generate summary for the given text using ModelInstance.invoke_llm and the default or custom summary prompt,
@@ -375,8 +366,6 @@ class ParagraphIndexProcessor(BaseIndexProcessor):
             text: Text content to summarize
             summary_index_setting: Summary index configuration
             segment_id: Optional segment ID to fetch attachments from SegmentAttachmentBinding table
-            document_language: Optional document language (e.g., "Chinese", "English")
-                to ensure summary is generated in the correct language

         Returns:
             Tuple of (summary_content, llm_usage) where llm_usage is LLMUsage object
@@ -392,22 +381,8 @@ class ParagraphIndexProcessor(BaseIndexProcessor):
             raise ValueError("model_name and model_provider_name are required in summary_index_setting")

         # Import default summary prompt
-        is_default_prompt = False
         if not summary_prompt:
             summary_prompt = DEFAULT_GENERATOR_SUMMARY_PROMPT
-            is_default_prompt = True
-
-        # Format prompt with document language only for default prompt
-        # Custom prompts are used as-is to avoid interfering with user-defined templates
-        # If document_language is provided, use it; otherwise, use "the same language as the input content"
-        # This is especially important for image-only chunks where text is empty or minimal
-        if is_default_prompt:
-            language_for_prompt = document_language or "the same language as the input content"
-            try:
-                summary_prompt = summary_prompt.format(language=language_for_prompt)
-            except KeyError:
-                # If default prompt doesn't have {language} placeholder, use it as-is
-                pass

         provider_manager = ProviderManager()
         provider_model_bundle = provider_manager.get_provider_model_bundle(
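(Editor's note: the worker shown in this hunk needs a Flask application context because the summary call touches app-scoped resources. A self-contained sketch of that thread-pool pattern; summarize is a placeholder for the real LLM invocation:)

from concurrent.futures import ThreadPoolExecutor
from flask import Flask

app = Flask(__name__)

def summarize(text: str) -> str:
    return text[:40]  # placeholder for the real LLM invocation

def worker(text: str) -> str:
    with app.app_context():  # each worker thread pushes its own context
        return summarize(text)

texts = ["first chunk to index", "second chunk to index"]
with ThreadPoolExecutor(max_workers=4) as pool:
    summaries = list(pool.map(worker, texts))
print(summaries)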


@@ -358,11 +358,7 @@ class ParentChildIndexProcessor(BaseIndexProcessor):
         }

     def generate_summary_preview(
-        self,
-        tenant_id: str,
-        preview_texts: list[PreviewDetail],
-        summary_index_setting: dict,
-        doc_language: str | None = None,
+        self, tenant_id: str, preview_texts: list[PreviewDetail], summary_index_setting: dict
     ) -> list[PreviewDetail]:
         """
         For each parent chunk in preview_texts, concurrently call generate_summary to generate a summary
@@ -393,7 +389,6 @@ class ParentChildIndexProcessor(BaseIndexProcessor):
                         tenant_id=tenant_id,
                         text=preview.content,
                         summary_index_setting=summary_index_setting,
-                        document_language=doc_language,
                     )
                     preview.summary = summary
             else:
@@ -402,7 +397,6 @@ class ParentChildIndexProcessor(BaseIndexProcessor):
                     tenant_id=tenant_id,
                     text=preview.content,
                     summary_index_setting=summary_index_setting,
-                    document_language=doc_language,
                 )
                 preview.summary = summary


@@ -241,11 +241,7 @@ class QAIndexProcessor(BaseIndexProcessor):
         }

     def generate_summary_preview(
-        self,
-        tenant_id: str,
-        preview_texts: list[PreviewDetail],
-        summary_index_setting: dict,
-        doc_language: str | None = None,
+        self, tenant_id: str, preview_texts: list[PreviewDetail], summary_index_setting: dict
     ) -> list[PreviewDetail]:
         """
         QA model doesn't generate summaries, so this method returns preview_texts unchanged.


@@ -1,4 +1,4 @@
-from .entities import DocumentExtractorNodeData, UnstructuredApiConfig
+from .entities import DocumentExtractorNodeData
 from .node import DocumentExtractorNode

-__all__ = ["DocumentExtractorNode", "DocumentExtractorNodeData", "UnstructuredApiConfig"]
+__all__ = ["DocumentExtractorNode", "DocumentExtractorNodeData"]


@@ -1,14 +1,7 @@
 from collections.abc import Sequence
-from dataclasses import dataclass

 from core.workflow.nodes.base import BaseNodeData


 class DocumentExtractorNodeData(BaseNodeData):
     variable_selector: Sequence[str]
-
-
-@dataclass(frozen=True)
-class UnstructuredApiConfig:
-    api_url: str | None = None
-    api_key: str = ""


@@ -5,7 +5,7 @@ import logging
 import os
 import tempfile
 from collections.abc import Mapping, Sequence
-from typing import TYPE_CHECKING, Any
+from typing import Any

 import charset_normalizer
 import docx
@@ -20,6 +20,7 @@ from docx.oxml.text.paragraph import CT_P
 from docx.table import Table
 from docx.text.paragraph import Paragraph

+from configs import dify_config
 from core.file import File, FileTransferMethod, file_manager
 from core.helper import ssrf_proxy
 from core.variables import ArrayFileSegment
@@ -28,15 +29,11 @@ from core.workflow.enums import NodeType, WorkflowNodeExecutionStatus
 from core.workflow.node_events import NodeRunResult
 from core.workflow.nodes.base.node import Node

-from .entities import DocumentExtractorNodeData, UnstructuredApiConfig
+from .entities import DocumentExtractorNodeData
 from .exc import DocumentExtractorError, FileDownloadError, TextExtractionError, UnsupportedFileTypeError

 logger = logging.getLogger(__name__)

-if TYPE_CHECKING:
-    from core.workflow.entities import GraphInitParams
-    from core.workflow.runtime import GraphRuntimeState
-

 class DocumentExtractorNode(Node[DocumentExtractorNodeData]):
     """
@@ -50,23 +47,6 @@ class DocumentExtractorNode(Node[DocumentExtractorNodeData]):
     def version(cls) -> str:
         return "1"

-    def __init__(
-        self,
-        id: str,
-        config: Mapping[str, Any],
-        graph_init_params: "GraphInitParams",
-        graph_runtime_state: "GraphRuntimeState",
-        *,
-        unstructured_api_config: UnstructuredApiConfig | None = None,
-    ) -> None:
-        super().__init__(
-            id=id,
-            config=config,
-            graph_init_params=graph_init_params,
-            graph_runtime_state=graph_runtime_state,
-        )
-        self._unstructured_api_config = unstructured_api_config or UnstructuredApiConfig()
-
     def _run(self):
         variable_selector = self.node_data.variable_selector
         variable = self.graph_runtime_state.variable_pool.get(variable_selector)
@@ -84,10 +64,7 @@ class DocumentExtractorNode(Node[DocumentExtractorNodeData]):
         try:
             if isinstance(value, list):
-                extracted_text_list = [
-                    _extract_text_from_file(file, unstructured_api_config=self._unstructured_api_config)
-                    for file in value
-                ]
+                extracted_text_list = list(map(_extract_text_from_file, value))
                 return NodeRunResult(
                     status=WorkflowNodeExecutionStatus.SUCCEEDED,
                     inputs=inputs,
@@ -95,7 +72,7 @@ class DocumentExtractorNode(Node[DocumentExtractorNodeData]):
                     outputs={"text": ArrayStringSegment(value=extracted_text_list)},
                 )
             elif isinstance(value, File):
-                extracted_text = _extract_text_from_file(value, unstructured_api_config=self._unstructured_api_config)
+                extracted_text = _extract_text_from_file(value)
                 return NodeRunResult(
                     status=WorkflowNodeExecutionStatus.SUCCEEDED,
                     inputs=inputs,
@@ -126,12 +103,7 @@ class DocumentExtractorNode(Node[DocumentExtractorNodeData]):
         return {node_id + ".files": typed_node_data.variable_selector}


-def _extract_text_by_mime_type(
-    *,
-    file_content: bytes,
-    mime_type: str,
-    unstructured_api_config: UnstructuredApiConfig,
-) -> str:
+def _extract_text_by_mime_type(*, file_content: bytes, mime_type: str) -> str:
     """Extract text from a file based on its MIME type."""
     match mime_type:
         case "text/plain" | "text/html" | "text/htm" | "text/markdown" | "text/xml":
@@ -139,7 +111,7 @@ def _extract_text_by_mime_type(
         case "application/pdf":
             return _extract_text_from_pdf(file_content)
         case "application/msword":
-            return _extract_text_from_doc(file_content, unstructured_api_config=unstructured_api_config)
+            return _extract_text_from_doc(file_content)
         case "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
             return _extract_text_from_docx(file_content)
         case "text/csv":
@@ -147,11 +119,11 @@ def _extract_text_by_mime_type(
         case "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" | "application/vnd.ms-excel":
             return _extract_text_from_excel(file_content)
         case "application/vnd.ms-powerpoint":
-            return _extract_text_from_ppt(file_content, unstructured_api_config=unstructured_api_config)
+            return _extract_text_from_ppt(file_content)
         case "application/vnd.openxmlformats-officedocument.presentationml.presentation":
-            return _extract_text_from_pptx(file_content, unstructured_api_config=unstructured_api_config)
+            return _extract_text_from_pptx(file_content)
         case "application/epub+zip":
-            return _extract_text_from_epub(file_content, unstructured_api_config=unstructured_api_config)
+            return _extract_text_from_epub(file_content)
         case "message/rfc822":
             return _extract_text_from_eml(file_content)
         case "application/vnd.ms-outlook":
@@ -168,12 +140,7 @@ def _extract_text_by_mime_type(
     raise UnsupportedFileTypeError(f"Unsupported MIME type: {mime_type}")


-def _extract_text_by_file_extension(
-    *,
-    file_content: bytes,
-    file_extension: str,
-    unstructured_api_config: UnstructuredApiConfig,
-) -> str:
+def _extract_text_by_file_extension(*, file_content: bytes, file_extension: str) -> str:
     """Extract text from a file based on its file extension."""
     match file_extension:
         case (
@@ -236,7 +203,7 @@ def _extract_text_by_file_extension(
         case ".pdf":
             return _extract_text_from_pdf(file_content)
         case ".doc":
-            return _extract_text_from_doc(file_content, unstructured_api_config=unstructured_api_config)
+            return _extract_text_from_doc(file_content)
         case ".docx":
             return _extract_text_from_docx(file_content)
         case ".csv":
@@ -244,11 +211,11 @@ def _extract_text_by_file_extension(
         case ".xls" | ".xlsx":
             return _extract_text_from_excel(file_content)
         case ".ppt":
-            return _extract_text_from_ppt(file_content, unstructured_api_config=unstructured_api_config)
+            return _extract_text_from_ppt(file_content)
         case ".pptx":
-            return _extract_text_from_pptx(file_content, unstructured_api_config=unstructured_api_config)
+            return _extract_text_from_pptx(file_content)
         case ".epub":
-            return _extract_text_from_epub(file_content, unstructured_api_config=unstructured_api_config)
+            return _extract_text_from_epub(file_content)
         case ".eml":
             return _extract_text_from_eml(file_content)
         case ".msg":
@@ -345,14 +312,14 @@ def _extract_text_from_pdf(file_content: bytes) -> str:
         raise TextExtractionError(f"Failed to extract text from PDF: {str(e)}") from e


-def _extract_text_from_doc(file_content: bytes, *, unstructured_api_config: UnstructuredApiConfig) -> str:
+def _extract_text_from_doc(file_content: bytes) -> str:
     """
     Extract text from a DOC file.
     """
     from unstructured.partition.api import partition_via_api

-    if not unstructured_api_config.api_url:
-        raise TextExtractionError("Unstructured API URL is not configured for DOC file processing.")
+    if not dify_config.UNSTRUCTURED_API_URL:
+        raise TextExtractionError("UNSTRUCTURED_API_URL must be set")

     try:
         with tempfile.NamedTemporaryFile(suffix=".doc", delete=False) as temp_file:
@@ -362,8 +329,8 @@ def _extract_text_from_doc(file_content: bytes, *, unstructured_api_config: Unst
                 elements = partition_via_api(
                     file=file,
                     metadata_filename=temp_file.name,
-                    api_url=unstructured_api_config.api_url,
-                    api_key=unstructured_api_config.api_key,
+                    api_url=dify_config.UNSTRUCTURED_API_URL,
+                    api_key=dify_config.UNSTRUCTURED_API_KEY,  # type: ignore
                 )
             os.unlink(temp_file.name)
         return "\n".join([getattr(element, "text", "") for element in elements])
@@ -453,20 +420,12 @@ def _download_file_content(file: File) -> bytes:
         raise FileDownloadError(f"Error downloading file: {str(e)}") from e


-def _extract_text_from_file(file: File, *, unstructured_api_config: UnstructuredApiConfig) -> str:
+def _extract_text_from_file(file: File):
     file_content = _download_file_content(file)
     if file.extension:
-        extracted_text = _extract_text_by_file_extension(
-            file_content=file_content,
-            file_extension=file.extension,
-            unstructured_api_config=unstructured_api_config,
-        )
+        extracted_text = _extract_text_by_file_extension(file_content=file_content, file_extension=file.extension)
     elif file.mime_type:
-        extracted_text = _extract_text_by_mime_type(
-            file_content=file_content,
-            mime_type=file.mime_type,
-            unstructured_api_config=unstructured_api_config,
-        )
+        extracted_text = _extract_text_by_mime_type(file_content=file_content, mime_type=file.mime_type)
     else:
         raise UnsupportedFileTypeError("Unable to determine file type: MIME type or file extension is missing")
     return extracted_text
@@ -558,12 +517,12 @@ def _extract_text_from_excel(file_content: bytes) -> str:
         raise TextExtractionError(f"Failed to extract text from Excel file: {str(e)}") from e


-def _extract_text_from_ppt(file_content: bytes, *, unstructured_api_config: UnstructuredApiConfig) -> str:
+def _extract_text_from_ppt(file_content: bytes) -> str:
     from unstructured.partition.api import partition_via_api
     from unstructured.partition.ppt import partition_ppt

     try:
-        if unstructured_api_config.api_url:
+        if dify_config.UNSTRUCTURED_API_URL:
             with tempfile.NamedTemporaryFile(suffix=".ppt", delete=False) as temp_file:
                 temp_file.write(file_content)
                 temp_file.flush()
@@ -571,8 +530,8 @@ def _extract_text_from_ppt(file_content: bytes, *, unstructured_api_config: Unst
                     elements = partition_via_api(
                         file=file,
                         metadata_filename=temp_file.name,
-                        api_url=unstructured_api_config.api_url,
-                        api_key=unstructured_api_config.api_key,
+                        api_url=dify_config.UNSTRUCTURED_API_URL,
+                        api_key=dify_config.UNSTRUCTURED_API_KEY,  # type: ignore
                     )
                 os.unlink(temp_file.name)
         else:
@@ -584,12 +543,12 @@ def _extract_text_from_ppt(file_content: bytes, *, unstructured_api_config: Unst
         raise TextExtractionError(f"Failed to extract text from PPTX: {str(e)}") from e


-def _extract_text_from_pptx(file_content: bytes, *, unstructured_api_config: UnstructuredApiConfig) -> str:
+def _extract_text_from_pptx(file_content: bytes) -> str:
     from unstructured.partition.api import partition_via_api
     from unstructured.partition.pptx import partition_pptx

     try:
-        if unstructured_api_config.api_url:
+        if dify_config.UNSTRUCTURED_API_URL:
             with tempfile.NamedTemporaryFile(suffix=".pptx", delete=False) as temp_file:
                 temp_file.write(file_content)
                 temp_file.flush()
@@ -597,8 +556,8 @@ def _extract_text_from_pptx(file_content: bytes, *, unstructured_api_config: Uns
                     elements = partition_via_api(
                         file=file,
                         metadata_filename=temp_file.name,
-                        api_url=unstructured_api_config.api_url,
-                        api_key=unstructured_api_config.api_key,
+                        api_url=dify_config.UNSTRUCTURED_API_URL,
+                        api_key=dify_config.UNSTRUCTURED_API_KEY,  # type: ignore
                     )
                 os.unlink(temp_file.name)
         else:
@@ -609,12 +568,12 @@ def _extract_text_from_pptx(file_content: bytes, *, unstructured_api_config: Uns
         raise TextExtractionError(f"Failed to extract text from PPTX: {str(e)}") from e


-def _extract_text_from_epub(file_content: bytes, *, unstructured_api_config: UnstructuredApiConfig) -> str:
+def _extract_text_from_epub(file_content: bytes) -> str:
     from unstructured.partition.api import partition_via_api
     from unstructured.partition.epub import partition_epub

     try:
-        if unstructured_api_config.api_url:
+        if dify_config.UNSTRUCTURED_API_URL:
             with tempfile.NamedTemporaryFile(suffix=".epub", delete=False) as temp_file:
                 temp_file.write(file_content)
                 temp_file.flush()
@@ -622,8 +581,8 @@ def _extract_text_from_epub(file_content: bytes, *, unstructured_api_config: Uns
                 elements = partition_via_api(
                     file=file,
                     metadata_filename=temp_file.name,
-                    api_url=unstructured_api_config.api_url,
-                    api_key=unstructured_api_config.api_key,
+                    api_url=dify_config.UNSTRUCTURED_API_URL,
+                    api_key=dify_config.UNSTRUCTURED_API_KEY,  # type: ignore
                 )
             os.unlink(temp_file.name)
         else:
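(Editor's note: after this change all three call sites share the same shape — buffer to a temp file, send it to the hosted Unstructured API when the URL is configured. A standalone sketch of that path; config values are plain parameters here rather than dify_config reads, and partition_via_api is used exactly as the diff shows:)

import os
import tempfile

def partition_remote(file_content: bytes, suffix: str, api_url: str, api_key: str) -> str:
    from unstructured.partition.api import partition_via_api

    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as temp_file:
        temp_file.write(file_content)
        temp_file.flush()
        with open(temp_file.name, "rb") as file:
            elements = partition_via_api(
                file=file,
                metadata_filename=temp_file.name,
                api_url=api_url,
                api_key=api_key,
            )
    os.unlink(temp_file.name)  # clean up the buffered copy
    return "\n".join(getattr(element, "text", "") for element in elements)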


@@ -78,21 +78,12 @@ class KnowledgeIndexNode(Node[KnowledgeIndexNodeData]):
         indexing_technique = node_data.indexing_technique or dataset.indexing_technique
         summary_index_setting = node_data.summary_index_setting or dataset.summary_index_setting

-        # Try to get document language if document_id is available
-        doc_language = None
-        document_id = variable_pool.get(["sys", SystemVariableKey.DOCUMENT_ID])
-        if document_id:
-            document = db.session.query(Document).filter_by(id=document_id.value).first()
-            if document and document.doc_language:
-                doc_language = document.doc_language
-
         outputs = self._get_preview_output_with_summaries(
             node_data.chunk_structure,
             chunks,
             dataset=dataset,
             indexing_technique=indexing_technique,
             summary_index_setting=summary_index_setting,
-            doc_language=doc_language,
         )

         return NodeRunResult(
             status=WorkflowNodeExecutionStatus.SUCCEEDED,
@@ -324,7 +315,6 @@ class KnowledgeIndexNode(Node[KnowledgeIndexNodeData]):
         dataset: Dataset,
         indexing_technique: str | None = None,
         summary_index_setting: dict | None = None,
-        doc_language: str | None = None,
     ) -> Mapping[str, Any]:
         """
         Generate preview output with summaries for chunks in preview mode.
@@ -336,7 +326,6 @@ class KnowledgeIndexNode(Node[KnowledgeIndexNodeData]):
             dataset: Dataset object (for tenant_id)
             indexing_technique: Indexing technique from node config or dataset
             summary_index_setting: Summary index setting from node config or dataset
-            doc_language: Optional document language to ensure summary is generated in the correct language
         """
         index_processor = IndexProcessorFactory(chunk_structure).init_index_processor()
         preview_output = index_processor.format_preview(chunks)
@@ -376,7 +365,6 @@ class KnowledgeIndexNode(Node[KnowledgeIndexNodeData]):
                         tenant_id=dataset.tenant_id,
                         text=preview_item["content"],
                         summary_index_setting=summary_index_setting,
-                        document_language=doc_language,
                     )
                     if summary:
                         preview_item["summary"] = summary
@@ -386,7 +374,6 @@ class KnowledgeIndexNode(Node[KnowledgeIndexNodeData]):
                     tenant_id=dataset.tenant_id,
                     text=preview_item["content"],
                     summary_index_setting=summary_index_setting,
-                    document_language=doc_language,
                 )
                 if summary:
                     preview_item["summary"] = summary


@@ -1,6 +1,6 @@
[project]
name = "dify-api"
version = "1.11.4"
version = "1.12.0"
requires-python = ">=3.11,<3.13"
dependencies = [


@@ -16,7 +16,6 @@ from sqlalchemy.orm import Session
 from werkzeug.exceptions import Forbidden, NotFound

 from configs import dify_config
-from core.db.session_factory import session_factory
 from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError
 from core.file import helpers as file_helpers
 from core.helper.name_generator import generate_incremental_name
@@ -1389,46 +1388,6 @@ class DocumentService:
         ).all()
         return documents

-    @staticmethod
-    def update_documents_need_summary(dataset_id: str, document_ids: Sequence[str], need_summary: bool = True) -> int:
-        """
-        Update need_summary field for multiple documents.
-
-        This method handles the case where documents were created when summary_index_setting was disabled,
-        and need to be updated when summary_index_setting is later enabled.
-
-        Args:
-            dataset_id: Dataset ID
-            document_ids: List of document IDs to update
-            need_summary: Value to set for need_summary field (default: True)
-
-        Returns:
-            Number of documents updated
-        """
-        if not document_ids:
-            return 0
-
-        document_id_list: list[str] = [str(document_id) for document_id in document_ids]
-        with session_factory.create_session() as session:
-            updated_count = (
-                session.query(Document)
-                .filter(
-                    Document.id.in_(document_id_list),
-                    Document.dataset_id == dataset_id,
-                    Document.doc_form != "qa_model",  # Skip qa_model documents
-                )
-                .update({Document.need_summary: need_summary}, synchronize_session=False)
-            )
-            session.commit()
-
-        logger.info(
-            "Updated need_summary to %s for %d documents in dataset %s",
-            need_summary,
-            updated_count,
-            dataset_id,
-        )
-        return updated_count
-
     @staticmethod
     def get_document_download_url(document: Document) -> str:
         """


@@ -174,10 +174,6 @@ class RagPipelineTransformService:
             else:
                 dataset.retrieval_model = knowledge_configuration.retrieval_model.model_dump()

-            # Copy summary_index_setting from dataset to knowledge_index node configuration
-            if dataset.summary_index_setting:
-                knowledge_configuration.summary_index_setting = dataset.summary_index_setting
-
             knowledge_configuration_dict.update(knowledge_configuration.model_dump())
             node["data"] = knowledge_configuration_dict
         return node


@@ -49,18 +49,11 @@ class SummaryIndexService:
         # Use lazy import to avoid circular import
         from core.rag.index_processor.processor.paragraph_index_processor import ParagraphIndexProcessor

-        # Get document language to ensure summary is generated in the correct language
-        # This is especially important for image-only chunks where text is empty or minimal
-        document_language = None
-        if segment.document and segment.document.doc_language:
-            document_language = segment.document.doc_language
-
         summary_content, usage = ParagraphIndexProcessor.generate_summary(
             tenant_id=dataset.tenant_id,
             text=segment.content,
             summary_index_setting=summary_index_setting,
             segment_id=segment.id,
-            document_language=document_language,
         )

         if not summary_content:
@@ -565,9 +558,6 @@ class SummaryIndexService:
             )
             session.add(summary_record)

-        # Commit the batch created records
-        session.commit()
-
     @staticmethod
     def update_summary_record_error(
         segment: DocumentSegment,
@@ -772,6 +762,7 @@ class SummaryIndexService:
                 dataset=dataset,
                 status="not_started",
             )
+            session.commit()  # Commit initial records
             summary_records = []

api/uv.lock (generated, 2 lines changed)

@@ -1368,7 +1368,7 @@ wheels = [
 [[package]]
 name = "dify-api"
-version = "1.11.4"
+version = "1.12.0"
 source = { virtual = "." }
 dependencies = [
     { name = "aliyun-log-python-sdk" },


@@ -21,7 +21,7 @@ services:
   # API service
   api:
-    image: langgenius/dify-api:1.11.4
+    image: langgenius/dify-api:1.12.0
     restart: always
     environment:
       # Use the shared environment variables.
@@ -63,7 +63,7 @@ services:
   # worker service
   # The Celery worker for processing all queues (dataset, workflow, mail, etc.)
   worker:
-    image: langgenius/dify-api:1.11.4
+    image: langgenius/dify-api:1.12.0
     restart: always
     environment:
       # Use the shared environment variables.
@@ -102,7 +102,7 @@ services:
   # worker_beat service
   # Celery beat for scheduling periodic tasks.
   worker_beat:
-    image: langgenius/dify-api:1.11.4
+    image: langgenius/dify-api:1.12.0
     restart: always
     environment:
       # Use the shared environment variables.
@@ -132,7 +132,7 @@ services:
   # Frontend web application.
   web:
-    image: langgenius/dify-web:1.11.4
+    image: langgenius/dify-web:1.12.0
     restart: always
     environment:
       CONSOLE_API_URL: ${CONSOLE_API_URL:-}


@@ -707,7 +707,7 @@ services:
   # API service
   api:
-    image: langgenius/dify-api:1.11.4
+    image: langgenius/dify-api:1.12.0
     restart: always
     environment:
       # Use the shared environment variables.
@@ -749,7 +749,7 @@ services:
   # worker service
   # The Celery worker for processing all queues (dataset, workflow, mail, etc.)
   worker:
-    image: langgenius/dify-api:1.11.4
+    image: langgenius/dify-api:1.12.0
     restart: always
     environment:
       # Use the shared environment variables.
@@ -788,7 +788,7 @@ services:
   # worker_beat service
   # Celery beat for scheduling periodic tasks.
   worker_beat:
-    image: langgenius/dify-api:1.11.4
+    image: langgenius/dify-api:1.12.0
     restart: always
     environment:
       # Use the shared environment variables.
@@ -818,7 +818,7 @@ services:
   # Frontend web application.
   web:
-    image: langgenius/dify-web:1.11.4
+    image: langgenius/dify-web:1.12.0
     restart: always
     environment:
       CONSOLE_API_URL: ${CONSOLE_API_URL:-}


@@ -1,9 +1,8 @@
import type { ListChildComponentProps } from 'react-window'
import type { DataSourceNotionPage, DataSourceNotionPageMap } from '@/models/common'
import { RiArrowDownSLine, RiArrowRightSLine } from '@remixicon/react'
import { memo, useEffect, useMemo, useState } from 'react'
import { useVirtualizer } from '@tanstack/react-virtual'
import { memo, useCallback, useMemo, useRef, useState } from 'react'
import { useTranslation } from 'react-i18next'
import { areEqual, FixedSizeList as List } from 'react-window'
import { cn } from '@/utils/classnames'
import Checkbox from '../../checkbox'
import NotionIcon from '../../notion-icon'
@@ -32,6 +31,22 @@ type NotionPageItem = {
depth: number
} & DataSourceNotionPage
type ItemProps = {
virtualStart: number
virtualSize: number
current: NotionPageItem
onToggle: (pageId: string) => void
checkedIds: Set<string>
disabledCheckedIds: Set<string>
onCheck: (pageId: string) => void
canPreview?: boolean
onPreview: (pageId: string) => void
listMapWithChildrenAndDescendants: NotionPageTreeMap
searchValue: string
previewPageId: string
pagesMap: DataSourceNotionPageMap
}
const recursivePushInParentDescendants = (
pagesMap: DataSourceNotionPageMap,
listTreeMap: NotionPageTreeMap,
@@ -69,34 +84,22 @@ const recursivePushInParentDescendants = (
}
}
const ItemComponent = ({ index, style, data }: ListChildComponentProps<{
dataList: NotionPageItem[]
handleToggle: (index: number) => void
checkedIds: Set<string>
disabledCheckedIds: Set<string>
handleCheck: (index: number) => void
canPreview?: boolean
handlePreview: (index: number) => void
listMapWithChildrenAndDescendants: NotionPageTreeMap
searchValue: string
previewPageId: string
pagesMap: DataSourceNotionPageMap
}>) => {
const ItemComponent = ({
virtualStart,
virtualSize,
current,
onToggle,
checkedIds,
disabledCheckedIds,
onCheck,
canPreview,
onPreview,
listMapWithChildrenAndDescendants,
searchValue,
previewPageId,
pagesMap,
}: ItemProps) => {
const { t } = useTranslation()
const {
dataList,
handleToggle,
checkedIds,
disabledCheckedIds,
handleCheck,
canPreview,
handlePreview,
listMapWithChildrenAndDescendants,
searchValue,
previewPageId,
pagesMap,
} = data
const current = dataList[index]
const currentWithChildrenAndDescendants = listMapWithChildrenAndDescendants[current.page_id]
const hasChild = currentWithChildrenAndDescendants.descendants.size > 0
const ancestors = currentWithChildrenAndDescendants.ancestors
@@ -109,7 +112,7 @@ const ItemComponent = ({ index, style, data }: ListChildComponentProps<{
<div
className="mr-1 flex h-5 w-5 shrink-0 items-center justify-center rounded-md hover:bg-components-button-ghost-bg-hover"
style={{ marginLeft: current.depth * 8 }}
onClick={() => handleToggle(index)}
onClick={() => onToggle(current.page_id)}
>
{
current.expand
@@ -132,15 +135,21 @@ const ItemComponent = ({ index, style, data }: ListChildComponentProps<{
return (
<div
className={cn('group flex cursor-pointer items-center rounded-md pl-2 pr-[2px] hover:bg-state-base-hover', previewPageId === current.page_id && 'bg-state-base-hover')}
style={{ ...style, top: style.top as number + 8, left: 8, right: 8, width: 'calc(100% - 16px)' }}
style={{
position: 'absolute',
top: 0,
left: 8,
right: 8,
width: 'calc(100% - 16px)',
height: virtualSize,
transform: `translateY(${virtualStart + 8}px)`,
}}
>
<Checkbox
className="mr-2 shrink-0"
checked={checkedIds.has(current.page_id)}
disabled={disabled}
onCheck={() => {
handleCheck(index)
}}
onCheck={() => onCheck(current.page_id)}
/>
{!searchValue && renderArrow()}
<NotionIcon
@@ -160,7 +169,7 @@ const ItemComponent = ({ index, style, data }: ListChildComponentProps<{
className="ml-1 hidden h-6 shrink-0 cursor-pointer items-center rounded-md border-[0.5px] border-components-button-secondary-border bg-components-button-secondary-bg px-2 text-xs
font-medium leading-4 text-components-button-secondary-text shadow-xs shadow-shadow-shadow-3 backdrop-blur-[10px]
hover:border-components-button-secondary-border-hover hover:bg-components-button-secondary-bg-hover group-hover:flex"
onClick={() => handlePreview(index)}
onClick={() => onPreview(current.page_id)}
>
{t('dataSource.notion.selector.preview', { ns: 'common' })}
</div>
@@ -179,7 +188,7 @@ const ItemComponent = ({ index, style, data }: ListChildComponentProps<{
</div>
)
}
const Item = memo(ItemComponent, areEqual)
const Item = memo(ItemComponent)
const PageSelector = ({
value,
@@ -193,31 +202,10 @@ const PageSelector = ({
onPreview,
}: PageSelectorProps) => {
const { t } = useTranslation()
const [dataList, setDataList] = useState<NotionPageItem[]>([])
const parentRef = useRef<HTMLDivElement>(null)
const [expandedIds, setExpandedIds] = useState<Set<string>>(() => new Set())
const [localPreviewPageId, setLocalPreviewPageId] = useState('')
useEffect(() => {
setDataList(list.filter(item => item.parent_id === 'root' || !pagesMap[item.parent_id]).map((item) => {
return {
...item,
expand: false,
depth: 0,
}
}))
}, [list])
const searchDataList = list.filter((item) => {
return item.page_name.includes(searchValue)
}).map((item) => {
return {
...item,
expand: false,
depth: 0,
}
})
const currentDataList = searchValue ? searchDataList : dataList
const currentPreviewPageId = previewPageId === undefined ? localPreviewPageId : previewPageId
const listMapWithChildrenAndDescendants = useMemo(() => {
return list.reduce((prev: NotionPageTreeMap, next: DataSourceNotionPage) => {
const pageId = next.page_id
@@ -229,47 +217,89 @@ const PageSelector = ({
}, {})
}, [list, pagesMap])
const handleToggle = (index: number) => {
const current = dataList[index]
const pageId = current.page_id
const currentWithChildrenAndDescendants = listMapWithChildrenAndDescendants[pageId]
const descendantsIds = Array.from(currentWithChildrenAndDescendants.descendants)
const childrenIds = Array.from(currentWithChildrenAndDescendants.children)
let newDataList = []
if (current.expand) {
current.expand = false
newDataList = dataList.filter(item => !descendantsIds.includes(item.page_id))
const childrenByParent = useMemo(() => {
const map = new Map<string | null, DataSourceNotionPage[]>()
for (const item of list) {
const isRoot = item.parent_id === 'root' || !pagesMap[item.parent_id]
const parentKey = isRoot ? null : item.parent_id
const children = map.get(parentKey) || []
children.push(item)
map.set(parentKey, children)
}
else {
current.expand = true
return map
}, [list, pagesMap])
newDataList = [
...dataList.slice(0, index + 1),
...childrenIds.map(item => ({
...pagesMap[item],
expand: false,
depth: listMapWithChildrenAndDescendants[item].depth,
})),
...dataList.slice(index + 1),
]
const dataList = useMemo(() => {
const result: NotionPageItem[] = []
const buildVisibleList = (parentId: string | null, depth: number) => {
const items = childrenByParent.get(parentId) || []
for (const item of items) {
const isExpanded = expandedIds.has(item.page_id)
result.push({
...item,
expand: isExpanded,
depth,
})
if (isExpanded) {
buildVisibleList(item.page_id, depth + 1)
}
}
}
setDataList(newDataList)
}
const copyValue = new Set(value)
const handleCheck = (index: number) => {
const current = currentDataList[index]
const pageId = current.page_id
buildVisibleList(null, 0)
return result
}, [childrenByParent, expandedIds])
const searchDataList = useMemo(() => list.filter((item) => {
return item.page_name.includes(searchValue)
}).map((item) => {
return {
...item,
expand: false,
depth: 0,
}
}), [list, searchValue])
const currentDataList = searchValue ? searchDataList : dataList
const currentPreviewPageId = previewPageId === undefined ? localPreviewPageId : previewPageId
const virtualizer = useVirtualizer({
count: currentDataList.length,
getScrollElement: () => parentRef.current,
estimateSize: () => 28,
overscan: 5,
getItemKey: index => currentDataList[index].page_id,
})
const handleToggle = useCallback((pageId: string) => {
setExpandedIds((prev) => {
const next = new Set(prev)
if (prev.has(pageId)) {
next.delete(pageId)
const descendants = listMapWithChildrenAndDescendants[pageId]?.descendants
if (descendants) {
for (const descendantId of descendants)
next.delete(descendantId)
}
}
else {
next.add(pageId)
}
return next
})
}, [listMapWithChildrenAndDescendants])
const handleCheck = useCallback((pageId: string) => {
const currentWithChildrenAndDescendants = listMapWithChildrenAndDescendants[pageId]
const copyValue = new Set(value)
if (copyValue.has(pageId)) {
if (!searchValue) {
for (const item of currentWithChildrenAndDescendants.descendants)
copyValue.delete(item)
}
copyValue.delete(pageId)
}
else {
@@ -277,22 +307,17 @@ const PageSelector = ({
for (const item of currentWithChildrenAndDescendants.descendants)
copyValue.add(item)
}
copyValue.add(pageId)
}
onSelect(new Set(copyValue))
}
const handlePreview = (index: number) => {
const current = currentDataList[index]
const pageId = current.page_id
onSelect(copyValue)
}, [listMapWithChildrenAndDescendants, onSelect, searchValue, value])
const handlePreview = useCallback((pageId: string) => {
setLocalPreviewPageId(pageId)
if (onPreview)
onPreview(pageId)
}
}, [onPreview])
if (!currentDataList.length) {
return (
@@ -303,29 +328,41 @@ const PageSelector = ({
}
return (
<List
<div
ref={parentRef}
className="py-2"
height={296}
itemCount={currentDataList.length}
itemSize={28}
width="100%"
itemKey={(index, data) => data.dataList[index].page_id}
itemData={{
dataList: currentDataList,
handleToggle,
checkedIds: value,
disabledCheckedIds: disabledValue,
handleCheck,
canPreview,
handlePreview,
listMapWithChildrenAndDescendants,
searchValue,
previewPageId: currentPreviewPageId,
pagesMap,
}}
style={{ height: 296, width: '100%', overflow: 'auto' }}
>
{Item}
</List>
<div
style={{
height: virtualizer.getTotalSize(),
width: '100%',
position: 'relative',
}}
>
{virtualizer.getVirtualItems().map((virtualRow) => {
const current = currentDataList[virtualRow.index]
return (
<Item
key={virtualRow.key}
virtualStart={virtualRow.start}
virtualSize={virtualRow.size}
current={current}
onToggle={handleToggle}
checkedIds={value}
disabledCheckedIds={disabledValue}
onCheck={handleCheck}
canPreview={canPreview}
onPreview={handlePreview}
listMapWithChildrenAndDescendants={listMapWithChildrenAndDescendants}
searchValue={searchValue}
previewPageId={currentPreviewPageId}
pagesMap={pagesMap}
/>
)
})}
</div>
</div>
)
}
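(Editor's note: the structural recipe this component adopts, reduced to its skeleton — a scroll container, an inner div sized to getTotalSize(), and absolutely positioned rows translated to virtualRow.start. The 28px row height and 296px container mirror the values above; the rest is a generic sketch, not this component's props:)

import { useVirtualizer } from '@tanstack/react-virtual'
import { useRef } from 'react'

function VirtualList({ items }: { items: string[] }) {
  const parentRef = useRef<HTMLDivElement>(null)
  const virtualizer = useVirtualizer({
    count: items.length,
    getScrollElement: () => parentRef.current,
    estimateSize: () => 28, // fixed row height, like the page selector
    overscan: 5,
  })
  return (
    <div ref={parentRef} style={{ height: 296, overflow: 'auto' }}>
      {/* inner element carries the full scroll height; rows are absolutely positioned */}
      <div style={{ height: virtualizer.getTotalSize(), position: 'relative' }}>
        {virtualizer.getVirtualItems().map(row => (
          <div
            key={row.key}
            style={{ position: 'absolute', top: 0, width: '100%', height: row.size, transform: `translateY(${row.start}px)` }}
          >
            {items[row.index]}
          </div>
        ))}
      </div>
    </div>
  )
}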


@@ -11,21 +11,18 @@ import { recursivePushInParentDescendants } from './utils'

 // Note: react-i18next uses global mock from web/vitest.setup.ts

-// Mock react-window FixedSizeList - renders items directly for testing
-vi.mock('react-window', () => ({
-  FixedSizeList: ({ children: ItemComponent, itemCount, itemData, itemKey }: any) => (
-    <div data-testid="virtual-list">
-      {Array.from({ length: itemCount }).map((_, index) => (
-        <ItemComponent
-          key={itemKey?.(index, itemData) || index}
-          index={index}
-          style={{ top: index * 28, left: 0, right: 0, width: '100%', position: 'absolute' }}
-          data={itemData}
-        />
-      ))}
-    </div>
-  ),
-  areEqual: (prevProps: any, nextProps: any) => prevProps === nextProps,
+// Mock @tanstack/react-virtual useVirtualizer hook - renders items directly for testing
+vi.mock('@tanstack/react-virtual', () => ({
+  useVirtualizer: ({ count, getItemKey }: { count: number, getItemKey?: (index: number) => string }) => ({
+    getVirtualItems: () =>
+      Array.from({ length: count }).map((_, index) => ({
+        index,
+        key: getItemKey ? getItemKey(index) : index,
+        start: index * 28,
+        size: 28,
+      })),
+    getTotalSize: () => count * 28,
+  }),
 }))

 // Note: NotionIcon from @/app/components/base/ is NOT mocked - using real component per testing guidelines
@@ -119,7 +116,7 @@ describe('PageSelector', () => {
     render(<PageSelector {...props} />)

     // Assert
-    expect(screen.getByTestId('virtual-list')).toBeInTheDocument()
+    expect(screen.getByText('Test Page')).toBeInTheDocument()
   })

   it('should render empty state when list is empty', () => {
@@ -134,7 +131,7 @@ describe('PageSelector', () => {
     // Assert
     expect(screen.getByText('common.dataSource.notion.selector.noSearchResult')).toBeInTheDocument()
-    expect(screen.queryByTestId('virtual-list')).not.toBeInTheDocument()
+    expect(screen.queryByText('Test Page')).not.toBeInTheDocument()
   })

   it('should render items using FixedSizeList', () => {
@@ -1166,7 +1163,7 @@ describe('PageSelector', () => {
     render(<PageSelector {...props} />)

     // Assert
-    expect(screen.getByTestId('virtual-list')).toBeInTheDocument()
+    expect(screen.getByText('Test Page')).toBeInTheDocument()
   })

   it('should handle special characters in page name', () => {
@@ -1340,7 +1337,7 @@ describe('PageSelector', () => {
     render(<PageSelector {...props} />)

     // Assert
-    expect(screen.getByTestId('virtual-list')).toBeInTheDocument()
+    expect(screen.getByText('Test Page')).toBeInTheDocument()
     if (propVariation.canPreview)
       expect(screen.getByText('common.dataSource.notion.selector.preview')).toBeInTheDocument()
     else


@@ -1,7 +1,7 @@
import type { DataSourceNotionPage, DataSourceNotionPageMap } from '@/models/common'
import { useCallback, useEffect, useMemo, useState } from 'react'
import { useVirtualizer } from '@tanstack/react-virtual'
import { useCallback, useMemo, useRef, useState } from 'react'
import { useTranslation } from 'react-i18next'
import { FixedSizeList as List } from 'react-window'
import Item from './item'
import { recursivePushInParentDescendants } from './utils'
@@ -45,29 +45,16 @@ const PageSelector = ({
currentCredentialId,
}: PageSelectorProps) => {
const { t } = useTranslation()
const [dataList, setDataList] = useState<NotionPageItem[]>([])
const parentRef = useRef<HTMLDivElement>(null)
const [expandedIds, setExpandedIds] = useState<Set<string>>(() => new Set())
const [currentPreviewPageId, setCurrentPreviewPageId] = useState('')
const prevCredentialIdRef = useRef(currentCredentialId)
useEffect(() => {
setDataList(list.filter(item => item.parent_id === 'root' || !pagesMap[item.parent_id]).map((item) => {
return {
...item,
expand: false,
depth: 0,
}
}))
}, [currentCredentialId])
const searchDataList = list.filter((item) => {
return item.page_name.includes(searchValue)
}).map((item) => {
return {
...item,
expand: false,
depth: 0,
}
})
const currentDataList = searchValue ? searchDataList : dataList
// Reset expanded state when credential changes (render-time detection)
if (prevCredentialIdRef.current !== currentCredentialId) {
prevCredentialIdRef.current = currentCredentialId
setExpandedIds(new Set())
}
const listMapWithChildrenAndDescendants = useMemo(() => {
return list.reduce((prev: NotionPageTreeMap, next: DataSourceNotionPage) => {
@@ -80,39 +67,86 @@ const PageSelector = ({
}, {})
}, [list, pagesMap])
const handleToggle = useCallback((index: number) => {
const current = dataList[index]
const pageId = current.page_id
const currentWithChildrenAndDescendants = listMapWithChildrenAndDescendants[pageId]
const descendantsIds = Array.from(currentWithChildrenAndDescendants.descendants)
const childrenIds = Array.from(currentWithChildrenAndDescendants.children)
let newDataList = []
if (current.expand) {
current.expand = false
newDataList = dataList.filter(item => !descendantsIds.includes(item.page_id))
// Pre-build children index for O(1) lookup instead of O(n) filter
const childrenByParent = useMemo(() => {
const map = new Map<string | null, DataSourceNotionPage[]>()
for (const item of list) {
const isRoot = item.parent_id === 'root' || !pagesMap[item.parent_id]
const parentKey = isRoot ? null : item.parent_id
const children = map.get(parentKey) || []
children.push(item)
map.set(parentKey, children)
}
else {
current.expand = true
return map
}, [list, pagesMap])
newDataList = [
...dataList.slice(0, index + 1),
...childrenIds.map(item => ({
...pagesMap[item],
expand: false,
depth: listMapWithChildrenAndDescendants[item].depth,
})),
...dataList.slice(index + 1),
]
// Compute visible data list based on expanded state
const dataList = useMemo(() => {
const result: NotionPageItem[] = []
const buildVisibleList = (parentId: string | null, depth: number) => {
const items = childrenByParent.get(parentId) || []
for (const item of items) {
const isExpanded = expandedIds.has(item.page_id)
result.push({
...item,
expand: isExpanded,
depth,
})
if (isExpanded) {
buildVisibleList(item.page_id, depth + 1)
}
}
}
setDataList(newDataList)
}, [dataList, listMapWithChildrenAndDescendants, pagesMap])
const handleCheck = useCallback((index: number) => {
buildVisibleList(null, 0)
return result
}, [childrenByParent, expandedIds])
const searchDataList = useMemo(() => list.filter((item) => {
return item.page_name.includes(searchValue)
}).map((item) => {
return {
...item,
expand: false,
depth: 0,
}
}), [list, searchValue])
const currentDataList = searchValue ? searchDataList : dataList
const virtualizer = useVirtualizer({
count: currentDataList.length,
getScrollElement: () => parentRef.current,
estimateSize: () => 28,
overscan: 5,
getItemKey: index => currentDataList[index].page_id,
})
// Stable callback - no dependencies on dataList
const handleToggle = useCallback((pageId: string) => {
setExpandedIds((prev) => {
const next = new Set(prev)
if (prev.has(pageId)) {
// Collapse: remove current and all descendants
next.delete(pageId)
const descendants = listMapWithChildrenAndDescendants[pageId]?.descendants
if (descendants) {
for (const descendantId of descendants)
next.delete(descendantId)
}
}
else {
next.add(pageId)
}
return next
})
}, [listMapWithChildrenAndDescendants])
// Stable callback - uses pageId parameter instead of index
const handleCheck = useCallback((pageId: string) => {
const copyValue = new Set(checkedIds)
const current = currentDataList[index]
const pageId = current.page_id
const currentWithChildrenAndDescendants = listMapWithChildrenAndDescendants[pageId]
if (copyValue.has(pageId)) {
@@ -120,7 +154,6 @@ const PageSelector = ({
for (const item of currentWithChildrenAndDescendants.descendants)
copyValue.delete(item)
}
copyValue.delete(pageId)
}
else {
@@ -138,18 +171,15 @@ const PageSelector = ({
}
}
onSelect(new Set(copyValue))
}, [currentDataList, isMultipleChoice, listMapWithChildrenAndDescendants, onSelect, searchValue, checkedIds])
const handlePreview = useCallback((index: number) => {
const current = currentDataList[index]
const pageId = current.page_id
onSelect(copyValue)
}, [checkedIds, isMultipleChoice, listMapWithChildrenAndDescendants, onSelect, searchValue])
// Stable callback
const handlePreview = useCallback((pageId: string) => {
setCurrentPreviewPageId(pageId)
if (onPreview)
onPreview(pageId)
}, [currentDataList, onPreview])
}, [onPreview])
if (!currentDataList.length) {
return (
@@ -160,30 +190,42 @@ const PageSelector = ({
}
return (
<List
<div
ref={parentRef}
className="py-2"
height={296}
itemCount={currentDataList.length}
itemSize={28}
width="100%"
itemKey={(index, data) => data.dataList[index].page_id}
itemData={{
dataList: currentDataList,
handleToggle,
checkedIds,
disabledCheckedIds: disabledValue,
handleCheck,
canPreview,
handlePreview,
listMapWithChildrenAndDescendants,
searchValue,
previewPageId: currentPreviewPageId,
pagesMap,
isMultipleChoice,
}}
style={{ height: 296, width: '100%', overflow: 'auto' }}
>
{Item}
</List>
<div
style={{
height: virtualizer.getTotalSize(),
width: '100%',
position: 'relative',
}}
>
{virtualizer.getVirtualItems().map((virtualRow) => {
const current = currentDataList[virtualRow.index]
return (
<Item
key={virtualRow.key}
virtualStart={virtualRow.start}
virtualSize={virtualRow.size}
current={current}
onToggle={handleToggle}
checkedIds={checkedIds}
disabledCheckedIds={disabledValue}
onCheck={handleCheck}
canPreview={canPreview}
onPreview={handlePreview}
listMapWithChildrenAndDescendants={listMapWithChildrenAndDescendants}
searchValue={searchValue}
previewPageId={currentPreviewPageId}
pagesMap={pagesMap}
isMultipleChoice={isMultipleChoice}
/>
)
})}
</div>
</div>
)
}
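(Editor's note: the key idea in this refactor is storing only expandedIds and deriving the visible rows, instead of mutating a dataList state on every toggle. A framework-free sketch of that derivation, with illustrative types:)

type Page = { page_id: string, parent_id: string | null }

function visibleRows(childrenByParent: Map<string | null, Page[]>, expandedIds: Set<string>) {
  const result: Array<Page & { depth: number }> = []
  const walk = (parentId: string | null, depth: number) => {
    for (const item of childrenByParent.get(parentId) ?? []) {
      result.push({ ...item, depth })
      // only descend into subtrees the user has expanded
      if (expandedIds.has(item.page_id))
        walk(item.page_id, depth + 1)
    }
  }
  walk(null, 0)
  return result
}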


@@ -1,9 +1,7 @@
import type { ListChildComponentProps } from 'react-window'
import type { DataSourceNotionPage, DataSourceNotionPageMap } from '@/models/common'
import { RiArrowDownSLine, RiArrowRightSLine } from '@remixicon/react'
import * as React from 'react'
import { memo } from 'react'
import { useTranslation } from 'react-i18next'
import { areEqual } from 'react-window'
import Checkbox from '@/app/components/base/checkbox'
import NotionIcon from '@/app/components/base/notion-icon'
import Radio from '@/app/components/base/radio/ui'
@@ -23,36 +21,40 @@ type NotionPageItem = {
depth: number
} & DataSourceNotionPage
const Item = ({ index, style, data }: ListChildComponentProps<{
dataList: NotionPageItem[]
handleToggle: (index: number) => void
type ItemProps = {
virtualStart: number
virtualSize: number
current: NotionPageItem
onToggle: (pageId: string) => void
checkedIds: Set<string>
disabledCheckedIds: Set<string>
handleCheck: (index: number) => void
onCheck: (pageId: string) => void
canPreview?: boolean
handlePreview: (index: number) => void
onPreview: (pageId: string) => void
listMapWithChildrenAndDescendants: NotionPageTreeMap
searchValue: string
previewPageId: string
pagesMap: DataSourceNotionPageMap
isMultipleChoice?: boolean
}>) => {
}
const Item = ({
virtualStart,
virtualSize,
current,
onToggle,
checkedIds,
disabledCheckedIds,
onCheck,
canPreview,
onPreview,
listMapWithChildrenAndDescendants,
searchValue,
previewPageId,
pagesMap,
isMultipleChoice,
}: ItemProps) => {
const { t } = useTranslation()
const {
dataList,
handleToggle,
checkedIds,
disabledCheckedIds,
handleCheck,
canPreview,
handlePreview,
listMapWithChildrenAndDescendants,
searchValue,
previewPageId,
pagesMap,
isMultipleChoice,
} = data
const current = dataList[index]
const currentWithChildrenAndDescendants = listMapWithChildrenAndDescendants[current.page_id]
const hasChild = currentWithChildrenAndDescendants.descendants.size > 0
const ancestors = currentWithChildrenAndDescendants.ancestors
@@ -65,7 +67,7 @@ const Item = ({ index, style, data }: ListChildComponentProps<{
<div
className="mr-1 flex h-5 w-5 shrink-0 items-center justify-center rounded-md hover:bg-components-button-ghost-bg-hover"
style={{ marginLeft: current.depth * 8 }}
onClick={() => handleToggle(index)}
onClick={() => onToggle(current.page_id)}
>
{
current.expand
@@ -88,7 +90,15 @@ const Item = ({ index, style, data }: ListChildComponentProps<{
return (
<div
className={cn('group flex cursor-pointer items-center rounded-md pl-2 pr-[2px] hover:bg-state-base-hover', previewPageId === current.page_id && 'bg-state-base-hover')}
style={{ ...style, top: style.top as number + 8, left: 8, right: 8, width: 'calc(100% - 16px)' }}
style={{
position: 'absolute',
top: 0,
left: 8,
right: 8,
width: 'calc(100% - 16px)',
height: virtualSize,
transform: `translateY(${virtualStart + 8}px)`,
}}
>
{isMultipleChoice
? (
@@ -96,9 +106,7 @@ const Item = ({ index, style, data }: ListChildComponentProps<{
className="mr-2 shrink-0"
checked={checkedIds.has(current.page_id)}
disabled={disabled}
onCheck={() => {
handleCheck(index)
}}
onCheck={() => onCheck(current.page_id)}
/>
)
: (
@@ -106,9 +114,7 @@ const Item = ({ index, style, data }: ListChildComponentProps<{
className="mr-2 shrink-0"
isChecked={checkedIds.has(current.page_id)}
disabled={disabled}
onCheck={() => {
handleCheck(index)
}}
onCheck={() => onCheck(current.page_id)}
/>
)}
{!searchValue && renderArrow()}
@@ -129,7 +135,7 @@ const Item = ({ index, style, data }: ListChildComponentProps<{
className="ml-1 hidden h-6 shrink-0 cursor-pointer items-center rounded-md border-[0.5px] border-components-button-secondary-border bg-components-button-secondary-bg px-2 text-xs
font-medium leading-4 text-components-button-secondary-text shadow-xs shadow-shadow-shadow-3 backdrop-blur-[10px]
hover:border-components-button-secondary-border-hover hover:bg-components-button-secondary-bg-hover group-hover:flex"
onClick={() => handlePreview(index)}
onClick={() => onPreview(current.page_id)}
>
{t('dataSource.notion.selector.preview', { ns: 'common' })}
</div>
@@ -149,4 +155,4 @@ const Item = ({ index, style, data }: ListChildComponentProps<{
)
}
export default React.memo(Item, areEqual)
export default memo(Item)
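(Editor's note: dropping react-window's areEqual falls back to React.memo's default shallow prop comparison, which only works because every prop is now a primitive, a useMemo product, or a useCallback-stabilized handler. A small sketch of that contract; the component names are illustrative:)

import { memo, useCallback, useState } from 'react'

const Row = memo(({ label, onPick }: { label: string, onPick: (label: string) => void }) => (
  <div onClick={() => onPick(label)}>{label}</div>
))

function List({ labels }: { labels: string[] }) {
  const [, setPicked] = useState('')
  // stable identity across renders, so memoized rows skip re-rendering
  const onPick = useCallback((label: string) => setPicked(label), [])
  return <>{labels.map(label => <Row key={label} label={label} onPick={onPick} />)}</>
}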


@@ -1399,11 +1399,6 @@
       "count": 2
     }
   },
-  "app/components/base/notion-page-selector/page-selector/index.tsx": {
-    "react-hooks-extra/no-direct-set-state-in-use-effect": {
-      "count": 1
-    }
-  },
   "app/components/base/pagination/index.tsx": {
     "unicorn/prefer-number-properties": {
       "count": 1
@@ -1848,12 +1843,7 @@
   },
   "app/components/datasets/documents/create-from-pipeline/data-source/online-documents/page-selector/index.spec.tsx": {
     "ts/no-explicit-any": {
-      "count": 5
-    }
-  },
-  "app/components/datasets/documents/create-from-pipeline/data-source/online-documents/page-selector/index.tsx": {
-    "react-hooks-extra/no-direct-set-state-in-use-effect": {
-      "count": 1
+      "count": 2
     }
   },
   "app/components/datasets/documents/create-from-pipeline/data-source/online-drive/connect/index.spec.tsx": {

@@ -1,7 +1,7 @@
 {
   "name": "dify-web",
   "type": "module",
-  "version": "1.11.4",
+  "version": "1.12.0",
   "private": true,
   "packageManager": "pnpm@10.27.0+sha512.72d699da16b1179c14ba9e64dc71c9a40988cbdc65c264cb0e489db7de917f20dcf4d64d8723625f2969ba52d4b7e2a1170682d9ac2a5dcaeaab732b7e16f04a",
   "imports": {
@@ -84,6 +84,7 @@
     "@tailwindcss/typography": "0.5.19",
     "@tanstack/react-form": "1.23.7",
     "@tanstack/react-query": "5.90.5",
+    "@tanstack/react-virtual": "3.13.18",
     "abcjs": "6.5.2",
     "ahooks": "3.9.5",
     "class-variance-authority": "0.7.1",
@@ -142,7 +143,6 @@
     "react-sortablejs": "6.1.4",
     "react-syntax-highlighter": "15.6.6",
     "react-textarea-autosize": "8.5.9",
-    "react-window": "1.8.11",
     "reactflow": "11.11.4",
     "rehype-katex": "7.0.1",
     "rehype-raw": "7.0.0",
@@ -199,7 +199,6 @@
     "@types/react-dom": "19.2.3",
     "@types/react-slider": "1.3.6",
     "@types/react-syntax-highlighter": "15.5.13",
-    "@types/react-window": "1.8.8",
     "@types/semver": "7.7.1",
     "@types/sortablejs": "1.15.8",
     "@types/uuid": "10.0.0",

web/pnpm-lock.yaml (generated, 1905 lines changed)

File diff suppressed because it is too large.