From cf9216f3adf6d97f1e2467b77708f90233625b56 Mon Sep 17 00:00:00 2001 From: -LAN- Date: Mon, 16 Feb 2026 19:33:30 +0800 Subject: [PATCH] fix(workflow): fix document extractor typing and factory construction --- api/core/app/workflow/node_factory.py | 5 ++--- api/core/workflow/nodes/document_extractor/node.py | 12 ++++++++---- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/api/core/app/workflow/node_factory.py b/api/core/app/workflow/node_factory.py index efb6a51ea9..df3fe35432 100644 --- a/api/core/app/workflow/node_factory.py +++ b/api/core/app/workflow/node_factory.py @@ -1,5 +1,5 @@ from collections.abc import Callable, Sequence -from typing import TYPE_CHECKING, Any, cast, final +from typing import TYPE_CHECKING, Any, final from typing_extensions import override @@ -160,8 +160,7 @@ class DifyNodeFactory(NodeFactory): ) if node_type == NodeType.DOCUMENT_EXTRACTOR: - document_extractor_class = cast(type[DocumentExtractorNode], node_class) - return document_extractor_class( + return DocumentExtractorNode( **common_kwargs, unstructured_api_config=self._document_extractor_unstructured_api_config, ) diff --git a/api/core/workflow/nodes/document_extractor/node.py b/api/core/workflow/nodes/document_extractor/node.py index 957d3ded04..63250b16a8 100644 --- a/api/core/workflow/nodes/document_extractor/node.py +++ b/api/core/workflow/nodes/document_extractor/node.py @@ -353,6 +353,7 @@ def _extract_text_from_doc(file_content: bytes, *, unstructured_api_config: Unst if not unstructured_api_config.api_url: raise TextExtractionError("Unstructured API URL is not configured for DOC file processing.") + api_key = unstructured_api_config.api_key or "" try: with tempfile.NamedTemporaryFile(suffix=".doc", delete=False) as temp_file: @@ -363,7 +364,7 @@ def _extract_text_from_doc(file_content: bytes, *, unstructured_api_config: Unst file=file, metadata_filename=temp_file.name, api_url=unstructured_api_config.api_url, - api_key=unstructured_api_config.api_key, + api_key=api_key, ) os.unlink(temp_file.name) return "\n".join([getattr(element, "text", "") for element in elements]) @@ -561,6 +562,7 @@ def _extract_text_from_excel(file_content: bytes) -> str: def _extract_text_from_ppt(file_content: bytes, *, unstructured_api_config: UnstructuredApiConfig) -> str: from unstructured.partition.api import partition_via_api from unstructured.partition.ppt import partition_ppt + api_key = unstructured_api_config.api_key or "" try: if unstructured_api_config.api_url: @@ -572,7 +574,7 @@ def _extract_text_from_ppt(file_content: bytes, *, unstructured_api_config: Unst file=file, metadata_filename=temp_file.name, api_url=unstructured_api_config.api_url, - api_key=unstructured_api_config.api_key, + api_key=api_key, ) os.unlink(temp_file.name) else: @@ -587,6 +589,7 @@ def _extract_text_from_ppt(file_content: bytes, *, unstructured_api_config: Unst def _extract_text_from_pptx(file_content: bytes, *, unstructured_api_config: UnstructuredApiConfig) -> str: from unstructured.partition.api import partition_via_api from unstructured.partition.pptx import partition_pptx + api_key = unstructured_api_config.api_key or "" try: if unstructured_api_config.api_url: @@ -598,7 +601,7 @@ def _extract_text_from_pptx(file_content: bytes, *, unstructured_api_config: Uns file=file, metadata_filename=temp_file.name, api_url=unstructured_api_config.api_url, - api_key=unstructured_api_config.api_key, + api_key=api_key, ) os.unlink(temp_file.name) else: @@ -612,6 +615,7 @@ def _extract_text_from_pptx(file_content: bytes, *, unstructured_api_config: Uns def _extract_text_from_epub(file_content: bytes, *, unstructured_api_config: UnstructuredApiConfig) -> str: from unstructured.partition.api import partition_via_api from unstructured.partition.epub import partition_epub + api_key = unstructured_api_config.api_key or "" try: if unstructured_api_config.api_url: @@ -623,7 +627,7 @@ def _extract_text_from_epub(file_content: bytes, *, unstructured_api_config: Uns file=file, metadata_filename=temp_file.name, api_url=unstructured_api_config.api_url, - api_key=unstructured_api_config.api_key, + api_key=api_key, ) os.unlink(temp_file.name) else: