Merge remote branch changes for main compatibility

fix: preserve MyScale text content on insert
Harden MyScale query parameterization
2026-02-24 18:05:11 +00:00 · 2026-02-16 23:45:09 +08:00 · 2026-02-16 23:44:31 +08:00 · 2026-02-16 23:44:31 +08:00 · 2026-02-16 23:02:14 +08:00 · 2026-02-16 23:02:14 +08:00
4 changed files with 114 additions and 134 deletions
--- a/api/.importlinter
+++ b/api/.importlinter
@@ -114,6 +114,7 @@ ignore_imports =
    core.workflow.nodes.datasource.datasource_node -> models.model
    core.workflow.nodes.datasource.datasource_node -> models.tools
    core.workflow.nodes.datasource.datasource_node -> services.datasource_provider_service
+    core.workflow.nodes.document_extractor.node -> configs
    core.workflow.nodes.document_extractor.node -> core.helper.ssrf_proxy
    core.workflow.nodes.http_request.entities -> configs
    core.workflow.nodes.http_request.executor -> configs
--- a/api/core/app/workflow/node_factory.py
+++ b/api/core/app/workflow/node_factory.py
@@ -16,7 +16,6 @@ from core.workflow.graph.graph import NodeFactory
 from core.workflow.nodes.base.node import Node
 from core.workflow.nodes.code.code_node import CodeNode
 from core.workflow.nodes.code.limits import CodeNodeLimits
-from core.workflow.nodes.document_extractor import DocumentExtractorNode, UnstructuredApiConfig
 from core.workflow.nodes.http_request.node import HttpRequestNode
 from core.workflow.nodes.knowledge_retrieval.knowledge_retrieval_node import KnowledgeRetrievalNode
 from core.workflow.nodes.node_mapping import LATEST_VERSION, NODE_TYPE_CLASSES_MAPPING
@@ -45,6 +44,7 @@ class DifyNodeFactory(NodeFactory):
        self,
        graph_init_params: "GraphInitParams",
        graph_runtime_state: "GraphRuntimeState",
+        *,
        code_executor: type[CodeExecutor] | None = None,
        code_providers: Sequence[type[CodeNodeProvider]] | None = None,
        code_limits: CodeNodeLimits | None = None,
@@ -53,7 +53,6 @@ class DifyNodeFactory(NodeFactory):
        http_request_http_client: HttpClientProtocol | None = None,
        http_request_tool_file_manager_factory: Callable[[], ToolFileManager] = ToolFileManager,
        http_request_file_manager: FileManagerProtocol | None = None,
-        document_extractor_unstructured_api_config: UnstructuredApiConfig | None = None,
    ) -> None:
        self.graph_init_params = graph_init_params
        self.graph_runtime_state = graph_runtime_state
@@ -79,13 +78,6 @@ class DifyNodeFactory(NodeFactory):
        self._http_request_tool_file_manager_factory = http_request_tool_file_manager_factory
        self._http_request_file_manager = http_request_file_manager or file_manager
        self._rag_retrieval = DatasetRetrieval()
-        self._document_extractor_unstructured_api_config = (
-            document_extractor_unstructured_api_config
-            or UnstructuredApiConfig(
-                api_url=dify_config.UNSTRUCTURED_API_URL,
-                api_key=dify_config.UNSTRUCTURED_API_KEY or "",
-            )
-        )

    @override
    def create_node(self, node_config: NodeConfigDict) -> Node:
@@ -160,15 +152,6 @@ class DifyNodeFactory(NodeFactory):
                rag_retrieval=self._rag_retrieval,
            )

-        if node_type == NodeType.DOCUMENT_EXTRACTOR:
-            return DocumentExtractorNode(
-                id=node_id,
-                config=node_config,
-                graph_init_params=self.graph_init_params,
-                graph_runtime_state=self.graph_runtime_state,
-                unstructured_api_config=self._document_extractor_unstructured_api_config,
-            )
-
        return node_class(
            id=node_id,
            config=node_config,
--- a/api/core/rag/datasource/vdb/myscale/myscale_vector.py
+++ b/api/core/rag/datasource/vdb/myscale/myscale_vector.py
@@ -33,6 +33,18 @@ class SortOrder(StrEnum):


 class MyScaleVector(BaseVector):
+    _METADATA_KEY_WHITELIST = {
+        "annotation_id",
+        "app_id",
+        "batch",
+        "dataset_id",
+        "doc_hash",
+        "doc_id",
+        "document_id",
+        "lang",
+        "source",
+    }
+
    def __init__(self, collection_name: str, config: MyScaleConfig, metric: str = "Cosine"):
        super().__init__(collection_name)
        self._config = config
@@ -45,10 +57,17 @@ class MyScaleVector(BaseVector):
            password=config.password,
        )
        self._client.command("SET allow_experimental_object_type=1")
+        self._qualified_table = f"{self._config.database}.{self._collection_name}"

    def get_type(self) -> str:
        return VectorType.MYSCALE

+    @classmethod
+    def _validate_metadata_key(cls, key: str) -> str:
+        if key not in cls._METADATA_KEY_WHITELIST:
+            raise ValueError(f"Unsupported metadata key: {key!r}")
+        return key
+
    def create(self, texts: list[Document], embeddings: list[list[float]], **kwargs):
        dimension = len(embeddings[0])
        self._create_collection(dimension)
@@ -59,7 +78,7 @@ class MyScaleVector(BaseVector):
        self._client.command(f"CREATE DATABASE IF NOT EXISTS {self._config.database}")
        fts_params = f"('{self._config.fts_params}')" if self._config.fts_params else ""
        sql = f"""
-            CREATE TABLE IF NOT EXISTS {self._config.database}.{self._collection_name}(
+            CREATE TABLE IF NOT EXISTS {self._qualified_table}(
                id String,
                text String,
                vector Array(Float32),
@@ -74,73 +93,98 @@ class MyScaleVector(BaseVector):
    def add_texts(self, documents: list[Document], embeddings: list[list[float]], **kwargs):
        ids = []
        columns = ["id", "text", "vector", "metadata"]
-        values = []
+        rows = []
        for i, doc in enumerate(documents):
            if doc.metadata is not None:
                doc_id = doc.metadata.get("doc_id", str(uuid.uuid4()))
-                row = (
-                    doc_id,
-                    self.escape_str(doc.page_content),
-                    embeddings[i],
-                    json.dumps(doc.metadata) if doc.metadata else {},
+                rows.append(
+                    (
+                        doc_id,
+                        doc.page_content,
+                        embeddings[i],
+                        json.dumps(doc.metadata or {}),
+                    )
                )
-                values.append(str(row))
                ids.append(doc_id)
-        sql = f"""
-            INSERT INTO {self._config.database}.{self._collection_name}
-            ({",".join(columns)}) VALUES {",".join(values)}
-        """
-        self._client.command(sql)
+        if rows:
+            self._client.insert(self._qualified_table, rows, column_names=columns)
        return ids

-    @staticmethod
-    def escape_str(value: Any) -> str:
-        return "".join(" " if c in {"\\", "'"} else c for c in str(value))
-
    def text_exists(self, id: str) -> bool:
-        results = self._client.query(f"SELECT id FROM {self._config.database}.{self._collection_name} WHERE id='{id}'")
+        results = self._client.query(
+            f"SELECT id FROM {self._qualified_table} WHERE id = %(id)s LIMIT 1",
+            parameters={"id": id},
+        )
        return results.row_count > 0

    def delete_by_ids(self, ids: list[str]):
        if not ids:
            return
+        placeholders, params = self._build_in_params("id", ids)
        self._client.command(
-            f"DELETE FROM {self._config.database}.{self._collection_name} WHERE id IN {str(tuple(ids))}"
+            f"DELETE FROM {self._qualified_table} WHERE id IN ({placeholders})",
+            parameters=params,
        )

    def get_ids_by_metadata_field(self, key: str, value: str):
+        safe_key = self._validate_metadata_key(key)
        rows = self._client.query(
-            f"SELECT DISTINCT id FROM {self._config.database}.{self._collection_name} WHERE metadata.{key}='{value}'"
+            f"SELECT DISTINCT id FROM {self._qualified_table} WHERE metadata.{safe_key} = %(value)s",
+            parameters={"value": value},
        ).result_rows
        return [row[0] for row in rows]

    def delete_by_metadata_field(self, key: str, value: str):
+        safe_key = self._validate_metadata_key(key)
        self._client.command(
-            f"DELETE FROM {self._config.database}.{self._collection_name} WHERE metadata.{key}='{value}'"
+            f"DELETE FROM {self._qualified_table} WHERE metadata.{safe_key} = %(value)s",
+            parameters={"value": value},
        )

    def search_by_vector(self, query_vector: list[float], **kwargs: Any) -> list[Document]:
        return self._search(f"distance(vector, {str(query_vector)})", self._vec_order, **kwargs)

    def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]:
-        return self._search(f"TextSearch('enable_nlq=false')(text, '{query}')", SortOrder.DESC, **kwargs)
+        return self._search(
+            "TextSearch('enable_nlq=false')(text, %(query)s)",
+            SortOrder.DESC,
+            parameters={"query": query},
+            **kwargs,
+        )

-    def _search(self, dist: str, order: SortOrder, **kwargs: Any) -> list[Document]:
+    @staticmethod
+    def _build_in_params(prefix: str, values: list[str]) -> tuple[str, dict[str, str]]:
+        params: dict[str, str] = {}
+        placeholders = []
+        for i, value in enumerate(values):
+            name = f"{prefix}_{i}"
+            placeholders.append(f"%({name})s")
+            params[name] = value
+        return ", ".join(placeholders), params
+
+    def _search(
+        self,
+        dist: str,
+        order: SortOrder,
+        parameters: dict[str, Any] | None = None,
+        **kwargs: Any,
+    ) -> list[Document]:
        top_k = kwargs.get("top_k", 4)
        if not isinstance(top_k, int) or top_k <= 0:
            raise ValueError("top_k must be a positive integer")
        score_threshold = float(kwargs.get("score_threshold") or 0.0)
-        where_str = (
-            f"WHERE dist < {1 - score_threshold}"
-            if self._metric.upper() == "COSINE" and order == SortOrder.ASC and score_threshold > 0.0
-            else ""
-        )
+        where_clauses = []
+        if self._metric.upper() == "COSINE" and order == SortOrder.ASC and score_threshold > 0.0:
+            where_clauses.append(f"dist < {1 - score_threshold}")
        document_ids_filter = kwargs.get("document_ids_filter")
+        query_params = dict(parameters or {})
        if document_ids_filter:
-            document_ids = ", ".join(f"'{id}'" for id in document_ids_filter)
-            where_str = f"{where_str} AND metadata['document_id'] in ({document_ids})"
+            placeholders, params = self._build_in_params("document_id", document_ids_filter)
+            where_clauses.append(f"metadata['document_id'] IN ({placeholders})")
+            query_params.update(params)
+        where_str = f"WHERE {' AND '.join(where_clauses)}" if where_clauses else ""
        sql = f"""
-            SELECT text, vector, metadata, {dist} as dist FROM {self._config.database}.{self._collection_name}
+            SELECT text, vector, metadata, {dist} as dist FROM {self._qualified_table}
            {where_str} ORDER BY dist {order.value} LIMIT {top_k}
        """
        try:
@@ -150,14 +194,14 @@ class MyScaleVector(BaseVector):
                    vector=r["vector"],
                    metadata=r["metadata"],
                )
-                for r in self._client.query(sql).named_results()
+                for r in self._client.query(sql, parameters=query_params).named_results()
            ]
        except Exception:
            logger.exception("Vector search operation failed")
            return []

    def delete(self):
-        self._client.command(f"DROP TABLE IF EXISTS {self._config.database}.{self._collection_name}")
+        self._client.command(f"DROP TABLE IF EXISTS {self._qualified_table}")


 class MyScaleVectorFactory(AbstractVectorFactory):
--- a/api/core/workflow/nodes/document_extractor/node.py
+++ b/api/core/workflow/nodes/document_extractor/node.py
@@ -5,7 +5,7 @@ import logging
 import os
 import tempfile
 from collections.abc import Mapping, Sequence
-from typing import TYPE_CHECKING, Any
+from typing import Any

 import charset_normalizer
 import docx
@@ -20,6 +20,7 @@ from docx.oxml.text.paragraph import CT_P
 from docx.table import Table
 from docx.text.paragraph import Paragraph

+from configs import dify_config
 from core.helper import ssrf_proxy
 from core.variables import ArrayFileSegment
 from core.variables.segments import ArrayStringSegment, FileSegment
@@ -28,15 +29,11 @@ from core.workflow.file import File, FileTransferMethod, file_manager
 from core.workflow.node_events import NodeRunResult
 from core.workflow.nodes.base.node import Node

-from .entities import DocumentExtractorNodeData, UnstructuredApiConfig
+from .entities import DocumentExtractorNodeData
 from .exc import DocumentExtractorError, FileDownloadError, TextExtractionError, UnsupportedFileTypeError

 logger = logging.getLogger(__name__)

-if TYPE_CHECKING:
-    from core.workflow.entities import GraphInitParams
-    from core.workflow.runtime import GraphRuntimeState
-

 class DocumentExtractorNode(Node[DocumentExtractorNodeData]):
    """
@@ -50,23 +47,6 @@ class DocumentExtractorNode(Node[DocumentExtractorNodeData]):
    def version(cls) -> str:
        return "1"

-    def __init__(
-        self,
-        id: str,
-        config: Mapping[str, Any],
-        graph_init_params: "GraphInitParams",
-        graph_runtime_state: "GraphRuntimeState",
-        *,
-        unstructured_api_config: UnstructuredApiConfig | None = None,
-    ) -> None:
-        super().__init__(
-            id=id,
-            config=config,
-            graph_init_params=graph_init_params,
-            graph_runtime_state=graph_runtime_state,
-        )
-        self._unstructured_api_config = unstructured_api_config or UnstructuredApiConfig()
-
    def _run(self):
        variable_selector = self.node_data.variable_selector
        variable = self.graph_runtime_state.variable_pool.get(variable_selector)
@@ -84,10 +64,7 @@ class DocumentExtractorNode(Node[DocumentExtractorNodeData]):

        try:
            if isinstance(value, list):
-                extracted_text_list = [
-                    _extract_text_from_file(file, unstructured_api_config=self._unstructured_api_config)
-                    for file in value
-                ]
+                extracted_text_list = list(map(_extract_text_from_file, value))
                return NodeRunResult(
                    status=WorkflowNodeExecutionStatus.SUCCEEDED,
                    inputs=inputs,
@@ -95,7 +72,7 @@ class DocumentExtractorNode(Node[DocumentExtractorNodeData]):
                    outputs={"text": ArrayStringSegment(value=extracted_text_list)},
                )
            elif isinstance(value, File):
-                extracted_text = _extract_text_from_file(value, unstructured_api_config=self._unstructured_api_config)
+                extracted_text = _extract_text_from_file(value)
                return NodeRunResult(
                    status=WorkflowNodeExecutionStatus.SUCCEEDED,
                    inputs=inputs,
@@ -126,12 +103,7 @@ class DocumentExtractorNode(Node[DocumentExtractorNodeData]):
        return {node_id + ".files": typed_node_data.variable_selector}


-def _extract_text_by_mime_type(
-    *,
-    file_content: bytes,
-    mime_type: str,
-    unstructured_api_config: UnstructuredApiConfig,
-) -> str:
+def _extract_text_by_mime_type(*, file_content: bytes, mime_type: str) -> str:
    """Extract text from a file based on its MIME type."""
    match mime_type:
        case "text/plain" | "text/html" | "text/htm" | "text/markdown" | "text/xml":
@@ -139,7 +111,7 @@ def _extract_text_by_mime_type(
        case "application/pdf":
            return _extract_text_from_pdf(file_content)
        case "application/msword":
-            return _extract_text_from_doc(file_content, unstructured_api_config=unstructured_api_config)
+            return _extract_text_from_doc(file_content)
        case "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
            return _extract_text_from_docx(file_content)
        case "text/csv":
@@ -147,11 +119,11 @@ def _extract_text_by_mime_type(
        case "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" | "application/vnd.ms-excel":
            return _extract_text_from_excel(file_content)
        case "application/vnd.ms-powerpoint":
-            return _extract_text_from_ppt(file_content, unstructured_api_config=unstructured_api_config)
+            return _extract_text_from_ppt(file_content)
        case "application/vnd.openxmlformats-officedocument.presentationml.presentation":
-            return _extract_text_from_pptx(file_content, unstructured_api_config=unstructured_api_config)
+            return _extract_text_from_pptx(file_content)
        case "application/epub+zip":
-            return _extract_text_from_epub(file_content, unstructured_api_config=unstructured_api_config)
+            return _extract_text_from_epub(file_content)
        case "message/rfc822":
            return _extract_text_from_eml(file_content)
        case "application/vnd.ms-outlook":
@@ -168,12 +140,7 @@ def _extract_text_by_mime_type(
            raise UnsupportedFileTypeError(f"Unsupported MIME type: {mime_type}")


-def _extract_text_by_file_extension(
-    *,
-    file_content: bytes,
-    file_extension: str,
-    unstructured_api_config: UnstructuredApiConfig,
-) -> str:
+def _extract_text_by_file_extension(*, file_content: bytes, file_extension: str) -> str:
    """Extract text from a file based on its file extension."""
    match file_extension:
        case (
@@ -236,7 +203,7 @@ def _extract_text_by_file_extension(
        case ".pdf":
            return _extract_text_from_pdf(file_content)
        case ".doc":
-            return _extract_text_from_doc(file_content, unstructured_api_config=unstructured_api_config)
+            return _extract_text_from_doc(file_content)
        case ".docx":
            return _extract_text_from_docx(file_content)
        case ".csv":
@@ -244,11 +211,11 @@ def _extract_text_by_file_extension(
        case ".xls" | ".xlsx":
            return _extract_text_from_excel(file_content)
        case ".ppt":
-            return _extract_text_from_ppt(file_content, unstructured_api_config=unstructured_api_config)
+            return _extract_text_from_ppt(file_content)
        case ".pptx":
-            return _extract_text_from_pptx(file_content, unstructured_api_config=unstructured_api_config)
+            return _extract_text_from_pptx(file_content)
        case ".epub":
-            return _extract_text_from_epub(file_content, unstructured_api_config=unstructured_api_config)
+            return _extract_text_from_epub(file_content)
        case ".eml":
            return _extract_text_from_eml(file_content)
        case ".msg":
@@ -345,15 +312,14 @@ def _extract_text_from_pdf(file_content: bytes) -> str:
        raise TextExtractionError(f"Failed to extract text from PDF: {str(e)}") from e


-def _extract_text_from_doc(file_content: bytes, *, unstructured_api_config: UnstructuredApiConfig) -> str:
+def _extract_text_from_doc(file_content: bytes) -> str:
    """
    Extract text from a DOC file.
    """
    from unstructured.partition.api import partition_via_api

-    if not unstructured_api_config.api_url:
-        raise TextExtractionError("Unstructured API URL is not configured for DOC file processing.")
-    api_key = unstructured_api_config.api_key or ""
+    if not dify_config.UNSTRUCTURED_API_URL:
+        raise TextExtractionError("UNSTRUCTURED_API_URL must be set")

    try:
        with tempfile.NamedTemporaryFile(suffix=".doc", delete=False) as temp_file:
@@ -363,8 +329,8 @@ def _extract_text_from_doc(file_content: bytes, *, unstructured_api_config: Unst
                elements = partition_via_api(
                    file=file,
                    metadata_filename=temp_file.name,
-                    api_url=unstructured_api_config.api_url,
-                    api_key=api_key,
+                    api_url=dify_config.UNSTRUCTURED_API_URL,
+                    api_key=dify_config.UNSTRUCTURED_API_KEY,  # type: ignore
                )
            os.unlink(temp_file.name)
        return "\n".join([getattr(element, "text", "") for element in elements])
@@ -454,20 +420,12 @@ def _download_file_content(file: File) -> bytes:
        raise FileDownloadError(f"Error downloading file: {str(e)}") from e


-def _extract_text_from_file(file: File, *, unstructured_api_config: UnstructuredApiConfig) -> str:
+def _extract_text_from_file(file: File):
    file_content = _download_file_content(file)
    if file.extension:
-        extracted_text = _extract_text_by_file_extension(
-            file_content=file_content,
-            file_extension=file.extension,
-            unstructured_api_config=unstructured_api_config,
-        )
+        extracted_text = _extract_text_by_file_extension(file_content=file_content, file_extension=file.extension)
    elif file.mime_type:
-        extracted_text = _extract_text_by_mime_type(
-            file_content=file_content,
-            mime_type=file.mime_type,
-            unstructured_api_config=unstructured_api_config,
-        )
+        extracted_text = _extract_text_by_mime_type(file_content=file_content, mime_type=file.mime_type)
    else:
        raise UnsupportedFileTypeError("Unable to determine file type: MIME type or file extension is missing")
    return extracted_text
@@ -559,14 +517,12 @@ def _extract_text_from_excel(file_content: bytes) -> str:
        raise TextExtractionError(f"Failed to extract text from Excel file: {str(e)}") from e


-def _extract_text_from_ppt(file_content: bytes, *, unstructured_api_config: UnstructuredApiConfig) -> str:
+def _extract_text_from_ppt(file_content: bytes) -> str:
    from unstructured.partition.api import partition_via_api
    from unstructured.partition.ppt import partition_ppt

-    api_key = unstructured_api_config.api_key or ""
-
    try:
-        if unstructured_api_config.api_url:
+        if dify_config.UNSTRUCTURED_API_URL:
            with tempfile.NamedTemporaryFile(suffix=".ppt", delete=False) as temp_file:
                temp_file.write(file_content)
                temp_file.flush()
@@ -574,8 +530,8 @@ def _extract_text_from_ppt(file_content: bytes, *, unstructured_api_config: Unst
                    elements = partition_via_api(
                        file=file,
                        metadata_filename=temp_file.name,
-                        api_url=unstructured_api_config.api_url,
-                        api_key=api_key,
+                        api_url=dify_config.UNSTRUCTURED_API_URL,
+                        api_key=dify_config.UNSTRUCTURED_API_KEY,  # type: ignore
                    )
                os.unlink(temp_file.name)
        else:
@@ -587,14 +543,12 @@ def _extract_text_from_ppt(file_content: bytes, *, unstructured_api_config: Unst
        raise TextExtractionError(f"Failed to extract text from PPTX: {str(e)}") from e


-def _extract_text_from_pptx(file_content: bytes, *, unstructured_api_config: UnstructuredApiConfig) -> str:
+def _extract_text_from_pptx(file_content: bytes) -> str:
    from unstructured.partition.api import partition_via_api
    from unstructured.partition.pptx import partition_pptx

-    api_key = unstructured_api_config.api_key or ""
-
    try:
-        if unstructured_api_config.api_url:
+        if dify_config.UNSTRUCTURED_API_URL:
            with tempfile.NamedTemporaryFile(suffix=".pptx", delete=False) as temp_file:
                temp_file.write(file_content)
                temp_file.flush()
@@ -602,8 +556,8 @@ def _extract_text_from_pptx(file_content: bytes, *, unstructured_api_config: Uns
                    elements = partition_via_api(
                        file=file,
                        metadata_filename=temp_file.name,
-                        api_url=unstructured_api_config.api_url,
-                        api_key=api_key,
+                        api_url=dify_config.UNSTRUCTURED_API_URL,
+                        api_key=dify_config.UNSTRUCTURED_API_KEY,  # type: ignore
                    )
                os.unlink(temp_file.name)
        else:
@@ -614,14 +568,12 @@ def _extract_text_from_pptx(file_content: bytes, *, unstructured_api_config: Uns
        raise TextExtractionError(f"Failed to extract text from PPTX: {str(e)}") from e


-def _extract_text_from_epub(file_content: bytes, *, unstructured_api_config: UnstructuredApiConfig) -> str:
+def _extract_text_from_epub(file_content: bytes) -> str:
    from unstructured.partition.api import partition_via_api
    from unstructured.partition.epub import partition_epub

-    api_key = unstructured_api_config.api_key or ""
-
    try:
-        if unstructured_api_config.api_url:
+        if dify_config.UNSTRUCTURED_API_URL:
            with tempfile.NamedTemporaryFile(suffix=".epub", delete=False) as temp_file:
                temp_file.write(file_content)
                temp_file.flush()
@@ -629,8 +581,8 @@ def _extract_text_from_epub(file_content: bytes, *, unstructured_api_config: Uns
                    elements = partition_via_api(
                        file=file,
                        metadata_filename=temp_file.name,
-                        api_url=unstructured_api_config.api_url,
-                        api_key=api_key,
+                        api_url=dify_config.UNSTRUCTURED_API_URL,
+                        api_key=dify_config.UNSTRUCTURED_API_KEY,  # type: ignore
                    )
                os.unlink(temp_file.name)
        else:
Author	SHA1	Message	Date
-LAN-	93d40d50d3	Merge remote branch changes for main compatibility	2026-02-16 23:45:09 +08:00
-LAN-	1c756e0073	fix: preserve MyScale text content on insert	2026-02-16 23:44:31 +08:00
-LAN-	e1681b1a16	Harden MyScale query parameterization	2026-02-16 23:44:31 +08:00
99	a63e0dc2d9	refactor(workflow-file): move `core.file` to `core.workflow.file` (#32252 ) Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>	2026-02-16 23:02:14 +08:00
Haohao	a811983daa	fix(i18n): fix critical errors and overhaul Persian (fa-IR) translations in workflow.json (#32342 )	2026-02-16 23:02:14 +08:00
dependabot[bot]	23c3319594	chore(deps-dev): bump types-greenlet from 3.1.0.20250401 to 3.3.0.20251206 in /api (#32349 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2026-02-16 23:02:14 +08:00
Asuka Minato	186ba8a0b4	ci: update dependabot config (#32346 )	2026-02-16 23:02:13 +08:00
yyh	94bda9fda1	refactor(web): centralize role-based route guards and fix anti-patterns (#32302 )	2026-02-16 23:02:13 +08:00
yyh	68a2168db6	fix: remove explore context and migrate query to orpc contract (#32320 ) Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>	2026-02-16 23:02:13 +08:00
L1nSn0w	da6bf01c08	fix(api): make DB migration Redis lock TTL configurable and prevent LockNotOwnedError from masking failures (#32299 ) Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>	2026-02-16 23:02:13 +08:00
Xiyuan Chen	4c702ce923	fix(app-copy): inherit web app permission from original app (#32323 )	2026-02-16 23:02:13 +08:00
Poojan	dbe37dbd71	test: add unit tests for base chat components (#32249 )	2026-02-16 23:02:12 +08:00
dependabot[bot]	55dd4a0f89	chore(deps): bump sqlparse from 0.5.3 to 0.5.4 in /api (#32315 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2026-02-16 23:02:12 +08:00
Coding On Star	96b280cf9b	refactor(web): extract custom hooks from complex components and add comprehensive tests (#32301 ) Co-authored-by: CodingOnStar <hanxujiang@dify.com>	2026-02-16 23:02:12 +08:00
Saumya Talwani	ba75c37c16	test: add tests for some base components (#32265 )	2026-02-16 23:02:12 +08:00
Coding On Star	8cecca58ff	test: add integration tests for app card operations, list browsing, and create app flows (#32298 ) Co-authored-by: CodingOnStar <hanxujiang@dify.com>	2026-02-16 23:02:12 +08:00
Poojan	b2e4072664	test: add unit tests for base components-part-1 (#32154 )	2026-02-16 23:02:11 +08:00
dependabot[bot]	313ef28a8d	chore(deps): bump qs from 6.14.1 to 6.14.2 in /web (#32290 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2026-02-16 23:02:11 +08:00
Coding On Star	3cb7194433	test(web): add and enhance frontend automated tests across multiple modules (#32268 ) Co-authored-by: CodingOnStar <hanxujiang@dify.com>	2026-02-16 23:02:11 +08:00
Conner Mo	abbff8d05f	feat(api): optimize OceanBase vector store performance and configurability (#32263 ) Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>	2026-02-16 23:02:11 +08:00
Bowen Liang	9d8cff1571	feat: support config max size of plugin generated files (#30887 ) Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>	2026-02-16 23:02:11 +08:00
dependabot[bot]	7f401d3c69	chore(deps): bump pillow from 12.0.0 to 12.1.1 in /api (#32250 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2026-02-16 23:02:10 +08:00
Stephen Zhou	255abccefd	fix: can not upload file in single run (#32276 )	2026-02-16 23:02:10 +08:00
Varun Chawla	875f3de415	fix: metadata batch edit silently fails due to split transactions and swallowed exceptions (#32041 )	2026-02-16 23:02:10 +08:00
Coding On Star	1c4102c1af	feat(tests): add integration tests for explore app list, installed apps, and sidebar lifecycle flows (#32248 ) Co-authored-by: CodingOnStar <hanxujiang@dify.com>	2026-02-16 23:02:10 +08:00
Coding On Star	331ce867d9	test: add comprehensive unit and integration tests for RAG Pipeline components (#32237 ) Co-authored-by: CodingOnStar <hanxujiang@dify.com>	2026-02-16 23:02:09 +08:00
Coding On Star	e4c9196465	test: add unit and integration tests for share, develop, and goto-anything modules (#32246 ) Co-authored-by: CodingOnStar <hanxujiang@dify.com>	2026-02-16 23:02:09 +08:00
Coding On Star	4044d8a8db	test: add comprehensive unit and integration tests for billing components (#32227 ) Co-authored-by: CodingOnStar <hanxujiang@dify.com>	2026-02-16 23:02:09 +08:00
Coding On Star	979592e183	test(web): add comprehensive unit and integration tests for plugins and tools modules (#32220 ) Co-authored-by: CodingOnStar <hanxujiang@dify.com>	2026-02-16 23:02:09 +08:00
Coding On Star	7ca6219559	test: add comprehensive unit and integration tests for dataset module (#32187 ) Co-authored-by: CodingOnStar <hanxujiang@dify.com> Co-authored-by: Cursor <cursoragent@cursor.com>	2026-02-16 23:02:09 +08:00
Saumya Talwani	a7211c6338	test: add unit tests for some base components (#32201 )	2026-02-16 23:02:08 +08:00
Runzhe	45032116e0	feat(api): add scheduled cleanup task for specific workflow logs (#31843 ) Co-authored-by: 章润喆 <zhangrunzhe@zhangrunzhedeMacBook-Air.local> Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: hjlarry <hjlarry@163.com> Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: hj24 <mambahj24@gmail.com>	2026-02-16 23:02:08 +08:00
QuantumGhost	9c8fa5a295	chore: bump version to 1.13.0 (#32147 )	2026-02-16 23:02:08 +08:00
NFish	489b1fb87d	fix: hide invite button if current user is not workspace manager (#31744 )	2026-02-16 23:02:08 +08:00
hj24	88bdef9c04	fix: update index to optimize message clean performance (#32238 ) Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>	2026-02-16 23:02:07 +08:00
wangxiaolei	7f959c09c0	fix: fix use fastopenapi lead user is anonymouse (#32236 ) Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>	2026-02-16 23:02:07 +08:00
Wu Tianwei	1fd1d6f503	feat(tests): add mock for useInvalidateWorkflowRunHistory in pipeline run tests (#32234 )	2026-02-16 23:02:07 +08:00
veganmosfet	1e1e446ff6	Merge commit from fork Removed the dangerous `new function` call during echarts parsing and replaced with an error message. Co-authored-by: Byron Wang <byron@linux.com>	2026-02-16 23:02:07 +08:00
dependabot[bot]	d8b5243e6a	chore(deps): bump google-api-python-client from 2.90.0 to 2.189.0 in /api (#32102 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2026-02-16 23:02:07 +08:00
dependabot[bot]	2ab9fa90fd	chore(deps): bump cryptography from 46.0.3 to 46.0.5 in /api (#32218 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2026-02-16 23:02:06 +08:00
Wu Tianwei	b82f82bfcb	feat(workflow): enhance workflow run history management and UI updates (#32230 )	2026-02-16 23:02:06 +08:00
Byron.wang	8e05cd4c2e	docs(api): mark SetupApi as unauthenticated by design (#32224 )	2026-02-16 23:02:06 +08:00
wangxiaolei	e557acc1d3	fix: fix get_message_event_type return wrong message type (#32019 ) Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>	2026-02-16 23:02:06 +08:00
wangxiaolei	4da6175307	feat: support nl-NL language (#32216 )	2026-02-16 23:02:06 +08:00
fenglin	db24e36d9f	fix: add unique constraint to tenant_default_models to prevent duplic… (#31221 ) Co-authored-by: qiaofenglin <qiaofenglin@baidu.com> Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: Novice <novice12185727@gmail.com>	2026-02-16 23:02:05 +08:00
wangxiaolei	f823b1df3b	fix: fix all tools is deleted (#32207 )	2026-02-16 23:02:05 +08:00
QuantumGhost	35f0d9e857	fix(api): excessive high CPU usage caused by RedisClientWrapper (#32212 ) Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>	2026-02-16 23:02:05 +08:00
QuantumGhost	4f94fa81c3	chore(api): consume tasks in `workflow_based_app_execution` queue in start-worker script (#32214 )	2026-02-16 23:02:05 +08:00
Wu Tianwei	5627ca685a	fix: Fix the display of state icon of base node (#32208 )	2026-02-16 23:02:05 +08:00
dependabot[bot]	b753f37a89	chore(deps): bump axios from 1.13.2 to 1.13.5 in /sdks/nodejs-client (#32199 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2026-02-16 23:02:05 +08:00
非法操作	85018d557c	chore: allow draft run single node without connect to other node (#31977 )	2026-02-16 23:02:04 +08:00
weiguang li	16d0096491	fix(console): keep conversation updated_at unchanged when marking read (#32133 )	2026-02-16 23:02:04 +08:00
Ponder	4f7ee0d66e	feat: enhancement celery configuration (#32145 )	2026-02-16 23:02:04 +08:00
Stephen Zhou	c810b0f472	refactor: type safe env, update to zod v4 (#32035 )	2026-02-16 23:02:04 +08:00
wangxiaolei	077ee51753	fix: fix no dify home directory lead permission error (#32169 )	2026-02-16 23:02:04 +08:00
Coding On Star	83c1da0d09	refactor: extract sub-components and custom hooks from UpdateDSLModal and Metadata components (#32045 ) Co-authored-by: CodingOnStar <hanxujiang@dify.com> Co-authored-by: Stephen Zhou <38493346+hyoban@users.noreply.github.com>	2026-02-16 23:02:03 +08:00
Novice	a9927f24ca	chore(api): update launch.json.example to include new workflow_based_app_execution. (#32184 )	2026-02-16 23:02:03 +08:00
Wu Tianwei	50b3b0111f	test: add comprehensive tests for Human Input Node functionality (#32191 )	2026-02-16 23:02:03 +08:00
Varun Chawla	062b101e84	fix: replace sendBeacon with fetch keepalive for autosave on page close (#32088 ) Signed-off-by: Varun Chawla <varun_6april@hotmail.com>	2026-02-16 23:02:03 +08:00
weiguang li	3afe9f15d7	fix(web): fill workflow tool output descriptions from schema (#32117 )	2026-02-16 23:02:03 +08:00
weiguang li	a9cf7490c4	fix(api): include file marker for workflow tool file outputs (#32114 )	2026-02-16 23:02:02 +08:00
Stephen Zhou	9a41d147be	refactor: import component css in globals.css (#32180 ) Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>	2026-02-16 23:02:02 +08:00
zyssyz123	cf0760234c	fix: When the user is a non-sandbox user and has a paid balance, the … (#32173 ) Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>	2026-02-16 23:02:02 +08:00
Dream	70ecf8d1bd	fix(api): clean up orphaned pending accounts on member removal (#32151 ) Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>	2026-02-16 23:02:02 +08:00
Shuvam Pandey	a70b31cd07	refactor(api): tighten OTel decorator typing (#32163 )	2026-02-16 23:02:02 +08:00
Stephen Zhou	ad8d253720	chore: introduce css icons (#32004 )	2026-02-16 23:02:01 +08:00
Stephen Zhou	3459c07974	chore: detect utilities in css (#32143 )	2026-02-16 23:02:01 +08:00
Vlad D	5fc4030477	fix(api): serialize pipeline file-upload created_at (#32098 ) Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>	2026-02-16 23:02:01 +08:00
Vlad D	d37657440a	fix(api): register knowledge pipeline service API routes (#32097 ) Co-authored-by: Crazywoola <100913391+crazywoola@users.noreply.github.com> Co-authored-by: FFXN <31929997+FFXN@users.noreply.github.com>	2026-02-16 23:02:01 +08:00
wangxiaolei	2616debd1e	refactor: document_indexing_sync_task split db session (#32129 ) Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>	2026-02-16 23:02:01 +08:00
Stephen Zhou	fdfe90c8c0	chore: fix type for useTranslation in `#i18n` (#32134 ) Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>	2026-02-16 23:02:00 +08:00
Yessenia-d	c8a3b92e47	style: update banner item styles and enhance dark/light theme variables (#32111 ) Co-authored-by: Crazywoola <100913391+crazywoola@users.noreply.github.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>	2026-02-16 23:02:00 +08:00
QuantumGhost	ecbe760d2f	chore(api): update launch.json.template (#32124 )	2026-02-16 23:02:00 +08:00
QuantumGhost	c786aee9e9	feat: Human Input Node (#32060 ) The frontend and backend implementation for the human input node. Co-authored-by: twwu <twwu@dify.ai> Co-authored-by: JzoNg <jzongcode@gmail.com> Co-authored-by: yyh <92089059+lyzno1@users.noreply.github.com> Co-authored-by: zhsama <torvalds@linux.do>	2026-02-16 23:02:00 +08:00
wangxiaolei	5cc4ecfbbc	fix: fix trigger output schema miss (#32116 )	2026-02-16 23:01:59 +08:00
GuanMu	8f4f8da714	fix: pass user timezone from app context to the date picker component. (#31831 ) Co-authored-by: yyh <92089059+lyzno1@users.noreply.github.com>	2026-02-16 23:01:59 +08:00
盐粒 Yanli	819214ba76	feat: Service API - add end-user lookup endpoint (#32015 ) Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>	2026-02-16 23:01:59 +08:00
wangxiaolei	9d9ab89f80	refactor: decouple database operations from knowledge retrieval nodes (#31981 ) Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>	2026-02-16 23:01:59 +08:00
Stephen Zhou	e8cf6d6e1f	test: stable test (#32108 )	2026-02-16 23:01:59 +08:00
wangxiaolei	f201f57cd2	refactor: document_indexing_update_task split database session (#32105 ) Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>	2026-02-16 23:01:58 +08:00
zxhlyh	823cbc304b	fix: search model provider list (#32106 )	2026-02-16 23:01:58 +08:00
wangxiaolei	77eb424dd7	feat: extract mcp tool usage (#31802 ) Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>	2026-02-16 23:01:58 +08:00
wangxiaolei	522aced46b	refactor: partition Celery task sessions into smaller, discrete execu… (#32085 ) Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>	2026-02-16 23:01:58 +08:00
kongwenyan	46c22330e8	refactor: remove unnecessary type: async_workflow_service.py (#32081 )	2026-02-16 23:01:58 +08:00
Crazywoola	a9d7f54b1d	fix: remove unexpected scrollbar in KB Retrieval settings (#32082 )	2026-02-16 23:01:57 +08:00
Jyong	e7a7506099	fix: batch delete document db session block (#32062 )	2026-02-16 23:01:57 +08:00
zyssyz123	1d8531161d	fix: redis for api token (#31861 ) Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Co-authored-by: hj24 <mambahj24@gmail.com>	2026-02-16 23:01:57 +08:00
QuantumGhost	23b42a22a9	chore: update deploy branches for deploy-hitl.yaml (#32051 )	2026-02-16 23:01:57 +08:00
Stephen Zhou	1c8feece1b	test: only remove text coverage in CI (#32043 )	2026-02-16 23:01:57 +08:00
NeatGuyCoding	e2d784f726	fix(api): return proper HTTP 204 status code in DELETE endpoints (#32012 ) Signed-off-by: NeatGuyCoding <15627489+NeatGuyCoding@users.noreply.github.com> Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>	2026-02-16 23:01:56 +08:00
QuantumGhost	65681d8351	perf: use batch delete method instead of single delete (#32036 ) Co-authored-by: fatelei <fatelei@gmail.com> Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: FFXN <lizy@dify.ai>	2026-02-16 23:01:56 +08:00
wangxiaolei	f207c20514	fix: fix tool type is miss (#32042 )	2026-02-16 23:01:56 +08:00
QuantumGhost	ae40313fab	chore: update HITL auto deploy workflow (#32040 )	2026-02-16 23:01:56 +08:00
longbingljw	6e83348be9	fix: make `flask upgrade-db` fail on error (#32024 )	2026-02-16 23:01:56 +08:00
wangxiaolei	a1224cd023	fix: fix agent node tool type is not right (#32008 ) Infer real tool type via querying relevant database tables. The root cause for incorrect `type` field is still not clear.	2026-02-16 23:01:55 +08:00
Ryan	25552dbd38	chore: remove .codex/skills directory (#32022 ) Co-authored-by: Longwei Liu <longweiliu@LongweideMacBook-Air.local>	2026-02-16 23:01:55 +08:00
99	fc559e5449	refactor: strip external imports in workflow template transform (#32017 )	2026-02-16 23:01:55 +08:00
99	3af042066f	chore: Remove redundant double space in variable type description (core/variables/variables.py) (#32002 )	2026-02-16 23:01:55 +08:00
QuantumGhost	12ce014205	perf(api): Optimize the response time of AppListApi endpoint (#31999 )	2026-02-16 23:01:55 +08:00
Stream	659c3bd0c4	feat: use static manifest for pre-caching all plugin manifests before checking updates (#31942 ) Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: Junyan Qin <rockchinq@gmail.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>	2026-02-16 23:01:54 +08:00
Stephen Zhou	a9b76d957f	chore: migrate to eslint-better-tailwind (#31969 )	2026-02-16 23:01:54 +08:00
wangxiaolei	c95adf2534	fix: fix uuid_generate_v4 only used in postgresql (#31304 )	2026-02-16 23:01:54 +08:00
lif	8741717a97	fix(web): add rewrite rule to fix Serwist precaching 404 errors (#31770 ) Signed-off-by: majiayu000 <1835304752@qq.com> Co-authored-by: Stephen Zhou <38493346+hyoban@users.noreply.github.com>	2026-02-16 23:01:54 +08:00
Joel	4ef51f545e	chore: sticky the applist header in explore page (#31967 )	2026-02-16 23:01:54 +08:00
wangxiaolei	78fee247bd	fix: fix miss use db.session (#31971 )	2026-02-16 23:01:53 +08:00
Stephen Zhou	f07110ff66	test: fix test (#31975 )	2026-02-16 23:01:53 +08:00
Xiyuan Chen	7feca6a5a5	feat: account delete cleanup (#31519 ) Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>	2026-02-16 23:01:53 +08:00
yyh	50545d82e4	fix: remove staleTime/gcTime overrides from trigger query hooks and use orpc contract (#31863 )	2026-02-16 23:01:53 +08:00
wangxiaolei	37d80fafb8	feat: use latest hash to sync draft (#31924 )	2026-02-16 23:01:53 +08:00
Coding On Star	81c64e3d05	chore: update CODEOWNERS to specify test file patterns for base components (#31941 ) Co-authored-by: CodingOnStar <hanxujiang@dify.com>	2026-02-16 23:01:52 +08:00
QuantumGhost	4f9891b0fa	chore: bump version in docker-compose and package manager to 1.12.1 (#31947 )	2026-02-16 23:01:52 +08:00
wangxiaolei	f59029d8ed	fix: fix delete_draft_variables_batch cycle forever (#31934 ) Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>	2026-02-16 23:01:52 +08:00
Coding On Star	de76c38f7a	refactor(web): extract complex components into modular structure with comprehensive tests (#31729 ) Co-authored-by: CodingOnStar <hanxujiang@dify.com> Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>	2026-02-16 23:01:52 +08:00
-LAN-	a7d035fa4d	chore: assign code owners for test directories (#31940 )	2026-02-16 23:01:52 +08:00
Coding On Star	2781cd8d79	fix: include locale in appList query key for localization support inuseExploreAppList (#31921 ) Co-authored-by: CodingOnStar <hanxujiang@dify.com>	2026-02-16 23:01:51 +08:00
Coding On Star	49ab0605cd	refactor(datasets): extract hooks and components with comprehensive tests (#31707 ) Co-authored-by: CodingOnStar <hanxujiang@dify.com> Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>	2026-02-16 23:01:50 +08:00
-LAN-	fd80ed1c99	fix: preserve MyScale text content on insert	2026-02-10 16:39:00 +08:00
-LAN-	beb3ce172d	Harden MyScale query parameterization	2026-02-04 19:06:15 +08:00