fix: parameterize MyScale query vector and add regression test

fix: preserve MyScale text content on insert
Harden MyScale query parameterization
2026-03-01 21:15:10 +00:00 · 2026-03-01 19:56:13 +08:00 · 2026-03-01 19:56:13 +08:00 · 2026-03-01 19:56:13 +08:00 · 2026-03-01 19:43:05 +08:00 · 2026-03-01 19:37:51 +08:00
29 changed files with 286 additions and 756 deletions
--- a/5
+++ b/5
@@ -68,10 +68,9 @@ lint:
 	@echo "✅ Linting complete"

 type-check:
-	@echo "📝 Running type checks (basedpyright + mypy + ty)..."
+	@echo "📝 Running type checks (basedpyright + mypy)..."
 	@./dev/basedpyright-check $(PATH_TO_CHECK)
 	@uv --directory api run mypy --exclude-gitignore --exclude 'tests/' --exclude 'migrations/' --check-untyped-defs --disable-error-code=import-untyped .
-	@cd api && uv run ty check
 	@echo "✅ Type checks complete"

 test:
@@ -132,7 +131,7 @@ help:
 	@echo "  make format         - Format code with ruff"
 	@echo "  make check          - Check code with ruff"
 	@echo "  make lint           - Format, fix, and lint code (ruff, imports, dotenv)"
-	@echo "  make type-check     - Run type checks (basedpyright, mypy, ty)"
+	@echo "  make type-check     - Run type checks (basedpyright, mypy)"
 	@echo "  make test           - Run backend unit tests (or TARGET_TESTS=./api/tests/<target_tests>)"
 	@echo ""
 	@echo "Docker Build Targets:"
--- a/api/constants/pipeline_templates.json
+++ b/api/constants/pipeline_templates.json
--- a/api/core/rag/datasource/vdb/analyticdb/analyticdb_vector_openapi.py
+++ b/api/core/rag/datasource/vdb/analyticdb/analyticdb_vector_openapi.py
@@ -192,8 +192,8 @@ class AnalyticdbVectorOpenAPI:
            collection=self._collection_name,
            metrics=self.config.metrics,
            include_values=True,
-            vector=None,  # ty: ignore [invalid-argument-type]
-            content=None,  # ty: ignore [invalid-argument-type]
+            vector=None,
+            content=None,
            top_k=1,
            filter=f"ref_doc_id='{id}'",
        )
@@ -211,7 +211,7 @@ class AnalyticdbVectorOpenAPI:
            namespace=self.config.namespace,
            namespace_password=self.config.namespace_password,
            collection=self._collection_name,
-            collection_data=None,  # ty: ignore [invalid-argument-type]
+            collection_data=None,
            collection_data_filter=f"ref_doc_id IN {ids_str}",
        )
        self._client.delete_collection_data(request)
@@ -225,7 +225,7 @@ class AnalyticdbVectorOpenAPI:
            namespace=self.config.namespace,
            namespace_password=self.config.namespace_password,
            collection=self._collection_name,
-            collection_data=None,  # ty: ignore [invalid-argument-type]
+            collection_data=None,
            collection_data_filter=f"metadata_ ->> '{key}' = '{value}'",
        )
        self._client.delete_collection_data(request)
@@ -249,7 +249,7 @@ class AnalyticdbVectorOpenAPI:
            include_values=kwargs.pop("include_values", True),
            metrics=self.config.metrics,
            vector=query_vector,
-            content=None,  # ty: ignore [invalid-argument-type]
+            content=None,
            top_k=kwargs.get("top_k", 4),
            filter=where_clause,
        )
@@ -285,7 +285,7 @@ class AnalyticdbVectorOpenAPI:
            collection=self._collection_name,
            include_values=kwargs.pop("include_values", True),
            metrics=self.config.metrics,
-            vector=None,  # ty: ignore [invalid-argument-type]
+            vector=None,
            content=query,
            top_k=kwargs.get("top_k", 4),
            filter=where_clause,
--- a/api/core/rag/datasource/vdb/couchbase/couchbase_vector.py
+++ b/api/core/rag/datasource/vdb/couchbase/couchbase_vector.py
@@ -306,7 +306,7 @@ class CouchbaseVector(BaseVector):
    def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]:
        top_k = kwargs.get("top_k", 4)
        try:
-            CBrequest = search.SearchRequest.create(search.QueryStringQuery("text:" + query))  # ty: ignore [too-many-positional-arguments]
+            CBrequest = search.SearchRequest.create(search.QueryStringQuery("text:" + query))
            search_iter = self._scope.search(
                self._collection_name + "_search", CBrequest, SearchOptions(limit=top_k, fields=["*"])
            )
--- a/api/core/rag/datasource/vdb/myscale/myscale_vector.py
+++ b/api/core/rag/datasource/vdb/myscale/myscale_vector.py
@@ -33,6 +33,18 @@ class SortOrder(StrEnum):


 class MyScaleVector(BaseVector):
+    _METADATA_KEY_WHITELIST = {
+        "annotation_id",
+        "app_id",
+        "batch",
+        "dataset_id",
+        "doc_hash",
+        "doc_id",
+        "document_id",
+        "lang",
+        "source",
+    }
+
    def __init__(self, collection_name: str, config: MyScaleConfig, metric: str = "Cosine"):
        super().__init__(collection_name)
        self._config = config
@@ -45,10 +57,17 @@ class MyScaleVector(BaseVector):
            password=config.password,
        )
        self._client.command("SET allow_experimental_object_type=1")
+        self._qualified_table = f"{self._config.database}.{self._collection_name}"

    def get_type(self) -> str:
        return VectorType.MYSCALE

+    @classmethod
+    def _validate_metadata_key(cls, key: str) -> str:
+        if key not in cls._METADATA_KEY_WHITELIST:
+            raise ValueError(f"Unsupported metadata key: {key!r}")
+        return key
+
    def create(self, texts: list[Document], embeddings: list[list[float]], **kwargs):
        dimension = len(embeddings[0])
        self._create_collection(dimension)
@@ -59,7 +78,7 @@ class MyScaleVector(BaseVector):
        self._client.command(f"CREATE DATABASE IF NOT EXISTS {self._config.database}")
        fts_params = f"('{self._config.fts_params}')" if self._config.fts_params else ""
        sql = f"""
-            CREATE TABLE IF NOT EXISTS {self._config.database}.{self._collection_name}(
+            CREATE TABLE IF NOT EXISTS {self._qualified_table}(
                id String,
                text String,
                vector Array(Float32),
@@ -74,73 +93,103 @@ class MyScaleVector(BaseVector):
    def add_texts(self, documents: list[Document], embeddings: list[list[float]], **kwargs):
        ids = []
        columns = ["id", "text", "vector", "metadata"]
-        values = []
+        rows = []
        for i, doc in enumerate(documents):
            if doc.metadata is not None:
                doc_id = doc.metadata.get("doc_id", str(uuid.uuid4()))
-                row = (
-                    doc_id,
-                    self.escape_str(doc.page_content),
-                    embeddings[i],
-                    json.dumps(doc.metadata) if doc.metadata else {},
+                rows.append(
+                    (
+                        doc_id,
+                        doc.page_content,
+                        embeddings[i],
+                        json.dumps(doc.metadata or {}),
+                    )
                )
-                values.append(str(row))
                ids.append(doc_id)
-        sql = f"""
-            INSERT INTO {self._config.database}.{self._collection_name}
-            ({",".join(columns)}) VALUES {",".join(values)}
-        """
-        self._client.command(sql)
+        if rows:
+            self._client.insert(self._qualified_table, rows, column_names=columns)
        return ids

-    @staticmethod
-    def escape_str(value: Any) -> str:
-        return "".join(" " if c in {"\\", "'"} else c for c in str(value))
-
    def text_exists(self, id: str) -> bool:
-        results = self._client.query(f"SELECT id FROM {self._config.database}.{self._collection_name} WHERE id='{id}'")
+        results = self._client.query(
+            f"SELECT id FROM {self._qualified_table} WHERE id = %(id)s LIMIT 1",
+            parameters={"id": id},
+        )
        return results.row_count > 0

    def delete_by_ids(self, ids: list[str]):
        if not ids:
            return
+        placeholders, params = self._build_in_params("id", ids)
        self._client.command(
-            f"DELETE FROM {self._config.database}.{self._collection_name} WHERE id IN {str(tuple(ids))}"
+            f"DELETE FROM {self._qualified_table} WHERE id IN ({placeholders})",
+            parameters=params,
        )

    def get_ids_by_metadata_field(self, key: str, value: str):
+        safe_key = self._validate_metadata_key(key)
        rows = self._client.query(
-            f"SELECT DISTINCT id FROM {self._config.database}.{self._collection_name} WHERE metadata.{key}='{value}'"
+            f"SELECT DISTINCT id FROM {self._qualified_table} WHERE metadata.{safe_key} = %(value)s",
+            parameters={"value": value},
        ).result_rows
        return [row[0] for row in rows]

    def delete_by_metadata_field(self, key: str, value: str):
+        safe_key = self._validate_metadata_key(key)
        self._client.command(
-            f"DELETE FROM {self._config.database}.{self._collection_name} WHERE metadata.{key}='{value}'"
+            f"DELETE FROM {self._qualified_table} WHERE metadata.{safe_key} = %(value)s",
+            parameters={"value": value},
        )

    def search_by_vector(self, query_vector: list[float], **kwargs: Any) -> list[Document]:
-        return self._search(f"distance(vector, {str(query_vector)})", self._vec_order, **kwargs)
+        return self._search(
+            "distance(vector, %(query_vector)s)",
+            self._vec_order,
+            parameters={"query_vector": query_vector},
+            **kwargs,
+        )

    def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]:
-        return self._search(f"TextSearch('enable_nlq=false')(text, '{query}')", SortOrder.DESC, **kwargs)
+        return self._search(
+            "TextSearch('enable_nlq=false')(text, %(query)s)",
+            SortOrder.DESC,
+            parameters={"query": query},
+            **kwargs,
+        )

-    def _search(self, dist: str, order: SortOrder, **kwargs: Any) -> list[Document]:
+    @staticmethod
+    def _build_in_params(prefix: str, values: list[str]) -> tuple[str, dict[str, str]]:
+        params: dict[str, str] = {}
+        placeholders = []
+        for i, value in enumerate(values):
+            name = f"{prefix}_{i}"
+            placeholders.append(f"%({name})s")
+            params[name] = value
+        return ", ".join(placeholders), params
+
+    def _search(
+        self,
+        dist: str,
+        order: SortOrder,
+        parameters: dict[str, Any] | None = None,
+        **kwargs: Any,
+    ) -> list[Document]:
        top_k = kwargs.get("top_k", 4)
        if not isinstance(top_k, int) or top_k <= 0:
            raise ValueError("top_k must be a positive integer")
        score_threshold = float(kwargs.get("score_threshold") or 0.0)
-        where_str = (
-            f"WHERE dist < {1 - score_threshold}"
-            if self._metric.upper() == "COSINE" and order == SortOrder.ASC and score_threshold > 0.0
-            else ""
-        )
+        where_clauses = []
+        if self._metric.upper() == "COSINE" and order == SortOrder.ASC and score_threshold > 0.0:
+            where_clauses.append(f"dist < {1 - score_threshold}")
        document_ids_filter = kwargs.get("document_ids_filter")
+        query_params = dict(parameters or {})
        if document_ids_filter:
-            document_ids = ", ".join(f"'{id}'" for id in document_ids_filter)
-            where_str = f"{where_str} AND metadata['document_id'] in ({document_ids})"
+            placeholders, params = self._build_in_params("document_id", document_ids_filter)
+            where_clauses.append(f"metadata['document_id'] IN ({placeholders})")
+            query_params.update(params)
+        where_str = f"WHERE {' AND '.join(where_clauses)}" if where_clauses else ""
        sql = f"""
-            SELECT text, vector, metadata, {dist} as dist FROM {self._config.database}.{self._collection_name}
+            SELECT text, vector, metadata, {dist} as dist FROM {self._qualified_table}
            {where_str} ORDER BY dist {order.value} LIMIT {top_k}
        """
        try:
@@ -150,14 +199,14 @@ class MyScaleVector(BaseVector):
                    vector=r["vector"],
                    metadata=r["metadata"],
                )
-                for r in self._client.query(sql).named_results()
+                for r in self._client.query(sql, parameters=query_params).named_results()
            ]
        except Exception:
            logger.exception("Vector search operation failed")
            return []

    def delete(self):
-        self._client.command(f"DROP TABLE IF EXISTS {self._config.database}.{self._collection_name}")
+        self._client.command(f"DROP TABLE IF EXISTS {self._qualified_table}")


 class MyScaleVectorFactory(AbstractVectorFactory):
--- a/api/core/workflow/graph_engine/graph_engine.py
+++ b/api/core/workflow/graph_engine/graph_engine.py
@@ -9,7 +9,6 @@ from __future__ import annotations

 import logging
 import queue
-import threading
 from collections.abc import Generator
 from typing import TYPE_CHECKING, cast, final

@@ -77,13 +76,10 @@ class GraphEngine:
        config: GraphEngineConfig = _DEFAULT_CONFIG,
    ) -> None:
        """Initialize the graph engine with all subsystems and dependencies."""
-        # stop event
-        self._stop_event = threading.Event()

        # Bind runtime state to current workflow context
        self._graph = graph
        self._graph_runtime_state = graph_runtime_state
-        self._graph_runtime_state.stop_event = self._stop_event
        self._graph_runtime_state.configure(graph=cast("GraphProtocol", graph))
        self._command_channel = command_channel
        self._config = config
@@ -163,7 +159,6 @@ class GraphEngine:
            layers=self._layers,
            execution_context=execution_context,
            config=self._config,
-            stop_event=self._stop_event,
        )

        # === Orchestration ===
@@ -194,7 +189,6 @@ class GraphEngine:
            event_handler=self._event_handler_registry,
            execution_coordinator=self._execution_coordinator,
            event_emitter=self._event_manager,
-            stop_event=self._stop_event,
        )

        # === Validation ===
@@ -314,7 +308,6 @@ class GraphEngine:

    def _start_execution(self, *, resume: bool = False) -> None:
        """Start execution subsystems."""
-        self._stop_event.clear()
        paused_nodes: list[str] = []
        deferred_nodes: list[str] = []
        if resume:
@@ -348,7 +341,6 @@ class GraphEngine:

    def _stop_execution(self) -> None:
        """Stop execution subsystems."""
-        self._stop_event.set()
        self._dispatcher.stop()
        self._worker_pool.stop()
        # Don't mark complete here as the dispatcher already does it
--- a/api/core/workflow/graph_engine/orchestration/dispatcher.py
+++ b/api/core/workflow/graph_engine/orchestration/dispatcher.py
@@ -44,7 +44,6 @@ class Dispatcher:
        event_queue: queue.Queue[GraphNodeEventBase],
        event_handler: "EventHandler",
        execution_coordinator: ExecutionCoordinator,
-        stop_event: threading.Event,
        event_emitter: EventManager | None = None,
    ) -> None:
        """
@@ -62,7 +61,7 @@ class Dispatcher:
        self._event_emitter = event_emitter

        self._thread: threading.Thread | None = None
-        self._stop_event = stop_event
+        self._stop_event = threading.Event()
        self._start_time: float | None = None

    def start(self) -> None:
@@ -70,12 +69,14 @@ class Dispatcher:
        if self._thread and self._thread.is_alive():
            return

+        self._stop_event.clear()
        self._start_time = time.time()
        self._thread = threading.Thread(target=self._dispatcher_loop, name="GraphDispatcher", daemon=True)
        self._thread.start()

    def stop(self) -> None:
        """Stop the dispatcher thread."""
+        self._stop_event.set()
        if self._thread and self._thread.is_alive():
            self._thread.join(timeout=2.0)

--- a/api/core/workflow/graph_engine/worker.py
+++ b/api/core/workflow/graph_engine/worker.py
@@ -42,7 +42,6 @@ class Worker(threading.Thread):
        event_queue: queue.Queue[GraphNodeEventBase],
        graph: Graph,
        layers: Sequence[GraphEngineLayer],
-        stop_event: threading.Event,
        worker_id: int = 0,
        execution_context: IExecutionContext | None = None,
    ) -> None:
@@ -63,16 +62,13 @@ class Worker(threading.Thread):
        self._graph = graph
        self._worker_id = worker_id
        self._execution_context = execution_context
-        self._stop_event = stop_event
+        self._stop_event = threading.Event()
        self._layers = layers if layers is not None else []
        self._last_task_time = time.time()

    def stop(self) -> None:
-        """Worker is controlled via shared stop_event from GraphEngine.
-
-        This method is a no-op retained for backward compatibility.
-        """
-        pass
+        """Signal the worker to stop processing."""
+        self._stop_event.set()

    @property
    def is_idle(self) -> bool:
--- a/api/core/workflow/graph_engine/worker_management/worker_pool.py
+++ b/api/core/workflow/graph_engine/worker_management/worker_pool.py
@@ -37,7 +37,6 @@ class WorkerPool:
        event_queue: queue.Queue[GraphNodeEventBase],
        graph: Graph,
        layers: list[GraphEngineLayer],
-        stop_event: threading.Event,
        config: GraphEngineConfig,
        execution_context: IExecutionContext | None = None,
    ) -> None:
@@ -64,7 +63,6 @@ class WorkerPool:
        self._worker_counter = 0
        self._lock = threading.RLock()
        self._running = False
-        self._stop_event = stop_event

        # No longer tracking worker states with callbacks to avoid lock contention

@@ -135,7 +133,6 @@ class WorkerPool:
            layers=self._layers,
            worker_id=worker_id,
            execution_context=self._execution_context,
-            stop_event=self._stop_event,
        )

        worker.start()
--- a/api/core/workflow/nodes/base/node.py
+++ b/api/core/workflow/nodes/base/node.py
@@ -302,10 +302,6 @@ class Node(Generic[NodeDataT]):
        """
        raise NotImplementedError

-    def _should_stop(self) -> bool:
-        """Check if execution should be stopped."""
-        return self.graph_runtime_state.stop_event.is_set()
-
    def run(self) -> Generator[GraphNodeEventBase, None, None]:
        execution_id = self.ensure_execution_id()
        self._start_at = naive_utc_now()
@@ -374,21 +370,6 @@ class Node(Generic[NodeDataT]):
                    yield event
                else:
                    yield event
-
-                if self._should_stop():
-                    error_message = "Execution cancelled"
-                    yield NodeRunFailedEvent(
-                        id=self.execution_id,
-                        node_id=self._node_id,
-                        node_type=self.node_type,
-                        start_at=self._start_at,
-                        node_run_result=NodeRunResult(
-                            status=WorkflowNodeExecutionStatus.FAILED,
-                            error=error_message,
-                        ),
-                        error=error_message,
-                    )
-                    return
        except Exception as e:
            logger.exception("Node %s failed to run", self._node_id)
            result = NodeRunResult(
--- a/api/core/workflow/runtime/graph_runtime_state.py
+++ b/api/core/workflow/runtime/graph_runtime_state.py
@@ -2,7 +2,6 @@ from __future__ import annotations

 import importlib
 import json
-import threading
 from collections.abc import Mapping, Sequence
 from copy import deepcopy
 from dataclasses import dataclass
@@ -219,8 +218,6 @@ class GraphRuntimeState:
        self._pending_graph_node_states: dict[str, NodeState] | None = None
        self._pending_graph_edge_states: dict[str, NodeState] | None = None

-        self.stop_event: threading.Event = threading.Event()
-
        if graph is not None:
            self.attach_graph(graph)

--- a/api/pyproject.toml
+++ b/api/pyproject.toml
@@ -116,7 +116,6 @@ dev = [
    "dotenv-linter~=0.5.0",
    "faker~=38.2.0",
    "lxml-stubs~=0.5.1",
-    "ty>=0.0.14",
    "basedpyright~=1.31.0",
    "ruff~=0.14.0",
    "pytest~=8.3.2",
--- a/api/services/conversation_service.py
+++ b/api/services/conversation_service.py
@@ -180,6 +180,14 @@ class ConversationService:

    @classmethod
    def delete(cls, app_model: App, conversation_id: str, user: Union[Account, EndUser] | None):
+        """
+        Delete a conversation only if it belongs to the given user and app context.
+
+        Raises:
+            ConversationNotExistsError: When the conversation is not visible to the current user.
+        """
+        conversation = cls.get_conversation(app_model, conversation_id, user)
+
        try:
            logger.info(
                "Initiating conversation deletion for app_name %s, conversation_id: %s",
@@ -187,10 +195,10 @@ class ConversationService:
                conversation_id,
            )

-            db.session.query(Conversation).where(Conversation.id == conversation_id).delete(synchronize_session=False)
+            db.session.delete(conversation)
            db.session.commit()

-            delete_conversation_related_data.delay(conversation_id)
+            delete_conversation_related_data.delay(conversation.id)

        except Exception as e:
            db.session.rollback()
--- a/api/tasks/document_indexing_sync_task.py
+++ b/api/tasks/document_indexing_sync_task.py
@@ -1,3 +1,4 @@
+import json
 import logging
 import time

@@ -125,7 +126,7 @@ def document_indexing_sync_task(dataset_id: str, document_id: str):

        data_source_info = document.data_source_info_dict
        data_source_info["last_edited_time"] = last_edited_time
-        document.data_source_info = data_source_info
+        document.data_source_info = json.dumps(data_source_info)

        document.indexing_status = "parsing"
        document.processing_started_at = naive_utc_now()
--- a/api/tests/test_containers_integration_tests/services/test_conversation_service.py
+++ b/api/tests/test_containers_integration_tests/services/test_conversation_service.py
@@ -1034,3 +1034,34 @@ class TestConversationServiceExport:
        # Step 2: Async cleanup task triggered
        # The Celery task will handle cleanup of messages, annotations, etc.
        mock_delete_task.delay.assert_called_once_with(conversation_id)
+
+    @patch("services.conversation_service.delete_conversation_related_data")
+    def test_delete_conversation_not_owned_by_account(self, mock_delete_task, db_session_with_containers):
+        """
+        Test deletion is denied when conversation belongs to a different account.
+        """
+        # Arrange
+        app_model, owner_account = ConversationServiceIntegrationTestDataFactory.create_app_and_account(
+            db_session_with_containers
+        )
+        _, other_account = ConversationServiceIntegrationTestDataFactory.create_app_and_account(
+            db_session_with_containers
+        )
+        conversation = ConversationServiceIntegrationTestDataFactory.create_conversation(
+            db_session_with_containers,
+            app_model,
+            owner_account,
+        )
+
+        # Act & Assert
+        with pytest.raises(ConversationNotExistsError):
+            ConversationService.delete(
+                app_model=app_model,
+                conversation_id=conversation.id,
+                user=other_account,
+            )
+
+        # Verify no deletion and no async cleanup trigger
+        not_deleted = db_session_with_containers.scalar(select(Conversation).where(Conversation.id == conversation.id))
+        assert not_deleted is not None
+        mock_delete_task.delay.assert_not_called()
--- a/api/tests/test_containers_integration_tests/tasks/test_document_indexing_sync_task.py
+++ b/api/tests/test_containers_integration_tests/tasks/test_document_indexing_sync_task.py
@@ -12,8 +12,6 @@ from unittest.mock import Mock, patch
 from uuid import uuid4

 import pytest
-from psycopg2.extensions import register_adapter
-from psycopg2.extras import Json

 from core.indexing_runner import DocumentIsPausedError, IndexingRunner
 from models import Account, Tenant, TenantAccountJoin, TenantAccountRole
@@ -21,12 +19,6 @@ from models.dataset import Dataset, Document, DocumentSegment
 from tasks.document_indexing_sync_task import document_indexing_sync_task


-@pytest.fixture(autouse=True)
-def _register_dict_adapter_for_psycopg2():
-    """Align test DB adapter behavior with dict payloads used in task update flow."""
-    register_adapter(dict, Json)
-
-
 class DocumentIndexingSyncTaskTestDataFactory:
    """Create real DB entities for document indexing sync integration tests."""

--- a/api/tests/unit_tests/core/rag/datasource/vdb/myscale/test_myscale_vector.py
+++ b/api/tests/unit_tests/core/rag/datasource/vdb/myscale/test_myscale_vector.py
@@ -0,0 +1,32 @@
+from unittest.mock import MagicMock, patch
+
+from core.rag.datasource.vdb.myscale.myscale_vector import MyScaleConfig, MyScaleVector
+
+
+@patch("core.rag.datasource.vdb.myscale.myscale_vector.get_client")
+def test_search_by_vector_uses_parameterized_query(mock_get_client):
+    mock_client = MagicMock()
+    mock_get_client.return_value = mock_client
+
+    vector = MyScaleVector(
+        collection_name="test_collection",
+        config=MyScaleConfig(
+            host="localhost",
+            port=8123,
+            user="default",
+            password="",
+            database="dify",
+            fts_params="",
+        ),
+    )
+    vector._search = MagicMock(return_value=[])
+
+    query_vector = [0.1, 0.2, 0.3]
+    vector.search_by_vector(query_vector, top_k=5)
+
+    vector._search.assert_called_once_with(
+        "distance(vector, %(query_vector)s)",
+        vector._vec_order,
+        parameters={"query_vector": query_vector},
+        top_k=5,
+    )
--- a/api/tests/unit_tests/core/workflow/graph_engine/orchestration/test_dispatcher.py
+++ b/api/tests/unit_tests/core/workflow/graph_engine/orchestration/test_dispatcher.py
@@ -3,7 +3,6 @@
 from __future__ import annotations

 import queue
-import threading
 from unittest import mock

 from core.workflow.entities.pause_reason import SchedulingPause
@@ -37,7 +36,6 @@ def test_dispatcher_should_consume_remains_events_after_pause():
        event_queue=event_queue,
        event_handler=event_handler,
        execution_coordinator=execution_coordinator,
-        stop_event=threading.Event(),
    )
    dispatcher._dispatcher_loop()
    assert event_queue.empty()
@@ -98,7 +96,6 @@ def _run_dispatcher_for_event(event) -> int:
        event_queue=event_queue,
        event_handler=event_handler,
        execution_coordinator=coordinator,
-        stop_event=threading.Event(),
    )

    dispatcher._dispatcher_loop()
@@ -184,7 +181,6 @@ def test_dispatcher_drain_event_queue():
        event_queue=event_queue,
        event_handler=event_handler,
        execution_coordinator=coordinator,
-        stop_event=threading.Event(),
    )

    dispatcher._dispatcher_loop()
--- a/api/tests/unit_tests/core/workflow/graph_engine/test_dispatcher_pause_drain.py
+++ b/api/tests/unit_tests/core/workflow/graph_engine/test_dispatcher_pause_drain.py
@@ -1,5 +1,4 @@
 import queue
-import threading
 from datetime import datetime

 from core.workflow.enums import NodeType, WorkflowNodeExecutionStatus
@@ -65,7 +64,6 @@ def test_dispatcher_drains_events_when_paused() -> None:
        event_handler=handler,
        execution_coordinator=coordinator,
        event_emitter=None,
-        stop_event=threading.Event(),
    )

    dispatcher._dispatcher_loop()
--- a/api/tests/unit_tests/core/workflow/graph_engine/test_stop_event.py
+++ b/api/tests/unit_tests/core/workflow/graph_engine/test_stop_event.py
@@ -1,550 +0,0 @@
-"""
-Unit tests for stop_event functionality in GraphEngine.
-
-Tests the unified stop_event management by GraphEngine and its propagation
-to WorkerPool, Worker, Dispatcher, and Nodes.
-"""
-
-import threading
-import time
-from unittest.mock import MagicMock, Mock, patch
-
-from core.app.entities.app_invoke_entities import InvokeFrom
-from core.workflow.entities.graph_init_params import GraphInitParams
-from core.workflow.graph import Graph
-from core.workflow.graph_engine import GraphEngine, GraphEngineConfig
-from core.workflow.graph_engine.command_channels import InMemoryChannel
-from core.workflow.graph_events import (
-    GraphRunStartedEvent,
-    GraphRunSucceededEvent,
-    NodeRunStartedEvent,
-)
-from core.workflow.nodes.answer.answer_node import AnswerNode
-from core.workflow.nodes.start.start_node import StartNode
-from core.workflow.runtime import GraphRuntimeState, VariablePool
-from models.enums import UserFrom
-
-
-class TestStopEventPropagation:
-    """Test suite for stop_event propagation through GraphEngine components."""
-
-    def test_graph_engine_creates_stop_event(self):
-        """Test that GraphEngine creates a stop_event on initialization."""
-        runtime_state = GraphRuntimeState(variable_pool=VariablePool(), start_at=time.perf_counter())
-        mock_graph = MagicMock(spec=Graph)
-        mock_graph.nodes = {}
-        mock_graph.edges = {}
-        mock_graph.root_node = MagicMock()
-
-        engine = GraphEngine(
-            workflow_id="test_workflow",
-            graph=mock_graph,
-            graph_runtime_state=runtime_state,
-            command_channel=InMemoryChannel(),
-            config=GraphEngineConfig(),
-        )
-
-        # Verify stop_event was created
-        assert engine._stop_event is not None
-        assert isinstance(engine._stop_event, threading.Event)
-
-        # Verify it was set in graph_runtime_state
-        assert runtime_state.stop_event is not None
-        assert runtime_state.stop_event is engine._stop_event
-
-    def test_stop_event_cleared_on_start(self):
-        """Test that stop_event is cleared when execution starts."""
-        runtime_state = GraphRuntimeState(variable_pool=VariablePool(), start_at=time.perf_counter())
-        mock_graph = MagicMock(spec=Graph)
-        mock_graph.nodes = {}
-        mock_graph.edges = {}
-        mock_graph.root_node = MagicMock()
-        mock_graph.root_node.id = "start"  # Set proper id
-
-        start_node = StartNode(
-            id="start",
-            config={"id": "start", "data": {"title": "start", "variables": []}},
-            graph_init_params=GraphInitParams(
-                tenant_id="test_tenant",
-                app_id="test_app",
-                workflow_id="test_workflow",
-                graph_config={},
-                user_id="test_user",
-                user_from=UserFrom.ACCOUNT,
-                invoke_from=InvokeFrom.DEBUGGER,
-                call_depth=0,
-            ),
-            graph_runtime_state=runtime_state,
-        )
-        mock_graph.nodes["start"] = start_node
-        mock_graph.get_outgoing_edges = MagicMock(return_value=[])
-        mock_graph.get_incoming_edges = MagicMock(return_value=[])
-
-        engine = GraphEngine(
-            workflow_id="test_workflow",
-            graph=mock_graph,
-            graph_runtime_state=runtime_state,
-            command_channel=InMemoryChannel(),
-            config=GraphEngineConfig(),
-        )
-
-        # Set the stop_event before running
-        engine._stop_event.set()
-        assert engine._stop_event.is_set()
-
-        # Run the engine (should clear the stop_event)
-        events = list(engine.run())
-
-        # After running, stop_event should be set again (by _stop_execution)
-        # But during start it was cleared
-        assert any(isinstance(e, GraphRunStartedEvent) for e in events)
-        assert any(isinstance(e, GraphRunSucceededEvent) for e in events)
-
-    def test_stop_event_set_on_stop(self):
-        """Test that stop_event is set when execution stops."""
-        runtime_state = GraphRuntimeState(variable_pool=VariablePool(), start_at=time.perf_counter())
-        mock_graph = MagicMock(spec=Graph)
-        mock_graph.nodes = {}
-        mock_graph.edges = {}
-        mock_graph.root_node = MagicMock()
-        mock_graph.root_node.id = "start"  # Set proper id
-
-        start_node = StartNode(
-            id="start",
-            config={"id": "start", "data": {"title": "start", "variables": []}},
-            graph_init_params=GraphInitParams(
-                tenant_id="test_tenant",
-                app_id="test_app",
-                workflow_id="test_workflow",
-                graph_config={},
-                user_id="test_user",
-                user_from=UserFrom.ACCOUNT,
-                invoke_from=InvokeFrom.DEBUGGER,
-                call_depth=0,
-            ),
-            graph_runtime_state=runtime_state,
-        )
-        mock_graph.nodes["start"] = start_node
-        mock_graph.get_outgoing_edges = MagicMock(return_value=[])
-        mock_graph.get_incoming_edges = MagicMock(return_value=[])
-
-        engine = GraphEngine(
-            workflow_id="test_workflow",
-            graph=mock_graph,
-            graph_runtime_state=runtime_state,
-            command_channel=InMemoryChannel(),
-            config=GraphEngineConfig(),
-        )
-
-        # Initially not set
-        assert not engine._stop_event.is_set()
-
-        # Run the engine
-        list(engine.run())
-
-        # After execution completes, stop_event should be set
-        assert engine._stop_event.is_set()
-
-    def test_stop_event_passed_to_worker_pool(self):
-        """Test that stop_event is passed to WorkerPool."""
-        runtime_state = GraphRuntimeState(variable_pool=VariablePool(), start_at=time.perf_counter())
-        mock_graph = MagicMock(spec=Graph)
-        mock_graph.nodes = {}
-        mock_graph.edges = {}
-        mock_graph.root_node = MagicMock()
-
-        engine = GraphEngine(
-            workflow_id="test_workflow",
-            graph=mock_graph,
-            graph_runtime_state=runtime_state,
-            command_channel=InMemoryChannel(),
-            config=GraphEngineConfig(),
-        )
-
-        # Verify WorkerPool has the stop_event
-        assert engine._worker_pool._stop_event is not None
-        assert engine._worker_pool._stop_event is engine._stop_event
-
-    def test_stop_event_passed_to_dispatcher(self):
-        """Test that stop_event is passed to Dispatcher."""
-        runtime_state = GraphRuntimeState(variable_pool=VariablePool(), start_at=time.perf_counter())
-        mock_graph = MagicMock(spec=Graph)
-        mock_graph.nodes = {}
-        mock_graph.edges = {}
-        mock_graph.root_node = MagicMock()
-
-        engine = GraphEngine(
-            workflow_id="test_workflow",
-            graph=mock_graph,
-            graph_runtime_state=runtime_state,
-            command_channel=InMemoryChannel(),
-            config=GraphEngineConfig(),
-        )
-
-        # Verify Dispatcher has the stop_event
-        assert engine._dispatcher._stop_event is not None
-        assert engine._dispatcher._stop_event is engine._stop_event
-
-
-class TestNodeStopCheck:
-    """Test suite for Node._should_stop() functionality."""
-
-    def test_node_should_stop_checks_runtime_state(self):
-        """Test that Node._should_stop() checks GraphRuntimeState.stop_event."""
-        runtime_state = GraphRuntimeState(variable_pool=VariablePool(), start_at=time.perf_counter())
-
-        answer_node = AnswerNode(
-            id="answer",
-            config={"id": "answer", "data": {"title": "answer", "answer": "{{#start.result#}}"}},
-            graph_init_params=GraphInitParams(
-                tenant_id="test_tenant",
-                app_id="test_app",
-                workflow_id="test_workflow",
-                graph_config={},
-                user_id="test_user",
-                user_from=UserFrom.ACCOUNT,
-                invoke_from=InvokeFrom.DEBUGGER,
-                call_depth=0,
-            ),
-            graph_runtime_state=runtime_state,
-        )
-
-        # Initially stop_event is not set
-        assert not answer_node._should_stop()
-
-        # Set the stop_event
-        runtime_state.stop_event.set()
-
-        # Now _should_stop should return True
-        assert answer_node._should_stop()
-
-    def test_node_run_checks_stop_event_between_yields(self):
-        """Test that Node.run() checks stop_event between yielding events."""
-        runtime_state = GraphRuntimeState(variable_pool=VariablePool(), start_at=time.perf_counter())
-
-        # Create a simple node
-        answer_node = AnswerNode(
-            id="answer",
-            config={"id": "answer", "data": {"title": "answer", "answer": "hello"}},
-            graph_init_params=GraphInitParams(
-                tenant_id="test_tenant",
-                app_id="test_app",
-                workflow_id="test_workflow",
-                graph_config={},
-                user_id="test_user",
-                user_from=UserFrom.ACCOUNT,
-                invoke_from=InvokeFrom.DEBUGGER,
-                call_depth=0,
-            ),
-            graph_runtime_state=runtime_state,
-        )
-
-        # Set stop_event BEFORE running the node
-        runtime_state.stop_event.set()
-
-        # Run the node - should yield start event then detect stop
-        # The node should check stop_event before processing
-        assert answer_node._should_stop(), "stop_event should be set"
-
-        # Run and collect events
-        events = list(answer_node.run())
-
-        # Since stop_event is set at the start, we should get:
-        # 1. NodeRunStartedEvent (always yielded first)
-        # 2. Either NodeRunFailedEvent (if detected early) or NodeRunSucceededEvent (if too fast)
-        assert len(events) >= 2
-        assert isinstance(events[0], NodeRunStartedEvent)
-
-        # Note: AnswerNode is very simple and might complete before stop check
-        # The important thing is that _should_stop() returns True when stop_event is set
-        assert answer_node._should_stop()
-
-
-class TestStopEventIntegration:
-    """Integration tests for stop_event in workflow execution."""
-
-    def test_simple_workflow_respects_stop_event(self):
-        """Test that a simple workflow respects stop_event."""
-        runtime_state = GraphRuntimeState(variable_pool=VariablePool(), start_at=time.perf_counter())
-
-        mock_graph = MagicMock(spec=Graph)
-        mock_graph.nodes = {}
-        mock_graph.edges = {}
-        mock_graph.root_node = MagicMock()
-        mock_graph.root_node.id = "start"
-
-        # Create start and answer nodes
-        start_node = StartNode(
-            id="start",
-            config={"id": "start", "data": {"title": "start", "variables": []}},
-            graph_init_params=GraphInitParams(
-                tenant_id="test_tenant",
-                app_id="test_app",
-                workflow_id="test_workflow",
-                graph_config={},
-                user_id="test_user",
-                user_from=UserFrom.ACCOUNT,
-                invoke_from=InvokeFrom.DEBUGGER,
-                call_depth=0,
-            ),
-            graph_runtime_state=runtime_state,
-        )
-
-        answer_node = AnswerNode(
-            id="answer",
-            config={"id": "answer", "data": {"title": "answer", "answer": "hello"}},
-            graph_init_params=GraphInitParams(
-                tenant_id="test_tenant",
-                app_id="test_app",
-                workflow_id="test_workflow",
-                graph_config={},
-                user_id="test_user",
-                user_from=UserFrom.ACCOUNT,
-                invoke_from=InvokeFrom.DEBUGGER,
-                call_depth=0,
-            ),
-            graph_runtime_state=runtime_state,
-        )
-
-        mock_graph.nodes["start"] = start_node
-        mock_graph.nodes["answer"] = answer_node
-        mock_graph.get_outgoing_edges = MagicMock(return_value=[])
-        mock_graph.get_incoming_edges = MagicMock(return_value=[])
-
-        engine = GraphEngine(
-            workflow_id="test_workflow",
-            graph=mock_graph,
-            graph_runtime_state=runtime_state,
-            command_channel=InMemoryChannel(),
-            config=GraphEngineConfig(),
-        )
-
-        # Set stop_event before running
-        runtime_state.stop_event.set()
-
-        # Run the engine
-        events = list(engine.run())
-
-        # Should get started event but not succeeded (due to stop)
-        assert any(isinstance(e, GraphRunStartedEvent) for e in events)
-        # The workflow should still complete (start node runs quickly)
-        # but answer node might be cancelled depending on timing
-
-    def test_stop_event_with_concurrent_nodes(self):
-        """Test stop_event behavior with multiple concurrent nodes."""
-        runtime_state = GraphRuntimeState(variable_pool=VariablePool(), start_at=time.perf_counter())
-
-        mock_graph = MagicMock(spec=Graph)
-        mock_graph.nodes = {}
-        mock_graph.edges = {}
-        mock_graph.root_node = MagicMock()
-
-        # Create multiple nodes
-        for i in range(3):
-            answer_node = AnswerNode(
-                id=f"answer_{i}",
-                config={"id": f"answer_{i}", "data": {"title": f"answer_{i}", "answer": f"test{i}"}},
-                graph_init_params=GraphInitParams(
-                    tenant_id="test_tenant",
-                    app_id="test_app",
-                    workflow_id="test_workflow",
-                    graph_config={},
-                    user_id="test_user",
-                    user_from=UserFrom.ACCOUNT,
-                    invoke_from=InvokeFrom.DEBUGGER,
-                    call_depth=0,
-                ),
-                graph_runtime_state=runtime_state,
-            )
-            mock_graph.nodes[f"answer_{i}"] = answer_node
-
-        mock_graph.get_outgoing_edges = MagicMock(return_value=[])
-        mock_graph.get_incoming_edges = MagicMock(return_value=[])
-
-        engine = GraphEngine(
-            workflow_id="test_workflow",
-            graph=mock_graph,
-            graph_runtime_state=runtime_state,
-            command_channel=InMemoryChannel(),
-            config=GraphEngineConfig(),
-        )
-
-        # All nodes should share the same stop_event
-        for node in mock_graph.nodes.values():
-            assert node.graph_runtime_state.stop_event is runtime_state.stop_event
-            assert node.graph_runtime_state.stop_event is engine._stop_event
-
-
-class TestStopEventTimeoutBehavior:
-    """Test stop_event behavior with join timeouts."""
-
-    @patch("core.workflow.graph_engine.orchestration.dispatcher.threading.Thread", autospec=True)
-    def test_dispatcher_uses_shorter_timeout(self, mock_thread_cls: MagicMock):
-        """Test that Dispatcher uses 2s timeout instead of 10s."""
-        runtime_state = GraphRuntimeState(variable_pool=VariablePool(), start_at=time.perf_counter())
-        mock_graph = MagicMock(spec=Graph)
-        mock_graph.nodes = {}
-        mock_graph.edges = {}
-        mock_graph.root_node = MagicMock()
-
-        engine = GraphEngine(
-            workflow_id="test_workflow",
-            graph=mock_graph,
-            graph_runtime_state=runtime_state,
-            command_channel=InMemoryChannel(),
-            config=GraphEngineConfig(),
-        )
-
-        dispatcher = engine._dispatcher
-        dispatcher.start()  # This will create and start the mocked thread
-
-        mock_thread_instance = mock_thread_cls.return_value
-        mock_thread_instance.is_alive.return_value = True
-
-        dispatcher.stop()
-
-        mock_thread_instance.join.assert_called_once_with(timeout=2.0)
-
-    @patch("core.workflow.graph_engine.worker_management.worker_pool.Worker", autospec=True)
-    def test_worker_pool_uses_shorter_timeout(self, mock_worker_cls: MagicMock):
-        """Test that WorkerPool uses 2s timeout instead of 10s."""
-        runtime_state = GraphRuntimeState(variable_pool=VariablePool(), start_at=time.perf_counter())
-        mock_graph = MagicMock(spec=Graph)
-        mock_graph.nodes = {}
-        mock_graph.edges = {}
-        mock_graph.root_node = MagicMock()
-
-        engine = GraphEngine(
-            workflow_id="test_workflow",
-            graph=mock_graph,
-            graph_runtime_state=runtime_state,
-            command_channel=InMemoryChannel(),
-            config=GraphEngineConfig(),
-        )
-
-        worker_pool = engine._worker_pool
-        worker_pool.start(initial_count=1)  # Start with one worker
-
-        mock_worker_instance = mock_worker_cls.return_value
-        mock_worker_instance.is_alive.return_value = True
-
-        worker_pool.stop()
-
-        mock_worker_instance.join.assert_called_once_with(timeout=2.0)
-
-
-class TestStopEventResumeBehavior:
-    """Test stop_event behavior during workflow resume."""
-
-    def test_stop_event_cleared_on_resume(self):
-        """Test that stop_event is cleared when resuming a paused workflow."""
-        runtime_state = GraphRuntimeState(variable_pool=VariablePool(), start_at=time.perf_counter())
-        mock_graph = MagicMock(spec=Graph)
-        mock_graph.nodes = {}
-        mock_graph.edges = {}
-        mock_graph.root_node = MagicMock()
-        mock_graph.root_node.id = "start"  # Set proper id
-
-        start_node = StartNode(
-            id="start",
-            config={"id": "start", "data": {"title": "start", "variables": []}},
-            graph_init_params=GraphInitParams(
-                tenant_id="test_tenant",
-                app_id="test_app",
-                workflow_id="test_workflow",
-                graph_config={},
-                user_id="test_user",
-                user_from=UserFrom.ACCOUNT,
-                invoke_from=InvokeFrom.DEBUGGER,
-                call_depth=0,
-            ),
-            graph_runtime_state=runtime_state,
-        )
-        mock_graph.nodes["start"] = start_node
-        mock_graph.get_outgoing_edges = MagicMock(return_value=[])
-        mock_graph.get_incoming_edges = MagicMock(return_value=[])
-
-        engine = GraphEngine(
-            workflow_id="test_workflow",
-            graph=mock_graph,
-            graph_runtime_state=runtime_state,
-            command_channel=InMemoryChannel(),
-            config=GraphEngineConfig(),
-        )
-
-        # Simulate a previous execution that set stop_event
-        engine._stop_event.set()
-        assert engine._stop_event.is_set()
-
-        # Run the engine (should clear stop_event in _start_execution)
-        events = list(engine.run())
-
-        # Execution should complete successfully
-        assert any(isinstance(e, GraphRunStartedEvent) for e in events)
-        assert any(isinstance(e, GraphRunSucceededEvent) for e in events)
-
-
-class TestWorkerStopBehavior:
-    """Test Worker behavior with shared stop_event."""
-
-    def test_worker_uses_shared_stop_event(self):
-        """Test that Worker uses shared stop_event from GraphEngine."""
-        runtime_state = GraphRuntimeState(variable_pool=VariablePool(), start_at=time.perf_counter())
-        mock_graph = MagicMock(spec=Graph)
-        mock_graph.nodes = {}
-        mock_graph.edges = {}
-        mock_graph.root_node = MagicMock()
-
-        engine = GraphEngine(
-            workflow_id="test_workflow",
-            graph=mock_graph,
-            graph_runtime_state=runtime_state,
-            command_channel=InMemoryChannel(),
-            config=GraphEngineConfig(),
-        )
-
-        # Get the worker pool and check workers
-        worker_pool = engine._worker_pool
-
-        # Start the worker pool to create workers
-        worker_pool.start()
-
-        # Check that at least one worker was created
-        assert len(worker_pool._workers) > 0
-
-        # Verify workers use the shared stop_event
-        for worker in worker_pool._workers:
-            assert worker._stop_event is engine._stop_event
-
-        # Clean up
-        worker_pool.stop()
-
-    def test_worker_stop_is_noop(self):
-        """Test that Worker.stop() is now a no-op."""
-        runtime_state = GraphRuntimeState(variable_pool=VariablePool(), start_at=time.perf_counter())
-
-        # Create a mock worker
-        from core.workflow.graph_engine.ready_queue import InMemoryReadyQueue
-        from core.workflow.graph_engine.worker import Worker
-
-        ready_queue = InMemoryReadyQueue()
-        event_queue = MagicMock()
-
-        # Create a proper mock graph with real dict
-        mock_graph = Mock(spec=Graph)
-        mock_graph.nodes = {}  # Use real dict
-
-        stop_event = threading.Event()
-
-        worker = Worker(
-            ready_queue=ready_queue,
-            event_queue=event_queue,
-            graph=mock_graph,
-            layers=[],
-            stop_event=stop_event,
-        )
-
-        # Calling stop() should do nothing (no-op)
-        # and should NOT set the stop_event
-        worker.stop()
-        assert not stop_event.is_set()
--- a/api/tests/unit_tests/tasks/test_document_indexing_sync_task.py
+++ b/api/tests/unit_tests/tasks/test_document_indexing_sync_task.py
@@ -5,6 +5,7 @@ These tests intentionally stay in unit scope because they validate call argument
 for external collaborators rather than SQL-backed state transitions.
 """

+import json
 import uuid
 from unittest.mock import MagicMock, Mock, patch

@@ -196,3 +197,78 @@ class TestDocumentIndexingSyncTaskCollaboratorParams:
            provider="notion_datasource",
            plugin_id="langgenius/notion_datasource",
        )
+
+
+class TestDataSourceInfoSerialization:
+    """Regression test: data_source_info must be written as a JSON string, not a raw dict.
+
+    See https://github.com/langgenius/dify/issues/32705
+    psycopg2 raises ``ProgrammingError: can't adapt type 'dict'`` when a Python
+    dict is passed directly to a text/LongText column.
+    """
+
+    def test_data_source_info_serialized_as_json_string(
+        self,
+        mock_document,
+        mock_dataset,
+        dataset_id,
+        document_id,
+    ):
+        """After the sync task runs, data_source_info is a JSON string carrying the new last_edited_time."""
+        with (
+            patch("tasks.document_indexing_sync_task.session_factory") as mock_session_factory,
+            patch("tasks.document_indexing_sync_task.DatasourceProviderService") as mock_service_class,
+            patch("tasks.document_indexing_sync_task.NotionExtractor") as mock_extractor_class,
+            patch("tasks.document_indexing_sync_task.IndexProcessorFactory") as mock_ipf,
+            patch("tasks.document_indexing_sync_task.IndexingRunner") as mock_runner_class,
+        ):
+            # External collaborators: credential service, Notion extractor, index processor, indexing runner
+            mock_service = MagicMock()
+            mock_service.get_datasource_credentials.return_value = {"integration_secret": "token"}
+            mock_service_class.return_value = mock_service
+
+            mock_extractor = MagicMock()
+            # Return a *different* timestamp so the task enters the sync/update branch
+            mock_extractor.get_notion_last_edited_time.return_value = "2024-02-01T00:00:00Z"
+            mock_extractor_class.return_value = mock_extractor
+
+            mock_ip = MagicMock()
+            mock_ipf.return_value.init_index_processor.return_value = mock_ip
+
+            mock_runner = MagicMock()
+            mock_runner_class.return_value = mock_runner
+
+            # DB session mock — shared across all ``session_factory.create_session()`` calls
+            session = MagicMock()
+            session.scalars.return_value.all.return_value = []
+            # .where() path: session 1 reads document + dataset, session 2 reads dataset
+            session.query.return_value.where.return_value.first.side_effect = [
+                mock_document,
+                mock_dataset,
+                mock_dataset,
+            ]
+            # .filter_by() path: session 3 (update), session 4 (indexing)
+            session.query.return_value.filter_by.return_value.first.side_effect = [
+                mock_document,
+                mock_document,
+            ]
+
+            begin_cm = MagicMock()  # context manager handed back by session.begin()
+            begin_cm.__enter__.return_value = session
+            begin_cm.__exit__.return_value = False  # falsy: exceptions raised inside the block propagate
+            session.begin.return_value = begin_cm
+
+            session_cm = MagicMock()  # context manager handed back by session_factory.create_session()
+            session_cm.__enter__.return_value = session
+            session_cm.__exit__.return_value = False  # falsy: exceptions raised inside the block propagate
+            mock_session_factory.create_session.return_value = session_cm
+
+            # Act: run the task under test with every patched collaborator in place
+            document_indexing_sync_task(dataset_id, document_id)
+
+            # Assert: data_source_info must be a JSON *string*, not a dict
+            assert isinstance(mock_document.data_source_info, str), (
+                f"data_source_info should be a JSON string, got {type(mock_document.data_source_info).__name__}"
+            )
+            parsed = json.loads(mock_document.data_source_info)
+            assert parsed["last_edited_time"] == "2024-02-01T00:00:00Z"
--- a/api/ty.toml
+++ b/api/ty.toml
@@ -1,50 +0,0 @@
-[src]
-exclude = [
-    # deps groups (A1/A2/B/C/D/E)
-    # B: app runner + prompt
-    "core/prompt",
-    "core/app/apps/base_app_runner.py",
-    "core/app/apps/workflow_app_runner.py",
-    "core/agent",
-    "core/plugin",
-    # C: services/controllers/fields/libs
-    "services",
-    "controllers/inner_api",
-    "controllers/console/app",
-    "controllers/console/explore",
-    "controllers/console/datasets",
-    "controllers/console/workspace",
-    "controllers/service_api/wraps.py",
-    "fields/conversation_fields.py",
-    "libs/external_api.py",
-    # D: observability + integrations
-    "core/ops",
-    "extensions",
-    # E: vector DB integrations
-    "core/rag/datasource/vdb",
-    # non-producition or generated code
-    "migrations",
-    "tests",
-    # targeted ignores for current type-check errors
-    # TODO(QuantumGhost): suppress type errors in HITL related code.
-    # fix the type error later
-    "configs/middleware/cache/redis_pubsub_config.py",
-    "extensions/ext_redis.py",
-    "models/execution_extra_content.py",
-    "tasks/workflow_execution_tasks.py",
-    "core/workflow/nodes/base/node.py",
-    "services/human_input_delivery_test_service.py",
-    "core/app/apps/advanced_chat/app_generator.py",
-    "controllers/console/human_input_form.py",
-    "controllers/console/app/workflow_run.py",
-    "repositories/sqlalchemy_api_workflow_node_execution_repository.py",
-    "extensions/logstore/repositories/logstore_api_workflow_run_repository.py",
-    "controllers/web/workflow_events.py",
-    "tasks/app_generate/workflow_execute_task.py",
-]
-
-
-[rules]
-deprecated = "ignore"
-unused-ignore-comment = "ignore"
-# possibly-missing-attribute = "ignore"
--- a/api/uv.lock
+++ b/api/uv.lock
@@ -1483,7 +1483,6 @@ dev = [
    { name = "scipy-stubs" },
    { name = "sseclient-py" },
    { name = "testcontainers" },
-    { name = "ty" },
    { name = "types-aiofiles" },
    { name = "types-beautifulsoup4" },
    { name = "types-cachetools" },
@@ -1684,7 +1683,6 @@ dev = [
    { name = "scipy-stubs", specifier = ">=1.15.3.0" },
    { name = "sseclient-py", specifier = ">=1.8.0" },
    { name = "testcontainers", specifier = "~=4.13.2" },
-    { name = "ty", specifier = ">=0.0.14" },
    { name = "types-aiofiles", specifier = "~=24.1.0" },
    { name = "types-beautifulsoup4", specifier = "~=4.12.0" },
    { name = "types-cachetools", specifier = "~=5.5.0" },
@@ -6278,30 +6276,6 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/70/26/2591b48412bde75e33bfd292034103ffe41743cacd03120e3242516cd143/transformers-4.56.2-py3-none-any.whl", hash = "sha256:79c03d0e85b26cb573c109ff9eafa96f3c8d4febfd8a0774e8bba32702dd6dde", size = 11608055, upload-time = "2025-09-19T15:16:23.736Z" },
 ]

-[[package]]
-name = "ty"
-version = "0.0.14"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/af/57/22c3d6bf95c2229120c49ffc2f0da8d9e8823755a1c3194da56e51f1cc31/ty-0.0.14.tar.gz", hash = "sha256:a691010565f59dd7f15cf324cdcd1d9065e010c77a04f887e1ea070ba34a7de2", size = 5036573, upload-time = "2026-01-27T00:57:31.427Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/99/cb/cc6d1d8de59beb17a41f9a614585f884ec2d95450306c173b3b7cc090d2e/ty-0.0.14-py3-none-linux_armv6l.whl", hash = "sha256:32cf2a7596e693094621d3ae568d7ee16707dce28c34d1762947874060fdddaa", size = 10034228, upload-time = "2026-01-27T00:57:53.133Z" },
-    { url = "https://files.pythonhosted.org/packages/f3/96/dd42816a2075a8f31542296ae687483a8d047f86a6538dfba573223eaf9a/ty-0.0.14-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:f971bf9805f49ce8c0968ad53e29624d80b970b9eb597b7cbaba25d8a18ce9a2", size = 9939162, upload-time = "2026-01-27T00:57:43.857Z" },
-    { url = "https://files.pythonhosted.org/packages/ff/b4/73c4859004e0f0a9eead9ecb67021438b2e8e5fdd8d03e7f5aca77623992/ty-0.0.14-py3-none-macosx_11_0_arm64.whl", hash = "sha256:45448b9e4806423523268bc15e9208c4f3f2ead7c344f615549d2e2354d6e924", size = 9418661, upload-time = "2026-01-27T00:58:03.411Z" },
-    { url = "https://files.pythonhosted.org/packages/58/35/839c4551b94613db4afa20ee555dd4f33bfa7352d5da74c5fa416ffa0fd2/ty-0.0.14-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ee94a9b747ff40114085206bdb3205a631ef19a4d3fb89e302a88754cbbae54c", size = 9837872, upload-time = "2026-01-27T00:57:23.718Z" },
-    { url = "https://files.pythonhosted.org/packages/41/2b/bbecf7e2faa20c04bebd35fc478668953ca50ee5847ce23e08acf20ea119/ty-0.0.14-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6756715a3c33182e9ab8ffca2bb314d3c99b9c410b171736e145773ee0ae41c3", size = 9848819, upload-time = "2026-01-27T00:57:58.501Z" },
-    { url = "https://files.pythonhosted.org/packages/be/60/3c0ba0f19c0f647ad9d2b5b5ac68c0f0b4dc899001bd53b3a7537fb247a2/ty-0.0.14-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:89d0038a2f698ba8b6fec5cf216a4e44e2f95e4a5095a8c0f57fe549f87087c2", size = 10324371, upload-time = "2026-01-27T00:57:29.291Z" },
-    { url = "https://files.pythonhosted.org/packages/24/32/99d0a0b37d0397b0a989ffc2682493286aa3bc252b24004a6714368c2c3d/ty-0.0.14-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2c64a83a2d669b77f50a4957039ca1450626fb474619f18f6f8a3eb885bf7544", size = 10865898, upload-time = "2026-01-27T00:57:33.542Z" },
-    { url = "https://files.pythonhosted.org/packages/1a/88/30b583a9e0311bb474269cfa91db53350557ebec09002bfc3fb3fc364e8c/ty-0.0.14-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:242488bfb547ef080199f6fd81369ab9cb638a778bb161511d091ffd49c12129", size = 10555777, upload-time = "2026-01-27T00:58:05.853Z" },
-    { url = "https://files.pythonhosted.org/packages/cd/a2/cb53fb6325dcf3d40f2b1d0457a25d55bfbae633c8e337bde8ec01a190eb/ty-0.0.14-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4790c3866f6c83a4f424fc7d09ebdb225c1f1131647ba8bdc6fcdc28f09ed0ff", size = 10412913, upload-time = "2026-01-27T00:57:38.834Z" },
-    { url = "https://files.pythonhosted.org/packages/42/8f/f2f5202d725ed1e6a4e5ffaa32b190a1fe70c0b1a2503d38515da4130b4c/ty-0.0.14-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:950f320437f96d4ea9a2332bbfb5b68f1c1acd269ebfa4c09b6970cc1565bd9d", size = 9837608, upload-time = "2026-01-27T00:57:55.898Z" },
-    { url = "https://files.pythonhosted.org/packages/f7/ba/59a2a0521640c489dafa2c546ae1f8465f92956fede18660653cce73b4c5/ty-0.0.14-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:4a0ec3ee70d83887f86925bbc1c56f4628bd58a0f47f6f32ddfe04e1f05466df", size = 9884324, upload-time = "2026-01-27T00:57:46.786Z" },
-    { url = "https://files.pythonhosted.org/packages/03/95/8d2a49880f47b638743212f011088552ecc454dd7a665ddcbdabea25772a/ty-0.0.14-py3-none-musllinux_1_2_i686.whl", hash = "sha256:a1a4e6b6da0c58b34415955279eff754d6206b35af56a18bb70eb519d8d139ef", size = 10033537, upload-time = "2026-01-27T00:58:01.149Z" },
-    { url = "https://files.pythonhosted.org/packages/e9/40/4523b36f2ce69f92ccf783855a9e0ebbbd0f0bb5cdce6211ee1737159ed3/ty-0.0.14-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:dc04384e874c5de4c5d743369c277c8aa73d1edea3c7fc646b2064b637db4db3", size = 10495910, upload-time = "2026-01-27T00:57:26.691Z" },
-    { url = "https://files.pythonhosted.org/packages/08/d5/655beb51224d1bfd4f9ddc0bb209659bfe71ff141bcf05c418ab670698f0/ty-0.0.14-py3-none-win32.whl", hash = "sha256:b20e22cf54c66b3e37e87377635da412d9a552c9bf4ad9fc449fed8b2e19dad2", size = 9507626, upload-time = "2026-01-27T00:57:41.43Z" },
-    { url = "https://files.pythonhosted.org/packages/b6/d9/c569c9961760e20e0a4bc008eeb1415754564304fd53997a371b7cf3f864/ty-0.0.14-py3-none-win_amd64.whl", hash = "sha256:e312ff9475522d1a33186657fe74d1ec98e4a13e016d66f5758a452c90ff6409", size = 10437980, upload-time = "2026-01-27T00:57:36.422Z" },
-    { url = "https://files.pythonhosted.org/packages/ad/0c/186829654f5bfd9a028f6648e9caeb11271960a61de97484627d24443f91/ty-0.0.14-py3-none-win_arm64.whl", hash = "sha256:b6facdbe9b740cb2c15293a1d178e22ffc600653646452632541d01c36d5e378", size = 9885831, upload-time = "2026-01-27T00:57:49.747Z" },
-]
-
 [[package]]
 name = "typer"
 version = "0.20.0"
--- a/docker/docker-compose-template.yaml
+++ b/docker/docker-compose-template.yaml
@@ -149,7 +149,6 @@ services:
      MARKETPLACE_URL: ${MARKETPLACE_URL:-https://marketplace.dify.ai}
      TOP_K_MAX_VALUE: ${TOP_K_MAX_VALUE:-}
      INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH: ${INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH:-}
-      PM2_INSTANCES: ${PM2_INSTANCES:-2}
      LOOP_NODE_MAX_COUNT: ${LOOP_NODE_MAX_COUNT:-100}
      MAX_TOOLS_NUM: ${MAX_TOOLS_NUM:-10}
      MAX_PARALLEL_LIMIT: ${MAX_PARALLEL_LIMIT:-10}
--- a/docker/docker-compose.yaml
+++ b/docker/docker-compose.yaml
@@ -844,7 +844,6 @@ services:
      MARKETPLACE_URL: ${MARKETPLACE_URL:-https://marketplace.dify.ai}
      TOP_K_MAX_VALUE: ${TOP_K_MAX_VALUE:-}
      INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH: ${INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH:-}
-      PM2_INSTANCES: ${PM2_INSTANCES:-2}
      LOOP_NODE_MAX_COUNT: ${LOOP_NODE_MAX_COUNT:-100}
      MAX_TOOLS_NUM: ${MAX_TOOLS_NUM:-10}
      MAX_PARALLEL_LIMIT: ${MAX_PARALLEL_LIMIT:-10}
--- a/web/Dockerfile
+++ b/web/Dockerfile
@@ -50,24 +50,18 @@ ENV MARKETPLACE_API_URL=https://marketplace.dify.ai
 ENV MARKETPLACE_URL=https://marketplace.dify.ai
 ENV PORT=3000
 ENV NEXT_TELEMETRY_DISABLED=1
-ENV PM2_INSTANCES=2

 # set timezone
 ENV TZ=UTC
 RUN ln -s /usr/share/zoneinfo/${TZ} /etc/localtime \
    && echo ${TZ} > /etc/timezone

-# global runtime packages
-RUN pnpm add -g pm2
-
-
 # Create non-root user
 ARG dify_uid=1001
 RUN addgroup -S -g ${dify_uid} dify && \
    adduser -S -u ${dify_uid} -G dify -s /bin/ash -h /home/dify dify && \
    mkdir /app && \
-    mkdir /.pm2 && \
-    chown -R dify:dify /app /.pm2
+    chown -R dify:dify /app


 WORKDIR /app/web
--- a/web/README.md
+++ b/web/README.md
@@ -89,8 +89,6 @@ If you want to customize the host and port:
 pnpm run start --port=3001 --host=0.0.0.0
 ```

-If you want to customize the number of instances launched by PM2, you can configure `PM2_INSTANCES` in `docker-compose.yaml` or `Dockerfile`.
-
 ## Storybook

 This project uses [Storybook](https://storybook.js.org/) for UI component development.
--- a/web/docker/entrypoint.sh
+++ b/web/docker/entrypoint.sh
@@ -43,4 +43,4 @@ export NEXT_PUBLIC_MAX_PARALLEL_LIMIT=${MAX_PARALLEL_LIMIT}
 export NEXT_PUBLIC_MAX_ITERATIONS_NUM=${MAX_ITERATIONS_NUM}
 export NEXT_PUBLIC_MAX_TREE_DEPTH=${MAX_TREE_DEPTH}

-pm2 start /app/web/server.js --name dify-web --cwd /app/web -i ${PM2_INSTANCES} --no-daemon
+exec node /app/web/server.js
--- a/web/docker/pm2.json
+++ b/web/docker/pm2.json
@@ -1,11 +0,0 @@
-{
-  "apps": [
-    {
-      "name": "dify-web",
-      "script": "/app/web/server.js",
-      "cwd": "/app/web",
-      "exec_mode": "cluster",
-      "instances": 2
-    }
-  ]
-}
Author	SHA1	Message	Date
-LAN-	6a80d0d8f0	fix: parameterize myscale query vector and add regression test	2026-03-01 19:56:13 +08:00
-LAN-	a9e1ce0a1e	fix: preserve MyScale text content on insert	2026-03-01 19:56:13 +08:00
-LAN-	346e9d8ccf	Harden MyScale query parameterization	2026-03-01 19:56:13 +08:00
-LAN-	ffe77fecdf	revert(graph-engine): rollback stop-event unification (#32789)	2026-03-01 19:43:05 +08:00
weiguang li	b462a96fa0	fix: serialize data_source_info with json.dumps in Notion sync task (#32747)	2026-03-01 19:37:51 +08:00
lif	fb538b005c	chore(web): remove PM2 process manager (#30252) Signed-off-by: majiayu000 <1835304752@qq.com>	2026-03-01 19:31:45 +08:00
盐粒 Yanli	bc6fd0b5dd	chore: remove ty from backend type-check pipeline (#32782)	2026-03-01 19:10:24 +08:00
-LAN-	53c62fde33	fix(api): enforce ownership check for conversation delete (#32686)	2026-03-01 17:53:37 +08:00
akkoaya	f0f01c69aa	fix: add missing pipeline_templates (#31528) Co-authored-by: FFXN <31929997+FFXN@users.noreply.github.com>	2026-03-01 17:33:04 +08:00