feat(workflow): precheck llm quota and abort early

2026-03-01 12:55:13 +00:00 · 2026-03-01 18:29:32 +08:00
parent 27601fab44
commit e028e07953
5 changed files with 127 additions and 9 deletions
--- a/api/core/app/llm/init.py
+++ b/api/core/app/llm/init.py
@@ -1,5 +1,5 @@
 """LLM-related application services."""

-from .quota import deduct_llm_quota
+from .quota import deduct_llm_quota, ensure_llm_quota_available

-__all__ = ["deduct_llm_quota"]
+__all__ = ["deduct_llm_quota", "ensure_llm_quota_available"]
--- a/api/core/app/llm/quota.py
+++ b/api/core/app/llm/quota.py
@@ -2,7 +2,9 @@ from sqlalchemy import update
 from sqlalchemy.orm import Session

 from configs import dify_config
+from core.entities.model_entities import ModelStatus
 from core.entities.provider_entities import ProviderQuotaType, QuotaUnit
+from core.errors.error import QuotaExceededError
 from core.model_manager import ModelInstance
 from core.model_runtime.entities.llm_entities import LLMUsage
 from extensions.ext_database import db
@@ -11,6 +13,21 @@ from models.provider import Provider, ProviderType
 from models.provider_ids import ModelProviderID


+def ensure_llm_quota_available(*, model_instance: ModelInstance) -> None:
+    provider_model_bundle = model_instance.provider_model_bundle
+    provider_configuration = provider_model_bundle.configuration
+
+    if provider_configuration.using_provider_type != ProviderType.SYSTEM:
+        return
+
+    provider_model = provider_configuration.get_provider_model(
+        model_type=model_instance.model_type_instance.model_type,
+        model=model_instance.model_name,
+    )
+    if provider_model and provider_model.status == ModelStatus.QUOTA_EXCEEDED:
+        raise QuotaExceededError(f"Model provider {model_instance.provider} quota exceeded.")
+
+
 def deduct_llm_quota(*, tenant_id: str, model_instance: ModelInstance, usage: LLMUsage) -> None:
    provider_model_bundle = model_instance.provider_model_bundle
    provider_configuration = provider_model_bundle.configuration
--- a/api/core/app/workflow/layers/llm_quota.py
+++ b/api/core/app/workflow/layers/llm_quota.py
@@ -9,9 +9,11 @@ from typing import TYPE_CHECKING, cast, final

 from typing_extensions import override

-from core.app.llm import deduct_llm_quota
+from core.app.llm import deduct_llm_quota, ensure_llm_quota_available
+from core.errors.error import QuotaExceededError
 from core.model_manager import ModelInstance
 from core.workflow.enums import NodeType
+from core.workflow.graph_engine.entities.commands import AbortCommand, CommandType
 from core.workflow.graph_engine.layers.base import GraphEngineLayer
 from core.workflow.graph_events import GraphEngineEvent, GraphNodeEventBase
 from core.workflow.graph_events.node import NodeRunSucceededEvent
@@ -29,9 +31,13 @@ logger = logging.getLogger(__name__)
 class LLMQuotaLayer(GraphEngineLayer):
    """Graph layer that applies LLM quota deduction after node execution."""

+    def __init__(self) -> None:
+        super().__init__()
+        self._abort_sent = False
+
    @override
    def on_graph_start(self) -> None:
-        return
+        self._abort_sent = False

    @override
    def on_event(self, event: GraphEngineEvent) -> None:
@@ -41,6 +47,22 @@ class LLMQuotaLayer(GraphEngineLayer):
    def on_graph_end(self, error: Exception | None) -> None:
        _ = error

+    @override
+    def on_node_run_start(self, node: Node) -> None:
+        if self._abort_sent:
+            return
+
+        model_instance = self._extract_model_instance(node)
+        if model_instance is None:
+            return
+
+        try:
+            ensure_llm_quota_available(model_instance=model_instance)
+        except QuotaExceededError as exc:
+            node.graph_runtime_state.stop_event.set()
+            self._send_abort_command(reason=str(exc))
+            logger.warning("LLM quota check failed, node_id=%s, error=%s", node.id, exc)
+
    @override
    def on_node_run_end(
        self, node: Node, error: Exception | None, result_event: GraphNodeEventBase | None = None
@@ -61,6 +83,21 @@ class LLMQuotaLayer(GraphEngineLayer):
        except Exception:
            logger.exception("LLM quota deduction failed, node_id=%s", node.id)

+    def _send_abort_command(self, *, reason: str) -> None:
+        if not self.command_channel or self._abort_sent:
+            return
+
+        try:
+            self.command_channel.send_command(
+                AbortCommand(
+                    command_type=CommandType.ABORT,
+                    reason=reason,
+                )
+            )
+            self._abort_sent = True
+        except Exception:
+            logger.exception("Failed to send quota abort command")
+
    @staticmethod
    def _extract_model_instance(node: Node) -> ModelInstance | None:
        try:
--- a/api/core/workflow/nodes/base/node.py
+++ b/api/core/workflow/nodes/base/node.py
@@ -352,6 +352,21 @@ class Node(Generic[NodeDataT]):
        # ===
        yield start_event

+        if self._should_stop():
+            error_message = "Execution cancelled"
+            yield NodeRunFailedEvent(
+                id=self.execution_id,
+                node_id=self._node_id,
+                node_type=self.node_type,
+                start_at=self._start_at,
+                node_run_result=NodeRunResult(
+                    status=WorkflowNodeExecutionStatus.FAILED,
+                    error=error_message,
+                ),
+                error=error_message,
+            )
+            return
+
        try:
            result = self._run()

--- a/api/tests/unit_tests/core/workflow/graph_engine/layers/test_llm_quota.py
+++ b/api/tests/unit_tests/core/workflow/graph_engine/layers/test_llm_quota.py
@@ -1,9 +1,12 @@
+import threading
 from datetime import datetime
 from unittest.mock import MagicMock, patch

 from core.app.workflow.layers.llm_quota import LLMQuotaLayer
+from core.errors.error import QuotaExceededError
 from core.model_runtime.entities.llm_entities import LLMUsage
 from core.workflow.enums import NodeType, WorkflowNodeExecutionStatus
+from core.workflow.graph_engine.entities.commands import CommandType
 from core.workflow.graph_events.node import NodeRunSucceededEvent
 from core.workflow.node_events import NodeRunResult

@@ -29,7 +32,7 @@ def test_deduct_quota_called_for_successful_llm_node() -> None:
    node.execution_id = "execution-id"
    node.node_type = NodeType.LLM
    node.tenant_id = "tenant-id"
-    node._model_instance = object()
+    node.model_instance = object()

    result_event = _build_succeeded_event()
    with patch("core.app.workflow.layers.llm_quota.deduct_llm_quota", autospec=True) as mock_deduct:
@@ -37,7 +40,7 @@ def test_deduct_quota_called_for_successful_llm_node() -> None:

    mock_deduct.assert_called_once_with(
        tenant_id="tenant-id",
-        model_instance=node._model_instance,
+        model_instance=node.model_instance,
        usage=result_event.node_run_result.llm_usage,
    )

@@ -49,7 +52,7 @@ def test_deduct_quota_called_for_question_classifier_node() -> None:
    node.execution_id = "execution-id"
    node.node_type = NodeType.QUESTION_CLASSIFIER
    node.tenant_id = "tenant-id"
-    node._model_instance = object()
+    node.model_instance = object()

    result_event = _build_succeeded_event()
    with patch("core.app.workflow.layers.llm_quota.deduct_llm_quota", autospec=True) as mock_deduct:
@@ -57,7 +60,7 @@ def test_deduct_quota_called_for_question_classifier_node() -> None:

    mock_deduct.assert_called_once_with(
        tenant_id="tenant-id",
-        model_instance=node._model_instance,
+        model_instance=node.model_instance,
        usage=result_event.node_run_result.llm_usage,
    )

@@ -85,7 +88,7 @@ def test_quota_error_is_handled_in_layer() -> None:
    node.execution_id = "execution-id"
    node.node_type = NodeType.LLM
    node.tenant_id = "tenant-id"
-    node._model_instance = object()
+    node.model_instance = object()

    result_event = _build_succeeded_event()
    with patch(
@@ -94,3 +97,49 @@ def test_quota_error_is_handled_in_layer() -> None:
        side_effect=ValueError("quota exceeded"),
    ):
        layer.on_node_run_end(node=node, error=None, result_event=result_event)
+
+
+def test_quota_precheck_failure_aborts_workflow_immediately() -> None:
+    layer = LLMQuotaLayer()
+    stop_event = threading.Event()
+    layer.command_channel = MagicMock()
+
+    node = MagicMock()
+    node.id = "llm-node-id"
+    node.node_type = NodeType.LLM
+    node.model_instance = object()
+    node.graph_runtime_state = MagicMock()
+    node.graph_runtime_state.stop_event = stop_event
+
+    with patch(
+        "core.app.workflow.layers.llm_quota.ensure_llm_quota_available",
+        autospec=True,
+        side_effect=QuotaExceededError("Model provider openai quota exceeded."),
+    ):
+        layer.on_node_run_start(node)
+
+    assert stop_event.is_set()
+    layer.command_channel.send_command.assert_called_once()
+    abort_command = layer.command_channel.send_command.call_args.args[0]
+    assert abort_command.command_type == CommandType.ABORT
+    assert abort_command.reason == "Model provider openai quota exceeded."
+
+
+def test_quota_precheck_passes_without_abort() -> None:
+    layer = LLMQuotaLayer()
+    stop_event = threading.Event()
+    layer.command_channel = MagicMock()
+
+    node = MagicMock()
+    node.id = "llm-node-id"
+    node.node_type = NodeType.LLM
+    node.model_instance = object()
+    node.graph_runtime_state = MagicMock()
+    node.graph_runtime_state.stop_event = stop_event
+
+    with patch("core.app.workflow.layers.llm_quota.ensure_llm_quota_available", autospec=True) as mock_check:
+        layer.on_node_run_start(node)
+
+    assert not stop_event.is_set()
+    mock_check.assert_called_once_with(model_instance=node.model_instance)
+    layer.command_channel.send_command.assert_not_called()