feat: knowledge pipeline (#25360)

Signed-off-by: -LAN- <laipz8200@outlook.com>
Co-authored-by: twwu <twwu@dify.ai>
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
Co-authored-by: jyong <718720800@qq.com>
Co-authored-by: Wu Tianwei <30284043+WTW0313@users.noreply.github.com>
Co-authored-by: QuantumGhost <obelisk.reg+git@gmail.com>
Co-authored-by: lyzno1 <yuanyouhuilyz@gmail.com>
Co-authored-by: quicksand <quicksandzn@gmail.com>
Co-authored-by: Jyong <76649700+JohnJyong@users.noreply.github.com>
Co-authored-by: lyzno1 <92089059+lyzno1@users.noreply.github.com>
Co-authored-by: zxhlyh <jasonapring2015@outlook.com>
Co-authored-by: Yongtao Huang <yongtaoh2022@gmail.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: Joel <iamjoel007@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: nite-knite <nkCoding@gmail.com>
Co-authored-by: Hanqing Zhao <sherry9277@gmail.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Harry <xh001x@hotmail.com>
-LAN- authored this commit on 2025-09-18 12:49:10 +08:00; committed by GitHub
parent 7dadb33003
commit 85cda47c70
1772 changed files with 102407 additions and 31710 deletions


@@ -3,7 +3,6 @@ import time
 import uuid
 from collections.abc import Callable, Generator, Mapping, Sequence
 from typing import Any, cast
-from uuid import uuid4

 from sqlalchemy import exists, select
 from sqlalchemy.orm import Session, sessionmaker
@@ -15,22 +14,20 @@ from core.file import File
 from core.repositories import DifyCoreRepositoryFactory
 from core.variables import Variable
 from core.variables.variables import VariableUnion
-from core.workflow.entities.node_entities import NodeRunResult
-from core.workflow.entities.variable_pool import VariablePool
-from core.workflow.entities.workflow_node_execution import WorkflowNodeExecution, WorkflowNodeExecutionStatus
+from core.workflow.entities import VariablePool, WorkflowNodeExecution
+from core.workflow.enums import ErrorStrategy, WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus
 from core.workflow.errors import WorkflowNodeRunFailedError
-from core.workflow.graph_engine.entities.event import InNodeEvent
+from core.workflow.graph_events import GraphNodeEventBase, NodeRunFailedEvent, NodeRunSucceededEvent
+from core.workflow.node_events import NodeRunResult
 from core.workflow.nodes import NodeType
-from core.workflow.nodes.base.node import BaseNode
-from core.workflow.nodes.enums import ErrorStrategy
-from core.workflow.nodes.event import RunCompletedEvent
-from core.workflow.nodes.event.types import NodeEvent
+from core.workflow.nodes.base.node import Node
 from core.workflow.nodes.node_mapping import LATEST_VERSION, NODE_TYPE_CLASSES_MAPPING
 from core.workflow.nodes.start.entities import StartNodeData
 from core.workflow.system_variable import SystemVariable
 from core.workflow.workflow_entry import WorkflowEntry
 from events.app_event import app_draft_workflow_was_synced, app_published_workflow_was_updated
 from extensions.ext_database import db
+from extensions.ext_storage import storage
 from factories.file_factory import build_from_mapping, build_from_mappings
 from libs.datetime_utils import naive_utc_now
 from models.account import Account
@@ -276,12 +273,13 @@ class WorkflowService:
             type=draft_workflow.type,
             version=Workflow.version_from_datetime(naive_utc_now()),
             graph=draft_workflow.graph,
-            features=draft_workflow.features,
             created_by=account.id,
             environment_variables=draft_workflow.environment_variables,
             conversation_variables=draft_workflow.conversation_variables,
             marked_name=marked_name,
             marked_comment=marked_comment,
+            rag_pipeline_variables=draft_workflow.rag_pipeline_variables,
+            features=draft_workflow.features,
         )

         # commit db session changes
@@ -565,12 +563,12 @@ class WorkflowService:
             # This will prevent validation errors from breaking the workflow
             return []

-    def get_default_block_configs(self) -> list[dict]:
+    def get_default_block_configs(self) -> Sequence[Mapping[str, object]]:
         """
         Get default block configs
         """
         # return default block config
-        default_block_configs = []
+        default_block_configs: list[Mapping[str, object]] = []
         for node_class_mapping in NODE_TYPE_CLASSES_MAPPING.values():
             node_class = node_class_mapping[LATEST_VERSION]
             default_config = node_class.get_default_config()
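For orientation (not part of the diff): a minimal caller sketch of the retyped get_default_block_configs above. The WorkflowService() construction is an assumption for illustration; the point is that the return value is now a read-only Sequence of Mappings rather than list[dict].

service = WorkflowService()  # assumed construction, for illustration only
for block_config in service.get_default_block_configs():
    # each entry is a Mapping[str, object] produced by a node class's get_default_config()
    print(block_config)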
@@ -579,7 +577,9 @@ class WorkflowService:
         return default_block_configs

-    def get_default_block_config(self, node_type: str, filters: dict | None = None) -> dict | None:
+    def get_default_block_config(
+        self, node_type: str, filters: Mapping[str, object] | None = None
+    ) -> Mapping[str, object]:
         """
         Get default config of node.
         :param node_type: node type
@@ -590,12 +590,12 @@ class WorkflowService:
         # return default block config
         if node_type_enum not in NODE_TYPE_CLASSES_MAPPING:
-            return None
+            return {}

         node_class = NODE_TYPE_CLASSES_MAPPING[node_type_enum][LATEST_VERSION]
         default_config = node_class.get_default_config(filters=filters)
         if not default_config:
-            return None
+            return {}

         return default_config
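For orientation (not part of the diff): a minimal caller sketch of the changed contract above. get_default_block_config now returns an empty mapping instead of None when a node type or its default config is missing, so callers test truthiness rather than identity. The service construction and the "code" node type string are assumptions for illustration.

service = WorkflowService()  # assumed construction, for illustration only
config = service.get_default_block_config(node_type="code")  # "code" is an assumed node type
if not config:
    # an empty mapping replaces the former `None` return
    print("no default config for this node type")
else:
    print(config)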
@@ -677,7 +677,7 @@ class WorkflowService:
         # run draft workflow node
         start_at = time.perf_counter()

-        node_execution = self._handle_node_run_result(
+        node_execution = self._handle_single_step_result(
             invoke_node_fn=lambda: run,
             start_at=start_at,
             node_id=node_id,
@@ -699,6 +699,9 @@ class WorkflowService:
         if workflow_node_execution is None:
             raise ValueError(f"WorkflowNodeExecution with id {node_execution.id} not found after saving")

+        with Session(db.engine) as session:
+            outputs = workflow_node_execution.load_full_outputs(session, storage)
+
         with Session(bind=db.engine) as session, session.begin():
             draft_var_saver = DraftVariableSaver(
                 session=session,
@@ -707,8 +710,9 @@ class WorkflowService:
                 node_type=NodeType(workflow_node_execution.node_type),
                 enclosing_node_id=enclosing_node_id,
                 node_execution_id=node_execution.id,
+                user=account,
             )
-            draft_var_saver.save(process_data=node_execution.process_data, outputs=node_execution.outputs)
+            draft_var_saver.save(process_data=node_execution.process_data, outputs=outputs)
             session.commit()

         return workflow_node_execution
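For orientation (not part of the diff): a condensed sketch of the flow in the two hunks above, assuming the surrounding variables (app_model, node_id, enclosing_node_id, account) exist in the enclosing method. Full outputs are first materialized from storage in a short read session, then persisted as draft variables in a separate transactional session with the new user argument.

with Session(db.engine) as session:
    outputs = workflow_node_execution.load_full_outputs(session, storage)

with Session(bind=db.engine) as session, session.begin():
    draft_var_saver = DraftVariableSaver(
        session=session,
        app_id=app_model.id,  # assumed keyword, not shown in these hunks
        node_id=node_id,  # assumed keyword, not shown in these hunks
        node_type=NodeType(workflow_node_execution.node_type),
        enclosing_node_id=enclosing_node_id,
        node_execution_id=node_execution.id,
        user=account,  # new keyword added in this commit
    )
    draft_var_saver.save(process_data=node_execution.process_data, outputs=outputs)
    session.commit()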
@@ -722,7 +726,7 @@ class WorkflowService:
         # run free workflow node
         start_at = time.perf_counter()

-        node_execution = self._handle_node_run_result(
+        node_execution = self._handle_single_step_result(
             invoke_node_fn=lambda: WorkflowEntry.run_free_node(
                 node_id=node_id,
                 node_data=node_data,
@@ -736,103 +740,131 @@ class WorkflowService:
         return node_execution

-    def _handle_node_run_result(
+    def _handle_single_step_result(
         self,
-        invoke_node_fn: Callable[[], tuple[BaseNode, Generator[NodeEvent | InNodeEvent, None, None]]],
+        invoke_node_fn: Callable[[], tuple[Node, Generator[GraphNodeEventBase, None, None]]],
         start_at: float,
         node_id: str,
     ) -> WorkflowNodeExecution:
-        try:
-            node, node_events = invoke_node_fn()
+        """
+        Handle single step execution and return WorkflowNodeExecution.

-            node_run_result: NodeRunResult | None = None
-            for event in node_events:
-                if isinstance(event, RunCompletedEvent):
-                    node_run_result = event.run_result
+        Args:
+            invoke_node_fn: Function to invoke node execution
+            start_at: Execution start time
+            node_id: ID of the node being executed

-                    # sign output files
-                    # node_run_result.outputs = WorkflowEntry.handle_special_values(node_run_result.outputs)
-                    break
+        Returns:
+            WorkflowNodeExecution: The execution result
+        """
+        node, node_run_result, run_succeeded, error = self._execute_node_safely(invoke_node_fn)

-            if not node_run_result:
-                raise ValueError("Node run failed with no run result")
-            # single step debug mode error handling return
-            if node_run_result.status == WorkflowNodeExecutionStatus.FAILED and node.continue_on_error:
-                node_error_args: dict[str, Any] = {
-                    "status": WorkflowNodeExecutionStatus.EXCEPTION,
-                    "error": node_run_result.error,
-                    "inputs": node_run_result.inputs,
-                    "metadata": {"error_strategy": node.error_strategy},
-                }
-                if node.error_strategy is ErrorStrategy.DEFAULT_VALUE:
-                    node_run_result = NodeRunResult(
-                        **node_error_args,
-                        outputs={
-                            **node.default_value_dict,
-                            "error_message": node_run_result.error,
-                            "error_type": node_run_result.error_type,
-                        },
-                    )
-                else:
-                    node_run_result = NodeRunResult(
-                        **node_error_args,
-                        outputs={
-                            "error_message": node_run_result.error,
-                            "error_type": node_run_result.error_type,
-                        },
-                    )
-            run_succeeded = node_run_result.status in (
-                WorkflowNodeExecutionStatus.SUCCEEDED,
-                WorkflowNodeExecutionStatus.EXCEPTION,
-            )
-            error = node_run_result.error if not run_succeeded else None
-        except WorkflowNodeRunFailedError as e:
-            node = e.node
-            run_succeeded = False
-            node_run_result = None
-            error = e.error
-        # Create a NodeExecution domain model
+        # Create base node execution
         node_execution = WorkflowNodeExecution(
-            id=str(uuid4()),
-            workflow_id="",  # This is a single-step execution, so no workflow ID
+            id=str(uuid.uuid4()),
+            workflow_id="",  # Single-step execution has no workflow ID
             index=1,
             node_id=node_id,
-            node_type=node.type_,
+            node_type=node.node_type,
             title=node.title,
             elapsed_time=time.perf_counter() - start_at,
             created_at=naive_utc_now(),
             finished_at=naive_utc_now(),
         )

+        # Populate execution result data
+        self._populate_execution_result(node_execution, node_run_result, run_succeeded, error)
+
+        return node_execution
+
+    def _execute_node_safely(
+        self, invoke_node_fn: Callable[[], tuple[Node, Generator[GraphNodeEventBase, None, None]]]
+    ) -> tuple[Node, NodeRunResult | None, bool, str | None]:
+        """
+        Execute node safely and handle errors according to error strategy.
+
+        Returns:
+            Tuple of (node, node_run_result, run_succeeded, error)
+        """
+        try:
+            node, node_events = invoke_node_fn()
+            node_run_result = next(
+                (
+                    event.node_run_result
+                    for event in node_events
+                    if isinstance(event, (NodeRunSucceededEvent, NodeRunFailedEvent))
+                ),
+                None,
+            )
+            if not node_run_result:
+                raise ValueError("Node execution failed - no result returned")
+
+            # Apply error strategy if node failed
+            if node_run_result.status == WorkflowNodeExecutionStatus.FAILED and node.error_strategy:
+                node_run_result = self._apply_error_strategy(node, node_run_result)
+
+            run_succeeded = node_run_result.status in (
+                WorkflowNodeExecutionStatus.SUCCEEDED,
+                WorkflowNodeExecutionStatus.EXCEPTION,
+            )
+            error = node_run_result.error if not run_succeeded else None
+            return node, node_run_result, run_succeeded, error
+        except WorkflowNodeRunFailedError as e:
+            node = e.node
+            run_succeeded = False
+            node_run_result = None
+            error = e.error
+            return node, node_run_result, run_succeeded, error
+
+    def _apply_error_strategy(self, node: Node, node_run_result: NodeRunResult) -> NodeRunResult:
+        """Apply error strategy when node execution fails."""
+        # TODO(Novice): Maybe we should apply error strategy to node level?
+        error_outputs = {
+            "error_message": node_run_result.error,
+            "error_type": node_run_result.error_type,
+        }
+
+        # Add default values if strategy is DEFAULT_VALUE
+        if node.error_strategy is ErrorStrategy.DEFAULT_VALUE:
+            error_outputs.update(node.default_value_dict)
+
+        return NodeRunResult(
+            status=WorkflowNodeExecutionStatus.EXCEPTION,
+            error=node_run_result.error,
+            inputs=node_run_result.inputs,
+            metadata={WorkflowNodeExecutionMetadataKey.ERROR_STRATEGY: node.error_strategy},
+            outputs=error_outputs,
+        )
+
+    def _populate_execution_result(
+        self,
+        node_execution: WorkflowNodeExecution,
+        node_run_result: NodeRunResult | None,
+        run_succeeded: bool,
+        error: str | None,
+    ) -> None:
+        """Populate node execution with result data."""
         if run_succeeded and node_run_result:
             # Set inputs, process_data, and outputs as dictionaries (not JSON strings)
-            inputs = WorkflowEntry.handle_special_values(node_run_result.inputs) if node_run_result.inputs else None
-            process_data = (
+            node_execution.inputs = (
+                WorkflowEntry.handle_special_values(node_run_result.inputs) if node_run_result.inputs else None
+            )
+            node_execution.process_data = (
                 WorkflowEntry.handle_special_values(node_run_result.process_data)
                 if node_run_result.process_data
                 else None
             )
-            outputs = node_run_result.outputs
-            node_execution.inputs = inputs
-            node_execution.process_data = process_data
-            node_execution.outputs = outputs
+            node_execution.outputs = node_run_result.outputs
             node_execution.metadata = node_run_result.metadata

-            # Map status from WorkflowNodeExecutionStatus to NodeExecutionStatus
-            if node_run_result.status == WorkflowNodeExecutionStatus.SUCCEEDED:
-                node_execution.status = WorkflowNodeExecutionStatus.SUCCEEDED
-            elif node_run_result.status == WorkflowNodeExecutionStatus.EXCEPTION:
-                node_execution.status = WorkflowNodeExecutionStatus.EXCEPTION
+            # Set status and error based on result
+            node_execution.status = node_run_result.status
+            if node_run_result.status == WorkflowNodeExecutionStatus.EXCEPTION:
                 node_execution.error = node_run_result.error
         else:
             # Set failed status and error
             node_execution.status = WorkflowNodeExecutionStatus.FAILED
             node_execution.error = error
-        return node_execution

     def convert_to_workflow(self, app_model: App, account: Account, args: dict) -> App:
         """
         Basic mode of chatbot app(expert mode) to workflow
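For orientation (not part of the diff): a small sketch of the error-strategy conversion that the new _apply_error_strategy performs, assuming NodeRunResult accepts the keyword fields used in the hunk above and that `service` and `node` are an existing WorkflowService instance and a node with an error_strategy set. A FAILED result becomes an EXCEPTION result whose outputs carry the error details (plus the node's default values when the strategy is DEFAULT_VALUE).

failed = NodeRunResult(
    status=WorkflowNodeExecutionStatus.FAILED,
    error="tool call timed out",  # assumed error text, for illustration only
    error_type="ToolInvokeError",  # assumed error type, for illustration only
    inputs={"query": "hello"},  # assumed inputs, for illustration only
)
recovered = service._apply_error_strategy(node, failed)
assert recovered.status == WorkflowNodeExecutionStatus.EXCEPTION
assert recovered.outputs["error_message"] == failed.error
assert recovered.metadata[WorkflowNodeExecutionMetadataKey.ERROR_STRATEGY] == node.error_strategy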