mirror of
https://github.com/langgenius/dify.git
synced 2026-01-08 07:14:14 +00:00
feat: knowledge pipeline (#25360)
Signed-off-by: -LAN- <laipz8200@outlook.com> Co-authored-by: twwu <twwu@dify.ai> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com> Co-authored-by: jyong <718720800@qq.com> Co-authored-by: Wu Tianwei <30284043+WTW0313@users.noreply.github.com> Co-authored-by: QuantumGhost <obelisk.reg+git@gmail.com> Co-authored-by: lyzno1 <yuanyouhuilyz@gmail.com> Co-authored-by: quicksand <quicksandzn@gmail.com> Co-authored-by: Jyong <76649700+JohnJyong@users.noreply.github.com> Co-authored-by: lyzno1 <92089059+lyzno1@users.noreply.github.com> Co-authored-by: zxhlyh <jasonapring2015@outlook.com> Co-authored-by: Yongtao Huang <yongtaoh2022@gmail.com> Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: Joel <iamjoel007@gmail.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: nite-knite <nkCoding@gmail.com> Co-authored-by: Hanqing Zhao <sherry9277@gmail.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Co-authored-by: Harry <xh001x@hotmail.com>
This commit is contained in:
@@ -3,7 +3,6 @@ import time
|
||||
import uuid
|
||||
from collections.abc import Callable, Generator, Mapping, Sequence
|
||||
from typing import Any, cast
|
||||
from uuid import uuid4
|
||||
|
||||
from sqlalchemy import exists, select
|
||||
from sqlalchemy.orm import Session, sessionmaker
|
||||
@@ -15,22 +14,20 @@ from core.file import File
|
||||
from core.repositories import DifyCoreRepositoryFactory
|
||||
from core.variables import Variable
|
||||
from core.variables.variables import VariableUnion
|
||||
from core.workflow.entities.node_entities import NodeRunResult
|
||||
from core.workflow.entities.variable_pool import VariablePool
|
||||
from core.workflow.entities.workflow_node_execution import WorkflowNodeExecution, WorkflowNodeExecutionStatus
|
||||
from core.workflow.entities import VariablePool, WorkflowNodeExecution
|
||||
from core.workflow.enums import ErrorStrategy, WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus
|
||||
from core.workflow.errors import WorkflowNodeRunFailedError
|
||||
from core.workflow.graph_engine.entities.event import InNodeEvent
|
||||
from core.workflow.graph_events import GraphNodeEventBase, NodeRunFailedEvent, NodeRunSucceededEvent
|
||||
from core.workflow.node_events import NodeRunResult
|
||||
from core.workflow.nodes import NodeType
|
||||
from core.workflow.nodes.base.node import BaseNode
|
||||
from core.workflow.nodes.enums import ErrorStrategy
|
||||
from core.workflow.nodes.event import RunCompletedEvent
|
||||
from core.workflow.nodes.event.types import NodeEvent
|
||||
from core.workflow.nodes.base.node import Node
|
||||
from core.workflow.nodes.node_mapping import LATEST_VERSION, NODE_TYPE_CLASSES_MAPPING
|
||||
from core.workflow.nodes.start.entities import StartNodeData
|
||||
from core.workflow.system_variable import SystemVariable
|
||||
from core.workflow.workflow_entry import WorkflowEntry
|
||||
from events.app_event import app_draft_workflow_was_synced, app_published_workflow_was_updated
|
||||
from extensions.ext_database import db
|
||||
from extensions.ext_storage import storage
|
||||
from factories.file_factory import build_from_mapping, build_from_mappings
|
||||
from libs.datetime_utils import naive_utc_now
|
||||
from models.account import Account
|
||||
@@ -276,12 +273,13 @@ class WorkflowService:
|
||||
type=draft_workflow.type,
|
||||
version=Workflow.version_from_datetime(naive_utc_now()),
|
||||
graph=draft_workflow.graph,
|
||||
features=draft_workflow.features,
|
||||
created_by=account.id,
|
||||
environment_variables=draft_workflow.environment_variables,
|
||||
conversation_variables=draft_workflow.conversation_variables,
|
||||
marked_name=marked_name,
|
||||
marked_comment=marked_comment,
|
||||
rag_pipeline_variables=draft_workflow.rag_pipeline_variables,
|
||||
features=draft_workflow.features,
|
||||
)
|
||||
|
||||
# commit db session changes
|
||||
@@ -565,12 +563,12 @@ class WorkflowService:
|
||||
# This will prevent validation errors from breaking the workflow
|
||||
return []
|
||||
|
||||
def get_default_block_configs(self) -> list[dict]:
|
||||
def get_default_block_configs(self) -> Sequence[Mapping[str, object]]:
|
||||
"""
|
||||
Get default block configs
|
||||
"""
|
||||
# return default block config
|
||||
default_block_configs = []
|
||||
default_block_configs: list[Mapping[str, object]] = []
|
||||
for node_class_mapping in NODE_TYPE_CLASSES_MAPPING.values():
|
||||
node_class = node_class_mapping[LATEST_VERSION]
|
||||
default_config = node_class.get_default_config()
|
||||
@@ -579,7 +577,9 @@ class WorkflowService:
|
||||
|
||||
return default_block_configs
|
||||
|
||||
def get_default_block_config(self, node_type: str, filters: dict | None = None) -> dict | None:
|
||||
def get_default_block_config(
|
||||
self, node_type: str, filters: Mapping[str, object] | None = None
|
||||
) -> Mapping[str, object]:
|
||||
"""
|
||||
Get default config of node.
|
||||
:param node_type: node type
|
||||
@@ -590,12 +590,12 @@ class WorkflowService:
|
||||
|
||||
# return default block config
|
||||
if node_type_enum not in NODE_TYPE_CLASSES_MAPPING:
|
||||
return None
|
||||
return {}
|
||||
|
||||
node_class = NODE_TYPE_CLASSES_MAPPING[node_type_enum][LATEST_VERSION]
|
||||
default_config = node_class.get_default_config(filters=filters)
|
||||
if not default_config:
|
||||
return None
|
||||
return {}
|
||||
|
||||
return default_config
|
||||
|
||||
@@ -677,7 +677,7 @@ class WorkflowService:
|
||||
|
||||
# run draft workflow node
|
||||
start_at = time.perf_counter()
|
||||
node_execution = self._handle_node_run_result(
|
||||
node_execution = self._handle_single_step_result(
|
||||
invoke_node_fn=lambda: run,
|
||||
start_at=start_at,
|
||||
node_id=node_id,
|
||||
@@ -699,6 +699,9 @@ class WorkflowService:
|
||||
if workflow_node_execution is None:
|
||||
raise ValueError(f"WorkflowNodeExecution with id {node_execution.id} not found after saving")
|
||||
|
||||
with Session(db.engine) as session:
|
||||
outputs = workflow_node_execution.load_full_outputs(session, storage)
|
||||
|
||||
with Session(bind=db.engine) as session, session.begin():
|
||||
draft_var_saver = DraftVariableSaver(
|
||||
session=session,
|
||||
@@ -707,8 +710,9 @@ class WorkflowService:
|
||||
node_type=NodeType(workflow_node_execution.node_type),
|
||||
enclosing_node_id=enclosing_node_id,
|
||||
node_execution_id=node_execution.id,
|
||||
user=account,
|
||||
)
|
||||
draft_var_saver.save(process_data=node_execution.process_data, outputs=node_execution.outputs)
|
||||
draft_var_saver.save(process_data=node_execution.process_data, outputs=outputs)
|
||||
session.commit()
|
||||
|
||||
return workflow_node_execution
|
||||
@@ -722,7 +726,7 @@ class WorkflowService:
|
||||
# run free workflow node
|
||||
start_at = time.perf_counter()
|
||||
|
||||
node_execution = self._handle_node_run_result(
|
||||
node_execution = self._handle_single_step_result(
|
||||
invoke_node_fn=lambda: WorkflowEntry.run_free_node(
|
||||
node_id=node_id,
|
||||
node_data=node_data,
|
||||
@@ -736,103 +740,131 @@ class WorkflowService:
|
||||
|
||||
return node_execution
|
||||
|
||||
def _handle_node_run_result(
|
||||
def _handle_single_step_result(
|
||||
self,
|
||||
invoke_node_fn: Callable[[], tuple[BaseNode, Generator[NodeEvent | InNodeEvent, None, None]]],
|
||||
invoke_node_fn: Callable[[], tuple[Node, Generator[GraphNodeEventBase, None, None]]],
|
||||
start_at: float,
|
||||
node_id: str,
|
||||
) -> WorkflowNodeExecution:
|
||||
try:
|
||||
node, node_events = invoke_node_fn()
|
||||
"""
|
||||
Handle single step execution and return WorkflowNodeExecution.
|
||||
|
||||
node_run_result: NodeRunResult | None = None
|
||||
for event in node_events:
|
||||
if isinstance(event, RunCompletedEvent):
|
||||
node_run_result = event.run_result
|
||||
Args:
|
||||
invoke_node_fn: Function to invoke node execution
|
||||
start_at: Execution start time
|
||||
node_id: ID of the node being executed
|
||||
|
||||
# sign output files
|
||||
# node_run_result.outputs = WorkflowEntry.handle_special_values(node_run_result.outputs)
|
||||
break
|
||||
Returns:
|
||||
WorkflowNodeExecution: The execution result
|
||||
"""
|
||||
node, node_run_result, run_succeeded, error = self._execute_node_safely(invoke_node_fn)
|
||||
|
||||
if not node_run_result:
|
||||
raise ValueError("Node run failed with no run result")
|
||||
# single step debug mode error handling return
|
||||
if node_run_result.status == WorkflowNodeExecutionStatus.FAILED and node.continue_on_error:
|
||||
node_error_args: dict[str, Any] = {
|
||||
"status": WorkflowNodeExecutionStatus.EXCEPTION,
|
||||
"error": node_run_result.error,
|
||||
"inputs": node_run_result.inputs,
|
||||
"metadata": {"error_strategy": node.error_strategy},
|
||||
}
|
||||
if node.error_strategy is ErrorStrategy.DEFAULT_VALUE:
|
||||
node_run_result = NodeRunResult(
|
||||
**node_error_args,
|
||||
outputs={
|
||||
**node.default_value_dict,
|
||||
"error_message": node_run_result.error,
|
||||
"error_type": node_run_result.error_type,
|
||||
},
|
||||
)
|
||||
else:
|
||||
node_run_result = NodeRunResult(
|
||||
**node_error_args,
|
||||
outputs={
|
||||
"error_message": node_run_result.error,
|
||||
"error_type": node_run_result.error_type,
|
||||
},
|
||||
)
|
||||
run_succeeded = node_run_result.status in (
|
||||
WorkflowNodeExecutionStatus.SUCCEEDED,
|
||||
WorkflowNodeExecutionStatus.EXCEPTION,
|
||||
)
|
||||
error = node_run_result.error if not run_succeeded else None
|
||||
except WorkflowNodeRunFailedError as e:
|
||||
node = e.node
|
||||
run_succeeded = False
|
||||
node_run_result = None
|
||||
error = e.error
|
||||
|
||||
# Create a NodeExecution domain model
|
||||
# Create base node execution
|
||||
node_execution = WorkflowNodeExecution(
|
||||
id=str(uuid4()),
|
||||
workflow_id="", # This is a single-step execution, so no workflow ID
|
||||
id=str(uuid.uuid4()),
|
||||
workflow_id="", # Single-step execution has no workflow ID
|
||||
index=1,
|
||||
node_id=node_id,
|
||||
node_type=node.type_,
|
||||
node_type=node.node_type,
|
||||
title=node.title,
|
||||
elapsed_time=time.perf_counter() - start_at,
|
||||
created_at=naive_utc_now(),
|
||||
finished_at=naive_utc_now(),
|
||||
)
|
||||
|
||||
# Populate execution result data
|
||||
self._populate_execution_result(node_execution, node_run_result, run_succeeded, error)
|
||||
|
||||
return node_execution
|
||||
|
||||
def _execute_node_safely(
    self, invoke_node_fn: Callable[[], tuple[Node, Generator[GraphNodeEventBase, None, None]]]
) -> tuple[Node, NodeRunResult | None, bool, str | None]:
    """
    Invoke a node and reduce its event stream to a single outcome.

    Args:
        invoke_node_fn: Zero-argument callable returning the node together
            with the generator of events produced by running it.

    Returns:
        Tuple of (node, node_run_result, run_succeeded, error). ``error`` is
        ``None`` whenever ``run_succeeded`` is true. When the invocation
        raises ``WorkflowNodeRunFailedError`` the tuple is
        ``(e.node, None, False, e.error)``.

    Raises:
        ValueError: If the event stream yields no succeeded/failed event
            carrying a (truthy) run result.
    """
    try:
        node, node_events = invoke_node_fn()

        # Only the first succeeded/failed event is relevant; stop consuming
        # the generator as soon as it shows up.
        result: NodeRunResult | None = None
        for event in node_events:
            if isinstance(event, (NodeRunSucceededEvent, NodeRunFailedEvent)):
                result = event.node_run_result
                break

        if not result:
            raise ValueError("Node execution failed - no result returned")

        # Apply error strategy if node failed
        if result.status == WorkflowNodeExecutionStatus.FAILED and node.error_strategy:
            result = self._apply_error_strategy(node, result)

        # EXCEPTION (produced by an applied error strategy) is treated as a
        # successful run alongside SUCCEEDED.
        succeeded = result.status in (
            WorkflowNodeExecutionStatus.SUCCEEDED,
            WorkflowNodeExecutionStatus.EXCEPTION,
        )
        return node, result, succeeded, None if succeeded else result.error
    except WorkflowNodeRunFailedError as e:
        return e.node, None, False, e.error
|
||||
|
||||
def _apply_error_strategy(self, node: Node, node_run_result: NodeRunResult) -> NodeRunResult:
    """
    Build the EXCEPTION result that replaces a failed node run result.

    Args:
        node: The node that failed; its ``error_strategy`` and
            ``default_value_dict`` are read.
        node_run_result: The failed result whose ``error``, ``error_type``
            and ``inputs`` are carried over.

    Returns:
        A new ``NodeRunResult`` with status ``EXCEPTION``. Its outputs always
        contain ``error_message`` and ``error_type``; with the
        ``DEFAULT_VALUE`` strategy they also contain the node's default values.
    """
    # TODO(Novice): Maybe we should apply error strategy to node level?
    error_outputs: dict[str, Any] = {}

    # Add default values first if strategy is DEFAULT_VALUE, so that a
    # configured default named "error_message" or "error_type" can never
    # overwrite the real error information written below.
    if node.error_strategy is ErrorStrategy.DEFAULT_VALUE:
        error_outputs.update(node.default_value_dict)

    error_outputs["error_message"] = node_run_result.error
    error_outputs["error_type"] = node_run_result.error_type

    return NodeRunResult(
        status=WorkflowNodeExecutionStatus.EXCEPTION,
        error=node_run_result.error,
        inputs=node_run_result.inputs,
        metadata={WorkflowNodeExecutionMetadataKey.ERROR_STRATEGY: node.error_strategy},
        outputs=error_outputs,
    )
|
||||
|
||||
def _populate_execution_result(
|
||||
self,
|
||||
node_execution: WorkflowNodeExecution,
|
||||
node_run_result: NodeRunResult | None,
|
||||
run_succeeded: bool,
|
||||
error: str | None,
|
||||
) -> None:
|
||||
"""Populate node execution with result data."""
|
||||
if run_succeeded and node_run_result:
|
||||
# Set inputs, process_data, and outputs as dictionaries (not JSON strings)
|
||||
inputs = WorkflowEntry.handle_special_values(node_run_result.inputs) if node_run_result.inputs else None
|
||||
process_data = (
|
||||
node_execution.inputs = (
|
||||
WorkflowEntry.handle_special_values(node_run_result.inputs) if node_run_result.inputs else None
|
||||
)
|
||||
node_execution.process_data = (
|
||||
WorkflowEntry.handle_special_values(node_run_result.process_data)
|
||||
if node_run_result.process_data
|
||||
else None
|
||||
)
|
||||
outputs = node_run_result.outputs
|
||||
|
||||
node_execution.inputs = inputs
|
||||
node_execution.process_data = process_data
|
||||
node_execution.outputs = outputs
|
||||
node_execution.outputs = node_run_result.outputs
|
||||
node_execution.metadata = node_run_result.metadata
|
||||
|
||||
# Map status from WorkflowNodeExecutionStatus to NodeExecutionStatus
|
||||
if node_run_result.status == WorkflowNodeExecutionStatus.SUCCEEDED:
|
||||
node_execution.status = WorkflowNodeExecutionStatus.SUCCEEDED
|
||||
elif node_run_result.status == WorkflowNodeExecutionStatus.EXCEPTION:
|
||||
node_execution.status = WorkflowNodeExecutionStatus.EXCEPTION
|
||||
# Set status and error based on result
|
||||
node_execution.status = node_run_result.status
|
||||
if node_run_result.status == WorkflowNodeExecutionStatus.EXCEPTION:
|
||||
node_execution.error = node_run_result.error
|
||||
else:
|
||||
# Set failed status and error
|
||||
node_execution.status = WorkflowNodeExecutionStatus.FAILED
|
||||
node_execution.error = error
|
||||
|
||||
return node_execution
|
||||
|
||||
def convert_to_workflow(self, app_model: App, account: Account, args: dict) -> App:
|
||||
"""
|
||||
Basic mode of chatbot app(expert mode) to workflow
|
||||
|
||||
Reference in New Issue
Block a user