feat: knowledge pipeline (#25360)

Signed-off-by: -LAN- <laipz8200@outlook.com>
Co-authored-by: twwu <twwu@dify.ai>
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
Co-authored-by: jyong <718720800@qq.com>
Co-authored-by: Wu Tianwei <30284043+WTW0313@users.noreply.github.com>
Co-authored-by: QuantumGhost <obelisk.reg+git@gmail.com>
Co-authored-by: lyzno1 <yuanyouhuilyz@gmail.com>
Co-authored-by: quicksand <quicksandzn@gmail.com>
Co-authored-by: Jyong <76649700+JohnJyong@users.noreply.github.com>
Co-authored-by: lyzno1 <92089059+lyzno1@users.noreply.github.com>
Co-authored-by: zxhlyh <jasonapring2015@outlook.com>
Co-authored-by: Yongtao Huang <yongtaoh2022@gmail.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: Joel <iamjoel007@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: nite-knite <nkCoding@gmail.com>
Co-authored-by: Hanqing Zhao <sherry9277@gmail.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Harry <xh001x@hotmail.com>
This commit is contained in:
-LAN-
2025-09-18 12:49:10 +08:00
committed by GitHub
parent 7dadb33003
commit 85cda47c70
1772 changed files with 102407 additions and 31710 deletions

View File

@@ -1,5 +1,6 @@
from collections.abc import Mapping
from datetime import datetime
from typing import Any, Literal, Optional
from typing import Any, Literal
from pydantic import BaseModel, Field, field_validator
@@ -16,10 +17,10 @@ class ToolApiEntity(BaseModel):
description: I18nObject
parameters: list[ToolParameter] | None = None
labels: list[str] = Field(default_factory=list)
output_schema: dict | None = None
output_schema: Mapping[str, object] = Field(default_factory=dict)
ToolProviderTypeApiLiteral = Optional[Literal["builtin", "api", "workflow", "mcp"]]
ToolProviderTypeApiLiteral = Literal["builtin", "api", "workflow", "mcp"] | None
class ToolProviderApiEntity(BaseModel):
@@ -27,17 +28,17 @@ class ToolProviderApiEntity(BaseModel):
author: str
name: str # identifier
description: I18nObject
icon: str | dict
icon_dark: str | dict | None = Field(default=None, description="The dark icon of the tool")
icon: str | Mapping[str, str]
icon_dark: str | Mapping[str, str] = ""
label: I18nObject # label
type: ToolProviderType
masked_credentials: dict | None = None
original_credentials: dict | None = None
masked_credentials: Mapping[str, object] = Field(default_factory=dict)
original_credentials: Mapping[str, object] = Field(default_factory=dict)
is_team_authorization: bool = False
allow_delete: bool = True
plugin_id: str | None = Field(default="", description="The plugin id of the tool")
plugin_unique_identifier: str | None = Field(default="", description="The unique identifier of the tool")
tools: list[ToolApiEntity] = Field(default_factory=list)
tools: list[ToolApiEntity] = Field(default_factory=list[ToolApiEntity])
labels: list[str] = Field(default_factory=list)
# MCP
server_url: str | None = Field(default="", description="The server url of the tool")
@@ -105,7 +106,7 @@ class ToolProviderCredentialApiEntity(BaseModel):
is_default: bool = Field(
default=False, description="Whether the credential is the default credential for the provider in the workspace"
)
credentials: dict = Field(description="The credentials of the provider")
credentials: Mapping[str, object] = Field(description="The credentials of the provider", default_factory=dict)
class ToolProviderCredentialInfoApiEntity(BaseModel):

View File

@@ -22,22 +22,23 @@ from core.tools.entities.constants import TOOL_SELECTOR_MODEL_IDENTITY
class ToolLabelEnum(StrEnum):
SEARCH = auto()
IMAGE = auto()
VIDEOS = auto()
WEATHER = auto()
FINANCE = auto()
DESIGN = auto()
TRAVEL = auto()
SOCIAL = auto()
NEWS = auto()
MEDICAL = auto()
PRODUCTIVITY = auto()
EDUCATION = auto()
BUSINESS = auto()
ENTERTAINMENT = auto()
UTILITIES = auto()
OTHER = auto()
SEARCH = "search"
IMAGE = "image"
VIDEOS = "videos"
WEATHER = "weather"
FINANCE = "finance"
DESIGN = "design"
TRAVEL = "travel"
SOCIAL = "social"
NEWS = "news"
MEDICAL = "medical"
PRODUCTIVITY = "productivity"
EDUCATION = "education"
BUSINESS = "business"
ENTERTAINMENT = "entertainment"
UTILITIES = "utilities"
RAG = "rag"
OTHER = "other"
class ToolProviderType(StrEnum):
@@ -186,7 +187,7 @@ class ToolInvokeMessage(BaseModel):
error: str | None = Field(default=None, description="The error message")
status: LogStatus = Field(..., description="The status of the log")
data: Mapping[str, Any] = Field(..., description="Detailed log data")
metadata: Mapping[str, Any] | None = Field(default=None, description="The metadata of the log")
metadata: Mapping[str, Any] = Field(default_factory=dict, description="The metadata of the log")
class RetrieverResourceMessage(BaseModel):
retriever_resources: list[RetrievalSourceMetadata] = Field(..., description="retriever resources")
@@ -362,9 +363,9 @@ class ToolDescription(BaseModel):
class ToolEntity(BaseModel):
identity: ToolIdentity
parameters: list[ToolParameter] = Field(default_factory=list)
parameters: list[ToolParameter] = Field(default_factory=list[ToolParameter])
description: ToolDescription | None = None
output_schema: dict | None = None
output_schema: Mapping[str, object] = Field(default_factory=dict)
has_runtime_parameters: bool = Field(default=False, description="Whether the tool has runtime parameters")
# pydantic configs
@@ -377,21 +378,23 @@ class ToolEntity(BaseModel):
class OAuthSchema(BaseModel):
    """Provider-config schemas for the OAuth client and the OAuth credentials."""

    # ``list[ProviderConfig]`` is callable and returns a new empty list, so it
    # works as a default factory while also carrying the element type.
    client_schema: list[ProviderConfig] = Field(
        default_factory=list[ProviderConfig], description="The schema of the OAuth client"
    )
    credentials_schema: list[ProviderConfig] = Field(
        default_factory=list[ProviderConfig], description="The schema of the OAuth credentials"
    )
class ToolProviderEntity(BaseModel):
    """Declarative description of a tool provider."""

    identity: ToolProviderIdentity
    # Optional; left as None when no plugin id is supplied.
    plugin_id: str | None = None
    # Defaults to a fresh empty list for each instance.
    credentials_schema: list[ProviderConfig] = Field(default_factory=list[ProviderConfig])
    # Optional OAuth schemas; None when not supplied.
    oauth_schema: OAuthSchema | None = None
class ToolProviderEntityWithPlugin(ToolProviderEntity):
    """A ``ToolProviderEntity`` that additionally carries its tool entities."""

    # Defaults to a fresh empty list for each instance.
    tools: list[ToolEntity] = Field(default_factory=list[ToolEntity])
class WorkflowToolParameterConfiguration(BaseModel):
@@ -502,9 +505,9 @@ class CredentialType(StrEnum):
@classmethod
def of(cls, credential_type: str) -> "CredentialType":
    """Return the member that corresponds to a credential type name.

    The name is compared case-insensitively.  ``"api-key"`` and
    ``"api_key"`` map to ``API_KEY``; ``"oauth2"`` and ``"oauth"`` map to
    ``OAUTH2``.

    :param credential_type: the credential type name to resolve
    :return: the matching member
    :raises ValueError: if the name matches none of the accepted spellings
    """
    type_name = credential_type.lower()
    if type_name in {"api-key", "api_key"}:
        return cls.API_KEY
    if type_name in {"oauth2", "oauth"}:
        return cls.OAUTH2
    # Report the caller's original spelling, not the lowercased one.
    raise ValueError(f"Invalid credential type: {credential_type}")

View File

@@ -49,6 +49,9 @@ ICONS = {
</svg>""", # noqa: E501
ToolLabelEnum.OTHER: """<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 16 16" fill="none">
<path d="M8.00052 0.666748L4.00065 7.33342H12.0007L8.00052 0.666748ZM8.00052 3.25828L9.64572 6.00008H6.35553L8.00052 3.25828ZM4.50065 13.3334C3.48813 13.3334 2.66732 12.5126 2.66732 11.5001C2.66732 10.4875 3.48813 9.66675 4.50065 9.66675C5.51317 9.66675 6.33398 10.4875 6.33398 11.5001C6.33398 12.5126 5.51317 13.3334 4.50065 13.3334ZM4.50065 14.6667C6.24955 14.6667 7.66732 13.249 7.66732 11.5001C7.66732 9.75115 6.24955 8.33342 4.50065 8.33342C2.75175 8.33342 1.33398 9.75115 1.33398 11.5001C1.33398 13.249 2.75175 14.6667 4.50065 14.6667ZM10.0007 10.3334V13.0001H12.6673V10.3334H10.0007ZM8.66732 14.3334V9.00008H14.0007V14.3334H8.66732Z" fill="#344054"/>
</svg>""", # noqa: E501
ToolLabelEnum.RAG: """<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 16 16" fill="none">
<path d="M8.00065 1.3335H9.33398V2.66683H8.00065V1.3335ZM5.33398 1.3335H6.66732V2.66683H5.33398V1.3335ZM3.99935 2.66683C3.99935 2.29864 4.29783 2.00016 4.66602 2.00016H12.3327C12.7009 2.00016 13.0007 2.29864 13.0007 2.66683V13.3335C13.0007 13.7017 12.7009 14.0002 12.3327 14.0002H4.66602C4.29783 14.0002 3.99935 13.7017 3.99935 13.3335V2.66683ZM4.66602 12.6668C4.29783 12.6668 3.99935 12.3683 3.99935 12.0002V10.6668H5.33398V12.0002C5.33398 12.3683 5.0355 12.6668 4.66602 12.6668ZM5.33398 8.66683H6.66732V10.0002H5.33398V8.66683ZM5.33398 6.66683H6.66732V8.00016H5.33398V6.66683ZM3.99935 4.66683H6.66602V6.00016H3.99935V4.66683ZM6.66602 1.3335H12.3327V2.66683H6.66602V1.3335Z" fill="#344054"/>
</svg>""", # noqa: E501
}
@@ -105,6 +108,9 @@ default_tool_label_dict = {
ToolLabelEnum.OTHER: ToolLabel(
name="other", label=I18nObject(en_US="Other", zh_Hans="其他"), icon=ICONS[ToolLabelEnum.OTHER]
),
ToolLabelEnum.RAG: ToolLabel(
name="rag", label=I18nObject(en_US="RAG", zh_Hans="RAG"), icon=ICONS[ToolLabelEnum.RAG]
),
}
default_tool_labels = list(default_tool_label_dict.values())

View File

@@ -72,7 +72,6 @@ class MCPToolProviderController(ToolProviderController):
),
llm=remote_mcp_tool.description or "",
),
output_schema=None,
has_runtime_parameters=len(remote_mcp_tool.inputSchema) > 0,
)
for remote_mcp_tool in remote_mcp_tools

View File

@@ -152,7 +152,6 @@ class ToolEngine:
user_id: str,
workflow_tool_callback: DifyWorkflowCallbackHandler,
workflow_call_depth: int,
thread_pool_id: str | None = None,
conversation_id: str | None = None,
app_id: str | None = None,
message_id: str | None = None,
@@ -166,7 +165,6 @@ class ToolEngine:
if isinstance(tool, WorkflowTool):
tool.workflow_call_depth = workflow_call_depth + 1
tool.thread_pool_id = thread_pool_id
if tool.runtime and tool.runtime.runtime_parameters:
tool_parameters = {**tool.runtime.runtime_parameters, **tool_parameters}

View File

@@ -5,7 +5,7 @@ import time
from collections.abc import Generator, Mapping
from os import listdir, path
from threading import Lock
from typing import TYPE_CHECKING, Any, Literal, Union, cast
from typing import TYPE_CHECKING, Any, Literal, Optional, Union, cast
import sqlalchemy as sa
from pydantic import TypeAdapter
@@ -14,32 +14,17 @@ from sqlalchemy.orm import Session
from yarl import URL
import contexts
from core.helper.provider_cache import ToolProviderCredentialsCache
from core.plugin.entities.plugin import ToolProviderID
from core.plugin.impl.oauth import OAuthHandler
from core.plugin.impl.tool import PluginToolManager
from core.tools.__base.tool_provider import ToolProviderController
from core.tools.__base.tool_runtime import ToolRuntime
from core.tools.mcp_tool.provider import MCPToolProviderController
from core.tools.mcp_tool.tool import MCPTool
from core.tools.plugin_tool.provider import PluginToolProviderController
from core.tools.plugin_tool.tool import PluginTool
from core.tools.utils.uuid_utils import is_valid_uuid
from core.tools.workflow_as_tool.provider import WorkflowToolProviderController
from core.workflow.entities.variable_pool import VariablePool
from services.enterprise.plugin_manager_service import PluginCredentialType
from services.tools.mcp_tools_manage_service import MCPToolManageService
if TYPE_CHECKING:
from core.workflow.nodes.tool.entities import ToolEntity
from configs import dify_config
from core.agent.entities import AgentToolEntity
from core.app.entities.app_invoke_entities import InvokeFrom
from core.helper.module_import_helper import load_single_subclass_from_source
from core.helper.position_helper import is_filtered
from core.helper.provider_cache import ToolProviderCredentialsCache
from core.model_runtime.utils.encoders import jsonable_encoder
from core.plugin.impl.tool import PluginToolManager
from core.tools.__base.tool import Tool
from core.tools.__base.tool_provider import ToolProviderController
from core.tools.__base.tool_runtime import ToolRuntime
from core.tools.builtin_tool.provider import BuiltinToolProviderController
from core.tools.builtin_tool.providers._positions import BuiltinToolProviderSort
from core.tools.builtin_tool.tool import BuiltinTool
@@ -55,14 +40,27 @@ from core.tools.entities.tool_entities import (
ToolProviderType,
)
from core.tools.errors import ToolProviderNotFoundError
from core.tools.mcp_tool.provider import MCPToolProviderController
from core.tools.mcp_tool.tool import MCPTool
from core.tools.plugin_tool.provider import PluginToolProviderController
from core.tools.plugin_tool.tool import PluginTool
from core.tools.tool_label_manager import ToolLabelManager
from core.tools.utils.configuration import ToolParameterConfigurationManager
from core.tools.utils.encryption import create_provider_encrypter, create_tool_provider_encrypter
from core.tools.utils.uuid_utils import is_valid_uuid
from core.tools.workflow_as_tool.provider import WorkflowToolProviderController
from core.tools.workflow_as_tool.tool import WorkflowTool
from extensions.ext_database import db
from models.provider_ids import ToolProviderID
from models.tools import ApiToolProvider, BuiltinToolProvider, MCPToolProvider, WorkflowToolProvider
from services.enterprise.plugin_manager_service import PluginCredentialType
from services.tools.mcp_tools_manage_service import MCPToolManageService
from services.tools.tools_transform_service import ToolTransformService
if TYPE_CHECKING:
from core.workflow.entities import VariablePool
from core.workflow.nodes.tool.entities import ToolEntity
logger = logging.getLogger(__name__)
@@ -117,6 +115,7 @@ class ToolManager:
get the plugin provider
"""
# check if context is set
try:
contexts.plugin_tool_providers.get()
except LookupError:
@@ -172,6 +171,7 @@ class ToolManager:
:return: the tool
"""
if provider_type == ToolProviderType.BUILT_IN:
# check if the builtin tool need credentials
provider_controller = cls.get_builtin_provider(provider_id, tenant_id)
@@ -213,16 +213,16 @@ class ToolManager:
# fallback to the default provider
if builtin_provider is None:
# use the default provider
builtin_provider = (
db.session.query(BuiltinToolProvider)
.where(
BuiltinToolProvider.tenant_id == tenant_id,
(BuiltinToolProvider.provider == str(provider_id_entity))
| (BuiltinToolProvider.provider == provider_id_entity.provider_name),
with Session(db.engine) as session:
builtin_provider = session.scalar(
sa.select(BuiltinToolProvider)
.where(
BuiltinToolProvider.tenant_id == tenant_id,
(BuiltinToolProvider.provider == str(provider_id_entity))
| (BuiltinToolProvider.provider == provider_id_entity.provider_name),
)
.order_by(BuiltinToolProvider.is_default.desc(), BuiltinToolProvider.created_at.asc())
)
.order_by(BuiltinToolProvider.is_default.desc(), BuiltinToolProvider.created_at.asc())
.first()
)
if builtin_provider is None:
raise ToolProviderNotFoundError(f"no default provider for {provider_id}")
else:
@@ -263,6 +263,7 @@ class ToolManager:
# check if the credentials is expired
if builtin_provider.expires_at != -1 and (builtin_provider.expires_at - 60) < int(time.time()):
# TODO: circular import
from core.plugin.impl.oauth import OAuthHandler
from services.tools.builtin_tools_manage_service import BuiltinToolManageService
# refresh the credentials
@@ -270,6 +271,7 @@ class ToolManager:
provider_name = tool_provider.provider_name
redirect_uri = f"{dify_config.CONSOLE_API_URL}/console/api/oauth/plugin/{provider_id}/tool/callback"
system_credentials = BuiltinToolManageService.get_oauth_client(tenant_id, provider_id)
oauth_handler = OAuthHandler()
# refresh the credentials
refreshed_credentials = oauth_handler.refresh_credentials(
@@ -358,7 +360,7 @@ class ToolManager:
app_id: str,
agent_tool: AgentToolEntity,
invoke_from: InvokeFrom = InvokeFrom.DEBUGGER,
variable_pool: VariablePool | None = None,
variable_pool: Optional["VariablePool"] = None,
) -> Tool:
"""
get the agent tool runtime
@@ -400,7 +402,7 @@ class ToolManager:
node_id: str,
workflow_tool: "ToolEntity",
invoke_from: InvokeFrom = InvokeFrom.DEBUGGER,
variable_pool: VariablePool | None = None,
variable_pool: Optional["VariablePool"] = None,
) -> Tool:
"""
get the workflow tool runtime
@@ -516,6 +518,7 @@ class ToolManager:
"""
list all the plugin providers
"""
manager = PluginToolManager()
provider_entities = manager.fetch_tool_providers(tenant_id)
return [
@@ -882,7 +885,7 @@ class ToolManager:
)
@classmethod
def generate_workflow_tool_icon_url(cls, tenant_id: str, provider_id: str):
def generate_workflow_tool_icon_url(cls, tenant_id: str, provider_id: str) -> Mapping[str, str]:
try:
workflow_provider: WorkflowToolProvider | None = (
db.session.query(WorkflowToolProvider)
@@ -893,13 +896,13 @@ class ToolManager:
if workflow_provider is None:
raise ToolProviderNotFoundError(f"workflow provider {provider_id} not found")
icon: dict = json.loads(workflow_provider.icon)
icon = json.loads(workflow_provider.icon)
return icon
except Exception:
return {"background": "#252525", "content": "\ud83d\ude01"}
@classmethod
def generate_api_tool_icon_url(cls, tenant_id: str, provider_id: str):
def generate_api_tool_icon_url(cls, tenant_id: str, provider_id: str) -> Mapping[str, str]:
try:
api_provider: ApiToolProvider | None = (
db.session.query(ApiToolProvider)
@@ -910,13 +913,13 @@ class ToolManager:
if api_provider is None:
raise ToolProviderNotFoundError(f"api provider {provider_id} not found")
icon: dict = json.loads(api_provider.icon)
icon = json.loads(api_provider.icon)
return icon
except Exception:
return {"background": "#252525", "content": "\ud83d\ude01"}
@classmethod
def generate_mcp_tool_icon_url(cls, tenant_id: str, provider_id: str) -> dict[str, str] | str:
def generate_mcp_tool_icon_url(cls, tenant_id: str, provider_id: str) -> Mapping[str, str] | str:
try:
mcp_provider: MCPToolProvider | None = (
db.session.query(MCPToolProvider)
@@ -937,7 +940,7 @@ class ToolManager:
tenant_id: str,
provider_type: ToolProviderType,
provider_id: str,
) -> Union[str, dict[str, Any]]:
) -> str | Mapping[str, str]:
"""
get the tool icon
@@ -962,11 +965,10 @@ class ToolManager:
return cls.generate_workflow_tool_icon_url(tenant_id, provider_id)
elif provider_type == ToolProviderType.PLUGIN:
provider = ToolManager.get_plugin_provider(provider_id, tenant_id)
if isinstance(provider, PluginToolProviderController):
try:
return cls.generate_plugin_tool_icon_url(tenant_id, provider.entity.identity.icon)
except Exception:
return {"background": "#252525", "content": "\ud83d\ude01"}
try:
return cls.generate_plugin_tool_icon_url(tenant_id, provider.entity.identity.icon)
except Exception:
return {"background": "#252525", "content": "\ud83d\ude01"}
raise ValueError(f"plugin provider {provider_id} not found")
elif provider_type == ToolProviderType.MCP:
return cls.generate_mcp_tool_icon_url(tenant_id, provider_id)
@@ -977,7 +979,7 @@ class ToolManager:
def _convert_tool_parameters_type(
cls,
parameters: list[ToolParameter],
variable_pool: VariablePool | None,
variable_pool: Optional["VariablePool"],
tool_configurations: dict[str, Any],
typ: Literal["agent", "workflow", "tool"] = "workflow",
) -> dict[str, Any]:

View File

@@ -123,11 +123,15 @@ class ProviderConfigEncrypter:
return data
def create_provider_encrypter(
    tenant_id: str, config: list[BasicProviderConfig], cache: ProviderConfigCache
) -> tuple[ProviderConfigEncrypter, ProviderConfigCache]:
    """Build a ``ProviderConfigEncrypter`` and return it with its cache.

    :param tenant_id: passed to the encrypter as ``tenant_id``
    :param config: passed to the encrypter as ``config``
    :param cache: passed to the encrypter as ``provider_config_cache``
    :return: ``(encrypter, cache)``; ``cache`` is the same object that was passed in
    """
    encrypter = ProviderConfigEncrypter(tenant_id=tenant_id, config=config, provider_config_cache=cache)
    return encrypter, cache
def create_tool_provider_encrypter(tenant_id: str, controller: ToolProviderController):
def create_tool_provider_encrypter(
tenant_id: str, controller: ToolProviderController
) -> tuple[ProviderConfigEncrypter, ProviderConfigCache]:
cache = SingletonProviderCredentialsCache(
tenant_id=tenant_id,
provider_type=controller.provider_type.value,

View File

@@ -39,14 +39,12 @@ class WorkflowTool(Tool):
entity: ToolEntity,
runtime: ToolRuntime,
label: str = "Workflow",
thread_pool_id: str | None = None,
):
self.workflow_app_id = workflow_app_id
self.workflow_as_tool_id = workflow_as_tool_id
self.version = version
self.workflow_entities = workflow_entities
self.workflow_call_depth = workflow_call_depth
self.thread_pool_id = thread_pool_id
self.label = label
super().__init__(entity=entity, runtime=runtime)
@@ -90,7 +88,6 @@ class WorkflowTool(Tool):
invoke_from=self.runtime.invoke_from,
streaming=False,
call_depth=self.workflow_call_depth + 1,
workflow_thread_pool_id=self.thread_pool_id,
)
assert isinstance(result, dict)
data = result.get("data", {})