Compare commits: 11 commits (fix/plugin ... config-too)
| SHA1 |
|---|
| a84437b245 |
| 2e87e85474 |
| 23ee92e52e |
| 3449065585 |
| 84eec68421 |
| 1856a743fc |
| 76883751eb |
| d90e30d4b6 |
| 7f2e18be0c |
| e115340e83 |
| ed3066a069 |
@@ -655,6 +655,11 @@ class ToolConfig(BaseSettings):
         default=3600,
     )
 
+    TOOL_FILE_MAX_SIZE: PositiveInt = Field(
+        description="Maximum size in bytes for tool generated files",
+        default=30 * 1024 * 1024,
+    )
+
 
 class MailConfig(BaseSettings):
     """
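ToolConfig is a pydantic-settings model, so the new cap should be overridable through an environment variable of the same name. A minimal self-contained sketch of that pattern (ToolConfigSketch is illustrative, not Dify's actual config class):

```python
import os

from pydantic import Field, PositiveInt
from pydantic_settings import BaseSettings


class ToolConfigSketch(BaseSettings):
    TOOL_FILE_MAX_SIZE: PositiveInt = Field(
        description="Maximum size in bytes for tool generated files",
        default=30 * 1024 * 1024,
    )


print(ToolConfigSketch().TOOL_FILE_MAX_SIZE)  # 31457280, the 30 MiB default

os.environ["TOOL_FILE_MAX_SIZE"] = str(50 * 1024 * 1024)
print(ToolConfigSketch().TOOL_FILE_MAX_SIZE)  # 52428800, read from the environment
```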
@@ -1,14 +1,50 @@
 from collections.abc import Generator
 from typing import Any, Optional
 
-from pydantic import BaseModel
+from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator
 
+from configs import dify_config
 from core.plugin.entities.plugin import GenericProviderID, ToolProviderID
 from core.plugin.entities.plugin_daemon import PluginBasicBooleanResponse, PluginToolProviderEntity
 from core.plugin.impl.base import BasePluginClient
 from core.tools.entities.tool_entities import CredentialType, ToolInvokeMessage, ToolParameter
 
 
+class FileChunk(BaseModel):
+    """File chunk buffer for assembling blob data from chunks."""
+
+    bytes_written: int = 0
+    total_length: int
+    data: bytearray = Field(default_factory=bytearray)
+
+    def __iadd__(self, other: bytes) -> "FileChunk":
+        self.data[self.bytes_written : self.bytes_written + len(other)] = other
+        self.bytes_written += len(other)
+        if self.bytes_written > self.total_length:
+            raise ValueError(f"File chunk is too large which reached the limit of {self.total_length} bytes")
+        return self
+
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+
+    @field_validator("total_length")
+    @classmethod
+    def validate_total_length(cls, v: int) -> int:
+        if v <= 0:
+            raise ValueError("total_length must be positive")
+        if v > dify_config.TOOL_FILE_MAX_SIZE:
+            raise ValueError(f"total_length exceeds maximum file size of {dify_config.TOOL_FILE_MAX_SIZE} bytes")
+        return v
+
+    @model_validator(mode="before")
+    @classmethod
+    def initialize_data_buffer(cls, values):
+        if isinstance(values, dict):
+            if "data" not in values or values["data"] is None:
+                if "total_length" in values:
+                    values["data"] = bytearray(values["total_length"])
+        return values
+
+
 class PluginToolManager(BasePluginClient):
     def fetch_tool_providers(self, tenant_id: str) -> list[PluginToolProviderEntity]:
         """
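A short usage sketch of the new FileChunk model, assuming the import path used by the unit tests below and TOOL_FILE_MAX_SIZE at its 30 MiB default so the validator admits a small buffer:

```python
from core.plugin.impl.tool import FileChunk

chunk = FileChunk(total_length=10)  # model_validator preallocates bytearray(10)
chunk += b"hello"                   # __iadd__ writes at offset 0
chunk += b"world"                   # writes at offset 5; the buffer is now full
assert bytes(chunk.data) == b"helloworld"

try:
    chunk += b"!"  # would push bytes_written past total_length
except ValueError as err:
    print(err)  # File chunk is too large which reached the limit of 10 bytes
```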
@@ -41,6 +77,59 @@ class PluginToolManager(BasePluginClient):
 
         return response
 
+    def _process_blob_chunks(
+        self,
+        response: Generator[ToolInvokeMessage, None, None],
+        chunk_size_limit: int = 8192,
+    ) -> Generator[ToolInvokeMessage, None, None]:
+        """
+        Process blob chunks from tool invocation responses.
+
+        Args:
+            response: Generator yielding ToolInvokeMessage instances
+            chunk_size_limit: Maximum size for a single chunk (default 8KB)
+
+        Yields:
+            ToolInvokeMessage: Processed messages with complete blobs assembled from chunks
+
+        Raises:
+            ValueError: If chunk or file size limits are exceeded
+        """
+        files: dict[str, FileChunk] = {}
+
+        for resp in response:
+            if resp.type != ToolInvokeMessage.MessageType.BLOB_CHUNK:
+                yield resp
+                continue
+
+            assert isinstance(resp.message, ToolInvokeMessage.BlobChunkMessage)
+
+            # Get blob chunk information
+            chunk_id = resp.message.id
+            total_length = resp.message.total_length
+            blob_data = resp.message.blob
+            is_end = resp.message.end
+
+            # Initialize buffer for this file if it doesn't exist
+            if chunk_id not in files:
+                if total_length > dify_config.TOOL_FILE_MAX_SIZE:
+                    raise ValueError(
+                        f"File is too large which reached the limit of {dify_config.TOOL_FILE_MAX_SIZE} bytes"
+                    )
+                files[chunk_id] = FileChunk(total_length=total_length)
+
+            # Append the blob data to the buffer
+            files[chunk_id] += blob_data
+
+            # If this is the final chunk, yield a complete blob message
+            if is_end:
+                yield ToolInvokeMessage(
+                    type=ToolInvokeMessage.MessageType.BLOB,
+                    message=ToolInvokeMessage.BlobMessage(blob=files[chunk_id].data),
+                    meta=resp.meta,
+                )
+                del files[chunk_id]
+
     def fetch_tool_provider(self, tenant_id: str, provider: str) -> PluginToolProviderEntity:
         """
         Fetch tool provider for the given tenant and plugin.
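For context, a hypothetical producer-side sketch of the stream this method consumes. emit_blob_as_chunks is illustrative and not part of the diff; it only uses the BlobChunkMessage fields exercised above, and its 8192-byte chunk size mirrors the chunk_size_limit default:

```python
from collections.abc import Generator

from core.tools.entities.tool_entities import ToolInvokeMessage


def emit_blob_as_chunks(
    file_id: str, payload: bytes, chunk_size: int = 8192
) -> Generator[ToolInvokeMessage, None, None]:
    """Split a payload into BLOB_CHUNK messages that _process_blob_chunks can reassemble."""
    total = len(payload)
    for seq, offset in enumerate(range(0, total, chunk_size)):
        piece = payload[offset : offset + chunk_size]
        yield ToolInvokeMessage(
            type=ToolInvokeMessage.MessageType.BLOB_CHUNK,
            message=ToolInvokeMessage.BlobChunkMessage(
                id=file_id,
                sequence=seq,
                total_length=total,
                blob=piece,
                end=offset + chunk_size >= total,  # mark the final piece
            ),
        )
```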
@@ -113,61 +202,8 @@ class PluginToolManager(BasePluginClient):
             },
         )
 
-        class FileChunk:
-            """
-            Only used for internal processing.
-            """
-
-            bytes_written: int
-            total_length: int
-            data: bytearray
-
-            def __init__(self, total_length: int):
-                self.bytes_written = 0
-                self.total_length = total_length
-                self.data = bytearray(total_length)
-
-        files: dict[str, FileChunk] = {}
-        for resp in response:
-            if resp.type == ToolInvokeMessage.MessageType.BLOB_CHUNK:
-                assert isinstance(resp.message, ToolInvokeMessage.BlobChunkMessage)
-                # Get blob chunk information
-                chunk_id = resp.message.id
-                total_length = resp.message.total_length
-                blob_data = resp.message.blob
-                is_end = resp.message.end
-
-                # Initialize buffer for this file if it doesn't exist
-                if chunk_id not in files:
-                    files[chunk_id] = FileChunk(total_length)
-
-                # If this is the final chunk, yield a complete blob message
-                if is_end:
-                    yield ToolInvokeMessage(
-                        type=ToolInvokeMessage.MessageType.BLOB,
-                        message=ToolInvokeMessage.BlobMessage(blob=files[chunk_id].data),
-                        meta=resp.meta,
-                    )
-                else:
-                    # Check if file is too large (30MB limit)
-                    if files[chunk_id].bytes_written + len(blob_data) > 30 * 1024 * 1024:
-                        # Delete the file if it's too large
-                        del files[chunk_id]
-                        # Skip yielding this message
-                        raise ValueError("File is too large which reached the limit of 30MB")
-
-                    # Check if single chunk is too large (8KB limit)
-                    if len(blob_data) > 8192:
-                        # Skip yielding this message
-                        raise ValueError("File chunk is too large which reached the limit of 8KB")
-
-                    # Append the blob data to the buffer
-                    files[chunk_id].data[
-                        files[chunk_id].bytes_written : files[chunk_id].bytes_written + len(blob_data)
-                    ] = blob_data
-                    files[chunk_id].bytes_written += len(blob_data)
-            else:
-                yield resp
+        # Process blob chunks using the handler method
+        return self._process_blob_chunks(response)
 
     def validate_provider_credentials(
         self, tenant_id: str, user_id: str, provider: str, credentials: dict[str, Any]
@@ -66,7 +66,7 @@ dependencies = [
     "pycryptodome==3.19.1",
     "pydantic~=2.11.4",
     "pydantic-extra-types~=2.10.3",
-    "pydantic-settings~=2.9.1",
+    "pydantic-settings~=2.10.1",
     "pyjwt~=2.8.0",
     "pypdfium2==4.30.0",
     "python-docx~=1.1.0",
api/tests/unit_tests/core/plugin/__init__.py (new file, 0 lines)
api/tests/unit_tests/core/plugin/impl/__init__.py (new file, 0 lines)
api/tests/unit_tests/core/plugin/impl/test_tool.py (new file, 235 lines)
@@ -0,0 +1,235 @@
import unittest
from unittest.mock import patch

import pytest

from core.plugin.impl.tool import FileChunk, PluginToolManager
from core.tools.entities.tool_entities import ToolInvokeMessage


class TestFileChunk(unittest.TestCase):
    def test_file_chunk_creation(self):
        """Test FileChunk creation with specified total length."""
        chunk = FileChunk(total_length=1024)

        assert chunk.total_length == 1024
        assert chunk.bytes_written == 0
        assert len(chunk.data) == 1024
        assert isinstance(chunk.data, bytearray)

    def test_file_chunk_pydantic_model(self):
        """Test FileChunk as a Pydantic model."""
        chunk = FileChunk(total_length=512, bytes_written=100, data=bytearray(512))

        assert chunk.total_length == 512
        assert chunk.bytes_written == 100
        assert len(chunk.data) == 512


class TestBlobChunkProcessing(unittest.TestCase):
    def setUp(self):
        self.manager = PluginToolManager()

    def test_process_non_blob_chunk_messages(self):
        """Test that non-blob chunk messages are passed through unchanged."""
        # Create test messages
        text_message = ToolInvokeMessage(
            type=ToolInvokeMessage.MessageType.TEXT, message=ToolInvokeMessage.TextMessage(text="Test message")
        )

        def response_generator():
            yield text_message

        # Process the response
        result = list(self.manager._process_blob_chunks(response_generator()))

        assert len(result) == 1
        assert result[0] == text_message

    def test_process_single_blob_chunk(self):
        """Test processing a complete blob in a single chunk (marked as end)."""
        test_data = b"Test file content"

        # Create a blob chunk message marked as end
        chunk_message = ToolInvokeMessage(
            type=ToolInvokeMessage.MessageType.BLOB_CHUNK,
            message=ToolInvokeMessage.BlobChunkMessage(
                id="file1", sequence=0, total_length=len(test_data), blob=test_data, end=True
            ),
            meta={"test": "meta"},
        )

        def response_generator():
            yield chunk_message

        # Process the response
        result = list(self.manager._process_blob_chunks(response_generator()))

        assert len(result) == 1
        assert result[0].type == ToolInvokeMessage.MessageType.BLOB
        assert isinstance(result[0].message, ToolInvokeMessage.BlobMessage)
        # The blob should be the complete file buffer, not just the chunk data
        assert len(result[0].message.blob) == len(test_data)
        assert result[0].meta == {"test": "meta"}

    def test_process_multiple_blob_chunks(self):
        """Test assembling a blob from multiple chunks."""
        chunk1_data = b"First part"
        chunk2_data = b" Second part"
        total_data = chunk1_data + chunk2_data

        # Create multiple chunk messages
        chunk1 = ToolInvokeMessage(
            type=ToolInvokeMessage.MessageType.BLOB_CHUNK,
            message=ToolInvokeMessage.BlobChunkMessage(
                id="file1", sequence=0, total_length=len(total_data), blob=chunk1_data, end=False
            ),
        )

        chunk2 = ToolInvokeMessage(
            type=ToolInvokeMessage.MessageType.BLOB_CHUNK,
            message=ToolInvokeMessage.BlobChunkMessage(
                id="file1", sequence=1, total_length=len(total_data), blob=chunk2_data, end=True
            ),
        )

        def response_generator():
            yield chunk1
            yield chunk2

        # Process the response
        result = list(self.manager._process_blob_chunks(response_generator()))

        # Should only yield one complete blob message
        assert len(result) == 1
        assert result[0].type == ToolInvokeMessage.MessageType.BLOB
        assert isinstance(result[0].message, ToolInvokeMessage.BlobMessage)
        assert result[0].message.blob[: len(total_data)] == total_data

    def test_chunk_size_limit_exceeded(self):
"""Test that chunks exceeding size limit raise an error."""
|
||||
# Create a chunk that exceeds the 12KB limit
|
||||
oversized_data = b"x" * 12222 # 12KB
        chunk_message = ToolInvokeMessage(
            type=ToolInvokeMessage.MessageType.BLOB_CHUNK,
            message=ToolInvokeMessage.BlobChunkMessage(
                id="file1", sequence=0, total_length=10000, blob=oversized_data, end=False
            ),
        )

        def response_generator():
            yield chunk_message

        # Should raise ValueError for oversized chunk
        with pytest.raises(ValueError) as exc_info:
            list(self.manager._process_blob_chunks(response_generator()))

        assert "File chunk is too large" in str(exc_info.value)
        assert "10000 bytes" in str(exc_info.value)

    @patch("core.plugin.impl.tool.dify_config")
    def test_file_size_limit_exceeded(self, mock_config):
        """Test that files exceeding total size limit raise an error."""
        mock_config.TOOL_FILE_MAX_SIZE = 1024  # Set limit to 1KB

        # Create chunks that together exceed the limit
        chunk1 = ToolInvokeMessage(
            type=ToolInvokeMessage.MessageType.BLOB_CHUNK,
            message=ToolInvokeMessage.BlobChunkMessage(
                id="file1", sequence=0, total_length=2000, blob=b"x" * 600, end=False
            ),
        )

        chunk2 = ToolInvokeMessage(
            type=ToolInvokeMessage.MessageType.BLOB_CHUNK,
            message=ToolInvokeMessage.BlobChunkMessage(
                id="file1", sequence=1, total_length=2000, blob=b"x" * 600, end=False
            ),
        )

        def response_generator():
            yield chunk1
            yield chunk2
        # The declared total_length (2000) already exceeds the mocked 1KB limit,
        # so processing fails on the first chunk
        with pytest.raises(ValueError) as exc_info:
            list(self.manager._process_blob_chunks(response_generator()))

        assert "File is too large" in str(exc_info.value)
        assert "1024 bytes" in str(exc_info.value)

    def test_multiple_files_concurrent_processing(self):
        """Test processing interleaved chunks from multiple files."""
        # Create chunks for two different files
        file1_chunk1 = ToolInvokeMessage(
            type=ToolInvokeMessage.MessageType.BLOB_CHUNK,
            message=ToolInvokeMessage.BlobChunkMessage(
                id="file1", sequence=0, total_length=10, blob=b"File1 data", end=False
            ),
        )

        file2_chunk1 = ToolInvokeMessage(
            type=ToolInvokeMessage.MessageType.BLOB_CHUNK,
            message=ToolInvokeMessage.BlobChunkMessage(
                id="file2", sequence=0, total_length=10, blob=b"File2 data", end=False
            ),
        )

        file1_chunk2 = ToolInvokeMessage(
            type=ToolInvokeMessage.MessageType.BLOB_CHUNK,
            message=ToolInvokeMessage.BlobChunkMessage(id="file1", sequence=1, total_length=10, blob=b"", end=True),
        )

        file2_chunk2 = ToolInvokeMessage(
            type=ToolInvokeMessage.MessageType.BLOB_CHUNK,
            message=ToolInvokeMessage.BlobChunkMessage(id="file2", sequence=1, total_length=10, blob=b"", end=True),
        )

        def response_generator():
            yield file1_chunk1
            yield file2_chunk1
            yield file1_chunk2
            yield file2_chunk2

        # Process the response
        result = list(self.manager._process_blob_chunks(response_generator()))

        # Should get two complete blobs
        assert len(result) == 2
        assert all(r.type == ToolInvokeMessage.MessageType.BLOB for r in result)

    def test_mixed_message_types(self):
        """Test processing a mix of blob chunks and other message types."""
        text_msg = ToolInvokeMessage(
            type=ToolInvokeMessage.MessageType.TEXT, message=ToolInvokeMessage.TextMessage(text="Status update")
        )

        chunk_msg = ToolInvokeMessage(
            type=ToolInvokeMessage.MessageType.BLOB_CHUNK,
            message=ToolInvokeMessage.BlobChunkMessage(id="file1", sequence=0, total_length=4, blob=b"Data", end=True),
        )

        # Use LOG message type with ERROR status instead of non-existent ERROR message type
        error_msg = ToolInvokeMessage(
            type=ToolInvokeMessage.MessageType.LOG,
            message=ToolInvokeMessage.LogMessage(
                id="error1",
                label="Error Log",
                status=ToolInvokeMessage.LogMessage.LogStatus.ERROR,
                data={"error": "Test error"},
            ),
        )

        def response_generator():
            yield text_msg
            yield chunk_msg
            yield error_msg

        # Process the response
        result = list(self.manager._process_blob_chunks(response_generator()))

        assert len(result) == 3
        assert result[0].type == ToolInvokeMessage.MessageType.TEXT
        assert result[1].type == ToolInvokeMessage.MessageType.BLOB
        assert result[2].type == ToolInvokeMessage.MessageType.LOG
api/uv.lock (generated, 8 lines changed)
@@ -1486,7 +1486,7 @@ requires-dist = [
     { name = "pycryptodome", specifier = "==3.19.1" },
     { name = "pydantic", specifier = "~=2.11.4" },
     { name = "pydantic-extra-types", specifier = "~=2.10.3" },
-    { name = "pydantic-settings", specifier = "~=2.9.1" },
+    { name = "pydantic-settings", specifier = "~=2.10.1" },
     { name = "pyjwt", specifier = "~=2.8.0" },
     { name = "pypdfium2", specifier = "==4.30.0" },
     { name = "python-docx", specifier = "~=1.1.0" },

@@ -4474,16 +4474,16 @@ wheels = [
 
 [[package]]
 name = "pydantic-settings"
-version = "2.9.1"
+version = "2.10.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "pydantic" },
     { name = "python-dotenv" },
     { name = "typing-inspection" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/67/1d/42628a2c33e93f8e9acbde0d5d735fa0850f3e6a2f8cb1eb6c40b9a732ac/pydantic_settings-2.9.1.tar.gz", hash = "sha256:c509bf79d27563add44e8446233359004ed85066cd096d8b510f715e6ef5d268", size = 163234, upload-time = "2025-04-18T16:44:48.265Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/68/85/1ea668bbab3c50071ca613c6ab30047fb36ab0da1b92fa8f17bbc38fd36c/pydantic_settings-2.10.1.tar.gz", hash = "sha256:06f0062169818d0f5524420a360d632d5857b83cffd4d42fe29597807a1614ee", size = 172583, upload-time = "2025-06-24T13:26:46.841Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/b6/5f/d6d641b490fd3ec2c4c13b4244d68deea3a1b970a97be64f34fb5504ff72/pydantic_settings-2.9.1-py3-none-any.whl", hash = "sha256:59b4f431b1defb26fe620c71a7d3968a710d719f5f4cdbbdb7926edeb770f6ef", size = 44356, upload-time = "2025-04-18T16:44:46.617Z" },
+    { url = "https://files.pythonhosted.org/packages/58/f0/427018098906416f580e3cf1366d3b1abfb408a0652e9f31600c24a1903c/pydantic_settings-2.10.1-py3-none-any.whl", hash = "sha256:a60952460b99cf661dc25c29c0ef171721f98bfcb52ef8d9ea4c943d7c8cc796", size = 45235, upload-time = "2025-06-24T13:26:45.485Z" },
 ]
 
 [[package]]